Python 抓取中国天气网天气数据

Python 抓取中国天气网天气数据

众所周知,中国天气网提供了一个公共天气预报 API 接口,但不知为何数据一直停在3月4号不再更新。最近做一个天气方面的 APP,需要用到一些天气数据,而360的接口公司不让用,只好自己写一个 Python 脚本放在数据中心。


先发三个目前还能用的接口,据说万年历有未来7天预报的 API。

http://weather.com.cn/data/zs/101280601.html 各种各样的指数

http://weather.com.cn/data/cityinfo/101280601.html 简洁天气信息

http://weather.com.cn/data/sk/101280601.html 实况天气


获取未来15天温度

# coding=utf-8
# Scrape the weather.com.cn forecast page for station 101280601, write the
# scraped values to Weather.html as a JSON document (keys temp1..temp7,
# weather1..weather7, wind1..wind7, index, index48_d) and insert today's
# low/high temperature into the MySQL table `record`.
#
# Python 2 script: it relies on urllib.urlopen and MySQLdb.
import ConfigParser
import datetime
import re
import time
import urllib

import MySQLdb

# Read the clock once so the date parts and the weekday always agree, even
# when the script runs across midnight.
_today = time.localtime()
y = time.strftime("%Y", _today)
m = time.strftime("%m", _today)
d = time.strftime("%d", _today)
now = y + '-' + m + '-' + d
# tm_wday counts from Monday == 0.  The weekday used to be hard-coded to
# "星期五" (Friday), which was wrong on every other day of the week.
week = ['星期一', '星期二', '星期三', '星期四',
        '星期五', '星期六', '星期日'][_today.tm_wday]
print(now)


def getHtml(url):
    """Return the raw body of *url*, always closing the connection."""
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        page.close()


def getRel(html, reg):
    """Return every match of the regular expression *reg* found in *html*."""
    return re.compile(reg).findall(html)


def _json_escape(text):
    """Escape backslashes and double quotes so *text* is safe inside "..."."""
    return text.replace('\\', '\\\\').replace('"', '\\"')


html = getHtml("http://weather.com.cn/weather/101280601.shtml")

# NOTE(review): the published copy of this script had lost its backslashes
# (a literal "s" sat between the tags); they are restored as \s here.  The
# tag attributes (e.g. `<p >`) look stripped as well and could not be
# recovered -- verify every pattern against the live page before relying on it.
reg1 = r'<p >\s<span>(.+)</span><i>°C</i>'  # temperatures
reg2 = r'</span>\s</em>\s<i>(.+)</i>'  # wind
reg3 = r'<p >(.+)</p>\s<p >'  # weather description
reg4 = r'<section >\s.+\s.+\s.+7d1"><b>(.+)</b>'  # "feeling" index
reg5 = r'<section >\s.+\s.+\s.+7d1"><b>.+</b>(.+)</aside>'  # clothing advice

temputer = getRel(html, reg1)
if len(temputer) < 2:
    # Fail with a readable message instead of a bare IndexError when the
    # page yields no temperatures (layout change, night-time page, ...).
    raise SystemExit("no temperature data found on the forecast page")
# presumably the first two matches are today's high and low -- verify
# against the page.
todayhigh = temputer[0]
todaylow = temputer[1]

# Fold consecutive values into one "<first>℃~<second>℃" string per pair.
# A trailing unpaired value is dropped rather than raising IndexError.
temputer = [first + "℃~" + second + "℃"
            for first, second in zip(temputer[0::2], temputer[1::2])]

wind = getRel(html, reg2)
index = getRel(html, reg3)
feeling = getRel(html, reg4)
suggest = getRel(html, reg5)
mlist = temputer + index + wind + feeling + suggest
print(mlist)

# Output keys, matched positionally against mlist.
_mlist = ['temp1', 'temp2', 'temp3', 'temp4', 'temp5', 'temp6', 'temp7',
          'weather1', 'weather2', 'weather3', 'weather4', 'weather5',
          'weather6', 'weather7',
          'wind1', 'wind2', 'wind3', 'wind4', 'wind5', 'wind6', 'wind7',
          'index', 'index48_d']
if len(mlist) < len(_mlist):
    # Keys and values are paired by position only, so a short scrape would
    # silently attach values to the wrong keys.
    raise SystemExit("expected %d scraped fields, got %d"
                     % (len(_mlist), len(mlist)))

header = ('{"weatherinfo":{"city":"深圳","city_en":"shenzhen",'
          '"date_y":"%s年%s月%s日","week":"%s",' % (y, m, d, week))
fields = ','.join('"%s":"%s"' % (key, _json_escape(value))
                  for key, value in zip(_mlist, mlist))
# `with` closes the file even if the write fails.
with open('Weather.html', 'w') as f:
    f.write(header + fields + '}}')

# NOTE(review): database credentials are hard-coded; consider moving them
# to a configuration file.
try:
    conn = MySQLdb.connect(host='localhost', user='root', passwd='123456',
                           db='weather', charset='utf8')
    try:
        cur = conn.cursor()
        # The last column is inserted as '-1'.
        val = [now, todaylow, todayhigh, '-1']
        print(val)
        cur.execute('insert into record values(%s,%s,%s,%s)', val)
        conn.commit()
        cur.close()
    finally:
        conn.close()
except MySQLdb.Error as e:
    print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
else:
    # Report success only when the insert actually went through.
    print('Update and inser Database Success')
# raw_input()


获取实时空气质量数据(从 pm25.in 抓取)

# coding=utf-8
# Scrape the pm25.in page for Shenzhen, write the air-quality readings to
# AirCondition.html as a one-element JSON array, and store the second
# scraped value (key pm2_5_24h) in the pm2_5 column of today's row of the
# MySQL table `record`.
#
# Python 2 script: it relies on urllib.urlopen and MySQLdb.
import datetime
import re
import time
import urllib

import MySQLdb

# Read the clock once so the three date parts always agree.
_today = time.localtime()
y = time.strftime("%Y", _today)
m = time.strftime("%m", _today)
d = time.strftime("%d", _today)
now = y + '-' + m + '-' + d
print(now)


def getHtml(url):
    """Return the raw body of *url*, always closing the connection."""
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        page.close()


def getRel(html, reg):
    """Return every match of the regular expression *reg* found in *html*."""
    return re.compile(reg).findall(html)


def _json_escape(text):
    """Escape backslashes and double quotes so *text* is safe inside "..."."""
    return text.replace('\\', '\\\\').replace('"', '\\"')


html = getHtml("http://www.pm25.in/shenzhen")

# NOTE(review): the published copy of this script had lost its backslashes
# ("s+" and "[d|.]"); they are restored as \s and \d here.  The stray "|"
# inside the character class is dropped so only digits and dots are
# captured, because these values are written into the JSON unquoted.  Tag
# attributes (e.g. `<div >`) look stripped too -- verify the patterns
# against the live page.
reg1 = r'<div class.*="value">\s+([\d.]+)\s+</div>'  # numeric readings
reg2 = r'<div >\s+<h4>\s+(.+)\s+</h4>'  # air-quality level
reg3 = r'<p>建议采取的措施:\s+(.+)\s+</p>'  # suggested measures

val = getRel(html, reg1)
index = getRel(html, reg2)
suggest = getRel(html, reg3)
mlist = val + index + suggest
print(mlist)

# Output keys, matched positionally against mlist: eight numeric readings
# followed by two text fields.
_mlist = ['aqi', 'pm2_5_24h', 'PM10/1h', 'CO/1h', 'NO2/1h', 'O3/1h',
          'O3/8h', 'SO2/1h', 'quality', 'suggest']
print(_mlist)
if len(mlist) != len(_mlist):
    # Keys and values are paired by position only; any other count means
    # the page layout changed and the pairing would be wrong.
    raise SystemExit("expected %d scraped fields, got %d"
                     % (len(_mlist), len(mlist)))
pm = str(mlist[1])

# Numeric readings are emitted bare, the two text fields quoted.  As pasted,
# the "quality" field was written once per loop iteration, producing
# duplicate keys; each key is now written exactly once.
fields = ['"%s":%s' % (key, value)
          for key, value in zip(_mlist[:-2], mlist[:-2])]
fields += ['"%s":"%s"' % (key, _json_escape(value))
           for key, value in zip(_mlist[-2:], mlist[-2:])]
# `with` closes the file even if the write fails.
with open('AirCondition.html', 'w') as f:
    f.write('[{' + ','.join(fields) + '}]')

print("%s %s" % (now, pm))

# NOTE(review): database credentials are hard-coded; consider moving them
# to a configuration file.
try:
    conn = MySQLdb.connect(host='localhost', user='root', passwd='123456',
                           db='weather', charset='utf8')
    try:
        cur = conn.cursor()
        # pm comes from scraped web content, so let the driver quote it
        # instead of splicing it into the SQL text.
        sql = "UPDATE record SET pm2_5 = %s WHERE date = %s"
        params = (pm, now)
        print("%s %r" % (sql, params))
        cur.execute(sql, params)
        conn.commit()
        cur.close()
    finally:
        conn.close()
except MySQLdb.Error as e:
    print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
else:
    # Report success only when the update actually went through.
    print('Air update and inser Database Success')
# raw_input()


然后写个线程,每隔半个小时更新一下,最后用 SimpleHTTPServer 运行就能访问了。

# -*- coding: utf-8 -*-
# Minimal static file server built on the Python 2 standard library.
# SimpleHTTPRequestHandler serves files relative to the current working
# directory, so start this script from the directory that holds the
# generated files.
import SimpleHTTPServer
import SocketServer

# NOTE(review): binding a port below 1024 usually needs elevated
# privileges on Unix-like systems -- confirm for the deployment host.
PORT = 80

Handler = SimpleHTTPServer.SimpleHTTPRequestHandler

# An empty host string makes the server listen on every interface.
httpd = SocketServer.TCPServer(("", PORT), Handler)

print("serving at port %d" % PORT)

# Blocks and handles requests until the process is stopped.
httpd.serve_forever()





PS:运行两天后发现中国天气网有个大坑(见下图):晚上抓取的数据没有白天的温度,也就是最高温度。



果断转移阵地,从2345拿数据。

# coding=utf-8
# Fetch the tianqi.2345.com page for Shenzhen and print the temperature
# values scraped from it.
#
# Python 2 script: it relies on urllib.urlopen.
import ConfigParser
import datetime
import re
import time
import urllib


def getHtml(url):
    """Return the raw body of *url*, always closing the connection."""
    page = urllib.urlopen(url)
    try:
        return page.read()
    finally:
        page.close()


def getRel(html, reg):
    """Return every match of the regular expression *reg* found in *html*."""
    return re.compile(reg).findall(html)


html = getHtml("http://tianqi.2345.com/shenzhen/59493.htm")

# NOTE(review): the published copy had lost its backslash ("-?w+"); it is
# restored as \w here.  The `<font >` attributes look stripped too, so
# verify the pattern against the live page.  The four extra patterns copied
# from the weather.com.cn script were never used here and are removed.
reg1 = r'<font >(-?\w+).{0,2}</font>'  # temperatures

temputer = getRel(html, reg1)
print(temputer)






免责声明:本网信息来自于互联网,目的在于传递更多信息,并不代表本网赞同其观点。其原创性以及文中陈述文字和内容未经本站证实,对本文以及其中全部或者部分内容、文字的真实性、完整性、及时性本站不作任何保证或承诺,并请自行核实相关内容。本站不承担此类作品侵权行为的直接责任及连带责任。如若本网有任何内容侵犯您的权益,请及时联系我们,本站将会在24小时内处理完毕。
相关文章
返回顶部