# 最近要分析天氣對業務的影響，所以做了一個歷史天氣的爬蟲
import demjson
import requests
class Weather_2345():
    """Scraper for the monthly historical weather files of tianqi.2345.com.

    An instance is configured with an area name and an inclusive range of
    months; :meth:`get_weather` downloads one file per month and flattens the
    daily entries into tuples.
    """

    def __init__(self, area, begin_time, end_time, timeout=10):
        """
        :param area: area name to search for in the site's city list
        :param begin_time: first month to fetch, as a 'YYYYMM' string
        :param end_time: last month to fetch (inclusive), as a 'YYYYMM' string
        :param timeout: seconds to wait on each HTTP request before giving up
        """
        self.area, self.begin_time, self.end_time = area, begin_time, end_time
        self.timeout = timeout
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
        }
        # str.format template: {0} is the area code, {1} is the 'YYYYMM' month.
        self.prefix_url = "http://tianqi.2345.com/t/wea_history/js/{1}/{0}_{1}.js"
        self.code_url = "http://tianqi.2345.com/js/citySelectData.js"
        # Cached body of code_url; downloaded lazily by get_areacode().
        self.code_str = None

    def get_weather(self):
        """Fetch the daily weather records for ``self.area``.

        A month whose file cannot be downloaded or decoded is reported on
        stdout and skipped; rows already collected are kept.

        :return: list of tuples (area, date, highest temperature, lowest
                 temperature, weather, wind direction, wind force, air
                 quality index, air quality description, air quality level).
                 The three air quality fields are None for days whose entry
                 has no "aqi" key.
        """
        result = []
        months = self.__generate_time_list(self.begin_time, self.end_time)
        if not months:
            return result

        # The area code does not depend on the month, so resolve it once
        # instead of once per iteration.
        area_code = self.get_areacode(self.area)
        if area_code is None:
            # Without a code every URL would be built as "None_<month>.js";
            # report the real problem and skip the pointless requests.
            print("error area:" + str(self.area))
            return result

        for curr_month in months:
            url = self.prefix_url.format(area_code, curr_month)
            try:
                page = requests.get(url, headers=self.headers, timeout=self.timeout)
                # Drop the first 16 characters and the last one before decoding
                # (presumably a JavaScript assignment prefix and a trailing
                # ';' -- TODO confirm against a live response).
                response_dict = demjson.decode(page.text[16:-1])
                city = response_dict['city']
                for line in response_dict['tqInfo']:
                    if not line:
                        # Empty entries carry no data.
                        continue
                    if "aqi" in line:
                        air = (line["aqi"], line["aqiInfo"], line["aqiLevel"])
                    else:
                        air = (None, None, None)
                    result.append((city, line["ymd"],
                                   line["bWendu"].rstrip("℃"), line["yWendu"].rstrip("℃"),
                                   line["tianqi"], line["fengxiang"], line["fengli"]) + air)
            except Exception:
                # Best-effort crawl: one bad month must not abort the others.
                print("error url:" + url)
        return result

    def get_areacode(self, area):
        """Look up the code for *area* in the site's city-selection script.

        The script is downloaded on first use and cached in ``self.code_str``.

        :param area: area name to search for
        :return: the five characters found 8 to 3 positions before the first
                 occurrence of *area* at or after offset 203, or None when
                 *area* is empty or does not occur
        """
        if not area:
            # An empty needle would "match" at offset 203 and yield garbage.
            return None
        if not self.code_str:
            self.code_str = requests.get(
                self.code_url, headers=self.headers, timeout=self.timeout).text
        try:
            area_index = self.code_str.index(area, 203)
        except (TypeError, ValueError):
            # ValueError: not found; TypeError: *area* is not a string.
            return None
        # NOTE(review): relies on a fixed layout where a 5-character code and
        # 3 separator characters precede the name -- confirm against the file.
        return self.code_str[area_index - 8: area_index - 3]

    def __generate_time_list(self, begin_time, end_time):
        """Return every 'YYYYMM' month from begin_time to end_time inclusive.

        :param begin_time: first month, as a 'YYYYMM' string
        :param end_time: last month, as a 'YYYYMM' string
        :return: list of 'YYYYMM' strings; empty when begin_time is after
                 end_time
        """
        # Count months on a single axis (year * 12 + zero-based month) so the
        # range needs no slicing; the previous negative-index slice returned
        # an empty list whenever end_time fell in December.
        first = int(begin_time[:4]) * 12 + int(begin_time[4:]) - 1
        last = int(end_time[:4]) * 12 + int(end_time[4:]) - 1
        return ["{:0>4d}{:0>2d}".format(index // 12, index % 12 + 1)
                for index in range(first, last + 1)]
if __name__ == '__main__':
    # Usage 1: fetch a single area and print the raw records.
    weather = Weather_2345('廣州','201811','201901')
    print(weather.get_weather())
    # Usage 2: reuse one instance for several areas and write the records to
    # a comma-separated file. The area passed to the constructor is only a
    # placeholder because it is overwritten inside the loop.
    # NOTE(review): the Chinese literals below look like they picked up
    # encoding artifacts; they are left untouched because they are runtime data.
    weather = Weather_2345('下面會(huì)再次設(shè)置區(qū)域這里可以隨便填','201811','201901')
    area_list = ["白云","從化","花都","海珠","黃埔","荔灣","南沙","番禺","天河","越秀","增城"]
    with open('./weather.log','w',encoding='utf-8') as fw:
        fw.write("區(qū)域,日期,最高氣溫,最低氣溫,天氣,風(fēng)向,風(fēng)力,空氣指數(shù),空氣情況,空氣等級(jí)" + "\n")
        for area in area_list:
            weather.area = area
            for record in weather.get_weather():
                # Records without air-quality data contain None, and other
                # fields may be non-strings; str.join() only accepts strings,
                # so stringify every field and write None as an empty field.
                fw.write(",".join("" if field is None else str(field)
                                  for field in record) + '\n')