Python制作疫情地圖
詳細講解視頻地址——詳細視頻講解
第一彈 獲取數據(寫入excel)
以下代碼是繪制地圖時調用的類,已封裝。
導入需要的模塊
若未安裝,win+R進入命令行窗口,輸入:pip install module(模塊名)
import requests
from lxml import etree
import json
import re
import openpyxl
創建一個類
class Get_data():
    """Fetch the Baidu epidemic page and extract data from the saved copy.

    ``get_data`` stores the page text in ``html.txt``; ``get_time`` and
    ``parse_data`` read that file back, so it must exist before they are
    called. All files are read and written as UTF-8 so the result does not
    depend on the platform's default encoding.
    """

    # Fetch the data
    def get_data(self):
        """Download the page and write its text to ``html.txt``.

        Raises:
            requests.RequestException: on network failure, timeout, or an
                HTTP error status.
        """
        # Target URL
        url = "https://voice.baidu.com/act/newpneumonia/newpneumonia/"
        # Send a browser-like User-Agent with the request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/80.0.3987.149 Safari/537.36 '
        }
        # Issue the GET request; the timeout keeps a stalled connection from
        # hanging forever, and an HTTP error is reported here instead of
        # surfacing later as a confusing parse failure.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        # Save the response text to a file so it can be inspected/parsed later
        with open('html.txt', 'w', encoding='utf-8') as file:
            file.write(response.text)

    def get_time(self):
        """Print and return the two "last updated" timestamps in ``html.txt``.

        Returns:
            tuple: ``(time_in, time_out)`` -- the values captured after
            ``"mapLastUpdatedTime"`` and ``"foreignLastUpdatedTime"``.

        Raises:
            IndexError: if either key does not occur in the saved page.
        """
        with open('html.txt', 'r', encoding='utf-8') as file:
            text = file.read()
        # Extract the update times (first occurrence of each key)
        time_in = re.findall(r'"mapLastUpdatedTime":"(.*?)"', text)[0]
        time_out = re.findall(r'"foreignLastUpdatedTime":"(.*?)"', text)[0]
        print('國內疫情更新時間為 ' + time_in)
        print('國外疫情更新時間為 ' + time_out)
        return time_in, time_out

    # Parse the data
    def parse_data(self):
        """Extract ``caseList`` from the saved page and write it to ``data.json``.

        Raises:
            IndexError: if the page has no ``<script type="application/json">``
                element.
            KeyError: if the embedded JSON lacks ``component[0]['caseList']``.
        """
        with open('html.txt', 'r', encoding='utf-8') as file:
            text = file.read()
        # Build an HTML tree from the saved text
        html = etree.HTML(text)
        # The data is embedded as JSON inside a <script> element
        scripts = html.xpath('//script[@type="application/json"]/text()')
        payload = json.loads(scripts[0])
        # Re-serialise only the caseList part
        case_list = json.dumps(payload['component'][0]['caseList'])
        with open('data.json', 'w', encoding='utf-8') as file:
            file.write(case_list)
        print('數據已寫入json文件...')
# Fetch the page, keep a raw copy, and print the parsed records.
url = "https://voice.baidu.com/act/newpneumonia/newpneumonia/"
# Send the same browser-like User-Agent that Get_data.get_data uses.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/80.0.3987.149 Safari/537.36 '
}
# The timeout avoids hanging on a stalled connection; raise_for_status
# reports an HTTP error here rather than as a parse failure further down.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
# Save the response text to a file for inspection. UTF-8 is explicit so the
# write does not depend on the platform's default encoding.
with open('html.txt', 'w', encoding='utf-8') as file:
    file.write(response.text)
# Extract the update times (IndexError if a key is absent from the page)
time_in = re.findall(r'"mapLastUpdatedTime":"(.*?)"', response.text)[0]
time_out = re.findall(r'"foreignLastUpdatedTime":"(.*?)"', response.text)[0]
print(time_in)
print(time_out)
# Build an HTML tree
html = etree.HTML(response.text)
# The data is embedded as JSON inside a <script> element
result = html.xpath('//script[@type="application/json"]/text()')
print(type(result))
result = result[0]
print(type(result))
result = json.loads(result)
print(type(result))
# Each entry of caseList is one dict
data_in = result['component'][0]['caseList']
for each in data_in:
    print(each)
    print("\n" + '*' * 20)
data_out = result['component'][0]['globalList']
for each in data_out:
    print(each)
    print("\n" + '*' * 20)
# Field legend (the original author's notes, translated):
#   area               --> mostly a province
#   city               --> city
#   confirmed          --> cumulative confirmed
#   crued              --> cured (the key is spelled 'crued' where it is read below)
#   relativeTime       --> (no description given)
#   confirmedRelative  --> change in cumulative confirmed
#   curedRelative      --> change in cured
#   curConfirm         --> currently confirmed
#   curConfirmRelative --> change in currently confirmed
#
# Pattern noted by the original author (not verified by this code): each
# list item is a dict describing one province-level area; its first 11
# entries are that area's figures, and the 'subList' key leads to dicts
# holding the per-city figures.
將數據寫入excel文件
# Write the collected data to an Excel file.
# Create a workbook.
wb = openpyxl.Workbook()
# The workbook's default sheet receives the domestic figures; every entry of
# data_out gets a sheet of its own further down.
ws_in = wb.active
ws_in.title = "國內疫情"
ws_in.append(['省份', '累計確診', '死亡', '治愈', '現有確診', '累計確診增量', '死亡增量', '治愈增量', '現有確診增量'])
# Keys read from each data_in record, in column order.
# NOTE(review): 'crued' (sic) is the key this script has always read --
# presumably the feed's own spelling; confirm before "correcting" it.
domestic_keys = ('area', 'confirmed', 'died', 'crued', 'curConfirm',
                 'confirmedRelative', 'diedRelative', 'curedRelative',
                 'curConfirmRelative')
for each in data_in:
    # Empty fields are written as '0'; a missing key still raises KeyError.
    ws_in.append(['0' if each[key] == '' else each[key] for key in domestic_keys])
# Foreign data: one sheet per data_out entry, one row per item of its 'subList'.
foreign_keys = ('country', 'confirmed', 'died', 'crued', 'curConfirm',
                'confirmedRelative')
for each in data_out:
    print(each)
    print("\n" + '*' * 20)
    sheet_title = each['area']
    # Create a new worksheet named after the area
    ws_out = wb.create_sheet(sheet_title)
    ws_out.append(['國家', '累計確診', '死亡', '治愈', '現有確診', '累計確診增量'])
    for country in each['subList']:
        ws_out.append(['0' if country[key] == '' else country[key] for key in foreign_keys])
# Save the Excel file
wb.save('./data.xlsx')
生成excel文件(效果展示)
最后附上完整代碼
import requests
from lxml import etree
import json
import re
import openpyxl
class Get_data():
    """Download the Baidu epidemic page and pull data out of the saved copy.

    ``get_data`` writes the page to ``html.txt``. ``get_time`` and
    ``parse_data`` only read that file, so it has to exist before either is
    called. Every file is opened as UTF-8 explicitly, which keeps the
    behaviour the same regardless of the platform's default encoding.
    """

    def get_data(self):
        """Request the page and save its text as ``html.txt``.

        Raises:
            requests.RequestException: if the request fails, times out, or
                the server answers with an HTTP error status.
        """
        # Target URL
        url = "https://voice.baidu.com/act/newpneumonia/newpneumonia/"
        # Browser-like User-Agent sent with the request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/80.0.3987.149 Safari/537.36 '
        }
        # Without a timeout a stalled connection would block indefinitely.
        response = requests.get(url, headers=headers, timeout=30)
        # Fail here on an error status instead of saving an error page.
        response.raise_for_status()
        # Keep the raw page on disk for the other methods to analyse
        with open('html.txt', 'w', encoding='utf-8') as file:
            file.write(response.text)

    def get_time(self):
        """Print and return the update timestamps stored in ``html.txt``.

        Returns:
            tuple: ``(time_in, time_out)``, the first values found after
            ``"mapLastUpdatedTime"`` and ``"foreignLastUpdatedTime"``.

        Raises:
            IndexError: if one of the two keys is not present in the file.
        """
        with open('html.txt', 'r', encoding='utf-8') as file:
            text = file.read()
        # Update times: first match of each key
        time_in = re.findall(r'"mapLastUpdatedTime":"(.*?)"', text)[0]
        time_out = re.findall(r'"foreignLastUpdatedTime":"(.*?)"', text)[0]
        print('國內疫情更新時間為 ' + time_in)
        print('國外疫情更新時間為 ' + time_out)
        return time_in, time_out

    def parse_data(self):
        """Write the page's ``caseList`` to ``data.json`` as JSON text.

        Raises:
            IndexError: if no ``<script type="application/json">`` element
                is found in the saved page.
            KeyError: if ``component[0]['caseList']`` is missing from the
                embedded JSON.
        """
        with open('html.txt', 'r', encoding='utf-8') as file:
            text = file.read()
        # Build an HTML tree and take the text of the embedded JSON script
        html = etree.HTML(text)
        result = html.xpath('//script[@type="application/json"]/text()')
        result = json.loads(result[0])
        # Only the caseList portion is kept, re-serialised as a JSON string
        result = json.dumps(result['component'][0]['caseList'])
        with open('data.json', 'w', encoding='utf-8') as file:
            file.write(result)
        print('數據已寫入json文件...')
# Standalone script: fetch the page, print the parsed records, and export
# them to ./data.xlsx.


def _sheet_row(record, keys):
    """Return the values of *keys* from *record*, with '' replaced by '0'.

    A key missing from *record* raises KeyError.
    """
    return ['0' if record[key] == '' else record[key] for key in keys]


url = "https://voice.baidu.com/act/newpneumonia/newpneumonia/"
# Send the same browser-like User-Agent that Get_data.get_data uses.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/80.0.3987.149 Safari/537.36 '
}
# Timeout: do not hang on a stalled connection. raise_for_status: report an
# HTTP error here instead of as a parse failure below.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
# Save the response text for inspection; UTF-8 is explicit so the write does
# not depend on the platform's default encoding.
with open('html.txt', 'w', encoding='utf-8') as file:
    file.write(response.text)
# Update times (IndexError if a key is absent from the page)
time_in = re.findall(r'"mapLastUpdatedTime":"(.*?)"', response.text)[0]
time_out = re.findall(r'"foreignLastUpdatedTime":"(.*?)"', response.text)[0]
print(time_in)
print(time_out)
# Build an HTML tree
html = etree.HTML(response.text)
# The data is embedded as JSON inside a <script> element
result = html.xpath('//script[@type="application/json"]/text()')
print(type(result))
result = result[0]
print(type(result))
result = json.loads(result)
print(type(result))
# Each entry of caseList is one dict
data_in = result['component'][0]['caseList']
for each in data_in:
    print(each)
    print("\n" + '*' * 20)
data_out = result['component'][0]['globalList']
for each in data_out:
    print(each)
    print("\n" + '*' * 20)
# Field legend (the original author's notes, translated):
#   area               --> mostly a province
#   city               --> city
#   confirmed          --> cumulative confirmed
#   died               --> deaths
#   crued              --> cured (the key is spelled 'crued' where it is read below)
#   relativeTime       --> (no description given)
#   confirmedRelative  --> change in cumulative confirmed
#   curedRelative      --> change in cured
#   curConfirm         --> currently confirmed
#   curConfirmRelative --> change in currently confirmed
#   diedRelative       --> change in deaths
#
# Pattern noted by the original author (not verified by this code): each
# list item is a dict describing one province-level area; its first 11
# entries are that area's figures, and the 'subList' key leads to dicts
# holding the per-city figures.

# Write the collected data to an Excel file.
# Create a workbook.
wb = openpyxl.Workbook()
# The default sheet receives the domestic figures; each data_out entry gets
# its own sheet below.
ws_in = wb.active
ws_in.title = "國內疫情"
ws_in.append(['省份', '累計確診', '死亡', '治愈', '現有確診', '累計確診增量', '死亡增量', '治愈增量', '現有確診增量'])
# NOTE(review): 'crued' (sic) is the key this script has always read --
# presumably the feed's own spelling; confirm before "correcting" it.
domestic_keys = ('area', 'confirmed', 'died', 'crued', 'curConfirm',
                 'confirmedRelative', 'diedRelative', 'curedRelative',
                 'curConfirmRelative')
for each in data_in:
    ws_in.append(_sheet_row(each, domestic_keys))
# Foreign data: one sheet per data_out entry, one row per item of 'subList'.
foreign_keys = ('country', 'confirmed', 'died', 'crued', 'curConfirm',
                'confirmedRelative')
for each in data_out:
    print(each)
    print("\n" + '*' * 20)
    sheet_title = each['area']
    # Create a new worksheet named after the area
    ws_out = wb.create_sheet(sheet_title)
    ws_out.append(['國家', '累計確診', '死亡', '治愈', '現有確診', '累計確診增量'])
    for country in each['subList']:
        ws_out.append(_sheet_row(country, foreign_keys))
# Save the Excel file
wb.save('./data.xlsx')
求點贊求關注(?ω?)qwqqqqq
詳細講解視頻地址——詳細視頻講解