一、查看網頁源碼
通過搜索我們可以發現網頁源碼中并沒有我們想要抓取的內容
二、打開開發者工具(F12)
重新刷新下網頁
可能有人會要問刷新完有這么多數據這怎么找,我們可以通過搜索關鍵詞來找
獲取請求地址
代碼
獲取數據
def url_parse(timeout=10):
    """Fetch one page of Douban's "豆瓣高分" movie listing and return it as JSON.

    Issues a GET request to the ``search_subjects`` endpoint (20 entries,
    starting at offset 0, sorted by rank) with a ``User-Agent`` header taken
    from ``UserAgent().random``, prints the decoded payload and returns it.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    url = "https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=rank&page_limit=20&page_start=0"
    headers = {"User-Agent": UserAgent().random}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    reply = requests.get(url=url, headers=headers, timeout=timeout)
    # Surface HTTP errors directly instead of letting an error page fail
    # later as a confusing JSON decode error.
    reply.raise_for_status()
    response = reply.json()
    print(response)
    return response
內容解析
def content_parse(res):
    """Extract parallel lists of movie titles and numeric ratings.

    Each entry of ``res["subjects"]`` contributes its ``"title"`` and its
    ``"rate"`` converted to ``float``. The title and the raw rate value are
    printed as each entry is processed.

    Args:
        res: Mapping with a ``"subjects"`` key holding an iterable of
            mappings, each with a ``"title"`` and (normally) a ``"rate"``.

    Returns:
        A ``(titles, ratings)`` tuple of two lists of equal length.

    Raises:
        KeyError: If ``"subjects"`` or an entry's ``"title"`` is missing.
        ValueError: If a non-empty rate is not a valid number.
    """
    vedio_name = []
    vedio_rate = []
    for entry in res["subjects"]:
        name = entry["title"]
        rate = entry.get("rate")
        vedio_name.append(name)
        # An empty or absent rate cannot be passed to float(); record it as
        # 0.0 so both lists stay aligned and the title is still reported.
        vedio_rate.append(float(rate) if rate else 0.0)
        print(name, rate)
    return vedio_name, vedio_rate
制作圖表
def make_pic(name, rate):
    """Draw a horizontal bar chart of ratings, save it, then display it.

    The inputs are reversed before plotting so the first title ends up at
    the top of the chart. The figure is written to ``豆瓣.png`` (relative
    path) before ``plt.show()`` is called.

    Args:
        name: Sequence of movie titles.
        rate: Sequence of numeric ratings, parallel to ``name``.
    """
    plt.figure(figsize=(15, 8), dpi=80)
    # Font file is resolved relative to the working directory; presumably
    # chosen so the Chinese titles render -- confirm the file is available.
    font = FontProperties(fname=r"STZHONGS.TTF", size=12)
    labels = name[::-1]
    widths = rate[::-1]
    # Plot against numeric positions rather than the titles themselves:
    # on a categorical axis two movies with the same title would share one
    # row and their bars would overlap.
    positions = list(range(len(labels)))
    plt.barh(positions, widths, color="red")
    # Tick marks every 0.5 from 0.5 up to 10.0 on the rating axis.
    x_ = [i * 0.5 for i in range(1, 21)]
    plt.xticks(x_, fontproperties=font)
    plt.yticks(positions, labels, fontproperties=font)
    plt.savefig("豆瓣.png")
    plt.show()
完整代碼
import requests
from fake_useragent import UserAgent
from matplotlib import pyplot as plt
from matplotlib.font_manager import FontProperties
#獲取數據
def url_parse(timeout=10):
    """Fetch one page of Douban's "豆瓣高分" movie listing and return it as JSON.

    Issues a GET request to the ``search_subjects`` endpoint (20 entries,
    starting at offset 0, sorted by rank) with a ``User-Agent`` header taken
    from ``UserAgent().random``.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    url = "https://movie.douban.com/j/search_subjects?type=movie&tag=%E8%B1%86%E7%93%A3%E9%AB%98%E5%88%86&sort=rank&page_limit=20&page_start=0"
    headers = {"User-Agent": UserAgent().random}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    reply = requests.get(url=url, headers=headers, timeout=timeout)
    # Surface HTTP errors directly instead of letting an error page fail
    # later as a confusing JSON decode error.
    reply.raise_for_status()
    return reply.json()
#處理內容
def content_parse(res):
    """Extract parallel lists of movie titles and numeric ratings.

    Each entry of ``res["subjects"]`` contributes its ``"title"`` and its
    ``"rate"`` converted to ``float``. The title and the raw rate value are
    printed as each entry is processed.

    Args:
        res: Mapping with a ``"subjects"`` key holding an iterable of
            mappings, each with a ``"title"`` and (normally) a ``"rate"``.

    Returns:
        A ``(titles, ratings)`` tuple of two lists of equal length.

    Raises:
        KeyError: If ``"subjects"`` or an entry's ``"title"`` is missing.
        ValueError: If a non-empty rate is not a valid number.
    """
    vedio_name = []
    vedio_rate = []
    for entry in res["subjects"]:
        name = entry["title"]
        rate = entry.get("rate")
        vedio_name.append(name)
        # An empty or absent rate cannot be passed to float(); record it as
        # 0.0 so both lists stay aligned and the title is still reported.
        vedio_rate.append(float(rate) if rate else 0.0)
        print(name, rate)
    return vedio_name, vedio_rate
#制作圖表
# def make_pic(name,rate):
# fig=plt.figure(figsize=(15,8),dpi=80)
# font=FontProperties(fname=r"STZHONGS.TTF",size=12)
# plt.barh(name[::-1],rate[::-1],color="red")
# plt.xticks(fontproperties=font)
# plt.yticks(name,fontproperties=font)
# plt.savefig("豆瓣.png")
# plt.show()
#主函數
def main():
    """Download the movie listing and parse it into titles and ratings.

    The parsed lists are not used further here: the charting step is
    disabled in this listing.
    """
    payload = url_parse()
    titles, ratings = content_parse(payload)
    # make_pic(titles, ratings)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()