python爬蟲爬圖片
爬蟲爬baidu圖片
第一步
載入爬蟲模塊
from requests_html import HTMLSession #載入爬蟲模塊
第二步
創建 session 對象
from requests_html import HTMLSession  # crawler library

# Create the session object that will issue the HTTP requests.
session = HTMLSession()
推薦:Python 大牛在線分享技術,扣 qun:855408893
領域:web 開發,爬蟲,數據分析,數據挖掘,人工智能
零基礎到項目實戰,7 天學習上手做項目
第三步
發現百度圖片搜索的 url 規律,發起請求,並匹配到圖片的 url
[http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=我們搜圖片的關鍵字](http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=%E6%88%91%E4%BB%AC%E6%90%9C%E5%9B%BE%E7%89%87%E7%9A%84%E5%85%B3%E9%94%AE%E5%AD%97)
from requests_html import HTMLSession  # crawler library

# Create the session object that will issue the HTTP requests.
session = HTMLSession()

# Baidu image search URL; the keyword goes after "word=".
# "二傻子" is used here as the example keyword.
search_url = 'http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=二傻子'
response = session.get(search_url)

# Template that locates the thumbnail URLs in the page source; the "{}"
# placeholder marks the part to capture. NOTE(review): despite the name,
# requests-html treats this as a `parse`-style template rather than a
# regular expression - confirm against the library docs.
img_url_regex = '"thumbURL":"{}",'

# Collect every match of the template found in the page.
img_url_list = response.html.search_all(img_url_regex)
第四步
訪問圖片url并且保存下來
from requests_html import HTMLSession  # crawler library

# Create the session object that will issue the HTTP requests.
session = HTMLSession()

# Fetch the Baidu image search result page, using "二傻子" as the example
# keyword.
response = session.get('http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word=二傻子')

# Template that locates the thumbnail URLs in the page source; the "{}"
# placeholder marks the part to capture.
img_url_regex = '"thumbURL":"{}",'

# Collect every match of the template found in the page.
img_url_list = response.html.search_all(img_url_regex)

# Download each thumbnail and save it as a numbered file (starting at 1)
# in the current working directory.
for num, url in enumerate(img_url_list, start=1):
    # url[0] is the first captured field of the match, i.e. the image URL.
    # A separate name keeps the search-page `response` from being
    # overwritten by the per-image response.
    img_response = session.get(url[0])
    # Write the raw response bytes to disk.
    with open(f'第{num}張.jpg', 'wb') as fw:
        fw.write(img_response.content)
第五步
類的封裝
from requests_html import HTMLSession
class BaiDuImg:
    """Download the thumbnails from a Baidu image search.

    Typical use is ``BaiDuImg().run()``, which reads a keyword from
    standard input, requests the matching search page, extracts the
    thumbnail URLs and writes each image to the current working directory
    as a numbered ``.jpg`` file.
    """

    # Created once, when the class body is executed, and shared by every
    # instance.
    session = HTMLSession()
    # Template that locates the thumbnail URLs in the page source; the "{}"
    # placeholder marks the part to capture.
    img_url_regex = '"thumbURL":"{}",'
    # Search page URL; filled in by get_search().
    url = ''
    # Matches found on the search page; get_img_url_list() rebinds this on
    # the instance, so the class-level list itself is never mutated.
    img_url_list = []

    def get_search(self):
        """Read a keyword from standard input and build the search URL."""
        search = input()
        self.url = f'http://image.baidu.com/search/index?tn=baiduimage&fm=result&ie=utf-8&word={search}'

    def get_img_url_list(self):
        """Request ``self.url`` and store every thumbnail match found."""
        response = self.session.get(self.url)
        # The template is a class attribute and must be reached through
        # ``self``; the bare name ``img_url_regex`` is not visible inside a
        # method and raised NameError.
        self.img_url_list = response.html.search_all(self.img_url_regex)

    def save_img(self):
        """Download every collected image into a numbered ``.jpg`` file."""
        # Files are numbered from 1 in the order the matches were found.
        for num, url in enumerate(self.img_url_list, start=1):
            # url[0] is the first captured field of the match, i.e. the
            # image URL.
            response = self.session.get(url[0])
            # Write the raw response bytes to disk.
            with open(f'第{num}張.jpg', 'wb') as fw:
                fw.write(response.content)

    def run(self):
        """Run the whole pipeline: read keyword, collect URLs, save images."""
        self.get_search()
        self.get_img_url_list()
        self.save_img()
if __name__ == '__main__':
    # Script entry point: build the crawler and run the full pipeline.
    BaiDuImg().run()