python版本:3.5
源代碼:
#coding=utf8
import re
import requests
import os
'''
爬取當當網五星級圖書榜的圖書鏈接,并下載圖片到一個目錄當中
網(wǎng)址:http://bang.dangdang.com/books/fivestars/01.00.00.00.00.00-recent30-0-0-1-1
'''
# Crawl pages 1-25 of the Dangdang five-star book ranking, print each
# (book link, cover image URL) pair found, and save every cover image as
# "<page>_<index>.jpg" inside `path`.

# Directory that receives the downloaded cover images.
path = 'E:/pictest/dangdang_5star/'
# Create the target directory (including missing parents) once, up front;
# exist_ok avoids the racy "check, then mkdir" sequence.
os.makedirs(path, exist_ok=True)

# Regular expression for one cover entry; group 1 is the book link and
# group 2 is the image URL. Raw string so that "\s" is not treated as a
# string escape.
# NOTE(review): the pattern ends with a literal space after "</div>" --
# kept as in the original; confirm it matches the live page markup.
pat = r'<div class="pic"><a href="(.*?)" target="_blank"><img src="(.*?)" alt=".*?"\s*title=".*?"/></a></div> '
pic_re = re.compile(pat, re.S)

for page in range(1, 26):
    url = 'http://bang.dangdang.com/books/fivestars/01.00.00.00.00.00-recent30-0-0-1-' + str(page)
    try:
        # A timeout keeps one unresponsive listing page from hanging the run.
        html = requests.get(url, timeout=10).text
    except requests.exceptions.RequestException:
        print('[error] could not fetch listing page: ' + url)
        continue

    pic_url = pic_re.findall(html)
    for i, each in enumerate(pic_url, start=1):
        print(each)
        try:
            pic = requests.get(each[1], timeout=10)
            # Treat HTTP error responses as failures so that an error page
            # is never written to disk as a .jpg file.
            pic.raise_for_status()
        except requests.exceptions.RequestException:
            # RequestException also covers read timeouts, which the
            # narrower ConnectionError does not.
            print('【錯誤】當(dāng)前圖片無法下載')
            continue

        file = os.path.join(path, str(page) + '_' + str(i) + '.jpg')
        # The context manager closes the file even if the write fails.
        with open(file, 'wb') as fp:
            fp.write(pic.content)