這個爬蟲比較簡單,一個固定URL,數據獲取也比較有規律。最後是把獲取到的書名、簡介、評分、作者出版社信息寫入本地的一個文件中。
# coding:utf-8
import requests
from bs4 import BeautifulSoup
import sys
# Python 2 only: reload(sys) restores sys.setdefaultencoding (removed from
# the module at interpreter start-up) so that implicit str/unicode
# conversions use UTF-8 instead of ASCII.  Python 3 has no reload() builtin
# and already defaults to UTF-8, so the workaround is skipped there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf8')
def get_latest_book():
    """Download Douban's "latest books" page and return one dict per book.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``.  Four CSS selectors each produce a flat list of nodes
    (title links, rating lines, author/publisher lines and blurbs); the
    lists are combined position by position.

    Returns:
        list[dict]: one dictionary per book with the keys ``bookName``,
        ``bookRate``, ``bookInfo`` and ``bookDetail``, each holding the
        text content of the matching node.

    Raises:
        requests.RequestException: if the request times out or the server
            replies with an HTTP error status.
    """
    url = 'https://book.douban.com/latest?icn=index-latestbook-all'
    # Without a timeout requests would wait forever on a stalled connection.
    response = requests.get(url, timeout=10)
    # Fail loudly rather than scraping an error page into an empty result.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    name_nodes = soup.select('.article .detail-frame a')
    rate_nodes = soup.select('.article .color-lightgray')
    info_nodes = soup.select('.article .color-gray')
    detail_nodes = soup.select('.article .detail')

    # zip() stops at the shortest list, so a book that lacks one of the
    # fields can no longer trigger an IndexError.
    # NOTE(review): pairing is purely positional; if a node is missing for
    # one book, later entries shift -- confirm against the page markup.
    return [
        {
            'bookName': name.text,
            'bookRate': rate.text,
            'bookInfo': info.text,
            'bookDetail': detail.text,
        }
        for name, rate, info, detail in zip(
            name_nodes, rate_nodes, info_nodes, detail_nodes
        )
    ]
def write_file():
    """Fetch the latest books and save them to a text file.

    The file is created in the current working directory and overwritten on
    each run.  For every book the name, rating, author/publisher info and
    blurb are written back to back, in that order, with no extra separators.
    A completion message is printed once the file has been closed.

    Raises:
        Whatever ``get_latest_book`` raises, plus ``IOError``/``OSError`` if
        the output file cannot be written.
    """
    # Local import: io.open accepts ``encoding`` on both Python 2 and 3.
    import io

    # Fetch before opening the file so that a failed download cannot
    # truncate the output of a previous successful run.
    books = get_latest_book()
    fields = ('bookName', 'bookRate', 'bookInfo', 'bookDetail')

    # Explicit UTF-8 keeps the output independent of the interpreter's
    # default encoding and of the platform locale.
    with io.open('豆瓣新書速遞.txt', 'w', encoding='utf-8') as f:
        for book in books:
            f.writelines(book[field] for field in fields)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('數據寫入完畢')
# Script entry point: fetch the latest-books list and write it to disk.
# This call sits at module level, so it also runs when the file is imported.
write_file()