import requests
from lxml import etree
import xlwt
# Module-level accumulator: get_info() appends one row per scraped item here,
# and the __main__ block writes every row into the output spreadsheet.
all_info_list=[]
def get_info(url):
    """Scrape one listing page of qiushibaike and append its items to ``all_info_list``.

    Each appended row is ``[title, laugh_count, comment_count, author]`` as
    plain strings (empty string when an element is missing from the page).

    :param url: URL of one "recommended" listing page.
    :returns: None; results accumulate in the module-level ``all_info_list``.
    """
    res = requests.get(url)
    html = etree.HTML(res.text)
    infos = html.xpath('//div[@class="recommend-article"]/ul/li/div[@class="recmd-right"]')
    for info in infos:
        title = info.xpath('a/text()')
        laughs = info.xpath('div/div/span[1]/text()')
        comments = info.xpath('div/div/span[4]/text()')
        # Renamed from `id`, which shadowed the builtin.
        author = info.xpath('div/a/span/text()')
        # xpath() returns a list of text nodes; take the first one (or '' when
        # absent) so each cell is a scalar string — xlwt's Worksheet.write
        # cannot serialize a raw list.
        row = [field[0] if field else '' for field in (title, laughs, comments, author)]
        all_info_list.append(row)
if __name__ == '__main__':
    # Prepare the workbook and write the header row.
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')
    # Columns: title / laugh count / comment count / author.
    # (Original header text carried extraction garbling — a pinyin annotation
    # "(shù)" fused into the character 數 — repaired here.)
    header = ['題目', '好笑數', '評論數', '作者']
    for col, caption in enumerate(header):
        sheet.write(0, col, caption)

    # Pages 1-13 of the "recommended" listing.
    urls = ['https://www.qiushibaike.com/8hr/page/{}/'.format(i) for i in range(1, 14)]
    for url in urls:
        get_info(url)

    # Write scraped rows below the header; enumerate replaces the manual
    # i/j counters, and `row` avoids shadowing the builtin `list`.
    for row_idx, row in enumerate(all_info_list, start=1):
        for col_idx, value in enumerate(row):
            sheet.write(row_idx, col_idx, value)

    book.save('C:/Users/madin/Desktop/糗事百科.xls')