import requests
from lxml import etree
import xlwt
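# Homework 2: use lxml to scrape the latest singles from the MIXDJ music classroom website.
# Output: print the English song titles and gold-coin values, and save them to an .xls file.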
# Accumulates one [song_title, coin] pair per scraped track. get_info()
# appends to it; the __main__ section reads it once every page is fetched.
all_info_list = []


def get_info(url):
    """Fetch one listing page and record each track's title and coin value.

    Every ``<li>`` under ``div.jp_container`` is inspected. For each entry
    that has both a title link and a fourth ``<a>`` in its info paragraph,
    the pair is printed and appended to the module-level ``all_info_list``
    as ``[title, coin]``.

    Args:
        url: Address of the listing page to download.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout, requests waits forever on an unresponsive server.
    res = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    res.raise_for_status()

    html = etree.HTML(res.text)
    if html is None:
        # lxml can return None for an empty document; nothing to extract.
        return

    for entry in html.xpath('//div[@class="jp_container"]/ul/li'):
        titles = entry.xpath('div[2]/div[1]/h4/a/text()')
        coins = entry.xpath('div[2]/div[1]/p/a[4]/text()')
        if not titles or not coins:
            # Skip list items that lack the expected markup rather than
            # aborting the whole page with an IndexError.
            continue
        songs, coin = titles[0], coins[0]
        print(songs, coin)
        all_info_list.append([songs, coin])
def build_page_urls(first_page=1, last_page=5):
    """Return the listing-page URLs for pages first_page..last_page inclusive.

    Args:
        first_page: Number of the first page to include.
        last_page: Number of the last page to include.

    Returns:
        A list of URL strings, empty when ``last_page < first_page``.
    """
    template = 'http://mixdj.cn/music/lists/id/1/p/{}.html'
    return [template.format(page) for page in range(first_page, last_page + 1)]


def write_rows(sheet, header, rows):
    """Write a header row and the data rows into a worksheet.

    Args:
        sheet: Object exposing ``write(row, col, value)``, such as the
            worksheet returned by ``xlwt.Workbook.add_sheet``.
        header: Column titles, written into row 0.
        rows: Iterable of row sequences, written from row 1 downwards.
    """
    for col, title in enumerate(header):
        sheet.write(0, col, title)
    for row_idx, row in enumerate(rows, start=1):
        for col, value in enumerate(row):
            sheet.write(row_idx, col, value)


if __name__ == '__main__':
    book = xlwt.Workbook(encoding='utf-8')
    sheet = book.add_sheet('Sheet1')

    # Scrape pages 1-5; get_info() appends its results to all_info_list.
    for url in build_page_urls(1, 5):
        get_info(url)

    write_rows(sheet, ['Songs name', 'Gold coin'], all_info_list)
    book.save('F:/py/2homework_songs.xls')