# Kugou music chart: TOP 500 songs (酷狗歌曲榜單TOP500)
import requests
# Request headers passed to every requests.get() call below; carries a
# desktop-browser User-Agent string (presumably so the site serves its
# regular HTML page -- confirm if the scraper starts returning nothing).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36',
}
from lxml import etree
import xlwt
def _split_title(title):
    """Split a chart entry title into a ``(singer, song)`` pair.

    The spaced separator ``' - '`` is tried first so that hyphens inside a
    name (e.g. ``'A-Lin - Song'``) are not mistaken for the delimiter; a
    bare ``'-'`` is the fallback.  Only the first separator splits the
    title, so anything after it stays part of the song.  Both parts are
    stripped of surrounding whitespace.  A title without any hyphen is
    returned whole as the singer with an empty song.
    """
    for separator in (' - ', '-'):
        singer, found, song = title.partition(separator)
        if found:
            return singer.strip(), song.strip()
    return title.strip(), ''


def get_info(url):
    """Download one Kugou chart page and print every song row on it.

    For each ``<li>`` under the song-list ``<div>`` this prints, separated
    by spaces: the text of the third ``<span>`` (rank), the singer and song
    parsed from the link text, and the text of the nested span inside the
    fourth ``<span>`` (duration).

    Args:
        url: Address of the chart page to fetch.

    Returns:
        None.  Results are written to standard output.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection failures or when the timeout elapses.
    """
    # A timeout keeps a stalled connection from blocking the run forever.
    res = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(res.text)
    if html is None:
        # Defensive guard: without a root element there is nothing to query.
        return

    # NOTE: the trailing space inside the class value is intentional; the
    # XPath comparison is an exact string match on the attribute.
    for item in html.xpath('//div[@class="pc_temp_songlist "]/ul/li'):
        rank_nodes = item.xpath('span[3]')
        titles = item.xpath('a/text()')
        durations = item.xpath('span[4]/span/text()')
        if not (rank_nodes and titles and durations):
            # Skip rows missing any expected field instead of raising
            # IndexError and abandoning the rest of the page.
            continue

        # string(.) flattens the span's nested markup into plain text.
        rank = rank_nodes[0].xpath('string(.)').strip()
        singer, song = _split_title(titles[0])
        duration = durations[0].strip()
        print(rank, singer, song, duration)
if __name__ == '__main__':
    # The page number is the first component of the URL's file name;
    # pages 1 through 23 are scraped one after another.
    for page in range(1, 24):
        get_info('https://www.kugou.com/yy/rank/home/{}-8888.html'.format(page))
# Qiushibaike: 24-hour hot posts (糗事百科24小時)
import requests
# Request headers passed to the requests.get() call in get_info below;
# carries a desktop-browser User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36',
}
from lxml import etree
def get_info(url):
    """Download one Qiushibaike listing page and print each post's fields.

    Every child ``<div>`` of the ``content-left`` container is treated as
    one post.  Four XPath queries are run against it and their results are
    printed in the order ``name, content, laugh, comment``.  Each value is
    the raw list of text nodes returned by the query (possibly empty), not
    a single cleaned string.

    Judging by the variable names these are presumably the author name,
    post body, laugh count and comment count -- confirm against the page
    markup.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        None.  Results are written to standard output.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection failures or when the timeout elapses.
    """
    # A timeout keeps a stalled connection from blocking the run forever.
    res = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(res.text)
    if html is None:
        # Defensive guard: without a root element there is nothing to query.
        return

    for post in html.xpath('//div[@id="content-left"]/div'):
        name = post.xpath('div[1]/a[2]/h2/text()')
        laugh = post.xpath('div[2]/span[1]/i/text()')
        content = post.xpath('a[1]/div/span/text()')
        comment = post.xpath('div[2]/span[2]/a/i/text()')
        print(name, content, laugh, comment)
if __name__ == '__main__':
    # The page number is the last path component of the URL; pages 1
    # through 13 are scraped one after another.
    for page in range(1, 14):
        get_info('https://www.qiushibaike.com/hot/page/{}/'.format(page))