練習本地,還是自己寫的,沒什麼含量,但還是需要記錄下
爬取圖片,標題,標籤,內容,分數
//各自的Copy_selector如下
圖片:body > div.main-content > ul > li:nth-child(1) > img
標題:body > div.main-content > ul > li:nth-child(1) > div.article-info > h3 > a
內容:body > div.main-content > ul > li:nth-child(1) > div.article-info > p.description
分數:body > div.main-content > ul > li:nth-child(1) > div.rate > span
標籤:body > div.main-content > ul > li:nth-child(2) > div.article-info > p.meta-info > span:nth-child(2)
代碼如下:
# Import the BeautifulSoup library
from bs4 import BeautifulSoup
# Module-level list that holds one dict per scraped article.
dataArray = []

# Location of the locally saved practice page.
HTML_PATH = '/Users/wangyi/代碼練習(xí)/Python/解析網(wǎng)頁(yè)中的元素/web/new_index.html'

# Selector for each article entry; the per-field selectors below are
# evaluated relative to one such <li> (":scope" is the <li> itself).
ITEM_SELECTOR = 'body > div.main-content > ul > li'
FIELD_SELECTORS = {
    'image': ':scope > img',
    'title': ':scope > div.article-info > h3 > a',
    'content': ':scope > div.article-info > p.description',
    'label': ':scope > div.article-info > p.meta-info',
    'fraction': ':scope > div.rate > span',
}


def parse_articles(markup):
    """Return one record per article found in *markup*.

    *markup* is anything BeautifulSoup accepts (a string, bytes, or an open
    file handle).  Each record is a dict with the keys ``image`` (the
    ``src`` attribute of the picture), ``title``, ``content``, ``label``
    (list of the whitespace-stripped strings inside the meta-info
    paragraph) and ``fraction`` (the score text).

    Fields are looked up inside each ``<li>`` rather than with five
    page-wide queries zipped together, so an article that lacks one element
    cannot shift the values of the articles after it.  Articles missing any
    of the five elements are skipped.
    """
    soup = BeautifulSoup(markup, 'lxml')
    records = []
    for item in soup.select(ITEM_SELECTOR):
        found = {
            name: item.select_one(selector)
            for name, selector in FIELD_SELECTORS.items()
        }
        if any(tag is None for tag in found.values()):
            # Incomplete entry: skip it instead of mixing it with others.
            continue
        records.append({
            'image': found['image'].get('src'),
            'title': found['title'].get_text(),
            'content': found['content'].get_text(),
            'label': list(found['label'].stripped_strings),
            'fraction': found['fraction'].get_text(),
        })
    return records


def select_high_rated(records, threshold=3):
    """Return the records whose ``fraction`` text is a number > *threshold*.

    Records whose score text cannot be parsed as a float are left out
    rather than aborting the whole listing.
    """
    chosen = []
    for record in records:
        try:
            score = float(record['fraction'])
        except ValueError:
            continue
        if score > threshold:
            chosen.append(record)
    return chosen


def main(path=HTML_PATH):
    """Parse the local page, fill ``dataArray`` and print the top articles."""
    # Binary mode hands the raw bytes to BeautifulSoup so it can detect the
    # document encoding itself instead of relying on the platform default.
    with open(path, 'rb') as we_data:
        dataArray[:] = parse_articles(we_data)
    # Print title and content of every article rated above 3.
    for article in select_high_rated(dataArray):
        print(article['title'], '======', article['content'])


if __name__ == '__main__':
    main()