from bs4 import BeautifulSoup
# Parse a locally saved copy of the practice page and print one dict per
# product card (price, title, review text, image src, star count).
#
# The path is a raw string: the Windows-style prefix contains "\P" and "\w",
# which are invalid escape sequences in a normal string literal. The resulting
# path value is identical to what the non-raw literal produced.
with open(r'D:\Py\Plan-for-combating-master\week1/1_2/1_2answer_of_homework/index.html', 'r') as wb_data:
    # BeautifulSoup reads the whole file handle here, so the file can be
    # closed as soon as the tree has been built.
    Soup = BeautifulSoup(wb_data, 'lxml')

# One CSS selector per field; each call returns a list of matching tags.
# NOTE(review): the price selector is pinned to the first top-level div and
# the second div under .col-md-9, while the other selectors are not. This
# presumably matches the same product cards on this page; verify against the
# HTML if the lists ever come back with different lengths.
prices = Soup.select('body > div:nth-of-type(1) > div > div.col-md-9 > div:nth-of-type(2) > div > div > div.caption > h4.pull-right')
titles = Soup.select('body > div > div > div.col-md-9 > div > div > div > div.caption > h4 > a')
reviews = Soup.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p.pull-right')
images = Soup.select('body > div > div > div.col-md-9 > div > div > div > img')
rates = Soup.select('body > div > div > div.col-md-9 > div > div > div > div.ratings > p:nth-of-type(2)')

# zip() pairs the lists positionally and stops at the shortest one.
for price, title, review, image, rate in zip(prices, titles, reviews, images, rates):
    data = {
        'price': price.get_text(),
        'title': title.get_text(),
        'review': review.get_text(),
        'image': image.get('src'),
        # Star rating = number of <span> tags inside the ratings paragraph
        # whose class attribute is exactly "glyphicon glyphicon-star".
        'rate': len(rate.find_all("span", "glyphicon glyphicon-star")),
    }
    print(data)
# 重點
# - nth-child(1) 與 nth-of-type(1) 的差別:
#   前者指的是父節點下的第一個子元素
#   後者指的是父節點下該類型的第一個元素
#   不一定要嚴格使用 nth-of-type,
#   也可以放寬範圍,用 find_all("標籤", "class") 查找
# - 兩種文件讀取方式
#   第一種
#       fs = open("文件地址", 'r')
#       print(fs.read())
#       fs.close()
#   第二種
#       with open("文件地址", 'r') as fs:
#           print(fs.read())