# Scrape product information (image, price, title, review count, star rating) from a saved HTML page.
import requests
from bs4 import BeautifulSoup
import os
# Switch to the directory that holds the saved page so the relative
# 'index.html' path below resolves.
os.chdir('/Users/baidu/Desktop/爬蟲/Plan-for-combating-master/week1/1_2/1_2answer_of_homework/1_2_homework_required/')

# Open in binary mode: when handed raw bytes, BeautifulSoup detects the
# document encoding itself instead of depending on the platform's default
# text encoding, which can raise UnicodeDecodeError or garble non-ASCII text.
# The document is parsed inside the `with`, so the handle can be closed
# before the extraction loop runs.
with open('index.html', 'rb') as wb_data:
    soup = BeautifulSoup(wb_data, 'lxml')

# Each query collects every matching element in document order.
images = soup.find_all('div', {'class': 'thumbnail'})
prices = soup.find_all('h4', {'class': 'pull-right'})
titles = soup.find_all('div', {'class': 'caption'})
reviews = soup.find_all('div', {'class': 'ratings'})
# The review count and the star rating are both read from the same
# 'ratings' blocks, so reuse the result instead of searching the tree twice.
stars = reviews

# NOTE(review): zip() pairs the lists by position and stops at the shortest
# one; this assumes every product contributes exactly one element to each
# list -- confirm against the page markup.
for image, price, title, review, star in zip(images, prices, titles, reviews, stars):
    # Count the filled-star glyph spans inside the second <p> of the ratings
    # block. NOTE(review): raises IndexError if a block has fewer than two <p>.
    star_num = len(star.find_all('p')[1].find_all('span', {'class': 'glyphicon glyphicon-star'}))
    data = {
        'image': image.img['src'],
        'price': price.get_text(),
        'title': title.a.get_text(),
        # First space-separated token of the first <p> in the ratings block
        # (presumably the numeric review count -- verify against the page).
        'review': review.p.get_text().split(' ')[0],
        'star': str(star_num) + '星',
    }
    print(data)