我的代碼
from bs4 import BeautifulSoup
import requests
import time
# 獲取產品鏈接
def get_links_from(who_sells, *, pages=range(1, 10), delay=5, timeout=10):
    """Collect item-detail links from listing pages under bj.58.com/pbdn/.

    Each listing page URL is built as
    ``http://bj.58.com/pbdn/<who_sells>/pn<page>/``.

    Args:
        who_sells: Value inserted into the seller segment of the listing
            URL. The script's own notes say 0 selects individual sellers
            and 1 selects merchants.
        pages: Iterable of listing page numbers to fetch. Defaults to
            pages 1 through 9. An empty iterable performs no requests.
        delay: Seconds to sleep after every listing request, to keep the
            request rate low enough not to trigger the site's anti-crawler
            measures.
        timeout: Seconds to wait for each HTTP response before giving up,
            so a stalled connection cannot hang the crawl forever.

    Returns:
        A list of ``href`` values taken from every ``td.t a.t`` anchor on
        the pages that answered with HTTP 200, in page order. Pages with
        any other status are skipped silently.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    links = []
    for page in pages:
        page_url = 'http://bj.58.com/pbdn/{}/pn{}/'.format(who_sells, page)
        response = requests.get(page_url, timeout=timeout)
        # Throttle after every request, successful or not.
        time.sleep(delay)
        if response.status_code != 200:
            continue
        soup = BeautifulSoup(response.text, 'lxml')
        for anchor in soup.select('td.t a.t'):
            href = anchor.get('href')
            # Anchors without an href would yield None, which cannot be
            # fetched later; leave them out.
            if href:
                links.append(href)
    return links
def get_detail(who_sells=0):
    """Fetch every item page linked from the listings and print its fields.

    The item URLs come from ``get_links_from(who_sells)``. For each page
    that answers with HTTP 200, the matched elements are combined with
    ``zip`` and one dict per combined row is printed with the keys
    ``catalog``, ``title``, ``price``, ``tag``, ``range`` and ``view``.

    Args:
        who_sells: Seller selector forwarded to ``get_links_from``.
            Defaults to 0.

    Returns:
        None. Results are written to standard output.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    for url in get_links_from(who_sells):
        # Bound the wait so a single stalled item page cannot hang the run.
        response = requests.get(url, timeout=10)
        # Skip error pages, the same way the listing crawl does.
        if response.status_code != 200:
            continue
        soup = BeautifulSoup(response.text, 'lxml')
        # Author's tip: use the search box in the browser's element
        # inspector to locate elements and keep the selectors short.
        # NOTE(review): the class names below (including the odd spellings
        # 'info_titile' and 'palce_li') presumably mirror the site's own
        # markup; confirm before "correcting" them.
        catalogs = soup.select('div > span > a')
        titles = soup.select('.info_titile')
        prices = soup.select('.price_now i')
        tags = soup.select('.biaoqian_li')
        areas = soup.select('.palce_li span i')
        views = soup.select('.look_time')
        # Author's note: in this example each select() is expected to return
        # a single-element list, so indexing directly (e.g. prices[0]) would
        # also work. zip() stops at the shortest list, so a page where any
        # selector matches nothing prints no row at all.
        for catalog, title, price, tag, area, view in zip(
                catalogs, titles, prices, tags, areas, views):
            data = {
                'catalog': catalog.get_text(),
                'title': title.get_text(),
                'price': price.get_text(),
                'tag': tag.get_text(),
                'range': area.get_text(),
                'view': view.get_text(),
            }
            print(data)
# The argument selects which data to crawl: 0 means individual sellers
# (the default), 1 means merchants.
# Guarded so that importing this module does not start a crawl.
if __name__ == '__main__':
    get_detail()
總結
最后編輯于 :
©著作權歸作者所有,轉載或內容合作請聯系作者