直接看代碼:
# -*- coding:utf-8 -*-
# **********************************
# ** http://weibo.com/lixiaodaoaaa #
# ****** by:lixiaodaoaaa ***********
from bs4 import BeautifulSoup
import sys
import requests
import time
def detailOper(url, timeout=10):
    """Download one listing page and print each car's title, link and price.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block the
            whole crawl indefinitely.

    Returns:
        None. The page URL is printed first, followed by three lines per
        listing: title, detail link and price.
    """
    web_data = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(web_data.text, 'lxml')
    titles = soup.select('div.list > ul > li > div > p.infoBox > a')
    prices = soup.select('div.list > ul > li > div > p.priType-s > span > i')
    print(" open the url is " + url)
    # zip() pairs the n-th title with the n-th price and stops at the
    # shorter of the two result lists.
    for title, price in zip(titles, prices):
        data = {
            # NOTE(review): on Python 3 this turns the title into a bytes
            # object, so it prints as b'...'. Kept as-is because the target
            # interpreter is not visible here -- confirm before changing.
            'title': title.get_text().encode(encoding="utf-8"),
            'detailHerf': title.get('href'),
            # Strip the "ten thousand" unit in both its traditional and
            # simplified spelling, plus any spaces, leaving only the number.
            'price': price.get_text()
                          .replace(u'萬', '')
                          .replace(u'万', '')
                          .replace(' ', ''),
        }
        print(data['title'])
        print(data['detailHerf'])
        print(data['price'])
def start():
    """Crawl listing pages 1 through 29, pausing five seconds before each."""
    for page_number in range(1, 30):
        page_url = 'http://www.guazi.com/tj/buy/o{}/'.format(page_number)
        # Pause before every request (including the first) to go easy on
        # the server.
        time.sleep(5)
        detailOper(page_url)
# Start the crawl only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    start()
首先我們看這句話:
if __name__ == '__main__':
解釋:當這個文件被直接運行(而不是被其他模塊導入)時,這個條件成立,于是調用 start() 函數,啟動了頁面的抓取。
我們經常需要訪問一個url的第1頁、第2頁、第3頁,要做一個循環怎么辦呢?我們舉個例子:http://www.guazi.com/tj/buy/o1、o2、o3……只有后面變,前面不變。這個是一個列表,說白了就是個List。一句話怎么寫呢?看這里:
urls = ['http://www.guazi.com/tj/buy/o{}/'.format(str(i)) for i in range(1, 30, 1)]
這就是一個可以遍歷的對象。OK了,總結完畢。