開始有一些寫代碼的思路了,還要繼續寫下去。
最終成果
我的代碼
from bs4 import BeautifulSoup
import requests
import time
# Search-result page URLs, one per page number.
# NOTE(review): the range starts at -1, so url[0] is the "p-1" page and url[1]
# is the "p0" page; the crawl loop further down reads url[1]..url[13]. Confirm
# which page numbers the site really serves before changing either side.
url = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/'.format(i) for i in range(-1, 14)]

# Request headers sent with every fetch. The value must be the bare browser
# identification string: no "User-Agent:" prefix and no trailing text.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
}
def _sex_label(markup):
    """Map the markup of one host badge element to a sex label.

    A '1' in the markup is read as female; failing that, an underscore is
    read as male; anything else yields the string 'null'.
    """
    if '1' in markup:
        return '女'
    if '_' in markup:
        return '男'
    return 'null'


def get_detail_info(url, nu):
    """Fetch one listing detail page and print the record(s) scraped from it.

    Args:
        url: Address of the listing detail page to download.
        nu: Sequence number stored under the ``number`` key of each record.

    Returns:
        None. Each record is printed as a dict.

    Raises:
        requests.RequestException: if the download fails or times out.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    wb_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
    addres = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span')
    damos = soup.select('div.day_l > span')
    frimgs = soup.select('div.pho_show_l > div.pho_show_big > div > img ')
    homnimgs = soup.select('div.js_box.clearfix > div.member_pic > a > img')
    homnsexs = soup.select('div.js_box.clearfix > div.member_pic > div')
    homnames = soup.select('div.js_box.clearfix > div.w_240 > h6 > a')

    # Classify every badge from its OWN markup. Testing the stringified list
    # as a whole would hand the same label to every host on the page.
    sexs = [_sex_label(str(badge)) for badge in homnsexs]

    # Pause after each detail-page fetch (presumably to avoid hammering the site).
    time.sleep(3)

    # zip() stops at the shortest list, so a page missing any field prints nothing.
    for title, addre, damo, frimg, homnimg, sex, homname in zip(
            titles, addres, damos, frimgs, homnimgs, sexs, homnames):
        data = {
            'number': nu,
            'title': title.get_text(),
            'addre': addre.get_text().rstrip(),
            'damo': damo.get_text(),
            'frimg': frimg.get('src'),
            'homnimg': homnimg.get('src'),
            'homnsex': sex,
            'homname': homname.get_text(),
        }
        print(data)
# Running count of listings visited so far; handed to get_detail_info as the
# record number.
nu = 0

# Walk the search-result pages url[1]..url[13] and scrape every listing that
# each page links to.
for page_url in url[1:14]:
    # A timeout keeps one stalled connection from hanging the whole crawl.
    wb_data = requests.get(page_url, headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    addinfos = soup.select(' ul > li > a.resule_img_a')

    # Pause after each result-page fetch (presumably to avoid hammering the site).
    time.sleep(3)

    for addinfo in addinfos:
        href = addinfo.get('href')
        if not href:
            # An anchor without an href gives us nothing to fetch; skip it
            # rather than crash while building the URL.
            continue
        nu += 1
        # The href is already a plain string, so it is passed on directly;
        # only surrounding whitespace is trimmed.
        get_detail_info(href.strip(), nu)
總結
- 自己在類這一塊和函數的基礎還是很薄弱,都沒有想到怎麼用魔法方法、屬性和迭代器
- 異常的處理還沒有完全掌握使用的方法
- 還需要更加熟悉各種類型方法的使用