Python3項目:練習一 爬取單一租房頁面信息
-需要抓取的頁面信息:
-抓取頁面:http://bj.xiaozhu.com/search-duanzufang-p1-0/ (點擊進去第一個租房信息)
-抓取效果:
-項目代碼:
from bs4 import BeautifulSoup
import requests
import time
# Fetch one rental listing page and print the fields scraped from it.
url = 'http://bj.xiaozhu.com/fangzi/3686435130.html'
# A timeout keeps the script from hanging forever on an unresponsive server.
html = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
html.raise_for_status()
soup = BeautifulSoup(html.text, 'lxml')

# Every select() call returns a list of matching tags. The lists get plural
# names so the loop variables below do not overwrite them.
titles = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > h4 > em')
adrs = soup.select('body > div.wrap.clearfix.con_bg > div.con_l > div.pho_info > p > span')
moneys = soup.select('div.day_l')
pics = soup.select('#curBigImage')
touxiangs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a')
genders = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > span')

# zip() stops at the shortest list, so if any selector matches nothing the
# loop body never runs and nothing is printed.
for title, adr, money, pic, touxiang, name, gender in zip(
        titles, adrs, moneys, pics, touxiangs, names, genders):
    data = {
        'title': title.get_text(),
        'adr': adr.get_text(),
        'money': money.get_text(),
        'pic': pic.get('src'),
        # Image found under div.member_pic in the floating side box.
        'touxiang': touxiang.get('src'),
        'name': name.get_text(),
        # Author's note: the idea was to detect the host's gender through the
        # "member_girl_ico" class, but many hosts turn out not to have it.
        # get('class') yields the tag's class value, or None when absent.
        'gender': gender.get('class'),
    }
    print(data)