from bs4 import BeautifulSoup
import requests
import time

# Search-result pages 1-14 for short-term rentals in Beijing on xiaozhu.com
urls = ['http://bj.xiaozhu.com/search-duanzufang-p{}-0/?startDate=2016-06-13&endDate=2016-06-14'.format(i)
        for i in range(1, 15)]


def sexss(values):
    # Map the host's avatar <div> class to a gender label
    job3 = []
    for tag in values:
        job1 = tag.get('class')
        if job1[0] == 'member_ico1':
            job3.append('female')
        elif job1[0] == 'member_ico':
            job3.append('male')
        else:
            job3.append('gender unknown')
    return job3


def lian(url1):
    info = []
    wb_data = requests.get(url1)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    time.sleep(2)  # pause so the site is not hammered with requests
    # Every room card on the listing page stores its detail-page URL in a 'detailurl' attribute
    lianjie = soup.find_all(style='cursor:pointer')
    for i in lianjie:
        detail_url = i.get('detailurl')
        wb_data = requests.get(detail_url)
        soup = BeautifulSoup(wb_data.text, 'lxml')
        titles = soup.select('h4 > em')
        addresss = soup.select('p > span.pr5')
        prices = soup.select('div.day_l > span')
        images = soup.find_all(id='curBigImage')
        imagespeople = soup.select('div.member_pic > a > img')
        sexs = soup.select('div.member_pic > div')
        name_oweners = soup.select('div.w_240 > h6 > a')
        job4 = sexss(sexs)
        for title, address, price, image, imagepeople, sex, name_owener in zip(
                titles, addresss, prices, images, imagespeople, job4, name_oweners):
            data = {
                'title': title.get_text(),
                'address': address.get_text(),
                'price': price.get_text(),
                'image': image.get('src'),
                'imagepeople': imagepeople.get('src'),
                'sex': sex,
                'name_owener': name_owener.get_text()
            }
            info.append(data)
    for i in info:
        print(i['title'], i['address'], str(i['price']) + '¥', i['image'],
              i['imagepeople'], i['sex'], i['name_owener'])


for page_url in urls:
    lian(page_url)
After finishing this project I have a much better understanding of how web pages are put together, and I am determined to keep working on crawlers. But I found that an old problem is still there: the scraped data contains a lot of duplicates. I hope the teacher can give me some guidance. My next research direction is working with Excel from Python!
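On the duplicate rows: one likely cause (an assumption, since I have not re-inspected the pages) is that several elements on a listing page carry style='cursor:pointer' with the same detailurl, so the same detail page is scraped more than once. A minimal sketch that collects the detail URLs into a set before fetching them, reusing the requests/BeautifulSoup setup above; the helper name collect_detail_urls is made up:

from bs4 import BeautifulSoup
import requests

def collect_detail_urls(list_url):
    """Return the unique detail-page URLs found on one search-result page."""
    soup = BeautifulSoup(requests.get(list_url).text, 'lxml')
    seen = set()
    for tag in soup.find_all(style='cursor:pointer'):
        detail_url = tag.get('detailurl')
        if detail_url:
            seen.add(detail_url)  # the set silently drops repeated URLs
    return seen

lian() could then iterate over collect_detail_urls(url1) instead of the raw tag list, fetching each room only once.

For the next step, writing the scraped rows to Excel, here is a minimal sketch assuming the third-party openpyxl package; the function name save_to_excel and the file name rooms.xlsx are just placeholders:

from openpyxl import Workbook  # pip install openpyxl

def save_to_excel(rows, filename='rooms.xlsx'):
    # rows is a list of dicts shaped like the `data` dict built in lian()
    wb = Workbook()
    ws = wb.active
    ws.append(['title', 'address', 'price', 'image', 'imagepeople', 'sex', 'name_owener'])
    for row in rows:
        ws.append([row['title'], row['address'], row['price'], row['image'],
                   row['imagepeople'], row['sex'], row['name_owener']])
    wb.save(filename)

If lian() returned info instead of printing it, the rows from all fourteen pages could be collected into one list and written out with a single save_to_excel() call.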