#coding=utf-8
from time import sleep
import requests
import sys
from pyquery import PyQuery as pq
# Python 2-only hack: `setdefaultencoding` is deleted from the `sys` module
# after interpreter start-up, so `reload(sys)` is needed to re-expose it.
# Forcing UTF-8 lets the Chinese string literals below be written to files
# and concatenated without UnicodeDecodeError.
# NOTE(review): this idiom does not exist in Python 3 — confirms this file
# targets Python 2 (see also the `print` statements further down).
reload(sys)
sys.setdefaultencoding("utf-8")
class Gra():
def __init__(self):
self.user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
self.headers = {
'User-Agent': self.user_agent
}
fo = open("/Users/luomeng/Desktop/room.txt", "w")
fo.write("")
fo.close()
fo = open("/xxx/room.txt", "a+")
fo.write("城市,日期,出售單價(jià),出售供給量,售租比\n")
fo.close()
def grabData(self, cityid, cityname):
url = "http://www.xxxx.cn/market/chartsdatanew.html"
data = {
'city': cityid,
'proptype': '11',
'district': 'allsq1',
'sinceyear': '5',
'flag': '1',
'isv3': '0',
'type': 'forsale',
'matchrand': 'a0b92382',
'based': 'supply',
'dtype': 'line'
}
cookiesstr = "userchannel=direct; Hm_lvt_c2a7a3cec6f9dd8849155424efab19c7=1556094056; deviceStr=4f0750b83bbd41af37678bf609bfe2b5; cityredata=43464a06c67b93a525ab1b09566f7b73; city="+ cityid +"; thirdLog_fromurl=aHR0cDovL3d3dy5jcmVwcmljZS5jbi91cmJhbi9iai5odG1s; cityurl=e8727615c054b43; Hm_lpvt_c2a7a3cec6f9dd8849155424efab19c7=155615774"
cookies = {}
for line in cookiesstr.split(';'):
name, value = line.strip().split('=', 1)
cookies[name] = value
# 城市
data["city"] = cityid
# 價(jià)格
prices = []
data["based"] = "price"
resp = requests.get(url, params=data, headers=self.headers, cookies=cookies)
print resp.url
resp = resp.json()
for r in resp["data"][0]["rows"]:
month = r["month"]
try:
date = r["data"]
except:
date = 0
prices.append({"cityid": cityid, "month": month, "data": date})
# 新增房源
supplys = []
data["based"] = "supply"
resp = requests.get(url, params=data, headers=self.headers, cookies=cookies)
print resp.url
resp = resp.json()
for r in resp["data"][0]["rows"]:
month = r["month"]
try:
date = r["data"]
except:
date = 0
supplys.append({"cityid": cityid, "month": month, "data": date})
# 新增房源
slratios = []
data["based"] = "slratio"
resp = requests.get(url, params=data, headers=self.headers, cookies=cookies)
print resp.url
resp = resp.json()
for r in resp["data"][0]["rows"]:
month = r["month"]
try:
date = r["data"]
except:
date = 0
slratios.append({"cityid": cityid, "month": month, "data": date})
# 拼接數(shù)據(jù)
citydata = []
for p in prices:
ct = p["cityid"]
month = p["month"]
price = p["data"]
for s1 in supplys:
if ct == s1["cityid"] and month == s1["month"]:
supply = s1["data"]
break
for s2 in slratios:
if ct == s2["cityid"] and month == s2["month"]:
slratio = s2["data"]
break
citydata.append({"cityname": cityname, "month": month, "price": price, "supply": supply, "slratio": slratio})
# 輸出數(shù)據(jù)并寫書文件
fo = open("/Users/luomeng/Desktop/room.txt", "a+")
for cd in citydata:
fo.write("%s,%s,%s,%s,%s\n" % (cd["cityname"], cd["month"], cd["price"], cd["supply"], cd["slratio"]))
fo.close()
def getCity(self):
resp = requests.get("http://www.xxxx.cn//market/ajaxselectcity.html", headers=self.headers)
d = pq(resp.text)
citylist = []
i = 0
long = len(d('.citylistbox').children().children())
while i < long:
data = d('.citylistbox').children().children().eq(i)
citylist.append({"cityid": data.attr("onclick").replace("cityselecta('", "").replace("');", ""), "cityname": data.text()})
i += 1
return citylist
if __name__ == "__main__":
gra = Gra()
citylist = gra.getCity()
for city in citylist:
cityid = city["cityid"]
cityname = city["cityname"]
print cityname, "開始抓取..."
gra.grabData(cityid, cityname)
sleep(5)
【python16】爬蟲
?著作權(quán)歸作者所有,轉(zhuǎn)載或內(nèi)容合作請聯(lián)系作者
- 文/潘曉璐 我一進(jìn)店門壕探,熙熙樓的掌柜王于貴愁眉苦臉地迎上來,“玉大人郊丛,你說我怎么就攤上這事李请。” “怎么了厉熟?”我有些...
- 文/不壞的土叔 我叫張陵导盅,是天一觀的道長。 經(jīng)常有香客問我庆猫,道長认轨,這世上最難降的妖魔是什么? 我笑而不...
- 正文 為了忘掉前任月培,我火速辦了婚禮嘁字,結(jié)果婚禮上,老公的妹妹穿的比我還像新娘杉畜。我一直安慰自己纪蜒,他們只是感情好,可當(dāng)我...
- 文/花漫 我一把揭開白布此叠。 她就那樣靜靜地躺著纯续,像睡著了一般。 火紅的嫁衣襯著肌膚如雪。 梳的紋絲不亂的頭發(fā)上猬错,一...
- 文/蒼蘭香墨 我猛地睜開眼,長吁一口氣:“原來是場噩夢啊……” “哼鳖藕!你這毒婦竟也來了魔慷?” 一聲冷哼從身側(cè)響起,我...
- 序言:老撾萬榮一對情侶失蹤著恩,失蹤者是張志新(化名)和其女友劉穎院尔,沒想到半個(gè)月后,有當(dāng)?shù)厝嗽跇淞掷锇l(fā)現(xiàn)了一具尸體页滚,經(jīng)...
- 正文 獨(dú)居荒郊野嶺守林人離奇死亡召边,尸身上長有42處帶血的膿包…… 初始之章·張勛 以下內(nèi)容為張勛視角 年9月15日...
- 正文 我和宋清朗相戀三年铺呵,在試婚紗的時(shí)候發(fā)現(xiàn)自己被綠了裹驰。 大學(xué)時(shí)的朋友給我發(fā)了我未婚夫和他白月光在一起吃飯的照片。...
- 正文 年R本政府宣布,位于F島的核電站闷愤,受9級特大地震影響整葡,放射性物質(zhì)發(fā)生泄漏。R本人自食惡果不足惜讥脐,卻給世界環(huán)境...
- 文/蒙蒙 一遭居、第九天 我趴在偏房一處隱蔽的房頂上張望。 院中可真熱鬧旬渠,春花似錦俱萍、人聲如沸。這莊子的主人今日做“春日...
- 文/蒼蘭香墨 我抬頭看了看天上的太陽。三九已至,卻和暖如春岳颇,著一層夾襖步出監(jiān)牢的瞬間照捡,已是汗流浹背。 一陣腳步聲響...
- 正文 我出身青樓掂摔,卻偏偏與公主長得像术羔,于是被迫代替她去往敵國和親。 傳聞我的和親對象是個(gè)殘疾皇子乙漓,可洞房花燭夜當(dāng)晚...
推薦閱讀更多精彩內(nèi)容
- scrapy框架是爬蟲界最為強(qiáng)大的框架同诫,沒有之一粤策,它的強(qiáng)大在于它的高可擴(kuò)展性和低耦合,使使用者能夠輕松的實(shí)現(xiàn)更改和...
- 爬蟲(Spider),反爬蟲(Anti-Spider)霹俺,反反爬蟲(Anti-Anti-Spider) 之間恢宏壯闊...
- 通用爬蟲和聚焦爬蟲 根據(jù)使用場景丙唧,網(wǎng)絡(luò)爬蟲可分為 通用爬蟲 和 聚焦爬蟲 兩種. 通用爬蟲 通用網(wǎng)絡(luò)爬蟲 是 捜索...