這是一個十分簡陋的初學項目，現已重寫，新版見：
房價數據爬取接口+數據可視化 - 簡書 (jianshu.com)
1.使用Nodejs 爬取樓盤信息
爬取頁:
image.png
使用 https 模組爬取頁面，cheerio 分析爬取需要元素的數據，本項目爬取樓盤的名稱、地址以及每平米的價格
const https = require("https");
const cheerio = require("cheerio");
const fs = require("fs");
const startPage = 1; // first page to crawl
const endPage = 100; // last page to crawl (getData stops once `page` exceeds it)
let page = startPage; // page currently being fetched
let total = 0; // total record count; filled in by filter()
// Base URL of the listing index; requesting it as-is returns the first page.
const url = "https://hz.fang.lianjia.com/loupan/";
// Accumulates the records scraped from every page.
let result = [];
// Start the crawl at `startPage`. Page 1 is the bare index URL; any later page
// uses the same "pg<N>" suffix getData() appends when it advances, so that
// changing `startPage` actually changes which page is fetched first instead
// of only shifting the page counter.
getData(startPage === 1 ? url : `${url}pg${startPage}`);
/**
 * Fetch one listing page, collect its records, and continue with the next page.
 *
 * Pages are requested one at a time: once a response has been read to the end,
 * the records returned by `filter()` are appended to `result`, `page` is
 * incremented, and the next page is requested. When `page` exceeds `endPage`,
 * everything collected is written to `url.js`.
 *
 * If the request or the response stream fails, the error is logged and the
 * records collected so far are still written out, instead of the process
 * dying on an unhandled "error" event.
 *
 * @param {string} url - Address of the page to fetch.
 */
function getData(url) {
  // Ensures a page is settled only once, whether by "end" or by a failure.
  let finished = false;

  // Write everything collected so far as a script that defines `data`.
  const saveResult = () => {
    fs.writeFile("url.js", "let data = " + JSON.stringify(result), err => {
      if (err) {
        console.error("Failed to write url.js:", err);
        return;
      }
      console.log("success~");
    });
  };

  // Log the failure and keep whatever has already been scraped.
  const fail = err => {
    if (finished) return;
    finished = true;
    console.error(`Request for ${url} failed:`, err);
    saveResult();
  };

  https
    .get(url, res => {
      // Decode at the stream level: converting each Buffer chunk to a string
      // separately corrupts multi-byte UTF-8 characters split across chunks.
      res.setEncoding("utf8");

      let html = "";
      res.on("data", chunk => {
        html += chunk;
      });
      res.on("error", fail);
      res.on("end", () => {
        if (finished) return;
        finished = true;

        result = result.concat(filter(html)); // keep only the fields we need
        page++;
        if (page > endPage) {
          saveResult();
          return;
        }
        // Later pages follow the URL pattern ".../loupan/pg<N>".
        getData("https://hz.fang.lianjia.com/loupan/pg" + page);
      });
    })
    .on("error", fail);
}
/**
 * Extract the listing records from the HTML of one listing page.
 *
 * Only listings whose price description contains "元/平(均價)" are kept; all
 * others are skipped. Every kept record is also logged to the console.
 *
 * Side effect: while the module-level `total` is still 0, it is set from the
 * page's result-count element.
 *
 * @param {string} data - HTML source of a listing page.
 * @returns {{name: string, value: string, address: string}[]} Records found on
 *   this page, in document order.
 */
function filter(data) {
  const final = []; // records collected from this page
  const $ = cheerio.load(data);

  if (total === 0) {
    // The count has not been read yet. The selector needs the leading "." on
    // "resblock-have-find": without it the name is treated as an element type
    // and never matches.
    // NOTE(review): assumed to be a class like its neighbours — confirm
    // against the page markup.
    const countText = $(
      ".resblock-list-container .resblock-have-find span.value"
    ).text();
    // Keep `total` numeric; stay at 0 (and retry on the next page) when the
    // count is missing or not a number.
    total = Number.parseInt(countText, 10) || 0;
  }

  const items = $(
    ".resblock-list-container .resblock-list-wrapper .resblock-list"
  );

  items.each((_, item) => {
    const $item = $(item);

    // Keep only listings priced per square metre; entries quoted per unit
    // (萬/套) are dropped so every `value` is in the same unit.
    // NOTE(review): this literal must match the page text exactly (script and
    // punctuation included) — confirm against the live markup.
    const priceDesc = $item.find("span.desc").text();
    if (!priceDesc.includes("元/平(均價)")) {
      // Inside .each() only `return false` stops the loop; a plain return
      // just moves on to the next item.
      return;
    }

    const record = {
      name: $item
        .find("a.name")
        .text()
        .replace(/\s/g, ""),
      value: $item.find("span.number").text(),
      address: $item
        .find("div.resblock-location")
        .text()
        .replace(/\s/g, ""),
    };
    console.log(record);
    final.push(record);
  });

  return final;
}
結果保存在該目錄的 url.js 文件中
image.png
2.獲取數據後需要對數據進行處理以獲取各個樓盤的坐標
本項目使用百度地圖 API 對數據地址進行地址解析獲得經緯度
// Resolve one record's address to map coordinates with the Baidu Maps geocoder.
// NOTE(review): `item` is not defined in this snippet — presumably one scraped
// record with an `address` field; confirm against the surrounding loop.
var myGeo = new BMap.Geocoder();
myGeo.getPoint(
item.address,
function(point) {
// `point` is the longitude/latitude the API returned for the address
},
// NOTE(review): presumably the city used to narrow the lookup — confirm in the Baidu Maps docs
"杭州市"
);
經處理後的部分數據:
image.png
3.使用 echarts + bmap 顯示房價散點圖
// Draw the listing prices as a scatter series on top of a Baidu map.
var myChart = echarts.init(document.getElementById("container"));
myChart.setOption({
  backgroundColor: "transparent",
  title: { text: "杭州樓盤房價 - 百度地圖", left: "right" },
  tooltip: {
    trigger: "item",
    // Tooltip text: the point's name, a colon, then the third entry of its value.
    formatter: params => params.data.name + ":" + params.data.value[2]
  },
  bmap: {
    center: [120.15, 30.28], // centre of the displayed map
    zoom: 12, // zoom level
    roam: true,
    // Baidu map styling; styles can be edited online at
    // http://lbsyun.baidu.com/index.php?title=open/custom
    mapStyle: {
      // Each row is [featureType, elementType, stylers].
      styleJson: [
        ["road", "all", { lightness: 20 }],
        ["highway", "geometry", { color: "#f49935" }],
        ["local", "labels", { visibility: "off" }],
        ["water", "all", { color: "#d1e5ff" }],
        ["city", "labels", { visibility: "off" }]
      ].map(([featureType, elementType, stylers]) => ({
        featureType,
        elementType,
        stylers
      }))
    }
  },
  series: [
    {
      name: "hzlp",
      type: "scatter",
      coordinateSystem: "bmap",
      data: data, // data source for the points
      // Dot size is the third entry of the value divided by 5000.
      symbolSize: val => val[2] / 5000,
      label: { formatter: "", position: "right" },
      itemStyle: { color: "#ff3333" },
      emphasis: { label: { show: true } }
    }
  ]
});
效果:
1.jpg
2.jpg
具體項目地址:
Nodejs爬取杭州鏈家樓盤數據:https://github.com/kakuuuu/Node_Crawler_lj
杭州鏈家樓盤數據可視化:https://github.com/kakuuuu/houseprice-visualization