/**
- 教程:https://blog.csdn.net/Qc1998/article/details/83154558
- cheerio使用:https://www.cnblogs.com/zjx2011/p/6554772.html
*/
var cheerio = require('cheerio');
var superagent = require('superagent');
var charset = require('superagent-charset');
superagent = charset(superagent)
// Fetch the site's home page and print the text of every element matched by
// the `.index-area ul li .name` selector.
superagent.get('https://www.88ys.cc/')
  .charset('utf8')
  .end(function (err, res) {
    if (err) {
      console.log(err);
      return;
    }
    const $ = cheerio.load(res.text);
    // Declared locally: the previous bare assignment leaked an implicit global
    // (and would throw a ReferenceError in strict mode).
    const $names = $('.index-area ul li .name');
    $names.each(function () {
      // cheerio binds `this` to the current element, so this callback must be a
      // regular function rather than an arrow function.
      console.log($(this).text());
    });
  });
// ---- Image-download version ----
/**
* Required npm packages:
* 1. $ npm install express --save
* 2. $ npm install superagent --save
* 3. $ npm install superagent-charset
* 4. $ npm install cheerio
* */
// Step 1: set up the basic HTTP request listener.
// `express` is now declared; it used to be assigned as an implicit global.
// `var` (not `const`) is used throughout this section because several of these
// names are also declared with `var` earlier in this file, and a `const`
// re-declaration would be a SyntaxError.
var express = require('express');
var app = express();
// Step 2: HTTP request and DOM-processing libraries.
var superagent = require('superagent'); // HTTP client, comparable to Axios / Request / Fetch
var charset = require('superagent-charset'); // adds .charset() for decoding response bodies
superagent = charset(superagent);
var cheerio = require('cheerio'); // runs on the server; jQuery-like API for parsed HTML
// Step 3: file reading and writing.
const fs = require('fs');
// GET / — fetch the image-search page, read the image list from the JSON held
// in its `script#initData` element, hand the list to dowmImg, and stream a
// short HTML progress report back to the client.
app.get('/', function (request, response, next) {
  response.writeHead(200, {'Content-Type': 'text/html; charset=utf-8'});
  response.write("創建爬蟲<br>");
  superagent.get("http://image.so.com/i?q=%E5%88%98%E4%BA%A6%E8%8F%B2&src=tab_www#/")
    .charset('utf8')
    .buffer(true)
    .end(function (err, res) {
      if (err) {
        console.log(err);
        // The status line is already sent, so the failure can only be reported
        // in the body. The response must still be ended or the client hangs.
        response.end("請求失敗<br>");
        return;
      }
      response.write("獲得數據<br>");
      try {
        // Parse the page and pull the image list out of the embedded JSON.
        const $ = cheerio.load(res.text);
        const imgList = JSON.parse($('script[id="initData"]').html()).list;
        dowmImg(imgList, './imgs', response);
      } catch (processingErr) {
        // A missing or malformed initData script would otherwise throw inside
        // this asynchronous callback and leave the response open.
        console.log(processingErr);
        response.end("處理失敗<br>");
        return;
      }
      // Close the response stream.
      response.end("操作完成<br>");
    });
});
// Start the HTTP server on port 3000 and log once it is listening.
app.listen(3000, () => console.log('app is listening at port 3000'));
/**
 * Download every image in `imgList` into `filePath` and, when `response` is
 * supplied, write one HTML link per downloaded image to it.
 *
 * Entries whose `img` is not an absolute http(s) URL are skipped. Files are
 * named `<index>.png`, where index is the entry's position in `imgList`.
 * Downloads are only started here, not awaited: the function returns while
 * the transfers are still running.
 *
 * @param {Array<{title: string, img: string}>} [imgList=[]] entries to download
 * @param {string} [filePath='./imgs'] target directory; created if missing
 * @param {{write: Function}} [response] optional sink for the HTML links
 */
function dowmImg(imgList=[],filePath='./imgs',response){
  if (!Array.isArray(imgList)) return;

  try {
    // createWriteStream emits an error if the directory is missing.
    fs.mkdirSync(filePath, { recursive: true });
  } catch (mkdirErr) {
    console.error(`cannot create directory ${filePath}:`, mkdirErr);
    return;
  }

  imgList.forEach((item, index) => {
    const { title, img } = item || {};
    if (typeof img !== 'string' || !/^https?:\/\//i.test(img)) return;

    const target = fs.createWriteStream(`${filePath}/${index}.png`);
    // A stream 'error' event without a listener would crash the process.
    target.on('error', (writeErr) => {
      console.error(`failed to write image ${index}:`, writeErr);
    });

    const download = superagent.get(img);
    download.on('error', (downloadErr) => {
      console.error(`failed to download ${img}:`, downloadErr);
      target.destroy();
    });
    // pipe() only starts the transfer; data arrives asynchronously.
    download.pipe(target);

    if (response) {
      // title and img come from a scraped page, so escape them before
      // embedding them in HTML.
      const label = title == null ? img : String(title);
      response.write(`<a href="${escapeHtml(img)}">${escapeHtml(label)}</a><br />`);
    }
  });
}

/** Escape the characters that are significant in HTML text and attribute values. */
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}