多说无益,直接上代码
var http = require("http");
var express = require("express");
var request = require('request');
var async = require("async");
var fs = require('fs');
var iconv = require('iconv-lite');
var cheerio = require('cheerio');
// Express application; `listen` is called on it elsewhere in this file.
const app = express();
// Id of the book being crawled. down() increments it before starting the next book.
let k = 1;
// Starts at 1; down() increments it whenever a chapter request fails with ENOTFOUND.
let breakNum = 1;
// Start crawling at the first book. down() is an async function and therefore
// returns a promise, so report a rejection instead of leaving it unhandled.
down(k).catch(function (err) {
    console.error(err);
});
/**
 * Crawl one book from www.biqugex.com and save every chapter as a text file.
 *
 * Fetches the index page of book `i`, extracts the book name and the chapter
 * links, creates a directory named after the book next to this script, then
 * downloads the chapters (at most 50 requests in flight) and writes each one
 * to "<book name>/<chapter title>.txt". Pages are decoded as gb2312.
 *
 * When the book has been handled - successfully or not - the module-level
 * counter `k` is incremented and the next book is started, until `k` exceeds
 * 100. Failures are logged; a failed chapter does not abort the other
 * chapters, and a failed book does not abort the following books.
 *
 * @param {number} i - Book id used to build the index URL ".../book_<i>/".
 * @returns {Promise<void>} Settles once this book has been handled and the
 *     next one (if any) has been started.
 */
async function down(i) {
    const siteOrigin = "https://www.biqugex.com";
    const lastBookId = 100;
    const chapterConcurrency = 50;
    const requestHeaders = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "www.biqugex.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
    };

    // Replace characters that cannot appear in a file name (path separators,
    // the characters Windows reserves, line breaks) so a title is safe to use.
    const toFileName = function (name) {
        return name.replace(/[\\\/:*?"<>|\r\n]/g, "_").trim();
    };

    // GET a page and resolve with its HTML decoded from gb2312.
    const fetchPage = function (pageUrl) {
        return new Promise(function (resolve, reject) {
            request({
                url: pageUrl,
                method: "GET",
                headers: requestHeaders,
                // Keep the body as a Buffer so iconv can decode it.
                encoding: null,
            }, function (err, res, body) {
                if (err) {
                    reject(err);
                    return;
                }
                try {
                    resolve(iconv.decode(body, "gb2312"));
                } catch (decodeErr) {
                    reject(decodeErr);
                }
            });
        });
    };

    // Create the book directory; an already existing directory is not an error.
    const ensureDir = function (dir) {
        return new Promise(function (resolve, reject) {
            fs.mkdir(dir, function (err) {
                if (err && err.code !== "EEXIST") {
                    reject(err);
                    return;
                }
                if (!err) {
                    console.log("目录创建成功。");
                }
                resolve();
            });
        });
    };

    // Move on to the following book id, stopping once `k` has passed the limit.
    const startNextBook = function () {
        if (k > lastBookId) {
            return;
        }
        k++;
        down(k).catch(function (err) {
            console.error(err);
        });
    };

    try {
        const indexHtml = await fetchPage(siteOrigin + "/book_" + i + "/");

        // Book name: the first <h2> element with its tags stripped.
        const heading = indexHtml.match(/<h2>.*<\/h2>/);
        // Chapter list: the markup between the second </dt> and the next </dl>.
        // NOTE(review): presumably the first <dt> section is a "latest chapters"
        // list and the second one is the full list - confirm against the site.
        const sections = indexHtml.split("</dt>");
        const bookName = heading
            ? toFileName(heading[0].replace(/<(\S*?)[^>]*>.*?|<.*? \/>/g, ""))
            : "";
        if (!bookName || sections.length < 3) {
            throw new Error("book_" + i + ": unexpected index page layout");
        }

        // After splitting on double quotes, the odd-numbered pieces are the
        // quoted attribute values, which are expected to be the chapter hrefs.
        const pieces = sections[2].split("</dl>")[0].split('"');
        const urlList = [];
        for (let n = 1; n < pieces.length; n += 2) {
            urlList.push(siteOrigin + pieces[n]);
        }

        // Use one base directory for both mkdir and writeFile, and wait for the
        // directory to exist before any chapter is written into it.
        const bookDir = __dirname + "/" + bookName + "/";
        await ensureDir(bookDir);

        let done = 0;
        // Download one chapter and write it to disk. Always reports back to
        // mapLimit (with null on failure) so a bad chapter cannot stall the queue.
        const saveChapter = function (chapterUrl, callback) {
            fetchPage(chapterUrl).then(function (chapterHtml) {
                const $ = cheerio.load(chapterHtml);
                const title = $("h1").text();
                const text = $("#content").text()
                    // Strip URLs embedded in the chapter text.
                    .replace(/(https?|ftp|file):\/\/[-A-Za-z0-9+&@#\/%?=~_|!:,.;]+[-A-Za-z0-9+&@#\/%=~_|]/g, "")
                    // Strip the site's advertisement line.
                    .replace("请记住本书首发域名:www.biqugex.com。笔趣阁手机版阅读网址:m.biqugex.com", "");
                return new Promise(function (resolve, reject) {
                    fs.writeFile(bookDir + toFileName(title) + ".txt", text, function (writeErr) {
                        if (writeErr) {
                            reject(writeErr);
                            return;
                        }
                        resolve();
                    });
                });
            }).then(function () {
                done++;
                console.log(done, urlList.length);
                callback(null, chapterUrl);
            }, function (err) {
                if (err && err.code === "ENOTFOUND") {
                    // Host could not be resolved: count it and skip the chapter.
                    breakNum++;
                } else {
                    console.log(chapterUrl);
                    console.log(err);
                }
                callback(null, null);
            });
        };

        // Completion is signalled by mapLimit's final callback, which also
        // fires immediately when the chapter list is empty.
        const results = await new Promise(function (resolve) {
            async.mapLimit(urlList, chapterConcurrency, saveChapter, function (error, saved) {
                resolve(saved);
            });
        });
        console.log("result :");
        console.log(results);
    } catch (err) {
        console.error(err);
    }

    startNextBook();
}
// Start the Express app listening on port 3000. No routes are registered on
// `app` in the visible code.
app.listen(3000);
一步一个坑地走过来的,虽然不到一百行代码,写起来可真是要命。