var Nightmare = require('nightmare');
// https://duckduckgo.com
/**
 * Scrape one page of the forum listing and print the threads whose title
 * contains a keyword.
 *
 * Starts a Nightmare instance with its window hidden, loads the listing page,
 * waits for the thread rows, collects the first link of every row whose text
 * contains `keyword`, logs the matches (or a "no data" notice) and ends the
 * instance.
 *
 * @param {number} page - Page number inserted into the listing URL.
 * @param {string} [keyword='北京'] - Substring a thread title must contain.
 * @returns {Promise<Array<{href: string, text: string, page: number}>>}
 *   Resolves with the matches found on this page. When the scrape fails the
 *   error is logged and the promise resolves with an empty array; it does
 *   not reject.
 */
function open(page, keyword = '北京') {
  const nightmare = Nightmare({ show: false });
  const rowSelector = '.datatable .subject,.new';

  return nightmare
    .goto(`https://t0904.91zuixindizhi.com/forumdisplay.php?fid=19&page=${page}`)
    .wait(rowSelector)
    // The callback is handed to Nightmare to run against the loaded page, so
    // the values it needs are passed as extra arguments instead of being
    // captured from the surrounding scope.
    .evaluate(function (selector, wanted) {
      const matches = [];
      document.querySelectorAll(selector).forEach(function (row) {
        const link = row.querySelector('a');
        if (link && link.innerText.includes(wanted)) {
          matches.push({ href: link.href, text: link.innerText });
        }
      });
      return matches;
    }, rowSelector, keyword)
    .end()
    .then(function (result) {
      // Tag every match with the page it came from.
      const matches = result.map((entry) => ({ ...entry, page }));
      if (matches.length > 0) {
        console.log(matches);
      } else {
        console.log(`第${page}页没有所需数据`);
      }
      return matches;
    })
    .catch(function (error) {
      console.error('Search failed:', error);
      return [];
    });
}
/**
 * Pause for a given duration.
 *
 * @param {number} time - Delay in milliseconds, passed to `setTimeout`.
 * @returns {Promise<undefined>} Promise that fulfils once the timer fires.
 */
function sleep(time) {
  return new Promise(function (wake) {
    setTimeout(wake, time);
  });
}
/**
 * Crawl a range of listing pages one after another and report the total time.
 *
 * For each page the loop waits for two things together: whatever `open(page)`
 * returns and a throttle delay. If `open` returns a promise, a slow page
 * holds back the next one instead of overlapping with it; the delay keeps a
 * minimum spacing between page requests either way.
 *
 * @param {number} [startPage=462] - First page to crawl (inclusive).
 * @param {number} [endPage=1000] - Page at which to stop (exclusive).
 * @param {number} [delayMs=5000] - Minimum time in milliseconds per page.
 * @returns {Promise<void>} Fulfils when every page in the range was visited.
 */
async function run(startPage = 462, endPage = 1000, delayMs = 5000) {
  console.time('爬取耗时:');
  try {
    for (let page = startPage; page < endPage; page++) {
      await Promise.all([open(page), sleep(delayMs)]);
    }
  } finally {
    // Always close the timer, even when a page throws unexpectedly.
    console.timeEnd('爬取耗时:');
  }
}
// Start the crawl. Any unexpected failure is reported and reflected in the
// exit code instead of leaving the promise returned by run() unhandled.
run().catch(function (error) {
  console.error('Crawl aborted:', error);
  process.exitCode = 1;
});
// Tags: function, 爬取, arr, console, ooOOOO, result, 集锦, page
// From: https://www.cnblogs.com/lambertlt/p/17727976.html