在外网偷偷爬点小说资源
import puppeteer from 'puppeteer-core';
import fs from 'fs';

/** Intro page of the novel to scrape. */
const NOVEL_URL = 'https://www.xxx.com/novel/intro?id=1018852882323214336';

/** Selector prefix shared by everything read from the intro page. */
const NOVEL_BOX = '#root > div.intro_box > div.content_box > div.novel_box';

/** Selector prefix of the "info" panel (title, author, tags). */
const INFO_BOX = `${NOVEL_BOX} > ul.intro > li:nth-child(1) > div.info`;

/** Element whose presence signals that the intro page has rendered. */
const INTRO_READY_SELECTOR = `${NOVEL_BOX} > ul.intro > li:nth-child(3) > div`;

/** Element holding the chapter title on a chapter page. */
const CHAPTER_TITLE_SELECTOR = '#content > div.chapter > div.title_box > span';

/**
 * Turn a scraped title into a string that is safe to use as a file name.
 *
 * Path separators, characters reserved on common file systems and control
 * characters are replaced with `_` so the output file is always created in
 * the current working directory.
 *
 * @param {string} name - Raw title text taken from the page.
 * @returns {string} Sanitised name, or `'novel'` when nothing usable remains.
 */
function toSafeFileName(name) {
  const cleaned = String(name ?? '')
    .replace(/[\\/:*?"<>|\u0000-\u001f]/g, '_')
    .trim();
  return cleaned === '' ? 'novel' : cleaned;
}

/**
 * Read the novel's metadata from the already-loaded intro page.
 *
 * @param {import('puppeteer-core').Page} page - Page showing the intro URL.
 * @returns {Promise<{name: string, author: string, tags: string[], intro: string}>}
 */
async function readNovelInfo(page) {
  await page.waitForSelector(INTRO_READY_SELECTOR);

  const name = await page.$eval(`${INFO_BOX} > div.title > h1`, (el) => el.innerText);
  const author = await page.$eval(`${INFO_BOX} > div.author`, (el) => el.innerText);
  const tags = await page.$eval(`${INFO_BOX} > div.tag`, (el) =>
    Array.from(el.querySelectorAll('a'), (node) => node.innerText),
  );
  const intro = await page.$eval(`${INTRO_READY_SELECTOR} > h2`, (el) => el.innerText);

  return { name, author, tags, intro };
}

/**
 * Collect every chapter link listed in the intro page's catalog.
 *
 * @param {import('puppeteer-core').Page} page - Page showing the intro URL.
 * @returns {Promise<Array<{href: string, title: string}>>} Links in page order.
 */
async function readCatalog(page) {
  return page.$eval(`${NOVEL_BOX} > ul.catalog`, (el) =>
    Array.from(el.querySelectorAll('a'), (node) => ({
      href: node.href,
      title: node.innerText,
    })),
  );
}

/**
 * Navigate to one chapter and extract its title and body text.
 *
 * @param {import('puppeteer-core').Page} page - Page reused for navigation.
 * @param {string} chapterUrl - Absolute URL of the chapter.
 * @returns {Promise<{title: string, info: string}>}
 */
async function readChapter(page, chapterUrl) {
  await page.goto(chapterUrl);
  await page.waitForSelector(CHAPTER_TITLE_SELECTOR);

  const title = await page.$eval(CHAPTER_TITLE_SELECTOR, (el) => el.innerText);
  const info = await page.$eval('.article', (el) => el.innerText);

  return { title, info };
}

/**
 * Scrape the novel at NOVEL_URL and write it to `<novel name>.json` in the
 * current working directory.
 *
 * The browser is always closed, even when scraping fails, so no Chrome
 * process is left behind and the Node process can exit.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const browser = await puppeteer.launch({
    headless: false,
    userDataDir: '/Users/caoke/chromedefault',
    executablePath: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
  });

  try {
    const page = await browser.newPage();
    // setViewport is asynchronous; wait for it so the first navigation
    // already renders at the intended size.
    await page.setViewport({ width: 1024, height: 768 });

    await page.goto(NOVEL_URL);
    const data = { url: NOVEL_URL, ...(await readNovelInfo(page)) };
    console.log(data);

    const chapters = await readCatalog(page);
    data.list = [];
    // Chapters are fetched one at a time because they share a single page.
    for (const [index, chapter] of chapters.entries()) {
      const item = await readChapter(page, chapter.href);
      data.list.push(item);
      // Log one progress line instead of re-printing the whole data object.
      console.log(`[${index + 1}/${chapters.length}] ${item.title}`);
    }

    fs.writeFileSync(`${toSafeFileName(data.name)}.json`, JSON.stringify(data, null, 2));
  } finally {
    await browser.close();
  }
}

main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
标签:box,el,浏览器,await,爬虫,intro,div,小说,page From: https://www.cnblogs.com/caoke/p/18349819