首页 > 其他分享 >利用 puppeteer 抓取实时交易信息

利用 puppeteer 抓取实时交易信息

时间:2023-03-28 19:07:48浏览次数:56  
标签:function const await 抓取 实时 puppeteer params console page

使用 python 没办法抓取 http://39.107.99.235:1008/ 这个站的交易信息。

于是就选了 puppeteer,它运行一个 headless 的 chrome 浏览器,我们可以模拟人工操作,去获取数据信息。

安装 puppeteer

首先我们建立一个叫 btcdata 的目录。
进入这个目录,用 npm init 初始化项目。
然后安装 puppeteer


# Create the project directory and switch into it.
mkdir btcdata
cd btcdata
# Initialise package.json (npm init asks for the project details interactively).
npm init
# Install puppeteer and record it as a dependency in package.json.
npm install --save puppeteer

打开一个网页

使用 puppeteer 打开一个浏览器,在开发中把 headless 设为 false,
这时候它会打开一个浏览器,我们从现在开始一步一步抓取我们的数据。


const puppeteer = require('puppeteer');
const remoteHost = 'http://39.107.99.235:1008'
/**
 * Launches a visible (non-headless) browser and opens the market login page.
 *
 * @returns {Promise<void>} Resolves once navigation to the login page has finished.
 */
async function main() {
    // headless: false keeps the browser window visible while developing.
    const browser = await puppeteer.launch({headless: false});
    const page = await browser.newPage();
    await page.goto(`${remoteHost}/market/login.php`);
}
main()

模拟鼠标点击登录

利用浏览器的审查元素,我们找到登录按钮,然后模拟人工点击。


// Add inside main(), after page.goto(): find the login form's submit button and click it.
const loginBtn = await page.$('input[type=submit]');
if (loginBtn === null) {
  // page.$ resolves to null when no element matches the selector.
  throw new Error('login button not found: input[type=submit]');
}
await loginBtn.click();

抓取 websockets 数据包

使用浏览器的网络事件(Chrome DevTools Protocol 的 Network 域)捕获 WebSocket 数据包


/**
 * Logs the WebSocket activity of a page through a Chrome DevTools Protocol session.
 *
 * @param {object} page - Puppeteer page whose WebSocket traffic is logged.
 * @returns {Promise<void>} Resolves after all listeners have been registered.
 */
async function capture(page) {
  const session = await page.target().createCDPSession();
  // The Network domain has to be enabled before its events are delivered.
  await session.send('Network.enable');

  // [CDP event name, builder of the log line], listed in registration order.
  const loggers = [
    ['Network.webSocketCreated', (event) => `创建 WebSocket 连接:${event.url}`],
    ['Network.webSocketClosed', () => `WebSocket 连接关闭`],
    ['Network.webSocketFrameSent', (event) => `发送 WebSocket 消息:${event.response.payloadData}`],
    ['Network.webSocketFrameReceived', (event) => `收到 WebSocket 消息:${event.response.payloadData}`],
    ['Network.webSocketWillSendHandshakeRequest', () => `准备发送 WebSocket 握手消息`],
    ['Network.webSocketHandshakeResponseReceived', () => `接收到 WebSocket 握手消息`],
  ];

  for (const [eventName, describe] of loggers) {
    session.on(eventName, (event) => {
      console.log(describe(event));
    });
  }
}

    // Add this right after the newPage() call
    await capture(page);

抓取普通请求数据包

使用页面的 response 事件获取请求的响应信息


// Add this right after the newPage() call.
// page.on() registers the listener synchronously, so it is not awaited.
page.on('response', async response => {
  if (!response.url().startsWith(`${remoteHost}/redis.php`)) {
    return;
  }
  try {
    const text = await response.text();
    console.log(text);
  } catch (err) {
    // Reading the body can fail (e.g. the page navigated away in the meantime);
    // log it instead of leaving an unhandled rejection in this event listener.
    console.error(err);
  }
});

完整应用脚本


const puppeteer = require('puppeteer');
const express = require('express');
const expressWs = require('express-ws');

// Express application; expressWs(app) is applied before any app.ws() route is declared.
const app = express();
expressWs(app);

// WebSocket clients connected to /ws; forwardToWs() sends captured data to each of them.
let funcs = [];
// Puppeteer browser instance; stays null until start() launches one.
let browser = null;

// Base URL of the site being scraped.
const remoteHost = 'http://39.107.99.235:1008'

// WebSocket endpoint: every client connected to /ws is added to funcs, the list
// that forwardToWs() sends captured data to.
app.ws('/ws', function (ws, req){
  funcs.push(ws);
  ws.on('message', function (msg) {
    console.log(msg);
  });
  // Forget the client as soon as it disconnects so closed sockets do not pile up in funcs.
  ws.on('close', function () {
    funcs = funcs.filter(function (client) {
      return client !== ws;
    });
  });
})

/**
 * Sends data to every connected WebSocket client and drops the clients that
 * are closing/closed or whose send() throws.
 *
 * @param {*} data - Payload passed unchanged to each client's send().
 * @returns {void}
 */
function forwardToWs(data) {
  // readyState values from the WebSocket API: 2 = CLOSING, 3 = CLOSED.
  const CLOSING = 2;
  const alive = [];
  for (const ws of funcs) {
    // send() on a closing/closed socket does not necessarily throw, so such
    // clients are pruned by state instead of waiting for an exception.
    if (ws.readyState >= CLOSING) {
      continue;
    }
    try {
      ws.send(data);
      alive.push(ws);
    } catch (e) {
      // A client whose send() throws is logged and dropped from the list.
      console.error(e);
    }
  }
  funcs = alive;
}

/**
 * Observes the WebSocket traffic of a page through a Chrome DevTools Protocol
 * session: connection events and sent frames are logged, while the payload of
 * every received frame is handed to callback.
 *
 * @param {object} page - Puppeteer page to observe.
 * @param {function(*): void} callback - Called with the payloadData of each received frame.
 * @returns {Promise<void>} Resolves after all listeners have been registered.
 */
async function capture(page, callback) {
  const session = await page.target().createCDPSession();
  // The Network domain has to be enabled before its events are delivered.
  await session.send('Network.enable');

  // [CDP event name, listener], listed in registration order.
  const listeners = [
    ['Network.webSocketCreated', (event) => {
      console.log(`创建 WebSocket 连接:${event.url}`);
    }],
    ['Network.webSocketClosed', () => {
      console.log(`WebSocket 连接关闭`);
    }],
    ['Network.webSocketFrameSent', (event) => {
      console.log(`发送 WebSocket 消息:${event.response.payloadData}`);
    }],
    // Received frames are forwarded to the caller instead of being logged.
    ['Network.webSocketFrameReceived', (event) => {
      callback(event.response.payloadData);
    }],
    ['Network.webSocketWillSendHandshakeRequest', () => {
      console.log(`准备发送 WebSocket 握手消息`);
    }],
    ['Network.webSocketHandshakeResponseReceived', () => {
      console.log(`接收到 WebSocket 握手消息`);
    }],
  ];

  listeners.forEach(([eventName, listener]) => {
    session.on(eventName, listener);
  });
}

/**
 * Opens the market page for one market id in a new tab, with capture()
 * attached before navigation so received frames are passed to forwardToWs.
 *
 * @param {object} browser - Puppeteer browser used to open the tab.
 * @param {number|string} marketid - Value placed in the marketid query parameter.
 * @returns {Promise<void>} Resolves once navigation has finished.
 */
async function otherPage(browser, marketid) {
  const marketPage = await browser.newPage();
  await capture(marketPage, forwardToWs);
  const marketUrl = `${remoteHost}/market/market.php?marketid=${marketid}`;
  await marketPage.goto(marketUrl);
}

/**
 * Launches the browser, submits the login page and then opens one market page
 * per market id. capture() is attached to the login page with forwardToWs as
 * its callback; the market pages are opened through otherPage().
 *
 * @returns {Promise<void>} Resolves after the last market page has been opened.
 * @throws {Error} If the login page contains no submit button.
 */
async function start() {
  // Market ids to watch; the pages are opened one after another in this order.
  const marketIds = [2, 3, 9, 4, 7];

  browser = await puppeteer.launch({headless: false});
  const page = await browser.newPage();
  await capture(page, forwardToWs);
  await page.goto(`${remoteHost}/market/login.php`);
  const loginBtn = await page.$('input[type=submit]');
  if (loginBtn === null) {
    // page.$ resolves to null when no element matches the selector.
    throw new Error('login button not found: input[type=submit]');
  }
  await loginBtn.click();
  for (const marketid of marketIds) {
    await otherPage(browser, marketid);
  }
}

// POST /start: launch the browser and begin capturing; replies {"result": "OK"} on success.
app.post('/start', async function(req, res) {
  try {
    await start();
    res.json({'result': 'OK'});
  } catch (err) {
    // A rejection from an async handler is not reported to the client by
    // Express 4, so the failure is answered explicitly instead of hanging.
    console.error(err);
    res.status(500).json({'result': 'ERROR', 'message': err.message});
  }
});

// POST /stop: close the browser if one was launched; replies {"result": "OK"} on success.
app.post('/stop', async function(req, res) {
  try {
    // browser is still null when /start was never called; nothing to close then.
    if (browser !== null) {
      await browser.close();
    }
    res.json({'result': 'OK'});
  } catch (err) {
    // A rejection from an async handler is not reported to the client by
    // Express 4, so the failure is answered explicitly instead of hanging.
    console.error(err);
    res.status(500).json({'result': 'ERROR', 'message': err.message});
  }
});

// POST /restart: close the current browser (if any) and run start() again;
// replies {"result": "OK"} on success.
app.post('/restart', async function(req, res) {
  try {
    // browser is still null when /start was never called; nothing to close then.
    if (browser !== null) {
      await browser.close();
    }
    await start();
    res.json({'result': 'OK'});
  } catch (err) {
    // A rejection from an async handler is not reported to the client by
    // Express 4, so the failure is answered explicitly instead of hanging.
    console.error(err);
    res.status(500).json({'result': 'ERROR', 'message': err.message});
  }
});

// GET /fetch?code=...&type=line&interval=1m
// Opens the site's page.php for the given parameters in a temporary tab, waits
// for the first response whose URL starts with `${remoteHost}/redis.php`, and
// replies with the part of its body starting at the first '[['.
app.get('/fetch', async function(req, res) {
  const code = req.query.code;
  const type = req.query.type || 'line'
  const interval = req.query.interval || '1m';
  if (browser === null) {
    // browser stays null until start() has launched one.
    res.status(503).json({'result': 'ERROR', 'message': 'browser not started'});
    return;
  }
  let page = null;
  try {
    page = await browser.newPage();
    // Encode the caller-supplied values so they cannot break the query string.
    const query = new URLSearchParams({code, type, interval});
    // Start waiting before navigating so the response cannot be missed.
    // waitForResponse settles on the first match only (and rejects on its
    // timeout), so the reply is sent at most once and the request cannot hang
    // forever. Promise.all observes both promises, so a failure of either one
    // lands in the catch below.
    const [response] = await Promise.all([
      page.waitForResponse(r => r.url().startsWith(`${remoteHost}/redis.php`)),
      page.goto(`${remoteHost}/market/page.php?${query}`),
    ]);
    const text = await response.text();
    const idx = text.indexOf('[[');
    if (idx === -1) {
      res.status(502).json({'result': 'ERROR', 'message': 'unexpected redis.php payload'});
      return;
    }
    // Reply with everything from the first '[[' up to, but excluding, the last character.
    res.send(text.substring(idx, text.length - 1));
  } catch (err) {
    console.error(err);
    if (!res.headersSent) {
      res.status(500).json({'result': 'ERROR', 'message': err.message});
    }
  } finally {
    if (page !== null) {
      // Close the tab only after navigation has settled; a close failure is
      // logged and must not replace the reply that was already sent.
      await page.close().catch(console.error);
    }
  }
})

// Listen on TCP port 8080, bound to the loopback interface only.
// The port comes first and the host second; with the arguments swapped, Node
// treats the non-numeric string as an IPC socket path instead of a host.
app.listen(8080, '127.0.0.1');



作者:Lupino

链接:https://www.jianshu.com/p/deff4bb94927

来源:简书

著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。

标签:function,const,await,抓取,实时,puppeteer,params,console,page
From: https://blog.51cto.com/u_16036560/6155223

相关文章