首页 > 其他分享 >爬取动态canvas

爬取动态canvas

时间:2023-02-22 10:46:41浏览次数:34  
标签:canvas String base64 System 爬取 byte 动态 page out

之前爬取动态 canvas 是用 Selenium 写的

效果很不理想

由于时间关系,让前端渲染截图转为base64发送到后端

 

最近比较有空,在网上找到了无头浏览器 PhantomJS

 

能用,比 Selenium 强一点:不需要打开浏览器窗口来渲染页面。但也没强多少,内存和 CPU 占用还是比较高,速度也慢

 

下载 PhantomJS 并配置环境变量:要把它的目录加到系统变量的 Path 里,不能配在用户变量里,不然不生效

 

贴代码

package com.example.testmap;

import org.junit.jupiter.api.Test;

import java.io.*;
import java.util.Base64;
import java.util.UUID;


/**
 * Demonstrates capturing a dynamically rendered canvas page: a PhantomJS script
 * takes the screenshot and prints it as Base64 on its standard output, and this
 * class reads that output, decodes it and writes the image to disk.
 */
public class phantomJSTest {

    /** PhantomJS capture script handed to the {@code phantomjs} executable. */
    private static final String CAPTURE_SCRIPT = "D:\\workSoft\\phantomjs\\test\\code.js";

    /** Encoding the capture script selects through {@code phantom.outputEncoding}. */
    private static final String SCRIPT_OUTPUT_ENCODING = "GBK";

    /** Fixed file the decoded screenshot is written to. */
    private static final String OUTPUT_IMAGE = "C:\\Users\\HTHT\\Desktop\\testimg.png";

    /** Upper bound on how much process output is echoed into an error message. */
    private static final int MAX_DIAGNOSTIC_CHARS = 200;

    /**
     * Captures the target page, stores the screenshot, and prints how long the
     * capture step and the decode/write step took.
     *
     * @throws IOException if PhantomJS cannot be run, fails, or the image cannot be written
     */
    @Test
    void contextLoads() throws IOException {
        String url = "https://www.fs121.com/emergencyWarning/FoShan";

        long startTime = System.currentTimeMillis();
        String base = getAjaxContent(url);
        long endTime = System.currentTimeMillis();
        System.out.println("截图时间:" + (endTime - startTime) + "ms");

        long startBase = System.currentTimeMillis();
        String savedTo = generateImage(base, "");
        long endBase = System.currentTimeMillis();
        System.out.println("处理base64并写入磁盘时间:" + (endBase - startBase) + "ms");

        // generateImage reports a failed write by returning null; do not claim success then.
        if (savedTo == null) {
            throw new IOException("Could not write the decoded screenshot to " + OUTPUT_IMAGE);
        }
        System.out.println("成功了,总耗时:" + (endBase - startTime) + "ms");
    }

    /**
     * Runs the PhantomJS capture script against {@code url} and returns everything
     * the script printed on standard output, with line breaks removed.
     *
     * @param url page to open in PhantomJS
     * @return the script's standard output joined into a single string
     * @throws IOException if the process cannot be started, its output cannot be read,
     *                     it exits with a non-zero code, or the wait is interrupted
     */
    public static String getAjaxContent(String url) throws IOException {
        // Pass the arguments as a list so the URL reaches the script as exactly one
        // argument instead of being split on whitespace by Runtime.exec(String).
        ProcessBuilder builder = new ProcessBuilder("phantomjs", CAPTURE_SCRIPT, url);
        // Let stderr flow to this JVM's stderr: it can neither fill an unread pipe
        // (which would stall the child) nor get mixed into the Base64 payload.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process process = builder.start();

        StringBuilder output = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), SCRIPT_OUTPUT_ENCODING))) {
            String line;
            while ((line = reader.readLine()) != null) {
                output.append(line);
            }
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                String shown = output.substring(0, Math.min(output.length(), MAX_DIAGNOSTIC_CHARS));
                throw new IOException("phantomjs exited with code " + exitCode + ": " + shown);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag for callers further up the stack.
            Thread.currentThread().interrupt();
            throw new InterruptedIOException("Interrupted while waiting for phantomjs to exit");
        } finally {
            // No-op after a normal exit; reaps the child if reading failed part-way.
            process.destroy();
        }
        return output.toString();
    }

    /**
     * Reads an input stream to its end and returns the bytes read.
     * The stream is not closed by this method.
     *
     * @param inputStream stream to drain
     * @return the bytes read, or an empty array if reading fails with an {@link IOException}
     */
    public static byte[] inputStreamToByteArray(InputStream inputStream) {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        try {
            int read;
            while ((read = inputStream.read(buffer)) != -1) {
                collected.write(buffer, 0, read);
            }
            return collected.toByteArray();
        } catch (IOException e) {
            // Best effort: report the failure and hand back an empty result.
            e.printStackTrace();
            return new byte[]{};
        }
    }

    /**
     * Decodes a Base64 image and writes it to the fixed output file.
     *
     * <p>The input may be raw Base64 or a {@code data:} URI; for the latter,
     * everything up to and including the first comma is dropped before decoding.
     *
     * <p>NOTE(review): {@code path} is accepted but not used; the image always goes
     * to the fixed output file. This matches the previous behavior, where the
     * path-derived name was overwritten before use. Confirm whether the parameter
     * should be honored.
     *
     * @param base64 Base64 text of the image, optionally with a {@code data:...;base64,} prefix
     * @param path   currently ignored
     * @return the path of the written file, or {@code null} if writing failed
     * @throws IllegalArgumentException if {@code base64} is not valid Base64
     */
    public static String generateImage(String base64, String path) {
        String payload = base64;
        if (payload.startsWith("data:")) {
            payload = payload.substring(payload.indexOf(',') + 1);
        }
        byte[] imageBytes = Base64.getDecoder().decode(payload);

        try (OutputStream out = new FileOutputStream(OUTPUT_IMAGE)) {
            out.write(imageBytes);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        return OUTPUT_IMAGE;
    }

}

 

 

然后就是js代码  

 

狗日的前端  叫他帮我写这点代码写了一下午

// PhantomJS capture script.
// Usage: phantomjs <this script> <url>
// Opens <url>, waits briefly after the page has loaded, and prints a Base64-encoded
// PNG of the clipped region on stdout so the calling process can read it.
// Exits with code 0 on success and 1 on any failure, so the caller is never left
// waiting on a process that does not terminate.

// NOTE(review): presumably chosen to match the console code page of the machine
// this runs on; confirm before changing, since the caller decodes stdout.
phantom.outputEncoding = "gbk";

var system = require('system');
var page = require("webpage").create();

// Pause between the load event and the screenshot.
var RENDER_DELAY_MS = 1000;

page.viewportSize = { width: 1024, height: 768 };
// the clipRect is the portion of the page you are taking a screenshot of
page.clipRect = { top: 200, left: 210, width: 1024, height: 600 };

if (system.args.length < 2) {
  // system.args[0] is the script name; the URL must follow it.
  console.log('Usage: phantomjs ' + system.args[0] + ' <url>');
  phantom.exit(1);
} else {
  page.open(system.args[1], function (status) {
    if (status !== "success") {
      console.log('失败了....');
      // Without an explicit exit PhantomJS keeps running and the caller blocks forever.
      phantom.exit(1);
      return;
    }
    window.setTimeout(function () {
      console.log(page.renderBase64('PNG'));
      phantom.exit();
    }, RENDER_DELAY_MS);
  });
}

 

运行截图

 

标签:canvas,String,base64,System,爬取,byte,动态,page,out
From: https://www.cnblogs.com/bawanglong168/p/17143512.html

相关文章