首页 > 编程语言 >如何在Java语言中使用爬虫ip

如何在Java语言中使用爬虫ip

时间:2022-10-24 15:01:09 浏览次数:51
标签:Java String java ip 爬虫 Proxy new import password

企业客户做大数据抓取都会用到爬虫IP,质量好的爬虫IP可以让爬虫工作事半功倍,因此如何高效地爬取目标数据就显得尤为重要。影响抓取效果的不仅仅是爬虫IP的质量,还有可能是技术人员编写代码时的优化问题。下文是使用Java语言的代码示例,可以一起看看。

Java HttpURLConnection

package com.qgproxy;

import java.io.ByteArrayOutputStream;

import java.io.InputStream;

import java.net.Authenticator;

import java.net.HttpURLConnection;

import java.net.InetSocketAddress;

import java.net.PasswordAuthentication;

import java.net.Proxy;

import java.net.URL;

/**
 * {@link Authenticator} that answers every credential request with one fixed
 * user name and password.
 *
 * <p>It does not inspect who is asking (proxy vs. origin server, host, scheme);
 * the same pair is returned for every challenge.
 */
class QGProxyAuthenticator extends Authenticator {

    private final String user;
    private final String password;

    /**
     * @param user     user name to present when challenged
     * @param password password to present when challenged
     */
    public QGProxyAuthenticator(String user, String password) {
        this.user = user;
        this.password = password;
    }

    /**
     * Builds the credentials from the values given to the constructor.
     *
     * @return a new {@link PasswordAuthentication} holding the stored user
     *         name and a fresh char-array copy of the password
     */
    @Override
    protected PasswordAuthentication getPasswordAuthentication() {
        return new PasswordAuthentication(user, password.toCharArray());
    }
}

/**
 * Demo: fetches a page through an authenticated HTTP proxy using
 * {@link HttpURLConnection} and prints the response body.
 */
class QGProxy {

    /**
     * Sends one GET request to a fixed URL through a fixed proxy and prints
     * the body to standard output. Failures are reported on standard error.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        // NOTE(review): sample values are hard-coded for the demo; real code
        // should load the proxy address and credentials from configuration.
        String targetUrl = "http://jshk.com.cn";
        String proxyIp = "219.151.125.106";
        int proxyPort = 31615;
        String authKey = "895314XY";
        String password = "24D6YB309ZCB";

        HttpURLConnection connection = null;
        try {
            URL url = new URL(targetUrl);

            // Installs a JVM-wide default authenticator for credential challenges.
            Authenticator.setDefault(new QGProxyAuthenticator(authKey, password));

            InetSocketAddress socketAddress = new InetSocketAddress(proxyIp, proxyPort);
            Proxy proxy = new Proxy(Proxy.Type.HTTP, socketAddress);

            connection = (HttpURLConnection) url.openConnection(proxy);
            // Without timeouts an unresponsive proxy blocks this thread forever.
            connection.setConnectTimeout(10_000);
            connection.setReadTimeout(10_000);

            byte[] response = readStream(connection.getInputStream());
            // Decode with an explicit charset instead of the platform default.
            // NOTE(review): assumes the page is UTF-8 — confirm against the
            // response's Content-Type header.
            System.out.println(new String(response, java.nio.charset.StandardCharsets.UTF_8));
        } catch (Exception e) {
            // e.toString() keeps the exception type and never prints a bare "null".
            System.err.println("Request via proxy failed: " + e);
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Reads the stream to its end and returns everything that was read.
     * The stream is always closed, even when reading fails.
     *
     * @param inStream stream to drain; closed by this method
     * @return all bytes read from {@code inStream}
     * @throws Exception if reading from the stream fails
     */
    public static byte[] readStream(InputStream inStream) throws Exception {
        try (InputStream in = inStream;
             ByteArrayOutputStream out = new ByteArrayOutputStream()) {
            byte[] buffer = new byte[1024];
            int len;
            while ((len = in.read(buffer)) != -1) {
                out.write(buffer, 0, len);
            }
            return out.toByteArray();
        }
    }
}

Java OkHttp

package com.qgproxy;

import okhttp3.*;

import java.io.IOException;

import java.net.InetSocketAddress;

import java.net.Proxy;

import java.util.concurrent.TimeUnit;

public class QGProxy {

final static String proxyIp = "219.151.125.106";

final static Integer proxyPort = 31615;

final static String authKey = "895314XY";

final static String password = "24D6YB309ZCB";

static OkHttpClient client;

static {

Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(proxyIp, proxyPort));

Authenticator proxyAuthenticator = (route, response) -> {

String credential = Credentials.basic(authKey, password);

return response.request().newBuilder().header("Proxy-Authorization", credential).build();

};

client = new OkHttpClient().newBuilder()

.connectTimeout(10, TimeUnit.SECONDS)

.readTimeout(10, TimeUnit.SECONDS)

.proxy(proxy)

.proxyAuthenticator(proxyAuthenticator)

.connectionPool(new ConnectionPool(4, 2, TimeUnit.SECONDS))

.build();

}

public static void main(String[] args) throws IOException {

Request request = new Request.Builder().url("http://jshk.com.cn").build();

Response response = client.newCall(request).execute();

System.out.println(response.body().string());

}

}

标签:Java,String,java,ip,爬虫,Proxy,new,import,password
From: https://blog.51cto.com/u_13488918/5789799

相关文章