一、获取IP地址
二、判断地址是否可用，并把可用的地址保存。
import requests
from lxml import etree
import time
# Request headers passed to requests.get() below. The User-Agent imitates a
# desktop Chrome browser; the value must follow the real Chrome format
# (spaces before each parenthesised group, "KHTML", a space before "Safari",
# no trailing space) or it no longer looks like a browser at all.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    ),
}
# Base address of the listing; page N is fetched from <url>N.html.
url = "http://www.66ip.cn/areaindex_10/"

# Open the output file ONCE, around the page loop. Re-opening it with mode "w"
# for every page truncates it each time, so only the last page would survive.
with open("06-IP代理.txt", "w", encoding="utf-8") as f:
    for page in range(1, 11):
        page_url = url + str(page) + ".html"
        # requests has no default timeout; bound the wait so a stalled
        # listing page cannot hang the whole run.
        resp = requests.get(url=page_url, headers=headers, timeout=10)
        # Decode the body with the encoding detected from its content.
        resp.encoding = resp.apparent_encoding
        e = etree.HTML(resp.text)
        # Text of the 1st / 2nd / 3rd cell of every matched table row.
        ips = e.xpath("//div[1]/table//tr/td[1]/text()")
        ports = e.xpath("//div[1]/table//tr/td[2]/text()")
        addrs = e.xpath("//div[1]/table//tr/td[3]/text()")
        # One "---ip---port---address" line per row.
        for i, p, a in zip(ips, ports, addrs):
            f.write(f"---{i}---{p}---{a}\n")
        # Pause between page requests.
        time.sleep(5)
print("保存完毕")
import requests
from lxml import etree
import time
# Request headers passed to every requests.get() call below. The User-Agent
# imitates a desktop Chrome browser; the value must follow the real Chrome
# format (spaces before each parenthesised group, "KHTML", a space before
# "Safari", no trailing space) or it no longer looks like a browser at all.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    ),
}
# Base address of the listing; page N is fetched from <url>N.html.
url = "http://www.66ip.cn/areaindex_10/"

for page in range(1, 11):
    page_url = url + str(page) + ".html"
    # requests has no default timeout; bound the wait so a stalled listing
    # page cannot hang the whole run.
    resp = requests.get(url=page_url, headers=headers, timeout=10)
    # Decode the body with the encoding detected from its content.
    resp.encoding = resp.apparent_encoding
    e = etree.HTML(resp.text)
    # Text of the 1st / 2nd cell of every matched table row.
    ips = e.xpath("//div[1]/table//tr/td[1]/text()")
    ports = e.xpath("//div[1]/table//tr/td[2]/text()")

    for ip, port in zip(ips, ports):
        # Skip rows whose first cell is the literal "ip" (presumably the
        # table's header row).
        if ip == 'ip':
            continue
        # NOTE(review): the scheme in a proxy URL tells requests how to talk
        # to the proxy itself. If the listed proxies are plain HTTP proxies,
        # this value likely needs to be "http://..." — confirm before relying
        # on the results.
        proxies_dict = {
            "https": "https://" + ip + ':' + port,
        }
        print(proxies_dict)
        # Keep the try body to the request only, and catch only request
        # failures: a bare "except:" would also swallow Ctrl-C and hide
        # file-write errors behind the "proxy unusable" message.
        try:
            res = requests.get(url="https://www.baidu.com", headers=headers, proxies=proxies_dict, timeout=2)
        except requests.RequestException:
            print("当前代理不可用")
            continue
        if res.status_code == 200:
            # Append so proxies confirmed earlier are kept.
            with open("06-IP代理-可用.txt", "a", encoding="utf-8") as f:
                f.write(f"{ip}---{port}\n")
    # Pause between listing-page requests.
    time.sleep(3)
print("保存完毕")
1、xpath解析数据。
# Parse the response's HTML text into an lxml element tree.
e = etree.HTML(resp.text)
# Run an XPath query on the tree: collect the text of the first <td> of each
# matched table row as a list of strings.
ips = e.xpath("//div[1]/table//tr/td[1]/text()")
2、txt文本格式保存数据。
# Save the scraped rows as plain text, one "---ip---port---address" line per
# row. Mode "w" creates the file or replaces its previous contents.
with open("06-IP代理.txt", "w", encoding="utf-8") as f:
    f.writelines(
        f"---{i}---{p}---{a}\n" for i, p, a in zip(ips, ports, addrs)
    )