# 三、代理
# 案例
import requests
import re
# Request headers shared by every HTTP call in this script; the User-Agent
# string identifies the client as desktop Chrome 123 on Windows 10.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/123.0.0.0 Safari/537.36"
    ),
}
# Endpoint of the 66ip.cn proxy extraction page queried by get_ip().
_PROXY_LIST_URL = (
    "http://www.66ip.cn/mo.php?sxb=&tqsl=5&port=&export=&ktip=&sxa="
    "&submit=%CC%E1++%C8%A1&textarea="
)

# Captures the text between the ad <script> block and the closing </div>;
# the code below expects that text to be proxy entries separated by "<br />".
# Compiled once at import time instead of on every refetch.
_PROXY_BLOCK_RE = re.compile(
    r"var mediav_ad_height = '60';.*?</script>(?P<ipss>.*?)</div>", re.S
)


def get_ip(timeout=10):
    """Yield proxy addresses scraped from 66ip.cn, refetching when a batch runs out.

    The generator never finishes on its own: once every address of the current
    batch has been yielded, it downloads the page again and continues.

    Args:
        timeout: Seconds to wait for the proxy-list page before giving up.

    Yields:
        One proxy entry per iteration, exactly as listed on the page with
        surrounding whitespace removed (presumably ``"ip:port"`` - TODO confirm
        against the live page).

    Raises:
        requests.exceptions.RequestException: The proxy-list page could not be
            fetched or returned an HTTP error status.
        RuntimeError: The page did not contain the expected proxy block, or
            the block held no entries.
    """
    while True:
        resp = requests.get(_PROXY_LIST_URL, headers=headers, timeout=timeout)
        # Fail on 4xx/5xx instead of trying to parse an error page.
        resp.raise_for_status()

        match = _PROXY_BLOCK_RE.search(resp.text)
        if match is None:
            # Previously this surfaced as AttributeError on None.group(...).
            raise RuntimeError(
                "proxy list block not found in the 66ip.cn response"
            )

        block = match.group("ipss").strip()
        for ch in "\r\n\t":
            block = block.replace(ch, "")

        # The text after the final "<br />" is not an entry, so the last split
        # piece is dropped; remaining pieces are stripped so padding spaces
        # cannot leak into the proxy URL, and blank pieces are skipped.
        ips = [
            entry.strip()
            for entry in block.split("<br />")[:-1]
            if entry.strip()
        ]
        if not ips:
            # Without this guard an empty batch would make the loop re-request
            # the page immediately and endlessly without yielding anything.
            raise RuntimeError("66ip.cn returned an empty proxy list")

        print(ips)
        yield from ips
def spider(url="", timeout=10):
    """Fetch *url* through scraped proxies, moving to the next proxy on failure.

    Proxy addresses are taken one at a time from the module-level generator
    ``gen`` (bound in the ``__main__`` block). The first proxy that delivers a
    response wins; its body is decoded as UTF-8 and returned.

    Args:
        url: Target page to download. Must be non-empty.
        timeout: Seconds to wait on each proxy before trying the next one, so
            a dead proxy cannot hang the loop indefinitely.

    Returns:
        The response body as text, decoded as UTF-8.

    Raises:
        ValueError: *url* is empty.
        RuntimeError: The proxy generator has no more addresses to offer.
    """
    if not url:
        # An empty URL can never succeed; fail once instead of burning
        # through proxies.
        raise ValueError("spider() needs a target url; none was provided")

    while True:
        # Kept outside the request's try block: a failure of the proxy source
        # itself is not a "bad proxy" and must not be retried forever.
        try:
            proxy_ip = next(gen)
        except StopIteration:
            raise RuntimeError("proxy generator is exhausted") from None

        # NOTE: only the "http" scheme is mapped, so per the requests proxy
        # rules an https:// target would not be routed through the proxy.
        proxy = {
            "http": "http://" + proxy_ip,
        }
        print(proxy)

        try:
            # verify=False (TLS certificate checks disabled) is kept from the
            # original code.
            resp = requests.get(
                url,
                proxies=proxy,
                verify=False,
                headers=headers,
                timeout=timeout,
            )
        except (
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
        ) as e:
            # Unreachable or too-slow proxy: report it and try the next one.
            print(e)
            print("报错了!")
            continue

        resp.encoding = "utf-8"
        return resp.text
if __name__ == '__main__':
    # spider() reads the module-level name `gen`, so the proxy generator has
    # to be bound here before spider() is called.
    gen = get_ip()
    page = spider()
    print(page)
# 标签:get,ip,resp,代理,headers,proxy,print,第三方
# From: https://www.cnblogs.com/ckeri/p/18109465