爬虫实战---爬取图片
import os
import re

import requests
# Site root; relative detail-page links from the listing are appended to it.
BASE_URL = 'http://www.netbian.com'

# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 '
        'Core/1.94.178.400 QQBrowser/11.2.5170.400'
    )
}

# In both patterns the parenthesised (.*?) groups capture the values we keep,
# while the bare .*? parts only skip over text we do not need.
# Listing page: captures (detail-page link, image title) for each thumbnail.
LIST_PATTERN = re.compile(
    r'<a href="(.*?)" title=".*?" target="_blank"><img src=".*?" alt="(.*?)" />'
)
# Detail page: captures the URL of the displayed image.
DETAIL_PATTERN = re.compile(r'target="_blank"><img src="(.*?)" alt=".*?"')

# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 10

# Directory (relative to the current working directory) that receives images.
OUTPUT_DIR = 'img'


def page_url(page):
    """Return the listing URL for the 1-based *page* number.

    The first page has no numeric suffix; later pages use ``index_<page>.htm``.
    """
    if page == 1:
        return f'{BASE_URL}/meinv/index.htm'
    return f'{BASE_URL}/meinv/index_{page}.htm'


def safe_filename(title):
    """Return *title* with characters that are unsafe in file names replaced.

    Path separators, Windows-reserved punctuation and control whitespace become
    underscores; leading/trailing spaces and dots are stripped. An empty result
    falls back to ``'untitled'`` so a file name is always produced.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', title).strip(' .')
    return cleaned or 'untitled'


def fetch_html(url):
    """Download *url* and return its body decoded with the detected encoding.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # Decode with the encoding guessed from the body rather than the header.
    response.encoding = response.apparent_encoding
    return response.text


def download_page(page, out_dir=OUTPUT_DIR):
    """Download every image linked from listing *page* into *out_dir*.

    Entries whose detail page or image cannot be fetched, or whose detail page
    has no matching image tag, are reported and skipped.

    Returns:
        The number of images written.

    Raises:
        requests.RequestException: if the listing page itself cannot be fetched.
    """
    saved = 0
    for link, title in LIST_PATTERN.findall(fetch_html(page_url(page))):
        link_url = BASE_URL + link
        try:
            matches = DETAIL_PATTERN.findall(fetch_html(link_url))
            if not matches:
                # Previously an IndexError here aborted the whole crawl.
                print(f'no image found on {link_url}, skipped')
                continue
            img_url = matches[0]
            print(img_url)
            img_response = requests.get(
                url=img_url, headers=HEADERS, timeout=REQUEST_TIMEOUT
            )
            # Avoid saving an HTML error page under a .jpg name.
            img_response.raise_for_status()
        except requests.RequestException as exc:
            print(f'failed to download from {link_url}: {exc}')
            continue

        target = os.path.join(out_dir, safe_filename(title) + '.jpg')
        with open(target, mode='wb') as f:
            f.write(img_response.content)
        saved += 1
    return saved


def main():
    """Crawl listing pages 1 through 10 and save their images under ``img``."""
    # open() does not create missing directories, so make sure it exists first.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for page in range(1, 11):
        try:
            download_page(page)
        except requests.RequestException as exc:
            print(f'failed to fetch listing page {page}: {exc}')


if __name__ == '__main__':
    main()
标签:爬取,www,encoding,url,爬虫,---,headers,link,response
From: https://www.cnblogs.com/xiaopixiong/p/17172928.html