A Python Crawler for Downloading Images from 绝对领域 (jdlingyu.com)
Step 1:
Import the required modules:
import requests
from lxml import etree
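If requests and lxml are not installed yet, they can be installed beforehand with pip install requests lxml.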
Step 2: define the crawler function:
def get_url(start_url):
    response = requests.get(start_url)
    data = etree.HTML(response.text)  # parse the page into an object XPath can query
    # print(data)
    # collect the link of every post on this list page
    new_url_list = data.xpath('//div[@class="post-module-thumb"]/a/@href')
    for url in new_url_list:
        res = requests.get(url)
        img_data = etree.HTML(res.text)
        # collect every image URL inside the post body
        img_url_list = img_data.xpath('//div[@class="entry-content"]//img/@src')
        for img_url in img_url_list:
            # print(img_url)
            # build a file name from the last two segments of the URL path
            file_name = img_url.split("/")[-2] + img_url.split("/")[-1]
            print("Downloading", file_name)
            result = requests.get(img_url).content
            with open("图片/" + file_name, 'wb') as f:
                f.write(result)
Finally, the main block that drives the function:
if __name__ == "__main__":  # Python program entry point
    for i in range(50, 78):
        start_url = 'https://www.jdlingyu.com/tuji/hentai/gctt/page/{}'.format(i)
        get_url(start_url)
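Note that range(50, 78) covers pages 50 through 77, stopping before 78. Before running the full loop, it can be worth testing the function on a single page first, for example:

get_url('https://www.jdlingyu.com/tuji/hentai/gctt/page/50')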
Complete code:
import requests
from lxml import etree


def get_url(start_url):
    response = requests.get(start_url)
    data = etree.HTML(response.text)  # parse the page into an object XPath can query
    # print(data)
    # collect the link of every post on this list page
    new_url_list = data.xpath('//div[@class="post-module-thumb"]/a/@href')
    for url in new_url_list:
        res = requests.get(url)
        img_data = etree.HTML(res.text)
        # collect every image URL inside the post body
        img_url_list = img_data.xpath('//div[@class="entry-content"]//img/@src')
        for img_url in img_url_list:
            # print(img_url)
            # build a file name from the last two segments of the URL path
            file_name = img_url.split("/")[-2] + img_url.split("/")[-1]
            print("Downloading", file_name)
            result = requests.get(img_url).content
            with open("图片/" + file_name, 'wb') as f:
                f.write(result)


if __name__ == "__main__":  # Python program entry point
    for i in range(50, 78):
        start_url = 'https://www.jdlingyu.com/tuji/hentai/gctt/page/{}'.format(i)
        get_url(start_url)
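The code above has no error handling: a single failed request aborts the whole run, and many sites reject requests that lack a browser-like User-Agent. One possible way to harden the download step (a sketch only; the header value, timeout, and helper name are my own additions, not part of the original code):

import requests

HEADERS = {'User-Agent': 'Mozilla/5.0'}  # assumed example value, adjust as needed

def download_image(img_url, file_name):
    try:
        resp = requests.get(img_url, headers=HEADERS, timeout=10)
        resp.raise_for_status()  # raise an error on 4xx/5xx responses
    except requests.RequestException as e:
        print("Failed to download", img_url, e)
        return
    with open("图片/" + file_name, 'wb') as f:
        f.write(resp.content)
    print("Saved", file_name)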
Screenshot of a successful run:
I manually created a 图片 folder alongside the script to store the downloaded images:
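Creating the folder by hand works, but the script could also create it on startup so the open() call never fails with FileNotFoundError (a small addition, not in the original code):

import os

# Create the output folder if it does not exist yet; safe to call repeatedly.
os.makedirs("图片", exist_ok=True)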