爬取的网址:http://www.netbian.com/mei/
import requests
from lxml import etree
from time import sleep
# Download the wallpapers found on several listing pages.
def pageTotall(num=None):
    """Download every image listed on the first ``num`` listing pages.

    For each page index ``i`` in ``range(num)`` the listing page is fetched
    from ``http://www.netbian.com/mei/`` (``index.htm`` when ``i == 0``,
    ``index_{i}.htm`` otherwise) and decoded as GBK. Every ``<img>`` under
    ``div.list`` is then downloaded and written to
    ``./img_totals/<alt text>.jpg``.

    Args:
        num: Number of listing pages to walk. It is passed straight to
            ``range()``, so it must be an integer; the default ``None``
            raises ``TypeError`` exactly as before.

    Returns:
        None. The function works purely through side effects: files on
        disk and progress messages on stdout.

    Raises:
        TypeError: If ``num`` is not an integer.
        requests.exceptions.RequestException: If a request fails at the
            network level or exceeds the timeout.
    """
    import os
    import re
    from urllib.parse import urljoin

    # Validate ``num`` before touching the disk or the network.
    page_indexes = range(num)

    # The target folder is not guaranteed to exist on a fresh checkout.
    out_dir = './img_totals'
    os.makedirs(out_dir, exist_ok=True)

    # Loop-invariant, so build it once instead of once per page.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
    }
    timeout = 10  # seconds; without it a stalled connection blocks forever

    for i in page_indexes:
        # Keep ``i`` intact (it is reported in the progress message below);
        # only the file name of the first page differs.
        page = 'index.htm' if i == 0 else f'index_{i}.htm'
        url = f'http://www.netbian.com/mei/{page}'

        resp = requests.get(url=url, headers=headers, timeout=timeout)
        if not resp.ok:
            # An error page carries no image list; skip it instead of parsing it.
            print(f'第{i}页请求失败(状态码 {resp.status_code}),已跳过')
            continue
        resp.encoding = 'gbk'  # the body is decoded as GBK so Chinese text is readable

        e = etree.HTML(resp.text)
        img_urls = e.xpath('//div[@class="list"]/ul/li/a//img/@src')
        img_names = e.xpath('//div[@class="list"]/ul/li/a//img/@alt')

        for n, u in zip(img_names, img_urls):
            # urljoin leaves absolute URLs untouched and resolves relative ones.
            img_resp = requests.get(url=urljoin(url, u), headers=headers, timeout=timeout)
            if not img_resp.ok:
                # Do not store an HTML error body under a .jpg name.
                print(f'图片下载失败(状态码 {img_resp.status_code}),已跳过: {n}')
                continue

            # Alt text is free-form; replace characters that are not allowed
            # in file names so open() cannot fail or escape ``out_dir``.
            safe_name = re.sub(r'[\\/:*?"<>|]', '_', n).strip()
            with open(os.path.join(out_dir, f'{safe_name}.jpg'), "wb") as f:
                f.write(img_resp.content)

            print(f'正在下载第{i}页: 图片名为:{n}')
            sleep(1)  # throttle requests to avoid being blocked by the site
# Script entry point: only prompt and crawl when this file is executed
# directly, so importing it has no side effects.
if __name__ == "__main__":
    num = int(input("请输入你要打印的页数:"))
    pageTotall(num)
将爬取的图片进行合成(以后可以用得到,只需把图片放到指定文件夹即可)
# 处理图片的工具 这里我使用 pillow
from PIL import Image
import os # 自带工具,专门来处理文件
# Combine the images in ./img_f into one grid image saved as new_img.jpg.

# Reference image: its size defines the size of every grid cell.
with Image.open('img_f/咳咳.jpg') as im:
    print(im.size)
    w, h = im.size

# ---------------------------------------------------------------
# Only these two values need changing to produce a different grid.
# ---------------------------------------------------------------
image_row = 4  # number of rows in the output grid
image_column = 4  # number of columns in the output grid

# os.listdir returns entries in arbitrary order, so sort them to get a
# reproducible layout, and ignore sub-directories that cannot be opened
# as images.
names = sorted(
    entry for entry in os.listdir('./img_f')
    if os.path.isfile(os.path.join('./img_f', entry))
)

# Fail up front with a clear message instead of midway through the grid.
cells = image_row * image_column
if len(names) < cells:
    raise IndexError(
        f'./img_f holds {len(names)} image(s) but a '
        f'{image_row}x{image_column} grid needs {cells}'
    )

# Blank canvas large enough for the whole grid.
new_img = Image.new('RGB', (image_column * w, image_row * h))
for y in range(image_row):
    for x in range(image_column):
        tile_path = os.path.join('./img_f', names[image_column * y + x])
        with Image.open(tile_path) as o_img:
            # Tiles of a different size would overlap or leave gaps; scale
            # them to the cell size taken from the reference image.
            tile = o_img if o_img.size == (w, h) else o_img.resize((w, h))
            new_img.paste(tile, (x * w, y * h))
new_img.save('new_img.jpg')
标签:img,合成,爬取,image,print,new,resp,图片
From: https://www.cnblogs.com/socoo-/p/16961962.html