class FctpItem(scrapy.Item):
    """Item describing one image to download.

    Both fields are consumed by ``FctpPipeline``: ``image_urls`` is passed
    to the download request and ``images`` becomes the stored file name.
    """

    # URL of the image; the pipeline hands this value straight to Request,
    # so it is a single URL string despite the plural name.
    image_urls = scrapy.Field()
    # File name (without the ".jpg" extension) the image is saved under.
    images = scrapy.Field()
from scrapy.pipelines.images import ImagesPipeline
from scrapy.http import Request
from pathlib import Path
class FctpPipeline(ImagesPipeline):
    """Image pipeline that saves each download as ``<item['images']>.jpg``.

    When a file with the chosen name already exists in the store directory,
    underscores are appended to the name until it is unique, so an earlier
    image is not overwritten by a later one with the same name.
    """

    # Directory probed for name clashes when the configured store does not
    # expose a local base directory (matches the original hard-coded path).
    FALLBACK_STORE_DIR = './data'

    def get_media_requests(self, item, info):
        """Yield the download request for the item's image.

        ``item['image_urls']`` is passed to ``Request`` unchanged, so it must
        be a single URL string. The item is attached to the request ``meta``
        so that ``file_path`` can read the target file name from it.
        """
        yield Request(item['image_urls'], meta={'item': item})

    def file_path(self, request, response=None, info=None, *, item=None):
        """Return the store-relative path under which the image is saved.

        The returned value decides the save location. The name is
        ``item['images'] + '.jpg'``; if that file already exists, ``_`` is
        appended to the stem (``name_.jpg``, ``name__.jpg``, ...) until an
        unused name is found.
        """
        # The item always comes from the request meta (set in
        # get_media_requests); the ``item`` keyword argument is ignored.
        item = request.meta['item']
        stem = item['images']
        store_dir = self._store_dir()

        candidate = stem + '.jpg'
        # Grow the stem rather than str.replace('.jpg', '_.jpg'): replace()
        # would also rewrite a '.jpg' that appears inside the image name.
        # Note: the existence check is not atomic, so two downloads that
        # resolve the same name before either is written can still collide.
        while Path(store_dir + '/' + candidate).exists():
            stem += '_'
            candidate = stem + '.jpg'
        return candidate

    def item_completed(self, results, item, info):
        """Return the item unchanged; download results are not stored on it."""
        return item

    def _store_dir(self):
        """Return the local directory used to detect already-saved names."""
        # NOTE(review): relies on the filesystem store exposing ``basedir``;
        # confirm against the installed Scrapy version. Other store types
        # (or a missing store) fall back to the original './data' path.
        store = getattr(self, 'store', None)
        return getattr(store, 'basedir', None) or self.FALLBACK_STORE_DIR
# Tags: filepath, image, jpg, crawling, item, scrapy, images, pictures
# Source: https://www.cnblogs.com/meizhengchao/p/17113924.html