【桌面壁纸】电脑桌面壁纸图片大全_高清壁纸背景图-ZOL桌面壁纸
import os
from urllib.parse import urlsplit

import scrapy
from fake_useragent import UserAgent


class ZolMeinvSpider(scrapy.Spider):
    """Crawl a ZOL wallpaper listing and save every listed image to disk.

    Starting from ``start_url`` the spider extracts the ``src`` of each
    ``<img>`` under ``.pic-list2``, downloads it into ``img_dir``, and then
    follows the ``a.prev`` / ``a.next`` pagination links.
    """

    name = 'zol_meinv'
    base_url = 'https://desk.zol.com.cn'
    start_url = base_url + '/meinv/'
    # Directory (relative to the working directory) that receives the images.
    img_dir = 'downloaded_images'

    # Created on first use and reused, instead of building a new UserAgent
    # object for every outgoing request.
    _ua = None

    def _random_headers(self):
        """Return request headers carrying a randomly chosen User-Agent."""
        if self._ua is None:
            self._ua = UserAgent()
        return {'User-Agent': self._ua.random}

    def _current_page_number(self, response):
        """Return the page number shown on *response*.

        Reads the text of ``span.active``. When that element is missing or
        is not an integer, falls back to the ``page_number`` carried in the
        request meta (1 if absent) rather than raising.
        """
        fallback = response.meta.get('page_number', 1)
        text = response.css('span.active::text').get()
        try:
            return int(text)
        except (TypeError, ValueError):
            return fallback

    def start_requests(self):
        """Yield the single request for the first listing page."""
        yield scrapy.Request(
            self.start_url,
            headers=self._random_headers(),
            callback=self.parse,
            meta={'page_number': 1},
            dont_filter=True,
        )

    def parse(self, response):
        """Yield image download requests and pagination requests for a page.

        Args:
            response: The listing page response.

        Yields:
            ``scrapy.Request`` objects: one per image (handled by
            ``download_image``) and one per available prev/next page
            (handled by ``parse``).
        """
        # Extract image links.
        for img_link in response.css('.pic-list2 img::attr(src)').getall():
            if not img_link:
                # An empty src would resolve to the page URL itself.
                continue
            # Resolve relative and protocol-relative sources; Request
            # rejects URLs that have no scheme.
            img_url = response.urljoin(img_link)
            yield scrapy.Request(
                img_url,
                callback=self.download_image,
                meta={'img_link': img_url},
                priority=100,
            )

        # Handle pagination.
        page_number = self._current_page_number(response)
        neighbours = (
            (response.css('a.prev::attr(href)').get(), page_number - 1),
            (response.css('a.next::attr(href)').get(), page_number + 1),
        )
        for href, target_number in neighbours:
            if not href:
                continue
            # No dont_filter here: prev and next links point back at each
            # other, so the duplicate filter must be allowed to drop pages
            # that were already scheduled or the crawl never terminates.
            yield scrapy.Request(
                response.urljoin(href),
                headers=self._random_headers(),
                callback=self.parse,
                meta={'page_number': target_number},
            )

    def download_image(self, response):
        """Write the body of an image response into ``img_dir``.

        The file name is the last path component of the image URL, with any
        query string or fragment ignored. A response whose URL has no usable
        file name is logged and skipped.

        Args:
            response: The image response; ``meta['img_link']`` holds the URL
                that was requested (``response.url`` is used if absent).
        """
        img_link = response.meta.get('img_link', response.url)
        file_name = os.path.basename(urlsplit(img_link).path)
        if not file_name:
            self.logger.warning('Cannot derive a file name from %s', img_link)
            return

        # Directory in which the images are stored; exist_ok avoids the
        # check-then-create race between concurrent callbacks/processes.
        os.makedirs(self.img_dir, exist_ok=True)
        with open(os.path.join(self.img_dir, file_name), 'wb') as f:
            f.write(response.body)
输出结果
标签:桌面壁纸,img,url,self,response,爬取,scrapy,link,page From: https://blog.csdn.net/qq_68809241/article/details/143647726