import random
from scrapy import signals
import fake_useragent
# Load the configuration from the project's settings
from scrapy.utils.project import get_project_settings
class RandomUserAgentMiddleware:
    """Scrapy downloader middleware that sets a random User-Agent header.

    A pool of user-agent strings is loaded once from ``fake_useragent`` and
    split into PC and mobile groups.  For each outgoing request, the spider's
    ``USER_AGENT_TYPE`` setting ("pc" or "mobile") selects which group to
    draw a random agent from; an already-present User-Agent header is never
    overwritten.
    """

    def __init__(self):
        # Split the fake_useragent browser data into PC and mobile pools.
        # NOTE(review): assumes each entry is a dict with 'type' and
        # 'useragent' keys (the ``data_browsers`` structure) — confirm
        # against the installed fake_useragent version.
        self.pcUA = []
        self.mobileUA = []
        for entry in fake_useragent.UserAgent().data_browsers:
            if entry['type'] == 'pc':
                self.pcUA.append(entry['useragent'])
            elif entry['type'] == 'mobile':
                self.mobileUA.append(entry['useragent'])

    @classmethod
    def from_crawler(cls, crawler):
        # Standard Scrapy hook: build the middleware and subscribe to the
        # spider_opened signal so we can emit a startup log line.
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened,
                                signal=signals.spider_opened)
        return middleware

    def spider_opened(self, spider):
        # Log message: "loading random User-Agent middleware"
        spider.logger.info('加载随机User-Agent中间件')

    def process_request(self, request, spider):
        """Assign a random User-Agent to *request* unless one already exists.

        ``headers.setdefault`` already leaves an existing header untouched,
        so no separate existence check is needed.  Any USER_AGENT_TYPE other
        than "mobile" (including unset) falls back to the PC pool, matching
        the original pc/else behavior.  Returns None so Scrapy continues
        processing the request normally.
        """
        ua_type = spider.settings.get('USER_AGENT_TYPE')
        pool = self.mobileUA if ua_type == 'mobile' else self.pcUA
        request.headers.setdefault(b'User-Agent', random.choice(pool))
# Tags: middleware, self, request, spider, Agent, scrapy, User, type
# From: https://blog.csdn.net/qq_35240081/article/details/140252045