自定义扩展时,利用信号在指定位置注册指定操作
源码剖析:
from scrapy.extensions.telnet import TelnetConsole  # imported so the TelnetConsole source can be inspected

# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
EXTENSIONS = {
    'scrapy.extensions.telnet.TelnetConsole': None,
    # 'test002.extensions.MyExtend':300,
}
查看 TelnetConsole 类:
class TelnetConsole(protocol.ServerFactory):
    """Extension that serves a telnet console for the running crawler.

    The instance registers ``start_listening`` and ``stop_listening`` on the
    ``engine_started`` and ``engine_stopped`` signals in ``__init__``.
    """

    def __init__(self, crawler):
        """Read the telnet settings from ``crawler`` and register the signal handlers.

        Raises:
            NotConfigured: if the ``TELNETCONSOLE_ENABLED`` setting is false or
                ``TWISTED_CONCH_AVAILABLE`` is falsy.
        """
        if not crawler.settings.getbool('TELNETCONSOLE_ENABLED'):
            raise NotConfigured
        if not TWISTED_CONCH_AVAILABLE:
            raise NotConfigured
        self.crawler = crawler
        self.noisy = False
        self.portrange = [int(x) for x in crawler.settings.getlist('TELNETCONSOLE_PORT')]
        self.host = crawler.settings['TELNETCONSOLE_HOST']
        # Register the two handlers on the engine start/stop signals.
        self.crawler.signals.connect(self.start_listening, signals.engine_started)
        self.crawler.signals.connect(self.stop_listening, signals.engine_stopped)

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: build the extension from ``crawler``."""
        return cls(crawler)

    def start_listening(self):
        """Handler for ``engine_started``: open the port and log the bound address."""
        self.port = listen_tcp(self.portrange, self.host, self)
        h = self.port.getHost()
        logger.debug("Telnet console listening on %(host)s:%(port)d",
                     {'host': h.host, 'port': h.port},
                     extra={'crawler': self.crawler})

    def stop_listening(self):
        """Handler for ``engine_stopped``: stop listening on the port opened earlier."""
        self.port.stopListening()

    def protocol(self):
        """Return a ``TelnetTransport`` wired to a manhole exposing the telnet variables."""
        telnet_vars = self._get_telnet_vars()
        return telnet.TelnetTransport(telnet.TelnetBootstrapProtocol,
                                      insults.ServerProtocol, manhole.Manhole, telnet_vars)

    def _get_telnet_vars(self):
        """Build the dict of names made available inside the telnet session.

        After the dict is built, the ``update_telnet_vars`` signal is sent with
        it as ``telnet_vars``; the same dict object is then returned.
        """
        # Note: if you add entries here also update topics/telnetconsole.rst
        telnet_vars = {
            'engine': self.crawler.engine,
            'spider': self.crawler.engine.spider,
            'slot': self.crawler.engine.slot,
            'crawler': self.crawler,
            'extensions': self.crawler.extensions,
            'stats': self.crawler.stats,
            'settings': self.crawler.settings,
            'est': lambda: print_engine_status(self.crawler.engine),
            'p': pprint.pprint,
            'prefs': print_live_refs,
            'hpy': hpy,
            'help': "This is Scrapy telnet console. For more info see: " \
                    "https://doc.scrapy.org/en/latest/topics/telnetconsole.html",
        }
        self.crawler.signals.send_catch_log(update_telnet_vars, telnet_vars=telnet_vars)
        return telnet_vars
分析:
self.start_listening&self.stop_listening 是可以自定义的方法
signals.engine_started&signals.engine_stopped 是指定信号
在指定信号上注册操作
查找信号:
进入signals查看
# Each signal is a distinct sentinel object.
engine_started = object()
engine_stopped = object()
spider_opened = object()
spider_idle = object()
spider_closed = object()
spider_error = object()
request_scheduled = object()
request_dropped = object()
response_received = object()
response_downloaded = object()
item_scraped = object()
item_dropped = object()

# for backwards compatibility
stats_spider_opened = spider_opened
stats_spider_closing = spider_closed
stats_spider_closed = spider_closed

item_passed = item_scraped

request_received = request_scheduled
根据上面的源码,我们可以仿照源码进行自定义扩展:
from scrapy import signals


class MyExtend:
    """Custom extension that prints a marker when the registered signals fire."""

    def __init__(self, crawler):
        """Keep a reference to ``crawler`` and register the handlers on its signals."""
        self.crawler = crawler
        # Hang our callbacks on the hooks:
        # register an operation on each chosen signal.
        self.crawler.signals.connect(self.start, signals.engine_started)
        self.crawler.signals.connect(self.close, signals.spider_closed)

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor: build the extension from ``crawler``."""
        return cls(crawler)

    def start(self):
        """Handler registered on ``signals.engine_started``."""
        print('signals.engine_started')

    def close(self):
        """Handler registered on ``signals.spider_closed``."""
        print('signals.spider_closed')
from scrapy.extensions.telnet import TelnetConsole

# Enable or disable extensions
# See https://doc.scrapy.org/en/latest/topics/extensions.html
EXTENSIONS = {
    # 'scrapy.extensions.telnet.TelnetConsole': None,
    'test002.extensions.MyExtend':300,
}

# Tags: engine, custom, framework, self, spider, telnet, signals, scrapy, crawler
# From: https://www.cnblogs.com/huangm1314/p/10440203.html