import scrapy
class BaiduSpider(scrapy.Spider):
    """Demo spider whose callbacks keep re-requesting the Baidu home page.

    Every callback schedules the same URL again with ``parse_info`` as the
    callback. None of these requests sets ``dont_filter``, so they are
    subject to Scrapy's default duplicate-request filtering.
    """

    name = "baidu"
    allowed_domains = ["baidu.com"]
    start_urls = ["https://baidu.com"]

    def parse(self, response):
        """Print the page title, then request the home page again."""
        page_title = response.xpath("//title/text()").get()
        print(page_title)
        follow_up = scrapy.Request(
            url="https://baidu.com",
            callback=self.parse_info,
        )
        yield follow_up

    def parse_info(self, response):
        """Print the page title and schedule the very same request once more."""
        page_title = response.xpath("//title/text()").get()
        print(page_title)
        # Same URL, same callback: on paper this would recurse forever.
        repeat = scrapy.Request(
            url="https://baidu.com",
            callback=self.parse_info,
        )
        yield repeat
理论上,上面的代码是一个死循环:parse_info 会不断地对同一个 URL 发起请求。
但 Scrapy 默认会对请求去重,重复的请求会被过滤掉,因此爬虫实际上会自行结束。
import scrapy
class BaiduSpider(scrapy.Spider):
    """Demo spider that issues its start requests with ``dont_filter=True``.

    The callbacks print the page title and then schedule the Baidu home page
    again with ``parse_info`` as the callback; those follow-up requests do
    not set ``dont_filter``.
    """

    name = "baidu"
    allowed_domains = ["baidu.com"]
    start_urls = ["https://baidu.com"]

    def start_requests(self):
        """Yield one request per entry in ``start_urls``, bypassing dedup."""
        for start_url in self.start_urls:
            # dont_filter controls duplicate filtering for this request:
            # True  -> skip the duplicate filter, the request is always sent;
            # False -> the duplicate filter applies and repeats are dropped.
            yield scrapy.Request(url=start_url, dont_filter=True)

    def parse(self, response):
        """Print the page title, then request the home page again."""
        page_title = response.xpath("//title/text()").get()
        print(page_title)
        follow_up = scrapy.Request(
            url="https://baidu.com",
            callback=self.parse_info,
        )
        yield follow_up

    def parse_info(self, response):
        """Print the page title and schedule the very same request once more."""
        page_title = response.xpath("//title/text()").get()
        print(page_title)
        repeat = scrapy.Request(
            url="https://baidu.com",
            callback=self.parse_info,
        )
        yield repeat
标签:baidu,parse,dont,title,self,Request,filter,scrapy,response From: https://www.cnblogs.com/jiangjiayun/p/17502989.html