import scrapy
class DouSpider(scrapy.Spider):
    """Spider that fetches one Douban doulist page and prints its entries.

    The crawl starts from the single URL in ``start_urls``; ``parse`` is the
    callback that handles the downloaded page.
    """

    name = "dou"
    # Domain filtering is intentionally left disabled here.
    # allowed_domains = ["www.douban.com"]
    start_urls = ["https://www.douban.com/doulist/113652271/"]

    def parse(self, response):
        """Print two text fields for every doulist entry on the page.

        Args:
            response: The downloaded page; only its ``xpath`` method is used.

        Each entry is printed twice: first the raw selector (debug output),
        then the two extracted strings. A field whose XPath matches nothing
        is printed as ``None`` instead of raising ``IndexError``.
        """
        # Absolute path to the list container; it depends on the exact page
        # layout and will silently match nothing if the markup changes.
        container = response.xpath('/html/body/div[3]/div[1]/div/div[1]')
        entries = container.xpath('./div[contains(@class, "doulist-item")]')
        for div in entries:
            print(div)
            # extract_first() returns None on an empty match, so an entry
            # without the expected link no longer aborts the whole callback
            # (the previous ``[0].extract()`` raised IndexError there).
            # Presumably the link text is the entry title -- confirm against
            # the page markup.
            name = div.xpath('./div/div[2]/div[2]/a/text()').extract_first()
            # Presumably the entry's abstract/description text -- confirm
            # against the page markup.
            content = div.xpath('./div/div[2]/div[4]/text()').extract_first()
            print(name, content)
# Tags (标签): xpath, www, name, scrapy, print, div, 解析 (parsing), 数据 (data)
# From: https://www.cnblogs.com/lin513/p/18048435