# Loop-crawl the Qidian Chinese Network (qidian.com) book listing pages.
# Tags: img, text, li, crawl, ul, loop, div, id, page
# From: https://www.cnblogs.com/txa2003/p/16950789.html
import requests
from lxml import etree
# Browser-like User-Agent header sent with every request.
ua = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'}

# Running row number printed in front of every book, continuous across pages.
z = 1


def _joined_text(node, path):
    """Return all text nodes matched by *path* under *node*, concatenated.

    Yields an empty string when nothing matches, so a missing field leaves a
    blank column instead of shifting the fields of the following books.
    """
    return "".join(node.xpath(path))


for i in range(2, 5):  # fetches pages 2, 3 and 4 (page 1 is not requested)
    # A timeout keeps a stalled connection from hanging the script forever.
    res = requests.get(
        'https://www.qidian.com/all/page{}/'.format(i),
        headers=ua,
        timeout=10,
    )
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    res.raise_for_status()
    res.encoding = 'utf-8'
    t = etree.HTML(res.text)

    # Walk the list one <li> at a time and read each field relative to it, so
    # the five values printed on a row always come from the same entry.
    for li in t.xpath('//*[@id="book-img-text"]/ul/li'):
        biaoti = _joined_text(li, './div[2]/h2/a/text()')      # title
        zuozhe = _joined_text(li, './div[2]/p[1]/a[1]/text()')  # author
        leixing = _joined_text(li, './div[2]/p[1]/a[2]/text()')  # category
        lianzai = _joined_text(li, './div[2]/p[1]/span/text()')  # serial status
        jianjie = _joined_text(li, './div[2]/p[2]/text()')      # synopsis
        print("{} {} {} {} {} {}\n".format(
            z, biaoti, zuozhe, leixing, lianzai, jianjie))
        z += 1