# Crawl Qidian (qidian.com) book-listing pages in a loop and store the results. Tags: storage, img, text, li, crawl, ul, div, id, page. From: https://www.cnblogs.com/txa2003/p/16950790.html
import requests
from lxml import etree
# Browser-like User-Agent header sent with every request.
ua = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'}


def _joined_text(node, path):
    """Return all text nodes matched by *path* under *node*, stripped and joined.

    Stripping removes the layout whitespace/newlines that surround text in the
    HTML, so each record stays on a single output line. Returns "" when the
    path matches nothing.
    """
    return "".join(piece.strip() for piece in node.xpath(path))


# Running record number written at the start of each output line; it keeps
# counting across pages.
z = 1

# `with` guarantees the output file is closed even if a request or parse fails.
with open("qidian.txt", 'w', encoding="utf-8") as f:
    # NOTE(review): this fetches pages 2, 3 and 4 only; page 1 is skipped.
    # Confirm that is intended.
    for i in range(2, 5):
        res = requests.get(
            'https://www.qidian.com/all/page{}/'.format(i),
            headers=ua,
            timeout=10,  # avoid hanging forever on a stalled connection
        )
        # Fail loudly on an HTTP error instead of parsing an error page.
        res.raise_for_status()
        res.encoding = 'utf-8'
        t = etree.HTML(res.text)

        # Extract every field relative to its own <li>, so a book that lacks
        # one field cannot shift the columns of the books that follow it
        # (which happens when five page-wide lists are zipped together).
        for li in t.xpath('//*[@id="book-img-text"]/ul/li'):
            biaoti = _joined_text(li, './div[2]/h2/a/text()')       # title
            if not biaoti:
                # No title link: presumably not a book entry, so skip it.
                continue
            zuozhe = _joined_text(li, './div[2]/p[1]/a[1]/text()')   # author
            leixing = _joined_text(li, './div[2]/p[1]/a[2]/text()')  # category
            lianzai = _joined_text(li, './div[2]/p[1]/span/text()')  # serial status
            jianjie = _joined_text(li, './div[2]/p[2]/text()')       # blurb
            f.write(" ".join((str(z), biaoti, zuozhe, leixing, lianzai, jianjie)) + "\n")
            z = z + 1