# (Blog widget label: "click to view code" - not part of the program.)
import urllib.request
from lxml import etree
import xlwt
# Address of the page to request.
url = 'https://www.qidian.com/all/action1-page1'
# Browser-like User-Agent header sent along with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.2242 SLBChan/10',
}
# Build a request object that carries the custom headers.
request = urllib.request.Request(url=url, headers=headers)
# Send the request. The context manager closes the connection as soon as the
# body has been read, and the timeout keeps a stalled server from hanging the
# script indefinitely (a timeout surfaces as an exception instead).
with urllib.request.urlopen(request, timeout=30) as response:
    # Decode the response body to obtain the page source as text.
    content = response.read().decode('utf-8')
# Parse the page source into an element tree that supports XPath queries.
parse_html = etree.HTML(content)
# Every <li> under the listing <ul> is treated as one book entry. Each field
# is looked up relative to its own <li>, so the eight result lists always have
# one element per entry and stay aligned index-by-index, even when an entry
# lacks a field or matches it more than once.
book_items = parse_html.xpath('//ul[@class="all-img-list cf"]/li')


def _first_match(node, path):
    """Return the first result of XPath ``path`` evaluated on ``node``.

    Returns an empty string when the path matches nothing, so a missing
    field becomes a blank value instead of shifting later entries.
    """
    matches = node.xpath(path)
    return matches[0] if matches else ''


# Link to the novel.
bookurl = [_first_match(li, 'div/a/@href') for li in book_items]
# Cover image link.
bookps = [_first_match(li, 'div/a/img/@src') for li in book_items]
# Novel title.
bookname = [_first_match(li, 'div/h2/a/text()') for li in book_items]
# Novel author.
bookauthor = [_first_match(li, 'div/p/a[1]/text()') for li in book_items]
# Main category.
booktype = [_first_match(li, 'div/p/a[2]/text()') for li in book_items]
# Sub-category.
bookmintype = [_first_match(li, 'div/p/a[@class="go-sub-type"]/text()') for li in book_items]
# Completion status.
bookend = [_first_match(li, 'div/p[1]/span[1]/text()') for li in book_items]
# Synopsis.
bookcoll = [_first_match(li, 'div/p[@class="intro"]/text()') for li in book_items]
# Create the workbook and the single sheet that receives the scraped data.
wookbook = xlwt.Workbook(encoding="utf-8")
worksheet = wookbook.add_sheet("起点")
# Column titles for the header row, in the same order as the data columns.
col = ('小说连接', "图片链接", "小说名称", "小说作者", "小说大类别", "小说小类别", "小说完本", "小说简介")
for col_index, title in enumerate(col):
    worksheet.write(0, col_index, title)
# Data columns in header order. zip() stops at the shortest list, so the loop
# writes exactly as many rows as were scraped instead of assuming 20 entries
# and failing with IndexError when there are fewer.
columns = (bookurl, bookps, bookname, bookauthor, booktype, bookmintype, bookend, bookcoll)
for row_index, record in enumerate(zip(*columns), start=1):
    # Row 0 holds the header, so data rows start at 1.
    for col_index, value in enumerate(record):
        worksheet.write(row_index, col_index, value)
# Save the workbook, overwriting any existing file at this path.
wookbook.save(r"C:\Users\Administrator\Desktop\book.xls")