首页 > 其他分享 > 爬取起点小说信息存入 Excel

爬取起点小说信息存入excel

时间:2022-10-21 12:55:05  浏览次数:48
标签:xpath parse img worksheet excel 存入 爬取 print class

点击查看代码
import urllib.request
from lxml import etree
import xlwt
# Fetch one listing page and parse it into an lxml element tree.

# Address of the page to request.
url = 'https://www.qidian.com/all/action1-page1'
# Request headers carrying a browser-style User-Agent string.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.2242 SLBChan/10'
}

# Build a request object that carries the custom headers.
request = urllib.request.Request(url=url, headers=headers)

# Send the request. The with-block guarantees the HTTP response (and its
# underlying socket) is closed even if reading or decoding raises.
with urllib.request.urlopen(request) as response:
    # Decode the response body to obtain the page source as text.
    content = response.read().decode('utf-8')

# Parse the page source so it can be queried with XPath.
parse_html = etree.HTML(content)

# Extract the wanted fields with XPath.
#
# Every field is evaluated relative to its own <li> entry instead of with one
# document-wide query per column. A document-wide query returns a flat list of
# all matches, so a single entry with a missing field (or a synopsis split
# into several text nodes) would shift every later value onto the wrong book.
# Reading per entry keeps index i of every list below tied to the same book.
_book_items = parse_html.xpath('//ul[@class="all-img-list cf"]/li')


def _first(node, path):
    """Return the first XPath match of *path* under *node*, or '' if none."""
    matches = node.xpath(path)
    return matches[0] if matches else ''


# Book link:
bookurl = [_first(item, './div/a/@href') for item in _book_items]
# Book cover image:
bookps = [_first(item, './div/a/img/@src') for item in _book_items]
# Book title:
bookname = [_first(item, './div/h2/a/text()') for item in _book_items]
# Book author:
bookauthor = [_first(item, './div/p/a[1]/text()') for item in _book_items]
# Book main category:
booktype = [_first(item, './div/p/a[2]/text()') for item in _book_items]
# Book sub-category:
bookmintype = [
    _first(item, './div/p/a[@class="go-sub-type"]/text()')
    for item in _book_items
]
# Book completion status:
bookend = [_first(item, './div/p[1]/span[1]/text()') for item in _book_items]
# Book synopsis: join all text nodes so a synopsis broken up by inline markup
# still yields exactly one value per book.
bookcoll = [
    ''.join(item.xpath('./div/p[@class="intro"]/text()'))
    for item in _book_items
]

# print(len(bookurl))
# print(bookps)
# print(bookname)
# print(bookauthor)
# print(booktype)
# print(bookmintype)
# print(bookend)
# print(type(bookcoll))

# datalist=[bookurl,bookps,bookname,bookauthor,booktype,bookmintype,bookend,bookcoll]
# print(len(datalist))

# Write the scraped columns to an .xls workbook: a header row followed by one
# row per book.
wookbook = xlwt.Workbook(encoding="utf-8")  # Create a Workbook object.
worksheet = wookbook.add_sheet("起点")  # Create a sheet in the workbook.
col = ('小说连接', "图片链接", "小说名称", "小说作者", "小说大类别", "小说小类别", "小说完本", "小说简介")
for column_index, title in enumerate(col):
    worksheet.write(0, column_index, title)

# Column data in the same order as the headers in `col`.
_columns = (
    bookurl,
    bookps,
    bookname,
    bookauthor,
    booktype,
    bookmintype,
    bookend,
    bookcoll,
)
# zip() stops at the shortest column, so a page that yields fewer (or more)
# than 20 books no longer raises IndexError or silently drops rows, which the
# previous hard-coded range(0, 20) did.
for row_index, record in enumerate(zip(*_columns), start=1):
    for column_index, value in enumerate(record):
        worksheet.write(row_index, column_index, value)

# Save the workbook, overwriting any existing file at this path.
# NOTE(review): the destination is a hard-coded Windows desktop path; the
# script will fail on machines where this directory does not exist.
wookbook.save(r"C:\Users\Administrator\Desktop\book.xls")

标签:xpath,parse,img,worksheet,excel,存入,爬取,print,class
From: https://www.cnblogs.com/lzp110119/p/16813095.html

相关文章