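# This program requires the third-party modules imported below to be
# installed: requests, aiohttp and aiofiles.
# Chapters are saved under a folder named '剑来' in the working directory;
# make sure that folder exists before running.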
import asyncio
import requests
import re
import aiohttp
import aiofiles
async def aiodownload(herf, title):
    """Download one chapter page and save its text to ``剑来/<title>``.

    Args:
        herf: Absolute URL of the chapter page.
        title: Chapter title, used as the output file name. Characters that
            are not allowed in file names are replaced with ``_``.

    The chapter body is the text between the ``chaptercontent`` div and the
    site's "请收藏本站" footer, with ``<br /><br />`` separators removed.
    If the page does not contain that pattern, a message is printed and no
    file is written.
    """
    import os  # local import: only needed here to create the output folder

    headers = {
        # The header name must be exactly "User-Agent"; a name or value with
        # stray spaces is not recognised as a browser User-Agent.
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0"
        ),
    }
    # re.S lets "." cross newlines so a chapter body spanning several lines
    # is still captured.
    obj = re.compile(
        r'<div id="chaptercontent" class="Readarea ReadAjax_content">(?P<content>.*?)请收藏本站:https://www.bqgui.cc',
        re.S,
    )

    async with aiohttp.ClientSession() as session:
        async with session.get(herf, headers=headers) as resp:
            data = await resp.text()
    # print(data)

    # One search replaces the old loop, which reopened the file in 'w' mode
    # for every match and silently wrote nothing when there was no match.
    match = obj.search(data)
    if match is None:
        print(f"no chapter content found: {herf} {title}")
        return
    novel = match.group("content").replace('<br /><br />', "")

    # Create the output folder on demand instead of requiring it to exist.
    os.makedirs('剑来', exist_ok=True)
    # Path separators and other reserved characters in a title would make
    # the open() fail or write outside the folder.
    safe_title = re.sub(r'[\\/:*?"<>|]', "_", title)
    async with aiofiles.open('剑来/' + safe_title, mode='w', encoding='utf-8') as f:
        await f.write(novel)
async def getherf(url):
    """Fetch the book's chapter index and download every chapter concurrently.

    Args:
        url: URL of the catalogue page, whose chapters are listed as
            ``<dd><a href =...>title</a></dd>`` entries.

    Each matched link is prefixed with ``https://www.bqgui.cc`` and handed to
    ``aiodownload`` as its own task. Failed downloads are reported after all
    tasks finish; they do not abort the remaining downloads.

    Raises:
        requests.RequestException: if the catalogue page cannot be fetched
            (including a timeout).
    """
    # A timeout keeps an unresponsive server from hanging the script forever.
    resp = requests.get(url, timeout=30)
    obj = re.compile(r"<dd><a href =(?P<herf>.*?)>(?P<title>.*?)</a></dd>")

    chapters = []
    tasks = []
    for result in obj.finditer(resp.text):
        # The captured href still carries its surrounding double quotes.
        herf = 'https://www.bqgui.cc' + result.group("herf").replace('"', "")
        title = result.group("title")
        print(herf, title)
        chapters.append((herf, title))
        tasks.append(asyncio.create_task(aiodownload(herf, title)))

    # asyncio.wait() raises ValueError on an empty collection, so handle the
    # "no links matched" case explicitly.
    if not tasks:
        print(f"no chapter links found at {url}")
        return

    # gather(return_exceptions=True) collects each task's exception instead
    # of leaving it unretrieved, so failures can be reported per chapter.
    outcomes = await asyncio.gather(*tasks, return_exceptions=True)
    for (herf, title), outcome in zip(chapters, outcomes):
        if isinstance(outcome, Exception):
            print(f"download failed: {herf} {title}: {outcome!r}")
if __name__ == '__main__':
    # Script entry point: crawl the catalogue page of book 1031 and download
    # all of its chapters inside a fresh asyncio event loop.
    book_index_url = 'https://www.bqgui.cc/book/1031/'
    asyncio.run(getherf(book_index_url))
# Scrape results (爬取结果)
# Tags: __, title, resp, herf, async, import, 小说, 取剑来
# From: https://blog.csdn.net/m0_74823576/article/details/140936521