import re
import aiohttp
import asyncio
class Asyn:
    """Asynchronously scrape Tang poems from so.gushiwen.org.

    ``main`` fetches the Tang-poem index page, extracts the relative link of
    every poem, then fans out one ``fetch`` task per poem; each task parses
    the poem text and appends it to a local file.
    """

    def __init__(self):
        # Browser-like UA so the site serves the normal HTML page.
        self.__headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.48',
        }

    async def fetch(self, session, url):
        """Download one poem page, print each match and append it to the output file.

        :param session: shared ``aiohttp.ClientSession``
        :param url: absolute URL of a single poem page
        """
        print("Sending request:", url)
        async with session.get(url, headers=self.__headers) as response:
            content = await response.text()
        # Each match is a (poem-text, author) tuple pulled out of the share <textarea>.
        poetry_infos = re.findall(r'textarea style=".*>(.*?)——(.*?)https://so.gushiwen.cn/shiwenv_.*.aspx</textarea>', content)
        # Open the file once for all matches instead of once per match.
        with open('古诗内容111.txt', 'a', encoding='utf-8') as f:
            for item in poetry_infos:
                print(item)
                f.write(f'{item}\n')

    async def main(self):
        """Fetch the index page and gather one ``fetch`` task per poem link."""
        async with aiohttp.ClientSession() as session:
            url = "https://so.gushiwen.org/gushi/tangshi.aspx"
            # Use async-with (as fetch() does) so the response is released
            # back to the connection pool instead of leaking.
            async with session.get(url, headers=self.__headers) as html:
                text = await html.text()
            # Relative paths of the individual poem pages.
            lsc = re.findall(r'<span><a href="/(.*)" target="_blank">.*</a>.*</span>', text)
            tasks = [self.fetch(session, f"https://so.gushiwen.org/{path}") for path in lsc]
            await asyncio.gather(*tasks)
# Create an instance and run the main method
# Create an instance and drive the coroutine entry point.
asyn = Asyn()
# asyncio.run() reportedly errored here (a known aiohttp-on-Windows teardown
# issue), so manage the loop explicitly. get_event_loop() is deprecated for
# synchronous callers since Python 3.10 — create a fresh loop and close it.
loop = asyncio.new_event_loop()
try:
    loop.run_until_complete(asyn.main())
finally:
    loop.close()
# 标签 (tags): 异步, url, self, 爬虫, demo2, headers, session, async, main
# From: https://www.cnblogs.com/code3/p/17455710.html