编写代码时遇到的问题
- 写入txt(一开始忘记了tuple类型需要转换为str)
- 处理逻辑(一开始并没有将 body 与 url 绑定到一起,所以会返回所有的 url)
- 关闭太早(这点是有点疑惑的,难道不用关闭文本么?加了一个f.close就错了)
- 匹配时忘记了大小写
import concurrent.futures
import sys

import pandas as pd
import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook

# Keyword searched for in each page body; a URL is kept when it is ABSENT.
# The match is case-sensitive.
KEYWORD = "xxxx"

# Seconds to wait on a single HTTP request. Without a timeout, requests can
# block indefinitely on an unresponsive host and stall a worker thread.
REQUEST_TIMEOUT = 10

# Read the URLs to check, one per line, from the input text file.
with open("urls.txt", "r") as file:
    urls = file.read().splitlines()

# URLs whose page body does not contain KEYWORD.
data = []


def detect_cvss(url):
    """Fetch *url* and report it when its page body lacks KEYWORD.

    Args:
        url: Address of the page to download.

    Returns:
        ``url`` when the text of the page's ``<body>`` does not contain
        KEYWORD (the URL is also printed to stdout); ``None`` when the
        keyword is present, the page has no ``<body>``, or the request
        fails.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Best-effort scan: report the failure on stderr and skip this URL.
        print(f"request failed for {url}: {exc}", file=sys.stderr)
        return None

    body_tag = BeautifulSoup(response.text, "html.parser").find("body")
    if body_tag is None:
        # No <body> element to inspect, so the URL is skipped.
        return None

    if KEYWORD not in body_tag.text:
        print(url)
        return url
    return None


with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
    futures = [executor.submit(detect_cvss, url) for url in urls]
    for future in concurrent.futures.as_completed(futures):
        try:
            result = future.result()
        except Exception as exc:
            # Keep the run going if one page fails in an unexpected way.
            print(f"unexpected error while checking a URL: {exc}", file=sys.stderr)
            continue
        if result:
            data.append(result)

# Write the matching URLs to a text file, one per line. The with-statement
# closes the file on exit, so no explicit close() call is needed.
with open("no_xxxx.txt", 'w') as f:
    f.writelines(found_url + '\n' for found_url in data)
标签:futures,tuple,URL,关键词,url,import,txt From: https://www.cnblogs.com/BreakLAC/p/17102404.html