"""Capture full-page screenshots of a list of URLs concurrently with headless Chrome."""

import concurrent.futures.thread
import re
from concurrent.futures import ThreadPoolExecutor

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Browser identity sent with every request.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)

# Width the page is laid out at, and the minimum screenshot width.
_WINDOW_WIDTH = 1280
# Window height used while measuring the page; the measured scroll height is
# expected to be at least this tall, so keep it modest.
_INITIAL_WINDOW_HEIGHT = 800
# Height used when the page height cannot be measured (the value the script
# originally hard-coded for every page).
_FALLBACK_WINDOW_HEIGHT = 10000

# Path separators plus the characters Windows forbids in file names.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _screenshot_filename(url):
    """Return the PNG file name used for *url*.

    The scheme (everything up to the last ``//``) is dropped and every
    character matched by ``_UNSAFE_FILENAME_CHARS`` becomes ``_``, e.g.
    ``https://example.com/a/b`` -> ``example.com_a_b.png``.
    """
    stem = url.split("//")[-1]
    return f"{_UNSAFE_FILENAME_CHARS.sub('_', stem)}.png"


def _scroll_dimension(driver, name):
    """Return ``document.body.parentNode.scroll<name>`` as reported by the page.

    *name* is ``"Width"`` or ``"Height"``.
    """
    return driver.execute_script("return document.body.parentNode.scroll" + name)


def capture_full_page_screenshot(url, save_path):
    """Open *url* in headless Chrome and save a full-page screenshot.

    The window is first set to a fixed width so the page lays out
    consistently, then resized to the page's measured scroll size before the
    screenshot is taken, so the image covers the whole document rather than a
    fixed-height window.

    Args:
        url: Address of the page to capture.
        save_path: File path the PNG screenshot is written to.

    Raises:
        Whatever Selenium raises if the browser cannot start or the page
        cannot be loaded; the browser is always shut down first.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument(f"--user-agent={_USER_AGENT}")

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)

        # Fix the layout width before measuring, because the page height
        # depends on the width it is rendered at.
        driver.set_window_size(_WINDOW_WIDTH, _INITIAL_WINDOW_HEIGHT)
        page_width = _scroll_dimension(driver, "Width") or _WINDOW_WIDTH
        page_height = _scroll_dimension(driver, "Height") or _FALLBACK_WINDOW_HEIGHT

        # NOTE(review): assumes the headless window size equals the viewport
        # size, which is what makes resizing the window capture the full page.
        driver.set_window_size(max(page_width, _WINDOW_WIDTH), page_height)

        driver.save_screenshot(save_path)
        print(f"截图已保存至:{save_path}")
    finally:
        # Always release the browser process, even when the capture failed.
        driver.quit()


def capture_screenshots_from_file(url_list, max_workers=None):
    """Capture a screenshot of every URL in *url_list* using a thread pool.

    Each screenshot is written to the current directory under the name given
    by ``_screenshot_filename``. A failure for one URL is printed and does
    not stop the remaining captures.

    Args:
        url_list: Iterable of URL strings.
        max_workers: Maximum number of concurrent captures. Every worker
            starts its own Chrome instance, so lower this on small machines.
            ``None`` keeps ``ThreadPoolExecutor``'s default.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to its URL so errors can be reported per URL.
        futures = {
            executor.submit(
                capture_full_page_screenshot, url, _screenshot_filename(url)
            ): url
            for url in url_list
        }

        for future in concurrent.futures.as_completed(futures):
            url = futures[future]
            try:
                # Re-raises any exception that occurred inside the worker.
                future.result()
            except Exception as e:
                print(f"捕获 {url} 的截图时出现错误:{e}")


# Example usage: read one URL per line from a text file and capture them all.
if __name__ == "__main__":
    websites_file_path = "websites.txt"
    with open(websites_file_path, 'r') as file:
        # Skip blank lines so no empty URL is handed to the browser.
        urls = [stripped for stripped in (line.strip() for line in file) if stripped]

    capture_screenshots_from_file(urls)
# 标签:futures,url,截取,编程,----,URL,file,options,chrome
# From: https://www.cnblogs.com/GKLBB/p/17853387.html