# Tags: fetched, keyword, links, driver, 爬虫 (crawler), 花瓣 (Huaban), save, dir
# From: https://www.cnblogs.com/zly324/p/17749575.html
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import os
# Huaban search URL template. `{keyword}` is filled in via str.format; the
# remaining query parameters are fixed (board results, industrial_design
# category, sort=all).
BASE_URL = (
    "https://huaban.com/search"
    "?q={keyword}"
    "&sort=all"
    "&type=board"
    "&category=industrial_design"
)
def search_and_save_links(driver, keyword, save_dir, *, scroll_pause=5):
    """Collect board links from the search page for *keyword* and save them.

    Opens the search URL built from ``BASE_URL``, repeatedly scrolls to the
    bottom of the page, and gathers the ``href`` of every matching anchor
    until a pass finds no new links. The links are then written, one per
    line, to ``<save_dir>/<keyword>.txt`` (UTF-8, overwriting any existing
    file).

    Args:
        driver: A Selenium WebDriver instance used to load and scroll the page.
        keyword: The search term; also used as the output file's base name.
        save_dir: Directory in which the ``.txt`` file is written. It must
            already exist.
        scroll_pause: Seconds to wait after each scroll so that newly loaded
            results can appear before the next pass.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    # Imported locally so this function does not depend on additional
    # file-level imports.
    from urllib.parse import quote

    from selenium.common.exceptions import StaleElementReferenceException

    # Percent-encode the keyword so characters such as '&', '#' or spaces
    # cannot corrupt the query string.
    driver.get(BASE_URL.format(keyword=quote(keyword, safe="")))

    # A dict serves as an insertion-ordered set: duplicates are dropped while
    # links stay in the order they were discovered, so the output file is
    # deterministic for a given page.
    fetched_links = {}
    prev_num_links = -1

    # Keep scrolling until a pass over the page yields no new links.
    while len(fetched_links) > prev_num_links:
        prev_num_links = len(fetched_links)

        # Grab every anchor that matches the board-link pattern.
        elements = driver.find_elements(By.XPATH, '//a[contains(@class, "UwLo0Up5") and starts-with(@href, "/boards/")]')
        for ele in elements:
            try:
                link = ele.get_attribute('href')
            except StaleElementReferenceException:
                # The element was detached between lookup and read (the page
                # changed underneath us); skip it instead of aborting the
                # whole keyword.
                continue
            # get_attribute may return None; never store or write that.
            if link:
                fetched_links[link] = None

        # Scroll to the bottom to trigger loading of further results.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)  # give the page time to load more items

    # Save the collected links to a text file, one per line.
    with open(os.path.join(save_dir, f"{keyword}.txt"), "w", encoding="utf-8") as file:
        file.writelines(f"{link}\n" for link in fetched_links)
def main():
    """Prompt for keywords and an output directory, then scrape each keyword.

    Reads a comma-separated keyword list and a save directory from standard
    input, creates the directory if needed, starts a single Chrome WebDriver
    and calls ``search_and_save_links`` once per keyword. A failure for one
    keyword is reported and does not stop the remaining keywords. The browser
    is always closed on exit.
    """
    # Read the keywords. Full-width commas (typical of Chinese input methods)
    # are accepted as separators too, and blank entries are dropped so that
    # no search runs for an empty term and no nameless ".txt" file is written.
    raw_keywords = input("请输入搜索关键词,用','隔开:").replace("，", ",").split(",")
    keywords = [kw.strip() for kw in raw_keywords if kw.strip()]

    # Read the save directory; fall back to the current directory on blank
    # input, which os.makedirs would otherwise reject.
    save_dir = input("请输入保存地址,例如:C:\\Users\\皮皮\\Desktop:").strip() or "."
    os.makedirs(save_dir, exist_ok=True)

    if not keywords:
        # Nothing to search for; do not launch a browser.
        return

    # One browser instance is shared across all keywords.
    driver = webdriver.Chrome()
    try:
        for keyword in keywords:
            try:
                search_and_save_links(driver, keyword, save_dir)
            except Exception as e:
                # Best-effort per keyword: report and continue with the next.
                print(f"Error occurred while fetching links for keyword {keyword}: {e}")
    finally:
        # Close the browser even on Ctrl-C or an unexpected error.
        driver.quit()
# Run the scraper only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()