import requests
from bs4 import BeautifulSoup, NavigableString, Tag
from fake_useragent import UserAgent

BASE_URL = "https://ww1.gogoanime2.org"


def search_scraper(anime_name: str) -> list:
    """Search for an anime by name and return the matching results.

    Args:
        anime_name (str): Name of the anime to search for.

    Returns:
        list: Dicts with ``title`` and ``url`` keys, one per match.

    Raises:
        ValueError: If the results <ul> is missing (no anime found).
        requests.HTTPError: If the search request fails.
    """
    search_url = f"{BASE_URL}/search/{anime_name}"
    # BUG FIX: the header key must be "User-Agent" (hyphenated);
    # the original sent "UserAgent", which servers ignore.
    response = requests.get(search_url, headers={"User-Agent": UserAgent().chrome})
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    anime_ul = soup.find("ul", {"class": "items"})
    if anime_ul is None or isinstance(anime_ul, NavigableString):
        msg = f"未找到名为 {anime_name} 的任何动漫"
        raise ValueError(msg)

    anime_list = []
    for anime in anime_ul.children:
        if not isinstance(anime, Tag):
            continue
        # One <a> element carries both the title attribute and the href
        # (the original looked it up twice for no reason).
        anchor = anime.find("a")
        if anchor is None or isinstance(anchor, NavigableString):
            continue
        anime_list.append({"title": anchor["title"], "url": anchor["href"]})

    return anime_list


def search_anime_episode_list(episode_endpoint: str) -> list:
    """Fetch the episode list for an anime page.

    Args:
        episode_endpoint (str): Anime page path relative to ``BASE_URL``.

    Returns:
        list: Dicts with ``title`` and ``url`` keys, one per episode.

    Raises:
        ValueError: If the episode <ul> is missing (no episodes found).
        requests.HTTPError: If the page request fails.
    """
    request_url = f"{BASE_URL}{episode_endpoint}"
    # BUG FIX: "User-Agent" header key (was "UserAgent").
    response = requests.get(url=request_url, headers={"User-Agent": UserAgent().chrome})
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    episode_page_ul = soup.find("ul", {"id": "episode_related"})
    if episode_page_ul is None or isinstance(episode_page_ul, NavigableString):
        # BUG FIX: the original message interpolated the undefined name
        # ``anime_name`` here, which raised NameError instead of the
        # intended ValueError; report the endpoint instead.
        msg = f"未找到任何名为 {episode_endpoint} 的动漫剧集"
        raise ValueError(msg)

    episode_list = []
    for episode in episode_page_ul.children:
        if not isinstance(episode, Tag):
            continue
        url = episode.find("a")
        if url is None or isinstance(url, NavigableString):
            continue
        title = episode.find("div", {"class": "name"})
        if title is None or isinstance(title, NavigableString):
            continue
        episode_list.append(
            {"title": title.text.replace(" ", ""), "url": url["href"]}
        )

    return episode_list


def get_anime_episode(episode_endpoint: str) -> list:
    """Resolve the watch and download URLs for one episode.

    Args:
        episode_endpoint (str): Episode page path relative to ``BASE_URL``.

    Returns:
        list: ``[watch_url, download_url]``.

    Raises:
        RuntimeError: If the player iframe or its ``src`` is missing.
        requests.HTTPError: If the episode page request fails.
    """
    episode_page_url = f"{BASE_URL}{episode_endpoint}"
    response = requests.get(
        url=episode_page_url, headers={"User-Agent": UserAgent().chrome}
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    url = soup.find("iframe", {"id": "playerframe"})
    if url is None or isinstance(url, NavigableString):
        msg = f"未找到 {episode_endpoint} 的观看和下载链接"
        raise RuntimeError(msg)

    episode_url = url["src"]
    if not isinstance(episode_url, str):
        msg = f"未找到 {episode_endpoint} 的观看和下载链接"
        raise RuntimeError(msg)
    # The playlist (.m3u8) endpoint mirrors the embed path.
    download_url = episode_url.replace("/embed/", "/playlist/") + ".m3u8"

    return [f"{BASE_URL}{episode_url}", f"{BASE_URL}{download_url}"]


if __name__ == "__main__":
    anime_name = input("输入动漫名称:").strip()
    anime_list = search_scraper(anime_name)
    print("\n")

    if len(anime_list) == 0:
        print("未找到该名称的任何动漫")
    else:
        print(f"找到 {len(anime_list)} 个结果:")
        for i, anime in enumerate(anime_list):
            anime_title = anime["title"]
            print(f"{i+1}. {anime_title}")

        anime_choice = int(input("\n请选择列表中的一个:").strip())
        chosen_anime = anime_list[anime_choice - 1]
        print(f"您选择了 {chosen_anime['title']}。正在搜索剧集...")

        episode_list = search_anime_episode_list(chosen_anime["url"])
        if len(episode_list) == 0:
            print("未找到该动漫的任何剧集")
        else:
            print(f"找到 {len(episode_list)} 个结果:")
            for i, episode in enumerate(episode_list):
                print(f"{i+1}. {episode['title']}")

            episode_choice = int(input("\n请选择一个剧集:").strip())
            chosen_episode = episode_list[episode_choice - 1]
            print(f"您选择了 {chosen_episode['title']}。正在搜索...")

            episode_url, download_url = get_anime_episode(chosen_episode["url"])
            print(f"\n要观看,请使用Ctrl +单击 {episode_url}。")
            print(f"要下载,请使用Ctrl +单击 {download_url}。")
# Requires: pip install requests fake_useragent beautifulsoup4
# Source: https://www.cnblogs.com/mlhelloworld/p/18000865