import re
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

_BASE_URL = "https://music.douban.com/top250"
_PAGE_SIZE = 25
_TOTAL_ITEMS = 250
# Seconds to wait for the server before giving up on a page; without a
# timeout a stalled connection would block the crawl indefinitely.
_REQUEST_TIMEOUT = 10
_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
}
_COLUMNS = ["名称", "链接", "评分", "评分人数"]


def _extract_rating_count(raw_text):
    """Return the rating count contained in *raw_text* as a digit string.

    The first run of digits is returned, so decoration around the number
    (parentheses, whitespace, the '人评价' suffix) is dropped.
    NOTE(review): the live page appears to render the count as
    "( N人评价 )" -- confirm against current markup.
    If no digits are present, fall back to the text with only the
    '人评价' suffix removed.
    """
    match = re.search(r"\d+", raw_text)
    if match:
        return match.group()
    return raw_text.strip().replace('人评价', '')


def _parse_item(item):
    """Convert one ``<tr class="item">`` row into a record dict.

    Returns ``None`` when the row has no title link, so that a single
    malformed row does not abort the whole crawl. A missing score or
    rating count is stored as an empty string.
    """
    title_box = item.find('div', class_='pl2')
    anchor = title_box.a if title_box is not None else None
    if anchor is None or not anchor.get('href'):
        return None

    score_tag = item.find('span', class_='rating_nums')
    count_tag = item.find('span', class_='pl')
    return {
        "名称": anchor.text.strip(),
        "链接": anchor['href'],
        "评分": score_tag.text.strip() if score_tag is not None else "",
        "评分人数": (
            _extract_rating_count(count_tag.text) if count_tag is not None else ""
        ),
    }


def crawl_douban_music_top250():
    """Scrape the Douban Music Top 250 list and save it as a CSV file.

    Requests the list in pages of 25 entries via the ``start`` query
    parameter, collects name, link, score and rating count for every
    ``<tr class="item">`` row, and writes the result to
    ``豆瓣音乐top250.csv`` in the current working directory.

    Raises:
        requests.HTTPError: if a page responds with an error status.
        requests.RequestException: on connection failures or timeouts.
    """
    data = []
    # One session reuses the connection and carries the headers for all pages.
    with requests.Session() as session:
        session.headers.update(_HEADERS)
        for start in range(0, _TOTAL_ITEMS, _PAGE_SIZE):
            url = f"{_BASE_URL}?start={start}"
            response = session.get(url, timeout=_REQUEST_TIMEOUT)
            # Fail loudly instead of parsing an error page into zero rows.
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')
            for item in soup.find_all('tr', class_='item'):
                record = _parse_item(item)
                if record is not None:
                    data.append(record)

            # Pause between pages to avoid hammering the site.
            time.sleep(1)

    # Explicit columns keep the CSV header present even if nothing was scraped.
    df = pd.DataFrame(data, columns=_COLUMNS)
    df.to_csv('豆瓣音乐top250.csv', index=False, encoding='utf-8')


if __name__ == "__main__":
    crawl_douban_music_top250()

# Tags: __, Python, find, crawl, item, music, top250, class
# From: https://blog.csdn.net/qq_68809241/article/details/143423354