# @Author:林云
# @Time:2022/11/20 18:05
# @File:KuGouYinyue.py
# @Project:PycharmProjects
import json
import os
from time import sleep
import requests
from lxml import etree
# Request headers that make the scraper look like a regular desktop browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.52'
}

# The chart lists 500 songs at 22 songs per page, so ceil(500 / 22) = 23 pages.
PAGE_COUNT = 23
SONGS_PER_PAGE = 22
# Directory the downloaded .mp3 files are written to.
OUTPUT_DIR = './musicTop500'
# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 10
# Pause (seconds) after each song; increase it if the site starts blocking.
DOWNLOAD_DELAY = 2
# Fixed `mid` query parameter sent with every play-data API request.
PLAY_API_MID = 'e496ef938c1254f6efccb2e7cbccd1fb'

# Characters that are not allowed in file names on common file systems.
_INVALID_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})


def extract_songs(script_text):
    """Return the song dicts embedded in the text of one inline script.

    The song list is expected to be a JSON array of objects starting at the
    first ``[{`` in the script.  ``raw_decode`` parses exactly that one JSON
    value and ignores whatever JavaScript follows it.  ``json`` decodes
    ``\\uXXXX`` escapes itself, so no ``unicode_escape`` round trip is needed
    (that round trip also unescaped ``\\"`` and broke titles with quotes).

    Returns an empty list when the script holds no parseable array; only
    dict entries carrying a non-empty ``Hash`` are kept.
    """
    start = script_text.find('[{')
    if start == -1:
        return []
    try:
        entries, _end = json.JSONDecoder().raw_decode(script_text, start)
    except json.JSONDecodeError:
        return []
    return [entry for entry in entries
            if isinstance(entry, dict) and entry.get('Hash')]


def safe_filename(name):
    """Replace characters that are invalid in file names with ``_``."""
    return name.translate(_INVALID_FILENAME_CHARS)


def track_filename(rank, name):
    """Build the output file name: zero-padded 3-digit rank, '-', title."""
    return f'{rank:03d}-{safe_filename(name)}.mp3'


def song_rank(page_index, position):
    """Return the 1-based chart rank of a song.

    ``page_index`` and ``position`` are both 0-based.  The fixed page size is
    used instead of the length of the current page, which is shorter on the
    last page and would otherwise produce wrong (colliding) ranks.
    """
    return page_index * SONGS_PER_PAGE + position + 1


def fetch_page_songs(page_number):
    """Download chart page ``page_number`` (1-based) and return its songs."""
    home_url = f'https://www.kugou.com/yy/rank/home/{page_number}-8888.html?from=homepage'
    response = requests.get(home_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    tree = etree.HTML(response.text)
    if tree is None:
        return []
    songs = []
    # Inspect every inline script; those without a song array yield nothing.
    for script_text in tree.xpath('//script[@type="text/javascript"]/text()'):
        songs.extend(extract_songs(script_text))
    return songs


def fetch_play_url(song_hash, album_id):
    """Ask the play-data API for a song's audio URL.

    Returns ``None`` when the response carries no usable ``play_url``.
    """
    music_url = f'https://wwwapi.kugou.com/yy/index.php?r=play/getdata&hash={song_hash}' \
                f'&mid={PLAY_API_MID}&album_id={album_id}'
    # `headers` must be passed by keyword: the second positional argument of
    # requests.get() is `params`, not `headers`.
    response = requests.get(music_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    payload = json.loads(response.text)
    data = payload.get('data') if isinstance(payload, dict) else None
    if not isinstance(data, dict):
        return None
    return data.get('play_url') or None


def download_song(song, rank):
    """Download one song into OUTPUT_DIR; return True on success.

    Failures are reported on stdout and do not abort the whole run.
    """
    name = song.get('FileName') or song['Hash']
    try:
        play_url = fetch_play_url(song['Hash'], song.get('album_id'))
        if not play_url:
            print(f'{name}没有可用的播放地址,已跳过')
            return False
        audio = requests.get(play_url, headers=headers, timeout=REQUEST_TIMEOUT)
        audio.raise_for_status()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a non-JSON body returned by the play-data API.
        print(f'{name}下载失败: {exc}')
        return False
    with open(f'{OUTPUT_DIR}/{track_filename(rank, name)}', 'wb') as f:
        f.write(audio.content)
    print(f'{name}下载完成!!!')
    return True


def main():
    """Walk every chart page and download each listed song."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for page_index in range(PAGE_COUNT):
        try:
            songs = fetch_page_songs(page_index + 1)
        except requests.RequestException as exc:
            print(f'第{page_index + 1}页获取失败: {exc}')
            continue
        for position, song in enumerate(songs):
            download_song(song, song_rank(page_index, position))
            # Throttle after every attempt, successful or not.
            sleep(DOWNLOAD_DELAY)


if __name__ == '__main__':
    main()
# Tags: hash, Python, res, list, print, url, ms, 取酷, Top500
# From: https://www.cnblogs.com/yangSad/p/16911012.html