https://github.com/BtbN/FFmpeg-Builds/releases/tag/latest
这是ffmpeg下载地址,下载好要配置环境变量,合成视频要用到,因为B站的视频和音频是分开的
花了一段时间分析了一下BiliBili的网页结构,根据XHR请求分析出B站的视频由音频和视频两部分组成;先分别爬取音频与视频,再用ffmpeg工具将两者合并到一起
import json
import os
import re
import subprocess

import requests
from lxml import etree
# Request headers shared by the HTTP helpers in this file: a desktop Chrome
# user-agent string plus a bilibili.com referer.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/96.0.4664.110 Safari/537.36'
    ),
    'referer': 'https://www.bilibili.com/',
}
def get_requests(url, num, timeout=30):
    """Download *url* and hand the response body to ``write_down``.

    Args:
        url: URL of the media resource to fetch.
        num: Identifier forwarded to ``write_down`` to name the saved file.
        timeout: Value passed to ``requests.get`` as ``timeout`` so that a
            stalled connection cannot block the script forever.

    Raises:
        requests.HTTPError: If the server replies with an error status.
            Failing here keeps an error page from being saved as media.
    """
    res = requests.get(url, headers=headers, timeout=timeout)
    res.raise_for_status()
    # Show the final URL that was actually fetched (after any redirects).
    print(res.url)
    write_down(res.content, num)
def write_down(content, num):
    """Write *content* to ``BiliBili/<num>.mp4`` under the current directory.

    The ``BiliBili`` directory is created when it does not exist yet. An
    existing file with the same name is overwritten.

    Args:
        content: Raw bytes to store.
        num: Value used (via ``str``) as the file name without extension.
    """
    path = 'BiliBili'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(path, exist_ok=True)
    # os.path.join keeps the file inside the directory on every platform;
    # a literal backslash is only a separator on Windows.
    with open(os.path.join(path, str(num) + '.mp4'), 'wb') as f:
        f.write(content)
def _video_name(video_url):
    """Derive a filesystem-safe output name from a video page URL."""
    # Prefer the alphanumeric id that follows "video/" in the URL.
    match = re.search(r'video/([0-9a-zA-Z]+)', video_url)
    if match:
        return match.group(1)
    # Otherwise use the last non-empty path segment, without query/fragment.
    path_part = re.split(r'[?#]', video_url, maxsplit=1)[0]
    segments = [segment for segment in path_part.split('/') if segment]
    candidate = segments[-1] if segments else ''
    # Replace anything that could act as a path separator or odd character.
    candidate = re.sub(r'[^0-9A-Za-z._-]', '_', candidate).strip('.')
    return candidate or 'output'


def get_together(video_url=None):
    """Merge the downloaded video and audio tracks into one MP4 with ffmpeg.

    Reads ``BiliBili/3.mp4`` (video) and ``BiliBili/2.mp4`` (audio) under the
    current directory, writes ``BiliBili/<name>.mp4`` where ``<name>`` is
    derived from the video page URL, then deletes the two intermediate files.

    Args:
        video_url: Page URL used to name the output file. When omitted, the
            module-level ``url`` variable is used.

    Raises:
        RuntimeError: If ffmpeg cannot be started or exits with a non-zero
            status. The intermediate files are kept in that case so the
            merge can be retried by hand, e.g.
            ``ffmpeg -i "3.mp4" -i "2.mp4" -c copy "out.mp4"``; an outdated
            ffmpeg build may be the cause.
    """
    source_url = url if video_url is None else video_url
    name = _video_name(source_url)

    folder = os.path.join(os.getcwd(), 'BiliBili')
    video_path = os.path.join(folder, '3.mp4')
    audio_path = os.path.join(folder, '2.mp4')
    output_path = os.path.join(folder, name + '.mp4')

    # An argument list (no shell) means text taken from the URL can never be
    # interpreted as shell syntax.
    command = [
        'ffmpeg',
        '-i', video_path,
        '-i', audio_path,
        '-c', 'copy',
        output_path,
    ]
    try:
        completed = subprocess.run(command, check=False)
    except FileNotFoundError as err:
        raise RuntimeError(
            'ffmpeg executable not found; install it and add it to PATH'
        ) from err
    if completed.returncode != 0:
        raise RuntimeError(
            'ffmpeg exited with status {0}; kept {1} and {2}'.format(
                completed.returncode, video_path, audio_path
            )
        )

    # Only remove the separate tracks once the merge has succeeded.
    os.remove(video_path)
    os.remove(audio_path)
    print('下载成功')
def _load_play_info(script_texts):
    """Return the first JSON object found after an ``=`` in *script_texts*."""
    for text in script_texts:
        _, found, payload = text.partition('=')
        if not found:
            continue
        try:
            # Tolerate surrounding whitespace and a trailing semicolon.
            info = json.loads(payload.strip().rstrip(';'))
        except ValueError:
            continue
        if isinstance(info, dict) and 'data' in info:
            return info
    raise ValueError('no play-info JSON found in the page scripts')


def _pick_base_url(streams):
    """Return ``baseUrl`` of the second stream, or of the first if alone."""
    chosen = streams[1] if len(streams) > 1 else streams[0]
    return chosen['baseUrl']


def get_json(url):
    """Fetch a video page, extract its stream JSON and download both tracks.

    The page's ``//script[4]`` text is expected to look like
    ``<name>=<json>``. The JSON's ``data.dash.video`` and ``data.dash.audio``
    lists supply the ``baseUrl`` values, which are downloaded through
    ``get_requests`` as file ``3`` (video) and file ``2`` (audio).

    Args:
        url: URL of the video page.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        ValueError: If no parsable play-info JSON is found in the page.
    """
    res = requests.get(url, headers=headers, timeout=30)
    res.raise_for_status()
    tree = etree.HTML(res.text)
    # Parse the script text itself rather than the repr of the result list,
    # whose escaping would alter backslashes and quotes inside the JSON.
    script_texts = tree.xpath('//script[4]/text()')
    play_info_js = _load_play_info(script_texts)

    dash = play_info_js['data']['dash']
    video_info_js = _pick_base_url(dash['video'])
    audio_info_js = _pick_base_url(dash['audio'])
    get_requests(video_info_js, 3)
    get_requests(audio_info_js, 2)
if __name__ == '__main__':
    # ``url`` is deliberately a module-level name: get_together() reads it.
    # input() already returns str; strip stray whitespace from a pasted URL
    # so it does not end up in the request or in the output file name.
    url = input('请输入BiliBili的视频URL:').strip()
    get_json(url)
    get_together()
标签:info,BiliBili,play,视频,get,爬取,mp4,content
From: https://www.cnblogs.com/miyol/p/16721394.html