前言:
最近一直在学习 Python 方面的知识,突然对如何用 Python 处理视频非常感兴趣,于是百度了一下网上的方法,并出于兴趣,试着从网上下载一个 m3u8 视频作为实例。
本实例仅供个人学习使用,试着用了协程(asyncio 异步任务)的方法。
一、由于时间紧,没有直接从网页中获取m3u8网址,而是把m3u8地址放到excel表格中。
二、从表格中读取m3u8地址,从而获取视频真实地址进行下载。
一、多任务异步处理
# coding:utf-8
import requests
import re
from aiohttp import ClientTimeout, ClientError
from openpyxl import load_workbook
import asyncio
import aiohttp
# Request headers sent with every playlist and segment request.
# The User-Agent imitates a desktop Chrome browser.  The previous literal was
# garbled ("KABUL" instead of "KHTML", missing spaces between product tokens
# and a trailing space), so it did not look like a real browser string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    ),
}
# Absolute HTTPS segment URLs inside a playlist.  The dot before "ts" is
# escaped and followed by a word boundary so that only a literal ".ts" suffix
# ends a match (the unescaped form cut "…/parts/1.ts" at "par" + "ts").
TS_URL_PATTERN = re.compile(r"https\S*?\.ts\b")

# Spreadsheet read by main(): column A is used as the output file name and
# column B as the m3u8 playlist URL.
WORKBOOK_PATH = r"E:\project\project03\m3u8 - 副本 (2) - 副本.xlsx"


def extract_ts_urls(playlist_text):
    """Return the absolute ``https`` ``.ts`` URLs found in *playlist_text*.

    The URLs are returned in the order they appear in the text.  An empty
    list is returned when nothing matches.
    """
    return TS_URL_PATTERN.findall(playlist_text)


async def _fetch_segment(session, url):
    """Download *url* with *session* and return its body as bytes.

    Returns ``None`` (after printing the error) when the request fails, the
    server answers with an HTTP error status, or the session timeout expires,
    so that one bad segment does not abort the whole episode.
    """
    print("正在下载", url)
    try:
        async with session.get(url, headers=headers) as response:
            # Without this check an HTML error page would be written into
            # the video file as if it were segment data.
            response.raise_for_status()
            return await response.read()
    except (ClientError, asyncio.TimeoutError) as e:
        print(e)
        return None


async def get_page(url, title, session=None):
    """Download a single segment and append it to ``{title}.mp4``.

    Args:
        url: URL of the segment to download.
        title: Output file name without the ``.mp4`` extension.
        session: Optional ``aiohttp.ClientSession`` to reuse.  When omitted,
            a temporary session with a 5000 second total timeout is created
            for this one request.

    Note:
        Running several ``get_page`` calls for the same *title* concurrently
        appends the segments in completion order.  Use ``download_episode``
        when the segments must be stored in playlist order.
    """
    if session is None:
        timeout = ClientTimeout(total=5000)
        async with aiohttp.ClientSession(timeout=timeout) as own_session:
            page_content = await _fetch_segment(own_session, url)
    else:
        page_content = await _fetch_segment(session, url)
    if page_content is not None:
        with open(f"{title}.mp4", "ab") as f:
            f.write(page_content)


async def download_episode(title, ts_urls):
    """Download *ts_urls* concurrently and write them to ``{title}.mp4``.

    All requests are started at once on one shared session, but the results
    are awaited and written in playlist order, so the output file has the
    segments in the right sequence regardless of which download finishes
    first.  Segments that fail to download are skipped.
    """
    timeout = ClientTimeout(total=5000)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        tasks = [
            asyncio.ensure_future(_fetch_segment(session, ts_url))
            for ts_url in ts_urls
        ]
        # "wb" rather than append: the whole episode is written in this one
        # pass, and re-running the script must not append a second copy to a
        # file left over from an earlier run.
        with open(f"{title}.mp4", "wb") as f:
            for task in tasks:
                page_content = await task
                if page_content is not None:
                    f.write(page_content)


def load_episodes(path=WORKBOOK_PATH):
    """Return ``(name, playlist_url)`` pairs read from the active sheet.

    Rows whose column B cell is empty are skipped.  The workbook is closed
    even if reading fails.
    """
    wk = load_workbook(path)
    try:
        sheet = wk.active
        return [
            (name_cell.value, url_cell.value)
            for name_cell, url_cell in zip(sheet["A"], sheet["B"])
            if url_cell.value
        ]
    finally:
        wk.close()


def main():
    """Download every episode listed in the workbook, one episode at a time."""
    for title, playlist_url in load_episodes():
        resp = requests.get(playlist_url, headers=headers)
        ts_urls = extract_ts_urls(resp.text)
        if not ts_urls:
            # Nothing to schedule; waiting on an empty task list would raise.
            print(f"{title}未找到ts分片,已跳过")
            continue
        asyncio.run(download_episode(title, ts_urls))
        # Reported only after the episode's downloads have actually finished.
        print(f"{title}下载完成")


if __name__ == "__main__":
    main()
二、普通顺序处理:
# coding:utf-8
import requests
import re
from openpyxl import load_workbook
# Request headers sent with every playlist and segment request.
# The User-Agent imitates a desktop Chrome browser.  The previous literal was
# garbled ("KABUL" instead of "KHTML", missing spaces between product tokens
# and a trailing space), so it did not look like a real browser string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    ),
}
# Absolute HTTPS segment URLs inside a playlist.  The dot before "ts" is
# escaped and followed by a word boundary so that only a literal ".ts" suffix
# ends a match (the unescaped form cut "…/parts/1.ts" at "par" + "ts").
SEGMENT_URL_PATTERN = re.compile(r"https\S*?\.ts\b")


def find_segment_urls(playlist_text):
    """Return the absolute ``https`` ``.ts`` URLs found in *playlist_text*.

    The URLs are returned in the order they appear in the text.  An empty
    list is returned when nothing matches.
    """
    return SEGMENT_URL_PATTERN.findall(playlist_text)


def download_all_sequentially():
    """Download every episode listed in the workbook, one segment at a time.

    Column A of the active sheet is used as the output file name and column B
    as the m3u8 playlist URL.  Rows with an empty URL cell are skipped, and
    the workbook is closed even if a download raises.
    """
    wk = load_workbook(r"E:\project\project03\m3u8 - 副本 (2).xlsx")
    try:
        sheet = wk.active
        for name_cell, url_cell in zip(sheet["A"], sheet["B"]):
            if not url_cell.value:
                continue
            print(f"正在下载{name_cell.value}集", end=" ")
            resp = requests.get(url_cell.value, headers=headers)
            ts_urls = find_segment_urls(resp.text)
            if not ts_urls:
                print("未找到ts分片,已跳过")
                continue
            # Open the output once per episode instead of once per segment.
            # "wb" so that re-running the script does not append a second
            # copy to a file left over from an earlier run.
            with open(f"{name_cell.value}.mp4", "wb") as f:
                for ts_url in ts_urls:
                    res = requests.get(ts_url, headers=headers)
                    if not res.ok:
                        # Do not write an HTTP error page into the video.
                        print(f"{ts_url} 下载失败: HTTP {res.status_code}")
                        continue
                    f.write(res.content)
            print("下载完毕")
    finally:
        wk.close()


if __name__ == "__main__":
    download_all_sequentially()