先创建dou_url.txt,其实不用创建也行,运行第一遍代码的时候程序会自动创建
在dou_url.txt里填写一个或多个视频url即可爬取;也可以直接粘贴url加文字的分享内容,因为程序会用正则表达式从文本中匹配出url。
比如:7.12 oQK:/ 重生回来第一节课,就给老师上了一课%%漫画解说 %%漫画 %%二次元 https://v.douyin.com/LK9nmKp/ 复制此链接,打开Dou音搜索,直接观看视频!
其实原理就是正则表达式匹配url
import os
import re
import time
from urllib.parse import urlsplit

import requests
from selenium.webdriver import Chrome
def create_note():
    """Make sure ``dou_url.txt`` exists in the current working directory.

    Returns:
        bool: ``True`` if the file already existed, ``False`` if it was
        missing and has just been created empty (the user is then asked to
        fill it in before running the script again).
    """
    if os.path.exists('dou_url.txt'):
        print('dou_url.txt 已存在')
        return True

    # Create the empty file through a context manager so the handle is closed
    # immediately instead of being leaked until garbage collection.
    with open('dou_url.txt', mode='w', encoding='utf-8'):
        pass
    print('创建 dou_url.txt 成功')
    print('请在 dou_url.txt 输入内容')
    return False
def read_txt():
    """Collect every URL found in ``dou_url.txt``.

    The file may hold bare URLs or whole share messages (text mixed with a
    link); anything shaped like ``scheme://non-whitespace`` is extracted.

    Returns:
        list[str]: the matched URLs in the order they appear in the file.

    Raises:
        FileNotFoundError: if ``dou_url.txt`` does not exist.
    """
    with open('dou_url.txt', 'r', encoding='utf-8') as f:
        data = f.read()

    # The scheme is ASCII letters only.  The class must be ``A-Z``: the range
    # ``A-z`` also admits ``[ \ ] ^ _`` and the backtick, which would glue
    # leading punctuation onto the URL.  A raw string keeps ``\S`` a regex
    # escape rather than an invalid string escape.
    pattern = re.compile(r'[a-zA-Z]+://\S*')
    url = pattern.findall(data)
    print(url)
    return url
def open_url(url):
    """Open a share link in the browser and return the direct video URL.

    Relies on the module-level ``web`` driver created in the ``__main__``
    section of this script.

    Args:
        url: page URL to load in the browser.

    Returns:
        str: ``'https:'`` followed by the ``src`` value of the first matching
        ``<source>`` tag in the rendered page.

    Raises:
        IndexError: if the page source contains no matching ``<source>`` tag.
    """
    web.get(url)
    time.sleep(2)  # give the page time to load

    # Block while the element with id "captcha-verify-image" is on the page
    # (presumably a captcha the user has to solve by hand).
    # find_elements() returns an empty list when nothing matches, so no
    # exception handling is needed, and the locator-string form works on both
    # Selenium 3 and 4 (find_element_by_xpath was removed in Selenium 4.3).
    while web.find_elements('xpath', '//*[@id="captcha-verify-image"]'):
        time.sleep(0.5)  # poll instead of spinning the CPU

    time.sleep(1)
    source = web.page_source
    res = re.findall(r'<source class="" src="(.*?)" type="">', source)
    if not res:
        # Same exception type the bare res[0] produced, but with context.
        raise IndexError(f'no <source> video tag found in page for {url}')
    # The regex captures the src value; the scheme is prepended here.
    res_url = 'https:' + res[0]
    print(res_url)
    return res_url
def download_video():
    """Download every video listed in ``dou_url.txt`` into ``douyin/``.

    For each URL returned by ``read_txt()`` the direct video address is
    resolved with ``open_url()`` and the content is saved as
    ``douyin/<last path segment of the share URL>.mp4``.

    Raises:
        requests.HTTPError: if the video request returns an error status.
    """
    url = read_txt()
    path = 'douyin'
    os.makedirs(path, exist_ok=True)

    for i in url:
        video_url = open_url(i)

        # Name the file after the last path segment of the share link.
        # Parsing the URL (instead of split('/')[-2]) also works when the link
        # has no trailing slash or carries a query string.
        parts = urlsplit(i)
        name = parts.path.strip('/').split('/')[-1] or parts.netloc

        # Fetch before opening the file so a failed request does not leave an
        # empty or bogus .mp4 behind; the timeout keeps a stalled connection
        # from hanging the whole batch.
        response = requests.get(video_url, timeout=30)
        response.raise_for_status()

        # os.path.join keeps the path valid on non-Windows systems too.
        with open(os.path.join(path, name + '.mp4'), 'wb') as f:
            f.write(response.content)
if __name__ == '__main__':
    # Only start a browser when dou_url.txt already existed; on the first run
    # the file is just created and the user has to fill it in.
    if create_note():
        # Module-level on purpose: open_url() reads this global driver.
        web = Chrome()
        try:
            download_video()
        finally:
            # Always shut the browser and its driver process down, even when a
            # download fails part-way through.
            web.quit()
标签:批量,url,res,音视频,取抖,print,path,txt,dou
From: https://www.cnblogs.com/miyol/p/16722363.html