这是一个练手的项目,搜索到的资源大部分都是枪版的……
import mechanicalsoup
import re
import json
def extract_episode_number(label: str) -> int:
    """Return the episode count advertised in a result label.

    The label is searched for the first run of digits that is followed
    (optionally after non-digit characters) by the character "集".

    Args:
        label: Text (or serialized markup) of a search-result label.

    Returns:
        The matched number as an ``int``, or ``1`` when the label contains
        no such number.
    """
    found = re.search(r"(\d+)[^0-9]*集", label)
    if found is None:
        # No "<digits>…集" marker: treat the entry as a single episode.
        return 1
    return int(found.group(1))
def _parse_play_urls(page_html):
    """Extract the links from a ``const playUrls = {...};`` script assignment.

    Returns a list of link strings, or ``None`` when the page contains no
    ``playUrls`` assignment.
    """
    found = re.search(r'const playUrls = ({.*?});', page_html, re.DOTALL)
    if found is None:
        return None
    urls_dict = json.loads(found.group(1))
    # Defensive: un-escape any "\/" sequences left in the decoded values.
    return [value.replace('\\/', '/') for value in urls_dict.values()]


def get_movie(keyword, pages=1):
    """Search vidhub.in for *keyword* and collect the play links of every hit.

    For each search-result page, every ``.title a`` link is paired by position
    with a ``.label`` element (assumes the page renders exactly one label per
    title -- TODO confirm against the site markup). The label gives the number
    of episodes; each episode's watch page is opened and the links found in
    its ``playUrls`` script object are printed and collected.

    Args:
        keyword: Search term; it is percent-encoded before being placed in
            the URL path.
        pages: Number of search-result pages to scan, starting from page 1.

    Returns:
        A list of all play links found, in the order they were discovered.
    """
    from urllib.parse import quote, urljoin

    encoded_keyword = quote(keyword, safe='')
    movie_links = []
    # The context manager closes the underlying HTTP session on exit.
    with mechanicalsoup.StatefulBrowser() as browser:
        for page in range(1, pages + 1):
            url = f'https://vidhub.in/search/{encoded_keyword}/page/{page}/'
            browser.open(url)
            result_page = browser.get_current_page()
            titles = result_page.select('.title a')
            labels = result_page.select('.label')
            print("第 " + str(page) + " 页")
            for title, label in zip(titles, labels):
                # Detail URL -> watch URL of episode 1.
                first_episode = (
                    title['href']
                    .replace('/detail/', '/watch/')
                    .replace('.html', '/1.html')
                )
                # Resolve relative hrefs; absolute ones pass through unchanged.
                first_episode = urljoin(url, first_episode)
                print(f'电影名: {title.text}')
                episode_count = extract_episode_number(str(label))
                # Episodes are numbered 1..episode_count; fetch exactly those
                # pages and label each one with the number actually fetched.
                for episode in range(1, episode_count + 1):
                    if episode_count > 1:
                        print("第" + str(episode) + "集")
                    browser.open(
                        first_episode.replace('/1.html', f'/{episode}.html')
                    )
                    links = _parse_play_urls(
                        browser.get_current_page().prettify()
                    )
                    if links is None:
                        # Page has no playUrls object; nothing to collect.
                        continue
                    print("播放列表:")
                    for link in links:
                        print(link)
                    movie_links.extend(links)
                print("*" * 10)
    return movie_links
# Run a search
get_movie("周处除三害", pages=1)  # search keyword and number of result pages to scan
从搜索到的资源地址中任选一个,替换下方 HTML 页面中的 src 路径:
<!DOCTYPE html>
<!-- Minimal page that plays a single HLS (.m3u8) stream with video.js. -->
<html>
<head>
<!-- video.js 7.16.0 stylesheet and script, loaded from the vjs.zencdn.net CDN -->
<link href="https://vjs.zencdn.net/7.16.0/video-js.min.css" rel="stylesheet">
<script src="https://vjs.zencdn.net/7.16.0/video.min.js"></script>
</head>
<body>
<!-- Element the player below attaches to, looked up by its id -->
<video id="my-video" class="video-js vjs-default-skin" controls></video>
<script>
// Create the video.js player on the <video id="my-video"> element.
var player = videojs("my-video");
// Source list for the player: replace `src` with one of the links printed
// by the Python script.
var options = {
sources: [{
src: "https://vod12.xmyysw.com/20240114/8JDWrBUn7v3/index.m3u8",
type: "application/x-mpegURL"
}]
};
// Assign the source only once the player reports it is ready.
player.ready(function() {
player.src(options.sources);
});
</script>
</body>
</html>
标签:在线,get,python,href,搜索,print,page,match,browser
From: https://www.cnblogs.com/qcy-blog/p/18108831