首页 > 编程语言 >python 某音文件下载

python 某音文件下载

时间:2023-10-27 17:12:10浏览次数:51  
标签:某音 headers python res url https print com 下载

import time
# from pyquery import PyQuery as pq
import commonMethod
import datetime
import requests
import re
import os
import json

# Regex patterns used by the scraper. Raw strings keep regex escapes such as
# ``\d`` from being read as (invalid) Python string escapes.

# Captures the path + query string of the <source> tag served from the
# v3-web CDN host. The 486-488 character window is presumably tuned to the
# URL length observed when the script was written -- TODO confirm it still
# matches current pages.
pattern_1 = r'<source class="" src="//v3-web.douyinvod.com/(.{486,488}) type="">'
# Disabled variants for other hosts, kept for reference:
# pattern_2 = '<source class="" src="//v26-web.douyinvod.com/(.{486,488}) type="">'
# pattern_3 = '<source class="" src="//www.douyin.com/aweme/v1/play/(.+) type="">'

# Captures the numeric video id from a video page URL.
pattern_4 = r'https://www.douyin.com/video/(\d+)'

def get_info_by_pattern(text,pattern):
    """Return every non-overlapping match of *pattern* found in *text*.

    The result follows ``re.findall`` semantics: a list of strings when the
    pattern has zero or one capture group, a list of tuples when it has
    several, and an empty list when nothing matches.
    """
    return re.findall(pattern, text)

def get_headdouyinvod_com():
    """Build the browser-like request headers used for the video download.

    A fresh dict is returned on every call, so callers may modify it freely.
    """
    connection = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    client_hints = {
        'sec-ch-ua': '"Google Chrome";v="95", "Chromium";v="95", ";Not A Brand";v="99"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    browser = {
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    }
    fetch_metadata = {
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-User': '?1',
        'Sec-Fetch-Dest': 'document',
    }
    locale = {'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8'}

    # Merge in the same key order as a real browser request would list them.
    return {**connection, **client_hints, **browser, **fetch_metadata, **locale}

# Resolve the direct media URL for a Douyin video page
def get_douyin_url(driver1,sharl_url):
    """Open a Douyin video page in the browser and extract its media URL.

    :param driver1: browser driver object; this function calls ``get``,
        ``add_cookie``, ``refresh`` and ``execute_script`` on it (presumably
        a Selenium WebDriver -- confirm against ``commonMethod.getDriver``).
    :param sharl_url: full video page URL, or a bare video id, which is
        expanded to ``https://www.douyin.com/video/<id>``.
    :return: ``(media_url, video_id)`` on success; ``('', '')`` when no
        ``<source>`` tag matches ``pattern_1`` or when any step raises.
    """
    try:
        if 'https' not in sharl_url:
            sharl_url = 'https://www.douyin.com/video/' + sharl_url
        video_ids = get_info_by_pattern(sharl_url, pattern_4)
        VID = video_ids[0] if video_ids else ''

        # The site has to be open before cookies for its domain can be set.
        driver1.get('https://www.douyin.com')
        list_cooke = [
            {'domain': '.douyin.com', 'expiry': 1698999663, 'httpOnly': False, 'name': 'VIDEO_FILTER_MEMO_SELECT',
             'path': '/', 'secure': False, 'value': '%7B%22expireTime%22%3A1698999663897%2C%22type%22%3A1%7D'},
            # Placeholder: the remaining cookies were elided from the
            # published script and must be filled in by the user.
            ...]

        for cook in list_cooke:
            # Skip the Ellipsis placeholder (or any other non-dict entry);
            # handing it to add_cookie would raise and abort the lookup.
            if not isinstance(cook, dict):
                continue
            driver1.add_cookie(cook)
        time.sleep(5)
        driver1.refresh()

        driver1.get(sharl_url)
        # Give the page time to render the <source> tags.
        time.sleep(5)
        txt = driver1.execute_script("return document.documentElement.outerHTML")
        # Turn HTML-escaped '&amp;' separators back into plain '&'.
        txt = txt.replace('amp;', '')

        sources = get_info_by_pattern(txt, pattern_1)
        if sources:
            # The capture may include the closing quote of the src attribute.
            herf1 = 'https://v3-web.douyinvod.com/' + sources[0].replace('"', '')
            print(herf1)
            return herf1, VID
    except Exception as ex:
        print('获取链接发生异常:',sharl_url,ex)

    # NOTE: only the v3-web.douyinvod.com host is handled. Earlier revisions
    # also tried v26-web.douyinvod.com and www.douyin.com/aweme/v1/play/.
    return '',''

# Download a Douyin video file
def _parse_content_range(value):
    """Return ``(last_byte, total)`` from a ``bytes a-b/total`` header value.

    Returns ``None`` when the header is missing or not in that exact form
    (for example when the total length is ``*``).
    """
    found = re.fullmatch(r'\s*bytes\s+(\d+)-(\d+)/(\d+)\s*', value or '')
    if found is None:
        return None
    return int(found.group(2)), int(found.group(3))


def dowfile_v3_web_douyinvod_com(file_url, fileName):
    """Download *file_url* and save it as *fileName*.

    The first request is sent without a ``Range`` header and its body is
    written to a freshly created file. If the server answers with a
    ``Content-Range`` header (i.e. it served only part of the file), the
    remainder is fetched in ``page_size`` byte ranges and appended.

    :param file_url: direct media URL.
    :param fileName: destination path; an existing file is overwritten.
    :return: None. Failures are printed rather than raised.
    """
    page_size = 1024 * 128
    max_attempts = 3   # tries per byte range before giving up
    timeout = 30       # seconds; without it a stalled server hangs forever

    request_headers = get_headdouyinvod_com()
    strstart = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    print('开始下载:', strstart)

    try:
        with requests.get(file_url, headers=request_headers, stream=True, timeout=timeout) as response:
            if response.status_code not in (200, 206):
                print('下载文件发生异常:', file_url, 'HTTP %d' % response.status_code)
                return
            print(dict(response.headers))
            served = _parse_content_range(response.headers.get('Content-Range'))
            # 'wb', not 'ab': this response starts at byte 0, so appending to
            # a file left over from a previous run would corrupt it.
            with open(fileName, 'wb') as f:
                for chunk in response.iter_content(chunk_size=page_size):
                    if chunk:
                        f.write(chunk)

        if served is not None:
            last_byte, total_length = served
            sfrom = last_byte + 1
            while sfrom < total_length:
                sto = min(sfrom + page_size, total_length) - 1
                # Copy the request headers; never reuse response headers.
                range_headers = dict(request_headers)
                range_headers['Range'] = 'bytes=%d-%d' % (sfrom, sto)
                print(sfrom, total_length)

                data = b''
                for attempt in range(1, max_attempts + 1):
                    try:
                        part = requests.get(file_url, headers=range_headers, timeout=timeout)
                        # Anything but 206 means the range was not honoured;
                        # appending that body would corrupt the file.
                        if part.status_code != 206:
                            raise IOError('unexpected HTTP status %d' % part.status_code)
                        data = part.content
                        break
                    except (requests.exceptions.RequestException, IOError) as ex:
                        print(ex)
                        if attempt < max_attempts:
                            time.sleep(10)

                if not data:
                    # Skipping a range would leave a hole in the file.
                    print('下载文件发生异常:', file_url, 'bytes=%d-%d' % (sfrom, sto))
                    return
                # The range is buffered in memory and written only once it
                # arrived completely, so a retry never duplicates bytes.
                with open(fileName, 'ab') as f:
                    f.write(data)
                sfrom += len(data)
                # Short pause between range requests.
                time.sleep(1)

        strend = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        print('完成下载:', strstart, strend)

    except Exception as ex:
        print('下载文件发生异常:',file_url,ex)

# Download a batch of Douyin videos
def down_file(sharl_url_list,strDirectory):
    '''
    Download every video in a list of Douyin video pages.

    :param sharl_url_list: list of video page URLs, e.g.
        ['https://www.douyin.com/video/7294079788010999040','https://www.douyin.com/video/7293552737067928868']
    :param strDirectory: directory the files are stored in, e.g.
        D:/douyin_file_down/202310; it is created when missing. Each file is
        named <video id>.mp4.
    :return: None. Errors are printed rather than raised.
    '''
    driver1 = None
    try:
        # Without the directory every open() in the download step would fail.
        os.makedirs(strDirectory, exist_ok=True)
        driver1 = commonMethod.getDriver('', False)
        for sharl_url in sharl_url_list:
            file_url, VID = get_douyin_url(driver1, sharl_url)
            if not file_url:
                # No media URL could be resolved for this page.
                continue
            fileName = os.path.join(strDirectory, VID + '.mp4')
            dowfile_v3_web_douyinvod_com(file_url, fileName)
    except Exception as ex:
        print(ex)
    finally:
        # Close the browser even when a page or download raised.
        if driver1 is not None:
            try:
                driver1.close()
            except Exception as ex:
                print(ex)

if __name__ == "__main__":
    # Folder the downloaded videos are written to
    # (os.getcwd() is an alternative).
    save_dir = 'D:/douyin/file/202310'
    # Video pages to download.
    video_pages = [
        'https://www.douyin.com/video/7293555365818453274',
        'https://www.douyin.com/video/7293552737067928868',
        'https://www.douyin.com/video/7293555206388780324',
    ]
    down_file(video_pages, save_dir)
View Code

 

标签:某音,headers,python,res,url,https,print,com,下载
From: https://www.cnblogs.com/shaosks/p/17792792.html

相关文章

  • Django和Vue.js是两种不同的框架,它们各自有自己的特点和用途
    Django和Vue.js是两种不同的框架,它们各自有自己的特点和用途。Django:Django是一个开放源代码的Python Web应用框架;它采用了MTV(模型,视图,模板)的软件设计模式;Django可以方便、快捷地创建高品质、易维护、数据库驱动的应用程序;Django还包含许多功能......
  • MT管理器逆向改仿真银行app软件,提供教程和成品下载
    我这边的思路是通过MT管理器去逆向一个银行仿真APP,让里面的默认信息都自定义修改成我自己的,大佬可以不用看,但是一些新手有必要去学习一下。教程开始:1、需要的两个附件工具已经导入到了模拟器里面了2、用Mt管理器提取这个模拟器的安装包3、点查看,进入压缩包目录里4、然后点......
  • wpf webview2动态修改下载文件的下载路径 文件下载路径选择
    通过webview2下载文件时候会将文件保存在用户的默认下载目录,如果想调整成通过弹窗选择下载路径的方式则需要将默认行为做出修改。本文通过CoreWebView2_DownloadStarting这个事件来调整下载路径,基本思路为通过弹窗让用户选择需要保存的路径,如果用户取消了此操作则通过这个事件......
  • Python打不开问题解决方案大全
    在使用Python进行编程开发的过程中,我们不可避免会遇到Python打不开的问题。这些问题可能是由于环境配置、包管理和依赖文件等问题所导致的,但不管是何种原因,我们都需要解决它们才能顺利地进行工作。本文将从多个方面为大家详细介绍Python打不开问题的解决方法。一、Python环境配......
  • python3.8 debug记录
    AttributeError: module 'numpy' has no attribute 'typeDict' → pip3 install numpy==1.18.5;ModuleNotFoundError: No module named 'sklearn' → pip install scikit-learn(注:安装这个库可能与已有的numpy版本冲突,需要多卸载安装几次)......
  • 如何借助python第三方库存取不同应用程序的用户名、密码
    在之前的一系列文章中,小爬分享了很多用Pywin32、uiAutomation、sapGuiScript等技术实现应用程序或者Web网站(如SAP、Excel、outLook邮件系统、OA系统)的自动化操作的文章。但是,这些文章都绕开了一个知识点:如何优雅地实现自动登录。与其说是想聊聊如何实现自动登录,其实是绕到了......
  • 攻克数字--魔方罗盘的商品榜单数据生成python代码
    ①先配置,方法同数据解析入库(https://www.cnblogs.com/gkdata/p/17792339.html)②生成python代码importsyssys.path.append(r'D:\安装包\GKData\ApiConfig')importGkGk.Guid2DB_Path=r'"D:\安装包\GKData\3rdLibs\Guid2DB.exe"'_headers_dict......
  • Python 轻松生成PDF文档
    PDF(PortableDocumentFormat)是一种常用的文档格式,具有跨平台兼容性、保真性、安全性和交互性等特点。我们日常生活工作中的合同、报告、论文等通常都采用PDF格式,以确保文档在不同的操作系统(例如Windows、Mac、Linux)和设备上被查看时都能保持外观的一致性。Python是一种高效简洁......
  • 一周学会python3基础
    3Python基础Python程序员必须确保用户提供输入,并根据输入提供输出,这样才能拥有动态应用程序。Python解释器和程序中的所有函数都可以访问用户的输入值。3.1为什么需要输入值?应用程序的生存依赖于输入值。从网络应用程序到最新的元宇宙应用程序,一切运行都依赖于用户的输入值。例......
  • 一些不错的python 特征工程包
    特征工程在机器学习中是比较重要的,而且也是比较花费时间的,而且对于不同场景的业务(序列,机器视觉,NLP)会有不同的处理方式,整理了一些日常使用比较多的工具,可以参考工具包scikit-learn 比较老牌了,提供了不少特征工程的工具包,同时也提供了不少相关的算法实现autofeat 实现上与scikit-le......