本文所有教程及源码、软件仅为技术研究。不涉及计算机信息系统功能的删除、修改、增加、干扰,更不会影响计算机信息系统的正常运行。不得将代码用于非法用途,如侵立删!
抖音个人视频列表采集
环境
- win10、mac
- Python3.9
2023.3.3更新
现在需要校验 cookie(ck)中的 tk 参数,特此更新一下。
获取所需参数
def get_cookie(self):
    """Obtain a verified ``s_v_web_id`` cookie and store it in ``self.cookie``.

    Each attempt requests ``url`` (a name resolved from the enclosing scope,
    not visible in this block), reads the ``x-vc-bdturing-parameters``
    response header, base64/JSON-decodes it to get ``fp`` and ``detail``,
    and passes both to ``Verify().verify``. When the verifier reports
    ``code == 200`` the cookie string ``s_v_web_id=<fp>;`` is stored on
    ``self.cookie`` and the method returns.

    Every failed attempt (missing header, verifier exception, non-200
    verifier result, or any other exception) consumes one of the 10
    available attempts, so the method always terminates.

    Returns:
        None. On success ``self.cookie`` is set; if all attempts fail,
        ``self.cookie`` is left untouched and a message is logged.
    """
    max_attempts = 10
    for _ in range(max_attempts):
        try:
            # Close the session as soon as the headers have been read.
            with requests.session() as session:
                h = session.get(url, verify=False, allow_redirects=False, timeout=20).headers
            x_vc_bdturing_parameters = h.get('x-vc-bdturing-parameters')
            if not x_vc_bdturing_parameters:
                logger.info('提取:x_vc_bdturing_parameters 失败,重试!')
                time.sleep(random.randint(3, 5))
                continue

            # The header value is base64-encoded JSON.
            verify_data = json.loads(base64.b64decode(x_vc_bdturing_parameters).decode("utf-8"))
            fp = verify_data.get("fp")
            detail = verify_data.get("detail")
            logger.info(f"成功提取:{fp}, 开始验证")

            try:  # Verification fails intermittently; retry on error.
                msg = Verify().verify(fp, detail)
                logger.info(msg)
            except Exception as e:
                logger.info(f"{e}")
                continue

            if msg.get('code') != 200:
                logger.info(f"{msg.get('message')},重试")
                continue

            logger.info(f"ck s_v_web_id:{fp}, {msg.get('message')}")
            self.cookie = f's_v_web_id={fp};'
            return
        except Exception as e:
            logger.info(f'提取:x_vc_bdturing_parameters 出错:{e}')
            time.sleep(random.randint(3, 5))
            continue

    # All attempts used up without a verified cookie.
    logger.info(f'提取:x_vc_bdturing_parameters 重试{max_attempts}次仍未成功,放弃')
根据作者id获取所有视频
def get_aweme_list(self, sec_user_id):
    """Fetch an author's video list and return the video IDs.

    Builds the request headers and query parameters, signs the query with
    ``self._get_xb`` (sent as the ``X-Bogus`` parameter), requests ``url``
    through ``self._parse_url`` and reads ``aweme_list`` from the JSON
    response. ``url`` is resolved from the enclosing scope and is not
    visible in this block.

    Only a single request (``count`` = 100) is made here; no pagination is
    performed in this method.

    Args:
        sec_user_id: The author's ``sec_user_id`` value.

    Returns:
        A list with the ``aweme_id`` of every entry in ``aweme_list``, or
        ``None`` when the response contains no ``aweme_list``.
    """
    headers = {
        "accept": "application/json, text/plain, */*",
        "accept-language": "zh-CN,zh;q=0.9",
        "bd-ticket-guard-client-csr": "LS0tLS1CRUdJTiBDRVJUSUZJQ0FURSBSRVFVRVNULS0tLS0NCk1JSUJEekNCdFFJQkFEQW5NUXN3Q1FZRFZRUUdFd0pEVGpFWU1CWUdBMVVFQXd3UFltUmZkR2xqYTJWMFgyZDENCllYSmtNRmt3RXdZSEtvWkl6ajBDQVFZSUtvWkl6ajBEQVFjRFFnQUVWdHJwOUhyOTdwRCttcGVxcTZIZzBUanUNCnJQRVpGSVQzajBTUGFQNGVGaXRzeHU5U3U2ZWJFWHVDNDVlYkMxbExFVlBGVXNPZFF6TWlsTjFmWThDdlZxQXMNCk1Db0dDU3FHU0liM0RRRUpEakVkTUJzd0dRWURWUjBSQkJJd0VJSU9kM2QzTG1SdmRYbHBiaTVqYjIwd0NnWUkNCktvWkl6ajBFQXdJRFNRQXdSZ0loQU5WOWlTOUVzVGszem5KOFprTDVNKzNZTk11NTRRNnF6Qm5kUy9Yd1Y3b1INCkFpRUFxbERLTkcrcUMyMjBBQ1B2Z1IrVlI2VWh3RXhUOEZTS0N2LzU2clBrMmNzPQ0KLS0tLS1FTkQgQ0VSVElGSUNBVEUgUkVRVUVTVC0tLS0tDQo=",
        "bd-ticket-guard-version": "2",
        "cache-control": "no-cache",
        "pragma": "no-cache",
        "referer": "https://www.douyin.com/",
        # NOTE(review): the "^" sequences in the sec-ch-ua* values look like
        # cmd.exe escaping left over from a copied curl command - confirm.
        "sec-ch-ua": "^\\^Chromium^^;v=^\\^110^^, ^\\^Not",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "^\\^Windows^^",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "cookie": self.cookie,
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36"
    }
    params = {
        "aid": "6383",
        "channel": "channel_pc_web",
        "sec_user_id": sec_user_id,
        # Current time in milliseconds (second resolution).
        "max_cursor": int(time.time()) * 1000,
        "locate_item_id": "7064150578586193188",
        "locate_query": "false",
        "show_live_replay_strategy": "1",
        "count": "100",
        "publish_video_strategy_type": "2",
        "pc_client_type": "1",
        "version_code": "170400",
        "version_name": "17.4.0",
        "cookie_enabled": "true",
        "screen_width": "1680",
        "screen_height": "1050",
        "browser_language": "zh-CN",
        "browser_platform": "Win32",
        "browser_name": "Chrome",
        "browser_version": "108.0.0.0",
        "browser_online": "true",
        "engine_name": "Blink",
        "engine_version": "108.0.0.0",
        "os_name": "Windows",
        "os_version": "10",
        "cpu_core_num": "4",
        "device_memory": "8",
        "platform": "PC",
        "downlink": "10",
        "effective_type": "4g",
        "round_trip_time": "100",
        "msToken": ""
    }
    # The signature is computed over the parameters before X-Bogus is added.
    x_b = self._get_xb(params=params)
    print(x_b)
    params['X-Bogus'] = x_b

    response = self._parse_url(url, headers=headers, params=params)
    aweme_list = response.json().get('aweme_list')
    if not aweme_list:
        logger.info(f'获取失败:{sec_user_id}')
        return None

    # Collect (aweme_id, desc) pairs in a single pass over the response.
    pairs = [(item.get('aweme_id'), item.get('desc')) for item in aweme_list]
    aweme_ids = [aweme_id for aweme_id, _ in pairs]
    logger.info(f'成功获取{sec_user_id}视频ID列表:{len(aweme_ids)}条')
    for pair in pairs:
        print(pair)
    # Hand the IDs back so callers can tell success from failure and use them.
    return aweme_ids
s_v_web_id 滑块验证和 x-b 算法,之前的文章已经分析过,在此就不过多赘述了,有兴趣的可以去看下:https://cenjy.blog.csdn.net/article/details/126193773
效果
资源下载
https://download.csdn.net/download/qq_38154948/87527573
本文仅供学习交流使用,如侵立删!
标签:info,视频,get,列表,aweme,抖音,sec,logger,id From: https://www.cnblogs.com/c1033383881/p/17176553.html