# 对于刚入门的爬虫新手来说,最重要的是积累实战经验。可以用来写爬虫的语言有很多,今天我们用 Python 来爬取短视频,下面的代码可供大家借鉴参考。
# coding=utf-8
# 标签:__, pcursor, typename, python, 爬虫, url, json, 新手, data
# From: https://blog.51cto.com/u_13488918/5992184
import json
import os.path
import pprint
import requests
def _safe_filename(title, fallback):
    """Turn a video caption into a string that is safe to use as a file name.

    Path separators, characters that Windows rejects in file names and
    control characters (including newlines) are replaced with ``_``.
    Leading/trailing spaces and dots are stripped, and the result is cut to
    60 characters so that, even with 4-byte UTF-8 characters plus the
    ``.mp4`` suffix, the name stays under the common 255-byte limit.

    :param title: caption text; may be ``None`` or empty.
    :param fallback: name to use when nothing usable is left of *title*.
    :return: the cleaned name, or *fallback* if the cleaned name is empty.
    """
    cleaned = ''.join(
        '_' if ch in '\\/:*?"<>|' or ord(ch) < 32 else ch
        for ch in (title or '')
    ).strip(' .')
    return cleaned[:60] or fallback


def get_page(pcursor, save_dir='video/'):
    """Download the videos listed by the profile feed, page by page.

    Starting from *pcursor*, POST the ``visionProfilePhotoList`` GraphQL query
    to ``https://www.kuaishou.com/graphql``, save every feed entry's
    ``photoUrl`` as ``<save_dir>/<caption>.mp4``, then continue with the
    ``pcursor`` value returned in the response. Paging stops when the
    returned cursor is ``"no_more"``, is empty, or equals the cursor that was
    just requested (either of the latter two would otherwise loop forever).

    :param pcursor: cursor of the first page to fetch; the script passes
        ``""`` to start.
    :param save_dir: directory the ``.mp4`` files are written to; created if
        it does not exist.
    :raises requests.RequestException: on a timeout, connection failure or a
        non-2xx HTTP status from either the listing or a video download.
    :raises KeyError, TypeError: if the JSON response does not have the
        ``data.visionProfilePhotoList`` structure this code indexes into.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(save_dir, exist_ok=True)

    # Crawl target: 'https://www.kuaishou.com/profile/3xhv7zhkfr3rqag'
    # Tip: use Ctrl+R (batch replace) to adapt the example URLs below.
    # https://www.kuaishou.com/short-video/3xw5fmcf9jdap29?authorId=3xhv7zhkfr3rqag&streamSource=profile&area=profilexxnull
    # https://www.kuaishou.com/short-video/3xf98wc5q2cuxtq?authorId=3xhv7zhkfr3rqag&streamSource=profile&area=profilexxnull
    url = 'https://www.kuaishou.com/graphql'
    headers = {
        'content-type': 'application/json',
        'Cookie': 'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; did=web_72314bf978cb158dd7034b2370d2ae70',
        'Host': 'www.kuaishou.com',
        'Origin': 'https://www.kuaishou.com',
        'Referer': 'https://www.kuaishou.com/short-video/3x6v3xmcjsd5cki?authorId=3xhv7zhkfr3rqag&streamSource=profile&area=profilexxnull',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
    }
    timeout = 30  # seconds; without it a stalled connection hangs forever

    # Iterate instead of recursing once per page, so a long profile cannot
    # hit the interpreter's recursion limit.
    while True:
        data = {
            "operationName": "visionProfilePhotoList",
            "query": "query visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n type\n author {\n id\n name\n following\n headerUrl\n headerUrls {\n cdn\n url\n __typename\n }\n __typename\n }\n tags {\n type\n name\n __typename\n }\n photo {\n id\n duration\n caption\n likeCount\n realLikeCount\n coverUrl\n coverUrls {\n cdn\n url\n __typename\n }\n photoUrls {\n cdn\n url\n __typename\n }\n photoUrl\n liked\n timestamp\n expTag\n animatedCoverUrl\n stereoType\n videoRatio\n profileUserTopPhoto\n __typename\n }\n canAddComment\n currentPcursor\n llsid\n status\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n",
            "variables": {"userId": "3xhv7zhkfr3rqag", "pcursor": pcursor, "page": "detail", "webPageArea": "profilexxnull"}
        }
        rsp = requests.post(url=url, json=data, headers=headers, timeout=timeout)
        rsp.raise_for_status()
        profile = rsp.json()['data']['visionProfilePhotoList']

        # `or []` tolerates a null feed list instead of failing in the loop.
        for feed in profile['feeds'] or []:
            photo = feed['photo']
            # Video title
            title = photo['caption']
            # Video URL
            new_url = photo['photoUrl']
            if not new_url:
                # Nothing to download for this entry.
                continue

            # The caption is arbitrary user text; never use it as a path as-is.
            file_name = _safe_filename(title, str(photo.get('id') or 'untitled'))
            target = os.path.join(save_dir, f'{file_name}.mp4')

            # Stream to disk in chunks rather than holding the whole video in
            # memory, and check the status before creating the file so an
            # HTTP error page is never saved as an .mp4.
            with requests.get(url=new_url, stream=True, timeout=timeout) as video:
                video.raise_for_status()
                with open(target, mode='wb') as f:
                    for chunk in video.iter_content(chunk_size=1024 * 1024):
                        f.write(chunk)
            print(f'=======================正在下载标题为 {title} 的快手短视频==========================')

        next_cursor = profile['pcursor']
        if not next_cursor or next_cursor == "no_more" or next_cursor == pcursor:
            break
        pcursor = next_cursor
# Start the crawl with an empty cursor (presumably the first page of the
# profile feed — confirm against the API).
get_page(pcursor="")