通过爬虫方式获取小红书授权登录的cookie的代码

标签：code 小红书爬虫 content url qr cookie id

1、代码里用到的 normal_sign.js 来自 CSDN 博客《某书签名算法xs,xt的实现》；

2、CookieUtil 工具类的代码见 CSDN 博客《抖音最新bd-ticket-guard-client-data逆向方法（2024年11月）》中的 CookieUtil.py。

import json
import time
import zlib
from urllib.parse import urlparse

import execjs
import requests
import random
import uuid
import os
import qrcode
import hashlib
from fake_useragent import UserAgent

from CookieUtil import CookieUtil

# Compile the signing script (it provides the ``get_xs_xt`` function used for the
# x-s / x-t request headers) once at import time. The context manager closes the
# file handle as soon as its text has been read instead of leaking it.
with open(r"xiaohongshu/normal_sign.js", "r", encoding="utf-8") as _sign_js_file:
    normal_js = execjs.compile(_sign_js_file.read())


def get_platform_code():
    """Return the numeric platform code embedded in generated local ids.

    Code table noted by the original author:
    iOS = 1, Android = 2, MacOs = 3, Linux = 4, other = 5.
    This script always reports MacOs.
    """
    macos_code = 3
    return macos_code

def gen_random_string(length):
    """Return ``length`` characters picked at random from ``0-9`` and ``a-z``."""
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    picked = []
    for _ in range(length):
        # One random.choice() call per output character.
        picked.append(random.choice(alphabet))
    return ''.join(picked)

def generate_local_id():
    """Build a local id string, truncated to at most 52 characters.

    Layout before truncation: hex millisecond timestamp, 30 random
    characters, platform code, secret version ``"0"``, the literal ``"000"``,
    then the decimal CRC32 of everything preceding it.
    """
    secret_version = "0"
    # Current time in milliseconds, lowercase hex without the "0x" prefix.
    millis_hex = format(int(time.time() * 1000), 'x')
    payload = "".join((
        millis_hex,
        gen_random_string(30),
        str(get_platform_code()),
        secret_version,
        "000",
    ))
    # Mask keeps the checksum an unsigned 32-bit value.
    checksum = zlib.crc32(payload.encode('utf-8')) & 0xffffffff
    return f"{payload}{checksum}"[:52]

def get_gid_acw_tc(user_agent, timeout=10):
    """Fetch the ``gid`` and ``acw_tc`` cookies from the web-profile endpoint.

    Args:
        user_agent: Value sent as the ``user-agent`` request header.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled connection would block forever.

    Returns:
        A ``(gid, acw_tc)`` tuple read from the response cookies; either item
        is ``None`` when the server did not set that cookie.
    """
    url = "https://www.xiaohongshu.com/api/sec/v1/shield/webprofile"

    headers = {
        'origin': 'https://creator.xiaohongshu.com',
        'referer': 'https://creator.xiaohongshu.com/',
        'user-agent': user_agent,
        'content-type': 'application/json;charset=UTF-8'
    }

    data = {
        "platform": "Mac OS",
        "sdkVersion": "3.7.8",
        "svn": "2",
        "profileData": ""
    }

    # Compact separators: the body is sent as a pre-serialized JSON string.
    json_str = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
    response = requests.post(url, headers=headers, data=json_str, timeout=timeout)
    cookies = response.cookies
    return cookies.get("gid"), cookies.get("acw_tc")

def create_qr_code(qr_code_id):
    """Save the login-confirmation QR code as a PNG and try to open it.

    The QR code encodes the ``loginconfirm`` URL for ``qr_code_id`` and is
    written to ``xiaohongshu_qrcode.png`` in the current working directory.
    Opening the image in the system viewer is best-effort: if no viewer can be
    launched, a message is printed and the file is left on disk.

    Args:
        qr_code_id: QR code id to embed in the confirmation URL.
    """
    import subprocess
    import sys

    url = f"https://customer.xiaohongshu.com/loginconfirm?fullscreen=true&sceneId=sso&qrCodeId={qr_code_id}"
    qr = qrcode.QRCode(
        version=1,  # smallest symbol size; make(fit=True) grows it as needed
        error_correction=qrcode.constants.ERROR_CORRECT_L,  # error-correction level
        box_size=10,  # pixels per module ("dot")
        border=4,  # border width, in modules
    )

    qr.add_data(url)
    qr.make(fit=True)

    # ``fill_color`` is the keyword make_image() documents for the module colour;
    # the previous ``fill=`` keyword is not one of them.
    img = qr.make_image(fill_color="black", back_color="white")
    image_path = "xiaohongshu_qrcode.png"
    img.save(image_path)

    # ``open`` only exists on macOS, so choose the opener per platform.
    try:
        if sys.platform == "darwin":
            subprocess.run(["open", image_path], check=False)
        elif sys.platform.startswith("win"):
            os.startfile(image_path)
        else:
            subprocess.run(["xdg-open", image_path], check=False)
    except OSError as exc:
        print(f"无法自动打开二维码图片({exc})，请手动打开: {image_path}")

def get_qr_code(cookie_content, user_agent, timeout=10):
    """Request a new login QR code, save it as an image, and return its id.

    Args:
        cookie_content: Cookie header string; it must contain an ``a1`` entry,
            which is passed to the signing script.
        user_agent: Value sent as the ``user-agent`` request header.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled connection would block forever.

    Returns:
        The QR code id taken from ``data.id`` of the JSON response.

    Raises:
        KeyError: If ``a1`` is missing from the cookies or the response has no
            ``data.id`` field.
    """
    request_url = "https://customer.xiaohongshu.com/api/cas/customer/web/qr-code"

    data = {
        "service": "https://creator.xiaohongshu.com"
    }

    # The signing script receives the path plus query string, not the full URL.
    parsed_url = urlparse(request_url)
    uri_with_query = parsed_url.path + ('?' + parsed_url.query if parsed_url.query else '')

    cookie_dict = CookieUtil.cookies_to_dict(cookie_content)
    a1 = cookie_dict['a1']

    xs = normal_js.call("get_xs_xt", uri_with_query, data, a1)

    headers = {
        'user-agent': user_agent,
        'x-s': xs['X-s'],
        'x-t': str(xs['X-t']),
        'Cookie': cookie_content,
        'content-type': 'application/json;charset=UTF-8'
    }

    print(headers['x-s'])
    print(headers['x-t'])

    json_str = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
    response = requests.post(request_url, headers=headers, data=json_str, timeout=timeout)
    print(response.status_code)
    payload = json.loads(response.text)
    print(json.dumps(payload, indent=4, ensure_ascii=False))
    qr_code_id = payload['data']['id']
    # Save the QR code as a local image so it can be scanned for testing.
    create_qr_code(qr_code_id)
    return qr_code_id


def check_login_status(cookie_content, user_agent, qr_code_id, timeout=10):
    """Poll the QR-code endpoint once per second until the login resolves.

    Status values handled (meanings taken from the messages printed below):
    1 = login succeeded, 2 = waiting for scan, 3 = scanned and waiting for
    confirmation in the app, 4 = QR code expired. Any other value stops the
    polling.

    Args:
        cookie_content: Cookie header string; it must contain an ``a1`` entry.
        user_agent: Value sent as the ``user-agent`` request header.
        qr_code_id: Id of the QR code being polled.
        timeout: Seconds to wait for each poll before ``requests`` raises a
            timeout error. Without it a stalled connection would hang the loop.

    Returns:
        On success, ``cookie_content`` extended with the cookies set by the
        final response. ``None`` when the QR code expired or an unknown
        status was received.
    """
    request_url = f"https://customer.xiaohongshu.com/api/cas/customer/web/qr-code?service=https:%2F%2Fcreator.xiaohongshu.com&qr_code_id={qr_code_id}&source="

    # The signing script receives the path plus query string, not the full URL.
    parsed_url = urlparse(request_url)
    uri_with_query = parsed_url.path + ('?' + parsed_url.query if parsed_url.query else '')

    cookie_dict = CookieUtil.cookies_to_dict(cookie_content)
    a1 = cookie_dict['a1']

    while True:
        # Signed afresh on every poll, as in the original loop
        # (presumably because the signature carries a timestamp, X-t).
        xs = normal_js.call("get_xs_xt", uri_with_query, '', a1)

        headers = {
            'origin': 'https://creator.xiaohongshu.com',
            'referer': 'https://creator.xiaohongshu.com/',
            'user-agent': user_agent,
            'x-s': xs['X-s'],
            'x-t': str(xs['X-t']),
            'Cookie': cookie_content
        }
        response = requests.get(request_url, headers=headers, timeout=timeout)
        print(response.status_code)
        print(response.text)
        payload = json.loads(response.text)
        status = payload['data']['status']
        if status == 1:
            print("登录成功!!!")
            cookie_string = '; '.join(f'{key}={value}' for key, value in response.cookies.items())
            # Avoid a dangling "; " when the response set no new cookies.
            if not cookie_string:
                return cookie_content
            return cookie_content + "; " + cookie_string
        elif status == 2:
            print("待扫码...")
        elif status == 3:
            print("已扫码，请在手机app上确认...")
        elif status == 4:
            print("二维码已过期，请重新生成!")
            return None
        else:
            print("status: ", status)
            return None

        time.sleep(1)


def compute_md5(input_string):
    """Return the lowercase hex MD5 digest of ``input_string`` encoded as UTF-8."""
    utf8_bytes = input_string.encode('utf-8')
    return hashlib.md5(utf8_bytes).hexdigest()

def login(user_agent):
    """Run the QR-code login flow and save the resulting cookies to disk.

    Builds an initial cookie string from locally generated ids plus the
    ``gid`` / ``acw_tc`` cookies, requests a QR code, and polls until the login
    resolves. On success the cookie string is written to
    ``xiaohongshu_cookie.txt``. If the login does not complete, nothing is
    written and any existing cookie file is left untouched.

    Args:
        user_agent: Value sent as the ``user-agent`` header on every request.
    """
    a1 = generate_local_id()
    webid = compute_md5(a1)
    sec_poison_id = str(uuid.uuid4())
    websectiga = gen_random_string(64)
    gid, acw_tc = get_gid_acw_tc(user_agent)

    # NOTE(review): "sec_poison_id= " has a space after "=" in the original;
    # kept byte-for-byte because it is unclear whether that is intentional.
    cookie_content = f"xsecappid=ugc; a1={a1}; webId={webid}; websectiga={websectiga}; sec_poison_id= {sec_poison_id}; gid={gid}; acw_tc={acw_tc}"
    qr_code_id = get_qr_code(cookie_content, user_agent)

    login_cookie_content = check_login_status(cookie_content, user_agent, qr_code_id)
    if login_cookie_content is None:
        # The QR code expired or an unknown status came back. Opening the file
        # with 'w' would truncate it and write(None) would raise TypeError.
        print("登录未完成，未写入cookie文件。")
        return

    print("login cookie:\n", login_cookie_content)
    with open('xiaohongshu_cookie.txt', 'w', encoding='utf-8') as file:
        file.write(login_cookie_content)


if __name__ == '__main__':
    # Take a desktop Chrome user-agent string (Windows or macOS) from
    # fake_useragent and start the QR-code login flow with it.
    chrome_ua = UserAgent(platforms=['pc'], os=["windows", "macos"]).chrome
    login(chrome_ua)

标签：code,小红书,爬虫,content,url,qr,cookie,id
From： https://blog.csdn.net/sh_moranliunian/article/details/143747074

通过爬虫方式获取小红书授权登录的cookie的代码

1、代码里用到的 normal_sign.js 来自 CSDN 博客《某书签名算法xs,xt的实现》；

2、CookieUtil 工具类的代码见 CSDN 博客《抖音最新bd-ticket-guard-client-data逆向方法（2024年11月）》中的 CookieUtil.py。

相关文章

赞助商

阅读排行