首页 > 其他分享 >海南免税商城数据爬取(破解滑块验证码)

海南免税商城数据爬取(破解滑块验证码)

时间:2024-04-22 22:35:36 浏览次数:22
标签:ident 滑块 stamp 验证码 爬取 result str time data

1. python部分

main.py

# -*- coding:utf-8 -*-

# @Time : 2024/4/15 21:24
# @Author : 快乐的小猴子
# @Version :
# @Function :
import subprocess
from functools import partial  # 专门用来固定参数的
subprocess.Popen = partial(subprocess.Popen, encoding="utf-8")
import execjs
import requests
import base64
"""
https://m.hltmsp.com/passport/login?backURL=%2F%2Fm.hltmsp.com%2Fuser
表单携带参数  sign
第一次的ident的值为时间戳  后续的ident的值为从响应里进行获取

思路
第一次登录请求会失败  并返回滑动验证
滑动验证通过后  再次进行登录  所以当前一共是4次请求
第一次请求
携带当前的参数 其中第一次参数中的ident为时间戳
第二次请求
请求滑动验证码接口,获取滑动验证所需的两张图片;该接口同时返回加密后的ident和滑块缺口的高度y
第三次请求
将滑动验证后的x,y的值进行传递 当前参数中的ident为上次返回的
第四次请求
进行登录 将上面的ident获取 并进行传递

注意
会有一定概率失败 重新运行即可
"""
# Four-step login flow against api.hltmsp.com:
#   1. POST the login form (ident = current timestamp); the response is ignored.
#   2. GET the slider captcha: two base64 images, the gap height y, and a new ident.
#   3. GET the captcha check endpoint with position "<x>_<y>" and that ident.
#   4. POST the login form again, this time carrying the ident from step 2.
# Every request is signed by a function in mianshui.js, executed through execjs.

# Seconds to wait for each HTTP response. requests has no default timeout, so
# without this a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT = 10

# A single Session is shared by all four requests.
session = requests.Session()
# Login endpoint
url = 'https://api.hltmsp.com/user/index/login'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
    'Sec-Ch-Ua-Platform': "Android",
    'Sec-Fetch-Site': 'same-site',
    'Referer': 'https://m.hltmsp.com/',
}
# Account information
mobile = ""  # fill in your own registered account
password = ""  # fill in your own registered password
nonce_str = 'o5ja9e8rrf6foed8r7dfewdrfsd'  # arbitrary random value
data = {
    "mobile": mobile,
    "password": password,
    "platformId": "1",
    "shop_id": "6",
    "token": "",
    "appid": "duty_h5",
    "nonce_str": nonce_str,
}
with open('mianshui.js', 'r', encoding='UTF-8') as f:
    js_code = f.read()
context = execjs.compile(js_code)

# --- Request 1: first login attempt ------------------------------------------
result = context.call('mianshui_login', mobile, password, nonce_str)
print(result, '第一次登录请求')
data['sign'] = result['sign']
data['time_stamp'] = result['time_stamp']
data['ident'] = result['ident']
# The response is deliberately not inspected; the flow simply continues with
# the captcha request.
session.post(url, headers=headers, data=data, timeout=REQUEST_TIMEOUT)

# --- Request 2: fetch the slider captcha -------------------------------------
# Example of the string that gets signed for this request:
# 'appid=duty_h5&ident=1701331075944&nonce_str=ofweg8efj3j&platformId=1&shop_id=6&t=1701331075943&time_stamp=33893723382&app_key=hltmsp5615466'
result = context.call('mianshui_code')
print(result, '第二次请求-获取验证码')

params = {
    "platformId": "1",
    "shop_id": "6",
    "t": result['ident'],
    "ident": result['ident'],
    "appid": "duty_h5",
    # Must be the same nonce_str that mianshui_code puts into the signed string.
    "nonce_str": "ofweg8efj3j",
    "time_stamp": result['time_stamp'],
    "sign": result['sign']
}
code_url = 'https://api.hltmsp.com/slider/index/get-code'
res = session.get(code_url, headers=headers, params=params, timeout=REQUEST_TIMEOUT)
code_data = res.json()['data']
y = code_data['y']  # height of the slider gap in the current captcha
ident = code_data['ident']  # ident to send back with the check and final login requests

# Both captcha images arrive as "<prefix>,<base64 payload>"; keep only the payload.
puzzle = code_data['puzzle'].split(',')[1]
watermark = code_data['watermark'].split(',')[1]
watermark_path = "watermark.png"
puzzle_path = "puzzle.png"
# Write the decoded base64 captcha images to local files for the recognition service.
puzzle_data = base64.b64decode(puzzle)
with open(puzzle_path, 'wb') as f:
    f.write(puzzle_data)
watermark_data = base64.b64decode(watermark)
with open(watermark_path, 'wb') as f:
    f.write(watermark_data)

# Captcha recognition through the ttshitu helper in tujian.py.
from tujian import base64_api
result = base64_api(uname='用户名', pwd='密码', img=puzzle_path, watermark=watermark_path, typeid=18)
# NOTE(review): base64_api returns the service's error message (also a string)
# when recognition fails, so `result` is not guaranteed to be coordinates here.

# --- Request 3: submit the slide position ------------------------------------
# position is "<x>_<y>": x is the first comma-separated field of the recognition
# result, y is the gap height returned by the get-code endpoint.
position = f"{result.split(',')[0]}_{y}"
check_params = {
    "position": position,
    "ident": ident,
    "platformId": "1",
    "shop_id": "6",
    "appid": "duty_h5",
    "nonce_str": nonce_str,
}

# Shape of the string that gets signed for this request:
# 'appid=duty_h5&ident=${ident}&nonce_str=${nonce_str}&platformId=1&position=${position}&shop_id=6&t=1701334716993&time_stamp=${time_stamp}&app_key=hltmsp5615466'
check_url = 'https://api.hltmsp.com/slider/index/check'
result = context.call('mianshui_check', ident, nonce_str, position)
print(result, '第三次 发送验证码值')
check_params['sign'] = result['sign']
check_params['time_stamp'] = result['time_stamp']
check_params['t'] = result['time']
res = session.get(check_url, headers=headers, params=check_params, timeout=REQUEST_TIMEOUT)
print(res.json())

# --- Request 4: final login ---------------------------------------------------
# Same form as request 1, but signed and sent with the ident from the captcha step.
result = context.call('mianshui_login', mobile, password, data['nonce_str'], ident)
print(result, '第四次 进行最后登录')
data['sign'] = result['sign']
data['time_stamp'] = result['time_stamp']
data['ident'] = ident
res = session.post(url, headers=headers, data=data, timeout=REQUEST_TIMEOUT)
print(res.json())

tujian.py

import base64
import json
import requests
"""
识别验证码图片 图鉴
"""

def base64_api(uname, pwd, img, watermark, typeid, timeout=60):
    """Send a pair of captcha images to the ttshitu recognition API.

    Args:
        uname: Account name, sent as the ``username`` field.
        pwd: Account password, sent as the ``password`` field.
        img: Path of the image file sent base64-encoded as ``image``.
        watermark: Path of the image file sent base64-encoded as ``imageback``.
        typeid: Recognition type id, forwarded unchanged as ``typeid``.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` can block forever on a stalled connection.

    Returns:
        ``result["data"]["result"]`` when the response reports success,
        otherwise the response's ``message`` field. Callers cannot tell the
        two apart by type and must validate the value themselves.
    """
    with open(img, 'rb') as f:
        b64 = base64.b64encode(f.read()).decode()
    with open(watermark, 'rb') as f:
        b64_imageback = base64.b64encode(f.read()).decode()
    data = {"username": uname, "password": pwd, "typeid": typeid, "image": b64, 'imageback': b64_imageback}
    response = requests.post("http://api.ttshitu.com/predict", json=data, timeout=timeout)
    result = json.loads(response.text)
    if result['success']:
        return result["data"]["result"]
    # NOTE: failures (e.g. not enough human workers available) end up here.
    # Add retry/handling logic at the call site so the script does not stall
    # or continue with an error message in place of coordinates.
    return result["message"]


if __name__ == "__main__":
    # Manual smoke test: recognise the two captcha images in the working directory.
    import os

    watermark = "watermark.png"
    puzzle = "puzzle.png"
    # Credentials are read from the environment so that no real account name
    # or password is stored in (or published with) the source code.
    result = base64_api(
        uname=os.environ.get("TTSHITU_USERNAME", ""),
        pwd=os.environ.get("TTSHITU_PASSWORD", ""),
        img=puzzle,
        watermark=watermark,
        typeid=18,
    )
    print(result)
    # Build the slide position as "<x>_<y>", using a dummy y of 1.
    position = str(result.split(',')[0]) + '_' + str(1)
    print(position)

mianshui.js

const CryptoJS = require('crypto-js');
/*
* 优化后的
* */

/**
 * Build the signed parameters for the login request.
 *
 * @param mobile    account phone number, included in the signed string
 * @param password  account password, included in the signed string
 * @param nonce_str nonce value, included in the signed string
 * @param i         ident to sign with; when falsy, the current millisecond
 *                  timestamp is used instead (first login attempt)
 * @returns {{sign: string, time_stamp: string, ident: *}}
 */
function mianshui_login(mobile, password, nonce_str, i = null) {
    // First login: ident is a fresh timestamp. Final login: ident is passed in.
    const ident = i || new Date().getTime();

    // time_stamp: (2022051288 + unix seconds) as a string, with its last n
    // characters moved to the front and n itself prefixed (n is random, 1..9).
    const shifted = String(2022051288 + Math.floor(Date.now() / 1e3));
    const n = Math.floor(9 * Math.random()) + 1;
    const cut = shifted.length - n;
    const time_stamp = "" + n + shifted.substring(cut, shifted.length) + shifted.substring(0, cut);

    // The signature is the upper-case MD5 hex digest of the query-like string below.
    const payload = `appid=duty_h5&ident=${ident}&mobile=${mobile}&nonce_str=${nonce_str}&password=${password}&platformId=1&shop_id=6&time_stamp=${time_stamp}&app_key=hltmsp5615466`;
    const sign = CryptoJS.MD5(payload).toString().toUpperCase();
    console.log(sign);
    return { sign: sign, time_stamp: time_stamp, ident: ident };
}
/*'appid=duty_h5&ident=1701325763030&mobile=17333333333&nonce_str=wob2whdd9f91f6degffcf4eftr&password=17333333333&platformId=1&shop_id=6&time_stamp=97233770513&app_key=hltmsp5615466'*/


/**
 * Build the signed parameters for the slider-captcha check request.
 *
 * @param ident     ident value included in the signed string
 * @param nonce_str nonce value included in the signed string
 * @param position  slide position string included in the signed string
 * @returns {{sign: string, time_stamp: string, time: number}} `time` is the
 *          exact millisecond value that was signed as `t`; the caller must
 *          send it unchanged as the `t` request parameter.
 */
function mianshui_check(ident, nonce_str, position){
    // Read the clock once. The same value is signed as `t` and returned as
    // `time`, so the parameter the caller sends always matches the signature.
    // (Previously two separate clock reads were used and could differ.)
    const time = (new Date).getTime();

    // time_stamp: (2022051288 + unix seconds) as a string, with its last n
    // characters moved to the front and n itself prefixed (n is random, 1..9).
    let t = 2022051288 + Math.floor(time / 1e3) + "";
    let n = Math.floor(9 * Math.random()) + 1;
    let time_stamp = "" + n + t.substring(t.length - n, t.length) + t.substring(0, t.length - n)

    // The signature is the upper-case MD5 hex digest of the query-like string below.
    let o = `appid=duty_h5&ident=${ident}&nonce_str=${nonce_str}&platformId=1&position=${position}&shop_id=6&t=${time}&time_stamp=${time_stamp}&app_key=hltmsp5615466`
    const sign = CryptoJS.MD5(o).toString().toUpperCase();
    console.log(sign)
    return {sign:sign, time_stamp: time_stamp, time:time}
}
/**
 * Build the signed parameters for the "get slider captcha" request.
 *
 * @param nonce_str nonce included in the signed string; defaults to the value
 *                  that was previously hard-coded, so existing zero-argument
 *                  calls behave as before. The caller must send the same
 *                  nonce_str in the request.
 * @returns {{sign: string, time_stamp: string, ident: number}} `ident` is the
 *          millisecond timestamp that was signed as both `ident` and `t`.
 */
function mianshui_code(nonce_str = 'ofweg8efj3j'){
    let ident = (new Date).getTime()

    // time_stamp: (2022051288 + unix seconds) as a string, with its last n
    // characters moved to the front and n itself prefixed (n is random, 1..9).
    let t = 2022051288 + Math.floor(Date.now() / 1e3) + "";
    let n = Math.floor(9 * Math.random()) + 1;
    let time_stamp = "" + n + t.substring(t.length - n, t.length) + t.substring(0, t.length - n)

    // Only `ident` is returned, so a caller can only send it for both the
    // `ident` and `t` parameters; sign exactly that. (Previously `t` was
    // signed as ident+2, a value the caller never sees.)
    // NOTE(review): assumes the server checks the signature against the `t`
    // it receives - confirm against the live API.
    let o = `appid=duty_h5&ident=${ident}&nonce_str=${nonce_str}&platformId=1&shop_id=6&t=${ident}&time_stamp=${time_stamp}&app_key=hltmsp5615466`
    const sign = CryptoJS.MD5(o).toString().toUpperCase();
    console.log(sign)
    return {sign:sign, time_stamp: time_stamp, ident: ident}
}

说明:
mianshui.js 依赖 crypto-js 加密库:请在 Terminal 控制台、项目目录下执行 npm install crypto-js 命令进行安装。如需将 npm 更换为国内镜像,可参考"js逆向部分"中关于更换 npm 国内镜像的随笔文章。

标签:ident,滑块,stamp,验证码,爬取,result,str,time,data
From: https://www.cnblogs.com/zczhaod/p/18136953

相关文章

  • 使用pytesseract库识别图形验证码的简单案例
    importrequestsfromPILimportImageimportpytesseract#获取验证码图片url='http://jw.glutnn.cn/academic/getCaptcha.do?captchaCheckCode=0&random=0.20354331774429668'response=requests.get(url)#保存验证码图片withopen('captcha.png',&......
  • 爬虫爬取网站
    1.建数据库product表和product_data表CREATETABLEproduct(idINTAUTO_INCREMENTPRIMARYKEY,nameVARCHAR(255)NOTNULL,urlVARCHAR(255)NOTNULL);CREATETABLEproduct_data(idINTAUTO_INCREMENTPRIMARYKEY,product_idINT,pr......
  • HarmonyOS NEXT应用开发—验证码布局
    介绍本示例介绍如何使用Text组件实现验证码场景,并禁用对内容的选中、复制、光标。效果图预览使用说明单击组件可弹出输入法在进行验证码输入时,无法对中间单个数字进行更改,无法选中输入内容,无光标实现思路因为要禁用复制、选中等功能,这里使用了Text组件,而不是TextInput......
  • 企业微信hook 最新版 、企微输入验证码,二次扫码方案、发名片收消息功能,企业微信hook源
    ​产品说明在PC端企业微信客户端上发送自定义名片、封装企业微信功能为DLL,可与其他语言调用实现功能。DLL可以监听企业微信的所有消息接收和群消息,根据需求实现机器人、群发、自动消息推送、聊天机器人、监管数据收集等功能企业微信hook,企业微信功能api,自定义开发功能清......
  • 5. 验证码实现
    1.ImageServletpackagecom.fei.servlet;importjavax.imageio.ImageIO;importjavax.servlet.ServletException;importjavax.servlet.http.HttpServlet;importjavax.servlet.http.HttpServletRequest;importjavax.servlet.http.HttpServletResponse;importjava.a......
  • 建设库数据爬取
    1.python部分:#-*-coding:utf-8-*-#@Time:2024/4/1417:57#@Author:快乐的小猴子#@Version:#@Function:importrequestsimportjsonimportsubprocessfromfunctoolsimportpartial#专门用来固定参数的subprocess.Popen=partial(subprocess.Pop......
  • jy验证码协议加密分析学习记录
    目录流程参考免责声明:分析请求与加密参数register请求获取challenge和gt第一次W值分析第二次提交W分析第二次W明文值分析第三个W值分析验证小坑流程参考免责声明:本文章主要用于技术交流学习,作者不承担任何滥用技术所产生的法律责任。如有侵权,请联系作者删除!分析请求与加......
  • 喜马拉雅节目爬取
    1.python部分:喜马拉雅.py#-*-coding:utf-8-*-#@Time:2024/4/149:47#@Author:快乐的小猴子#@Version:#@Function:爬取喜马拉雅节目数据importrequestsimportsubprocessfromfunctoolsimportpartial#专门用来固定参数的subprocess.Popen=parti......
  • 图片验证码 python版本
    importrandomimportstringimportbase64fromioimportBytesIOfromPILimportImage,ImageDraw,ImageFontchars_lower_digits=string.ascii_lowercase+string.digitsdefrndColor():"""生成随机颜色:return:""&quo......
  • 基于python数据爬取和可视化
    系统介绍本系统介绍了一个基于基于Python的电影数据爬取和可视化技术的研究和方案设计,采用Python语言和Django架构构建。通过网络爬虫爬取豆瓣电影评论数据,利用数据清洗和处理技术,建立了一个全面的电影信息数据库。使用Python中强大的数据处理库进行统计分析,常见的一些库panda......