摘要:
python采集京东app搜索商品数据(2023-10-30)
一、技术要点:
1、cookie可以从手机app端用charles抓包获取;
2、无需安装nodejs,纯python源码;
3、搜索接口为:functionId=search;
4、clientVersion = "10.1.4"同时也支持更高的版本;
5、sign签名算法已转成python源码;
6、body为(其中 keyword、page、pvid 为 Python 变量,由调用方传入):
{"addrFilter":"1","addressId":"0","articleEssay":"1","deviceidTail":"83","exposedCount":"0","frontExpids":"F_0_0","gcAreaId":"25,2258,2259,57314","gcLat":"24.343493","gcLng":"102.548052","imagesize":{"gridImg":"531x531","listImg":"390x390","longImg":"531x680"},"insertArticle":"1","insertScene":"1","insertedCount":"0","isCorrect":"1","keyword":keyword,"latitude":"24.343321","localNum":"0","longitude":"102.548038","newMiddleTag":"1","newVersion":"3","oneBoxMod":"1","orignalSearch":"1","orignalSelect":"1","page":page,"pageEntrance":"1","pagesize":"10","posAreaId":"25,2258,2259,57314","pvid":pvid,"searchVersionCode":"9398","secondInsedCount":"0","showShopTab":"yes","showStoreTab":"1","stock":"1"}
URL 编码后为(示例取值:keyword="小学生书包",page="1",pvid=""):
"body=%7B%22addrFilter%22%3A%221%22%2C%22addressId%22%3A%220%22%2C%22articleEssay%22%3A%221%22%2C%22deviceidTail%22%3A%2283%22%2C%22exposedCount%22%3A%220%22%2C%22frontExpids%22%3A%22F_0_0%22%2C%22gcAreaId%22%3A%2225%2C2258%2C2259%2C57314%22%2C%22gcLat%22%3A%2224.343493%22%2C%22gcLng%22%3A%22102.548052%22%2C%22imagesize%22%3A%7B%22gridImg%22%3A%22531x531%22%2C%22listImg%22%3A%22390x390%22%2C%22longImg%22%3A%22531x680%22%7D%2C%22insertArticle%22%3A%221%22%2C%22insertScene%22%3A%221%22%2C%22insertedCount%22%3A%220%22%2C%22isCorrect%22%3A%221%22%2C%22keyword%22%3A%22%E5%B0%8F%E5%AD%A6%E7%94%9F%E4%B9%A6%E5%8C%85%22%2C%22latitude%22%3A%2224.343321%22%2C%22localNum%22%3A%220%22%2C%22longitude%22%3A%22102.548038%22%2C%22newMiddleTag%22%3A%221%22%2C%22newVersion%22%3A%223%22%2C%22oneBoxMod%22%3A%221%22%2C%22orignalSearch%22%3A%221%22%2C%22orignalSelect%22%3A%221%22%2C%22page%22%3A%221%22%2C%22pageEntrance%22%3A%221%22%2C%22pagesize%22%3A%2210%22%2C%22posAreaId%22%3A%2225%2C2258%2C2259%2C57314%22%2C%22pvid%22%3A%22%22%2C%22searchVersionCode%22%3A%229398%22%2C%22secondInsedCount%22%3A%220%22%2C%22showShopTab%22%3A%22yes%22%2C%22showStoreTab%22%3A%221%22%2C%22stock%22%3A%221%22%7D&"
7、sign签名需要的参数:
sign=service.get_st_sign_sv(data_json, functionId, uuid, clientVersion)
data_json :即body
functionId :接口类型,即:functionId ="search"
uuid :设备标识,可抓包取得,即:uuid="d5aada6c69ce7237"
clientVersion:app版本号,即:clientVersion = "10.1.4"
# -*- coding: UTF-8 -*-
import requests,json
import pkgutil
import time
from urllib.parse import urlparse, parse_qs, urlunparse
import hashlib
import execjs
from urllib.parse import quote
import io
import sys
from coreMethod import service
def savetofile(text, filename):
    """Write ``text`` to ``filename`` as UTF-8, replacing any existing content.

    Args:
        text: The string to write.
        filename: Path of the file to create or overwrite.
    """
    # The context manager closes the handle even if write() raises.
    with open(filename, "w", encoding='utf-8') as file:
        file.write(text)
def print_hi(name):
    """Print a one-line greeting of the form ``Hi, <name>`` to stdout.

    Args:
        name: Value interpolated after ``Hi, ``.
    """
    print(f'Hi, {name}')
def jdsearch(keyword, page, pvid, timeout=30):
    """Run one product search against the JD app ``search`` endpoint.

    Builds the JSON search payload, signs it with
    ``service.get_st_sign_sv``, POSTs it to ``api.m.jd.com/client.action``
    and stores the raw response text in ``data.txt``.

    Side effects: prints the encoded body length, the request path, the
    response object and the response text; overwrites ``data.txt``.

    Args:
        keyword: Search term placed in the payload under ``"keyword"``.
        page: Page value placed in the payload under ``"page"``.
        pvid: Value placed in the payload under ``"pvid"``.
        timeout: Seconds passed to ``requests.post`` so a stalled
            connection raises instead of blocking forever.

    Returns:
        The response body as text (``res.text``).
    """
    # Key order is kept exactly as captured: the signature below is computed
    # over the serialized JSON, so reordering keys would change the signed text.
    body = {
        "addrFilter": "1",
        "addressId": "0",
        "articleEssay": "1",
        "deviceidTail": "83",
        "exposedCount": "0",
        "frontExpids": "F_0_0",
        "gcAreaId": "25,2258,2259,57314",
        "gcLat": "24.343493",
        "gcLng": "102.548052",
        "imagesize": {"gridImg": "531x531", "listImg": "390x390", "longImg": "531x680"},
        "insertArticle": "1",
        "insertScene": "1",
        "insertedCount": "0",
        "isCorrect": "1",
        "keyword": keyword,
        "latitude": "24.343321",
        "localNum": "0",
        "longitude": "102.548038",
        "newMiddleTag": "1",
        "newVersion": "3",
        "oneBoxMod": "1",
        "orignalSearch": "1",
        "orignalSelect": "1",
        "page": page,
        "pageEntrance": "1",
        "pagesize": "10",
        "posAreaId": "25,2258,2259,57314",
        "pvid": pvid,
        "searchVersionCode": "9398",
        "secondInsedCount": "0",
        "showShopTab": "yes",
        "showStoreTab": "1",
        "stock": "1",
    }
    # Compact separators and ensure_ascii=False keep the JSON text free of
    # extra spaces and \u escapes before it is signed and percent-encoded.
    data_json = json.dumps(body, ensure_ascii=False, separators=(",", ":"))
    data = "body=" + quote(data_json) + "&"
    data_len = len(data)
    print(data_len)

    functionId = "search"
    # Alternative identifier noted in the original source: "ZNVrYWHrDwC2EWDvDzSzDm=="
    uuid = "d5aada6c69ce7237"
    clientVersion = "10.1.4"
    api_url = "https://api.m.jd.com/client.action?functionId=search&clientVersion=10.1.4&build=90060&client=android&d_brand=Xiaomi&d_model=MI4LTE&osVersion=6.0.1&screen=1920*1080&partner=ks006&oaid=&eid=eidA0c138122bas4uo1qCosmRnqrZBkTZ+zEF7qNa5UCxrzSE5IyVBHJw4jzuBKyNz0TPXE0oY0j0H/viRPJy5RUE1KWCJuMWV52ufEtPyZiLpXsetVD&sdkVersion=23&lang=zh_CN&eu=8363533373230323933313336333&fv=93D2634303938303363663032626&uuid=d5aada6c69ce7237&aid=d5aada6c69ce7237&area=25_2258_2259_57314&networkType=wifi&wifiBssid=d9077de60f51d1d1d6f228a96f318e0c&uts=0f31TVRjBSsqndu4%2FjgUPz6uymy50MQJ57QGqe4EVdPCC%2F%2BLaIc%2B0FPy%2BbUQbRC5NoDqqw5SzXn%2B5IzKEzRV7U4tEboVX3RmAjshdIn%2BrmSbjWta516veiyWgiDkt89AHGr9Rp0fcz01k8UAHnDKv9FJVcx9o%2BHKLIDKAa6QQUSBNqX7wvJlkldVcZElJ1qX8Gf%2F4RKU8CYMjipFaink5w%3D%3D&uemps=0-0&harmonyOs=0"

    # The signature is computed over the un-encoded JSON text and appended to
    # the URL after '&' (presumably it is already a query-string fragment --
    # confirm against coreMethod.service).
    sign = service.get_st_sign_sv(data_json, functionId, uuid, clientVersion)
    api_url = api_url + '&' + sign
    # Path plus query string, i.e. the URL without scheme and host.
    real_url = api_url.replace("https://api.m.jd.com", "")
    print(real_url)

    # NOTE(review): the session credentials below are hard-coded; consider
    # loading them from configuration instead of keeping them in source.
    # Raw strings are used because the values contain "\/", which is an
    # invalid escape sequence in a normal literal; the runtime text is the same.
    headers = {
        "method": "POST",
        "path": real_url,
        "scheme": "https",
        "charset": "UTF-8",
        "jdc-backup": r'pin=jd_eiFeOmfKYTrp;wskey=AAJk-9PXAEDx4VigFTYENYvbbF_ih0fwKNPxtlGjLBxeKZDIHTo0dlOGi9kthRmaIPxAQc8LOjXjivrJlZRmTDk92_7gn7xD;whwswswws=AAo0qnXeKECW5dHtHD2SyG-aA65Td2U3WT7WUwAAAAAA;unionwsws={"devicefinger":"eidA0c138122bas4uo1qCosmRnqrZBkTZ+zEF7qNa5UCxrzSE5IyVBHJw4jzuBKyNz0TPXE0oY0j0H\/viRPJy5RUE1KWCJuMWV52ufEtPyZiLpXsetVD","jmafinger":"AAo0qnXeKECW5dHtHD2SyG-aA65Td2U3WT7WUwAAAAAA"};',
        "Authority": "api.m.jd.com",
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cookie": r'pin=jd_LStcEbIzGjuR;wskey=AAJkcwjDAECOW8kziBHNbB1cl-BdbgXIW1TU1mHHu7XkyHRHGTt36GiksxgJbLVNZ4Ni-XKKKXahMVmrVoH6IagBm2_RD_-n;whwswswws=JD012145b9Ooh6gW1WYH168526050472603eFWOcdKH3-r3FyvX7W1dFwIKf7xAscQeSZ4_zhAV07wcda_pYWPbGQW_fnTydijhfpLKLUlVzIXVOor8dB-N7vkfs3rWJTxi0p1xghd~lnPHS6M-VPm1DZUIGaKbW62XTKAcWHCstLD9GZCUslR9H9xVy8TeBaMkOHcMNNOZ-86d4vztzqMQD2jeEoOoHmHbOUCNqSQL3MKF19Ct66wbQf3UlOZ7A9SdiTCPFifNr5Fwf7rWgoO2rnN4XQXFJ-YkcJvtdXPQ11I6r3V44VJc;unionwsws={"devicefinger":"eidAe50181223cs8uIOuW0dWRka9iu\/jXMDOK7iDr+Oor0tc4JwaCNymIsN1KVlEBsjUDXYycNGHW5iEV8gds4\/u1TN0isUUwp8eUG+Fk\/pxSMrh\/flR","jmafinger":"JD012145b9Ooh6gW1WYH168526050472603eFWOcdKH3-r3FyvX7W1dFwIKf7xAscQeSZ4_zhAV07wcda_pYWPbGQW_fnTydijhfpLKLUlVzIXVOor8dB-N7vkfs3rWJTxi0p1xghd~lnPHS6M-VPm1DZUIGaKbW62XTKAcWHCstLD9GZCUslR9H9xVy8TeBaMkOHcMNNOZ-86d4vztzqMQD2jeEoOoHmHbOUCNqSQL3MKF19Ct66wbQf3UlOZ7A9SdiTCPFifNr5Fwf7rWgoO2rnN4XQXFJ-YkcJvtdXPQ11I6r3V44VJc"};',
        "cache-control": "no-cache",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "content-length": str(data_len),
        "user-agent": "okhttp/3.12.1;jdmall;android;version/10.1.4;build/90060;screen/1080x1920;os/6.0.1;network/wifi;",
    }

    # Without a timeout requests waits indefinitely on an unresponsive server.
    res = requests.post(url=api_url, headers=headers, data=data, timeout=timeout)
    print(res)
    text = res.text
    savetofile(text, "data.txt")
    print(text)
    return text
# Script entry point: print the banner, then run a single example search.
if __name__ == '__main__':
    print_hi('京东app搜索接口。技术支持:~~:byc6352;~~~:39848872')
    keyword = "小学生书包"
    page = "1"
    pvid = ""  # intentionally left empty
    jdsearch(keyword, page, pvid)