url:https://www.ximalaya.com/channel/11/
分析过程
-
抓包,关注有页面数据回显的数据包。
该url的请求头中有个加密的参数,找到该参数的加密过程。
-
由于该参数名比较不常见,可以直接全局搜索这个参数名。
只有一处,打断点。
-
切换页码,触发断点。
-
非常直接,
xm-sign
是由d.getSign()
生成的,直接找到getSign
函数的定位。
-
在该函数的第一行打断点,让程序运行到该断点处。
单步调试,看看每个变量的值是什么。
n = s() ? Date.now() : window.XM_SERVER_CLOCK || 0
,由于s()
的值恒为false,故n = window.XM_SERVER_CLOCK
。但window.XM_SERVER_CLOCK
的值和Date.now()
的值相差不多,推测它是服务器返回的时间戳,这里先不深究(后面整理出的代码中直接用Date.now()代替)。
t = this[c("0x13")]
,c("0x13")
的值为secretKey,故t
的值为himalaya-
。
e = n
这个就不解释了。
r = Date[c("0x25")]()
,c("0x25")
的值为now,故Date[c("0x25")]()
相当于Date.now()
,获取时间戳。
这里涉及到了c
,其是用于获取s
中的某个值。
s
的值如下。
那么c("0x..")
这样形式的调用,就是把十六进制字符串转换成数组下标,去获取s
中对应位置的值(例如c("0x28")取的是s[40],即random)。
继续往下看。
("{" + t + e + "}(" + u(100) + ")" + e + "(" + u(100) + ")" + r)
,这就是一个字符串的拼接,涉及到了u
函数,看看u
函数是啥。
c("0x28")
的值为random,故Math[c("0x28")]()
相当于Math.random()
获取一个随机值,所以本质上u
函数就是生成一个[0, t)范围内的随机整数(~~的作用是截断取整)。
[c("0xa")](/{([\w-]+)}/, (function(t, e) {return a(e) }))
,[c("0xa")]
的值是replace,那么就是将前面拼接的字符串中符合/{([\w-]+)}/
格式的部分(也就是最前面的"{himalaya-时间戳}")替换成回调函数(function(t, e) {return a(e) })的返回值,其中e是花括号内被捕获的内容
,那么关键点就在a
函数上了,找到定义处。
打断点。
单步调试。由于t
不为null,故if语句不执行,直接执行r[c("0x3c")](u(t, e))
。c("0x3c")
的值为wordsToBytes
。
r[c("0x3c")]
的值如下。
u
函数的定义如下。
这里面的代码不需要完全看懂,最后只需要复制该代码进行调用即可。
return e && e[c("0x2")] ? n : e && e[c("0x2a")] ? s[c("0x18")](n) : r[c("0x21")](n)
,由于e
的值为undefined,故最后返回的是r[c("0x21")](n)
,该函数就是将字节转换成16进制。
整个加密过程分析完了,只需要将相关代码复制到js中即可。(注意,由于其中很多变量名都是一样的,所以在复制的时候需要对其进行对应的区分,这里很容易出错)
完整的js代码如下:
// Lookup table of property and method names taken from the obfuscated script.
// c("0x..") converts its hex-string argument to a number and returns the entry
// at that index, so the ORDER of the entries is significant: inserting,
// removing, or reordering any entry changes what every c(...) call resolves to.
var s=[
"xmSign",
"slice",
"asBytes",
"object",
"readFloatLE",
"Illegal argument ",
"charCodeAt",
"send",
"himalaya-",
"prototype",
"replace",
"join",
"clockTimer",
"_gg",
"isBuffer",
"initServerTimeUrl",
"stringToBytes",
"_hh",
"INISTAL_TIME",
"secretKey",
"_ii",
"push",
"responseText",
"GET",
"bytesToString",
"binary",
"indexOf",
"length",
"toString",
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/",
"endian",
"exports",
"XM_SERVER_CLOCK",
"bytesToHex",
"floor",
"call",
"_blocksize",
"now",
"CLOCK_UPDATE_INTERVAL",
"getSign",
"random",
"protocol",
"asString",
"amd",
"pow",
"https:",
"_instance",
"_ff",
"bytesToWords",
"https://www.ximalaya.com/revision/time",
"host",
"onreadystatechange",
"encoding",
"isArray",
"start",
"function",
"URL",
"charAt",
"bin",
"constructor",
"wordsToBytes",
"fromCharCode",
"location",
"undefined",
"readyState",
"rotl",
"/revision/time",
"_isBuffer",
"getServerDate",
"then",
"test",
"substr",
"updateClock"
];
/**
 * Resolve an obfuscated key such as "0x28" to the string stored at that index
 * of the lookup table `s`.
 *
 * Declared as a function instead of being assigned to an undeclared
 * identifier, which throws a ReferenceError in strict mode and ES modules.
 *
 * @param {string|number} t - Index into `s`; hex strings like "0x1b" are accepted.
 * @param {*} [e] - Unused; kept so the signature matches the original.
 * @returns {string|undefined} The table entry, or undefined when out of range.
 */
function c(t, e) {
    // Number("0x28") === 40, the same conversion the original `t -= 0` performed,
    // but without reassigning the parameter.
    return s[Number(t)];
}
/**
 * Return a pseudo-random integer in [0, t) for a positive `t`.
 *
 * Declared as a function instead of being assigned to an undeclared
 * identifier (a ReferenceError in strict mode / ES modules), and calls
 * Math.random directly rather than resolving the name through the lookup table.
 *
 * @param {number} t - Exclusive upper bound.
 * @returns {number} An integer obtained by truncating Math.random() * t.
 */
function u(t) {
    // `~~` truncates toward zero, exactly as the original expression did.
    return ~~(Math.random() * t);
}
/**
 * Bit- and byte-level conversion helpers used by the hashing code.
 *
 * Differences from the original object:
 *  - property names are written out instead of being resolved through c(),
 *    so the helpers no longer depend on the lookup table;
 *  - the base64 helpers use a locally defined alphabet. The original referenced
 *    an identifier `e` that is not defined anywhere in this file, so both
 *    helpers threw a ReferenceError when called.
 */
var r = (function () {
    var BASE64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    var helpers = {
        /** Rotate the 32-bit value `t` left by `e` bits. */
        rotl: function (t, e) {
            return (t << e) | (t >>> (32 - e));
        },

        /** Rotate the 32-bit value `t` right by `e` bits. */
        rotr: function (t, e) {
            return (t << (32 - e)) | (t >>> e);
        },

        /**
         * Swap the byte order of a 32-bit number, or of every element of an
         * array (the array is modified in place and returned).
         */
        endian: function (t) {
            if (t.constructor === Number) {
                return (16711935 & helpers.rotl(t, 8)) | (4278255360 & helpers.rotl(t, 24));
            }
            for (var i = 0; i < t.length; i++) {
                t[i] = helpers.endian(t[i]);
            }
            return t;
        },

        /** Return an array of `t` random byte values (0-255). */
        randomBytes: function (t) {
            var bytes = [];
            for (; t > 0; t--) {
                bytes.push(Math.floor(256 * Math.random()));
            }
            return bytes;
        },

        /** Pack a byte array into 32-bit words, first byte in the high bits. */
        bytesToWords: function (t) {
            var words = [];
            for (var i = 0, bit = 0; i < t.length; i++, bit += 8) {
                words[bit >>> 5] |= t[i] << (24 - (bit % 32));
            }
            return words;
        },

        /** Unpack 32-bit words into bytes, high byte of each word first. */
        wordsToBytes: function (t) {
            var bytes = [];
            for (var bit = 0; bit < 32 * t.length; bit += 8) {
                bytes.push((t[bit >>> 5] >>> (24 - (bit % 32))) & 255);
            }
            return bytes;
        },

        /** Render a byte array as a lowercase hex string, two digits per byte. */
        bytesToHex: function (t) {
            var digits = [];
            for (var i = 0; i < t.length; i++) {
                digits.push((t[i] >>> 4).toString(16));
                digits.push((15 & t[i]).toString(16));
            }
            return digits.join("");
        },

        /** Parse a hex string (two digits per byte) into a byte array. */
        hexToBytes: function (t) {
            var bytes = [];
            for (var i = 0; i < t.length; i += 2) {
                bytes.push(parseInt(t.substr(i, 2), 16));
            }
            return bytes;
        },

        /** Encode a byte array as base64, padded with "=". */
        bytesToBase64: function (t) {
            var out = [];
            for (var i = 0; i < t.length; i += 3) {
                // Missing trailing bytes are `undefined`, which the shifts treat as 0.
                var triplet = (t[i] << 16) | (t[i + 1] << 8) | t[i + 2];
                for (var j = 0; j < 4; j++) {
                    if (8 * i + 6 * j <= 8 * t.length) {
                        out.push(BASE64_ALPHABET.charAt((triplet >>> (6 * (3 - j))) & 63));
                    } else {
                        out.push("=");
                    }
                }
            }
            return out.join("");
        },

        /** Decode a base64 string into bytes; characters outside A-Z, a-z, 0-9, + and / are ignored. */
        base64ToBytes: function (t) {
            t = t.replace(/[^A-Z0-9+\/]/gi, "");
            var bytes = [];
            for (var i = 0, mod4 = 0; i < t.length; mod4 = ++i % 4) {
                if (mod4 !== 0) {
                    var high = BASE64_ALPHABET.indexOf(t.charAt(i - 1)) & (Math.pow(2, -2 * mod4 + 8) - 1);
                    var low = BASE64_ALPHABET.indexOf(t.charAt(i)) >>> (6 - 2 * mod4);
                    bytes.push((high << (2 * mod4)) | low);
                }
            }
            return bytes;
        }
    };

    return helpers;
})();
/**
 * Compute the MD5 digest of a message (the constants and round structure are
 * those of RFC 1321) and return it as four 32-bit words after r.endian().
 *
 * Fixes relative to the original:
 *  - declared as a function instead of assigned to an undeclared identifier
 *    (a ReferenceError in strict mode / ES modules);
 *  - the `encoding: "binary"` branch called `s.stringToBytes`, but `s` in this
 *    file is the string lookup table, so it threw a TypeError; it now uses the
 *    local one-byte-per-character encoder;
 *  - the buffer check called `a(t)`, which in this file is the hashing
 *    function itself, so any non-string input recursed without end; it now
 *    performs a real isBuffer test.
 *
 * Relies on `r.bytesToWords` and `r.endian` from the helper object `r`.
 *
 * @param {string|Array<number>|Object} t - Message. Strings are encoded as
 *     UTF-8 unless `e.encoding === "binary"`; buffers are copied to a plain
 *     array; arrays are used as byte arrays; anything else is passed through
 *     toString().
 * @param {{encoding: (string|undefined)}} [e] - Options.
 * @returns {Array<number>} Four 32-bit words holding the digest.
 */
function uuu(t, e) {
    // One byte per UTF-16 code unit (low 8 bits only).
    function binaryStringToBytes(str) {
        var bytes = [];
        for (var i = 0; i < str.length; i++) {
            bytes.push(255 & str.charCodeAt(i));
        }
        return bytes;
    }

    // UTF-8 encode first, then take one byte per resulting character.
    function utf8StringToBytes(str) {
        return binaryStringToBytes(unescape(encodeURIComponent(str)));
    }

    // True when the value's constructor exposes isBuffer() and accepts the value.
    function isBuffer(obj) {
        return obj != null && obj.constructor != null &&
            typeof obj.constructor.isBuffer === "function" &&
            obj.constructor.isBuffer(obj);
    }

    var message = t;
    if (message.constructor === String) {
        message = e && e.encoding === "binary"
            ? binaryStringToBytes(message)
            : utf8StringToBytes(message);
    } else if (isBuffer(message)) {
        message = Array.prototype.slice.call(message, 0);
    } else if (!Array.isArray(message)) {
        message = message.toString();
    }

    var n = r.bytesToWords(message);
    var i = 8 * message.length; // message length in bits
    var l = 1732584193;
    var f = -271733879;
    var d = -1732584194;
    var h = 271733878;
    var p;

    // Swap the byte order of every word.
    for (p = 0; p < n.length; p++) {
        n[p] = (16711935 & ((n[p] << 8) | (n[p] >>> 24))) |
            (4278255360 & ((n[p] << 24) | (n[p] >>> 8)));
    }

    // Padding: a single 1 bit after the message, then the bit length.
    n[i >>> 5] |= 128 << (i % 32);
    n[14 + (((i + 64) >>> 9) << 4)] = i;

    // The four round functions. `x >>> 0` turns a missing (undefined) word into 0.
    var ff = function (a, b, c, d, x, s, k) {
        var sum = a + ((b & c) | (~b & d)) + (x >>> 0) + k;
        return ((sum << s) | (sum >>> (32 - s))) + b;
    };
    var gg = function (a, b, c, d, x, s, k) {
        var sum = a + ((b & d) | (c & ~d)) + (x >>> 0) + k;
        return ((sum << s) | (sum >>> (32 - s))) + b;
    };
    var hh = function (a, b, c, d, x, s, k) {
        var sum = a + (b ^ c ^ d) + (x >>> 0) + k;
        return ((sum << s) | (sum >>> (32 - s))) + b;
    };
    var ii = function (a, b, c, d, x, s, k) {
        var sum = a + (c ^ (b | ~d)) + (x >>> 0) + k;
        return ((sum << s) | (sum >>> (32 - s))) + b;
    };

    for (p = 0; p < n.length; p += 16) {
        var b = l;
        var E = f;
        var T = d;
        var w = h;

        l = ff(l, f, d, h, n[p + 0], 7, -680876936);
        h = ff(h, l, f, d, n[p + 1], 12, -389564586);
        d = ff(d, h, l, f, n[p + 2], 17, 606105819);
        f = ff(f, d, h, l, n[p + 3], 22, -1044525330);
        l = ff(l, f, d, h, n[p + 4], 7, -176418897);
        h = ff(h, l, f, d, n[p + 5], 12, 1200080426);
        d = ff(d, h, l, f, n[p + 6], 17, -1473231341);
        f = ff(f, d, h, l, n[p + 7], 22, -45705983);
        l = ff(l, f, d, h, n[p + 8], 7, 1770035416);
        h = ff(h, l, f, d, n[p + 9], 12, -1958414417);
        d = ff(d, h, l, f, n[p + 10], 17, -42063);
        f = ff(f, d, h, l, n[p + 11], 22, -1990404162);
        l = ff(l, f, d, h, n[p + 12], 7, 1804603682);
        h = ff(h, l, f, d, n[p + 13], 12, -40341101);
        d = ff(d, h, l, f, n[p + 14], 17, -1502002290);
        f = ff(f, d, h, l, n[p + 15], 22, 1236535329);

        l = gg(l, f, d, h, n[p + 1], 5, -165796510);
        h = gg(h, l, f, d, n[p + 6], 9, -1069501632);
        d = gg(d, h, l, f, n[p + 11], 14, 643717713);
        f = gg(f, d, h, l, n[p + 0], 20, -373897302);
        l = gg(l, f, d, h, n[p + 5], 5, -701558691);
        h = gg(h, l, f, d, n[p + 10], 9, 38016083);
        d = gg(d, h, l, f, n[p + 15], 14, -660478335);
        f = gg(f, d, h, l, n[p + 4], 20, -405537848);
        l = gg(l, f, d, h, n[p + 9], 5, 568446438);
        h = gg(h, l, f, d, n[p + 14], 9, -1019803690);
        d = gg(d, h, l, f, n[p + 3], 14, -187363961);
        f = gg(f, d, h, l, n[p + 8], 20, 1163531501);
        l = gg(l, f, d, h, n[p + 13], 5, -1444681467);
        h = gg(h, l, f, d, n[p + 2], 9, -51403784);
        d = gg(d, h, l, f, n[p + 7], 14, 1735328473);
        f = gg(f, d, h, l, n[p + 12], 20, -1926607734);

        l = hh(l, f, d, h, n[p + 5], 4, -378558);
        h = hh(h, l, f, d, n[p + 8], 11, -2022574463);
        d = hh(d, h, l, f, n[p + 11], 16, 1839030562);
        f = hh(f, d, h, l, n[p + 14], 23, -35309556);
        l = hh(l, f, d, h, n[p + 1], 4, -1530992060);
        h = hh(h, l, f, d, n[p + 4], 11, 1272893353);
        d = hh(d, h, l, f, n[p + 7], 16, -155497632);
        f = hh(f, d, h, l, n[p + 10], 23, -1094730640);
        l = hh(l, f, d, h, n[p + 13], 4, 681279174);
        h = hh(h, l, f, d, n[p + 0], 11, -358537222);
        d = hh(d, h, l, f, n[p + 3], 16, -722521979);
        f = hh(f, d, h, l, n[p + 6], 23, 76029189);
        l = hh(l, f, d, h, n[p + 9], 4, -640364487);
        h = hh(h, l, f, d, n[p + 12], 11, -421815835);
        d = hh(d, h, l, f, n[p + 15], 16, 530742520);
        f = hh(f, d, h, l, n[p + 2], 23, -995338651);

        l = ii(l, f, d, h, n[p + 0], 6, -198630844);
        h = ii(h, l, f, d, n[p + 7], 10, 1126891415);
        d = ii(d, h, l, f, n[p + 14], 15, -1416354905);
        f = ii(f, d, h, l, n[p + 5], 21, -57434055);
        l = ii(l, f, d, h, n[p + 12], 6, 1700485571);
        h = ii(h, l, f, d, n[p + 3], 10, -1894986606);
        d = ii(d, h, l, f, n[p + 10], 15, -1051523);
        f = ii(f, d, h, l, n[p + 1], 21, -2054922799);
        l = ii(l, f, d, h, n[p + 8], 6, 1873313359);
        h = ii(h, l, f, d, n[p + 15], 10, -30611744);
        d = ii(d, h, l, f, n[p + 6], 15, -1560198380);
        f = ii(f, d, h, l, n[p + 13], 21, 1309151649);
        l = ii(l, f, d, h, n[p + 4], 6, -145523070);
        h = ii(h, l, f, d, n[p + 11], 10, -1120210379);
        d = ii(d, h, l, f, n[p + 2], 15, 718787259);
        f = ii(f, d, h, l, n[p + 9], 21, -343485551);

        // Add this block's result to the running state, wrapped to 32 bits.
        l = (l + b) >>> 0;
        f = (f + E) >>> 0;
        d = (d + T) >>> 0;
        h = (h + w) >>> 0;
    }

    return r.endian([l, f, d, h]);
}
/**
 * Hash `t` with uuu() and return the digest in the requested representation.
 *
 * Fix relative to the original: the `asString` branch called
 * `s.bytesToString`, but `s` in this file is the string lookup table, so that
 * branch threw a TypeError. The bytes are now converted to a string here.
 *
 * @param {*} t - Message to hash; must not be null or undefined.
 * @param {{asBytes: (boolean|undefined), asString: (boolean|undefined)}} [e] -
 *     Output options, also forwarded to uuu(). `asBytes` takes precedence over
 *     `asString`.
 * @returns {string|Array<number>} A byte array when `e.asBytes` is truthy, a
 *     string with one character per digest byte when `e.asString` is truthy,
 *     otherwise a lowercase hex string.
 * @throws {Error} "Illegal argument <value>" when `t` is null or undefined.
 */
function a(t, e) {
    if (t == null) {
        throw new Error("Illegal argument " + t);
    }

    var digestBytes = r.wordsToBytes(uuu(t, e));

    if (e && e.asBytes) {
        return digestBytes;
    }
    if (e && e.asString) {
        var chars = [];
        for (var i = 0; i < digestBytes.length; i++) {
            chars.push(String.fromCharCode(digestBytes[i]));
        }
        return chars.join("");
    }
    return r.bytesToHex(digestBytes);
}
/**
 * Build the value for the xm-sign request header.
 *
 * The result has the shape
 *   <a("himalaya-" + t1)>(<rand>)<t1>(<rand>)<t2>
 * where t1 is the current time in milliseconds, t2 is a second timestamp
 * shifted by a fixed offset, and each <rand> comes from u(100).
 *
 * @returns {string} The signature string.
 */
function getSign() {
    var secretKey = "himalaya-";
    var firstTimestamp = Date.now();
    // The offset does not have to be 9876; any number works. Compare the two
    // timestamps in captured traffic to see what gap is typical.
    var secondTimestamp = Date.now() + 9876;

    var firstRandom = u(100);
    var secondRandom = u(100);
    var template = "{" + secretKey + firstTimestamp + "}(" + firstRandom + ")" +
        firstTimestamp + "(" + secondRandom + ")" + secondTimestamp;

    // Swap the leading "{...}" group for the hash of its contents.
    return template.replace(/{([\w-]+)}/, function (wholeMatch, inner) {
        return a(inner);
    });
}
测试一下。
得到了我们想要的结果,就可以编写python代码来调用getSign
函数获取页面数据了。
import requests
import execjs
# Ask which result page to fetch; the answer is sent as the pageNum query parameter.
pageNum = input("请输入您要查询第几页:")

# Let requests build and escape the query string instead of formatting raw
# user input into the URL.
url = "https://www.ximalaya.com/revision/metadata/v2/channel/albums"
params = {
    "pageNum": pageNum,
    "pageSize": 50,
    "sort": 1,
    "metadata": "",
    "groupId": 11,
}

# Load the signing script. The with-block closes the file even if reading
# fails; the original left the handle open.
with open("解密.js", mode="r", encoding="utf-8") as file_object:
    exec_code = file_object.read()

# Run getSign() from the script to obtain the Xm-Sign header value.
exec_js = execjs.compile(exec_code)
xm_sign = exec_js.call("getSign")

headers = {
    "Xm-Sign": xm_sign,
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
}
# A timeout keeps the script from hanging forever if the server never answers.
resp = requests.get(url, params=params, headers=headers, timeout=10)
print(resp.text)
运行,成功获取到数据。