原文链接
python爬虫 - Python3.x+Fiddler抓取APP数据 - 学习分享 - SegmentFault 思否
我爬取的为浏览器数据
可以看到,在这里我们已经用 Fiddler 成功抓取到了浏览器的请求数据。但是在用 Python 实现爬取时,遇到了很多版本不兼容以及配置缺失的问题;同时我抓到的请求使用的是 HTTP/2 协议,不知道这会不会带来其他影响。目前的代码如下:
# Replay a mobile Baidu search request that was captured with Fiddler.
#
# The captured request is kept verbatim in RAW_REQUEST and the URL/headers are
# parsed from it, so the capture and the headers actually sent cannot drift
# apart. (The previous hand-copied headers dict contained a non-ASCII
# "content omitted" placeholder inside the cookie value, which cannot be
# encoded as an HTTP header and made the request fail before it was sent.)
from typing import Dict, Tuple

# Seconds to wait for the server before giving up; without a timeout
# requests.get() may block forever.
REQUEST_TIMEOUT_SECONDS = 10

# Raw request as captured, one header per line. Lines that start with
# whitespace continue the previous header value (used to keep the very long
# cookie readable); continuation lines are joined with a single space.
# NOTE(review): the cookie is a session-specific capture and will expire --
# re-capture it when the server starts rejecting the request.
RAW_REQUEST = """
GET https://m.baidu.com/s?word=%E7%9F%B3%E5%AE%B6%E5%BA%84&opfc=1 HTTP/2
host: m.baidu.com
upgrade-insecure-requests: 1
user-agent: Mozilla/5.0 (Linux; Android 7.1.2; SM-G977N Build/LMY48Z; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/75.0.3770.143 Mobile Safari/537.36
accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
referer: https://m.baidu.com/?from=844b&vit=fps
accept-encoding: gzip, deflate
accept-language: zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7
x-requested-with: com.android.browser
cookie: BAIDUID=3ED233718A42B05529E739BC5B06191C:FG=1; BAIDUID_BFESS=3ED233718A42B05529E739BC5B06191C:FG=1; bd_af=1; kleck=4e9a1215601bba92d8209d1d02da24a2; BDSVRTM=6; BDORZ=AE84CDB3A529C0F8A2B9DCDD1D18B695; SE_LAUNCH=5%3A1705329123; POLYFILL=0; BA_HECTOR=a4al8580a4282ga4000k0ka4v2ttic1iqagfc1s; ZFY=aZwTfnzUTuuEpLiW5Hy5F3zJcwRrFA4RMpopOVBZqQI:C; MSA_WH=450_724; MSA_PBT=148; MSA_ZOOM=1000; MSA_PHY_WH=900_1600; shifen[680740050152_15861]=1705329137;
    H_WISE_SIDS=282632_286879_110085_287513_287837_287168_280169_284912_288373_283782_287982_288710_288714_288718_288601_288742_288746_288749_287620_284816_269050_265881_281890_289949_289950_289956_290237_290370_290498_286491_290555_290562_282553_282815_289431_290987_287977_286234_291051_203517_291151_277936_290425_288871_256739_290667_288252_291510_281879_291398_286910_291727_290567_283016_291868_291956_291989_292027_292135_292224_292167_292249_292247_292251_284551_292073_292089_289527_291192_292327_292314_292357_292363_287174_287718_282466_292506_292345_292614_292710_292773_292414_292459_292453_292822_292804_287703_289094_292583_292893_291327_8000065_8000107_8000126_8000142_8000143_8000149_8000151_8000163_8000175_8000185;
    PSINO=1;
    H_WISE_SIDS_BFESS=282632_286879_110085_287513_287837_287168_280169_284912_288373_283782_287982_288710_288714_288718_288601_288742_288746_288749_287620_284816_269050_265881_281890_289949_289950_289956_290237_290370_290498_286491_290555_290562_282553_282815_289431_290987_287977_286234_291051_203517_291151_277936_290425_288871_256739_290667_288252_291510_281879_291398_286910_291727_290567_283016_291868_291956_291989_292027_292135_292224_292167_292249_292247_292251_284551_292073_292089_289527_291192_292327_292314_292357_292363_287174_287718_282466_292506_292345_292614_292710_292773_292414_292459_292453_292822_292804_287703_289094_292583_292893_291327_8000065_8000107_8000126_8000142_8000143_8000149_8000151_8000163_8000175_8000185;
    BAIDULOC=11975304.608602_4139954.0751319_1_359_1705329581618; H5LOC=1; BAIDULOC_BFESS=11975304.608602_4139954.0751319_1_359_1705329581618; seClickID=52f3551186394218; wpr=0;
    COOKIE_SESSION=446_0_0_2_0_t2_11_2_1_0_0_1_9_1705329568%7C2%23446_t2_0_1_1_2_5_2_1705329568%7C2;
    FC_MODEL=0_0_0_0_700.68_0_0_0_0_0_720.21_0_2_11_1_5_0_0_1705329568%7C2%23700.68_0_0_2_1_0_1705329568%7C2%230_ax_1_0_0_0_0_1705329568;
    shifen[730377201155_68091]=1705330314; BCLID=11154939026636247212; BCLID_BFESS=11154939026636247212;
    BDSFRCVID=jhkOJeC62GYZcUJq6tUy5PqNP2K2rBJTH6amWV8ifk80i3kPJfb2EG0PdU8g0KuhSv7IogKKBmOTHg-F_2uxOjjg8UtVJeC6EG0Ptf8g0x5;
    BDSFRCVID_BFESS=jhkOJeC62GYZcUJq6tUy5PqNP2K2rBJTH6amWV8ifk80i3kPJfb2EG0PdU8g0KuhSv7IogKKBmOTHg-F_2uxOjjg8UtVJeC6EG0Ptf8g0x5;
    H_BDCLCKID_SF=JnPjVI-2JK83qJTph47hqR-8MxrK2JT3KC_X3b7Ef-bVsh7_bf--D60HyHDO-J_qbNQX_xJhBfJHolTg2Rjxy5K_htJjt4CfKH4e24jc2POkqC3HQT3mXlQbbN3i3xrwBKJuWb3cWhoV8UbSbIcPBTD02-nBat-OQ6npaJ5nJq5nhMJmb67JDbv0eG_DqT_OtbC8V-35b5rWjJjvM-n_bntJ5eT22-us2DbW2hcH0KLKsUTL-pokKjK33qutQnowLnriotoPWfb1MRjz3pDWMtKfea30J-nTWDcaoq5TtUJfSDnTDMRhMhtBjPryKMni0Dj9-pnjHlQrh459XP68bTkA5bjZKxtq3mkjbPbDfn02JKKu-n5jHjoWDG_f3H;
    H_BDCLCKID_SF_BFESS=JnPjVI-2JK83qJTph47hqR-8MxrK2JT3KC_X3b7Ef-bVsh7_bf--D60HyHDO-J_qbNQX_xJhBfJHolTg2Rjxy5K_htJjt4CfKH4e24jc2POkqC3HQT3mXlQbbN3i3xrwBKJuWb3cWhoV8UbSbIcPBTD02-nBat-OQ6npaJ5nJq5nhMJmb67JDbv0eG_DqT_OtbC8V-35b5rWjJjvM-n_bntJ5eT22-us2DbW2hcH0KLKsUTL-pokKjK33qutQnowLnriotoPWfb1MRjz3pDWMtKfea30J-nTWDcaoq5TtUJfSDnTDMRhMhtBjPryKMni0Dj9-pnjHlQrh459XP68bTkA5bjZKxtq3mkjbPbDfn02JKKu-n5jHjoWDG_f3H;
    __bsi=11480918622058509743_00_19_N_N_0_0303_c02f_Y
"""


def parse_raw_request(raw: str) -> Tuple[str, str, Dict[str, str]]:
    """Split a raw captured HTTP request into method, URL and headers.

    The first non-blank line must be the request line
    (``METHOD URL [VERSION]``). Every following line is either
    ``name: value`` or, when it starts with whitespace, a continuation that
    is appended to the previous header value after a single space.

    Args:
        raw: Request text with one header per line.

    Returns:
        A ``(method, url, headers)`` tuple; ``headers`` maps each header name
        exactly as written to its stripped value.

    Raises:
        ValueError: If the text is empty, the request line has fewer than
            two fields, a header line has no colon, or a continuation line
            appears before any header.
    """
    lines = [line for line in raw.strip().splitlines() if line.strip()]
    if not lines:
        raise ValueError("raw request is empty")

    request_line = lines[0].split()
    if len(request_line) < 2:
        raise ValueError("malformed request line: %r" % lines[0])
    method, target = request_line[0], request_line[1]

    parsed: Dict[str, str] = {}
    current = None
    for line in lines[1:]:
        if line[0] in " \t":
            # Folded continuation of the previous header value.
            if current is None:
                raise ValueError("continuation line before any header: %r" % line)
            parsed[current] += " " + line.strip()
            continue
        # Split on the first colon only: values such as URLs contain colons.
        name, sep, value = line.partition(":")
        if not sep:
            raise ValueError("malformed header line: %r" % line)
        current = name.strip()
        parsed[current] = value.strip()
    return method, target, parsed


# Module-level names kept for anything that already refers to them.
_, url, headers = parse_raw_request(RAW_REQUEST)


def main() -> None:
    """Send the captured GET request and print the response body.

    Raises:
        requests.RequestException: On connection problems, on timeout, or
            (via ``raise_for_status``) when the server answers with a
            4xx/5xx status.
    """
    # Imported here so that parse_raw_request() stays importable on machines
    # where the third-party requests package is not installed.
    import requests

    # NOTE(review): the capture says HTTP/2, but requests is expected to send
    # this over HTTP/1.1; the version token in RAW_REQUEST is never used.
    response = requests.get(
        url=url,
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Fail loudly instead of printing an error page as if it were data.
    response.raise_for_status()
    print(response.text)


if __name__ == "__main__":
    main()
标签:baidu,BFESS,15,爬虫,accept,2024,application,com,MSA From: https://www.cnblogs.com/azwz/p/17966645