1.抓包参数分析
从抓包结果可以看出:stageId 参数随时间变化,pageNo 每翻一页加 1,其他参数保持不变。
2.代码部分
import re

import requests

# Endpoints queried by the helpers below.
_PRICE_JS_URL = 'http://jxzgsgzs.com/js/price.js?v=1.7.2'
_STAGE_URL = (
    'http://jxzgsgzs.com/jia-xing-fruit-webapi/stage'
    '?reportCycle=10&rawDataIsPublish=true&pageSize=1&orderBy=-reportTimeStart'
)

# NOTE(review): the User-Agent value is blank in the published code;
# presumably a real browser string belongs here -- confirm before running.
_HEADERS = {'User-Agent': ''}

# requests has no default timeout; without one a stalled server blocks forever.
_REQUEST_TIMEOUT = 10

# Matches the JavaScript assignment  var ORDERBY = '<value>';
_ORDERBY_PATTERN = re.compile(r"var ORDERBY = '(.*?)';")


def orderBy_get():
    """Return the value assigned to ``ORDERBY`` in the site's price.js.

    Returns:
        The string captured from the first ``var ORDERBY = '...';``
        statement found in the downloaded script.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        ValueError: the script contains no ``ORDERBY`` assignment.
    """
    response = requests.get(
        url=_PRICE_JS_URL, headers=_HEADERS, timeout=_REQUEST_TIMEOUT
    )
    response.raise_for_status()

    match = _ORDERBY_PATTERN.search(response.text)
    if match is None:
        # Fail with a clear message instead of an IndexError on an empty list.
        raise ValueError('ORDERBY assignment not found in price.js')
    return match.group(1)


def ids_get():
    """Return ``(id, reportTimeEnd)`` of the stage record served by the API.

    The request asks for a single record (``pageSize=1``) ordered by
    ``-reportTimeStart``; presumably that is the most recent stage.

    Returns:
        A 2-tuple ``(id, reportTimeEnd)`` taken from the first entry of the
        response's ``data`` list.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        ValueError: the response's ``data`` list is empty.
    """
    response = requests.get(
        url=_STAGE_URL, headers=_HEADERS, timeout=_REQUEST_TIMEOUT
    )
    response.raise_for_status()

    records = response.json()['data']
    if not records:
        # Previously this path hit an unbound local variable.
        raise ValueError('stage endpoint returned no records')

    stage = records[0]
    return stage['id'], stage['reportTimeEnd']
import csv
import json
import time

import requests

# Local helper module from this article (saved as test.py next to this script).
from test import ids_get, orderBy_get

API_URL = 'http://jxzgsgzs.com/jia-xing-fruit-webapi/rawDataExpansion?'

# NOTE(review): the User-Agent value is blank in the published code;
# presumably a real browser string belongs here -- confirm before running.
HEADERS = {"User-Agent": ""}

OUTPUT_FILE = '嘉兴水果数据.csv'

# Inclusive page range requested from the API.
FIRST_PAGE = 1
LAST_PAGE = 28

# Pause after every request, in seconds.
REQUEST_DELAY_SECONDS = 5

# requests has no default timeout; without one a stalled server blocks forever.
REQUEST_TIMEOUT = 10


def fetch_page(stage_id, order_by, page_no):
    """Return the ``data`` list of one result page.

    Args:
        stage_id: value sent as the ``stageId`` query parameter.
        order_by: value sent as the ``orderBy`` query parameter.
        page_no: 1-based page number sent as ``pageNo``.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
    """
    params = {
        'stageId': stage_id,
        'orderBy': order_by,
        'parentStructId': 1,
        'pageNo': page_no,
    }
    response = requests.get(
        url=API_URL, headers=HEADERS, params=params, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response.json()['data']


def build_record(dic, report_time_end):
    """Map one API record to the dict whose values form a CSV row.

    Key order is the column order of the output file.
    """
    return {
        'category': dic['category'],
        'city': dic['city'],
        'id': str(dic['id']),
        'kind': dic['kind'],
        'placeOfOrigin': dic['placeOfOrigin'],
        'price': str(dic['price']),
        'specification': dic['specification'],
        'totalSalesVolume': str(dic['totalSalesVolume']),  # total sales amount
        'totalTurnover': str(dic['totalTurnover']),  # total business turnover
        'data': str(report_time_end),
    }


def main():
    """Download every page and append its records to the CSV file."""
    ids, reportTimeEnd = ids_get()
    orderBy = orderBy_get()

    # Open the file once instead of once per record. newline='' hands line
    # ending control to the csv module, as its documentation recommends.
    with open(OUTPUT_FILE, 'a', encoding='utf-8', newline='') as f:
        # csv.writer quotes fields containing commas or quotes, which the
        # previous string concatenation silently corrupted, and writes None
        # as an empty field instead of raising TypeError.
        writer = csv.writer(f, lineterminator='\n')

        for page_no in range(FIRST_PAGE, LAST_PAGE + 1):
            print('数据爬取第{}页'.format(page_no))
            records = fetch_page(ids, orderBy, page_no)
            time.sleep(REQUEST_DELAY_SECONDS)

            for dic in records:
                content = build_record(dic, reportTimeEnd)
                # Same console output as before: a one-element list per record.
                print([content])
                writer.writerow(content.values())

            # Persist each finished page so a later failure loses no rows.
            f.flush()


if __name__ == "__main__":
    main()
3.数据展示
标签:orderBy,ids,get,python,嘉兴,爬虫,dic,content,url From: https://www.cnblogs.com/icekele/p/17369285.html