"""Fetch the encrypted company list from jzsc.mohurd.gov.cn page by page.

Each page is requested over HTTP, decrypted by calling function ``h`` in the
local ``tuitui.js`` file through execjs, and then parsed as JSON.
"""
import json
import subprocess
from functools import partial

import requests

# Force UTF-8 text pipes for every subprocess started from here on.
# NOTE(review): kept ahead of ``import execjs`` exactly as in the original --
# presumably execjs picks up ``Popen`` when it is imported; confirm before
# reordering.  The keyword is ``encoding``; the original ``encodings`` is not
# a Popen argument and would raise TypeError on the first subprocess launch.
subprocess.Popen = partial(subprocess.Popen, encoding='utf-8')

import execjs  # noqa: E402  (must follow the Popen patch above)


class JZSC:
    """Crawler for the paginated company list endpoint.

    Attributes set by ``__init__``:
        url: endpoint that returns one encrypted page of results.
        encrypt_data: raw response text of the most recently fetched page.
        dec_data: decrypted text of the most recently fetched page.
        total_page: number of pages ``run`` should fetch.
    """

    # Column labels matching the order of the row built in parse_save_data.
    TB_HEADER = ['统一社会信用代码', '企业名称', '企业法定代表人', '企业注册属地']

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 10

    # Compiled tuitui.js context; created on first use and then reused.
    _js_ctx = None

    def __init__(self, total_page):
        self.url = 'https://jzsc.mohurd.gov.cn/APi/webApi/dataservice/query/comp/list'
        self.encrypt_data = ''
        self.dec_data = ''
        self.total_page = total_page

    def get_encrypt_data(self, pg):
        """Request page ``pg`` and store the response text in ``self.encrypt_data``.

        Raises:
            requests.RequestException: on connection failure or timeout.
        """
        params = {
            'pg': pg,
            'pgsz': 15,
            'total': 450,
        }
        headers = {
            # The original value had spaces inside the URL, which made it an
            # invalid Referer; this is the listing page URL noted at the end
            # of the script.
            'Referer': 'https://jzsc.mohurd.gov.cn/data/company',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
        }
        response = requests.get(
            self.url,
            headers=headers,
            params=params,
            timeout=self.REQUEST_TIMEOUT,  # avoid hanging forever on a stalled connection
        )
        self.encrypt_data = response.text

    def js_decrypt_data(self):
        """Decrypt ``self.encrypt_data`` with ``h`` from tuitui.js into ``self.dec_data``."""
        if self._js_ctx is None:
            # Compile the script once instead of re-reading it for every page.
            with open('tuitui.js', 'r', encoding='utf-8') as f:
                self._js_ctx = execjs.compile(f.read())
        self.dec_data = self._js_ctx.call("h", self.encrypt_data)

    def parse_save_data(self):
        """Parse ``self.dec_data`` and extract the fields of every company record.

        Returns:
            bool: ``True`` when the decrypted payload cannot be parsed or lacks
            the expected keys (treated as "no more pages", e.g. the requested
            page number is too large); ``False`` when the page was processed.
        """
        try:
            json_data = json.loads(self.dec_data)
            data_list = json_data['data']['list']
            for data in data_list:
                # Unified social credit code; a trailing tab is appended when
                # a value is present, as in the original.
                org_code = data['QY_ORG_CODE'] + '\t' if data['QY_ORG_CODE'] else None
                # Company name.
                company_name = data['QY_NAME'] or None
                # Legal representative; this key may be absent from a record.
                legal_rep = data.get('QY_FR_NAME') or None
                # Registered region.
                region = data['QY_REGION_NAME'] or None
                save_list = [org_code, company_name, legal_rep, region]
                # TODO(review): the original comment says the row should be
                # saved to a JSON file, but no write was ever implemented, so
                # ``save_list`` is currently discarded.
        except (ValueError, KeyError, TypeError):
            # ValueError covers json.JSONDecodeError; KeyError/TypeError cover
            # a payload without the expected structure.
            print("hhhhh,结束了")
            return True
        return False

    def run(self):
        """Fetch, decrypt and parse pages ``0 .. total_page - 1``, stopping early at the end."""
        # Pages are requested with a zero-based ``pg``; the original iterated
        # ``total_page + 1`` times and so fetched one page more than asked for.
        for i in range(self.total_page):
            # Fetch the encrypted page.
            self.get_encrypt_data(i)
            # Decrypt it through the JS helper.
            self.js_decrypt_data()
            # Extract the records; a True result means crawling is finished.
            if self.parse_save_data():
                return
            print(f'恭喜,第{i + 1} 页数据爬取完毕!!!')


# Listing page being scraped: https://jzsc.mohurd.gov.cn/data/company
if __name__ == '__main__':
    total_page = int(input(">>>请输入要爬取的页码总数:"))
    JZSC(total_page).run()
# Tags: NAME, QY, self, js, mohurd, total, data
# From: https://www.cnblogs.com/xingmeng63/p/17591545.html