首页 > 其他分享 >mohurd

mohurd

时间:2023-07-30 15:55:39浏览次数:31  
标签:NAME QY self js mohurd total data

import json
import subprocess
from functools import partial

import requests

# Make every child process use UTF-8 text pipes by default. This patch is
# applied before `import execjs` below -- presumably so the JS runtime that
# execjs spawns has its output decoded as UTF-8 instead of the locale codec.
# The Popen keyword is `encoding`; the previous spelling `encodings` is not a
# Popen parameter and made every later Popen(...) call raise TypeError.
subprocess.Popen = partial(subprocess.Popen, encoding='utf-8')
import execjs
# from mySav


class JZSC:
    """Crawler for the company list served by jzsc.mohurd.gov.cn.

    For each page the crawler downloads the encrypted response body,
    decrypts it by calling the ``h`` function of the local ``tuitui.js``
    script through execjs, and collects four fields of every company into
    ``self.rows``.
    """

    # Column titles, in the same order as the values of each row in `rows`.
    TB_HEADER = ['统一社会信用代码', '企业名称', '企业法定代表人', '企业注册属地']
    # Seconds to wait on the server; without a timeout requests can block
    # forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, total_page):
        """Create a crawler that fetches ``total_page`` pages."""
        self.url = 'https://jzsc.mohurd.gov.cn/APi/webApi/dataservice/query/comp/list'
        self.encrypt_data = ''
        self.dec_data = ''
        self.total_page = total_page
        # Rows extracted so far: [credit code, name, legal rep, region].
        self.rows = []
        # Compiled JS context, created on first use and reused afterwards.
        self._js_ctx = None

    def get_encrypt_data(self, pg):
        """Request page ``pg`` and store the raw response text.

        The body is kept in ``self.encrypt_data``. Network errors raised by
        requests (including timeouts) propagate to the caller.
        """
        # pgsz/total are sent exactly as before; presumably page size and a
        # result-count cap expected by the endpoint -- confirm.
        params = {
            'pg': pg,
            'pgsz': 15,
            'total': 450,
        }
        headers = {
            # The previous value had spaces inside the URL, which made it
            # an invalid Referer.
            'Referer': 'https://jzsc.mohurd.gov.cn/data/company',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
        }
        response = requests.get(
            self.url,
            headers=headers,
            params=params,
            timeout=self.REQUEST_TIMEOUT,
        )
        self.encrypt_data = response.text

    def js_decrypt_data(self):
        """Decrypt ``self.encrypt_data`` into ``self.dec_data`` via JS."""
        # The script does not change between pages, so read and compile it
        # only once instead of on every call.
        if self._js_ctx is None:
            with open('tuitui.js', 'r', encoding='utf-8') as f:
                self._js_ctx = execjs.compile(f.read())
        self.dec_data = self._js_ctx.call("h", self.encrypt_data)

    def parse_save_data(self):
        """Extract the company rows from ``self.dec_data``.

        Rows of a successfully parsed page are appended to ``self.rows``
        (persisting them is left to the caller).

        Returns:
            False when the page was parsed, True when the decrypted payload
            is not valid JSON or lacks the expected structure -- the caller
            treats that as "no more data" (e.g. page number too large).
        """
        try:
            json_data = json.loads(self.dec_data)
            page_rows = []
            for data in json_data['data']['list']:
                org_code = data['QY_ORG_CODE']
                page_rows.append([
                    # Trailing tab kept from the original; presumably it
                    # stops spreadsheet tools from reformatting the code --
                    # confirm before removing.
                    org_code + '\t' if org_code else None,
                    data['QY_NAME'] or None,
                    # The legal representative may be absent from a record.
                    data.get('QY_FR_NAME') or None,
                    data['QY_REGION_NAME'] or None,
                ])
        except (ValueError, LookupError, TypeError):
            # Only malformed/unexpected payloads end the crawl; things like
            # KeyboardInterrupt are no longer swallowed.
            print("hhhhh,结束了")
            return True

        # Commit the page only when every record parsed, so a bad page
        # never leaves partial rows behind.
        self.rows.extend(page_rows)
        return False

    def run(self):
        """Crawl ``self.total_page`` pages, stopping early when data ends.

        The loop index is sent as the ``pg`` parameter starting at 0 and is
        reported to the user as page ``i + 1``; the previous
        ``range(total_page + 1)`` therefore fetched one page too many.
        """
        for i in range(self.total_page):
            # Fetch the encrypted payload, decrypt it, then extract rows.
            self.get_encrypt_data(i)
            self.js_decrypt_data()
            finish = self.parse_save_data()

            # A True result means there is nothing more to crawl.
            if finish:
                return

            print(f'恭喜,第{i + 1} 页数据爬取完毕!!!')


# https://jzsc.mohurd.gov.cn/data/company

if __name__ == '__main__':
    # Ask the user how many pages to crawl, then start the crawler.
    crawler = JZSC(int(input(">>>请输入要爬取的页码总数:")))
    crawler.run()

  

标签:NAME,QY,self,js,mohurd,total,data
From: https://www.cnblogs.com/xingmeng63/p/17591545.html

相关文章