"""Split a large CSV file into numbered CSV pieces using pandas chunked reads."""
import csv
import re
import sys
import time

import numpy as np
import pandas as pd
from openpyxl import Workbook

path = "f:/te/qh.csv"
# The backslash is escaped explicitly: "\行" is not a valid escape sequence and
# triggers a warning on recent Python versions. The runtime value is unchanged.
path1 = "F:/BaiduNetdiskDownload\\行政许可/行政许可/行政许可.csv"
chunksize = 10000
chunk_pointer = 0
tt = "f:/te/qhv1.xlsx"
# NOTE(review): an ExcelWriter used to be opened on `tt` here but nothing was
# ever written to it and it was never closed, so it has been dropped.

# Column names of the company table stored at `path`.
cxx = ['company_id', 'unified_code', 'ent_name', 'reg_capital', 'real_capital',
       'reg_no', 'legal_person', 'open_status', 'old_ent_name', 'industry',
       'tax_no', 'license_number', 'org_no', 'authority', 'annual_date',
       'start_date', 'ent_type', 'open_time', 'district', 'district_code',
       'reg_addr', 'scope', 'state', 'create_time', 'update_time', '数据来源']


def read_csv_feature(filePath):
    """Load a whole UTF-8 CSV file into one DataFrame, reading it in chunks.

    Args:
        filePath: Path of the comma-separated file to read.

    Returns:
        A DataFrame with every row of the file and a fresh 0..n-1 index.
    """
    rows_per_read = 100000
    # `with` guarantees the handle is closed even if parsing fails midway.
    with open(filePath, encoding='utf-8') as f:
        reader = pd.read_csv(f, sep=',', chunksize=rows_per_read,
                             low_memory=False)
        chunks = list(reader)
    print('Iteration is END!!!')
    return pd.concat(chunks, axis=0, ignore_index=True)


def split_csv(src_path, out_prefix, rows_per_file=120000, drop_columns=None):
    """Write consecutive row ranges of a CSV file to numbered CSV files.

    Each piece holds only its own rows, so memory use is bounded by
    ``rows_per_file`` rather than by the size of the source file.

    Args:
        src_path: Path of the UTF-8, comma-separated file to split. Its first
            row is used as the header.
        out_prefix: Output path prefix; piece ``k`` (counted from 1) is
            written to ``out_prefix + str(k) + ".csv"``.
        rows_per_file: Maximum number of data rows per piece.
        drop_columns: Optional column labels removed from every piece before
            it is written.

    Returns:
        The list of written file paths, in order.

    Raises:
        KeyError: If a label in ``drop_columns`` is not a column of the file.
    """
    written = []
    with open(src_path, encoding='utf-8') as src:
        reader = pd.read_csv(src, sep=',', chunksize=rows_per_file,
                             low_memory=False)
        for part_no, piece in enumerate(reader, start=1):
            if drop_columns:
                # drop() returns a new frame; the result must be kept.
                piece = piece.drop(columns=list(drop_columns))
            print(piece)
            out_path = out_prefix + str(part_no) + ".csv"
            piece.to_csv(out_path)
            written.append(out_path)
    return written


def main():
    """Split the file at `path1` into 120000-row pieces under f:/te/."""
    split_csv(
        path1,
        "f:/te/qinghai",
        rows_per_file=120000,
        drop_columns=['state', 'create_time', 'update_time', '数据来源'],
    )


if __name__ == "__main__":
    main()
# Tags: 分割, csv, PYTHON, True, chunk, df, pd, CSV, open
# From: https://www.cnblogs.com/xkdn/p/17643763.html