使用openpyxl库实在是太慢,使用openpyxl的只读模式又需要excel的准确尺寸信息,但是每个excel的尺寸都不一样。
我使用zipfile解压excel文件,直接读取xml文件进行合并:
import os from openpyxl import load_workbook from openpyxl import Workbook import zipfile import re total_row_number=0 #当此值等于0时,需要写入第一行列头 def 获取行数和列数(path:str)->tuple: ''' 返回excel中的第一个sheet页中的数据、第一个sheet的总行数和总列数 ''' zip_file= zipfile.ZipFile(path) zip_list = zip_file.namelist() # 得到压缩包里所有文件 for zip_path in zip_list: if('xl/worksheets/sheet1.xml' == zip_path): filepath = zip_file.extract(zip_path) lines=None row_number=0 col_number=0 result=[] #形成二维列表 monitor_typeName = os.path.basename(path).split('-')[0] with open(filepath,'r',encoding='utf-8') as in_fd: lines = in_fd.readlines() for line in lines: if('<row r=' == line[:7]): #行 row_number+=1 elif('<c r=' == line[:5]): #列 regex_str= r'<c.+?/c>' col_list = re.findall(regex_str,line) col_number=max(col_number,len(col_list)) # t=[re.findall(r'(?<=<t>).+?(?=</t>)|(?<=<v>).+?(?=</v>)',col)[0] for col in col_list] t=[re.sub(r'<.+?>',"",col) for col in col_list] result.append([monitor_typeName]+t) result[0][0]='监测类型' #修正下列头 os.remove(filepath) zip_file.close() return (result,row_number,col_number); basic_path ='D:\\BaiduSyncdisk\\temp\\2024年3月4日\\导出所有类型的变电台账数据\\' files = [f for f in os.listdir(basic_path) if f.endswith(".xlsx")] def 写入到excel中()->None: #速度太慢了。 global files,basic_path out_workbook = Workbook(write_only=True) out_sheet=out_workbook.create_sheet() for file in files: path="{}{}".format(basic_path,file) if('明细合并结果' in path):continue data_2list,row_number,col_number=获取行数和列数(path) if(0!=total_row_number): data_2list=data_2list[1:] #去掉猎头 for line in data_2list: out_sheet.append(line) total_row_number+=row_number print("{} 以及合并完成。".format(file)) out_workbook.save('{}{}'.format(basic_path,"明细合并结果.xlsx")) #只写模式只能保存一次 out_workbook.close(); def 写入到csv文件中(): global files,basic_path,total_row_number with open("{}明细合并结果.csv".format(basic_path),'w',encoding='utf-8') as out_fd: for file in files: path="{}{}".format(basic_path,file) if('明细合并结果' in path):continue data_2list,row_number,col_number=获取行数和列数(path) if(0!=total_row_number): data_2list=data_2list[1:] #去掉猎头 str_list=['\t'.join(data) for data in data_2list] out_str='\n'.join(str_list) print(out_str,file=out_fd) total_row_number+=row_number print("{} 以及合并完成。".format(file)); if __name__ == "__main__": 写入到csv文件中()
标签:python,Excel,合并,number,file,path,out,col,row From: https://www.cnblogs.com/love-DanDan/p/18056230