合并Excel文件
需求:
- 把多个Excel文件合并到一个Excel文件的不同表格中。且需要合并的文件前后缀一致。
- 对合并完成的文件中每张表指定列找出最大值标红
XXX表示需要自己填写的内容
import os
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import Font
def merge_excel_files_0(input_folder_path, output_file_path):
excel_files = [file for file in os.listdir(input_folder_path) if file.startswith('XXX') and file.endswith('.xlsx')]
# 创建一个空的DataFrame用于存储合并后的数据
merged_data = pd.DataFrame()
# 循环读取每个excel文件,并将数据合并到merged_data中
for file in excel_files:
file_path = os.path.join(input_folder_path, file)
df = pd.read_excel(file_path)
merged_data = pd.concat([merged_data, df], ignore_index=True)
# 将合并后的数据保存到新的excel文件中
merged_data.to_excel(output_file_path, sheet_name='hp_all', index=False)
def call_0():
# 指定 输入文件夹路径 和 输出文件
input_folder_path = "XXX"
output_file_path = "XXX"
merge_excel_files_0(input_folder_path, output_file_path)
def merge_excel_files(input_file_path, output_file_path, sheet_name, start_with_str):
# ####
# 将多张表格合并,且有规律的命名
# 需要提前新建一个有内容的表格作为输入(即call_0())
# ####
file_list = os.listdir(input_file_path)
with pd.ExcelWriter(output_file_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
merge_data = pd.DataFrame()
for file_name in file_list:
if file_name.startswith(start_with_str) and file_name.endswith('.xlsx'):
file_path = os.path.join(input_file_path, file_name)
print(file_path)
df = pd.read_excel(file_path)
merge_data = pd.concat([merge_data, df], ignore_index=True)
merge_data.to_excel(writer, sheet_name=sheet_name, index=False)
def call1():
root_dir = 'XXX'
input_dirs = ['XXX', 'XXX', 'XXX', 'XXX']
output_file_path = 'XXX'
start_with_strs = [['XXX','XXX'],
['XXX', 'XXX'],
['XXX','XXX']]
for start_with_str,sheet in start_with_strs:
for input_dir in input_dirs:
input_file_path = os.path.join(root_dir, input_dir, 'result')
print(input_file_path)
sheet_name = input_dir + '_' +sheet
merge_excel_files(input_file_path, output_file_path, sheet_name, start_with_str)
def highlight_max_value(input_file_path, columns_to_highlight):
# ####
# 对于输入的excel文件中每张表找出指定的若干列中最大值并标红
# ####
wb = load_workbook(input_file_path)
# 遍历每个表
for sheet_name in wb.sheetnames:
sheet = wb[sheet_name]
# 遍历指定的列
for column in columns_to_highlight:
max_value = float('-inf') # 初始最大值设为负无穷,原数据可能存在空值
max_cell = None
# 找到列中的最大值及其单元格
for row in range(2, sheet.max_row + 1): # 从第二行开始遍历,跳过标题行
cell_value = sheet[f'{column}{row}'].value
if cell_value is not None and cell_value > max_value:
max_value = cell_value
max_cell = sheet[f'{column}{row}']
# 将最大值的单元格标记为加粗和红色
if max_cell is not None:
max_cell.font = Font(color="FF0000", bold=True)
wb.save(input_file_path)
def call_2():
# 指定要处理的excel文件路径 和 要 加粗标红 的列
input_file_path = "XXX"
columns_to_highlight = ['XXX', 'XXX']
highlight_max_value(input_file_path, columns_to_highlight)
if __name__ == '__main__':
call_2()
标签:文件,sheet,Excel,XXX,合并,excel,file,input,path
From: https://www.cnblogs.com/benbenlzw/p/18054240