import pandas as pd
import os
import tkinter as tk
from tkinter import filedialog
# 创建tkinter窗口
root = tk.Tk()
root.withdraw() # 隐藏主窗口
def get_desk_p():
return os.path.join(os.path.expanduser('~'), "Desktop")
print(get_desk_p())
# 弹出对话框选择文件夹
folder_path = filedialog.askdirectory()
# 处理选择的文件夹
if folder_path:
print("选择的文件夹路径:(提取合同编号用)", folder_path)
# 进行其他操作...
else:
print("未选择文件夹!")
# 定义可以word
keyword1 = "需方:"
keyword2 = "合同编号"
# keyword3 = "合计"
# 创建一个空表
result_df = pd.DataFrame(columns=['A', 'B', 'C'])
# 遍历文件夹中的文件
for filename in os.listdir(folder_path):
if filename.endswith('.xlsx') or filename.endswith('.xls'):
file_path = os.path.join(folder_path, filename)
# 将每个文件读取为 DataFrame
df = pd.read_excel(file_path)
# filename_cell=df(filename)
# Extract cells containing keyword1
keyword1_cells = df.stack().str.contains(keyword1, na=False)
keyword1_values = df.stack()[keyword1_cells].values
keyword1_values = keyword1_values.astype(str)
# Extract cells containing keyword2
keyword2_cells = df.stack().str.contains(keyword2, na=False)
keyword2_values = df.stack()[keyword2_cells].values
keyword2_values = keyword2_values.astype(str)
# keyword3_cells = df.stack().str.contains(keyword3, na=False)
# keyword3_next_values = df.stack()[keyword3_cells].values
# keyword3_next_values = keyword3_next_values.astype(str)
# 将值追加到结果DataFrame中
result_df = result_df._append(
{'A': ', '.join(keyword1_values.tolist()), 'C': ', '.join(keyword2_values.tolist()),
'B': filename.replace('.xls', '')}, ignore_index=True)
keywords = ['需方:', '合同编号:']
for keyword in keywords:
result_df.replace(keyword, '', regex=True, inplace=True)
# 将结果 DataFrame 保存为新表格
# result_df.to_excel('C:/Users/Administrator/Desktop/2222.xlsx',index=False)
result_df.to_excel(get_desk_p() + '/合同222.xlsx', index=False)
print('完成', folder_path, '的提取,在桌面合同222表格里')
标签:提取,df,cells,values,合同,path,keyword2,台账,keyword1
From: https://blog.51cto.com/u_16240224/7274421