##Code Description: Statistics of attendance records(基于从打卡机导出的Excel表格,统计每个人每天的打卡时长,以及总打卡时长,将结果放入一个新的Excel表中)
##Author: RuichaoMao
##Date: 4-16-2024
import pandas as pd
from openpyxl import load_workbook
# 读取aaa.xlsx文件中的所有sheet
xls = pd.ExcelFile('attendance.xls')
sheet_names = xls.sheet_names
# 将处理后的所有sheet写入新的Excel文件
writer = pd.ExcelWriter('output.xlsx', engine='openpyxl')
# 遍历每个sheet进行处理
for sheet_name in sheet_names:
# 读取sheet数据
df = pd.read_excel(xls, sheet_name)
# 对于前两个sheet不做任何操作
if sheet_name in sheet_names[:2]:
# 写入到新的Excel文件中
df.to_excel(writer, sheet_name=sheet_name, index=False)
# 对于后面的sheet进行处理
#if sheet_name != sheet_names[0] and sheet_name != sheet_names[1]:
#if sheet_name == sheet_names[5]:
else:
for column in [1,16,31]:
for row in range(11,25):
#column = 16
# 提取时间字符串
start_time_str = str(df.iloc[row, column])
end_time_str = str(df.iloc[row, column+2])
print (df.iloc[row, 1], df.iloc[row, column+2])
# 根据时间字符串的长度判断时间格式
print (len(start_time_str),len(end_time_str))
if len(start_time_str) == 5:
pass
elif len(start_time_str) == 8: # 格式为小时:分钟:秒
# 去掉秒,重新构造时间字符串
start_time_str = start_time_str[:5]
else:
pass
#raise ValueError("Unsupported time format")
if len(end_time_str) == 5:
pass
elif len(end_time_str) == 8: # 格式为小时:分钟:秒
# 去掉秒,重新构造时间字符串
end_time_str = end_time_str[:5]
else:
pass
#raise ValueError("Unsupported time format")
if len(end_time_str) == 5 and len(end_time_str) == 5:
# 将时间字符串转换为 datetime 对象
start_time = pd.to_datetime(start_time_str, format='%H:%M')
end_time = pd.to_datetime(end_time_str, format='%H:%M')
print (start_time, end_time)
# 计算时间差并转换成小时和分钟
time_diff = end_time - start_time
hours = int(time_diff.seconds // 3600)
minutes = int((time_diff.seconds % 3600) // 60)
# 将结果放入第12行第13列
df.iloc[row, column+11] = f"{hours}小时{minutes}分钟"
print (df.iloc[row, column+11])
else:
df.iloc[row, column+11] = f"无法计算"
print (df.iloc[row, column+11])
#total time
times = df.iloc[11:26,column+11].tolist()
print (times)
#df.iloc[10, column+11] = f"Total:"
# 初始化总分钟数
total_minutes = 0
# 遍历每个时间并将其转换为分钟并相加
for time_str in times:
if isinstance(time_str, str):
if '小时' in time_str and '分钟' in time_str:
print (time_str)
#hours, minutes = map(int, time_str.split('小时')[0:2])
hours = int(time_str.split('小时')[0])
minutes = int(time_str.split('小时')[1].split('分钟')[0])
total_minutes += hours * 60 + minutes
#elif '小时' in time_str:
#hours = int(time_str.split('小时')[0])
#total_minutes += hours * 60
#elif '分钟' in time_str:
#minutes = int(time_str.split('分钟')[0])
#total_minutes += minutes
# 将总分钟数转换为小时和分钟
hours = total_minutes // 60
minutes = total_minutes % 60
df.iloc[10, column+11] = f"Total:{hours}小时{minutes}分钟"
# 写入到新的Excel文件中
df.to_excel(writer, sheet_name=sheet_name, index=False)
book = writer.book
book.save('output.xlsx')
# 保存并关闭新的Excel文件
writer.close()