import os
import hashlib

def file_hash(filepath):
    """Compute the MD5 hash of a file."""
    hash_md5 = hashlib.md5()
    with open(filepath, "rb") as f:
        # Read in 4 KB chunks so large files are not loaded fully into memory
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

def remove_duplicates(root_dir):
    """Delete duplicate files in the given directory and its subdirectories."""
    hashes = {}
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            filehash = file_hash(filepath)
            if filehash in hashes:
                # Hash already seen: this file is a duplicate, delete it
                print(f"Deleting duplicate file: {filepath}")
                os.remove(filepath)
            else:
                # First time seeing this hash: record it with the file path
                hashes[filehash] = filepath

# Usage example
root_directory = os.getcwd()
remove_duplicates(root_directory)
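Hashing every file can be slow on large trees, and deleting on the first hash match is irreversible. Below is a minimal sketch of a more cautious variant, not from the original post: it groups files by size first (files of different sizes cannot be identical), hashes only the size collisions with SHA-256, and by default only reports what it would delete. The names find_duplicates, sha256_of, and the dry_run switch are all assumptions for illustration.

import os
import hashlib
from collections import defaultdict

def sha256_of(filepath, chunk_size=4096):
    """Compute the SHA-256 hash of a file, reading in chunks."""
    h = hashlib.sha256()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

def find_duplicates(root_dir, dry_run=True):
    """Report (and optionally delete) duplicate files under root_dir.

    dry_run is a hypothetical safety switch, not in the original post:
    when True, duplicates are only printed, never removed.
    """
    # Pass 1: bucket paths by file size; only same-size files can match
    by_size = defaultdict(list)
    for dirpath, _dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            by_size[os.path.getsize(filepath)].append(filepath)

    # Pass 2: hash only the size collisions
    seen = {}  # hash -> first path with that content
    for paths in by_size.values():
        if len(paths) < 2:
            continue  # unique size, cannot be a duplicate
        for filepath in paths:
            digest = sha256_of(filepath)
            if digest in seen:
                print(f"Duplicate of {seen[digest]}: {filepath}")
                if not dry_run:
                    os.remove(filepath)
            else:
                seen[digest] = filepath

find_duplicates(os.getcwd())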