# 原文链接:https://blog.csdn.net/babytiger/article/details/90293866
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# 运行的代码文件要放到删除重复的文件或图片所包含的目录中
import os
import hashlib
# Directory whose duplicate files are to be cleaned up.
base = "D:\\working\\Python\\readHeadFileTraining\\soldierDetection\\data\\download"
def filecount(directory=None):
    """Count the files found under a directory tree.

    The count is taken with ``os.walk`` instead of shelling out to the
    Windows ``dir ... | find`` pipeline, so it works on any platform, is
    not affected by spaces or shell metacharacters in the path, and does
    not count sub-directory entries as files.

    Args:
        directory: Root directory to scan. When omitted, the module-level
            ``base`` path is used.

    Returns:
        int: Number of non-directory entries in ``directory`` and all of
        its sub-directories; 0 when the directory is empty or cannot be
        listed (``os.walk`` ignores listing errors by default).
    """
    # ``base`` is looked up only when no explicit directory is given.
    root = base if directory is None else directory
    return sum(len(filenames) for _dirpath, _dirnames, filenames in os.walk(root))
def md5sum(filename):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is hashed from its raw bytes, read in fixed-size chunks so
    that large files do not have to fit in memory. (Hashing
    ``str(data).encode()`` would digest the textual repr of the bytes
    object, not the file content itself.)

    Args:
        filename: Path of the file to hash.

    Returns:
        str: The MD5 digest as a 32-character lowercase hex string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    # ``with`` guarantees the handle is closed even if a read fails.
    with open(filename, 'rb') as handle:
        # Two-argument iter(): keep reading 1 MiB chunks until EOF (b'').
        for chunk in iter(lambda: handle.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()
def delfile(directory=None):
    """Delete every file whose content duplicates an earlier file.

    The tree is walked top-down; the first file seen with a given digest
    is kept and every later file with the same digest is removed. A line
    is printed for each removed file, numbered from 1.

    Args:
        directory: Root directory to de-duplicate. When omitted, the
            module-level ``base`` path is used.

    Returns:
        None

    Raises:
        OSError: If a file cannot be read or removed.
    """
    root = base if directory is None else directory
    seen_digests = set()  # set gives O(1) membership tests per file
    deleted = 0
    for dirpath, dirnames, filenames in os.walk(root):
        # Sorting (in place for dirnames, which steers os.walk) makes the
        # choice of which copy survives deterministic.
        dirnames.sort()
        for name in sorted(filenames):
            # Join with the directory being visited, not the root, so that
            # files inside sub-directories are addressed correctly.
            path = os.path.join(dirpath, name)
            digest = md5sum(path)  # hash each file exactly once
            if digest in seen_digests:
                os.remove(path)
                deleted += 1
                print(deleted, 'will delete ' + path)
            else:
                seen_digests.add(digest)
if __name__ == '__main__':
    # Script entry point: report the file count before de-duplication,
    # remove the duplicates, then report how many files remain and how
    # many were deleted.
    oldf = filecount()
    print('去重前有', oldf, '个文件\n\n\n请稍等正在删除重复文件...')
    delfile()
    # Count once after de-duplication and reuse the value, so the
    # "remaining" and "deleted" figures always agree with each other.
    newf = filecount()
    print('\n\n去重后剩', newf, '个文件')
    print('\n\n一共删除了', oldf - newf, '个文件\n\n')
# 标签:__,python,tlie,base,print,转载,filecount,md5 From: https://www.cnblogs.com/xianzhewujie/p/16751286.html