Python實現識別文件夾以及子文件夾中的重複文件,使用前需要注意幾點:
1、python使用python3
2、確保代碼中的folder_path路徑正確
3、確保代碼中的output_file路徑正確且存在
4、確保有訪問文件夾的權限,不行就使用sudo
一、python3版本
import os
import hashlib
def calculate_md5(file_path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is opened in binary mode and read in fixed-size chunks, so the
    whole file is never held in memory at once.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes requested per read. It only affects how
            the file is read, not the resulting digest.

    Returns:
        The MD5 digest of the file contents as a hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is not a positive number.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) returns b"" immediately, which would silently yield the
        # digest of an empty file for any input.
        raise ValueError("chunk_size must be a positive integer")

    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        # iter() with a sentinel stops at the empty bytes object read at EOF.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
def find_duplicate_images(
    folder_path,
    extensions=('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.apk'),
):
    """Group files under *folder_path* that share the same MD5 digest.

    The directory tree is traversed with ``os.walk`` and every file whose
    lower-cased name ends with one of *extensions* is hashed with
    ``calculate_md5``.

    Args:
        folder_path: Root directory to scan, sub-directories included.
        extensions: A suffix string or tuple of suffix strings, as accepted
            by ``str.endswith``. They are compared against the lower-cased
            file name, so they should be lower-case themselves. Pass
            ``None`` to consider every file.

    Returns:
        A dict mapping each MD5 hex digest to the list of file paths that
        produced it, restricted to digests shared by more than one file.
    """
    import sys  # local import: only needed to report skipped files

    paths_by_md5 = {}
    for root, _, files in os.walk(folder_path):
        for name in files:
            if extensions is not None and not name.lower().endswith(extensions):
                continue
            file_path = os.path.join(root, name)
            try:
                file_md5 = calculate_md5(file_path)
            except OSError as exc:
                # One unreadable file (permission denied, dangling symlink)
                # must not discard the hashing already done: report it and
                # carry on with the rest of the tree.
                print(f"Skipping unreadable file {file_path}: {exc}", file=sys.stderr)
                continue
            paths_by_md5.setdefault(file_md5, []).append(file_path)
    return {md5: paths for md5, paths in paths_by_md5.items() if len(paths) > 1}
def save_to_txt(md5_dict, output_file):
    """Write groups of duplicate files to a plain-text report.

    For every entry the digest followed by a colon is written on its own
    line, then one file path per line, then a blank separator line.

    Args:
        md5_dict: Mapping of MD5 hex digest to the list of file paths that
            share it.
        output_file: Path of the report file. It is created, or truncated
            if it already exists.

    Raises:
        OSError: If *output_file* cannot be opened for writing.
    """
    # Explicit UTF-8 keeps the report independent of the locale encoding.
    # backslashreplace writes characters that cannot be encoded (such as a
    # lone surrogate in a file name) as escapes instead of aborting the
    # report half-way through.
    with open(output_file, "w", encoding="utf-8", errors="backslashreplace") as report:
        for md5, files in md5_dict.items():
            report.write(f"{md5}:\n")
            report.writelines(f"{path}\n" for path in files)
            report.write("\n")
if __name__ == "__main__":
    # Script entry point: scan one folder and write the duplicate report.
    folder_path = "/home/ubuntu/Downloads"  # replace with the folder you want to scan
    output_file = "duplicate_files.txt"  # relative path, resolved against the current working directory
    duplicates = find_duplicate_images(folder_path)
    save_to_txt(duplicates, output_file)
    print(f"結果已保存到 {output_file}")
二、python2版本:
# -*- coding: utf-8 -*-
import os
import hashlib
def calculate_md5(file_path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is opened in binary mode and read in fixed-size chunks, so the
    whole file is never held in memory at once.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes requested per read. It only affects how
            the file is read, not the resulting digest.

    Returns:
        The MD5 digest of the file contents as a hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is not a positive number.
        IOError: If the file cannot be opened or read.
    """
    if chunk_size <= 0:
        # read(0) returns an empty string immediately, which would silently
        # yield the digest of an empty file for any input.
        raise ValueError("chunk_size must be a positive integer")

    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        while True:
            chunk = stream.read(chunk_size)
            if not chunk:  # empty read means end of file
                break
            digest.update(chunk)
    return digest.hexdigest()
def find_duplicate_images(folder_path,
                          extensions=('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.apk')):
    """Group files under *folder_path* that share the same MD5 digest.

    The directory tree is traversed with ``os.walk`` and every file whose
    lower-cased name ends with one of *extensions* is hashed with
    ``calculate_md5``.

    Args:
        folder_path: Root directory to scan, sub-directories included.
        extensions: A suffix string or tuple of suffix strings, as accepted
            by ``str.endswith``. They are compared against the lower-cased
            file name, so they should be lower-case themselves. Pass
            ``None`` to consider every file.

    Returns:
        A dict mapping each MD5 hex digest to the list of file paths that
        produced it, restricted to digests shared by more than one file.
    """
    import sys  # local import: only needed to report skipped files

    paths_by_md5 = {}
    for root, _, files in os.walk(folder_path):
        for name in files:
            if extensions is not None and not name.lower().endswith(extensions):
                continue
            file_path = os.path.join(root, name)
            try:
                file_md5 = calculate_md5(file_path)
            except (IOError, OSError) as exc:
                # One unreadable file (permission denied, dangling symlink)
                # must not discard the hashing already done: report it and
                # carry on with the rest of the tree.
                sys.stderr.write(
                    "Skipping unreadable file {}: {}\n".format(file_path, exc))
                continue
            paths_by_md5.setdefault(file_md5, []).append(file_path)
    return {md5: paths for md5, paths in paths_by_md5.items() if len(paths) > 1}
def save_to_txt(md5_dict, output_file):
    """Write groups of duplicate files to a plain-text report.

    For every entry the digest followed by a colon is written on its own
    line, then one file path per line, then a blank separator line.

    Args:
        md5_dict: Mapping of MD5 hex digest to the list of file paths that
            share it.
        output_file: Path of the report file. It is created, or truncated
            if it already exists.

    Raises:
        IOError: If *output_file* cannot be opened for writing.
    """
    with open(output_file, "w") as report:
        for md5, files in md5_dict.items():
            report.write("{}:\n".format(md5))
            # One batched call per group instead of one write per path.
            report.writelines("{}\n".format(path) for path in files)
            report.write("\n")
if __name__ == "__main__":
    # Script entry point: scan one folder and write the duplicate report.
    folder_path = "/home/ubuntu/Downloads"  # replace with the folder you want to scan
    output_file = "duplicate_files.txt"  # relative path, resolved against the current working directory
    duplicates = find_duplicate_images(folder_path)
    save_to_txt(duplicates, output_file)
    # A single parenthesised argument prints the same text under the
    # Python 2 print statement and also parses under Python 3.
    print("結果已保存到 {}".format(output_file))