- 相同文件的名字和類型可能不同, 但是md5值相同
- 使用shell獲取文件大小和md5值
- 效率並不好, 7000張照片用了接近300s, 求大神給點建議和方法
import hashlib
import os
import re
def _withPopen(commands):
    """Start *commands* with ``os.popen`` and hand back the resulting pipe.

    The caller reads the command's output from the returned file-like
    object (for example with ``.read()``).
    """
    pipe = os.popen(commands)
    return pipe
def _getMD5(string):
    """Group file names by MD5 digest and keep only the repeated digests.

    ``string`` is text made of lines shaped like ``MD5 (<file>) = <digest>``
    (the format this script receives from the ``md5`` command). Lines of
    any other shape, such as error messages or blank lines, are ignored.

    Returns a dict mapping every digest that appears on more than one line
    to the list of file names that carry it, in input order. The dict is
    empty when no digest repeats.
    """
    # The prefix must not contain "(", so the capture starts at the FIRST
    # parenthesis. A greedy ".*" prefix would start at the last one and cut
    # names such as "photo (1).jpg" down to "1).jpg".
    line_pattern = re.compile(r"[^(]*\((.*)\) = (\w+)")

    files_by_digest = {}
    for line in string.split("\n"):
        found = line_pattern.match(line)
        if not found:
            continue
        # Read the groups directly instead of substituting into the line,
        # so unmatched trailing text can never leak into the result.
        filename, digest = found.groups()
        files_by_digest.setdefault(digest, []).append(filename)

    return {
        digest: names
        for digest, names in files_by_digest.items()
        if len(names) > 1
    }
def _file_md5(filename, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is read in ``chunk_size``-byte pieces so large files are never
    loaded into memory at once.
    """
    digest = hashlib.md5()
    with open(filename, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _find_duplicates(directory):
    """Find regular files directly inside *directory* with identical content.

    Returns a dict mapping an MD5 digest to the list of full paths sharing
    it; only digests shared by at least two files are included.
    """
    # Pass 1: bucket by exact byte size. Files of different sizes cannot be
    # identical, so only buckets with two or more files need hashing.
    # (`ls -s` reports allocated blocks rather than bytes, which lumps
    # unrelated files together and forces far more hashing than necessary.)
    paths_by_size = {}
    for name in sorted(os.listdir(directory)):
        if name.startswith("."):
            continue  # plain `ls` did not list hidden entries either
        full_path = os.path.join(directory, name)
        if not os.path.isfile(full_path):
            continue  # directories cannot be hashed
        size = os.path.getsize(full_path)
        if size == 0:
            continue  # the old grep filter dropped zero-sized entries too
        paths_by_size.setdefault(size, []).append(full_path)

    # Pass 2: hash only the files whose size collides with another file.
    duplicates = {}
    for candidates in paths_by_size.values():
        if len(candidates) < 2:
            continue
        paths_by_digest = {}
        for full_path in candidates:
            paths_by_digest.setdefault(_file_md5(full_path), []).append(full_path)
        for digest, paths in paths_by_digest.items():
            if len(paths) > 1:
                duplicates[digest] = paths
    return duplicates


path = "input your path"
if os.path.isdir(path):
    # One dict per set of identical files: {digest: [paths]}.
    for digest, paths in sorted(_find_duplicates(path).items()):
        print({digest: paths})
else:
    print("not a directory: " + path)
最后編輯于 :
©著作權歸作者所有,轉載或內容合作請聯繫作者