andrey@evilhorse ~ $ cat md5er.py
# encoding: utf-8
import os
import hashlib
import sys
# Duplicate-file finder: hash every file under a directory tree with MD5 and
# report the groups of paths whose contents produce the same digest.

block_size = 1048576  # bytes per read; 1 MiB, so one block == one "M" of progress
res = {}  # raw MD5 digest -> list of file paths that hash to it


def _emit(text):
    """Write *text* to stdout without a trailing newline and flush at once.

    Used for the progress fragments that build up a single output line while
    a file is still being hashed.
    """
    sys.stdout.write(text)
    sys.stdout.flush()


def md5_of_file(path):
    """Return the raw (binary) MD5 digest of the file at *path*.

    The file is read in chunks of ``block_size`` bytes; after every 25 chunks
    a "<n>M " progress marker is printed, where n is the number of chunks
    read so far.
    """
    md5 = hashlib.md5()
    blocks_read = 0
    # Binary mode: the hash must cover the exact bytes on disk, with no
    # newline translation applied by the I/O layer.
    with open(path, 'rb') as f:
        while True:
            chunk = f.read(block_size)
            if not chunk:
                break
            md5.update(chunk)
            blocks_read += 1
            if blocks_read % 25 == 0:
                _emit("%dM " % blocks_read)
    return md5.digest()


def scan_tree(top):
    """Hash every file found by ``os.walk(top)`` and record it in ``res``.

    Each path is appended to the list stored under its digest, so any list
    that ends up with more than one entry is a group of identical files.
    """
    for root, dirs, files in os.walk(top):
        for filename in files:
            full_path = os.path.join(root, filename)
            _emit("Processing %s: " % full_path)
            digest = md5_of_file(full_path)
            print("done")
            res.setdefault(digest, []).append(full_path)


scan_tree("АХТУНГ!!!")
for digest, value in res.items():
    if len(value) > 1:
        print(u'saem filez: %s' % str(value))
andrey@evilhorse ~ $
где-то вот так