#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Delete recovered files that duplicate files in the main document tree.

The tree below ``scanpath`` is walked recursively.  Every regular file
found there is compared with the file at the same relative path below
``mainpath``; when both have the same size and the same MD5 digest, the
copy below ``scanpath`` is removed.  Files that exist in both trees but
differ, and directories that contain neither regular files nor
sub-directories, are reported on standard output.
"""

import hashlib
import os
import random
import stat

mainpath = "/home/user1/Documents"
scanpath = "/media/recovery/user1/Documents"

# Number of bytes read per iteration while hashing a file.
_CHUNK_SIZE = 8096


def md5sumfile(filename):
    """Return the hexadecimal MD5 digest of the file *filename*.

    If the file cannot be opened or read, a fresh ``random.random()``
    float is returned instead of raising.  A float never equals a hex
    digest string, and two separate failures yield different floats, so
    callers that compare results for equality treat an unreadable file
    as "different" rather than as a match.
    """
    digest = hashlib.md5()
    try:
        # The context manager closes the handle even when a read fails.
        with open(filename, 'rb') as handle:
            while True:
                chunk = handle.read(_CHUNK_SIZE)
                if not chunk:
                    break
                digest.update(chunk)
    except (IOError, OSError):
        return random.random()
    return digest.hexdigest()


def _is_svn_path(dirname):
    """Return True if any component of the relative path is ``.svn``."""
    return ".svn" in dirname.replace(os.sep, "/").split("/")


def _same_content(path_a, path_b):
    """Return True if both files have equal size and equal MD5 digest."""
    # The cheap size comparison avoids hashing files that cannot match.
    if os.path.getsize(path_a) != os.path.getsize(path_b):
        return False
    return md5sumfile(path_a) == md5sumfile(path_b)


def walkdir(toplevel, dirname="", reference=None):
    """Remove files below *toplevel* that duplicate files below *reference*.

    Parameters:
        toplevel:  root of the tree that is scanned and cleaned up.
        dirname:   directory to process, relative to *toplevel*; the
                   empty string means *toplevel* itself.
        reference: root of the tree holding the files to keep.  When
                   omitted, the module-level ``mainpath`` is used.

    Sub-directories are processed recursively before the files of the
    current directory.  ``.svn`` directories and everything below them
    are skipped.  Entries that cannot be ``lstat``-ed are ignored, and
    entries that are neither directories nor regular files (``lstat``
    does not follow symbolic links) are left alone.

    Raises:
        OSError: if a directory cannot be listed or a file size cannot
                 be determined.
    """
    if reference is None:
        reference = mainpath
    if _is_svn_path(dirname):
        return

    files = []
    has_subdir = False
    for entry in os.listdir(os.path.join(toplevel, dirname)):
        relpath = os.path.join(dirname, entry)
        try:
            st = os.lstat(os.path.join(toplevel, relpath))
        except OSError:
            continue
        if stat.S_ISDIR(st.st_mode):
            walkdir(toplevel, relpath, reference)
            has_subdir = True
        elif stat.S_ISREG(st.st_mode):
            files.append(relpath)

    if not files and not has_subdir:
        print("Empty directory %r" % dirname)

    for relpath in files:
        original = os.path.join(reference, relpath)
        candidate = os.path.join(toplevel, relpath)
        if not os.path.exists(original):
            # Nothing to compare against: keep the scanned file.
            continue
        if not _same_content(original, candidate):
            print("File %r exists, but has different content" % relpath)
            continue
        print("Removing %s" % candidate)
        try:
            os.remove(candidate)
        except OSError as err:
            # Keep going, but do not leave the "Removing" line
            # uncorrected when the delete did not actually happen.
            print("Could not remove %s: %s" % (candidate, err))


if __name__ == "__main__":
    walkdir(scanpath)