# Copyright Todd Korody 2007
# Licensed under the GPLv2
# http://www.gnu.org/copyleft/gpl.html
import stat, sys, os, glob, shutil, string, Numeric, fnmatch
from stat import *
from mutagen import mp3
from mutagen import oggvorbis
from mutagen.oggflac import OggFLAC
from mutagen.flac import FLAC, FLACNoHeaderError
class DMMCore:
    """Find probable duplicate audio files by comparing their file names.

    Every scanned file name is reduced to a histogram of the letters A-Z it
    contains (text between parentheses is ignored).  Two files are reported
    as a probable duplicate pair when their histograms disagree on few
    enough letters, as controlled by ``RATE``.

    Results are sent to the ``output`` object given to the constructor,
    which must provide ``write(path, bitrate, length, size)`` and
    ``write_seperator()``.
    """

    # Glob patterns of the supported formats, in the order they are scanned.
    _PATTERNS = ("*.mp3", "*.ogg", "*.flac")
    # File extensions (lower case) that are recognised and stripped.
    _EXTENSIONS = (".mp3", ".ogg", ".flac")
    # Number of histogram columns actually used: one per letter A-Z.
    _LETTERS = 26

    def __init__(self, output):
        """Create a finder that reports duplicate pairs through ``output``."""
        self.RATE = .15  # Lower the RATE the less matches will be returned
        self.files = {}  # row number -> full path of the scanned file
        self.x = -1  # row number of the most recently loaded file
        self.count_of_files = 0
        self.recursive = False  # By default no recursion
        self.list_of_directories = []
        self.count = False
        self.filesize = False
        self.output = output

    def set_recursive(self, switch):
        """Enable or disable descending into sub-directories."""
        self.recursive = switch

    def set_rate(self, switch):
        """Set the maximum miss/hit ratio for two names to count as a match."""
        self.RATE = switch

    def set_count(self, switch):
        """When enabled, letters absent from both names are not compared."""
        self.count = switch

    def set_filesize(self, switch):
        """When enabled, matches must also be within 10% in file size."""
        self.filesize = switch

    def add_dir(self, directory):
        """Add ``directory`` to the list of directories to scan."""
        self.list_of_directories.append(directory)

    def count_files(self):
        """Return how many supported audio files the directories contain."""
        total = 0
        for directory in self.list_of_directories:
            for pattern in self._PATTERNS:
                for _ in GlobDirectoryWalker(directory, pattern,
                                             self.recursive):
                    total = total + 1
        return total

    def _strip_extension(self, name):
        """Return ``name`` without a trailing supported audio extension.

        Only the final extension is removed and the comparison ignores case,
        so ``Song.MP3`` becomes ``Song`` while an embedded ``.mp3`` in the
        middle of a name is left alone.
        """
        stem, extension = os.path.splitext(name)
        if extension.lower() in self._EXTENSIONS:
            return stem
        return name

    def _letter_counts(self, name):
        """Return a list of 26 counts, one per letter A-Z found in ``name``.

        Letters are counted case-insensitively.  Everything from a ``(`` up
        to the next ``)`` is skipped.  Characters whose upper-case form is
        not a single letter A-Z (digits, punctuation, accented letters) are
        ignored, so they can never index outside the histogram.
        """
        counts = [0] * self._LETTERS
        in_parentheses = False
        for char in name:
            if char == '(':
                in_parentheses = True
            elif char == ')':
                in_parentheses = False
            elif not in_parentheses:
                upper = char.upper()
                # upper() may yield several characters (e.g. for a sharp s),
                # which ord() cannot handle; such characters are skipped.
                if len(upper) == 1 and 'A' <= upper <= 'Z':
                    column = ord(upper) - ord('A')
                    counts[column] = counts[column] + 1
        return counts

    def load_matrix(self, extension):
        """Add a histogram row for every file matching ``extension``.

        ``extension`` is a glob pattern such as ``"*.mp3"``.  Each matching
        file gets the next row number in ``self.x``, its path is stored in
        ``self.files`` and its letter counts are added to ``self.matrix``,
        which must already exist and have enough rows.
        """
        for directory in self.list_of_directories:
            for path in GlobDirectoryWalker(directory, extension,
                                            self.recursive):
                self.x = self.x + 1
                self.files[self.x] = path
                name = self._strip_extension(os.path.split(path)[1])
                counts = self._letter_counts(name)
                for column in range(self._LETTERS):
                    if counts[column]:
                        self.matrix[self.x, column] = (
                            self.matrix[self.x, column] + counts[column])

    def _flac_length(self, path):
        """Return the length in whole seconds of a FLAC or Ogg FLAC file."""
        try:
            audio = FLAC(path)
        except FLACNoHeaderError:
            # Not a native FLAC stream; try FLAC inside an Ogg container.
            audio = OggFLAC(path)
        return int(audio.info.length)

    def format_output(self, dup_file, size):
        """Write one result line for ``dup_file`` to the output object.

        The bitrate and length are read from the file's metadata; if that
        fails for any reason both are reported as 0.  The bitrate of FLAC
        files is always reported as 0.  Files whose extension is not one of
        the supported formats are not written at all.
        """
        extension = os.path.splitext(dup_file)[1].lower()
        if extension not in self._EXTENSIONS:
            return
        bitrate = 0
        length = 0
        try:
            if extension == '.mp3':
                info = mp3.MP3(dup_file).info
                bitrate, length = info.bitrate, int(info.length)
            elif extension == '.ogg':
                info = oggvorbis.OggVorbis(dup_file).info
                bitrate, length = info.bitrate, int(info.length)
            else:
                length = self._flac_length(dup_file)
        except Exception:
            # Best effort: unreadable metadata must not stop the report.
            bitrate = 0
            length = 0
        self.output.write(dup_file, bitrate, length, size)

    def _names_match(self, first, second):
        """Return True when rows ``first`` and ``second`` are similar enough.

        A letter is a hit when both names contain it the same number of
        times and a miss otherwise.  The rows match when there is at least
        one hit and ``miss / hit`` is below ``RATE``.
        """
        hit = 0
        miss = 0
        for alpha in range(self._LETTERS):
            left = self.matrix[first, alpha]
            right = self.matrix[second, alpha]
            if self.count and left == 0 and right == 0:
                continue
            if left == right:
                hit = hit + 1
            else:
                miss = miss + 1
        if hit == 0:  # avoid dividing by zero
            return False
        return (float(miss) / float(hit)) < self.RATE

    def run(self, output):
        """Scan all added directories and report every probable duplicate pair.

        ``output`` is accepted for backward compatibility but is not used;
        results go to the output object given to the constructor.
        """
        # Start from a clean slate so that run() can be called repeatedly
        # without the row counter running past the end of the new matrix.
        self.x = -1
        self.files = {}
        # Only the first 26 of the 36 columns are written or compared.
        self.matrix = Numeric.zeros([self.count_files(), 36], Numeric.Int)
        for pattern in self._PATTERNS:
            self.load_matrix(pattern)
        total = self.x + 1
        for i in range(total):
            for ii in range(i + 1, total):
                if not self._names_match(i, ii):
                    continue
                file1 = os.path.getsize(self.files[i])
                file2 = os.path.getsize(self.files[ii])
                # If wanted ensure files are within a certain size of each other
                if self.filesize and file1 != 0 and file2 != 0:
                    diff = abs(file2 - file1) / (.5 * (file2 + file1))
                    if diff > .1:
                        continue
                self.format_output(self.files[i], file1)
                self.format_output(self.files[ii], file2)
                self.output.write_seperator()
class GlobDirectoryWalker:
    """Forward iterator over the entries of a directory that match a pattern.

    Note from the original author: this class is NOT my code; if it is in
    violation tell me and it will be removed.

    Instances are meant to be used in a ``for`` loop.  Iteration relies on
    the sequence protocol: ``__getitem__`` is called repeatedly and the loop
    ends when it raises ``IndexError``.

    directory -- directory to start from
    pattern   -- ``fnmatch`` style pattern applied to each entry name
    recursive -- when true, sub-directories (but not symbolic links to
                 directories) are searched as well
    """

    def __init__(self, directory, pattern="*", recursive=False):
        self.stack = [directory]  # directories that still have to be listed
        self.pattern = pattern
        self.files = []  # entries of the directory currently being read
        self.index = 0  # position of the next entry in self.files
        self.recursive = recursive

    def __getitem__(self, index):
        """Return the full path of the next matching entry.

        ``index`` is ignored; entries are always produced in walk order.
        Raises ``IndexError`` once every pending directory has been read,
        which is what ends a ``for`` loop over the walker.
        """
        while True:
            try:
                name = self.files[self.index]
                self.index = self.index + 1
            except IndexError:
                # Current directory is exhausted: pop the next one.  When the
                # stack is empty, pop() itself raises IndexError and thereby
                # terminates the iteration.
                self.directory = self.stack.pop()
                try:
                    self.files = os.listdir(self.directory)
                except OSError:
                    # Unreadable or vanished directory: skip it rather than
                    # aborting the whole walk.
                    self.files = []
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, name)
                if (self.recursive and os.path.isdir(fullname)
                        and not os.path.islink(fullname)):
                    self.stack.append(fullname)
                if fnmatch.fnmatch(name, self.pattern):
                    return fullname