# Copyright Todd Korody 2007
# Licensed under the GPLv2
# http://www.gnu.org/copyleft/gpl.html

import stat, sys, os, glob, shutil, string, Numeric, fnmatch
from stat import *
from mutagen import mp3
from mutagen import oggvorbis
from mutagen.oggflac import OggFLAC
from mutagen.flac import FLAC, FLACNoHeaderError

class DMMCore:
    """Find likely duplicate media files by comparing file-name letter counts.

    Every ``*.mp3``, ``*.ogg`` and ``*.flac`` file found in the registered
    directories gets one matrix row holding how often each letter A-Z occurs
    in its file name (text inside parentheses is skipped).  Two files are
    reported as duplicates when the ratio of letters whose counts differ to
    letters whose counts agree is below ``RATE``.

    ``output`` must provide ``write(path, bitrate, length, size)`` and
    ``write_seperator()``; those are the only calls made on it here.
    """

    # Glob patterns scanned, in the order their files are loaded by run().
    _PATTERNS = ("*.mp3", "*.ogg", "*.flac")
    # Columns allocated per matrix row; only the first _LETTERS columns
    # (A-Z) are ever written or compared.
    _COLUMNS = 36
    _LETTERS = 26

    def __init__(self, output):
        self.RATE = .15  # Lower the RATE the less matches will be returned
        self.files = {}  # matrix row index -> full path of the file
        self.x = -1  # index of the last row filled by load_matrix
        self.count_of_files = 0
        self.recursive = False  # By default no recursion
        self.list_of_directories = []
        self.count = False  # skip letters absent from both names when comparing
        self.filesize = False  # additionally require similar file sizes
        self.output = output

    def set_recursive(self, switch):
        """Enable or disable descending into sub-directories."""
        self.recursive = switch

    def set_rate(self, switch):
        """Set the miss/hit ratio below which two names count as a match."""
        self.RATE = switch

    def set_count(self, switch):
        """When enabled, letters missing from both names are not counted as hits."""
        self.count = switch

    def set_filesize(self, switch):
        """When enabled, matches whose sizes differ by more than 10% are dropped."""
        self.filesize = switch

    def add_dir(self, directory):
        """Register another directory to scan."""
        self.list_of_directories.append(directory)

    def count_files(self):
        """Return how many mp3/ogg/flac files the registered directories hold."""
        total = 0
        for directory in self.list_of_directories:
            for pattern in self._PATTERNS:
                for _ in GlobDirectoryWalker(directory, pattern, self.recursive):
                    total = total + 1
        return total

    def load_matrix(self, extension):
        """Record every file matching *extension* and tally its name letters.

        *extension* is a glob pattern such as ``"*.mp3"``.  ``self.matrix``
        must already exist with a row for every file (``run`` creates it).
        Each file takes the next row: ``self.x`` is advanced and the path is
        stored in ``self.files`` under that index.
        """
        for directory in self.list_of_directories:
            for path in GlobDirectoryWalker(directory, extension, self.recursive):
                self.x = self.x + 1
                self.files[self.x] = path
                name = os.path.split(path)[1]
                # This could replace stuff it wasn't meant to but most
                # likely it shouldn't have been there in the first place
                for suffix in ('.mp3', '.ogg', '.flac'):
                    name = name.replace(suffix, '')
                paren_open = False
                for letter in name:
                    if letter == '(':
                        paren_open = True
                    if letter == ')':
                        paren_open = False
                    if paren_open:
                        continue
                    # Only plain ASCII letters map onto the A-Z columns;
                    # any other alphabetic character would index outside
                    # the row, so it is ignored.
                    if 'a' <= letter <= 'z' or 'A' <= letter <= 'Z':
                        column = ord(letter.upper()) - 65
                        self.matrix[self.x, column] = self.matrix[self.x, column] + 1

    def format_output(self, dup_file, size):
        """Write one duplicate entry (path, bitrate, length, size) to the output.

        The format is chosen from the end of *dup_file*, ignoring case.
        Bitrate and length are read with mutagen; if that fails for any
        reason both are reported as 0.  FLAC entries always report a bitrate
        of 0.  Files with any other ending are not written at all.
        """
        lowered = dup_file.lower()
        if lowered.endswith('mp3'):
            try:
                info = mp3.MP3(dup_file).info
                bitrate1 = info.bitrate
                length1 = int(info.length)
            except Exception:
                bitrate1 = 0
                length1 = 0
        elif lowered.endswith('ogg'):
            try:
                info = oggvorbis.OggVorbis(dup_file).info
                bitrate1 = info.bitrate
                length1 = int(info.length)
            except Exception:
                bitrate1 = 0
                length1 = 0
        elif lowered.endswith('flac'):
            bitrate1 = 0
            try:
                try:
                    length1 = int(FLAC(dup_file).info.length)
                except FLACNoHeaderError:
                    # Not a native FLAC stream; retry as FLAC inside Ogg.
                    length1 = int(OggFLAC(dup_file).info.length)
            except Exception:
                length1 = 0
        else:
            return
        self.output.write(dup_file, bitrate1, length1, size)

    def _names_match(self, i, ii):
        """Return True when rows *i* and *ii* are similar enough to report."""
        hit = 0
        miss = 0
        for alpha in range(self._LETTERS):
            first = self.matrix[i, alpha]
            second = self.matrix[ii, alpha]
            if self.count and first == 0 and second == 0:
                continue
            if first == second:
                hit = hit + 1
            else:
                miss = miss + 1
        # hit == 0 would divide by zero; such a pair is never a match.
        return hit != 0 and (float(miss) / float(hit)) < self.RATE

    def run(self, output):
        """Scan the registered directories and report every matching pair.

        *output* is accepted for compatibility but not used; results go to
        the output object given to the constructor.  For each matching pair
        both files are written followed by a separator.
        """
        # Start from a clean slate so that calling run() more than once does
        # not keep appending rows past the end of the freshly sized matrix.
        self.x = -1
        self.files = {}
        self.matrix = Numeric.zeros([self.count_files(), self._COLUMNS], Numeric.Int)
        for pattern in self._PATTERNS:
            self.load_matrix(pattern)

        total = self.x + 1
        for i in range(total):
            for ii in range(i + 1, total):
                if not self._names_match(i, ii):
                    continue
                file1 = os.stat(self.files[i]).st_size
                file2 = os.stat(self.files[ii]).st_size
                # If wanted ensure files are within a certain size of each other
                if self.filesize and file1 != 0 and file2 != 0:
                    diff = abs(file2 - file1) / (.5 * (file2 + file1))
                    if diff > .1:
                        continue

                self.format_output(self.files[i], file1)
                self.format_output(self.files[ii], file2)
                self.output.write_seperator()



class GlobDirectoryWalker:  # This class is NOT my code; if it is in violation tell me and it will be removed
    """Forward iterator yielding paths in a directory tree that match a pattern.

    Meant to be used directly in a ``for`` loop.  Iteration relies on the
    sequence protocol: ``__getitem__`` ignores the index it is given and
    simply returns the next match, and the walk ends when popping from the
    empty directory stack raises ``IndexError``.

    directory -- directory where the walk starts
    pattern   -- ``fnmatch`` pattern tested against each entry's base name
    recursive -- when true, sub-directories (but not symlinks to
                 directories) are queued and walked as well

    Any entry whose name matches is returned, so a directory whose name
    matches the pattern is returned too.
    """

    def __init__(self, directory, pattern="*", recursive=False):
        self.stack = [directory]  # directories still to be listed
        self.pattern = pattern
        self.files = []  # entries of the directory currently being walked
        self.index = 0  # position of the next entry in self.files
        self.recursive = recursive

    def __getitem__(self, index):
        """Return the next matching full path; raise IndexError when done."""
        while 1:
            try:
                file = self.files[self.index]
                self.index = self.index + 1
            except IndexError:
                # pop next directory from stack; an empty stack raises
                # IndexError, which is what terminates the for loop
                self.directory = self.stack.pop()
                self.files = os.listdir(self.directory)
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, file)
                if self.recursive:
                    if os.path.isdir(fullname) and not os.path.islink(fullname):
                        self.stack.append(fullname)
                if fnmatch.fnmatch(file, self.pattern):
                    return fullname