"""Music Playlist Deduplicator."""

import os

import hashlib

import tkinter as tk

from tkinter import ttk, filedialog, messagebox

from tinytag import TinyTag


# Lower-case filename suffixes that are treated as audio files. Kept as a
# tuple so it can be handed straight to str.endswith().
SUPPORTED_EXT = (
    ".mp3",
    ".wav",
    ".flac",
    ".m4a",
)



# --------------------------------------------------

# Helpers

# --------------------------------------------------

def file_hash(path, block_size=65536):
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is opened in binary mode and fed to the hash in pieces of up to
    *block_size* bytes, stopping at the first empty read (end of file).
    Errors from opening or reading the file propagate to the caller.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            chunk = stream.read(block_size)
            if chunk == b"":
                break
            digest.update(chunk)
    return digest.hexdigest()



def get_metadata(path):
    """Read the title, artist and duration tags of the audio file at *path*.

    Returns a dict with three keys:

    * ``"title"`` / ``"artist"`` - the tag text with surrounding whitespace
      removed and lower-cased, or ``""`` when the tag is missing.
    * ``"duration"`` - the reported duration rounded to one decimal place,
      or ``0`` when it is missing.

    This is best-effort: any exception raised while reading the tags results
    in the all-empty record ``{"title": "", "artist": "", "duration": 0}``
    instead of propagating.
    """
    try:
        info = TinyTag.get(path)
        title = (info.title or "").strip().lower()
        artist = (info.artist or "").strip().lower()
        length = round(info.duration or 0, 1)
    except Exception:
        # Deliberately broad: an unreadable or unsupported file must not
        # abort the caller; it is reported as "no metadata" instead.
        return {"title": "", "artist": "", "duration": 0}
    return {"title": title, "artist": artist, "duration": length}



# --------------------------------------------------

# Main App

# --------------------------------------------------

class MusicDeduplicator:
    """Tkinter window that scans a folder tree for duplicate audio files.

    Files whose names end in one of ``SUPPORTED_EXT`` are compared in two
    passes: identical SHA-256 content hashes are reported as "Exact File",
    and identical (title, artist, duration) tags as "Metadata Match".

    Attributes:
        root: The Tk root window the widgets are built in.
        folder: Path of the selected music folder; ``""`` until one is chosen.
        files: Audio file paths collected by the most recent scan.
        duplicates: ``(method, first_path, later_path)`` tuples produced by
            the most recent scan.
    """

    def __init__(self, root):
        """Configure *root* (title and size) and build the widgets in it."""
        self.root = root
        self.root.title("Music Playlist Deduplicator")
        self.root.geometry("900x520")

        # Always defined, so "nothing selected yet" is simply a falsy value.
        self.folder = ""
        self.files = []
        self.duplicates = []

        self.build_ui()

    # ---------------- UI ----------------

    def build_ui(self):
        """Create the toolbar (two buttons + status label) and results table."""
        top = ttk.Frame(self.root)
        top.pack(fill="x", padx=10, pady=10)

        ttk.Button(
            top, text="Select Music Folder", command=self.select_folder
        ).pack(side="left")
        ttk.Button(
            top, text="Scan for Duplicates", command=self.scan
        ).pack(side="left", padx=10)

        self.status = ttk.Label(top, text="No folder selected")
        self.status.pack(side="left", padx=10)

        self.tree = ttk.Treeview(
            self.root,
            columns=("method", "file1", "file2"),
            show="headings",
        )
        self.tree.heading("method", text="Duplicate Type")
        self.tree.heading("file1", text="Song A")
        self.tree.heading("file2", text="Song B")

        self.tree.column("method", width=140)
        self.tree.column("file1", width=360)
        self.tree.column("file2", width=360)

        self.tree.pack(fill="both", expand=True, padx=10, pady=10)

    # ---------------- Logic ----------------

    def select_folder(self):
        """Ask the user for a music folder and remember the choice.

        A cancelled dialog yields an empty value. In that case the previous
        selection and the status label are left untouched, so ``self.folder``
        always matches what the label shows.
        """
        chosen = filedialog.askdirectory()
        if chosen:
            self.folder = chosen
            self.status.config(text=chosen)

    def scan(self):
        """Collect the audio files under ``self.folder`` and show duplicates.

        Shows an error dialog and does nothing else when no folder has been
        selected. Otherwise clears the previous results, walks the folder
        tree recursively, then runs ``find_duplicates`` and
        ``display_results``.
        """
        if not self.folder:
            messagebox.showerror("Error", "Select a music folder first!")
            return

        self.tree.delete(*self.tree.get_children())
        self.files = []
        self.duplicates = []

        for dirpath, _, filenames in os.walk(self.folder):
            for filename in filenames:
                # Extension match is case-insensitive.
                if filename.lower().endswith(SUPPORTED_EXT):
                    self.files.append(os.path.join(dirpath, filename))

        self.find_duplicates()
        self.display_results()

    def find_duplicates(self):
        """Append the duplicate pairs found in ``self.files`` to ``self.duplicates``.

        Each entry is ``(method, first_path, later_path)`` where *first_path*
        is the earliest file in ``self.files`` with the same key. All
        "Exact File" pairs (same SHA-256) come first, followed by
        "Metadata Match" pairs (same title/artist/duration). A pair already
        reported as an exact match is not repeated as a metadata match, so
        every pair is counted once.

        Files with no tags but a non-zero duration are still compared, by
        duration alone.
        """
        exact = self._pair_first_seen("Exact File", self._iter_hashes())
        already_listed = {(first, later) for _, first, later in exact}
        tagged = self._pair_first_seen(
            "Metadata Match", self._iter_metadata_keys(), skip=already_listed
        )
        self.duplicates.extend(exact)
        self.duplicates.extend(tagged)

    def _iter_hashes(self):
        """Yield ``(sha256_hex, path)`` for each readable file in ``self.files``."""
        for path in self.files:
            try:
                digest = file_hash(path)
            except OSError:
                # Unreadable file (removed, permission denied, ...): leave it
                # out of the exact-match pass rather than abort the scan.
                continue
            yield digest, path

    def _iter_metadata_keys(self):
        """Yield ``((title, artist, duration), path)`` for files with metadata."""
        for path in self.files:
            meta = get_metadata(path)
            key = (meta["title"], meta["artist"], meta["duration"])
            # The all-empty key is what get_metadata returns when nothing
            # could be read; matching on it would pair unrelated files.
            if key != ("", "", 0):
                yield key, path

    @staticmethod
    def _pair_first_seen(method, keyed_paths, skip=frozenset()):
        """Pair each path with the first earlier path that shares its key.

        Args:
            method: Label stored as the first element of every result tuple.
            keyed_paths: Iterable of ``(key, path)`` in scan order.
            skip: ``(first_path, later_path)`` pairs to leave out.

        Returns:
            A list of ``(method, first_path, later_path)`` tuples.
        """
        first_by_key = {}
        pairs = []
        for key, path in keyed_paths:
            if key not in first_by_key:
                first_by_key[key] = path
                continue
            pair = (first_by_key[key], path)
            if pair not in skip:
                pairs.append((method, pair[0], pair[1]))
        return pairs

    def display_results(self):
        """Fill the table with ``self.duplicates`` and show a summary dialog."""
        if not self.duplicates:
            messagebox.showinfo("Result", "No duplicate songs found!")
            return

        for method, f1, f2 in self.duplicates:
            self.tree.insert("", "end", values=(method, f1, f2))

        messagebox.showinfo(
            "Result",
            f"Found {len(self.duplicates)} duplicate pairs."
        )



# --------------------------------------------------

# RUN

# --------------------------------------------------

if __name__ == "__main__":
    # Create the Tk root window, attach the application to it (keeping a
    # reference for the lifetime of the program), then hand control to the
    # Tk event loop.
    window = tk.Tk()
    deduplicator = MusicDeduplicator(window)
    window.mainloop()


# No comments: