import os
import hashlib
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
from tinytag import TinyTag
# File-name suffixes treated as audio during the folder scan. The scan
# lower-cases each file name before testing it against this tuple, so the
# entries are written in lower case.
SUPPORTED_EXT = (".mp3", ".wav", ".flac", ".m4a")
# --------------------------------------------------
# Helpers
# --------------------------------------------------
def file_hash(path, block_size=65536):
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is opened in binary mode and consumed ``block_size`` bytes at
    a time, so its contents are never held in memory all at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        while True:
            chunk = stream.read(block_size)
            # An empty read marks end of file.
            if chunk == b"":
                break
            digest.update(chunk)
    return digest.hexdigest()
def get_metadata(path):
    """Read the tags used for duplicate matching from the file at *path*.

    Returns a dict with three keys: ``"title"`` and ``"artist"`` (stripped
    and lower-cased, ``""`` when the tag is absent) and ``"duration"``
    (rounded to one decimal place, ``0`` when absent). If anything goes
    wrong while reading the tags, the all-empty record is returned instead
    of raising.
    """
    try:
        info = TinyTag.get(path)
        raw_title = info.title or ""
        raw_artist = info.artist or ""
        length = info.duration or 0
        return {
            "title": raw_title.strip().lower(),
            "artist": raw_artist.strip().lower(),
            "duration": round(length, 1),
        }
    except Exception:
        # Best effort: an unreadable or untagged file just has no metadata.
        return {"title": "", "artist": "", "duration": 0}
# --------------------------------------------------
# Main App
# --------------------------------------------------
class MusicDeduplicator:
    """Tkinter window that scans a folder tree for duplicate audio files.

    Every file whose name ends with one of ``SUPPORTED_EXT`` is checked in
    two ways: identical content (``file_hash``) and identical
    (title, artist, duration) tags (``get_metadata``). Each detected pair
    becomes one row of a three-column table.
    """

    def __init__(self, root):
        """Configure the window *root* and create all widgets."""
        self.root = root
        self.root.title("Music Playlist Deduplicator")
        self.root.geometry("900x520")
        # Defined up front so "no folder chosen yet" is an ordinary falsy
        # value instead of a missing attribute.
        self.folder = None
        self.files = []
        self.duplicates = []
        self.build_ui()

    # ---------------- UI ----------------
    def build_ui(self):
        """Create the button/status bar and the results table."""
        top = ttk.Frame(self.root)
        top.pack(fill="x", padx=10, pady=10)
        ttk.Button(
            top, text="Select Music Folder", command=self.select_folder
        ).pack(side="left")
        ttk.Button(
            top, text="Scan for Duplicates", command=self.scan
        ).pack(side="left", padx=10)
        self.status = ttk.Label(top, text="No folder selected")
        self.status.pack(side="left", padx=10)

        self.tree = ttk.Treeview(
            self.root,
            columns=("method", "file1", "file2"),
            show="headings",
        )
        self.tree.heading("method", text="Duplicate Type")
        self.tree.heading("file1", text="Song A")
        self.tree.heading("file2", text="Song B")
        self.tree.column("method", width=140)
        self.tree.column("file1", width=360)
        self.tree.column("file2", width=360)
        self.tree.pack(fill="both", expand=True, padx=10, pady=10)

    # ---------------- Logic ----------------
    def select_folder(self):
        """Ask the user for a folder and remember it for the next scan."""
        chosen = filedialog.askdirectory()
        # A cancelled dialog yields an empty value; keep the previous
        # selection (and the label that shows it) in that case.
        if chosen:
            self.folder = chosen
            self.status.config(text=chosen)

    def scan(self):
        """Collect audio files under the chosen folder and report duplicates.

        Shows an error dialog and does nothing else when no folder has been
        selected. Otherwise clears the table, rebuilds ``self.files`` and
        ``self.duplicates``, and displays the result.
        """
        folder = getattr(self, "folder", None)
        if not folder:
            messagebox.showerror("Error", "Select a music folder first!")
            return

        self.tree.delete(*self.tree.get_children())
        self.files = []
        self.duplicates = []
        for dirpath, _, names in os.walk(folder):
            self.files.extend(
                os.path.join(dirpath, name)
                for name in names
                if name.lower().endswith(SUPPORTED_EXT)
            )
        self.find_duplicates()
        self.display_results()

    def find_duplicates(self):
        """Append ``(method, first_file, later_file)`` tuples to ``self.duplicates``.

        Each later file is paired with the first file seen that has the same
        key. A pair already reported as "Exact File" is not reported again
        as "Metadata Match", so each pair is listed and counted once.
        """
        # --- 1. Exact file hash duplicates ---
        exact_pairs = set()
        first_by_hash = {}
        for path in self.files:
            try:
                digest = file_hash(path)
            except OSError:
                # Unreadable file (removed, permission denied, ...): it
                # cannot be compared by content, so skip it in this pass.
                continue
            if digest in first_by_hash:
                pair = (first_by_hash[digest], path)
                exact_pairs.add(pair)
                self.duplicates.append(("Exact File",) + pair)
            else:
                first_by_hash[digest] = path

        # --- 2. Metadata duplicates ---
        first_by_tags = {}
        for path in self.files:
            meta = get_metadata(path)
            key = (meta["title"], meta["artist"], meta["duration"])
            # Files with no usable tags at all would all "match" each other.
            if key == ("", "", 0):
                continue
            if key not in first_by_tags:
                first_by_tags[key] = path
                continue
            pair = (first_by_tags[key], path)
            if pair not in exact_pairs:
                self.duplicates.append(("Metadata Match",) + pair)

    def display_results(self):
        """Fill the table with ``self.duplicates`` and show a summary dialog."""
        if not self.duplicates:
            messagebox.showinfo("Result", "No duplicate songs found!")
            return
        for method, f1, f2 in self.duplicates:
            self.tree.insert("", "end", values=(method, f1, f2))
        messagebox.showinfo(
            "Result",
            f"Found {len(self.duplicates)} duplicate pairs."
        )
# --------------------------------------------------
# RUN
# --------------------------------------------------
if __name__ == "__main__":
    # Script entry point: create the top-level window, attach the
    # application to it, and hand control to the Tk event loop.
    window = tk.Tk()
    application = MusicDeduplicator(window)
    window.mainloop()
# (Web-page footer captured along with the code: "No comments: Post a Comment")