# Smart Resume Formatter

from docx import Document

from docx.shared import Pt

from fpdf import FPDF


# ---------------------------

# 1. Format Resume into Word

# ---------------------------

def create_word_resume(data, filename="resume.docx"):
    """Generate a Word (.docx) resume from *data* and save it to *filename*.

    data: dict with keys "name", "email", "phone", "summary",
          "experience" (list of dicts: role/company/years/details),
          "education" (list of dicts: degree/institution/year),
          and "skills" (list of strings).
    """
    doc = Document()

    # Title (Name)
    title = doc.add_paragraph(data["name"])
    title.style = doc.styles['Title']

    # Contact Info
    doc.add_paragraph(f'Email: {data["email"]} | Phone: {data["phone"]}')

    # Sections
    doc.add_heading('Summary', level=1)
    doc.add_paragraph(data["summary"])

    doc.add_heading('Experience', level=1)
    for job in data["experience"]:
        doc.add_paragraph(f"{job['role']} at {job['company']} ({job['years']})")
        doc.add_paragraph(job["details"], style="List Bullet")

    doc.add_heading('Education', level=1)
    for edu in data["education"]:
        doc.add_paragraph(f"{edu['degree']} - {edu['institution']} ({edu['year']})")

    doc.add_heading('Skills', level=1)
    doc.add_paragraph(", ".join(data["skills"]))

    doc.save(filename)
    # Fix: the message printed the literal "(unknown)" instead of the actual
    # output path.
    print(f"✅ Word Resume saved as {filename}")



# ---------------------------

# 2. Format Resume into PDF

# ---------------------------

def create_pdf_resume(data, filename="resume.pdf"):
    """Generate a PDF resume from *data* and save it to *filename*.

    Expects the same dict layout as create_word_resume: name, email, phone,
    summary, experience (role/company/years/details), education
    (degree/institution/year) and skills.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", 'B', 16)

    # Title (Name)
    pdf.cell(200, 10, data["name"], ln=True, align="C")

    pdf.set_font("Arial", '', 12)
    pdf.cell(200, 10, f'Email: {data["email"]} | Phone: {data["phone"]}', ln=True, align="C")

    # Sections: bold 14pt heading, regular 12pt body
    pdf.set_font("Arial", 'B', 14)
    pdf.cell(200, 10, "Summary", ln=True)
    pdf.set_font("Arial", '', 12)
    pdf.multi_cell(0, 10, data["summary"])

    pdf.set_font("Arial", 'B', 14)
    pdf.cell(200, 10, "Experience", ln=True)
    pdf.set_font("Arial", '', 12)
    for job in data["experience"]:
        pdf.multi_cell(0, 10, f"{job['role']} at {job['company']} ({job['years']})\n - {job['details']}")

    pdf.set_font("Arial", 'B', 14)
    pdf.cell(200, 10, "Education", ln=True)
    pdf.set_font("Arial", '', 12)
    for edu in data["education"]:
        pdf.cell(200, 10, f"{edu['degree']} - {edu['institution']} ({edu['year']})", ln=True)

    pdf.set_font("Arial", 'B', 14)
    pdf.cell(200, 10, "Skills", ln=True)
    pdf.set_font("Arial", '', 12)
    pdf.multi_cell(0, 10, ", ".join(data["skills"]))

    pdf.output(filename)
    # Fix: the message printed the literal "(unknown)" instead of the actual
    # output path.
    print(f"✅ PDF Resume saved as {filename}")



# ---------------------------

# Example Data

# ---------------------------

# Sample resume payload consumed by both generators above.  Keys must match
# what create_word_resume()/create_pdf_resume() read: name, email, phone,
# summary, experience (role/company/years/details), education
# (degree/institution/year) and skills.
resume_data = {
    "name": "John Doe",
    "email": "john.doe@email.com",
    "phone": "+1-234-567-890",
    "summary": "Passionate software engineer with 5+ years of experience in building scalable applications.",
    "experience": [
        {"role": "Backend Developer", "company": "TechCorp", "years": "2020-2023", "details": "Developed APIs and microservices using Python & Django."},
        {"role": "Software Engineer", "company": "CodeWorks", "years": "2017-2020", "details": "Worked on automation tools and optimized system performance."}
    ],
    "education": [
        {"degree": "B.Sc. Computer Science", "institution": "XYZ University", "year": "2017"}
    ],
    "skills": ["Python", "Django", "Flask", "SQL", "Docker", "AWS"]
}

# Run both functions (executes at import time; this section is script-style)
create_word_resume(resume_data)
create_pdf_resume(resume_data)


# AI Workout Form Corrector

import cv2

import mediapipe as mp

import numpy as np


mp_drawing = mp.solutions.drawing_utils

mp_pose = mp.solutions.pose


# -----------------------

# Calculate angle between 3 points

# -----------------------

def calculate_angle(a, b, c):
    """Return the angle in degrees (0..180) at vertex *b* of the path a-b-c."""
    first, mid, end = np.array(a), np.array(b), np.array(c)

    # Angle of each ray measured from the vertex, then take the difference.
    ray_to_end = np.arctan2(end[1] - mid[1], end[0] - mid[0])
    ray_to_first = np.arctan2(first[1] - mid[1], first[0] - mid[0])
    degrees = np.abs((ray_to_end - ray_to_first) * 180.0 / np.pi)

    # Fold reflex angles back into the 0..180 range.
    return 360 - degrees if degrees > 180.0 else degrees


# -----------------------

# Main workout tracker (Squats Example)

# -----------------------

cap = cv2.VideoCapture(0)  # default webcam

with mp_pose.Pose(min_detection_confidence=0.7, min_tracking_confidence=0.7) as pose:
    counter = 0   # completed squat repetitions
    stage = None  # rep state machine: "up" (standing) / "down" (bottom)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Recolor image: MediaPipe expects RGB; mark read-only while detecting.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False

        # Make detection
        results = pose.process(image)

        # Recolor back to BGR for OpenCV drawing/display
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        try:
            landmarks = results.pose_landmarks.landmark

            # Get normalized (0..1) coordinates of the left leg joints
            hip = [landmarks[mp_pose.PoseLandmark.LEFT_HIP.value].x,
                   landmarks[mp_pose.PoseLandmark.LEFT_HIP.value].y]
            knee = [landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value].x,
                    landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value].y]
            ankle = [landmarks[mp_pose.PoseLandmark.LEFT_ANKLE.value].x,
                     landmarks[mp_pose.PoseLandmark.LEFT_ANKLE.value].y]

            # Knee angle at hip-knee-ankle
            angle = calculate_angle(hip, knee, ankle)

            # Visualize angle next to the knee
            # (assumes a 640x480 frame — TODO confirm against actual camera)
            cv2.putText(image, str(int(angle)),
                        tuple(np.multiply(knee, [640, 480]).astype(int)),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA
                        )

            # Squat counter logic: a rep counts when the knee bends below 90
            # degrees after having been nearly straight (>160 degrees).
            if angle > 160:
                stage = "up"
            if angle < 90 and stage == "up":
                stage = "down"
                counter += 1
                print(f"✅ Squat count: {counter}")

            # Depth feedback for the current frame
            if angle < 70:
                feedback = "Too Low! Go Higher"
            elif 70 <= angle <= 100:
                feedback = "Perfect Depth ✅"
            else:
                feedback = "Stand Tall"

            cv2.putText(image, feedback, (50,100),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2, cv2.LINE_AA)

        except Exception:
            # Fix: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt.  An AttributeError lands here when no pose is
            # detected (results.pose_landmarks is None); skip the overlay.
            pass

        # Render detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
                                  mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )

        cv2.imshow('AI Workout Form Corrector - Squats', image)

        # Quit on 'q'
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


# Voice Emotion Detector

import os

import librosa

import numpy as np

import sounddevice as sd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.svm import SVC

from sklearn.preprocessing import LabelEncoder, StandardScaler

import pickle


# -----------------------

# STEP 1: Feature Extraction

# -----------------------

def extract_features(file_path):
    """Load up to 3 s of audio from *file_path* and return a 1-D feature vector.

    The vector stacks the time-averaged MFCCs (40 coefficients), chroma and
    mel-spectrogram features.
    """
    signal, rate = librosa.load(file_path, duration=3, offset=0.5)

    feature_mats = [
        librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40),
        librosa.feature.chroma_stft(y=signal, sr=rate),
        librosa.feature.melspectrogram(y=signal, sr=rate),
    ]

    # Collapse the time axis: one mean value per coefficient/bin.
    return np.hstack([np.mean(mat, axis=1) for mat in feature_mats])


# -----------------------

# STEP 2: Training (Demo Dataset Simulation)

# -----------------------

def train_model():
    """Train an SVM emotion classifier on .wav files in ./dataset and pickle it.

    File names must start with the emotion label followed by an underscore,
    e.g. "angry_1.wav".  Saves (model, encoder, scaler) to emotion_model.pkl.

    Raises RuntimeError if no .wav files are found.
    """
    # Normally, load a dataset (RAVDESS, CREMA-D etc.)
    # Here, we'll simulate with few .wav files in "dataset/" folder
    # (removed an unused `emotions` dict that was never read)
    X, y = [], []
    dataset_path = "dataset"  # folder with wav files: angry_1.wav, happy_2.wav, etc.

    for file in os.listdir(dataset_path):
        if file.endswith(".wav"):
            label = file.split("_")[0]  # e.g., angry_1.wav → "angry"
            feature = extract_features(os.path.join(dataset_path, file))
            X.append(feature)
            y.append(label)

    # Fail fast with a clear message instead of a cryptic sklearn error later.
    if not X:
        raise RuntimeError(f"No .wav files found in '{dataset_path}/'")

    X = np.array(X)
    y = np.array(y)

    # Encode string labels as integers
    encoder = LabelEncoder()
    y = encoder.fit_transform(y)

    # Standardize features — SVMs are scale-sensitive
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = SVC(kernel="linear", probability=True)
    model.fit(X_train, y_train)

    acc = model.score(X_test, y_test)
    print(f"Model trained with accuracy: {acc*100:.2f}%")

    # Save model together with the encoder/scaler needed at prediction time
    with open("emotion_model.pkl", "wb") as f:
        pickle.dump((model, encoder, scaler), f)


# -----------------------

# STEP 3: Record & Predict

# -----------------------

def record_and_predict(duration=3, fs=22050):
    """Record *duration* seconds from the microphone, save temp.wav, then
    predict the emotion with the pickled model and plot class confidences.

    duration: recording length in seconds.
    fs: sample rate in Hz (must match the feature pipeline's expectations).
    """
    import wave  # local import: only needed here for saving the recording

    print("Recording...")
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
    sd.wait()
    print("Recording complete. Saving as temp.wav...")

    # Fix: librosa.output.write_wav was removed in librosa 0.8, so the
    # original call crashes at runtime.  Write the PCM data with the stdlib
    # `wave` module instead (mono, 16-bit).
    samples = np.clip(recording.flatten(), -1.0, 1.0)
    pcm16 = (samples * 32767).astype(np.int16)
    with wave.open("temp.wav", "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)  # 16-bit samples
        wf.setframerate(fs)
        wf.writeframes(pcm16.tobytes())

    with open("emotion_model.pkl", "rb") as f:
        model, encoder, scaler = pickle.load(f)

    # Same feature pipeline and scaling as training
    features = extract_features("temp.wav").reshape(1, -1)
    features = scaler.transform(features)
    pred = model.predict(features)[0]
    probas = model.predict_proba(features)[0]

    emotion = encoder.inverse_transform([pred])[0]
    print(f"Detected Emotion: {emotion}")

    # Plot probabilities
    plt.bar(encoder.classes_, probas)
    plt.title("Emotion Prediction Confidence")
    plt.show()


# -----------------------

# MAIN

# -----------------------

if __name__ == "__main__":
    # Train (and pickle) the model on first run only; reuse it afterwards.
    if not os.path.exists("emotion_model.pkl"):
        print("Training model...")
        train_model()

    record_and_predict()


# Braille Translator Tool

import tkinter as tk

from tkinter import filedialog, messagebox

from PIL import Image, ImageDraw, ImageTk

import math

import os


# -------------------------

# Braille mapping utilities

# -------------------------


# Dot-number definitions for letters a..z (Grade-1 braille)

# Each letter maps to the list of raised-dot numbers (1..6) in its cell;
# dots_to_braille_unicode() below converts such lists to Unicode characters.
LETTER_DOTS = {
    'a': [1],
    'b': [1,2],
    'c': [1,4],
    'd': [1,4,5],
    'e': [1,5],
    'f': [1,2,4],
    'g': [1,2,4,5],
    'h': [1,2,5],
    'i': [2,4],
    'j': [2,4,5],
    'k': [1,3],
    'l': [1,2,3],
    'm': [1,3,4],
    'n': [1,3,4,5],
    'o': [1,3,5],
    'p': [1,2,3,4],
    'q': [1,2,3,4,5],
    'r': [1,2,3,5],
    's': [2,3,4],
    't': [2,3,4,5],
    'u': [1,3,6],
    'v': [1,2,3,6],
    'w': [2,4,5,6],
    'x': [1,3,4,6],
    'y': [1,3,4,5,6],
    'z': [1,3,5,6],
}

# Common punctuation (Grade-1)
# NOTE(review): several of these are approximations and implementations vary;
# verify against an official braille code table before relying on them.
PUNCT_DOTS = {
    ',': [2],
    ';': [2,3],
    ':': [2,4],
    '.': [2,5,6],
    '?': [2,6],
    '!': [2,3,5],
    '(': [2,3,6,5],  # open parenthesis commonly encoded as ⠶ (but implementations vary)
    ')': [3,5,6,2],  # mirrored / alternative — we'll use same as '(' for simplicity
    "'": [3],
    '-': [3,6],
    '/': [3,4],
    '"': [5,6,2,3],  # approximate
    '@': [4,1],      # uncommon; approximate
    '#': [3,4,5,6],  # number sign (we will use official number sign below)
}

# Braille special signs
NUMBER_SIGN = [3,4,5,6]   # ⠼ prefixes a run of digits
CAPITAL_SIGN = [6]        # prefix for capital (single capital) — optional use
SPACE = []                # no dots for space -> unicode U+2800


# Build dot -> Unicode mapping utility

def dots_to_braille_unicode(dots):
    """Map a list of raised-dot numbers to one Unicode braille character.

    dots: list of integers 1..8 (this tool uses 1..6).  Out-of-range values
    are ignored.  The Braille Patterns block starts at U+2800 and dot n
    contributes bit (n - 1) to the codepoint offset.
    """
    offset = sum(1 << (dot - 1) for dot in dots if 1 <= dot <= 8)
    return chr(0x2800 + offset)


# Precompute maps

# Precomputed character -> braille-glyph lookup tables, built once at import.
LETTER_TO_BRAILLE = {ch: dots_to_braille_unicode(dots) for ch, dots in LETTER_DOTS.items()}
PUNCT_TO_BRAILLE = {p: dots_to_braille_unicode(dots) for p, dots in PUNCT_DOTS.items()}
NUMBER_SIGN_CHAR = dots_to_braille_unicode(NUMBER_SIGN)
CAPITAL_SIGN_CHAR = dots_to_braille_unicode(CAPITAL_SIGN)
SPACE_CHAR = chr(0x2800)  # blank braille cell

# Digits mapping in Grade-1: number sign + letters a-j represents 1-0
DIGIT_TO_LETTER = {
    '1': 'a', '2': 'b', '3': 'c', '4': 'd', '5': 'e',
    '6': 'f', '7': 'g', '8': 'h', '9': 'i', '0': 'j'
}


# -------------------------

# Translation function

# -------------------------

def translate_to_braille(text, use_capital_prefix=True, use_number_prefix=True):
    """
    Translate plain text into Grade-1 Braille Unicode string.

    Options:
      - use_capital_prefix: if True, prefix capitals with the capital sign (⠠)
      - use_number_prefix: if True, prefix digit sequences with number sign (⠼)

    Unknown characters become the blank braille cell (U+2800).
    Returns braille_unicode_string
    """
    out = []
    i = 0
    n = len(text)
    while i < n:
        ch = text[i]
        # Whitespace -> blank braille cell
        if ch.isspace():
            out.append(SPACE_CHAR)
            i += 1
            continue

        # Digit sequence handling: one number sign prefixes the whole run
        if ch.isdigit():
            if use_number_prefix:
                out.append(NUMBER_SIGN_CHAR)
            # consume contiguous digits
            while i < n and text[i].isdigit():
                d = text[i]
                letter_equiv = DIGIT_TO_LETTER.get(d, None)
                if letter_equiv:
                    out.append(LETTER_TO_BRAILLE[letter_equiv])
                else:
                    # fallback: blank cell for digits outside 0-9
                    # (e.g. non-ASCII digits for which isdigit() is True)
                    out.append(SPACE_CHAR)
                i += 1
            continue

        # Letter: optional capital prefix, then the lowercase cell
        if ch.isalpha():
            if ch.isupper():
                if use_capital_prefix:
                    out.append(CAPITAL_SIGN_CHAR)
                ch_low = ch.lower()
            else:
                ch_low = ch
            code = LETTER_TO_BRAILLE.get(ch_low)
            if code:
                out.append(code)
            else:
                # non a..z letter (accented, non-Latin): blank-cell placeholder
                out.append(SPACE_CHAR)
            i += 1
            continue

        # Punctuation
        if ch in PUNCT_TO_BRAILLE:
            out.append(PUNCT_TO_BRAILLE[ch])
            i += 1
            continue

        # Fix: removed an unreachable `ch == '"'` fallback branch — '"' is a
        # key of PUNCT_TO_BRAILLE, so the punctuation branch above always
        # handled it and the fallback could never execute.

        # Unknown character: include as blank-cell placeholder
        out.append(SPACE_CHAR)
        i += 1

    return "".join(out)


# -------------------------

# Braille image rendering

# -------------------------

def render_braille_image(braille_text, dot_radius=8, dot_gap=10, cell_gap=16, bg_color=(255,255,255)):
    """
    Render braille_text (unicode braille characters) into a PIL Image.

    Each braille cell is 2 (columns) x 3 (rows) of dots.
    We read the Unicode braille codepoints and draw filled circles for active dots.

    Args:
        braille_text: string of braille characters (U+2800..U+28FF).
        dot_radius: dot radius in pixels.
        dot_gap: spacing between dots within a cell, in pixels.
        cell_gap: spacing between cells and the page margin, in pixels.
        bg_color: RGB background color tuple.

    Returns PIL.Image (RGB).
    """
    # Compute rows & columns: we'll wrap to a max columns per line for reasonable width
    max_cols = 40  # characters per row, adjust if needed

    # Split into lines by breaking long strings
    chars = list(braille_text)
    lines = [chars[i:i+max_cols] for i in range(0, len(chars), max_cols)]

    # cell size
    cell_w = dot_radius*2 + dot_gap
    cell_h = dot_radius*3 + dot_gap*2  # 3 rows
    # Width is sized from the first line only; later lines can never be
    # longer because of the fixed max_cols wrap above.
    img_w = len(lines[0]) * (cell_w + cell_gap) + 2*cell_gap if lines else 200
    img_h = len(lines) * (cell_h + cell_gap) + 2*cell_gap if lines else 100

    img = Image.new("RGB", (img_w, img_h), color=bg_color)
    draw = ImageDraw.Draw(img)

    for row_idx, line in enumerate(lines):
        for col_idx, ch in enumerate(line):
            # Top-left corner of this cell
            x0 = cell_gap + col_idx * (cell_w + cell_gap)
            y0 = cell_gap + row_idx * (cell_h + cell_gap)
            # Determine dot pattern from unicode char: bit (n-1) of the
            # offset from U+2800 encodes whether dot n is raised.
            codepoint = ord(ch)
            base = 0x2800
            mask = codepoint - base
            # dot positions for 1..6 are arranged:
            # (col0,row0)=dot1  (col1,row0)=dot4
            # (col0,row1)=dot2  (col1,row1)=dot5
            # (col0,row2)=dot3  (col1,row2)=dot6
            dot_positions = [
                (0,0,1),  # dot1
                (0,1,2),  # dot2
                (0,2,3),  # dot3
                (1,0,4),  # dot4
                (1,1,5),  # dot5
                (1,2,6),  # dot6
            ]
            for col, r, dotn in dot_positions:
                bit = (mask >> (dotn-1)) & 1
                cx = x0 + col * (dot_radius + dot_gap/2) + dot_radius + 4
                cy = y0 + r * (dot_radius + dot_gap/2) + dot_radius + 4
                bbox = [cx - dot_radius, cy - dot_radius, cx + dot_radius, cy + dot_radius]
                if bit:
                    draw.ellipse(bbox, fill=(0,0,0))
                else:
                    # draw faint circle to indicate empty dot (optional)
                    draw.ellipse(bbox, outline=(200,200,200))
    return img


# -------------------------

# GUI

# -------------------------

class BrailleGUI:
    """Tkinter front-end: text input, braille unicode output, image preview
    and PNG export for the Grade-1 translator above."""

    def __init__(self, root):
        """Build all widgets inside *root* (a tk.Tk window)."""
        self.root = root
        root.title("Braille Translator Tool — Grade-1 (Uncontracted)")
        root.geometry("820x520")

        # Input frame
        frame_in = tk.LabelFrame(root, text="Input Text", padx=8, pady=8)
        frame_in.pack(fill="both", padx=12, pady=8)

        self.text_input = tk.Text(frame_in, height=6, wrap="word", font=("Arial", 12))
        self.text_input.pack(fill="both", expand=True)
        self.text_input.insert("1.0", "Hello, World! 123")

        # Controls
        ctrl = tk.Frame(root)
        ctrl.pack(fill="x", padx=12)
        tk.Button(ctrl, text="Translate", command=self.on_translate).pack(side="left", padx=6, pady=6)
        tk.Button(ctrl, text="Render Braille Image (Preview)", command=self.on_render_preview).pack(side="left", padx=6, pady=6)
        tk.Button(ctrl, text="Save Braille Image...", command=self.on_save_image).pack(side="left", padx=6, pady=6)
        tk.Button(ctrl, text="Copy Braille Unicode to Clipboard", command=self.on_copy_clipboard).pack(side="left", padx=6, pady=6)

        # Output frame (braille unicode text)
        frame_out = tk.LabelFrame(root, text="Braille (Unicode)", padx=8, pady=8)
        frame_out.pack(fill="both", padx=12, pady=8, expand=True)

        # Read-only output box; the chosen font is assumed to include braille
        # glyphs — TODO confirm coverage on non-Windows platforms.
        self.braille_text_widget = tk.Text(frame_out, height=6, wrap="word", font=("Segoe UI Symbol", 20))
        self.braille_text_widget.pack(fill="both", expand=True)
        self.braille_text_widget.config(state="disabled")

        # Image preview area
        preview_frame = tk.LabelFrame(root, text="Image Preview", padx=8, pady=8)
        preview_frame.pack(fill="both", padx=12, pady=8)
        self.preview_label = tk.Label(preview_frame)
        self.preview_label.pack()
        self.last_preview_image = None  # keep reference to avoid GC

    def on_translate(self):
        """Translate the input text and show the braille in the output box."""
        txt = self.text_input.get("1.0", "end").rstrip("\n")
        if not txt.strip():
            messagebox.showwarning("Input required", "Please enter some text to translate.")
            return
        braille = translate_to_braille(txt)
        # The widget is kept disabled except while its contents are rewritten.
        self.braille_text_widget.config(state="normal")
        self.braille_text_widget.delete("1.0", "end")
        self.braille_text_widget.insert("1.0", braille)
        self.braille_text_widget.config(state="disabled")

    def on_render_preview(self):
        """Render the current braille output to an image and preview it."""
        braille = self.braille_text_widget.get("1.0", "end").rstrip("\n")
        if not braille:
            messagebox.showinfo("No Braille", "Translate text first (click Translate).")
            return
        img = render_braille_image(braille, dot_radius=8, dot_gap=10, cell_gap=14)
        self.show_preview(img)

    def on_save_image(self):
        """Render the braille output at a larger size and save it as a PNG."""
        braille = self.braille_text_widget.get("1.0", "end").rstrip("\n")
        if not braille:
            messagebox.showinfo("No Braille", "Translate text first (click Translate).")
            return
        img = render_braille_image(braille, dot_radius=10, dot_gap=12, cell_gap=16)
        path = filedialog.asksaveasfilename(defaultextension=".png", filetypes=[("PNG image","*.png")], title="Save Braille image")
        if path:
            img.save(path)
            messagebox.showinfo("Saved", f"Braille image saved to:\n{path}")

    def on_copy_clipboard(self):
        """Copy the braille unicode string to the system clipboard."""
        braille = self.braille_text_widget.get("1.0", "end").rstrip("\n")
        if not braille:
            messagebox.showinfo("No Braille", "Translate text first (click Translate).")
            return
        # Use Tk clipboard
        self.root.clipboard_clear()
        self.root.clipboard_append(braille)
        messagebox.showinfo("Copied", "Braille Unicode copied to clipboard.")

    def show_preview(self, pil_img):
        """Scale *pil_img* down to fit the preview area and display it."""
        # Resize preview if too big
        max_w, max_h = 760, 240
        w, h = pil_img.size
        scale = min(max_w / w, max_h / h, 1.0)
        if scale < 1.0:
            pil_img = pil_img.resize((int(w*scale), int(h*scale)), Image.LANCZOS)
        tk_img = ImageTk.PhotoImage(pil_img)
        self.preview_label.config(image=tk_img)
        self.preview_label.image = tk_img  # keep ref


# -------------------------

# Run the app

# -------------------------

def main():
    """Create the Tk root window, attach the BrailleGUI, and run the loop."""
    root = tk.Tk()
    app = BrailleGUI(root)  # keep a reference alive for the window's lifetime
    root.mainloop()


if __name__ == "__main__":
    main()


# Music Sheet to Audio Converter

"""

sheet_to_midi.py


Simple prototype: Convert a scanned single-line, monophonic staff in TREBLE CLEF

to a MIDI file using OpenCV -> heuristic notehead detection -> music21.


Limitations:

 - Monophonic, printed notation, single staff detection.

 - Treats each notehead as a quarter note by default.

 - No clef/key/time signature detection (assumes treble clef, 4/4).

 - Not a replacement for full OMR systems like Audiveris.


Usage:

    python sheet_to_midi.py input_image.png output.mid

"""


import sys

import cv2

import numpy as np

import math

from music21 import stream, note, midi, tempo, meter

from PIL import Image


# -------------------------

# Utility & image helpers

# -------------------------

def load_image(path):
    """Read *path* as a grayscale image.

    Raises FileNotFoundError when the file is missing or not a readable image
    (cv2.imread signals failure by returning None rather than raising).
    """
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise FileNotFoundError(f"Cannot open image: {path}")
    return image


def binarize(img):
    """Binarize a grayscale image (inverted: ink becomes white).

    Mean adaptive thresholding is used for robustness to uneven lighting.
    """
    return cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY_INV, 15, 10,
    )


# -------------------------

# Staff line detection

# -------------------------

def detect_staff_lines(binary_img, debug=False):
    """
    Locate horizontal staff lines in a binarized score image.

    A morphological opening with a wide horizontal kernel keeps only long
    horizontal strokes; the row projection of the result peaks at staff
    lines.  Returns the y-positions (sorted top->bottom) of the best
    5-line group, or whatever lines were found when fewer than 5 exist.
    """
    height, width = binary_img.shape

    # Keep only long horizontal runs (staff lines) via an opening.
    kernel_len = max(10, width // 30)
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 1))
    horizontals = cv2.morphologyEx(binary_img, cv2.MORPH_OPEN, line_kernel)

    # Row projection, normalized to [0, 1].
    projection = np.sum(horizontals, axis=1)
    projection = (projection - projection.min()) / (projection.max() - projection.min() + 1e-9)

    # Rows whose projection exceeds the (tunable) threshold are line pixels.
    rows = np.where(projection > 0.15)[0]
    if len(rows) == 0:
        return []

    # Merge runs of near-adjacent rows (gap <= 2 px) into single line centers.
    lines = []
    run = [rows[0]]
    for r in rows[1:]:
        if r - run[-1] <= 2:
            run.append(r)
        else:
            lines.append(int(np.mean(run)))
            run = [r]
    if run:
        lines.append(int(np.mean(run)))

    # A staff is 5 evenly spaced lines; with fewer detections, fall back to
    # returning them all.
    if len(lines) < 5:
        return lines  # fallback

    # Slide a 5-line window and keep the group with the most even spacing
    # (minimum variance of consecutive gaps).
    best_group = None
    best_score = 1e9
    for start in range(len(lines) - 4):
        group = lines[start:start + 5]
        score = np.var(np.diff(group))
        if score < best_score:
            best_score = score
            best_group = group

    return best_group if best_group is not None else lines[:5]


# -------------------------

# Note head detection

# -------------------------

def detect_noteheads(binary_img, staff_lines, debug=False):
    """
    Find connected components that plausibly are noteheads.

    Staff lines are subtracted first (so they don't split or merge blobs),
    then external contours are filtered by simple size heuristics.
    Returns bounding boxes (x, y, w, h) sorted left-to-right.
    """
    height, width = binary_img.shape

    # Rebuild the staff-line mask with the same morphology used for detection
    # and remove it from the image.
    kernel_len = max(10, width // 30)
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 1))
    line_mask = cv2.morphologyEx(binary_img, cv2.MORPH_OPEN, line_kernel)
    no_lines = cv2.bitwise_and(binary_img.copy(), cv2.bitwise_not(line_mask))

    # Close the small gaps left where staff lines crossed the noteheads.
    blob_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    no_lines = cv2.morphologyEx(no_lines, cv2.MORPH_CLOSE, blob_kernel, iterations=1)

    contours, _ = cv2.findContours(no_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = []
    for contour in contours:
        x, y, box_w, box_h = cv2.boundingRect(contour)
        area = cv2.contourArea(contour)
        if area < 30:
            continue  # too small: noise
        if box_h > 3 * box_w and box_h > 40:
            continue  # very tall and thin: likely a stem or flag
        if box_w > binary_img.shape[1] * 0.6:
            continue  # very wide region: likely text or other markings
        boxes.append((x, y, box_w, box_h))

    # Left-to-right reading order
    return sorted(boxes, key=lambda box: box[0])


# -------------------------

# Map vertical position to pitch (treble clef)

# -------------------------

def map_y_to_pitch(y_center, staff_lines):
    """
    Given y-coordinate and list of 5 staff line y-positions (top->bottom),
    compute the pitch name using treble clef mapping.
    We'll map lines & spaces to steps; middle C is one ledger line below staff in treble clef.
    The mapping: from top line down:
      Line 1 (top) -> F5
      Space -> E5
      Line 2 -> D5
      ...
    We'll build a scale of positions (lines and spaces) with corresponding MIDI note numbers.

    NOTE(review): as explained in the inline comments below, adjacent staff
    positions are mapped CHROMATICALLY (1 semitone apart) instead of
    diatonically — a deliberate simplification that misnames some pitches.
    """
    # Convert staff_lines sorted top->bottom
    lines = sorted(staff_lines)
    # staff spacing (median of gaps is robust to one misdetected line)
    spacing = np.median(np.diff(lines))
    # Build reference positions: lines and spaces extending several positions above/below
    # We'll define positions with index 0 at top line, increasing downward by half-step (line->space->line)
    # NOTE(review): positions/labels are assigned but never used afterwards.
    positions = []
    labels = []  # MIDI numbers
    # Let's compute the center y of each "position" for -6..+12 positions relative to top line
    # Determine MIDI mapping: top line (F5) midi 77. Use standard: F5=77, E5=76, D5=74? Wait careful...
    # Simpler: define mapping for relative positions using steps in diatonic scale (not semitone), but easiest is map to note names by index:
    # We'll build a list of note names starting from some reference. Let's compute using music21 for correctness.
    from music21 import pitch
    # We'll compute positions: every half staff-step is spacing/2
    half = spacing / 2.0
    # Let's create position centers from -6 to +18 (enough ledger lines)
    pos_centers = [lines[0] - 6*half + i*half for i in range(40)]
    # Now assign note names: find which position corresponds to which diatonic step.
    # Determine which index corresponds to the top staff line (lines[0])
    idx_top_line = int(round((lines[0] - pos_centers[0]) / half))
    # For treble clef: top line is F5 (MIDI 77)
    top_midi = pitch.Pitch('F5').midi  # 77
    # Each position step (line->space->line) moves by one diatonic step (i.e., one scale degree), which may be 1 or 2 semitones.
    # But easier: we can build a list of midi numbers by moving by semitone steps of a diatonic scale: approximate by mapping every position to midi by using
    # semitone step of 1 for each half-step (this maps to chromatic steps which is fine but won't respect staff spacing perfectly for accidentals).
    # Simpler: treat each position as semitone steps from top line: top line index -> top_midi, next half position -> top_midi - 1, etc.
    # This yields a chromatic mapping: adjacent positions = 1 semitone. This is a simplification (in real staff adjacent positions are diatonic).
    midi_for_pos = []
    for i in range(len(pos_centers)):
        midi_for_pos.append(top_midi - (i - idx_top_line))

    # Identify closest pos index for given y_center
    diffs = [abs(y_center - c) for c in pos_centers]
    pos_idx = int(np.argmin(diffs))
    midi = int(round(midi_for_pos[pos_idx]))
    # Convert midi to note name (e.g. "F5")
    p = pitch.Pitch()
    p.midi = midi
    return p.nameWithOctave


# -------------------------

# Build music21 stream from detected notes

# -------------------------

def build_stream_from_boxes(boxes, staff_lines, tempo_bpm=100):
    """Turn detected notehead boxes into a music21 stream of quarter notes.

    Boxes are assumed to be sorted left-to-right; the pitch of each note is
    derived from the vertical center of its box.  Chords and stem-based
    durations are not implemented — every notehead becomes a quarter note.
    """
    melody = stream.Stream()
    melody.append(tempo.MetronomeMark(number=tempo_bpm))
    melody.append(meter.TimeSignature('4/4'))  # simple fixed time signature

    for x, y, box_w, box_h in boxes:
        center_y = y + box_h / 2.0
        quarter = note.Note(map_y_to_pitch(center_y, staff_lines))
        quarter.duration.quarterLength = 1.0  # quarter note default
        melody.append(quarter)

    return melody


# -------------------------

# Main flow

# -------------------------

def process_image_to_midi(input_path, output_midi_path, debug=False):
    """Full pipeline: image -> staff lines -> noteheads -> music21 -> MIDI.

    Returns True on success, False when no noteheads were detected.
    Raises FileNotFoundError when the input image cannot be read.
    """
    img = load_image(input_path)
    bin_img = binarize(img)

    staff_lines = detect_staff_lines(bin_img, debug=debug)
    # Pitch mapping degrades gracefully with fewer than 5 lines, so only warn.
    if not staff_lines or len(staff_lines) < 5:
        print("Warning: could not detect 5 staff lines reliably. Trying to proceed with available lines.")
    else:
        print("Detected staff lines (y-coordinates):", staff_lines)

    boxes = detect_noteheads(bin_img, staff_lines, debug=debug)
    if not boxes:
        print("No noteheads detected. Exiting.")
        return False

    print(f"Detected {len(boxes)} candidate noteheads (left→right).")
    for i, b in enumerate(boxes, start=1):
        x, y, wbox, hbox = b
        print(f"{i}: x={x}, y={y}, w={wbox}, h={hbox}")

    music_stream = build_stream_from_boxes(boxes, staff_lines, tempo_bpm=100)

    # Export to MIDI
    mf = midi.translate.streamToMidiFile(music_stream)
    mf.open(output_midi_path, 'wb')
    mf.write()
    mf.close()
    print(f"MIDI saved to {output_midi_path}")
    return True


# -------------------------

# CLI

# -------------------------

if __name__ == "__main__":
    # Expect exactly: input image path and output MIDI path.
    if len(sys.argv) < 3:
        print("Usage: python sheet_to_midi.py input_image.png output.mid")
        sys.exit(1)
    inp = sys.argv[1]
    out = sys.argv[2]
    ok = process_image_to_midi(inp, out, debug=True)
    if not ok:
        # Exit code 2 signals "no notes detected" (vs 1 for bad usage).
        sys.exit(2)