# Smart Resume Formatter

from docx import Document

from docx.shared import Pt

from fpdf import FPDF


# ---------------------------

# 1. Format Resume into Word

# ---------------------------

def create_word_resume(data, filename="resume.docx"):
    """Generate a Word (.docx) resume from *data* and save it to *filename*.

    data: dict with keys "name", "email", "phone", "summary",
          "experience" (list of dicts: role/company/years/details),
          "education" (list of dicts: degree/institution/year),
          and "skills" (list of strings).
    """
    doc = Document()

    # Title (Name)
    title = doc.add_paragraph(data["name"])
    title.style = doc.styles['Title']

    # Contact Info
    doc.add_paragraph(f'Email: {data["email"]} | Phone: {data["phone"]}')

    # Sections
    doc.add_heading('Summary', level=1)
    doc.add_paragraph(data["summary"])

    doc.add_heading('Experience', level=1)
    for job in data["experience"]:
        doc.add_paragraph(f"{job['role']} at {job['company']} ({job['years']})")
        doc.add_paragraph(job["details"], style="List Bullet")

    doc.add_heading('Education', level=1)
    for edu in data["education"]:
        doc.add_paragraph(f"{edu['degree']} - {edu['institution']} ({edu['year']})")

    doc.add_heading('Skills', level=1)
    doc.add_paragraph(", ".join(data["skills"]))

    doc.save(filename)
    # Fix: the message printed the literal "(unknown)" instead of the actual
    # output path.
    print(f"✅ Word Resume saved as {filename}")



# ---------------------------

# 2. Format Resume into PDF

# ---------------------------

def create_pdf_resume(data, filename="resume.pdf"):
    """Generate a PDF resume from *data* and save it to *filename*.

    Expects the same dict layout as create_word_resume: name, email, phone,
    summary, experience (role/company/years/details), education
    (degree/institution/year) and skills.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", 'B', 16)

    # Title (Name)
    pdf.cell(200, 10, data["name"], ln=True, align="C")

    pdf.set_font("Arial", '', 12)
    pdf.cell(200, 10, f'Email: {data["email"]} | Phone: {data["phone"]}', ln=True, align="C")

    # Sections: bold 14pt heading, regular 12pt body
    pdf.set_font("Arial", 'B', 14)
    pdf.cell(200, 10, "Summary", ln=True)
    pdf.set_font("Arial", '', 12)
    pdf.multi_cell(0, 10, data["summary"])

    pdf.set_font("Arial", 'B', 14)
    pdf.cell(200, 10, "Experience", ln=True)
    pdf.set_font("Arial", '', 12)
    for job in data["experience"]:
        pdf.multi_cell(0, 10, f"{job['role']} at {job['company']} ({job['years']})\n - {job['details']}")

    pdf.set_font("Arial", 'B', 14)
    pdf.cell(200, 10, "Education", ln=True)
    pdf.set_font("Arial", '', 12)
    for edu in data["education"]:
        pdf.cell(200, 10, f"{edu['degree']} - {edu['institution']} ({edu['year']})", ln=True)

    pdf.set_font("Arial", 'B', 14)
    pdf.cell(200, 10, "Skills", ln=True)
    pdf.set_font("Arial", '', 12)
    pdf.multi_cell(0, 10, ", ".join(data["skills"]))

    pdf.output(filename)
    # Fix: the message printed the literal "(unknown)" instead of the actual
    # output path.
    print(f"✅ PDF Resume saved as {filename}")



# ---------------------------

# Example Data

# ---------------------------

# Sample resume payload consumed by both generators above.  Keys must match
# what create_word_resume()/create_pdf_resume() read: name, email, phone,
# summary, experience (role/company/years/details), education
# (degree/institution/year) and skills.
resume_data = {
    "name": "John Doe",
    "email": "john.doe@email.com",
    "phone": "+1-234-567-890",
    "summary": "Passionate software engineer with 5+ years of experience in building scalable applications.",
    "experience": [
        {"role": "Backend Developer", "company": "TechCorp", "years": "2020-2023", "details": "Developed APIs and microservices using Python & Django."},
        {"role": "Software Engineer", "company": "CodeWorks", "years": "2017-2020", "details": "Worked on automation tools and optimized system performance."}
    ],
    "education": [
        {"degree": "B.Sc. Computer Science", "institution": "XYZ University", "year": "2017"}
    ],
    "skills": ["Python", "Django", "Flask", "SQL", "Docker", "AWS"]
}

# Run both functions (executes at import time; this section is script-style)
create_word_resume(resume_data)
create_pdf_resume(resume_data)


# AI Workout Form Corrector

import cv2

import mediapipe as mp

import numpy as np


mp_drawing = mp.solutions.drawing_utils

mp_pose = mp.solutions.pose


# -----------------------

# Calculate angle between 3 points

# -----------------------

def calculate_angle(a, b, c):
    """Return the angle in degrees (0..180) at vertex *b* of the path a-b-c."""
    first, mid, end = np.array(a), np.array(b), np.array(c)

    # Angle of each ray measured from the vertex, then take the difference.
    ray_to_end = np.arctan2(end[1] - mid[1], end[0] - mid[0])
    ray_to_first = np.arctan2(first[1] - mid[1], first[0] - mid[0])
    degrees = np.abs((ray_to_end - ray_to_first) * 180.0 / np.pi)

    # Fold reflex angles back into the 0..180 range.
    return 360 - degrees if degrees > 180.0 else degrees


# -----------------------

# Main workout tracker (Squats Example)

# -----------------------

cap = cv2.VideoCapture(0)  # default webcam

with mp_pose.Pose(min_detection_confidence=0.7, min_tracking_confidence=0.7) as pose:
    counter = 0   # completed squat repetitions
    stage = None  # rep state machine: "up" (standing) / "down" (bottom)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Recolor image: MediaPipe expects RGB; mark read-only while detecting.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image.flags.writeable = False

        # Make detection
        results = pose.process(image)

        # Recolor back to BGR for OpenCV drawing/display
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        try:
            landmarks = results.pose_landmarks.landmark

            # Get normalized (0..1) coordinates of the left leg joints
            hip = [landmarks[mp_pose.PoseLandmark.LEFT_HIP.value].x,
                   landmarks[mp_pose.PoseLandmark.LEFT_HIP.value].y]
            knee = [landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value].x,
                    landmarks[mp_pose.PoseLandmark.LEFT_KNEE.value].y]
            ankle = [landmarks[mp_pose.PoseLandmark.LEFT_ANKLE.value].x,
                     landmarks[mp_pose.PoseLandmark.LEFT_ANKLE.value].y]

            # Knee angle at hip-knee-ankle
            angle = calculate_angle(hip, knee, ankle)

            # Visualize angle next to the knee
            # (assumes a 640x480 frame — TODO confirm against actual camera)
            cv2.putText(image, str(int(angle)),
                        tuple(np.multiply(knee, [640, 480]).astype(int)),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA
                        )

            # Squat counter logic: a rep counts when the knee bends below 90
            # degrees after having been nearly straight (>160 degrees).
            if angle > 160:
                stage = "up"
            if angle < 90 and stage == "up":
                stage = "down"
                counter += 1
                print(f"✅ Squat count: {counter}")

            # Depth feedback for the current frame
            if angle < 70:
                feedback = "Too Low! Go Higher"
            elif 70 <= angle <= 100:
                feedback = "Perfect Depth ✅"
            else:
                feedback = "Stand Tall"

            cv2.putText(image, feedback, (50,100),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2, cv2.LINE_AA)

        except Exception:
            # Fix: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt.  An AttributeError lands here when no pose is
            # detected (results.pose_landmarks is None); skip the overlay.
            pass

        # Render detections
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
                                  mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                 )

        cv2.imshow('AI Workout Form Corrector - Squats', image)

        # Quit on 'q'
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()


# Voice Emotion Detector

import os

import librosa

import numpy as np

import sounddevice as sd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.svm import SVC

from sklearn.preprocessing import LabelEncoder, StandardScaler

import pickle


# -----------------------

# STEP 1: Feature Extraction

# -----------------------

def extract_features(file_path):
    """Load up to 3 s of audio from *file_path* and return a 1-D feature vector.

    The vector stacks the time-averaged MFCCs (40 coefficients), chroma and
    mel-spectrogram features.
    """
    signal, rate = librosa.load(file_path, duration=3, offset=0.5)

    feature_mats = [
        librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40),
        librosa.feature.chroma_stft(y=signal, sr=rate),
        librosa.feature.melspectrogram(y=signal, sr=rate),
    ]

    # Collapse the time axis: one mean value per coefficient/bin.
    return np.hstack([np.mean(mat, axis=1) for mat in feature_mats])


# -----------------------

# STEP 2: Training (Demo Dataset Simulation)

# -----------------------

def train_model():
    """Train an SVM emotion classifier on .wav files in ./dataset and pickle it.

    File names must start with the emotion label followed by an underscore,
    e.g. "angry_1.wav".  Saves (model, encoder, scaler) to emotion_model.pkl.

    Raises RuntimeError if no .wav files are found.
    """
    # Normally, load a dataset (RAVDESS, CREMA-D etc.)
    # Here, we'll simulate with few .wav files in "dataset/" folder
    # (removed an unused `emotions` dict that was never read)
    X, y = [], []
    dataset_path = "dataset"  # folder with wav files: angry_1.wav, happy_2.wav, etc.

    for file in os.listdir(dataset_path):
        if file.endswith(".wav"):
            label = file.split("_")[0]  # e.g., angry_1.wav → "angry"
            feature = extract_features(os.path.join(dataset_path, file))
            X.append(feature)
            y.append(label)

    # Fail fast with a clear message instead of a cryptic sklearn error later.
    if not X:
        raise RuntimeError(f"No .wav files found in '{dataset_path}/'")

    X = np.array(X)
    y = np.array(y)

    # Encode string labels as integers
    encoder = LabelEncoder()
    y = encoder.fit_transform(y)

    # Standardize features — SVMs are scale-sensitive
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = SVC(kernel="linear", probability=True)
    model.fit(X_train, y_train)

    acc = model.score(X_test, y_test)
    print(f"Model trained with accuracy: {acc*100:.2f}%")

    # Save model together with the encoder/scaler needed at prediction time
    with open("emotion_model.pkl", "wb") as f:
        pickle.dump((model, encoder, scaler), f)


# -----------------------

# STEP 3: Record & Predict

# -----------------------

def record_and_predict(duration=3, fs=22050):
    """Record *duration* seconds from the microphone, save temp.wav, then
    predict the emotion with the pickled model and plot class confidences.

    duration: recording length in seconds.
    fs: sample rate in Hz (must match the feature pipeline's expectations).
    """
    import wave  # local import: only needed here for saving the recording

    print("Recording...")
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
    sd.wait()
    print("Recording complete. Saving as temp.wav...")

    # Fix: librosa.output.write_wav was removed in librosa 0.8, so the
    # original call crashes at runtime.  Write the PCM data with the stdlib
    # `wave` module instead (mono, 16-bit).
    samples = np.clip(recording.flatten(), -1.0, 1.0)
    pcm16 = (samples * 32767).astype(np.int16)
    with wave.open("temp.wav", "wb") as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)  # 16-bit samples
        wf.setframerate(fs)
        wf.writeframes(pcm16.tobytes())

    with open("emotion_model.pkl", "rb") as f:
        model, encoder, scaler = pickle.load(f)

    # Same feature pipeline and scaling as training
    features = extract_features("temp.wav").reshape(1, -1)
    features = scaler.transform(features)
    pred = model.predict(features)[0]
    probas = model.predict_proba(features)[0]

    emotion = encoder.inverse_transform([pred])[0]
    print(f"Detected Emotion: {emotion}")

    # Plot probabilities
    plt.bar(encoder.classes_, probas)
    plt.title("Emotion Prediction Confidence")
    plt.show()


# -----------------------

# MAIN

# -----------------------

if __name__ == "__main__":
    # Train (and pickle) the model on first run only; reuse it afterwards.
    if not os.path.exists("emotion_model.pkl"):
        print("Training model...")
        train_model()

    record_and_predict()


# Braille Translator Tool

import tkinter as tk

from tkinter import filedialog, messagebox

from PIL import Image, ImageDraw, ImageTk

import math

import os


# -------------------------

# Braille mapping utilities

# -------------------------


# Dot-number definitions for letters a..z (Grade-1 braille)

# Each letter maps to the list of raised-dot numbers (1..6) in its cell;
# dots_to_braille_unicode() below converts such lists to Unicode characters.
LETTER_DOTS = {
    'a': [1],
    'b': [1,2],
    'c': [1,4],
    'd': [1,4,5],
    'e': [1,5],
    'f': [1,2,4],
    'g': [1,2,4,5],
    'h': [1,2,5],
    'i': [2,4],
    'j': [2,4,5],
    'k': [1,3],
    'l': [1,2,3],
    'm': [1,3,4],
    'n': [1,3,4,5],
    'o': [1,3,5],
    'p': [1,2,3,4],
    'q': [1,2,3,4,5],
    'r': [1,2,3,5],
    's': [2,3,4],
    't': [2,3,4,5],
    'u': [1,3,6],
    'v': [1,2,3,6],
    'w': [2,4,5,6],
    'x': [1,3,4,6],
    'y': [1,3,4,5,6],
    'z': [1,3,5,6],
}

# Common punctuation (Grade-1)
# NOTE(review): several of these are approximations and implementations vary;
# verify against an official braille code table before relying on them.
PUNCT_DOTS = {
    ',': [2],
    ';': [2,3],
    ':': [2,4],
    '.': [2,5,6],
    '?': [2,6],
    '!': [2,3,5],
    '(': [2,3,6,5],  # open parenthesis commonly encoded as ⠶ (but implementations vary)
    ')': [3,5,6,2],  # mirrored / alternative — we'll use same as '(' for simplicity
    "'": [3],
    '-': [3,6],
    '/': [3,4],
    '"': [5,6,2,3],  # approximate
    '@': [4,1],      # uncommon; approximate
    '#': [3,4,5,6],  # number sign (we will use official number sign below)
}

# Braille special signs
NUMBER_SIGN = [3,4,5,6]   # ⠼ prefixes a run of digits
CAPITAL_SIGN = [6]        # prefix for capital (single capital) — optional use
SPACE = []                # no dots for space -> unicode U+2800


# Build dot -> Unicode mapping utility

def dots_to_braille_unicode(dots):
    """Map a list of raised-dot numbers to one Unicode braille character.

    dots: list of integers 1..8 (this tool uses 1..6).  Out-of-range values
    are ignored.  The Braille Patterns block starts at U+2800 and dot n
    contributes bit (n - 1) to the codepoint offset.
    """
    offset = sum(1 << (dot - 1) for dot in dots if 1 <= dot <= 8)
    return chr(0x2800 + offset)


# Precompute maps

# Precomputed character -> braille-glyph lookup tables, built once at import.
LETTER_TO_BRAILLE = {ch: dots_to_braille_unicode(dots) for ch, dots in LETTER_DOTS.items()}
PUNCT_TO_BRAILLE = {p: dots_to_braille_unicode(dots) for p, dots in PUNCT_DOTS.items()}
NUMBER_SIGN_CHAR = dots_to_braille_unicode(NUMBER_SIGN)
CAPITAL_SIGN_CHAR = dots_to_braille_unicode(CAPITAL_SIGN)
SPACE_CHAR = chr(0x2800)  # blank braille cell

# Digits mapping in Grade-1: number sign + letters a-j represents 1-0
DIGIT_TO_LETTER = {
    '1': 'a', '2': 'b', '3': 'c', '4': 'd', '5': 'e',
    '6': 'f', '7': 'g', '8': 'h', '9': 'i', '0': 'j'
}


# -------------------------

# Translation function

# -------------------------

def translate_to_braille(text, use_capital_prefix=True, use_number_prefix=True):
    """
    Translate plain text into Grade-1 Braille Unicode string.

    Options:
      - use_capital_prefix: if True, prefix capitals with the capital sign (⠠)
      - use_number_prefix: if True, prefix digit sequences with number sign (⠼)

    Unknown characters become the blank braille cell (U+2800).
    Returns braille_unicode_string
    """
    out = []
    i = 0
    n = len(text)
    while i < n:
        ch = text[i]
        # Whitespace -> blank braille cell
        if ch.isspace():
            out.append(SPACE_CHAR)
            i += 1
            continue

        # Digit sequence handling: one number sign prefixes the whole run
        if ch.isdigit():
            if use_number_prefix:
                out.append(NUMBER_SIGN_CHAR)
            # consume contiguous digits
            while i < n and text[i].isdigit():
                d = text[i]
                letter_equiv = DIGIT_TO_LETTER.get(d, None)
                if letter_equiv:
                    out.append(LETTER_TO_BRAILLE[letter_equiv])
                else:
                    # fallback: blank cell for digits outside 0-9
                    # (e.g. non-ASCII digits for which isdigit() is True)
                    out.append(SPACE_CHAR)
                i += 1
            continue

        # Letter: optional capital prefix, then the lowercase cell
        if ch.isalpha():
            if ch.isupper():
                if use_capital_prefix:
                    out.append(CAPITAL_SIGN_CHAR)
                ch_low = ch.lower()
            else:
                ch_low = ch
            code = LETTER_TO_BRAILLE.get(ch_low)
            if code:
                out.append(code)
            else:
                # non a..z letter (accented, non-Latin): blank-cell placeholder
                out.append(SPACE_CHAR)
            i += 1
            continue

        # Punctuation
        if ch in PUNCT_TO_BRAILLE:
            out.append(PUNCT_TO_BRAILLE[ch])
            i += 1
            continue

        # Fix: removed an unreachable `ch == '"'` fallback branch — '"' is a
        # key of PUNCT_TO_BRAILLE, so the punctuation branch above always
        # handled it and the fallback could never execute.

        # Unknown character: include as blank-cell placeholder
        out.append(SPACE_CHAR)
        i += 1

    return "".join(out)


# -------------------------

# Braille image rendering

# -------------------------

def render_braille_image(braille_text, dot_radius=8, dot_gap=10, cell_gap=16, bg_color=(255,255,255)):
    """
    Render braille_text (unicode braille characters) into a PIL Image.

    Each braille cell is 2 (columns) x 3 (rows) of dots.
    We read the Unicode braille codepoints and draw filled circles for active dots.

    Args:
        braille_text: string of braille characters (U+2800..U+28FF).
        dot_radius: dot radius in pixels.
        dot_gap: spacing between dots within a cell, in pixels.
        cell_gap: spacing between cells and the page margin, in pixels.
        bg_color: RGB background color tuple.

    Returns PIL.Image (RGB).
    """
    # Compute rows & columns: we'll wrap to a max columns per line for reasonable width
    max_cols = 40  # characters per row, adjust if needed

    # Split into lines by breaking long strings
    chars = list(braille_text)
    lines = [chars[i:i+max_cols] for i in range(0, len(chars), max_cols)]

    # cell size
    cell_w = dot_radius*2 + dot_gap
    cell_h = dot_radius*3 + dot_gap*2  # 3 rows
    # Width is sized from the first line only; later lines can never be
    # longer because of the fixed max_cols wrap above.
    img_w = len(lines[0]) * (cell_w + cell_gap) + 2*cell_gap if lines else 200
    img_h = len(lines) * (cell_h + cell_gap) + 2*cell_gap if lines else 100

    img = Image.new("RGB", (img_w, img_h), color=bg_color)
    draw = ImageDraw.Draw(img)

    for row_idx, line in enumerate(lines):
        for col_idx, ch in enumerate(line):
            # Top-left corner of this cell
            x0 = cell_gap + col_idx * (cell_w + cell_gap)
            y0 = cell_gap + row_idx * (cell_h + cell_gap)
            # Determine dot pattern from unicode char: bit (n-1) of the
            # offset from U+2800 encodes whether dot n is raised.
            codepoint = ord(ch)
            base = 0x2800
            mask = codepoint - base
            # dot positions for 1..6 are arranged:
            # (col0,row0)=dot1  (col1,row0)=dot4
            # (col0,row1)=dot2  (col1,row1)=dot5
            # (col0,row2)=dot3  (col1,row2)=dot6
            dot_positions = [
                (0,0,1),  # dot1
                (0,1,2),  # dot2
                (0,2,3),  # dot3
                (1,0,4),  # dot4
                (1,1,5),  # dot5
                (1,2,6),  # dot6
            ]
            for col, r, dotn in dot_positions:
                bit = (mask >> (dotn-1)) & 1
                cx = x0 + col * (dot_radius + dot_gap/2) + dot_radius + 4
                cy = y0 + r * (dot_radius + dot_gap/2) + dot_radius + 4
                bbox = [cx - dot_radius, cy - dot_radius, cx + dot_radius, cy + dot_radius]
                if bit:
                    draw.ellipse(bbox, fill=(0,0,0))
                else:
                    # draw faint circle to indicate empty dot (optional)
                    draw.ellipse(bbox, outline=(200,200,200))
    return img


# -------------------------

# GUI

# -------------------------

class BrailleGUI:
    """Tkinter front-end: text input, braille unicode output, image preview
    and PNG export for the Grade-1 translator above."""

    def __init__(self, root):
        """Build all widgets inside *root* (a tk.Tk window)."""
        self.root = root
        root.title("Braille Translator Tool — Grade-1 (Uncontracted)")
        root.geometry("820x520")

        # Input frame
        frame_in = tk.LabelFrame(root, text="Input Text", padx=8, pady=8)
        frame_in.pack(fill="both", padx=12, pady=8)

        self.text_input = tk.Text(frame_in, height=6, wrap="word", font=("Arial", 12))
        self.text_input.pack(fill="both", expand=True)
        self.text_input.insert("1.0", "Hello, World! 123")

        # Controls
        ctrl = tk.Frame(root)
        ctrl.pack(fill="x", padx=12)
        tk.Button(ctrl, text="Translate", command=self.on_translate).pack(side="left", padx=6, pady=6)
        tk.Button(ctrl, text="Render Braille Image (Preview)", command=self.on_render_preview).pack(side="left", padx=6, pady=6)
        tk.Button(ctrl, text="Save Braille Image...", command=self.on_save_image).pack(side="left", padx=6, pady=6)
        tk.Button(ctrl, text="Copy Braille Unicode to Clipboard", command=self.on_copy_clipboard).pack(side="left", padx=6, pady=6)

        # Output frame (braille unicode text)
        frame_out = tk.LabelFrame(root, text="Braille (Unicode)", padx=8, pady=8)
        frame_out.pack(fill="both", padx=12, pady=8, expand=True)

        # Read-only output box; the chosen font is assumed to include braille
        # glyphs — TODO confirm coverage on non-Windows platforms.
        self.braille_text_widget = tk.Text(frame_out, height=6, wrap="word", font=("Segoe UI Symbol", 20))
        self.braille_text_widget.pack(fill="both", expand=True)
        self.braille_text_widget.config(state="disabled")

        # Image preview area
        preview_frame = tk.LabelFrame(root, text="Image Preview", padx=8, pady=8)
        preview_frame.pack(fill="both", padx=12, pady=8)
        self.preview_label = tk.Label(preview_frame)
        self.preview_label.pack()
        self.last_preview_image = None  # keep reference to avoid GC

    def on_translate(self):
        """Translate the input text and show the braille in the output box."""
        txt = self.text_input.get("1.0", "end").rstrip("\n")
        if not txt.strip():
            messagebox.showwarning("Input required", "Please enter some text to translate.")
            return
        braille = translate_to_braille(txt)
        # The widget is kept disabled except while its contents are rewritten.
        self.braille_text_widget.config(state="normal")
        self.braille_text_widget.delete("1.0", "end")
        self.braille_text_widget.insert("1.0", braille)
        self.braille_text_widget.config(state="disabled")

    def on_render_preview(self):
        """Render the current braille output to an image and preview it."""
        braille = self.braille_text_widget.get("1.0", "end").rstrip("\n")
        if not braille:
            messagebox.showinfo("No Braille", "Translate text first (click Translate).")
            return
        img = render_braille_image(braille, dot_radius=8, dot_gap=10, cell_gap=14)
        self.show_preview(img)

    def on_save_image(self):
        """Render the braille output at a larger size and save it as a PNG."""
        braille = self.braille_text_widget.get("1.0", "end").rstrip("\n")
        if not braille:
            messagebox.showinfo("No Braille", "Translate text first (click Translate).")
            return
        img = render_braille_image(braille, dot_radius=10, dot_gap=12, cell_gap=16)
        path = filedialog.asksaveasfilename(defaultextension=".png", filetypes=[("PNG image","*.png")], title="Save Braille image")
        if path:
            img.save(path)
            messagebox.showinfo("Saved", f"Braille image saved to:\n{path}")

    def on_copy_clipboard(self):
        """Copy the braille unicode string to the system clipboard."""
        braille = self.braille_text_widget.get("1.0", "end").rstrip("\n")
        if not braille:
            messagebox.showinfo("No Braille", "Translate text first (click Translate).")
            return
        # Use Tk clipboard
        self.root.clipboard_clear()
        self.root.clipboard_append(braille)
        messagebox.showinfo("Copied", "Braille Unicode copied to clipboard.")

    def show_preview(self, pil_img):
        """Scale *pil_img* down to fit the preview area and display it."""
        # Resize preview if too big
        max_w, max_h = 760, 240
        w, h = pil_img.size
        scale = min(max_w / w, max_h / h, 1.0)
        if scale < 1.0:
            pil_img = pil_img.resize((int(w*scale), int(h*scale)), Image.LANCZOS)
        tk_img = ImageTk.PhotoImage(pil_img)
        self.preview_label.config(image=tk_img)
        self.preview_label.image = tk_img  # keep ref


# -------------------------

# Run the app

# -------------------------

def main():
    """Create the Tk root window, attach the BrailleGUI, and run the loop."""
    root = tk.Tk()
    app = BrailleGUI(root)  # keep a reference alive for the window's lifetime
    root.mainloop()


if __name__ == "__main__":
    main()


# Music Sheet to Audio Converter

"""

sheet_to_midi.py


Simple prototype: Convert a scanned single-line, monophonic staff in TREBLE CLEF

to a MIDI file using OpenCV -> heuristic notehead detection -> music21.


Limitations:

 - Monophonic, printed notation, single staff detection.

 - Treats each notehead as a quarter note by default.

 - No clef/key/time signature detection (assumes treble clef, 4/4).

 - Not a replacement for full OMR systems like Audiveris.


Usage:

    python sheet_to_midi.py input_image.png output.mid

"""


import sys

import cv2

import numpy as np

import math

from music21 import stream, note, midi, tempo, meter

from PIL import Image


# -------------------------

# Utility & image helpers

# -------------------------

def load_image(path):
    """Read *path* as a grayscale image.

    Raises FileNotFoundError when the file is missing or not a readable image
    (cv2.imread signals failure by returning None rather than raising).
    """
    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise FileNotFoundError(f"Cannot open image: {path}")
    return image


def binarize(img):
    """Binarize a grayscale image (inverted: ink becomes white).

    Mean adaptive thresholding is used for robustness to uneven lighting.
    """
    return cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY_INV, 15, 10,
    )


# -------------------------

# Staff line detection

# -------------------------

def detect_staff_lines(binary_img, debug=False):
    """
    Locate horizontal staff lines in a binarized score image.

    A morphological opening with a wide horizontal kernel keeps only long
    horizontal strokes; the row projection of the result peaks at staff
    lines.  Returns the y-positions (sorted top->bottom) of the best
    5-line group, or whatever lines were found when fewer than 5 exist.
    """
    height, width = binary_img.shape

    # Keep only long horizontal runs (staff lines) via an opening.
    kernel_len = max(10, width // 30)
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 1))
    horizontals = cv2.morphologyEx(binary_img, cv2.MORPH_OPEN, line_kernel)

    # Row projection, normalized to [0, 1].
    projection = np.sum(horizontals, axis=1)
    projection = (projection - projection.min()) / (projection.max() - projection.min() + 1e-9)

    # Rows whose projection exceeds the (tunable) threshold are line pixels.
    rows = np.where(projection > 0.15)[0]
    if len(rows) == 0:
        return []

    # Merge runs of near-adjacent rows (gap <= 2 px) into single line centers.
    lines = []
    run = [rows[0]]
    for r in rows[1:]:
        if r - run[-1] <= 2:
            run.append(r)
        else:
            lines.append(int(np.mean(run)))
            run = [r]
    if run:
        lines.append(int(np.mean(run)))

    # A staff is 5 evenly spaced lines; with fewer detections, fall back to
    # returning them all.
    if len(lines) < 5:
        return lines  # fallback

    # Slide a 5-line window and keep the group with the most even spacing
    # (minimum variance of consecutive gaps).
    best_group = None
    best_score = 1e9
    for start in range(len(lines) - 4):
        group = lines[start:start + 5]
        score = np.var(np.diff(group))
        if score < best_score:
            best_score = score
            best_group = group

    return best_group if best_group is not None else lines[:5]


# -------------------------

# Note head detection

# -------------------------

def detect_noteheads(binary_img, staff_lines, debug=False):
    """
    Find connected components that plausibly are noteheads.

    Staff lines are subtracted first (so they don't split or merge blobs),
    then external contours are filtered by simple size heuristics.
    Returns bounding boxes (x, y, w, h) sorted left-to-right.
    """
    height, width = binary_img.shape

    # Rebuild the staff-line mask with the same morphology used for detection
    # and remove it from the image.
    kernel_len = max(10, width // 30)
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 1))
    line_mask = cv2.morphologyEx(binary_img, cv2.MORPH_OPEN, line_kernel)
    no_lines = cv2.bitwise_and(binary_img.copy(), cv2.bitwise_not(line_mask))

    # Close the small gaps left where staff lines crossed the noteheads.
    blob_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    no_lines = cv2.morphologyEx(no_lines, cv2.MORPH_CLOSE, blob_kernel, iterations=1)

    contours, _ = cv2.findContours(no_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = []
    for contour in contours:
        x, y, box_w, box_h = cv2.boundingRect(contour)
        area = cv2.contourArea(contour)
        if area < 30:
            continue  # too small: noise
        if box_h > 3 * box_w and box_h > 40:
            continue  # very tall and thin: likely a stem or flag
        if box_w > binary_img.shape[1] * 0.6:
            continue  # very wide region: likely text or other markings
        boxes.append((x, y, box_w, box_h))

    # Left-to-right reading order
    return sorted(boxes, key=lambda box: box[0])


# -------------------------

# Map vertical position to pitch (treble clef)

# -------------------------

def map_y_to_pitch(y_center, staff_lines):
    """
    Given y-coordinate and list of 5 staff line y-positions (top->bottom),
    compute the pitch name using treble clef mapping.
    We'll map lines & spaces to steps; middle C is one ledger line below staff in treble clef.
    The mapping: from top line down:
      Line 1 (top) -> F5
      Space -> E5
      Line 2 -> D5
      ...
    We'll build a scale of positions (lines and spaces) with corresponding MIDI note numbers.

    NOTE(review): as explained in the inline comments below, adjacent staff
    positions are mapped CHROMATICALLY (1 semitone apart) instead of
    diatonically — a deliberate simplification that misnames some pitches.
    """
    # Convert staff_lines sorted top->bottom
    lines = sorted(staff_lines)
    # staff spacing (median of gaps is robust to one misdetected line)
    spacing = np.median(np.diff(lines))
    # Build reference positions: lines and spaces extending several positions above/below
    # We'll define positions with index 0 at top line, increasing downward by half-step (line->space->line)
    # NOTE(review): positions/labels are assigned but never used afterwards.
    positions = []
    labels = []  # MIDI numbers
    # Let's compute the center y of each "position" for -6..+12 positions relative to top line
    # Determine MIDI mapping: top line (F5) midi 77. Use standard: F5=77, E5=76, D5=74? Wait careful...
    # Simpler: define mapping for relative positions using steps in diatonic scale (not semitone), but easiest is map to note names by index:
    # We'll build a list of note names starting from some reference. Let's compute using music21 for correctness.
    from music21 import pitch
    # We'll compute positions: every half staff-step is spacing/2
    half = spacing / 2.0
    # Let's create position centers from -6 to +18 (enough ledger lines)
    pos_centers = [lines[0] - 6*half + i*half for i in range(40)]
    # Now assign note names: find which position corresponds to which diatonic step.
    # Determine which index corresponds to the top staff line (lines[0])
    idx_top_line = int(round((lines[0] - pos_centers[0]) / half))
    # For treble clef: top line is F5 (MIDI 77)
    top_midi = pitch.Pitch('F5').midi  # 77
    # Each position step (line->space->line) moves by one diatonic step (i.e., one scale degree), which may be 1 or 2 semitones.
    # But easier: we can build a list of midi numbers by moving by semitone steps of a diatonic scale: approximate by mapping every position to midi by using
    # semitone step of 1 for each half-step (this maps to chromatic steps which is fine but won't respect staff spacing perfectly for accidentals).
    # Simpler: treat each position as semitone steps from top line: top line index -> top_midi, next half position -> top_midi - 1, etc.
    # This yields a chromatic mapping: adjacent positions = 1 semitone. This is a simplification (in real staff adjacent positions are diatonic).
    midi_for_pos = []
    for i in range(len(pos_centers)):
        midi_for_pos.append(top_midi - (i - idx_top_line))

    # Identify closest pos index for given y_center
    diffs = [abs(y_center - c) for c in pos_centers]
    pos_idx = int(np.argmin(diffs))
    midi = int(round(midi_for_pos[pos_idx]))
    # Convert midi to note name (e.g. "F5")
    p = pitch.Pitch()
    p.midi = midi
    return p.nameWithOctave


# -------------------------

# Build music21 stream from detected notes

# -------------------------

def build_stream_from_boxes(boxes, staff_lines, tempo_bpm=100):
    """Turn detected notehead boxes into a music21 stream of quarter notes.

    Boxes are assumed to be sorted left-to-right; the pitch of each note is
    derived from the vertical center of its box.  Chords and stem-based
    durations are not implemented — every notehead becomes a quarter note.
    """
    melody = stream.Stream()
    melody.append(tempo.MetronomeMark(number=tempo_bpm))
    melody.append(meter.TimeSignature('4/4'))  # simple fixed time signature

    for x, y, box_w, box_h in boxes:
        center_y = y + box_h / 2.0
        quarter = note.Note(map_y_to_pitch(center_y, staff_lines))
        quarter.duration.quarterLength = 1.0  # quarter note default
        melody.append(quarter)

    return melody


# -------------------------

# Main flow

# -------------------------

def process_image_to_midi(input_path, output_midi_path, debug=False):
    """Full pipeline: image -> staff lines -> noteheads -> music21 -> MIDI.

    Returns True on success, False when no noteheads were detected.
    Raises FileNotFoundError when the input image cannot be read.
    """
    img = load_image(input_path)
    bin_img = binarize(img)

    staff_lines = detect_staff_lines(bin_img, debug=debug)
    # Pitch mapping degrades gracefully with fewer than 5 lines, so only warn.
    if not staff_lines or len(staff_lines) < 5:
        print("Warning: could not detect 5 staff lines reliably. Trying to proceed with available lines.")
    else:
        print("Detected staff lines (y-coordinates):", staff_lines)

    boxes = detect_noteheads(bin_img, staff_lines, debug=debug)
    if not boxes:
        print("No noteheads detected. Exiting.")
        return False

    print(f"Detected {len(boxes)} candidate noteheads (left→right).")
    for i, b in enumerate(boxes, start=1):
        x, y, wbox, hbox = b
        print(f"{i}: x={x}, y={y}, w={wbox}, h={hbox}")

    music_stream = build_stream_from_boxes(boxes, staff_lines, tempo_bpm=100)

    # Export to MIDI
    mf = midi.translate.streamToMidiFile(music_stream)
    mf.open(output_midi_path, 'wb')
    mf.write()
    mf.close()
    print(f"MIDI saved to {output_midi_path}")
    return True


# -------------------------

# CLI

# -------------------------

if __name__ == "__main__":
    # Expect exactly: input image path and output MIDI path.
    if len(sys.argv) < 3:
        print("Usage: python sheet_to_midi.py input_image.png output.mid")
        sys.exit(1)
    inp = sys.argv[1]
    out = sys.argv[2]
    ok = process_image_to_midi(inp, out, debug=True)
    if not ok:
        # Exit code 2 signals "no notes detected" (vs 1 for bad usage).
        sys.exit(2)