Python for Engineers: AI Whiteboard Digitizer

#!/usr/bin/env python3

"""

AI Whiteboard Digitizer (prototype)

Usage:

python whiteboard_digitizer.py input_image.jpg

Outputs (written to ./wb_outputs/ by default):

- ocr_texts.txt : OCR'd text lines (raw)

- equations_latex.tex : LaTeX for parseable math expressions

- diagram.svg (or diagram.png) : vector-like rendering of detected lines/circles

- several debug images (debug_*.png, overlay_debug.png) in the output directory

"""

import os
import re
import sys

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pytesseract
import svgwrite
from PIL import Image
from sympy import sympify, latex
from sympy.core.sympify import SympifyError

# If on Windows and tesseract is not in PATH, set path here (uncomment and adjust)

# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# ---------- Utilities ----------

def ensure_dir(path):
    """Create directory *path*, including missing parents, if needed.

    ``exist_ok=True`` makes the call idempotent and avoids the race between
    a separate existence check and the creation.

    Args:
        path: Directory to create.

    Raises:
        FileExistsError: If *path* already exists but is not a directory.
        OSError: If the directory cannot be created (e.g. empty path,
            missing permissions).
    """
    os.makedirs(path, exist_ok=True)

# Preprocess: grayscale, denoise, adaptive threshold, deskew

def _normalize_skew_angle(angle):
    """Convert a ``cv2.minAreaRect`` angle into a deskew rotation in (-45, 45].

    ``minAreaRect`` may describe the same rectangle with angles that differ
    by a multiple of 90 degrees depending on the OpenCV release (older
    builds report values in [-90, 0), newer ones in (0, 90]).  Reducing
    ``-angle`` modulo 90 yields the same small correction for either
    convention; for the older range it equals the classic
    ``-(90 + angle) if angle < -45 else -angle`` recipe.
    """
    return 45.0 - ((45.0 + angle) % 90.0)


def preprocess_image(img_bgr, max_dim=1600):
    """Prepare a whiteboard photo for OCR and shape detection.

    Pipeline: optional downscale, grayscale conversion, non-local-means and
    bilateral denoising, inverted adaptive threshold, then deskew of both
    the grayscale image and the binary mask.

    Args:
        img_bgr: Colour image array in BGR channel order (it is converted
            with ``cv2.COLOR_BGR2GRAY``).
        max_dim: Longest allowed side in pixels; larger images are scaled
            down keeping the aspect ratio, smaller ones are left untouched.

    Returns:
        Tuple ``(gray, th)``: the denoised, deskewed grayscale image and the
        deskewed binary mask (ink is 255, background is 0).

    Raises:
        ValueError: If *img_bgr* is ``None`` or contains no pixels.
    """
    if img_bgr is None or img_bgr.size == 0:
        raise ValueError("preprocess_image() needs a non-empty image")

    # Resize to a manageable size, keeping the aspect ratio.
    h, w = img_bgr.shape[:2]
    scale = min(1.0, float(max_dim) / max(h, w))
    if scale < 1.0:
        # max(1, ...) keeps extreme aspect ratios from collapsing to 0 px.
        new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
        img_bgr = cv2.resize(img_bgr, new_size, interpolation=cv2.INTER_AREA)

    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # Denoise, then bilateral filter to smooth while preserving edges.
    gray = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
    gray = cv2.bilateralFilter(gray, 9, 75, 75)

    # Adaptive threshold (whiteboard: dark text on light background), inverted
    # so that ink becomes the non-zero foreground.
    th = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, 25, 12)

    # Deskew using the minimum-area rectangle around all foreground pixels.
    # NOTE(review): np.where yields (row, col) pairs whereas OpenCV points are
    # (x, y); the ordering is kept exactly as in the original recipe.
    coords = np.column_stack(np.where(th > 0)).astype(np.float32)
    if coords.shape[0] > 0:
        angle = _normalize_skew_angle(cv2.minAreaRect(coords)[-1])
        if angle != 0.0:
            (h2, w2) = gray.shape[:2]
            M = cv2.getRotationMatrix2D((w2 // 2, h2 // 2), angle, 1.0)
            gray = cv2.warpAffine(gray, M, (w2, h2), flags=cv2.INTER_CUBIC,
                                  borderMode=cv2.BORDER_REPLICATE)
            # Nearest-neighbour keeps the mask strictly binary; cubic
            # interpolation would introduce intermediate grey values.
            th = cv2.warpAffine(th, M, (w2, h2), flags=cv2.INTER_NEAREST,
                                borderMode=cv2.BORDER_REPLICATE)

    return gray, th

# Find text boxes using MSER or connected components (we'll use morphological dilation + contours)

def detect_text_regions(thresh_img, min_area=200, debug_out=None):
    """Locate candidate text-line boxes in a binary image.

    The mask is dilated with a wide, short kernel so neighbouring glyphs
    merge into blobs; the bounding rectangle of each external contour is
    then kept if it is large enough.

    Args:
        thresh_img: Single-channel image in which foreground is non-zero.
        min_area: Minimum ``w * h`` of a bounding box to be kept.
        debug_out: Optional file path; when given, an image with the kept
            boxes drawn in green is written there.

    Returns:
        List of ``(x, y, w, h)`` tuples ordered by ``y`` then ``x``.
    """
    # Wide kernel: letters fuse horizontally into words/lines.
    struct_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 5))
    merged = cv2.dilate(thresh_img, struct_elem, iterations=2)
    contours, _ = cv2.findContours(merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Keep boxes that meet the area threshold and are at least 10 px high
    # and 20 px wide (anything smaller is treated as noise).
    rects = (cv2.boundingRect(contour) for contour in contours)
    boxes = [
        (bx, by, bw, bh)
        for (bx, by, bw, bh) in rects
        if bw * bh >= min_area and bh >= 10 and bw >= 20
    ]

    # Top to bottom, then left to right.
    boxes.sort(key=lambda box: (box[1], box[0]))

    if debug_out is not None:
        canvas = cv2.cvtColor(thresh_img, cv2.COLOR_GRAY2BGR)
        for bx, by, bw, bh in boxes:
            cv2.rectangle(canvas, (bx, by), (bx + bw, by + bh), (0, 255, 0), 2)
        cv2.imwrite(debug_out, canvas)

    return boxes

# OCR each region (use appropriate psm)

def ocr_regions(gray_img, boxes, ocr_lang='eng'):
    """Run Tesseract on each box of *gray_img* and collect non-empty results.

    Args:
        gray_img: Image array the crops are taken from.
        boxes: Iterable of ``(x, y, w, h)`` regions.
        ocr_lang: Tesseract language code passed as ``lang``.

    Returns:
        List of dicts ``{'box': (x0, y0, x1, y1), 'text': str}`` where the
        box is the padded, image-clamped crop rectangle and the text is the
        stripped OCR output. Regions with empty output are omitted.
    """
    margin = 4
    # --psm 7: treat the crop as a single text line (suits one-line equations).
    tess_config = "--psm 7"

    results = []
    for bx, by, bw, bh in boxes:
        img_h, img_w = gray_img.shape[0], gray_img.shape[1]
        # Pad the box slightly, clamped to the image bounds.
        left = max(0, bx - margin)
        top = max(0, by - margin)
        right = min(img_w, bx + bw + margin)
        bottom = min(img_h, by + bh + margin)

        # pytesseract is fed a PIL image built from the cropped array.
        snippet = Image.fromarray(gray_img[top:bottom, left:right])
        recognized = pytesseract.image_to_string(
            snippet, lang=ocr_lang, config=tess_config
        ).strip()
        if not recognized:
            continue
        results.append({'box': (left, top, right, bottom), 'text': recognized})

    return results

# Heuristic to check if a line looks like an equation/expression

# Patterns are compiled once at import time instead of on every call.
_OPERATOR_RE = re.compile(r'[=\+\-\*/\^]')
_FUNC_NAME_RE = re.compile(r'\b(sin|cos|tan|log|ln|lim|sqrt)\b')
_DIGIT_LETTER_RE = re.compile(r'\d+[a-zA-Z]|[a-zA-Z]\d+')


def looks_like_equation(s):
    """Heuristically decide whether an OCR line is a math expression.

    A line qualifies when it contains any of ``= + - * / ^``, a standalone
    function-like word (sin, cos, tan, log, ln, lim, sqrt; case-insensitive),
    or a digit directly adjacent to a letter (``2x``, ``x2``).

    Args:
        s: OCR text line; ``None`` or empty yields ``False``.

    Returns:
        ``True`` if the line looks like an equation/expression, else ``False``.
    """
    if not s:
        return False
    # Operator characters are unaffected by whitespace, so the raw string
    # can be searched directly.
    if _OPERATOR_RE.search(s):
        return True
    if _FUNC_NAME_RE.search(s.lower()):
        return True
    return _DIGIT_LETTER_RE.search(s) is not None

# Clean OCR text for sympy: replace common OCR artifacts

# Single-pass translation table for common OCR look-alikes.
_OCR_CHAR_MAP = str.maketrans({
    '×': '*',
    'X': 'x',
    # dash / minus look-alikes
    '—': '-',
    '−': '-',
    '–': '-',
    '÷': '/',
    # typographic quotes
    '’': "'",
    '‘': "'",
    '“': '"',
    '”': '"',
    # NOTE(review): risky - every capital O becomes zero, including inside
    # words (e.g. "LOG" -> "L0G"); there is no look-like-zero check.
    'O': '0',
    # caret exponent -> Python/sympy power operator
    '^': '**',
})


def clean_ocr_for_sympy(s):
    """Normalise raw OCR text so it has a better chance of parsing in sympy.

    Replaces typographic operators, dashes and quotes with ASCII
    equivalents, maps ``X`` to ``x`` and ``O`` to ``0``, rewrites ``^`` as
    ``**``, drops non-printable characters (tabs, newlines, control codes)
    and trims surrounding whitespace. Printable non-ASCII characters such
    as Greek letters are kept.

    Args:
        s: OCR text.

    Returns:
        The cleaned string.
    """
    translated = s.translate(_OCR_CHAR_MAP)
    return ''.join(ch for ch in translated if ch.isprintable()).strip()

# Try to parse with sympy and produce latex

def parse_equation_to_latex(s):
    """Parse an OCR'd expression or equation with sympy and render LaTeX.

    The text is first normalised with ``clean_ocr_for_sympy``. If it
    contains ``=``, it is split on the first one and both sides are parsed
    and rendered separately; otherwise the whole string is parsed as one
    expression.

    Args:
        s: Raw OCR text.

    Returns:
        ``(latex_str, None)`` on success, or ``(None, error_message)`` when
        parsing or rendering fails. Exceptions are not propagated.
    """
    s_clean = clean_ocr_for_sympy(s)
    # SECURITY NOTE: sympify() evaluates its input with eval(). The text here
    # comes from OCR of an arbitrary image, so treat it as untrusted input.
    try:
        if '=' in s_clean:
            # Only the first '=' separates the sides; any further '=' stays
            # in the right-hand side and will typically fail to parse.
            left, right = s_clean.split('=', 1)
            latex_str = latex(sympify(left)) + " = " + latex(sympify(right))
        else:
            latex_str = latex(sympify(s_clean))
        return latex_str, None
    except SympifyError as e:
        return None, f"SympifyError: {e}"
    except Exception as e:
        # Best-effort conversion: report any other failure as a message.
        return None, str(e)

# Detect simple geometric primitives (lines via Hough, circles via HoughCircles)

def detect_shapes(gray_img, debug_prefix=None):
    """Detect straight line segments and circles in a grayscale image.

    Segments come from a probabilistic Hough transform on Canny edges;
    circles come from ``cv2.HoughCircles`` applied to the image itself.

    Args:
        gray_img: Single-channel image.
        debug_prefix: Optional path prefix; when truthy, a visualisation is
            written to ``"<debug_prefix>_shapes.png"``.

    Returns:
        ``(line_list, circles)`` where ``line_list`` is a list of
        ``(x1, y1, x2, y2)`` int tuples (possibly empty) and ``circles`` is
        a list of ``(x, y, r)`` int tuples, or ``None`` when no circle was
        found or circle detection raised.
    """
    edge_map = cv2.Canny(gray_img, 50, 150, apertureSize=3)

    # Probabilistic Hough transform; returns None when nothing is found.
    segments = cv2.HoughLinesP(edge_map, 1, np.pi/180, threshold=80, minLineLength=50, maxLineGap=10)
    if segments is None:
        line_list = []
    else:
        line_list = [
            (int(xa), int(ya), int(xb), int(yb))
            for xa, ya, xb, yb in (seg[0] for seg in segments)
        ]

    # Circle detection is best-effort: any failure simply yields no circles.
    circles = None
    try:
        found = cv2.HoughCircles(gray_img, cv2.HOUGH_GRADIENT, dp=1.2, minDist=30,
                                 param1=100, param2=30, minRadius=8, maxRadius=200)
        if found is not None:
            rounded = np.round(found[0, :]).astype("int")
            circles = [(int(cx), int(cy), int(rad)) for (cx, cy, rad) in rounded]
    except Exception:
        circles = None

    if debug_prefix:
        canvas = cv2.cvtColor(gray_img, cv2.COLOR_GRAY2BGR)
        for xa, ya, xb, yb in line_list:
            cv2.line(canvas, (xa, ya), (xb, yb), (0, 255, 0), 2)
        for cx, cy, rad in circles or ():
            cv2.circle(canvas, (cx, cy), rad, (0, 0, 255), 2)
        cv2.imwrite(f"{debug_prefix}_shapes.png", canvas)

    return line_list, circles

# Render vector-style diagram to SVG using svgwrite or Matplotlib

def render_vector_diagram(svg_path, image_size, lines, circles, boxes=None):
    """Write the detected primitives to an SVG file.

    Args:
        svg_path: Destination file for the drawing.
        image_size: ``(width, height)`` of the canvas.
        lines: Iterable of ``(x1, y1, x2, y2)`` segments.
        circles: Iterable of ``(x, y, r)`` circles; may be ``None`` or empty.
        boxes: Optional iterable of ``(x0, y0, x1, y1)`` text regions, drawn
            as thin light-gray outlines.
    """
    width, height = image_size
    drawing = svgwrite.Drawing(svg_path, size=(width, height))

    # White backdrop covering the whole canvas.
    drawing.add(drawing.rect(insert=(0, 0), size=(width, height), fill='white'))

    # Text-region outlines (optional).
    for left, top, right, bottom in boxes or ():
        outline = drawing.rect(
            insert=(left, top),
            size=(right - left, bottom - top),
            fill='none',
            stroke='lightgray',
            stroke_width=1,
        )
        drawing.add(outline)

    # Line segments.
    for xa, ya, xb, yb in lines:
        segment = drawing.line(
            start=(xa, ya),
            end=(xb, yb),
            stroke=svgwrite.rgb(10, 10, 16, '%'),
            stroke_width=2,
        )
        drawing.add(segment)

    # Circles.
    for cx, cy, radius in circles or ():
        drawing.add(
            drawing.circle(center=(cx, cy), r=radius, stroke='black', fill='none', stroke_width=2)
        )

    drawing.save()

# Main pipeline

def process_whiteboard_image(in_path, out_dir="wb_outputs"):
    """Run the full digitizing pipeline on one whiteboard photo.

    Steps: preprocess, detect text regions, OCR, convert equation-like lines
    to LaTeX, detect lines/circles, then write all artefacts into *out_dir*
    (``ocr_texts.txt``, ``equations_latex.tex``, ``diagram.svg``,
    ``overlay_debug.png`` and several ``debug_*.png`` images).

    Args:
        in_path: Path of the input image.
        out_dir: Output directory; created if missing.

    Returns:
        Dict with keys ``"ocr_lines"`` (list of OCR result dicts),
        ``"equations"`` (list of ``(ocr_text, latex_or_UNPARSEABLE_message)``
        tuples), ``"svg"`` (path of the SVG) and ``"debug"`` (*out_dir*).

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load *in_path*; it
            returns ``None`` both for a missing file and for one it cannot
            decode.
    """
    ensure_dir(out_dir)

    img_bgr = cv2.imread(in_path)
    if img_bgr is None:
        raise FileNotFoundError(in_path)

    gray, th = preprocess_image(img_bgr)
    cv2.imwrite(os.path.join(out_dir, "debug_preprocess.png"), gray)
    cv2.imwrite(os.path.join(out_dir, "debug_thresh.png"), th)

    # Detect regions (text lines) and OCR them.
    boxes = detect_text_regions(th, debug_out=os.path.join(out_dir, "debug_boxes.png"))
    ocr_lines = ocr_regions(gray, boxes)

    # Write raw OCR results, one line per region.
    ocr_txt_file = os.path.join(out_dir, "ocr_texts.txt")
    with open(ocr_txt_file, "w", encoding='utf-8') as f:
        f.writelines(item['text'] + "\n" for item in ocr_lines)
    print(f"[+] OCR lines saved to {ocr_txt_file}")

    # Convert likely equations to LaTeX.
    latex_results = []
    tex_lines = ["% Generated LaTeX (auto) — review and correct as needed\n"]
    for it in ocr_lines:
        txt = it['text']
        if not looks_like_equation(txt):
            continue
        latex_str, err = parse_equation_to_latex(txt)
        # Collapse line breaks so the OCR text stays inside one TeX comment.
        tex_lines.append("% OCR: " + " ".join(txt.splitlines()) + "\n")
        if latex_str:
            latex_results.append((txt, latex_str))
            tex_lines.append(latex_str + "\n\n")
        else:
            failure = f"UNPARSEABLE: {err}"
            latex_results.append((txt, failure))
            # Written as a comment: a bare error message is not valid LaTeX
            # and would break compilation of the generated file.
            tex_lines.append("% " + " ".join(failure.splitlines()) + "\n\n")

    eq_file = os.path.join(out_dir, "equations_latex.tex")
    with open(eq_file, "w", encoding='utf-8') as f:
        f.writelines(tex_lines)
    print(f"[+] Equation LaTeX saved to {eq_file}")

    # Shape detection and SVG rendering.
    lines, circles = detect_shapes(gray, debug_prefix=os.path.join(out_dir, "debug"))
    svg_path = os.path.join(out_dir, "diagram.svg")
    # The renderer expects corner coordinates (x0, y0, x1, y1).
    bboxes = [(x, y, x + w, y + h) for (x, y, w, h) in boxes]
    render_vector_diagram(svg_path, (gray.shape[1], gray.shape[0]), lines, circles, boxes=bboxes)
    print(f"[+] Diagram SVG saved to {svg_path}")

    # Raster overlay (drawn with OpenCV) of everything that was detected.
    overlay = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
    for (x1, y1, x2, y2) in lines:
        cv2.line(overlay, (x1, y1), (x2, y2), (0, 255, 0), 2)
    if circles:
        for (x, y, r) in circles:
            cv2.circle(overlay, (x, y), r, (0, 0, 255), 2)
    for (x0, y0, x1, y1) in bboxes:
        cv2.rectangle(overlay, (x0, y0), (x1, y1), (255, 0, 0), 1)
    overlay_path = os.path.join(out_dir, "overlay_debug.png")
    cv2.imwrite(overlay_path, overlay)
    print(f"[+] Debug overlay saved to {overlay_path}")

    return {
        "ocr_lines": ocr_lines,
        "equations": latex_results,
        "svg": svg_path,
        "debug": out_dir,
    }

# --------- CLI ----------

if __name__ == "__main__":
    # Command-line entry point: expects the input image path as the first
    # argument and writes all results into ./wb_outputs.
    if len(sys.argv) < 2:
        print("Usage: python whiteboard_digitizer.py input_image.jpg")
        sys.exit(1)

    inp = sys.argv[1]
    out = "wb_outputs"
    try:
        process_whiteboard_image(inp, out_dir=out)
    except FileNotFoundError as exc:
        # Report an unreadable input as a short message and a non-zero exit
        # status instead of a traceback.
        print(f"Error: could not read input image: {exc}", file=sys.stderr)
        sys.exit(1)
    print("Done. Outputs in:", out)

Python for Engineers

Blog Pages

AI Whiteboard Digitizer

No comments: