Invoice Data Extractor

import tkinter as tk

from tkinter import filedialog, messagebox

from PIL import Image

import pytesseract

from pdf2image import convert_from_path

import re

import os


# Optional: Set Tesseract path

# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'


def extract_text_from_file(file_path):

    if file_path.lower().endswith('.pdf'):

        images = convert_from_path(file_path, dpi=300)

        text = ""

        for image in images:

            text += pytesseract.image_to_string(image)

        return text

    elif file_path.lower().endswith(('.png', '.jpg', '.jpeg')):

        image = Image.open(file_path)

        return pytesseract.image_to_string(image)

    else:

        return ""


def extract_invoice_data(text):

    invoice_number = re.search(r'Invoice\s*#?:?\s*(\w+)', text, re.IGNORECASE)

    date = re.search(r'Date\s*:?(\s*\d{1,2}/\d{1,2}/\d{2,4})', text, re.IGNORECASE)

    total = re.search(r'Total\s*:?[\s$]*(\d+[\.,]?\d*)', text, re.IGNORECASE)


    return {

        "Invoice Number": invoice_number.group(1) if invoice_number else "Not Found",

        "Date": date.group(1).strip() if date else "Not Found",

        "Total Amount": total.group(1) if total else "Not Found"

    }


def process_invoice():

    file_path = filedialog.askopenfilename(title="Select Invoice", filetypes=[("PDF/Image Files", "*.pdf *.jpg *.png *.jpeg")])

    if not file_path:

        return


    text = extract_text_from_file(file_path)

    data = extract_invoice_data(text)


    result = f"""

    ๐Ÿ“„ File: {os.path.basename(file_path)}

    ๐Ÿงพ Invoice Number: {data['Invoice Number']}

    ๐Ÿ“… Date: {data['Date']}

    ๐Ÿ’ฐ Total: {data['Total Amount']}

    """

    messagebox.showinfo("Extracted Invoice Data", result)


# GUI Setup

app = tk.Tk()

app.title("๐Ÿงพ Invoice Data Extractor")

app.geometry("400x200")

app.configure(bg="#f9f9f9")


label = tk.Label(app, text="Click below to select an invoice PDF or image", bg="#f9f9f9", font=("Helvetica", 12))

label.pack(pady=20)


btn = tk.Button(app, text="Select Invoice", command=process_invoice, bg="#4CAF50", fg="white", font=("Helvetica", 12), padx=10, pady=5)

btn.pack()


app.mainloop()


No comments: