OCR using pytesseract
from PIL import Image
import pytesseract
def extract_text_from_image(image_path):
    """Run Tesseract OCR on an image and return the recognized text.

    Args:
        image_path: Location of the image; passed straight to
            ``PIL.Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    # Image.open keeps the underlying file open until the image is closed,
    # so use a context manager to release the handle once OCR has finished.
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image)
Parse Items and Prices with re
import re
# One receipt entry: an item name (letters and spaces, starting with a
# letter), a gap made of spaces/tabs and optional dot leaders, then a price
# with exactly two decimals. Newlines are deliberately excluded from the name
# and the gap so an entry can never span two receipt lines.
_ITEM_PRICE_RE = re.compile(r"([A-Za-z][A-Za-z \t]*?)[ \t.]+(\d+\.\d{2})")


def parse_items(raw_text):
    """Extract item/price pairs from OCR'd receipt text.

    Each line is scanned for entries such as ``"Bread 2.50"`` or
    ``"Milk ....... 1.25"``.

    Args:
        raw_text: Receipt text, one entry per line.

    Returns:
        A tuple ``(items, total, avg)`` where ``items`` is a list of
        ``{"item": str, "price": float}`` dicts in order of appearance,
        ``total`` is the sum of the prices, and ``avg`` is the mean price
        (``0`` when no items were found).
    """
    items = []
    # Match line by line: a header such as the store name must not be glued
    # onto the item that follows it on the next line.
    for line in raw_text.splitlines():
        for name, price in _ITEM_PRICE_RE.findall(line):
            items.append({"item": name.strip(), "price": float(price)})

    total = sum(entry["price"] for entry in items)
    # Guard against division by zero when nothing was recognized.
    avg = total / len(items) if items else 0
    return items, total, avg
(Optional) Step 3: Streamlit Interface
# Streamlit front end: upload a receipt image, OCR it, list the detected
# items with their total/average cost, and offer the table as a CSV download.
import os
import tempfile

import pandas as pd
import streamlit as st

from utils.text_parser import extract_text_from_image, parse_items

st.title("🧾 Receipt Text Extractor & Analyzer")
uploaded_file = st.file_uploader("Upload Receipt Image", type=["jpg", "png", "jpeg"])

if uploaded_file:
    # The OCR helper takes a path, so spill the upload to disk first.
    # delete=False lets the file be reopened by name after this handle is
    # closed; it is removed explicitly below.
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(uploaded_file.read())
        tmp_path = tmp.name

    try:
        raw_text = extract_text_from_image(tmp_path)
    finally:
        # Always remove the temp file, even if OCR fails, so uploads do not
        # pile up in the temp directory.
        os.unlink(tmp_path)

    items, total, avg = parse_items(raw_text)

    df = pd.DataFrame(items)
    # NOTE(review): the original emoji here was lost to an encoding error;
    # the clipboard icon is a best guess — confirm.
    st.subheader("📋 Items Detected:")
    st.table(df)

    st.markdown(f"**Total Cost:** ₹{total:.2f}")
    st.markdown(f"**Average Item Cost:** ₹{avg:.2f}")

    # Download as CSV
    csv = df.to_csv(index=False).encode()
    st.download_button("📥 Download CSV", csv, "receipt_data.csv", "text/csv")
No comments:
Post a Comment