"""Auto Meeting Notes Generator."""

import os

import re

import pandas as pd

import whisper

from datetime import datetime


# Optional: For GPT-4 summarization

import openai

from dotenv import load_dotenv


# Load environment variables via python-dotenv, then pass the OpenAI key to
# the client library (the value is None when OPENAI_API_KEY is not set).
load_dotenv()
openai.api_key = os.environ.get("OPENAI_API_KEY")


# ========== CONFIG ==========
# Relative directory names; they resolve against the current working directory.
AUDIO_FOLDER = "audio"                # scanned for input recordings
TRANSCRIPT_FOLDER = "transcriptions"  # one .txt transcript per recording
NOTES_FOLDER = "notes_output"         # final CSV notes


# ========== SETUP ==========
# Create the two output directories up front; existing ones are left as-is.
for _output_dir in (TRANSCRIPT_FOLDER, NOTES_FOLDER):
    os.makedirs(_output_dir, exist_ok=True)


# ========== 1. Transcribe Audio ==========

# Loaded Whisper models keyed by model name, so a batch run loads each model
# once instead of once per audio file.
_WHISPER_MODELS = {}


def transcribe_audio(file_path, model_name="base"):
    """Transcribe an audio file with Whisper and save the text to disk.

    The transcript is written to ``TRANSCRIPT_FOLDER/<stem>.txt`` (UTF-8),
    overwriting any existing file of the same name, where ``<stem>`` is the
    file name without its final extension.

    Args:
        file_path: Path to the audio file to transcribe.
        model_name: Whisper model name passed to ``whisper.load_model``.

    Returns:
        The transcribed text (the ``"text"`` entry of Whisper's result).
    """
    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model

    text = model.transcribe(file_path)["text"]

    # splitext drops only the last extension, so "standup.2024.01.mp3" keeps
    # its full stem instead of being cut at the first dot (which could make
    # different recordings overwrite each other's transcript).
    stem = os.path.splitext(os.path.basename(file_path))[0]
    output_path = os.path.join(TRANSCRIPT_FOLDER, f"{stem}.txt")

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(text)

    return text


# ========== 2. Extract Action Items ==========

def extract_action_items(text):
    """Return the lines of *text* that look like action items.

    Each line is stripped, a leading bullet marker (``-``, ``*`` or ``N.``
    followed by whitespace) is removed, and the line is kept when it contains
    any action keyword. Keyword matching is a case-insensitive substring test,
    so e.g. "due" also matches inside "procedure".

    Args:
        text: The transcript (or any multi-line string) to scan.

    Returns:
        A list of unique matching lines, without bullet markers, in order of
        first appearance.

    NOTE(review): extraction is line-based. If the transcript arrives as one
    long line (Whisper output presumably often does - confirm), the whole
    transcript is returned as a single item whenever any keyword occurs.
    """
    # Anchored at the start of the line so hyphens or asterisks inside a
    # sentence ("re-assign", "follow-up") are never mistaken for bullets.
    bullet_prefix = re.compile(r"^(?:[-*]|\d+\.)\s+")
    action_keywords = (
        "should", "need to", "must", "let's",
        "we will", "assign", "follow up", "due",
    )

    # A dict is used as an ordered set: it removes duplicates while keeping a
    # deterministic, first-seen order (list(set(...)) reorders between runs).
    actions = {}
    for raw_line in text.splitlines():
        line = bullet_prefix.sub("", raw_line.strip(), count=1).strip()
        if not line:
            continue
        lowered = line.lower()
        if any(keyword in lowered for keyword in action_keywords):
            actions.setdefault(line, None)

    return list(actions)


# ========== 3. Summarize with GPT (Optional) ==========

def summarize_with_gpt(transcript_text):
    """Ask the OpenAI chat API for a summary of a meeting transcript.

    Works with both generations of the ``openai`` package: the 1.x interface
    (``openai.chat.completions``) and the legacy 0.x interface
    (``openai.ChatCompletion``). The API key is taken from ``openai.api_key``.

    Args:
        transcript_text: Full transcript text to summarize.

    Returns:
        The content of the first choice's message in the API response.
    """
    model = "gpt-4-turbo"
    messages = [
        {"role": "system", "content": "You are an AI assistant that summarizes meeting transcripts."},
        {"role": "user", "content": f"Summarize this meeting:\n\n{transcript_text}"},
    ]

    # openai>=1.0 removed ChatCompletion (calling it raises an error there);
    # the OpenAI client class only exists in 1.x, so it identifies that API.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(model=model, messages=messages)
        return response.choices[0].message.content

    # Legacy openai<1.0 interface.
    response = openai.ChatCompletion.create(model=model, messages=messages)
    return response['choices'][0]['message']['content']


# ========== 4. Save Final Notes ==========

def save_notes(transcript, actions, summary=None, filename="meeting_notes"):
    """Write the transcript, action items and summary to a timestamped CSV.

    The file is created as ``NOTES_FOLDER/<filename>_<YYYYMMDD_HHMM>.csv`` with
    two columns, ``Section`` and ``Content``, and one row per section. The
    timestamp has minute resolution.

    Args:
        transcript: Transcript text for the "Transcript" row.
        actions: Iterable of strings, joined with newlines for "Action Items".
        summary: Summary text; a falsy value is stored as "Not generated".
        filename: Prefix of the output file name.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M")
    csv_path = os.path.join(NOTES_FOLDER, f"{filename}_{timestamp}.csv")

    action_block = "\n".join(actions)
    summary_text = summary if summary else "Not generated"

    table = pd.DataFrame(
        {
            "Section": ["Transcript", "Action Items", "Summary"],
            "Content": [transcript, action_block, summary_text],
        }
    )
    table.to_csv(csv_path, index=False)

    print(f"[✔] Notes saved to {csv_path}")


# ========== MAIN ==========

def process_meeting(file_path, use_gpt=False):
    """Run the full pipeline for one recording.

    Transcribes the audio, extracts action items from the transcript,
    optionally asks GPT for a summary, and saves everything as a CSV named
    after the audio file.

    Args:
        file_path: Path to the audio file to process.
        use_gpt: When true, also generate a summary with
            ``summarize_with_gpt``; otherwise the summary is left as None.
    """
    print(f"🔊 Transcribing: {file_path}")
    transcript = transcribe_audio(file_path)

    print("✅ Extracting action items...")
    actions = extract_action_items(transcript)

    summary = None
    if use_gpt:
        print("🤖 Summarizing with GPT...")
        summary = summarize_with_gpt(transcript)

    # splitext removes only the final extension, so a name such as
    # "standup.2024.01.mp3" is not truncated at its first dot.
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    save_notes(transcript, actions, summary, file_name)



# ========== RUN ==========

if __name__ == "__main__":
    # A missing input folder is treated like an empty one, so the user gets
    # the friendly message below instead of a FileNotFoundError traceback.
    if os.path.isdir(AUDIO_FOLDER):
        # Case-insensitive extension match (".MP3" counts too); sorted so the
        # processing order is the same on every run.
        audio_files = sorted(
            f for f in os.listdir(AUDIO_FOLDER)
            if f.lower().endswith(('.mp3', '.wav'))
        )
    else:
        audio_files = []

    if not audio_files:
        print("⚠️ No audio files found in /audio folder.")
    else:
        for file in audio_files:
            process_meeting(os.path.join(AUDIO_FOLDER, file), use_gpt=True)


# No comments: