# AI Chat Summarizer for WhatsApp

import re

import pandas as pd

import matplotlib.pyplot as plt

from textblob import TextBlob

from collections import Counter

from datetime import datetime

import os


# ========== CONFIG ==========

# Path of the exported chat text file that the script entry point parses.
CHAT_FILE = "chat.txt"

# Directory into which the plotting functions save their PNG files.
PLOTS_FOLDER = "chat_analysis_plots"

# Runs at import time; exist_ok=True makes this a no-op when the folder is already there.
os.makedirs(PLOTS_FOLDER, exist_ok=True)


# ========== 1. Parse WhatsApp Chat ==========

def _parse_timestamp(date, time, am_pm):
    """Build a ``datetime`` from the date/time pieces captured from a chat line."""
    # The export may write the year with either two or four digits.
    year = date.rsplit("/", 1)[-1]
    date_fmt = "%d/%m/%Y" if len(year) > 2 else "%d/%m/%y"
    if am_pm:
        return datetime.strptime(f"{date} {time} {am_pm}", f"{date_fmt} %I:%M %p")
    # No AM/PM marker means the export used a 24-hour clock.
    return datetime.strptime(f"{date} {time}", f"{date_fmt} %H:%M")


def parse_chat(file_path):
    """Parse an exported WhatsApp chat text file into a DataFrame.

    Recognised lines look like ``D/M/Y, H:MM[ AM|PM] - Sender: text``. Dates
    are read day-first; both 12-hour (with AM/PM) and 24-hour timestamps and
    both two- and four-digit years are accepted. Lines that do not start with
    a timestamp are treated as continuations of the previous message and are
    appended to it, separated by a newline. Timestamped lines without a
    ``Sender: `` part (e.g. system notices) are skipped, together with any
    continuation lines that follow them.

    Args:
        file_path: Path to the UTF-8 encoded chat export.

    Returns:
        A DataFrame with the columns ``datetime``, ``sender`` and ``message``,
        one row per parsed message (empty if nothing matched).

    Raises:
        ValueError: If a matched timestamp is not a valid day-first date/time.
    """
    stamp = r'^(\d{1,2}/\d{1,2}/\d{2,4}), (\d{1,2}:\d{2})'
    # The AM/PM marker is optional and, when present, may be preceded by a
    # regular, no-break or narrow no-break space. Keeping the separator inside
    # the optional group lets 24-hour lines (no marker at all) match as well.
    meridiem = r'(?:[ \u00a0\u202f](AM|PM|am|pm))?'
    message_re = re.compile(stamp + meridiem + r' - ([^:]+): (.*)')
    stamped_line_re = re.compile(stamp + meridiem + r' - ')

    messages = []
    current = None  # record that continuation lines should be appended to

    # utf-8-sig transparently drops a leading BOM, which would otherwise keep
    # the first line from matching.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            match = message_re.match(line)
            if match:
                date, time, am_pm, sender, message = match.groups()
                dt = _parse_timestamp(date, time, am_pm)
                current = [dt, sender.strip(), message.strip()]
                messages.append(current)
            elif stamped_line_re.match(line):
                # Timestamped but no "Sender: " part: not a user message.
                current = None
            else:
                extra = line.strip()
                if current is not None and extra:
                    current[2] = f"{current[2]}\n{extra}" if current[2] else extra

    return pd.DataFrame(messages, columns=["datetime", "sender", "message"])


# ========== 2. Sentiment & Stats ==========

def analyze_sentiments(df):
    """Add ``polarity`` and ``sentiment`` columns to ``df`` and return it.

    ``polarity`` is the TextBlob sentiment polarity of each message.
    ``sentiment`` labels that score as ``'positive'`` (above 0.1),
    ``'negative'`` (below -0.1) or ``'neutral'`` (anything else).
    The frame is modified in place; the same object is returned.
    """
    def score(text):
        return TextBlob(text).sentiment.polarity

    def label(polarity):
        if polarity > 0.1:
            return 'positive'
        if polarity < -0.1:
            return 'negative'
        return 'neutral'

    df['polarity'] = df['message'].apply(score)
    df['sentiment'] = df['polarity'].apply(label)
    return df


def top_senders(df, top_n=5):
    """Return the ``top_n`` most frequent senders with their message counts.

    The result is a Series indexed by sender name, ordered from the most to
    the least active.
    """
    messages_per_sender = df['sender'].value_counts()
    busiest = messages_per_sender.head(top_n)
    return busiest


# ========== 3. Plotting Functions ==========

def plot_message_frequency(df):
    """Save a line chart of the number of messages sent per calendar day.

    The chart is written to ``messages_per_day.png`` inside ``PLOTS_FOLDER``.
    ``df`` is only read: the per-day grouping key is derived on the fly
    instead of being stored as a new column on the caller's frame.

    Args:
        df: DataFrame with a datetime-typed ``datetime`` column.
    """
    # Group on a derived series so the caller's DataFrame is left untouched.
    day = df['datetime'].dt.date.rename('date')
    daily_counts = df.groupby(day).size()

    # Explicit figure/axes so the plot cannot land on, or close, some other
    # figure that happens to be current.
    fig, ax = plt.subplots(figsize=(12, 5))
    daily_counts.plot(kind='line', color='teal', ax=ax)
    ax.set_title("Messages Per Day")
    ax.set_xlabel("Date")
    ax.set_ylabel("Number of Messages")
    fig.tight_layout()
    fig.savefig(os.path.join(PLOTS_FOLDER, "messages_per_day.png"))
    plt.close(fig)


def plot_sender_activity(df):
    """Save a bar chart of how many messages each sender wrote.

    The chart is written to ``messages_by_sender.png`` inside
    ``PLOTS_FOLDER`` and the figure is closed afterwards.
    """
    per_sender = df['sender'].value_counts()

    axes = per_sender.plot.bar(figsize=(10, 5), color='orchid')
    axes.set_title("Messages by Sender")
    axes.set_ylabel("Message Count")

    figure = axes.get_figure()
    figure.tight_layout()
    figure.savefig(f"{PLOTS_FOLDER}/messages_by_sender.png")
    plt.close(figure)


def plot_sentiment_distribution(df):
    """Save a pie chart of the share of each sentiment label.

    The chart is written to ``sentiment_distribution.png`` inside
    ``PLOTS_FOLDER``. Each label always gets the same colour (positive =
    green, negative = coral, neutral = grey) regardless of which label is
    the most frequent.

    Args:
        df: DataFrame with a ``sentiment`` column.
    """
    palette = {
        'positive': 'lightgreen',
        'negative': 'lightcoral',
        'neutral': 'lightgrey',
    }
    sentiment_counts = df['sentiment'].value_counts()
    # value_counts() orders by frequency, so colours must be looked up per
    # label; a fixed positional list would paint whichever label is most
    # common green.
    colors = [palette.get(label, 'lightgrey') for label in sentiment_counts.index]

    sentiment_counts.plot(kind='pie', autopct='%1.1f%%', figsize=(6, 6), colors=colors)
    plt.title("Sentiment Distribution")
    plt.tight_layout()
    plt.savefig(f"{PLOTS_FOLDER}/sentiment_distribution.png")
    plt.close()


# ========== 4. Generate Summary ==========

def generate_summary(df):
    """Build a plain-text summary of the chat, save it, and return it.

    The summary lists the total message count, the number of distinct
    senders, the most active senders (via ``top_senders``) and the
    percentage of messages per sentiment label. It is written to
    ``summary_output.txt`` in the current working directory.

    Args:
        df: DataFrame with ``sender`` and ``sentiment`` columns.

    Returns:
        The summary text, identical to what was written to the file.
    """
    lines = [
        f"Total messages: {len(df)}",
        f"Total participants: {df['sender'].nunique()}",
        "Top 5 active senders:",
    ]
    lines.extend(top_senders(df).to_string().split('\n'))

    sentiment_split = df['sentiment'].value_counts(normalize=True) * 100
    lines.append("\nSentiment Breakdown:")
    lines.extend(sentiment_split.round(2).to_string().split('\n'))

    summary_text = "\n".join(lines)

    # Sender names come from a UTF-8 chat export and may contain non-ASCII
    # characters; write UTF-8 explicitly instead of relying on the platform
    # default encoding.
    with open("summary_output.txt", "w", encoding="utf-8") as f:
        f.write(summary_text)

    return summary_text


# ========== MAIN ==========

if __name__ == "__main__":
    # Script entry point: parse -> score sentiment -> plot -> summarise.
    print("📥 Parsing chat...")
    df = parse_chat(CHAT_FILE)

    print("🧠 Analyzing sentiments...")
    df = analyze_sentiments(df)

    print("📊 Generating plots...")
    # Each plotter saves one PNG; they run in this fixed order.
    for make_plot in (
        plot_message_frequency,
        plot_sender_activity,
        plot_sentiment_distribution,
    ):
        make_plot(df)

    print("📝 Writing summary...")
    summary_text = generate_summary(df)
    print(summary_text)

    print("\n✅ Done! Plots saved to 'chat_analysis_plots' and summary to 'summary_output.txt'")


# No comments: