Blog Pages

AI Chat Summarizer for WhatsApp

import re

import pandas as pd

import matplotlib.pyplot as plt

from textblob import TextBlob

from collections import Counter

from datetime import datetime

import os


# ========== CONFIG ==========

CHAT_FILE = "chat.txt"

PLOTS_FOLDER = "chat_analysis_plots"

os.makedirs(PLOTS_FOLDER, exist_ok=True)


# ========== 1. Parse WhatsApp Chat ==========

def parse_chat(file_path):

    with open(file_path, 'r', encoding='utf-8') as f:

        raw_text = f.readlines()


    messages = []

    pattern = r'^(\d{1,2}/\d{1,2}/\d{2,4}), (\d{1,2}:\d{2}) (AM|PM|am|pm)? - ([^:]+): (.*)'


    for line in raw_text:

        match = re.match(pattern, line)

        if match:

            date, time, am_pm, sender, message = match.groups()

            dt = datetime.strptime(date + " " + time + (" " + am_pm if am_pm else ""), "%d/%m/%Y %I:%M %p")

            messages.append([dt, sender.strip(), message.strip()])

    

    df = pd.DataFrame(messages, columns=["datetime", "sender", "message"])

    return df


# ========== 2. Sentiment & Stats ==========

def analyze_sentiments(df):

    df['polarity'] = df['message'].apply(lambda x: TextBlob(x).sentiment.polarity)

    df['sentiment'] = df['polarity'].apply(lambda x: 'positive' if x > 0.1 else 'negative' if x < -0.1 else 'neutral')

    return df


def top_senders(df, top_n=5):

    return df['sender'].value_counts().head(top_n)


# ========== 3. Plotting Functions ==========

def plot_message_frequency(df):

    df['date'] = df['datetime'].dt.date

    daily_counts = df.groupby('date').size()


    plt.figure(figsize=(12, 5))

    daily_counts.plot(kind='line', color='teal')

    plt.title("Messages Per Day")

    plt.xlabel("Date")

    plt.ylabel("Number of Messages")

    plt.tight_layout()

    plt.savefig(f"{PLOTS_FOLDER}/messages_per_day.png")

    plt.close()


def plot_sender_activity(df):

    sender_counts = df['sender'].value_counts()

    sender_counts.plot(kind='bar', figsize=(10,5), color='orchid')

    plt.title("Messages by Sender")

    plt.ylabel("Message Count")

    plt.tight_layout()

    plt.savefig(f"{PLOTS_FOLDER}/messages_by_sender.png")

    plt.close()


def plot_sentiment_distribution(df):

    sentiment_counts = df['sentiment'].value_counts()

    sentiment_counts.plot(kind='pie', autopct='%1.1f%%', figsize=(6,6), colors=['lightgreen', 'lightcoral', 'lightgrey'])

    plt.title("Sentiment Distribution")

    plt.tight_layout()

    plt.savefig(f"{PLOTS_FOLDER}/sentiment_distribution.png")

    plt.close()


# ========== 4. Generate Summary ==========

def generate_summary(df):

    summary = []

    summary.append(f"Total messages: {len(df)}")

    summary.append(f"Total participants: {df['sender'].nunique()}")

    summary.append("Top 5 active senders:")

    summary.extend(top_senders(df).to_string().split('\n'))


    sentiment_split = df['sentiment'].value_counts(normalize=True) * 100

    summary.append("\nSentiment Breakdown:")

    summary.extend(sentiment_split.round(2).to_string().split('\n'))


    with open("summary_output.txt", "w") as f:

        f.write("\n".join(summary))

    

    return "\n".join(summary)


# ========== MAIN ==========

if __name__ == "__main__":

    print("📥 Parsing chat...")

    df = parse_chat(CHAT_FILE)


    print("🧠 Analyzing sentiments...")

    df = analyze_sentiments(df)


    print("📊 Generating plots...")

    plot_message_frequency(df)

    plot_sender_activity(df)

    plot_sentiment_distribution(df)


    print("📝 Writing summary...")

    summary_text = generate_summary(df)

    print(summary_text)


    print("\n✅ Done! Plots saved to 'chat_analysis_plots' and summary to 'summary_output.txt'")


No comments:

Post a Comment