import re
import pandas as pd
import matplotlib.pyplot as plt
from textblob import TextBlob
from collections import Counter
from datetime import datetime
import os
# ========== CONFIG ==========
# Path of the exported WhatsApp chat text file that the script parses.
CHAT_FILE = "chat.txt"
# Directory that receives the PNG plots written by the plotting functions.
PLOTS_FOLDER = "chat_analysis_plots"
# Runs at import time; exist_ok=True makes it a no-op when the folder already exists.
os.makedirs(PLOTS_FOLDER, exist_ok=True)
# ========== 1. Parse WhatsApp Chat ==========
# Prefix of an exported chat line: "<date>, <time>[<space>AM|PM] - ".
# The AM/PM marker is optional *together with* the whitespace in front of it,
# so 24-hour lines ("14:30 - ") match as well. \s also accepts non-ASCII
# spaces such as U+202F in front of the marker.
_TIMESTAMP_PREFIX = r'^(\d{1,2}/\d{1,2}/\d{2,4}), (\d{1,2}:\d{2})(?:\s(AM|PM|am|pm))? - '
# A user message is the prefix followed by "<sender>: <text>".
_MESSAGE_RE = re.compile(_TIMESTAMP_PREFIX + r'([^:]+): (.*)')
# A timestamped line without "<sender>: " (e.g. a system notice).
_TIMESTAMP_RE = re.compile(_TIMESTAMP_PREFIX)


def _parse_timestamp(date, time, am_pm, dayfirst):
    """Build a datetime from the captured date, time and optional AM/PM marker."""
    day_month = "%d/%m" if dayfirst else "%m/%d"
    # %Y only accepts four-digit years; two-digit years need %y.
    year = "%Y" if len(date.rsplit("/", 1)[1]) == 4 else "%y"
    if am_pm:
        return datetime.strptime(f"{date} {time} {am_pm}", f"{day_month}/{year} %I:%M %p")
    # No AM/PM marker: interpret the time on a 24-hour clock.
    return datetime.strptime(f"{date} {time}", f"{day_month}/{year} %H:%M")


def parse_chat(file_path, *, dayfirst=True):
    """Parse an exported chat text file into a DataFrame.

    Each message line has the form
    ``<d>/<m>/<y>, <h>:<mm>[ AM|PM] - <sender>: <text>``. Lines that do not
    start with a timestamp are treated as continuations of the previous
    message and appended to it with a newline. Timestamped lines without a
    ``<sender>: `` part are skipped.

    Args:
        file_path: Path of the UTF-8 text file to read (a leading BOM is
            tolerated).
        dayfirst: When True (default) dates are read as day/month/year,
            otherwise as month/day/year.

    Returns:
        DataFrame with columns ``datetime`` (datetime64), ``sender`` and
        ``message``, one row per message in file order.

    Raises:
        ValueError: If a matched line carries a date or time that cannot be
            parsed in the selected day/month order.
    """
    rows = []
    # True while continuation lines belong to the most recent row.
    open_message = False
    # utf-8-sig drops a leading BOM that would stop the first line matching.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        for raw_line in f:
            line = raw_line.rstrip('\r\n')
            match = _MESSAGE_RE.match(line)
            if match:
                date, time, am_pm, sender, message = match.groups()
                timestamp = _parse_timestamp(date, time, am_pm, dayfirst)
                rows.append([timestamp, sender.strip(), message])
                open_message = True
            elif _TIMESTAMP_RE.match(line):
                # Timestamped line with no sender: not part of any message.
                open_message = False
            elif open_message:
                rows[-1][2] += "\n" + line

    for row in rows:
        row[2] = row[2].strip()

    df = pd.DataFrame(rows, columns=["datetime", "sender", "message"])
    # Keeps the column datetime-typed even when no message was parsed, so
    # the .dt accessor still works on an empty result.
    df["datetime"] = pd.to_datetime(df["datetime"])
    return df
# ========== 2. Sentiment & Stats ==========
def analyze_sentiments(df):
    """Score every message and label it positive, negative or neutral.

    Adds two columns to *df* in place and returns the same DataFrame:
    ``polarity`` (TextBlob's sentiment polarity of the ``message`` text) and
    ``sentiment`` ('positive' above 0.1, 'negative' below -0.1, otherwise
    'neutral').
    """
    def polarity_of(text):
        return TextBlob(text).sentiment.polarity

    def label_for(score):
        if score > 0.1:
            return 'positive'
        if score < -0.1:
            return 'negative'
        return 'neutral'

    df['polarity'] = df['message'].apply(polarity_of)
    df['sentiment'] = df['polarity'].apply(label_for)
    return df
def top_senders(df, top_n=5):
    """Return message counts for the *top_n* most frequent values of ``sender``.

    The result is a Series indexed by sender, ordered from most to fewest
    messages.
    """
    counts_by_sender = df['sender'].value_counts()
    busiest = counts_by_sender.head(top_n)
    return busiest
# ========== 3. Plotting Functions ==========
def plot_message_frequency(df):
    """Save a line chart of the number of messages per calendar day.

    Side effect: adds a ``date`` column (the date part of ``datetime``) to
    *df*. The chart is written to ``messages_per_day.png`` inside
    PLOTS_FOLDER and the figure is closed afterwards.
    """
    df['date'] = df['datetime'].dt.date
    per_day = df.groupby('date').size()

    fig, ax = plt.subplots(figsize=(12, 5))
    per_day.plot(kind='line', color='teal', ax=ax)
    ax.set_title("Messages Per Day")
    ax.set_xlabel("Date")
    ax.set_ylabel("Number of Messages")
    fig.tight_layout()

    output_path = f"{PLOTS_FOLDER}/messages_per_day.png"
    fig.savefig(output_path)
    plt.close(fig)
def plot_sender_activity(df):
    """Save a bar chart of the message count for each sender.

    The chart is written to ``messages_by_sender.png`` inside PLOTS_FOLDER
    and the figure is closed afterwards.
    """
    per_sender = df['sender'].value_counts()

    ax = per_sender.plot.bar(figsize=(10, 5), color='orchid')
    ax.set_title("Messages by Sender")
    ax.set_ylabel("Message Count")

    fig = ax.figure
    fig.tight_layout()
    fig.savefig(f"{PLOTS_FOLDER}/messages_by_sender.png")
    plt.close(fig)
def plot_sentiment_distribution(df):
    """Save a pie chart of the share of each value in the ``sentiment`` column.

    The chart is written to ``sentiment_distribution.png`` inside
    PLOTS_FOLDER and the figure is closed afterwards.
    """
    # value_counts() orders labels by frequency, so a positional colour list
    # would attach colours to whichever label happens to be most common.
    # Look each colour up by label instead.
    palette = {
        'positive': 'lightgreen',
        'negative': 'lightcoral',
        'neutral': 'lightgrey',
    }
    sentiment_counts = df['sentiment'].value_counts()
    slice_colors = [palette.get(label, 'lightgrey') for label in sentiment_counts.index]

    sentiment_counts.plot(kind='pie', autopct='%1.1f%%', figsize=(6, 6), colors=slice_colors)
    plt.title("Sentiment Distribution")
    plt.tight_layout()
    plt.savefig(f"{PLOTS_FOLDER}/sentiment_distribution.png")
    plt.close()
# ========== 4. Generate Summary ==========
def generate_summary(df):
    """Build a plain-text summary, write it to ``summary_output.txt``, return it.

    The summary lists the total number of messages, the number of distinct
    senders, the counts returned by ``top_senders(df)``, and the percentage
    share of each value in the ``sentiment`` column (rounded to 2 decimals).

    Args:
        df: DataFrame with at least ``sender`` and ``sentiment`` columns.

    Returns:
        The summary as a single newline-joined string.
    """
    sentiment_share = df['sentiment'].value_counts(normalize=True) * 100
    lines = [
        f"Total messages: {len(df)}",
        f"Total participants: {df['sender'].nunique()}",
        "Top 5 active senders:",
        *top_senders(df).to_string().split('\n'),
        "\nSentiment Breakdown:",
        *sentiment_share.round(2).to_string().split('\n'),
    ]
    summary_text = "\n".join(lines)

    # Explicit UTF-8: sender names may contain non-ASCII characters that the
    # platform default encoding cannot represent.
    with open("summary_output.txt", "w", encoding="utf-8") as f:
        f.write(summary_text)
    return summary_text
# ========== MAIN ==========
def main():
    """Run the pipeline: parse the chat, score sentiment, plot, summarise."""
    # The emoji prefixes below replace mis-decoded byte sequences that were
    # printed as garbage characters.
    print("📥 Parsing chat...")
    df = parse_chat(CHAT_FILE)
    print("🧠 Analyzing sentiments...")
    df = analyze_sentiments(df)
    print("📊 Generating plots...")
    plot_message_frequency(df)
    plot_sender_activity(df)
    plot_sentiment_distribution(df)
    print("📝 Writing summary...")
    summary_text = generate_summary(df)
    print(summary_text)
    # Report the configured folder so the message stays correct if
    # PLOTS_FOLDER is changed.
    print(f"\n✅ Done! Plots saved to '{PLOTS_FOLDER}' and summary to 'summary_output.txt'")


if __name__ == "__main__":
    main()