Blog Pages

WhatsApp Chat Analyzer

pip install pandas matplotlib emoji


import re

import emoji

import pandas as pd

import matplotlib.pyplot as plt

from collections import Counter


# Keywords (and emojis) that mark a message as angry or happy.
# They are matched against lower-cased message text, so keep them lower-case.
anger_keywords = ['angry', 'hate', 'stupid', 'idiot', 'mad', 'annoy', 'fight']

happy_keywords = ['happy', 'love', 'joy', 'awesome', 'great', '😊', '😁', '😍']


# Timestamp prefix of an exported line, e.g. "12/31/21, 22:15 - " or
# "12/31/21, 10:15:07 PM - ". Seconds and an AM/PM marker are optional.
_TIMESTAMP_PREFIX = (
    r'(\d{1,2}/\d{1,2}/\d{2,4}),\s'
    r'(\d{1,2}:\d{2}(?::\d{2})?(?:\s?[AaPp][Mm])?)'
    r'\s[-–]\s'
)
# A user message: timestamp prefix followed by "<sender>: <text>".
_MESSAGE_LINE = re.compile(_TIMESTAMP_PREFIX + r'(.+?):\s(.+)')
# Any timestamped line, including notices that carry no sender.
_TIMESTAMPED_LINE = re.compile(_TIMESTAMP_PREFIX)


def extract_chat_data(chat_file):
    """Parse an exported WhatsApp chat text file into a DataFrame.

    Args:
        chat_file: Path of the exported chat file (UTF-8, with or without BOM).

    Returns:
        A DataFrame with the string columns ``Date``, ``Time``, ``Sender`` and
        ``Message``, one row per message, in file order. ``Time`` is kept
        exactly as written in the file (including seconds / AM-PM if present).

    Lines that carry a timestamp but no ``sender: text`` part are skipped.
    Lines without a timestamp are treated as the continuation of the previous
    message and appended to it, separated by a newline.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    chat_data = []
    current = None  # row of the message that continuation lines belong to

    # 'utf-8-sig' drops a leading BOM, which would otherwise stop the first
    # line from matching; plain UTF-8 files are read unchanged.
    with open(chat_file, 'r', encoding='utf-8-sig') as f:
        for line in f:
            match = _MESSAGE_LINE.match(line)
            if match:
                current = list(match.groups())
                chat_data.append(current)
            elif _TIMESTAMPED_LINE.match(line):
                # Timestamped line without a sender: not a message, and it
                # must not be glued onto the previous message either.
                current = None
            elif current is not None:
                current[3] += '\n' + line.rstrip('\n')

    # Blank lines trailing a message are not part of its text.
    for row in chat_data:
        row[3] = row[3].rstrip('\n')

    return pd.DataFrame(chat_data, columns=['Date', 'Time', 'Sender', 'Message'])


def count_emojis(text):
    """Return every emoji found in *text*, in order of appearance.

    Despite the name, the result is a list of emoji strings (one entry per
    occurrence), not a number; callers count the entries themselves.

    Matching is delegated to ``emoji.emoji_list`` so that an emoji made of
    several code points (skin-tone variants, ZWJ sequences such as a gendered
    person, flags, variation selectors) is returned as a single entry instead
    of being split into its individual characters.

    Args:
        text: The message text to scan.

    Returns:
        A list of emoji strings; empty when *text* contains none.
    """
    return [found['emoji'] for found in emoji.emoji_list(text)]


def analyze_emojis(df):
    """Tally emoji usage across the whole chat and per sender.

    Args:
        df: DataFrame providing the ``Sender`` and ``Message`` columns.

    Returns:
        A ``(top_emojis, sender_emoji)`` tuple: ``top_emojis`` is a list of
        up to ten ``(emoji, count)`` pairs, most frequent first, and
        ``sender_emoji`` maps each sender to a ``Counter`` of the emojis that
        sender used (senders without emojis map to an empty ``Counter``).
    """
    overall = Counter()
    per_sender = {}

    for sender, text in zip(df['Sender'], df['Message']):
        found = count_emojis(text)
        overall.update(found)
        # Register the sender on first sight, then add this message's emojis.
        per_sender.setdefault(sender, Counter()).update(found)

    return overall.most_common(10), per_sender


def _compile_keywords(keywords):
    """Build one compiled search pattern per mood keyword."""
    patterns = []
    for keyword in keywords:
        escaped = re.escape(keyword)
        if keyword.isalpha():
            # A word keyword must start a word: "hate" matches "hated" but no
            # longer "whatever", "love" matches "lovely" but not "gloves".
            escaped = r'\b' + escaped
        # Anything else (emojis, symbols) is matched wherever it occurs.
        patterns.append(re.compile(escaped))
    return patterns


def analyze_mood(df, *, happy=None, anger=None):
    """Score every message's mood and store it in a ``MoodScore`` column.

    A message gains one point for each distinct happy keyword it contains and
    loses one point for each distinct anger keyword. Matching is done on the
    lower-cased message, so keywords are expected to be lower-case. Repeating
    the same keyword within a message does not change the score.

    Args:
        df: DataFrame with a ``Message`` column of strings.
        happy: Optional iterable of happy keywords; defaults to the
            module-level ``happy_keywords``.
        anger: Optional iterable of anger keywords; defaults to the
            module-level ``anger_keywords``.

    Returns:
        The same DataFrame object, modified in place with the added (or
        overwritten) integer ``MoodScore`` column.
    """
    happy_patterns = _compile_keywords(happy_keywords if happy is None else happy)
    anger_patterns = _compile_keywords(anger_keywords if anger is None else anger)

    mood_scores = []
    for message in df['Message']:
        text = message.lower()
        gained = sum(1 for pattern in happy_patterns if pattern.search(text))
        lost = sum(1 for pattern in anger_patterns if pattern.search(text))
        mood_scores.append(gained - lost)

    df['MoodScore'] = mood_scores
    return df


def plot_top_emoji_users(sender_emoji):
    """Show a bar chart of the total number of emojis used by each sender.

    Args:
        sender_emoji: Mapping of sender name to a ``Counter`` (or any mapping)
            of emoji -> number of uses. Bars follow the mapping's own order.
    """
    users = []
    counts = []
    for sender, emojis in sender_emoji.items():
        users.append(sender)
        counts.append(sum(emojis.values()))

    plt.figure(figsize=(8, 4))
    plt.bar(users, counts, color='teal')
    plt.title("Emoji Usage by User")
    plt.ylabel("Total Emojis Used")
    plt.xticks(rotation=45)  # slant the sender names on the x axis
    plt.tight_layout()
    plt.show()


def plot_mood_over_time(df):
    """Show a line chart of the summed mood score for each chat day.

    Args:
        df: DataFrame with a ``Date`` column and a numeric ``MoodScore``
            column.
    """
    # ``Date`` holds plain strings, so the default sorted grouping would order
    # the days lexicographically ("1/10/21" before "1/2/21"). ``sort=False``
    # keeps the days in order of first appearance, i.e. the order of the rows.
    mood_by_day = df.groupby("Date", sort=False)["MoodScore"].sum()

    plt.figure(figsize=(8, 4))
    mood_by_day.plot(kind="line", marker="o", color="purple")
    plt.title("Mood Trend Over Time")
    plt.xlabel("Date")
    plt.ylabel("Mood Score")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


def main(chat_file="chat.txt"):
    """Run the full analysis on an exported WhatsApp chat file.

    Prints the most used emojis and the emoji totals per sender, then shows
    the emoji-usage and mood-trend charts.

    Args:
        chat_file: Path of the exported WhatsApp chat file.
    """
    df = extract_chat_data(chat_file)
    if df.empty:
        # Nothing matched the expected line format; there is nothing to
        # report or plot.
        print(f"No messages could be parsed from {chat_file!r}.")
        return

    df = analyze_mood(df)
    top_emojis, sender_emoji = analyze_emojis(df)

    print("\n🔝 Top Emojis Used:")
    for emo, count in top_emojis:
        print(f"{emo}: {count}")

    print("\n🙋‍♂️ Emoji Usage by Users:")
    for sender, emojis in sender_emoji.items():
        print(f"{sender}: {sum(emojis.values())} emojis")

    plot_top_emoji_users(sender_emoji)
    plot_mood_over_time(df)


if __name__ == "__main__":
    main()

 

No comments:

Post a Comment