# Requires: pip install pandas matplotlib emoji
import re
import emoji
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
# Keywords whose presence in a (lower-cased) message signals anger or
# happiness. Kept as plain lists so callers can extend them at runtime.
anger_keywords = ['angry', 'hate', 'stupid', 'idiot', 'mad', 'annoy', 'fight']
# NOTE(review): the last three entries were identical mojibake ('š') left over
# from a broken copy/paste of emoji; they are replaced here with distinct
# happy-face emoji. Confirm these are the emoji that were originally intended.
happy_keywords = ['happy', 'love', 'joy', 'awesome', 'great', '😊', '😄', '😍']
# Start of a chat line: a date and a time (24-hour or 12-hour with AM/PM,
# optionally with seconds), either followed by " - " or wrapped in brackets.
_CHAT_LINE_START = re.compile(
    r'^\[?(\d{1,2}/\d{1,2}/\d{2,4}),\s'
    r'(\d{1,2}:\d{2}(?::\d{2})?(?:\s?[AaPp]\.?[Mm]\.?)?)'
    r'(?:\]\s|\s[-–]\s)'
    r'(.*)$'
)
# "Sender: message" payload that follows the timestamp of a user message.
_SENDER_AND_TEXT = re.compile(r'^(.+?):\s(.+)$')


def extract_chat_data(chat_file):
    """Parse an exported WhatsApp chat into a DataFrame.

    Args:
        chat_file: Path of the UTF-8 text export.

    Returns:
        A ``pandas.DataFrame`` with the string columns ``Date``, ``Time``,
        ``Sender`` and ``Message``, one row per user message in file order.
        ``Time`` keeps whatever the export contained (e.g. ``"21:15"``,
        ``"9:15 PM"`` or ``"10:00:05 AM"``).

    Lines that carry a timestamp but no ``Sender: text`` part (system notices
    such as "X added Y") are skipped. Lines without a timestamp are treated
    as continuations of the preceding user message and appended to it,
    separated by a newline.
    """
    records = []
    current = None  # record that continuation lines get appended to
    with open(chat_file, 'r', encoding='utf-8') as f:
        # Stream the file instead of loading every line into memory first.
        for raw_line in f:
            # Drop the line break plus the BOM / left-to-right marks that
            # some exports put in front of the timestamp.
            line = raw_line.rstrip('\n').lstrip('\ufeff\u200e')
            start = _CHAT_LINE_START.match(line)
            if start is None:
                if current is not None:
                    current[3] += '\n' + line
                continue
            date, time, rest = start.groups()
            payload = _SENDER_AND_TEXT.match(rest)
            if payload is None:
                # Timestamped system notice: not a user message, and lines
                # following it must not be glued onto the previous message.
                current = None
                continue
            sender, message = payload.groups()
            current = [date, time, sender, message]
            records.append(current)
    return pd.DataFrame(records, columns=['Date', 'Time', 'Sender', 'Message'])
def count_emojis(text):
    """Return the emoji found in ``text``, in order of appearance.

    Despite the name, the result is a list of emoji strings (with repeats),
    not a number; callers feed it to ``Counter.update``.

    ``emoji.emoji_list`` is used instead of testing single characters so that
    emoji made of several code points (skin-tone variants, ZWJ sequences,
    flags) are returned as one item rather than split into fragments.
    """
    return [found['emoji'] for found in emoji.emoji_list(text)]
def analyze_emojis(df):
    """Tally emoji usage overall and per sender.

    Args:
        df: DataFrame with ``Sender`` and ``Message`` columns.

    Returns:
        A tuple ``(top_ten, per_sender)`` where ``top_ten`` is the list of the
        ten most common ``(emoji, count)`` pairs across all messages and
        ``per_sender`` maps each sender to a ``Counter`` of the emoji they
        used.
    """
    overall = Counter()
    per_sender = {}
    for _, record in df.iterrows():
        found = count_emojis(record['Message'])
        overall.update(found)
        # Create the sender's Counter on first sight, then add this message.
        per_sender.setdefault(record['Sender'], Counter()).update(found)
    top_ten = overall.most_common(10)
    return top_ten, per_sender
def _compile_keyword_patterns(keywords):
    """Compile one regex per keyword; word-like keywords must start a word."""
    patterns = []
    for keyword in keywords:
        escaped = re.escape(keyword)
        # Anchor alphanumeric keywords at a word start so that e.g. "hate"
        # no longer fires on "whatever", while stems such as "annoy" still
        # match "annoying". Emoji and other symbols stay plain substrings.
        if keyword[:1].isalnum():
            escaped = r'\b' + escaped
        patterns.append(re.compile(escaped))
    return patterns


def analyze_mood(df, happy_words=None, anger_words=None):
    """Add a ``MoodScore`` column to ``df`` and return ``df``.

    Each message scores +1 for every happy keyword it contains and -1 for
    every anger keyword it contains (a keyword counts once per message no
    matter how often it occurs). Matching is done on the lower-cased message.

    Args:
        df: DataFrame with a ``Message`` column. It is modified in place.
        happy_words: Optional iterable of happy keywords; defaults to the
            module-level ``happy_keywords``.
        anger_words: Optional iterable of anger keywords; defaults to the
            module-level ``anger_keywords``.

    Returns:
        The same DataFrame object, with the integer ``MoodScore`` column set.
    """
    happy_patterns = _compile_keyword_patterns(
        happy_keywords if happy_words is None else happy_words
    )
    anger_patterns = _compile_keyword_patterns(
        anger_keywords if anger_words is None else anger_words
    )

    mood_scores = []
    for message in df['Message']:
        # Non-string cells (e.g. NaN) carry no mood instead of crashing.
        text = message.lower() if isinstance(message, str) else ''
        gained = sum(1 for pattern in happy_patterns if pattern.search(text))
        lost = sum(1 for pattern in anger_patterns if pattern.search(text))
        mood_scores.append(gained - lost)
    df['MoodScore'] = mood_scores
    return df
def plot_top_emoji_users(sender_emoji):
    """Show a bar chart of the total number of emoji used by each sender.

    Args:
        sender_emoji: Mapping of sender name to a ``Counter`` of the emoji
            that sender used. Every sender in the mapping gets one bar.
    """
    users = list(sender_emoji)
    # One total per sender, in the same order as ``users``.
    counts = [sum(tally.values()) for tally in sender_emoji.values()]

    plt.figure(figsize=(8, 4))
    plt.bar(users, counts, color='teal')
    plt.title("Emoji Usage by User")
    plt.ylabel("Total Emojis Used")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
def plot_mood_over_time(df):
    """Show a line chart of the summed mood score per day.

    Args:
        df: DataFrame with ``Date`` and ``MoodScore`` columns.

    ``Date`` holds strings such as ``"1/10/22"``, so letting ``groupby`` sort
    the keys would order them lexicographically ("1/10/22" before "1/2/22")
    and scramble the timeline. ``sort=False`` keeps the days in the order
    they first appear in ``df``.
    NOTE(review): this assumes the rows are already in chronological order,
    as they are when read straight from a chat export — confirm for other
    callers.
    """
    mood_by_day = df.groupby("Date", sort=False)["MoodScore"].sum()
    plt.figure(figsize=(8, 4))
    mood_by_day.plot(kind="line", marker="o", color="purple")
    plt.title("Mood Trend Over Time")
    plt.xlabel("Date")
    plt.ylabel("Mood Score")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
# Script entry point: parse the export, score the mood, print the emoji
# statistics and show both charts.
if __name__ == "__main__":
    chat_file = "chat.txt"  # exported WhatsApp chat file
    df = extract_chat_data(chat_file)
    if df.empty:
        # Nothing matched the expected line format; report it instead of
        # going on to plot empty data.
        print(f"No messages could be parsed from {chat_file!r}.")
    else:
        df = analyze_mood(df)
        top_emojis, sender_emoji = analyze_emojis(df)

        # NOTE(review): the heading emoji below replace mojibake ('š') from a
        # broken copy/paste; confirm they match what was originally intended.
        print("\n📊 Top Emojis Used:")
        for emo, count in top_emojis:
            print(f"{emo}: {count}")

        print("\n🙋‍♂️ Emoji Usage by Users:")
        for sender, emojis in sender_emoji.items():
            print(f"{sender}: {sum(emojis.values())} emojis")

        plot_top_emoji_users(sender_emoji)
        plot_mood_over_time(df)
# (Blog page footer, not part of the script: "No comments: / Post a Comment")