import os
import librosa
import numpy as np
import sounddevice as sd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pickle
# -----------------------
# STEP 1: Feature Extraction
# -----------------------
def extract_features(file_path):
    """Build one flat feature vector describing an audio file.

    Up to 3 seconds of audio are loaded, starting 0.5 seconds into the
    file. MFCC (40 coefficients), chroma and mel-spectrogram matrices are
    computed from that signal, each matrix is averaged down to a 1-D
    vector, and the three vectors are joined end to end.

    Args:
        file_path: Path of the audio file, passed straight to
            ``librosa.load``.

    Returns:
        A 1-D NumPy array: averaged MFCCs, then averaged chroma, then
        averaged mel-spectrogram values.
    """
    signal, sample_rate = librosa.load(file_path, duration=3, offset=0.5)

    # Order matters: it fixes the column layout of the final vector.
    feature_matrices = (
        librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40),
        librosa.feature.chroma_stft(y=signal, sr=sample_rate),
        librosa.feature.melspectrogram(y=signal, sr=sample_rate),
    )

    # Average each matrix across its second axis so that every file yields
    # a vector whose length does not depend on the number of columns.
    averaged = [np.mean(matrix.T, axis=0) for matrix in feature_matrices]
    return np.hstack(averaged)
# -----------------------
# STEP 2: Training (Demo Dataset Simulation)
# -----------------------
def train_model(dataset_path="dataset", model_path="emotion_model.pkl"):
    """Train a linear SVM emotion classifier and pickle it to disk.

    Every ``.wav`` file directly inside ``dataset_path`` is turned into a
    feature vector with ``extract_features``. The label is the part of the
    file name before the first underscore (``angry_1.wav`` -> ``"angry"``).
    The data is split 80/20, the scaler is fitted on the training part
    only, and the test accuracy is printed.

    Args:
        dataset_path: Folder containing the labelled ``.wav`` files.
        model_path: Where the ``(model, encoder, scaler)`` tuple is pickled.

    Raises:
        ValueError: If ``dataset_path`` contains no ``.wav`` files.
    """
    # Normally, load a dataset (RAVDESS, CREMA-D etc.)
    # Here, we simulate with a few .wav files: angry_1.wav, happy_2.wav, etc.
    features, labels = [], []
    # os.listdir returns names in arbitrary order; sorting keeps the
    # random_state-seeded split below reproducible across machines.
    for file_name in sorted(os.listdir(dataset_path)):
        if not file_name.endswith(".wav"):
            continue
        labels.append(file_name.split("_")[0])  # e.g., angry_1.wav -> "angry"
        features.append(extract_features(os.path.join(dataset_path, file_name)))

    if not features:
        # Fail early with a clear message instead of an opaque shape error
        # from scikit-learn further down.
        raise ValueError(f"No .wav files found in {dataset_path!r}")

    X = np.array(features)

    # Encode string labels as integers
    encoder = LabelEncoder()
    y = encoder.fit_transform(labels)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only: fitting on all of X would
    # leak test-set statistics into training and inflate the accuracy.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # probability=True is required for predict_proba at inference time.
    model = SVC(kernel="linear", probability=True)
    model.fit(X_train, y_train)
    acc = model.score(X_test, y_test)
    print(f"Model trained with accuracy: {acc*100:.2f}%")

    # Save model together with the encoder and scaler needed to use it
    with open(model_path, "wb") as f:
        pickle.dump((model, encoder, scaler), f)
# -----------------------
# STEP 3: Record & Predict
# -----------------------
def record_and_predict(duration=3, fs=22050):
    """Record from the default microphone and report the predicted emotion.

    The recording is saved to ``temp.wav``, run through
    ``extract_features``, scaled with the pickled scaler and classified
    with the pickled model. The predicted label is printed and the
    per-class probabilities are shown as a bar chart.

    Args:
        duration: Recording length in seconds.
        fs: Sampling rate in Hz used for recording and for the WAV file.
    """
    # Function-scope import: scipy is not imported at the top of this file.
    from scipy.io import wavfile

    print("Recording...")
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
    sd.wait()  # block until the recording buffer is full
    print("Recording complete. Saving as temp.wav...")
    # librosa.output.write_wav was removed in librosa 0.8.0, so the WAV
    # file is written with SciPy instead. flatten() turns the
    # (frames, 1) mono buffer into a 1-D sample array.
    wavfile.write("temp.wav", fs, recording.flatten())

    # NOTE: pickle.load can execute arbitrary code; only load a model file
    # produced by train_model() on a trusted machine.
    with open("emotion_model.pkl", "rb") as f:
        model, encoder, scaler = pickle.load(f)

    # The model expects a 2-D (1, n_features) array scaled like the
    # training data.
    features = extract_features("temp.wav").reshape(1, -1)
    features = scaler.transform(features)
    pred = model.predict(features)[0]
    probas = model.predict_proba(features)[0]
    emotion = encoder.inverse_transform([pred])[0]
    print(f"Detected Emotion: {emotion}")

    # Plot probabilities
    plt.bar(encoder.classes_, probas)
    plt.title("Emotion Prediction Confidence")
    plt.show()
# -----------------------
# MAIN
# -----------------------
if __name__ == "__main__":
    # Train only when no pickled model is present yet; afterwards always
    # go straight to recording and prediction.
    model_already_saved = os.path.exists("emotion_model.pkl")
    if not model_already_saved:
        print("Training model...")
        train_model()

    record_and_predict()
# No comments:
# Post a Comment