!pip install --quiet tensorflow_addons

     |████████████████████████████████| 1.1 MB 5.3 MB/s


# Data Manipulation
import numpy as np
import pandas as pd
import pickle
import os
from scipy.ndimage import interpolation
from sklearn.preprocessing import OneHotEncoder

# Machine Learning
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, CSVLogger, EarlyStopping
from sklearn.metrics import f1_score, accuracy_score, balanced_accuracy_score, confusion_matrix, roc_curve, auc, precision_recall_curve

# Plotting
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
%matplotlib inline
import seaborn as sns
import pylab


# Report the versions of the deep-learning libraries in use, for reproducibility
print(f'Tensorflow version: {tf.__version__}')
print(f'Tensorflow_addons version: {tfa.__version__}')

Tensorflow version: 2.8.0
Tensorflow_addons version: 0.16.1


# Unpickle data
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only load this dataset from a trusted source.
with open('data/cremad_openface_mfcc.pkl', 'rb') as f_in:
    data = pickle.load(f_in)


def _unpack_split(split_name):
    '''Return (dataframe, MFCC column, Action Unit column, integer labels) for one split of the unpickled data'''
    frame = pd.DataFrame(data[split_name]).T
    mfcc_column, au_column, label_column = frame[0], frame[1], frame[2]
    return frame, mfcc_column.to_numpy(), au_column.to_numpy(), label_column.to_numpy(dtype=int)


# Train
df_train, mfcc_train, au_train, labels_train = _unpack_split('train')

# Validation
df_valid, mfcc_valid, au_valid, labels_valid = _unpack_split('val')

# Test
df_test, mfcc_test, au_test, labels_test = _unpack_split('test')


# Emotion names, listed in class-index order
_EMOTIONS = ('neutral', 'anger', 'happy', 'fear', 'disgust', 'sad')

# Class index -> emotion name
idx_to_emotion = dict(enumerate(_EMOTIONS))

# Emotion name -> class index
emotion_to_idx = {name: idx for idx, name in idx_to_emotion.items()}

# Emotion names as a NumPy array, ordered by class index
class_names = np.asarray(list(emotion_to_idx))


# Sizing parameters
# Feature counts are read from the second axis of the first training sample of each modality
NUM_MFCCS = mfcc_train[0].shape[1]
NUM_AUS = au_train[0].shape[1]
# One network output per emotion class
OUTPUT_SIZE = len(class_names)
# Training hyperparameters; LEARNING_RATE and WEIGHT_DECAY are passed to the AdaBelief optimizer
BATCH_SIZE  = 128
EPOCHS = 100
LEARNING_RATE = 1e-3
WEIGHT_DECAY = 1e-3


# Define directories for model checkpoints and training logs.
# makedirs(exist_ok=True) creates a directory only when it is missing, without the
# check-then-create race of a separate os.path.exists() test.
BACKUP_DIR = './models'
os.makedirs(BACKUP_DIR, exist_ok=True)
LOGS_DIR = './logs'
os.makedirs(LOGS_DIR, exist_ok=True)


# Number of timesteps of every training sample, per modality
mfcc_sizes = [frames.shape[0] for frames in mfcc_train]
au_sizes = [frames.shape[0] for frames in au_train]

# 95th quantile of the training sample lengths, rounded up to a whole number of timesteps
MFCC_TIMESTEPS_95_QUANTILE, AU_TIMESTEPS_95_QUANTILE = (
    int(np.ceil(pd.Series(lengths).quantile(0.95)))
    for lengths in (mfcc_sizes, au_sizes)
)


def standardize_timesteps(data, timesteps):

    '''
    Bring every sample of a (samples, timesteps, features) dataset to the same number of timesteps.

    Samples longer than `timesteps` are cut after the first `timesteps` rows; shorter ones
    are padded at the end with rows of zeros (zero doubles as the mask token).
    Returns a new array of shape (samples, timesteps, features).
    '''

    # Feature width is taken from the first timestep of the first sample
    n_features = len(data[0][0])

    # Start from an all-zero array so untouched rows already hold the mask value
    padded = np.zeros(shape=(data.shape[0], timesteps, n_features))

    for row, sample in enumerate(data):
        # Copy as many leading timesteps as fit into the target length
        kept = min(len(sample), timesteps)
        padded[row, :kept] = sample[:kept]

    return padded


# Standardize length and use 0 as masking value -- MFCC
mfcc_train_standardized, mfcc_valid_standardized, mfcc_test_standardized = (
    standardize_timesteps(split, MFCC_TIMESTEPS_95_QUANTILE)
    for split in (mfcc_train, mfcc_valid, mfcc_test)
)


# Standardize length and use 0 as masking value -- Action Units
au_train_standardized, au_valid_standardized, au_test_standardized = (
    standardize_timesteps(split, AU_TIMESTEPS_95_QUANTILE)
    for split in (au_train, au_valid, au_test)
)


# One-hot encode the integer labels. The encoder is fitted on the training labels only
# and then reused, so every split shares the same column order.
# The dense-output flag is named `sparse_output` in newer scikit-learn releases and
# `sparse` in older ones: try the new name first and fall back to the old one.
try:
    OHE = OneHotEncoder(sparse_output=False)
except TypeError:
    OHE = OneHotEncoder(sparse=False)
labels_train_one_hot = OHE.fit_transform(labels_train.reshape(-1, 1))
labels_valid_one_hot = OHE.transform(labels_valid.reshape(-1, 1))
labels_test_one_hot = OHE.transform(labels_test.reshape(-1, 1))


# Instead of feeding the NN a full dataset in matrix-style data this will feed a keras Sequence which batches data online and can even randomly sample
class BatchData(tf.keras.utils.Sequence):

    '''
    Keras Sequence that serves (X, y) in consecutive slices of `batch_size` samples.

    X, y: arrays with one entry per sample; they must have the same length and
          support indexing with an integer permutation array.
    n_classes: number of target classes (stored, not used by the batching itself).
    batch_size: number of samples per batch; the last batch may be smaller.
    shuffle: when True, the samples are permuted once at construction and again
             at the end of every epoch.

    Raises ValueError if X and y do not have the same number of samples.
    '''

    def __init__(self, X, y, n_classes, batch_size=64, shuffle=True):
        super().__init__()

        # Validate up front, regardless of `shuffle`, so mismatched inputs never reach training
        if len(X) != len(y):
            raise ValueError(f'X and y must have the same number of samples, got {len(X)} and {len(y)}')

        # Hyperparameters
        self.X = X
        self.y = y
        self.n_classes = n_classes
        self.batch_size = batch_size
        self.shuffle = shuffle

        # Shuffle
        if self.shuffle:
            self._shuffle()

    def _shuffle(self):
        ''' Apply one random permutation to X and y so samples and labels stay aligned '''
        p = np.random.permutation(len(self.X))
        self.X, self.y = self.X[p], self.y[p]

    def __len__(self):
        ''' Defines number of calls per epoch '''
        return int(np.ceil(len(self.X) / self.batch_size))

    def __getitem__(self, index):
        ''' Return the `index`-th batch as a (batch_X, batch_y) pair '''
        start = index * self.batch_size
        stop = start + self.batch_size
        return self.X[start:stop], self.y[start:stop]

    def on_epoch_end(self):
        ''' Re-shuffle the samples between epochs when shuffling is enabled '''
        if self.shuffle:
            self._shuffle()


def get_model(name, input_shape, output_size):

    '''
    Build and compile the recurrent classifier used for every modality.

    name: name given to the Sequential model.
    input_shape: (timesteps, features) of one sample; timesteps whose features are all 0 are masked.
    output_size: number of classes, i.e. units of the final softmax layer.

    Returns the compiled model. The optimizer, loss and metrics set here are the
    same ones load_trained_weights() uses when it re-compiles a model.
    '''

    model = Sequential(name=name)

    # Skip the zero-padded timesteps added when the samples were length-standardized
    model.add(layers.Masking(mask_value=0, input_shape=input_shape))

    # Two stacked bidirectional LSTMs; only the second collapses the sequence into one vector
    model.add(layers.Bidirectional(layers.LSTM(512, return_sequences=True, recurrent_dropout=0.0)))
    model.add(layers.TimeDistributed(layers.Dropout(0.8)))
    model.add(layers.Bidirectional(layers.LSTM(256, return_sequences=False, recurrent_dropout=0.0)))
    model.add(layers.Dropout(0.8))

    # Dense head: batch normalization sits between the linear layer and its ReLU
    model.add(layers.Dense(512, activation=None))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))
    model.add(layers.Dropout(0.8))
    model.add(layers.Dense(512, activation='relu'))

    # Class probabilities; 'softmax' is the canonical Keras identifier for this activation
    model.add(layers.Dense(output_size, activation='softmax'))

    model.compile(optimizer=tfa.optimizers.AdaBelief(learning_rate=LEARNING_RATE, weight_decay=WEIGHT_DECAY, amsgrad=True, epsilon=1e-7),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.AUC(curve='ROC', name='roc_auc')])

    return model


def load_trained_weights(LOAD_TRAINED_WEIGHTS, model, path):

    '''
    Optionally restore previously saved weights into `model`.

    If LOAD_TRAINED_WEIGHTS == True and a file exists at `path`, the weights are loaded and the
    model is re-compiled - LOADING FAILS IF THE NETWORK ARCHITECTURE NO LONGER MATCHES THE SAVED WEIGHTS
    If LOAD_TRAINED_WEIGHTS == False (or no file exists), the model is returned untouched so it can be trained from scratch

    Returns the tuple (LOAD_TRAINED_WEIGHTS, SUCCESSFUL_WEIGHT_LOAD, model), where
    SUCCESSFUL_WEIGHT_LOAD is True only when the weights were actually loaded.
    '''

    # Nothing to restore: loading was not requested or there is no checkpoint at `path`
    if not (LOAD_TRAINED_WEIGHTS and os.path.exists(path)):
        return LOAD_TRAINED_WEIGHTS, False, model

    # Try loading weights. Will fail if the model structure changed
    try:
        model.load_weights(path)
        model.compile(optimizer=tfa.optimizers.AdaBelief(learning_rate=LEARNING_RATE, weight_decay=WEIGHT_DECAY, amsgrad=True, epsilon=1e-7),
                      loss='categorical_crossentropy',
                      metrics=['accuracy', tf.keras.metrics.AUC(curve='ROC', name='roc_auc')])
    except Exception as exc:
        # Catch Exception rather than everything, so Ctrl-C / SystemExit still propagate,
        # and report the underlying cause instead of hiding it
        SUCCESSFUL_WEIGHT_LOAD = False
        print('Could not load weights. Most likely the network architecture changed.')
        print(f'Reason: {type(exc).__name__}: {exc}')
    else:
        SUCCESSFUL_WEIGHT_LOAD = True
        print('Best weights loaded successfully!')

    return LOAD_TRAINED_WEIGHTS, SUCCESSFUL_WEIGHT_LOAD, model


def get_callbacks(backup_file):
    '''
    Build the list of training callbacks, all driven by the validation loss:
    best-weights checkpointing to `backup_file`, learning-rate reduction on plateau, and early stopping.
    '''
    return [
        # Keep only the weights of the best epoch so far
        ModelCheckpoint(backup_file, monitor='val_loss', save_best_only=True, save_weights_only=True, verbose=0),
        # Multiply the learning rate by 0.8 after 3 epochs without improvement, down to 3e-5
        ReduceLROnPlateau(monitor='val_loss', factor=0.8, patience=3, min_lr=3e-5, verbose=1),
        # Stop after 20 epochs without improvement and roll back to the best weights
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True, verbose=1),
    ]


def plot_keras_log(log_path):

    '''
    Plot the training history stored in a Keras CSV log.

    log_path: path to a CSV file with the columns loss, accuracy, roc_auc, their
              val_ counterparts, and lr (one row per epoch).

    Draws four panels side by side: Loss, Accuracy, ROC-AUC (train vs. validation)
    and the learning rate on a logarithmic scale.
    '''

    # Read the log file
    model_log = pd.read_csv(log_path)

    # Create figure
    fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(20,5))

    # (panel title, log column, fixed y-limits or None) for the train/validation panels
    metric_panels = [('Loss', 'loss', None),
                     ('Accuracy', 'accuracy', [0, 1]),
                     ('ROC-AUC', 'roc_auc', [0, 1])]

    for ax, (title, column, y_limits) in zip(axs, metric_panels):
        ax.plot(model_log.index, model_log[column], color='C0', lw=3, alpha=0.7, label='Train')
        ax.plot(model_log.index, model_log[f'val_{column}'], color='C1', lw=3, alpha=0.7, label='Validation')
        if y_limits is not None:
            ax.set_ylim(y_limits)
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        # Show which curve is which; without a legend the labels above are never displayed
        ax.legend(loc='best')

    # Learning Rate
    lr_ax = axs[3]
    lr_ax.set_yscale('log')
    lr_ax.plot(model_log.index, model_log['lr'], lw=3, color='C2')
    lr_ax.set_title('Learning Rate')
    lr_ax.set_xlabel('Epoch')

    plt.show()


# Batch loaders for the acoustic (MFCC) modality -- only the training loader shuffles
acoustic_train_loader = BatchData(X=mfcc_train_standardized, y=labels_train_one_hot, n_classes=OUTPUT_SIZE, batch_size=BATCH_SIZE, shuffle=True)
acoustic_valid_loader = BatchData(X=mfcc_valid_standardized, y=labels_valid_one_hot, n_classes=OUTPUT_SIZE, batch_size=BATCH_SIZE, shuffle=False)
acoustic_test_loader = BatchData(X=mfcc_test_standardized, y=labels_test_one_hot, n_classes=OUTPUT_SIZE, batch_size=BATCH_SIZE, shuffle=False)


# Acoustic model: one input row of NUM_MFCCS features per standardized timestep
acoustic_input_shape = (MFCC_TIMESTEPS_95_QUANTILE, NUM_MFCCS)
model_1 = get_model('Acoustic', acoustic_input_shape, OUTPUT_SIZE)
model_1.summary()

Model: "Acoustic"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 masking (Masking)           (None, 356, 52)           0         
                                                                 
 bidirectional (Bidirectiona  (None, 356, 1024)        2314240   
 l)                                                              
                                                                 
 time_distributed (TimeDistr  (None, 356, 1024)        0         
 ibuted)                                                         
                                                                 
 bidirectional_1 (Bidirectio  (None, 512)              2623488   
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 512)               0         
                                                                 
 dense (Dense)               (None, 512)               262656    
                                                                 
 batch_normalization (BatchN  (None, 512)              2048      
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 512)               0         
                                                                 
 dropout_2 (Dropout)         (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 512)               262656    
                                                                 
 dense_2 (Dense)             (None, 6)                 3078      
                                                                 
=================================================================
Total params: 5,468,166
Trainable params: 5,467,142
Non-trainable params: 1,024
_________________________________________________________________


# Load pre-trained weights
# The returned flags record whether loading was requested and whether it actually succeeded
LOAD_TRAINED_WEIGHTS, SUCCESSFUL_WEIGHT_LOAD, model_1 = load_trained_weights(LOAD_TRAINED_WEIGHTS=True, model=model_1, path='./models/acoustic.h5')

# Set TRAIN_MODEL to True to (re)train the model; while False the block below is skipped
TRAIN_MODEL = False
if TRAIN_MODEL:
    
    # Define file to store checkpoint
    BACKUP_FILE = os.path.join(BACKUP_DIR, 'acoustic.h5')

    # Callbacks
    model_callbacks = get_callbacks(BACKUP_FILE)
    # The CSV log is opened in append mode only when pre-trained weights were requested and loaded
    logCSV = CSVLogger(filename='logs/log_acoustic.csv', separator=',', append=(LOAD_TRAINED_WEIGHTS & SUCCESSFUL_WEIGHT_LOAD))
    model_callbacks.append(logCSV)

    # Train model and save history
    # NOTE(review): acoustic_train_loader already yields batches of BATCH_SIZE; Keras docs advise
    # not passing batch_size for Sequence inputs -- confirm it is simply ignored here
    # NOTE(review): workers = -1 does not look like a documented "use all cores" value in Keras --
    # confirm the intended worker count
    model_1.fit(x = acoustic_train_loader,
                batch_size = BATCH_SIZE,
                epochs = EPOCHS,
                validation_data = acoustic_valid_loader,
                shuffle = True,
                workers = -1,
                use_multiprocessing = True,
                callbacks=model_callbacks)

Best weights loaded successfully!


# Plot the acoustic training history; this is the CSV file the training cell's CSVLogger writes to
plot_keras_log('logs/log_acoustic.csv')


# Batch loaders for the visual (Action Unit) modality -- only the training loader shuffles
visual_train_loader = BatchData(X=au_train_standardized, y=labels_train_one_hot, n_classes=OUTPUT_SIZE, batch_size=BATCH_SIZE, shuffle=True)
visual_valid_loader = BatchData(X=au_valid_standardized, y=labels_valid_one_hot, n_classes=OUTPUT_SIZE, batch_size=BATCH_SIZE, shuffle=False)
visual_test_loader = BatchData(X=au_test_standardized, y=labels_test_one_hot, n_classes=OUTPUT_SIZE, batch_size=BATCH_SIZE, shuffle=False)


# Visual model: one input row of NUM_AUS features per standardized timestep
visual_input_shape = (AU_TIMESTEPS_95_QUANTILE, NUM_AUS)
model_2 = get_model('Visual', visual_input_shape, OUTPUT_SIZE)
model_2.summary()

Model: "Visual"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 masking_1 (Masking)         (None, 105, 17)           0         
                                                                 
 bidirectional_2 (Bidirectio  (None, 105, 1024)        2170880   
 nal)                                                            
                                                                 
 time_distributed_1 (TimeDis  (None, 105, 1024)        0         
 tributed)                                                       
                                                                 
 bidirectional_3 (Bidirectio  (None, 512)              2623488   
 nal)                                                            
                                                                 
 dropout_4 (Dropout)         (None, 512)               0         
                                                                 
 dense_3 (Dense)             (None, 512)               262656    
                                                                 
 batch_normalization_1 (Batc  (None, 512)              2048      
 hNormalization)                                                 
                                                                 
 activation_1 (Activation)   (None, 512)               0         
                                                                 
 dropout_5 (Dropout)         (None, 512)               0         
                                                                 
 dense_4 (Dense)             (None, 512)               262656    
                                                                 
 dense_5 (Dense)             (None, 6)                 3078      
                                                                 
=================================================================
Total params: 5,324,806
Trainable params: 5,323,782
Non-trainable params: 1,024
_________________________________________________________________


# Load pre-trained weights
# The returned flags record whether loading was requested and whether it actually succeeded
LOAD_TRAINED_WEIGHTS, SUCCESSFUL_WEIGHT_LOAD, model_2 = load_trained_weights(LOAD_TRAINED_WEIGHTS=True, model=model_2, path='./models/visual.h5')

# Set TRAIN_MODEL to True to (re)train the model; while False the block below is skipped
TRAIN_MODEL = False
if TRAIN_MODEL:

    # Define file to store checkpoint
    BACKUP_FILE = os.path.join(BACKUP_DIR, 'visual.h5')

    # Callbacks
    model_callbacks = get_callbacks(BACKUP_FILE)
    # The CSV log is opened in append mode only when pre-trained weights were requested and loaded
    logCSV = CSVLogger(filename='logs/log_visual.csv', separator=',', append=(LOAD_TRAINED_WEIGHTS & SUCCESSFUL_WEIGHT_LOAD))
    model_callbacks.append(logCSV)
    
    # Train model
    # NOTE(review): visual_train_loader already yields batches of BATCH_SIZE; Keras docs advise
    # not passing batch_size for Sequence inputs -- confirm it is simply ignored here
    # NOTE(review): workers = -1 does not look like a documented "use all cores" value in Keras --
    # confirm the intended worker count
    model_2.fit(x = visual_train_loader,
                batch_size = BATCH_SIZE,
                epochs = EPOCHS,
                validation_data = visual_valid_loader,
                shuffle = True,
                workers = -1,
                use_multiprocessing = True,
                callbacks=model_callbacks)

Best weights loaded successfully!


# Plot the visual training history; this is the CSV file the training cell's CSVLogger writes to
plot_keras_log('logs/log_visual.csv')


class LateFusionModel():

    ''' Late fusion of two single-modality models: the class probabilities predicted by each one are averaged into a single prediction '''

    def __init__(self, acoustic_model, visual_model):
        super().__init__()
        self.acoustic_model, self.visual_model = acoustic_model, visual_model

    def predict_proba(self, acoustic_loader, visual_loader):
        ''' Return the element-wise mean of the two models' predicted class probabilities '''
        acoustic_probas = self.acoustic_model.predict(acoustic_loader)
        visual_probas = self.visual_model.predict(visual_loader)
        return np.add(acoustic_probas, visual_probas) / 2

    def predict(self, acoustic_loader, visual_loader):
        ''' Return, for every sample, the index of the class with the highest averaged probability '''
        fused_probas = self.predict_proba(acoustic_loader, visual_loader)
        return np.argmax(fused_probas, axis=1)


# Late fusion has no training step of its own: it wraps the acoustic and visual models and averages their predictions
model_3 = LateFusionModel(acoustic_model = model_1,
                          visual_model = model_2)


def fuse_data(mfcc_data, au_data, timesteps):

    '''
    Take the unstandardized MFCC and Action Unit data and merge them with the following procedure:
    1 - Enlarge the Action Unit feature vector using linear interpolation to estimate intermediary states, such that it matches the timesteps of the MFCC feature vector
    2 - Concatenate both feature vectors timestep-wise
    3 - Standardize the number of timesteps across all samples to `timesteps`: longer samples are truncated, shorter ones are zero-padded at the end

    mfcc_data, au_data: arrays holding one (timesteps, features) array per sample, in the same sample order.
    timesteps: number of timesteps every fused sample will have.

    Returns an array of shape (samples, timesteps, mfcc_features + au_features).
    Raises ValueError if the two inputs do not hold the same number of samples.
    '''

    # Imported from the public scipy.ndimage namespace; the scipy.ndimage.interpolation submodule is deprecated
    from scipy.ndimage import zoom

    if mfcc_data.shape[0] != au_data.shape[0]:
        raise ValueError(f'mfcc_data and au_data must hold the same number of samples, got {mfcc_data.shape[0]} and {au_data.shape[0]}')

    fused_feature_vector_size = len(mfcc_data[0][0]) + len(au_data[0][0])

    # Zero-initialized, so any timestep not written below is already padded with the mask value
    fused = np.zeros(shape=(mfcc_data.shape[0], timesteps, fused_feature_vector_size))

    for idx, (mfcc_sample, au_sample) in enumerate(zip(mfcc_data, au_data)):
        # Stretch only the time axis (factor 1 keeps the feature axis unchanged); order=1 is linear interpolation
        au_interpolated = zoom(au_sample, (mfcc_sample.shape[0] / au_sample.shape[0], 1), order=1)
        concatenated = np.concatenate((mfcc_sample, au_interpolated), axis=-1)

        # Standardize timesteps: copy as many leading timesteps as fit
        kept = min(len(concatenated), timesteps)
        fused[idx, :kept] = concatenated[:kept]

    return fused


# Apply function and fuse data -- every split is standardized to the MFCC timestep count
fused_train, fused_valid, fused_test = (
    fuse_data(mfcc_split, au_split, MFCC_TIMESTEPS_95_QUANTILE)
    for mfcc_split, au_split in ((mfcc_train, au_train),
                                 (mfcc_valid, au_valid),
                                 (mfcc_test, au_test))
)


# Batch loaders for the fused (MFCC + Action Unit) data -- only the training loader shuffles
multimodal_train_loader = BatchData(X=fused_train, y=labels_train_one_hot, n_classes=OUTPUT_SIZE, batch_size=BATCH_SIZE, shuffle=True)
multimodal_valid_loader = BatchData(X=fused_valid, y=labels_valid_one_hot, n_classes=OUTPUT_SIZE, batch_size=BATCH_SIZE, shuffle=False)
multimodal_test_loader = BatchData(X=fused_test, y=labels_test_one_hot, n_classes=OUTPUT_SIZE, batch_size=BATCH_SIZE, shuffle=False)


# Early-fusion model: each timestep carries the MFCC features followed by the Action Unit features
multimodal_input_shape = (MFCC_TIMESTEPS_95_QUANTILE, NUM_MFCCS+NUM_AUS)
model_4 = get_model('Multimodal', multimodal_input_shape, OUTPUT_SIZE)
model_4.summary()

Model: "Multimodal"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 masking_2 (Masking)         (None, 356, 69)           0         
                                                                 
 bidirectional_4 (Bidirectio  (None, 356, 1024)        2383872   
 nal)                                                            
                                                                 
 time_distributed_2 (TimeDis  (None, 356, 1024)        0         
 tributed)                                                       
                                                                 
 bidirectional_5 (Bidirectio  (None, 512)              2623488   
 nal)                                                            
                                                                 
 dropout_7 (Dropout)         (None, 512)               0         
                                                                 
 dense_6 (Dense)             (None, 512)               262656    
                                                                 
 batch_normalization_2 (Batc  (None, 512)              2048      
 hNormalization)                                                 
                                                                 
 activation_2 (Activation)   (None, 512)               0         
                                                                 
 dropout_8 (Dropout)         (None, 512)               0         
                                                                 
 dense_7 (Dense)             (None, 512)               262656    
                                                                 
 dense_8 (Dense)             (None, 6)                 3078      
                                                                 
=================================================================
Total params: 5,537,798
Trainable params: 5,536,774
Non-trainable params: 1,024
_________________________________________________________________


# Load pre-trained weights
# The returned flags record whether loading was requested and whether it actually succeeded
LOAD_TRAINED_WEIGHTS, SUCCESSFUL_WEIGHT_LOAD, model_4 = load_trained_weights(LOAD_TRAINED_WEIGHTS=True, model=model_4, path='./models/multimodal.h5')

# Set TRAIN_MODEL to True to (re)train the model; while False the block below is skipped
TRAIN_MODEL = False
if TRAIN_MODEL:

    # Define file to store checkpoint
    BACKUP_FILE = os.path.join(BACKUP_DIR, 'multimodal.h5')

    # Callbacks
    model_callbacks = get_callbacks(BACKUP_FILE)
    # The CSV log is opened in append mode only when pre-trained weights were requested and loaded
    logCSV = CSVLogger(filename='logs/log_multimodal.csv', separator=',', append=(LOAD_TRAINED_WEIGHTS & SUCCESSFUL_WEIGHT_LOAD))
    model_callbacks.append(logCSV)

    # Train model
    # NOTE(review): multimodal_train_loader already yields batches of BATCH_SIZE; Keras docs advise
    # not passing batch_size for Sequence inputs -- confirm it is simply ignored here
    # NOTE(review): workers = -1 does not look like a documented "use all cores" value in Keras --
    # confirm the intended worker count
    model_4.fit(x = multimodal_train_loader,
                batch_size = BATCH_SIZE,
                epochs = EPOCHS,
                validation_data = multimodal_valid_loader,
                shuffle = True,
                workers = -1,
                use_multiprocessing = True,
                callbacks=model_callbacks)

Best weights loaded successfully!


# Plot the early-fusion training history; this is the CSV file the training cell's CSVLogger writes to
plot_keras_log('logs/log_multimodal.csv')


class Plotter():

    ''' Evaluate and visualise the predictions of a multi-class classifier.

    Receives three inputs, all of which must agree on the number of classes:
    1 - True labels as one-hot encodings
    2 - Predicted probabilities for each class
    3 - Class names

    Provides functionality for plotting: Confusion Matrix, Precision-Recall Curve, Micro (Global) ROC, and Class-Stratified ROC.
    Provides the following metrics as attributes: F1 Score (Macro), Accuracy, Balanced Accuracy
    '''

    def __init__(self, true_onehot, pred_probs, class_names):
        self.true_onehot = np.asarray(true_onehot)
        self.pred_probs = np.asarray(pred_probs)
        self.class_names = np.asarray(class_names)
        self.num_classes = len(class_names)

        # Turn the highest-scoring column of each row into a class name
        self.pred_labels = self.class_names[self.pred_probs.argmax(axis=1)]
        self.true_labels = self.class_names[self.true_onehot.argmax(axis=1)]

        # Summary metrics over all samples
        self.f1_score = f1_score(self.true_labels, self.pred_labels, average='macro')
        self.accuracy = accuracy_score(self.true_labels, self.pred_labels)
        self.balanced_accuracy = balanced_accuracy_score(self.true_labels, self.pred_labels)

        # Per-class curve data, keyed by class index
        self.fpr, self.tpr, self.roc_thresholds, self.roc_auc = {}, {}, {}, {}
        self.precision, self.recall, self.prc_thresholds, self.prc_auc = {}, {}, {}, {}

        # One-vs-rest ROC and precision-recall curves for every class
        for class_idx in range(len(self.class_names)):
            is_class = self.true_onehot[:, class_idx]
            class_scores = self.pred_probs[:, class_idx]

            # ROC curve and the area under it
            fpr, tpr, roc_thresholds = roc_curve(is_class, class_scores)
            self.fpr[class_idx], self.tpr[class_idx], self.roc_thresholds[class_idx] = fpr, tpr, roc_thresholds
            self.roc_auc[class_idx] = auc(fpr, tpr)

            # Precision-recall curve and the area under it
            precision, recall, prc_thresholds = precision_recall_curve(is_class, class_scores)
            self.precision[class_idx], self.recall[class_idx], self.prc_thresholds[class_idx] = precision, recall, prc_thresholds
            self.prc_auc[class_idx] = auc(recall, precision)

        # One colour per class, spread evenly over the 'nipy_spectral' colormap
        colormap = pylab.get_cmap('nipy_spectral')
        self.class_colors = [colormap(1.*class_idx/self.num_classes) for class_idx in range(len(self.class_names))]

    def _decorate_curve_axes(self, diagonal_x, diagonal_y, xlabel, ylabel, title, legend_loc):
        ''' Draw the dashed reference diagonal, then apply the limits, labels, title and legend shared by the curve plots '''
        plt.plot(diagonal_x, diagonal_y, 'k--', lw=2, alpha=0.3)
        plt.xlim([-0.01, 1.01])
        plt.ylim([-0.01, 1.01])
        plt.xlabel(xlabel, fontsize = 12, labelpad = 10)
        plt.ylabel(ylabel, fontsize = 12, labelpad = 10)
        plt.title(title, pad = 20, fontweight='bold')
        plt.legend(loc=legend_loc)

    def plot_confusion_matrix(self):
        ''' Draw the confusion matrix (rows: true emotion, columns: predicted emotion) as an annotated heatmap on the current axes '''
        counts = confusion_matrix(self.true_labels, self.pred_labels, labels=self.class_names)
        sns.heatmap(counts, annot=True, cmap='Blues', xticklabels=self.class_names, yticklabels=self.class_names, square=True, cbar=False, fmt='g')
        plt.title('Confusion Matrix', pad = 20, fontweight='bold')
        plt.ylabel('True Emotion', fontsize = 12, labelpad = 10)
        plt.xlabel('Predicted Emotion', fontsize = 12, labelpad = 10)

    def plot_prec_recall_curve(self):
        ''' Draw one precision-recall curve per class on the current axes '''
        plt.axis('square')
        for class_idx, (name, color) in enumerate(zip(self.class_names, self.class_colors)):
            plt.plot(self.recall[class_idx], self.precision[class_idx], color=color, lw=3, label=f'{name} (area = {self.prc_auc[class_idx]:.2f})', alpha=0.7)
        self._decorate_curve_axes([1, 0], [0, 1], 'Recall', 'Precision', 'Precision-Recall Curve', "best")

    def plot_roc_overall(self):
        ''' Draw the global (micro-average) ROC curve on the current axes '''
        # Micro-averaging pools every (sample, class) pair -- sensitive to class imbalance
        fpr, tpr, thresholds = roc_curve(self.true_onehot.ravel(), self.pred_probs.ravel())
        overall_auc = auc(fpr, tpr)

        plt.axis('square')
        plt.plot(fpr, tpr, color='deeppink', lw=5, label=f'MODEL OVERALL (area = {overall_auc:.2f})',)
        self._decorate_curve_axes([0, 1], [0, 1], 'False Positive Rate', 'True Positive Rate', 'Overall ROC Curve', "lower right")

    def plot_roc_multiclass(self):
        ''' Draw one ROC curve per class on the current axes '''
        plt.axis('square')
        for class_idx, (name, color) in enumerate(zip(self.class_names, self.class_colors)):
            plt.plot(self.fpr[class_idx], self.tpr[class_idx], color=color, lw=3, label=f'{name} (area = {self.roc_auc[class_idx]:.2f})', alpha=0.7)
        self._decorate_curve_axes([0, 1], [0, 1], 'False Positive Rate', 'True Positive Rate', 'Multi-Class ROC Curve', "lower right")

    def plot_classification_results(self):
        ''' Show all four plots in one 2x2 figure '''
        fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(16,10))
        panels = (self.plot_confusion_matrix, self.plot_prec_recall_curve,
                  self.plot_roc_overall, self.plot_roc_multiclass)
        # Row-major order: confusion matrix, precision-recall, overall ROC, per-class ROC
        for ax, draw_panel in zip(axs.ravel(), panels):
            fig.sca(ax)
            draw_panel()
        fig.tight_layout(h_pad=3, w_pad=-30)
        plt.show()


# Generate probabilities with each model
test_probas_acoustic = model_1.predict(acoustic_test_loader)
test_probas_visual = model_2.predict(visual_test_loader)
test_probas_late = model_3.predict_proba(acoustic_test_loader, visual_test_loader)
test_probas_early = model_4.predict(multimodal_test_loader)


# Aggregate: one Plotter per model, built from its test-set predictions, in the same order as `modalities`
modalities = ['Acoustic', 'Visual', 'Late Fusion', 'Early Fusion']
plotters = [Plotter(labels_test_one_hot, test_probas, class_names)
            for test_probas in (test_probas_acoustic, test_probas_visual, test_probas_late, test_probas_early)]

# Individual handles on each plotter
plotter_acoustic, plotter_visual, plotter_late, plotter_early = plotters


# One bar chart per summary metric: (figure title, Plotter attribute holding the score)
for metric_title, metric_attr in (('F1 Score (Macro)', 'f1_score'),
                                  ('Accuracy', 'accuracy'),
                                  ('Balanced Accuracy', 'balanced_accuracy')):

    # Plot
    fig, ax = plt.subplots(figsize=(10,5))

    # Barplot: one bar per model
    scores = [getattr(plotter, metric_attr) for plotter in plotters]
    ax.bar(x=modalities, height=scores, color='slategray')

    # Formatting: scores are fractions in [0, 1], shown as percentages
    plt.title(metric_title, size=25)
    plt.ylim([0, 1])
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    ax.yaxis.set_major_formatter(mtick.PercentFormatter(1, decimals=0))
    ax.locator_params(axis ='y', nbins=6)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    # Add values above bars
    for x_index, score in enumerate(scores):
        ax.text(x_index-0.2, score+0.01, f'{100*score:.1f} %', color='black', size=20)

    # Show
    plt.show()


# One 1x4 figure per diagnostic plot, with one panel per model, in this order:
# Confusion Matrices, ROC-AUC Overall, ROC-AUC Class Stratified, Precision-Recall Curve
for plot_method in ('plot_confusion_matrix',
                    'plot_roc_overall',
                    'plot_roc_multiclass',
                    'plot_prec_recall_curve'):

    fig, axs = plt.subplots(nrows=1, ncols=4, figsize=(20,10))
    for idx, (plotter, modality) in enumerate(zip(plotters, modalities)):

        # Draw this model's plot on its own panel, then title the panel with the model name
        fig.sca(axs[idx])
        getattr(plotter, plot_method)()
        plt.title(modality, size=20, pad=20)

    # Show
    fig.subplots_adjust(wspace=0.3)
    plt.show()

Multimodal Emotion Recognition

Imports

Load Data

Configuration

Standardize Data

Data Loader

Model Architecture

Model 1: Acoustic

Model 2: Visual

Model 3: Late Fusion

Model 4: Early Fusion

Model Comparison

Analysis of Results

End