Supervised, Semi-Supervised, and Unsupervised Learning

DSCI 552 | Machine Learning for Data Science

Homework 6

Matheus Schmitz

USC ID: 5039286453

Imports

In [1]:
# Data Science
import numpy as np
import pandas as pd

# Scikit-Learn
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, auc, f1_score, precision_score, recall_score,  roc_curve
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.neighbors import KNeighborsClassifier

# Progress Bar
from tqdm import tqdm

# Ignore Warnings
import warnings
warnings.filterwarnings("ignore")

(a) Dataset

1a.JPG

In [2]:
# Get all column names
col_names = ['ID', 'Diagnosis']
collected_stats = ['Mean', 'SE', 'Worst']
collected_features = ['Radius', 'Texture', 'Perimeter', 'Area', 'Smoothness', 'Compactness', 'Concavity', 'ConcavePoints', 'Symmetry', 'FractalDimension']
for stat in collected_stats:
    for feat in collected_features:
        col_names.append(f'{stat}_{feat}')
In [3]:
df = pd.read_csv('../data/wdbc.data', header=None, names=col_names)
print(f'df.shape: {df.shape}')
df.head(3)
df.shape: (569, 32)
Out[3]:
ID Diagnosis Mean_Radius Mean_Texture Mean_Perimeter Mean_Area Mean_Smoothness Mean_Compactness Mean_Concavity Mean_ConcavePoints ... Worst_Radius Worst_Texture Worst_Perimeter Worst_Area Worst_Smoothness Worst_Compactness Worst_Concavity Worst_ConcavePoints Worst_Symmetry Worst_FractalDimension
0 842302 M 17.99 10.38 122.8 1001.0 0.11840 0.27760 0.3001 0.14710 ... 25.38 17.33 184.6 2019.0 0.1622 0.6656 0.7119 0.2654 0.4601 0.11890
1 842517 M 20.57 17.77 132.9 1326.0 0.08474 0.07864 0.0869 0.07017 ... 24.99 23.41 158.8 1956.0 0.1238 0.1866 0.2416 0.1860 0.2750 0.08902
2 84300903 M 19.69 21.25 130.0 1203.0 0.10960 0.15990 0.1974 0.12790 ... 23.57 25.53 152.5 1709.0 0.1444 0.4245 0.4504 0.2430 0.3613 0.08758

3 rows × 32 columns

In [4]:
# Remove 'ID' column
if 'ID' in df.columns:
    df.drop('ID', axis=1, inplace=True)

# Split Benign and Malign samples and shuffle them
df_B = df[df.Diagnosis == 'B'].sample(frac=1)
df_M = df[df.Diagnosis == 'M'].sample(frac=1)

# Create test and train dataframes
df_test = pd.concat([df_B[0:round(len(df_B)*0.2)],
                     df_M[0:round(len(df_M)*0.2)]])
df_train = pd.concat([df_B[round(len(df_B)*0.2):],
                      df_M[round(len(df_M)*0.2):]])

# Shapes
print(f'df_test.shape: {df_test.shape}')
print(f'df_train.shape: {df_train.shape}')

# Get X's and Y's
x_train = df_train.iloc[:, 1:]
x_test = df_test.iloc[:, 1:]
y_train = df_train.iloc[:, 0]
y_test = df_test.iloc[:, 0]

# Normalize X's
SCALER = MinMaxScaler()
x_train = pd.DataFrame(SCALER.fit_transform(x_train), columns=[col_names[2:]])
x_test = pd.DataFrame(SCALER.transform(x_test), columns=[col_names[2:]])

# Label Encode Y's
LE = LabelEncoder()
y_train = pd.DataFrame(LE.fit_transform(y_train), columns=[col_names[1]])
y_test = pd.DataFrame(LE.transform(y_test), columns=[col_names[1]])
df_test.shape: (113, 31)
df_train.shape: (456, 31)

(b) Monte-Carlo Simulations

1b.JPG

In [5]:
# Dataframe to summarize all results
summary = pd.DataFrame()
In [6]:
# Auxilary functions to facilitate plotting classification results
# Based on scikit-learn documentation: https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc, confusion_matrix, precision_recall_curve

def plot_cm(true_binary, pred_binary, classes=['B', 'M']):
    # Get Confusion Matrix and plot
    conf_mat = confusion_matrix(true_binary, pred_binary)
    #plt.axis('equal')
    sns.heatmap(conf_mat, annot=True, cmap='Blues', xticklabels=classes, yticklabels=classes, square=True, cbar=False, fmt='d')
    plt.title('Confusion Matrix', pad = 20, fontweight='bold')
    plt.ylabel('True Class', fontsize = 12, labelpad = 10)
    plt.xlabel('Predicted Class', fontsize = 12, labelpad = 10)

def plot_roc_overall(true_onehot, pred_probs, classes=['B', 'M']):
    # Coerce inputs to np.array
    true_onehot = np.asarray(true_onehot)
    pred_probs = np.asarray(pred_probs)
    classes = np.asarray(classes)
    N_CLASSES = len(classes)
    # Compute global (micro-average) ROC curve and ROC area
    fpr, tpr, thresholds = roc_curve(true_onehot.ravel(), pred_probs.ravel())
    roc_auc = auc(fpr, tpr)
    # Plot the model overall ROC
    plt.axis('square')
    plt.plot(fpr, tpr, label=f'MODEL OVERALL (area = {roc_auc:.2f})', color='deeppink', lw=4)
    plt.plot([0, 1], [0, 1], 'k--', lw=2, alpha=0.3)
    plt.xlim([-0.01, 1.01])
    plt.ylim([-0.01, 1.01])
    plt.xlabel('False Positive Rate', fontsize = 12, labelpad = 10)
    plt.ylabel('True Positive Rate', fontsize = 12, labelpad = 10)
    plt.title('Overall ROC Curve', pad = 20, fontweight='bold')
    legend = plt.legend()
    legend._legend_box.align = "right"
    plt.legend(loc="lower right")

def plot_classification_results(true_onehot, pred_probs, classes=['Benign', 'Malign']):
    fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(8,6))
    fig.sca(axs[0])
    plot_cm(true_onehot, pred_probs, classes)
    fig.sca(axs[1])
    plot_roc_overall(true_onehot, pred_probs, classes)
    fig.tight_layout(w_pad=5)
    fig.show()

(i) Supervised Learning

1bi.JPG

In [7]:
# Grid Search to find best penalty parameter
svc = LinearSVC(penalty='l1', dual=False)
param_grid = {'C': np.logspace(-2, 2, 10)}
grid = GridSearchCV(svc, param_grid=param_grid, cv=5)

# Lists to store metrics
acc_train, acc_test = [], []
auc_train, auc_test = [], []
prec_B_train, prec_M_train, prec_B_test, prec_M_test = [], [], [], [] 
recall_B_train, recall_M_train, recall_B_test, recall_M_test = [], [], [], [] 
f1_B_train, f1_M_train, f1_B_test, f1_M_test = [], [], [], []

# Run 30 Monte-Carlo Simulations
for M in tqdm(range(30)):
    
    #--------------------------------------#
    #   (b) TRAIN-TEST SPLIT               #
    #--------------------------------------# 
    
    # Split Benign and Malign samples and shuffle them
    df_B = df[df.Diagnosis == 'B'].sample(frac=1)
    df_M = df[df.Diagnosis == 'M'].sample(frac=1)

    # Create test and train dataframes
    df_test = pd.concat([df_B[0:round(len(df_B)*0.2)],
                         df_M[0:round(len(df_M)*0.2)]])
    df_train = pd.concat([df_B[round(len(df_B)*0.2):],
                          df_M[round(len(df_M)*0.2):]])

    # Get X's and Y's
    x_train = df_train.iloc[:, 1:]
    x_test = df_test.iloc[:, 1:]
    y_train = df_train.iloc[:, 0]
    y_test = df_test.iloc[:, 0]

    # Normalize X's
    SCALER = MinMaxScaler()
    x_train = pd.DataFrame(SCALER.fit_transform(x_train), columns=[col_names[2:]])
    x_test = pd.DataFrame(SCALER.transform(x_test), columns=[col_names[2:]])

    # Label Encode Y's
    LE = LabelEncoder()
    y_train = pd.DataFrame(LE.fit_transform(y_train), columns=[col_names[1]])
    y_test = pd.DataFrame(LE.transform(y_test), columns=[col_names[1]])

    
    #--------------------------------------#
    #   (i) SUPERVISED LEARNING            #
    #--------------------------------------#
    
    # Train
    grid.fit(x_train, y_train)

    # Predict
    pred_train = grid.predict(x_train)
    pred_test = grid.predict(x_test)

    # Train Metrics
    acc_train.append(accuracy_score(y_train, pred_train))
    fpr, tpr, threshold = roc_curve(y_train, pred_train)
    auc_train.append(auc(fpr, tpr))
    prec_B_train.append(precision_score(y_train, pred_train, pos_label=0))
    prec_M_train.append(precision_score(y_train, pred_train, pos_label=1))
    recall_B_train.append(recall_score(y_train, pred_train, pos_label=0))
    recall_M_train.append(recall_score(y_train, pred_train, pos_label=1))
    f1_B_train.append(f1_score(y_train, pred_train, pos_label=0))
    f1_M_train.append(f1_score(y_train, pred_train, pos_label=1))

    # Test Metrics
    acc_test.append(accuracy_score(y_test, pred_test))
    fpr, tpr, threshold = roc_curve(y_test, pred_test)
    auc_test.append(auc(fpr, tpr))
    prec_B_test.append(precision_score(y_test, pred_test, pos_label=0))
    prec_M_test.append(precision_score(y_test, pred_test, pos_label=1))
    recall_B_test.append(recall_score(y_test, pred_test, pos_label=0))
    recall_M_test.append(recall_score(y_test, pred_test, pos_label=1))
    f1_B_test.append(f1_score(y_test, pred_test, pos_label=0))
    f1_M_test.append(f1_score(y_test, pred_test, pos_label=1))
    
# Average Train Metrics
summary.at['Accuracy', 'Supervised Train'] = np.mean(acc_train)
summary.at['AUC', 'Supervised Train'] = np.mean(auc_train)
summary.at['Precision', 'Supervised Train'] = np.mean(prec_B_train + prec_M_train)
summary.at['Precision_B', 'Supervised Train'] = np.mean(prec_B_train)
summary.at['Precision_M', 'Supervised Train'] = np.mean(prec_M_train)
summary.at['Recall', 'Supervised Train'] = np.mean(recall_B_train + recall_M_train)
summary.at['Recall_B', 'Supervised Train'] = np.mean(recall_B_train)
summary.at['Recall_M', 'Supervised Train'] = np.mean(recall_M_train)
summary.at['F1', 'Supervised Train'] = np.mean(f1_B_train + f1_M_train)
summary.at['F1_B', 'Supervised Train'] = np.mean(f1_B_train)
summary.at['F1_M', 'Supervised Train'] = np.mean(f1_M_train)

# Average Test Metrics
summary.at['Accuracy', 'Supervised Test'] = np.mean(acc_test)
summary.at['AUC', 'Supervised Test'] = np.mean(auc_test)
summary.at['Precision', 'Supervised Test'] = np.mean(prec_B_test + prec_M_test)
summary.at['Precision_B', 'Supervised Test'] = np.mean(prec_B_test)
summary.at['Precision_M', 'Supervised Test'] = np.mean(prec_M_test)
summary.at['Recall', 'Supervised Test'] = np.mean(recall_B_test + recall_M_test)
summary.at['Recall_B', 'Supervised Test'] = np.mean(recall_B_test)
summary.at['Recall_M', 'Supervised Test'] = np.mean(recall_M_test)
summary.at['F1', 'Supervised Test'] = np.mean(f1_B_test + f1_M_test)
summary.at['F1_B', 'Supervised Test'] = np.mean(f1_B_test)
summary.at['F1_M', 'Supervised Test'] = np.mean(f1_M_test)

# Show results
summary.loc[['Accuracy', 'AUC', 'Precision', 'Recall', 'F1']]
100%|██████████████████████████████████████████| 30/30 [00:49<00:00,  1.66s/it]
Out[7]:
Supervised Train Supervised Test
Accuracy 0.986184 0.967847
AUC 0.983061 0.961933
Precision 0.987441 0.969506
Recall 0.983061 0.961933
F1 0.985150 0.965258
In [8]:
# Confusion Matrix and ROC Curve | Train Data
plot_classification_results(y_train, pred_train)
plt.suptitle('Supervised Learning | Train Data', y=0.9, fontsize=15, fontweight='bold')
plt.show()
In [9]:
# Confusion Matrix and ROC Curve | Test Data
plot_classification_results(y_test, pred_test)
plt.suptitle('Supervised Learning | Test Data', y=0.9, fontsize=15, fontweight='bold')
plt.show()

(ii) Semi-Supervised Learning

1bii.JPG

1biia.JPG

1biib.JPG

In [10]:
# Lists to store metrics
acc_train, acc_test = [], []
auc_train, auc_test = [], []
prec_B_train, prec_M_train, prec_B_test, prec_M_test = [], [], [], [] 
recall_B_train, recall_M_train, recall_B_test, recall_M_test = [], [], [], [] 
f1_B_train, f1_M_train, f1_B_test, f1_M_test = [], [], [], []

# Run 30 Monte-Carlo Simulations
for M in tqdm(range(30)):
    
    #--------------------------------------#
    #   (b) TRAIN-TEST SPLIT               #
    #--------------------------------------# 
    
    # Split Benign and Malign samples and shuffle them
    df_B = df[df.Diagnosis == 'B'].sample(frac=1)
    df_M = df[df.Diagnosis == 'M'].sample(frac=1)

    # Create test and train dataframes
    df_test = pd.concat([df_B[0:round(len(df_B)*0.2)],
                         df_M[0:round(len(df_M)*0.2)]])
    df_train = pd.concat([df_B[round(len(df_B)*0.2):],
                          df_M[round(len(df_M)*0.2):]])

    # Get X's and Y's
    x_train = df_train.iloc[:, 1:]
    x_test = df_test.iloc[:, 1:]
    y_train = df_train.iloc[:, 0]
    y_test = df_test.iloc[:, 0]

    # Normalize X's
    SCALER = MinMaxScaler()
    x_train = pd.DataFrame(SCALER.fit_transform(x_train), columns=[col_names[2:]])
    x_test = pd.DataFrame(SCALER.transform(x_test), columns=[col_names[2:]])

    # Label Encode Y's
    LE = LabelEncoder()
    y_train = pd.DataFrame(LE.fit_transform(y_train), columns=[col_names[1]])
    y_test = pd.DataFrame(LE.transform(y_test), columns=[col_names[1]])
    
    
    #--------------------------------------#
    #   (ii) LABELED-UNLABELED SPLIT       #
    #--------------------------------------#    
    
    # Split labeled and unlabeled data
    x_labeled, x_unlabeled, y_labeled, y_unlabeled = train_test_split(x_train, y_train, test_size=0.5, stratify=y_train)

    
    #--------------------------------------#
    #   (A) FIND PENALTY HYPERPARAMETER    #
    #--------------------------------------#

    # Grid Search to find best penalty hyperparameter
    svc = LinearSVC(penalty='l1', dual=False)
    param_grid = {'C': np.logspace(-2, 2, 10)}
    grid = GridSearchCV(svc, param_grid=param_grid, cv=5)

    # labeled
    grid.fit(x_labeled, y_labeled)


    #--------------------------------------#
    #   (B) SELF-TRAINING                  #
    #--------------------------------------#

    # SVC using the best C found in part A
    svc = LinearSVC(penalty='l1', dual=False, C=grid.best_params_["C"])

    # One-by-one predict an unlabeled sample and move it to the labeled dataset
    while len(x_unlabeled) > 0:

        # Train the classifier on the labeled samples
        svc.fit(x_labeled, y_labeled)

        # Find the unlabled sample farthest from the decision boundary
        distances_unlabeled_x = np.absolute(svc.decision_function(x_unlabeled))
        idx_farthest = np.argmax(distances_unlabeled_x)
        farthest_unlabeled = pd.DataFrame(x_unlabeled.iloc[idx_farthest]).T

        # Predict it's label
        assigned_label = pd.DataFrame(svc.predict(farthest_unlabeled))

        # Append the sample to the labeled data
        x_labeled.append(farthest_unlabeled)
        y_labeled.append(assigned_label)

        # Remove the sample from the unlabeled data
        x_unlabeled.drop(farthest_unlabeled.index, inplace=True)
        y_unlabeled.drop(farthest_unlabeled.index, inplace=True)

    # Once all unlabeled samples have been labeled, train a final classifier
    svc.fit(x_labeled, y_labeled)

    # Predict on the train and test datasets
    pred_train = svc.predict(x_train)
    pred_test = svc.predict(x_test)

    # Train Metrics
    acc_train.append(accuracy_score(y_train, pred_train))
    fpr, tpr, threshold = roc_curve(y_train, pred_train)
    auc_train.append(auc(fpr, tpr))
    prec_B_train.append(precision_score(y_train, pred_train, pos_label=0))
    prec_M_train.append(precision_score(y_train, pred_train, pos_label=1))
    recall_B_train.append(recall_score(y_train, pred_train, pos_label=0))
    recall_M_train.append(recall_score(y_train, pred_train, pos_label=1))
    f1_B_train.append(f1_score(y_train, pred_train, pos_label=0))
    f1_M_train.append(f1_score(y_train, pred_train, pos_label=1))

    # Test Metrics
    acc_test.append(accuracy_score(y_test, pred_test))
    fpr, tpr, threshold = roc_curve(y_test, pred_test)
    auc_test.append(auc(fpr, tpr))
    prec_B_test.append(precision_score(y_test, pred_test, pos_label=0))
    prec_M_test.append(precision_score(y_test, pred_test, pos_label=1))
    recall_B_test.append(recall_score(y_test, pred_test, pos_label=0))
    recall_M_test.append(recall_score(y_test, pred_test, pos_label=1))
    f1_B_test.append(f1_score(y_test, pred_test, pos_label=0))
    f1_M_test.append(f1_score(y_test, pred_test, pos_label=1))
    
# Average Train Metrics
summary.at['Accuracy', 'Semi-Supervised Train'] = np.mean(acc_train)
summary.at['AUC', 'Semi-Supervised Train'] = np.mean(auc_train)
summary.at['Precision', 'Semi-Supervised Train'] = np.mean(prec_B_train + prec_M_train)
summary.at['Precision_B', 'Semi-Supervised Train'] = np.mean(prec_B_train)
summary.at['Precision_M', 'Semi-Supervised Train'] = np.mean(prec_M_train)
summary.at['Recall', 'Semi-Supervised Train'] = np.mean(recall_B_train + recall_M_train)
summary.at['Recall_B', 'Semi-Supervised Train'] = np.mean(recall_B_train)
summary.at['Recall_M', 'Semi-Supervised Train'] = np.mean(recall_M_train)
summary.at['F1', 'Semi-Supervised Train'] = np.mean(f1_B_train + f1_M_train)
summary.at['F1_B', 'Semi-Supervised Train'] = np.mean(f1_B_train)
summary.at['F1_M', 'Semi-Supervised Train'] = np.mean(f1_M_train)

# Average Test Metrics
summary.at['Accuracy', 'Semi-Supervised Test'] = np.mean(acc_test)
summary.at['AUC', 'Semi-Supervised Test'] = np.mean(auc_test)
summary.at['Precision', 'Semi-Supervised Test'] = np.mean(prec_B_test + prec_M_test)
summary.at['Precision_B', 'Semi-Supervised Test'] = np.mean(prec_B_test)
summary.at['Precision_M', 'Semi-Supervised Test'] = np.mean(prec_M_test)
summary.at['Recall', 'Semi-Supervised Test'] = np.mean(recall_B_test + recall_M_test)
summary.at['Recall_B', 'Semi-Supervised Test'] = np.mean(recall_B_test)
summary.at['Recall_M', 'Semi-Supervised Test'] = np.mean(recall_M_test)
summary.at['F1', 'Semi-Supervised Test'] = np.mean(f1_B_test + f1_M_test)
summary.at['F1_B', 'Semi-Supervised Test'] = np.mean(f1_B_test)
summary.at['F1_M', 'Semi-Supervised Test'] = np.mean(f1_M_test)

# Show results
summary.loc[['Accuracy', 'AUC', 'Precision', 'Recall', 'F1']]
100%|██████████████████████████████████████████| 30/30 [03:37<00:00,  7.24s/it]
Out[10]:
Supervised Train Supervised Test Semi-Supervised Train Semi-Supervised Test
Accuracy 0.986184 0.967847 0.974635 0.961947
AUC 0.983061 0.961933 0.969241 0.955131
Precision 0.987441 0.969506 0.976693 0.963934
Recall 0.983061 0.961933 0.969241 0.955131
F1 0.985150 0.965258 0.972648 0.958793
In [11]:
# Confusion Matrix and ROC Curve | Train Data
plot_classification_results(y_train, pred_train)
plt.suptitle('Semi-Supervised Learning | Train Data', y=0.9, fontsize=15, fontweight='bold')
plt.show()
In [12]:
# Confusion Matrix and ROC Curve | Test Data
plot_classification_results(y_test, pred_test)
plt.suptitle('Semi-Supervised Learning | Test Data', y=0.9, fontsize=15, fontweight='bold')
plt.show()

(iii) Unsupervised Learning

1biii.JPG

1biiia.JPG

1biiib_1.JPG1biiib_2.JPG

1biiic.JPG

In [13]:
# Lists to store metrics
acc_train, acc_test = [], []
auc_train, auc_test = [], []
prec_B_train, prec_M_train, prec_B_test, prec_M_test = [], [], [], [] 
recall_B_train, recall_M_train, recall_B_test, recall_M_test = [], [], [], [] 
f1_B_train, f1_M_train, f1_B_test, f1_M_test = [], [], [], []

# Run 30 Monte-Carlo Simulations
for M in tqdm(range(30)):
    
    #--------------------------------------#
    #   (b) TRAIN-TEST SPLIT               #
    #--------------------------------------# 
    
    # Split Benign and Malign samples and shuffle them
    df_B = df[df.Diagnosis == 'B'].sample(frac=1)
    df_M = df[df.Diagnosis == 'M'].sample(frac=1)

    # Create test and train dataframes
    df_test = pd.concat([df_B[0:round(len(df_B)*0.2)],
                         df_M[0:round(len(df_M)*0.2)]])
    df_train = pd.concat([df_B[round(len(df_B)*0.2):],
                          df_M[round(len(df_M)*0.2):]])

    # Get X's and Y's
    x_train = df_train.iloc[:, 1:]
    x_test = df_test.iloc[:, 1:]
    y_train = df_train.iloc[:, 0]
    y_test = df_test.iloc[:, 0]

    # Normalize X's
    SCALER = MinMaxScaler()
    x_train = pd.DataFrame(SCALER.fit_transform(x_train), columns=[col_names[2:]])
    x_test = pd.DataFrame(SCALER.transform(x_test), columns=[col_names[2:]])

    # Label Encode Y's
    LE = LabelEncoder()
    y_train = pd.DataFrame(LE.fit_transform(y_train), columns=[col_names[1]])
    y_test = pd.DataFrame(LE.transform(y_test), columns=[col_names[1]])

    
    #--------------------------------------#
    #   (iii) 2 CLUSTER K-MEANS            #
    #--------------------------------------#

    N_CLUSTERS = 2


    #--------------------------------------#
    #   (A)  AVOIDING LOCAL MINIMUM        #
    #--------------------------------------#

    # The way to avoid having the algorithm becoming trapped into local minimum is to initialize it multiple times
    # With each initializion picking the starting cluster centers at random
    # This can be achieved with the hyperparameters "init" and "n_init"
    kmeans = KMeans(n_clusters=N_CLUSTERS, init='random', n_init=100)


    #--------------------------------------#
    #   (B)  LABELING CLUSTERS             #
    #--------------------------------------# 

    # Compute cluster centers
    kmeans.fit(x_train)

    # Get the distances between samples and cluster centers
    cluster_distances = kmeans.transform(x_train)

    # Get the 30 samples closest to each cluster
    cluster_0_distances = cluster_distances[:, 0]
    cluster_1_distances = cluster_distances[:, 1]
    closest_cluster_0 = cluster_0_distances.argsort()[:30]
    closest_cluster_1 = cluster_1_distances.argsort()[:30]

    # Get the most frequent class in each cluster (based on the 30 closest samples)
    cluster_0_class = y_train.iloc[closest_cluster_0].mode(axis='index').values[0][0]
    cluster_1_class = y_train.iloc[closest_cluster_1].mode(axis='index').values[0][0]
    cluster_classes = {0:cluster_0_class,
                       1:cluster_1_class}

    # Assign clusters to train data
    clusters_train = kmeans.predict(x_train)
    
    # Assign a class to each sample based on its cluster
    pred_train = np.asarray(list(map(lambda key: cluster_classes[key], clusters_train)))

    # Train Metrics
    acc_train.append(accuracy_score(y_train, pred_train))
    fpr, tpr, threshold = roc_curve(y_train, pred_train)
    auc_train.append(auc(fpr, tpr))
    prec_B_train.append(precision_score(y_train, pred_train, pos_label=0))
    prec_M_train.append(precision_score(y_train, pred_train, pos_label=1))
    recall_B_train.append(recall_score(y_train, pred_train, pos_label=0))
    recall_M_train.append(recall_score(y_train, pred_train, pos_label=1))
    f1_B_train.append(f1_score(y_train, pred_train, pos_label=0))
    f1_M_train.append(f1_score(y_train, pred_train, pos_label=1))

    
    #--------------------------------------#
    #   (C)  TEST DATASET                  #
    #--------------------------------------# 
    
    # Assign clusters to test data
    clusters_test = kmeans.predict(x_test)

    # Assign a class to each sample based on its cluster
    pred_test = np.asarray(list(map(lambda key: cluster_classes[key], clusters_test)))

    # Test Metrics
    acc_test.append(accuracy_score(y_test, pred_test))
    fpr, tpr, threshold = roc_curve(y_test, pred_test)
    auc_test.append(auc(fpr, tpr))
    prec_B_test.append(precision_score(y_test, pred_test, pos_label=0))
    prec_M_test.append(precision_score(y_test, pred_test, pos_label=1))
    recall_B_test.append(recall_score(y_test, pred_test, pos_label=0))
    recall_M_test.append(recall_score(y_test, pred_test, pos_label=1))
    f1_B_test.append(f1_score(y_test, pred_test, pos_label=0))
    f1_M_test.append(f1_score(y_test, pred_test, pos_label=1))
    
# Average Train Metrics
summary.at['Accuracy', 'Unsupervised Train'] = np.mean(acc_train)
summary.at['AUC', 'Unsupervised Train'] = np.mean(auc_train)
summary.at['Precision', 'Unsupervised Train'] = np.mean(prec_B_train + prec_M_train)
summary.at['Precision_B', 'Unsupervised Train'] = np.mean(prec_B_train)
summary.at['Precision_M', 'Unsupervised Train'] = np.mean(prec_M_train)
summary.at['Recall', 'Unsupervised Train'] = np.mean(recall_B_train + recall_M_train)
summary.at['Recall_B', 'Unsupervised Train'] = np.mean(recall_B_train)
summary.at['Recall_M', 'Unsupervised Train'] = np.mean(recall_M_train)
summary.at['F1', 'Unsupervised Train'] = np.mean(f1_B_train + f1_M_train)
summary.at['F1_B', 'Unsupervised Train'] = np.mean(f1_B_train)
summary.at['F1_M', 'Unsupervised Train'] = np.mean(f1_M_train)

# Average Test Metrics
summary.at['Accuracy', 'Unsupervised Test'] = np.mean(acc_test)
summary.at['AUC', 'Unsupervised Test'] = np.mean(auc_test)
summary.at['Precision', 'Unsupervised Test'] = np.mean(prec_B_test + prec_M_test)
summary.at['Precision_B', 'Unsupervised Test'] = np.mean(prec_B_test)
summary.at['Precision_M', 'Unsupervised Test'] = np.mean(prec_M_test)
summary.at['Recall', 'Unsupervised Test'] = np.mean(recall_B_test + recall_M_test)
summary.at['Recall_B', 'Unsupervised Test'] = np.mean(recall_B_test)
summary.at['Recall_M', 'Unsupervised Test'] = np.mean(recall_M_test)
summary.at['F1', 'Unsupervised Test'] = np.mean(f1_B_test + f1_M_test)
summary.at['F1_B', 'Unsupervised Test'] = np.mean(f1_B_test)
summary.at['F1_M', 'Unsupervised Test'] = np.mean(f1_M_test)

# Show results
summary.loc[['Accuracy', 'AUC', 'Precision', 'Recall', 'F1']]
100%|██████████████████████████████████████████| 30/30 [00:10<00:00,  2.77it/s]
Out[13]:
Supervised Train Supervised Test Semi-Supervised Train Semi-Supervised Test Unsupervised Train Unsupervised Test
Accuracy 0.986184 0.967847 0.974635 0.961947 0.924123 0.930383
AUC 0.983061 0.961933 0.969241 0.955131 0.907938 0.915426
Precision 0.987441 0.969506 0.976693 0.963934 0.929882 0.936120
Recall 0.983061 0.961933 0.969241 0.955131 0.907938 0.915426
F1 0.985150 0.965258 0.972648 0.958793 0.916909 0.923678
In [14]:
# Confusion Matrix and ROC Curve | Train Data
plot_classification_results(y_train, pred_train)
plt.suptitle('Unsupervised Learning | Train Data', y=0.9, fontsize=15, fontweight='bold')
plt.show()
In [15]:
# Confusion Matrix and ROC Curve | Test Data
plot_classification_results(y_test, pred_test)
plt.suptitle('Unsupervised Learning | Test Data', y=0.9, fontsize=15, fontweight='bold')
plt.show()

(iv) Spectral Clustering

1biv.JPG

1biv_note.JPG

In [16]:
# Lists to store metrics
acc_train, acc_test = [], []
auc_train, auc_test = [], []
prec_B_train, prec_M_train, prec_B_test, prec_M_test = [], [], [], [] 
recall_B_train, recall_M_train, recall_B_test, recall_M_test = [], [], [], [] 
f1_B_train, f1_M_train, f1_B_test, f1_M_test = [], [], [], []

# Run 30 Monte-Carlo Simulations
for M in tqdm(range(30)):
    
    #--------------------------------------#
    #   (b) TRAIN-TEST SPLIT               #
    #--------------------------------------# 
    
    # Split Benign and Malign samples and shuffle them
    df_B = df[df.Diagnosis == 'B'].sample(frac=1)
    df_M = df[df.Diagnosis == 'M'].sample(frac=1)

    # Create test and train dataframes
    df_test = pd.concat([df_B[0:round(len(df_B)*0.2)],
                         df_M[0:round(len(df_M)*0.2)]])
    df_train = pd.concat([df_B[round(len(df_B)*0.2):],
                          df_M[round(len(df_M)*0.2):]])

    # Get X's and Y's
    x_train = df_train.iloc[:, 1:]
    x_test = df_test.iloc[:, 1:]
    y_train = df_train.iloc[:, 0]
    y_test = df_test.iloc[:, 0]

    # Normalize X's
    SCALER = MinMaxScaler()
    x_train = pd.DataFrame(SCALER.fit_transform(x_train), columns=[col_names[2:]])
    x_test = pd.DataFrame(SCALER.transform(x_test), columns=[col_names[2:]])

    # Label Encode Y's
    LE = LabelEncoder()
    y_train = pd.DataFrame(LE.fit_transform(y_train), columns=[col_names[1]])
    y_test = pd.DataFrame(LE.transform(y_test), columns=[col_names[1]])

    
    #--------------------------------------#
    #   (iv) 2 CLUSTER K-MEANS             #
    #--------------------------------------#

    N_CLUSTERS = 2


    #--------------------------------------#
    #   (A)  AVOIDING LOCAL MINIMUM        #
    #--------------------------------------#

    # The way to avoid having the algorithm becoming trapped into local minimum is to initialize it multiple times
    # With each initializion picking the starting cluster centers at random
    # This can be achieved with the hyperparameters "init" and "n_init"
    spectral_clst = SpectralClustering(n_clusters=N_CLUSTERS, affinity='rbf', gamma=1, n_init=100, n_jobs=-1)

    #--------------------------------------#
    #   (B)  LABELING CLUSTERS             #
    #--------------------------------------# 

    # Assign clusters to the classes
    assigned_clusters_train = spectral_clst.fit_predict(x_train)

    # For each cluster get the indexes of their datapoints
    idx_cluster_0 = np.argwhere(assigned_clusters_train == 0).flatten()
    idx_cluster_1 = np.argwhere(assigned_clusters_train == 1).flatten()

    # Get the most frequent class in each cluster (based on all samples on the cluster)
    cluster_0_class = y_train.iloc[idx_cluster_0].mode(axis='index').values[0][0]
    cluster_1_class = y_train.iloc[idx_cluster_1].mode(axis='index').values[0][0]
    cluster_classes = {0:cluster_0_class,
                       1:cluster_1_class}

    # Assign a class to each sample based on its cluster
    pred_train = np.asarray(list(map(lambda key: cluster_classes[key], assigned_clusters_train)))

    # Train Metrics
    acc_train.append(accuracy_score(y_train, pred_train))
    fpr, tpr, threshold = roc_curve(y_train, pred_train)
    auc_train.append(auc(fpr, tpr))
    prec_B_train.append(precision_score(y_train, pred_train, pos_label=0))
    prec_M_train.append(precision_score(y_train, pred_train, pos_label=1))
    recall_B_train.append(recall_score(y_train, pred_train, pos_label=0))
    recall_M_train.append(recall_score(y_train, pred_train, pos_label=1))
    f1_B_train.append(f1_score(y_train, pred_train, pos_label=0))
    f1_M_train.append(f1_score(y_train, pred_train, pos_label=1))


    #--------------------------------------#
    #   (C)  TEST DATASET                  #
    #--------------------------------------# 
    '''
    # Assign clusters to the classes
    assigned_clusters_test = spectral_clst.fit_predict(x_test)

    # For each cluster get the indexes of their datapoints
    idx_cluster_0 = np.argwhere(assigned_clusters_test == 0).flatten()
    idx_cluster_1 = np.argwhere(assigned_clusters_test == 1).flatten()

    # Get the most frequent class in each cluster (based on all samples on the cluster)
    cluster_0_class = y_test.iloc[idx_cluster_0].mode(axis='index').values[0][0]
    cluster_1_class = y_test.iloc[idx_cluster_1].mode(axis='index').values[0][0]
    cluster_classes = {0:cluster_0_class,
                       1:cluster_1_class}

    # Assign a class to each sample based on its cluster
    pred_test = np.asarray(list(map(lambda key: cluster_classes[key], assigned_clusters_test)))
    '''
    
    # https://piazza.com/class/kdfi4ly9oqy72y?cid=547
    
    # Use KNN to assign labels to test samples based on train samples
    KNN = KNeighborsClassifier(n_neighbors=1, n_jobs=-1)
    KNN.fit(x_train, pred_train)
    pred_test = KNN.predict(x_test)
    
    # Test Metrics
    acc_test.append(accuracy_score(y_test, pred_test))
    fpr, tpr, threshold = roc_curve(y_test, pred_test)
    auc_test.append(auc(fpr, tpr))
    prec_B_test.append(precision_score(y_test, pred_test, pos_label=0))
    prec_M_test.append(precision_score(y_test, pred_test, pos_label=1))
    recall_B_test.append(recall_score(y_test, pred_test, pos_label=0))
    recall_M_test.append(recall_score(y_test, pred_test, pos_label=1))
    f1_B_test.append(f1_score(y_test, pred_test, pos_label=0))
    f1_M_test.append(f1_score(y_test, pred_test, pos_label=1))
    
# Average Train Metrics
summary.at['Accuracy', 'Spectral Train'] = np.mean(acc_train)
summary.at['AUC', 'Spectral Train'] = np.mean(auc_train)
summary.at['Precision', 'Spectral Train'] = np.mean(prec_B_train + prec_M_train)
summary.at['Precision_B', 'Spectral Train'] = np.mean(prec_B_train)
summary.at['Precision_M', 'Spectral Train'] = np.mean(prec_M_train)
summary.at['Recall', 'Spectral Train'] = np.mean(recall_B_train + recall_M_train)
summary.at['Recall_B', 'Spectral Train'] = np.mean(recall_B_train)
summary.at['Recall_M', 'Spectral Train'] = np.mean(recall_M_train)
summary.at['F1', 'Spectral Train'] = np.mean(f1_B_train + f1_M_train)
summary.at['F1_B', 'Spectral Train'] = np.mean(f1_B_train)
summary.at['F1_M', 'Spectral Train'] = np.mean(f1_M_train)

# Average Test Metrics
summary.at['Accuracy', 'Spectral Test'] = np.mean(acc_test)
summary.at['AUC', 'Spectral Test'] = np.mean(auc_test)
summary.at['Precision', 'Spectral Test'] = np.mean(prec_B_test + prec_M_test)
summary.at['Precision_B', 'Spectral Test'] = np.mean(prec_B_test)
summary.at['Precision_M', 'Spectral Test'] = np.mean(prec_M_test)
summary.at['Recall', 'Spectral Test'] = np.mean(recall_B_test + recall_M_test)
summary.at['Recall_B', 'Spectral Test'] = np.mean(recall_B_test)
summary.at['Recall_M', 'Spectral Test'] = np.mean(recall_M_test)
summary.at['F1', 'Spectral Test'] = np.mean(f1_B_test + f1_M_test)
summary.at['F1_B', 'Spectral Test'] = np.mean(f1_B_test)
summary.at['F1_M', 'Spectral Test'] = np.mean(f1_M_test)

# Show results
summary.loc[['Accuracy', 'AUC', 'Precision', 'Recall', 'F1']]
100%|██████████████████████████████████████████| 30/30 [00:17<00:00,  1.68it/s]
Out[16]:
Supervised Train Supervised Test Semi-Supervised Train Semi-Supervised Test Unsupervised Train Unsupervised Test Spectral Train Spectral Test
Accuracy 0.986184 0.967847 0.974635 0.961947 0.924123 0.930383 0.852047 0.852802
AUC 0.983061 0.961933 0.969241 0.955131 0.907938 0.915426 0.803597 0.802470
Precision 0.987441 0.969506 0.976693 0.963934 0.929882 0.936120 0.898621 0.904304
Recall 0.983061 0.961933 0.969241 0.955131 0.907938 0.915426 0.803597 0.802470
F1 0.985150 0.965258 0.972648 0.958793 0.916909 0.923678 0.823868 0.823039
In [17]:
# Confusion Matrix and ROC Curve | Train Data
plot_classification_results(y_train, pred_train)
plt.suptitle('Spectral Clustering | Train Data', y=0.9, fontsize=15, fontweight='bold')
plt.show()
In [18]:
# Confusion Matrix and ROC Curve | Test Data
plot_classification_results(y_test, pred_test)
plt.suptitle('Spectral Clustering | Test Data', y=0.9, fontsize=15, fontweight='bold')
plt.show()

(v) Result Comparison

1bv.JPG

In [19]:
# Comparing results on train data
summary_train = summary.loc[['Accuracy', 'AUC', 'Precision', 'Recall', 'F1'], ['Supervised Train', 'Semi-Supervised Train', 'Unsupervised Train', 'Spectral Train']]
summary_train
Out[19]:
Supervised Train Semi-Supervised Train Unsupervised Train Spectral Train
Accuracy 0.986184 0.974635 0.924123 0.852047
AUC 0.983061 0.969241 0.907938 0.803597
Precision 0.987441 0.976693 0.929882 0.898621
Recall 0.983061 0.969241 0.907938 0.803597
F1 0.985150 0.972648 0.916909 0.823868
In [20]:
# Comparing results on test data
summary_test = summary.loc[['Accuracy', 'AUC', 'Precision', 'Recall', 'F1'], ['Supervised Test', 'Semi-Supervised Test', 'Unsupervised Test', 'Spectral Test']]
summary_test
Out[20]:
Supervised Test Semi-Supervised Test Unsupervised Test Spectral Test
Accuracy 0.967847 0.961947 0.930383 0.852802
AUC 0.961933 0.955131 0.915426 0.802470
Precision 0.969506 0.963934 0.936120 0.904304
Recall 0.961933 0.955131 0.915426 0.802470
F1 0.965258 0.958793 0.923678 0.823039
In [21]:
# Plotting all metrics for train and test data
fig, axs = plt.subplots(nrows=5, ncols=2, figsize=(10,20), sharey='row')
custom_palette = sns.color_palette("hls", 4)
sns.set_palette(custom_palette)

# Train Data Accuracy
fig.sca(axs[0][0])
g = sns.barplot(y=summary_train.T['Accuracy'].values, x=summary_train.T['Accuracy'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[0][0].set_title('Accuracy | Train Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Test Data Accuracy
fig.sca(axs[0][1])
g = sns.barplot(y=summary_test.T['Accuracy'].values, x=summary_test.T['Accuracy'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[0][1].set_title('Accuracy | Test Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Train Data AUC
fig.sca(axs[1][0])
g = sns.barplot(y=summary_train.T['AUC'].values, x=summary_train.T['AUC'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[1][0].set_title('AUC | Train Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Test Data AUC
fig.sca(axs[1][1])
g = sns.barplot(y=summary_test.T['AUC'].values, x=summary_test.T['AUC'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[1][1].set_title('AUC | Test Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Train Data Precision
fig.sca(axs[2][0])
g = sns.barplot(y=summary_train.T['Precision'].values, x=summary_train.T['Precision'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[2][0].set_title('Precision | Train Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Test Data Precision
fig.sca(axs[2][1])
g = sns.barplot(y=summary_test.T['Precision'].values, x=summary_test.T['Precision'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[2][1].set_title('Precision | Test Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Train Data Recall
fig.sca(axs[3][0])
g = sns.barplot(y=summary_train.T['Recall'].values, x=summary_train.T['Recall'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[3][0].set_title('Recall | Train Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Test Data Recall
fig.sca(axs[3][1])
g = sns.barplot(y=summary_test.T['Recall'].values, x=summary_test.T['Recall'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[3][1].set_title('Recall | Test Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Train Data F1
fig.sca(axs[4][0])
g = sns.barplot(y=summary_train.T['F1'].values, x=summary_train.T['F1'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[4][0].set_title('F1 | Train Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Test Data F1
fig.sca(axs[4][1])
g = sns.barplot(y=summary_test.T['F1'].values, x=summary_test.T['F1'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[4][1].set_title('F1 | Test Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

fig.tight_layout(h_pad=3)
plt.show()
In [22]:
# Analysing how the models perform considering stratified classes
stratified = summary.loc[['Precision_B', 'Precision_M','Recall_B', 'Recall_M', 'F1_B', 'F1_M']]
stratified_train = stratified.loc[['Precision_B', 'Precision_M','Recall_B', 'Recall_M', 'F1_B', 'F1_M'], ['Supervised Train', 'Semi-Supervised Train', 'Unsupervised Train', 'Spectral Train']]
stratified_test = stratified.loc[['Precision_B', 'Precision_M','Recall_B', 'Recall_M', 'F1_B', 'F1_M'], ['Supervised Test', 'Semi-Supervised Test', 'Unsupervised Test', 'Spectral Test']]
stratified
Out[22]:
Supervised Train Supervised Test Semi-Supervised Train Semi-Supervised Test Unsupervised Train Unsupervised Test Spectral Train Spectral Test
Precision_B 0.982899 0.964927 0.969900 0.959323 0.913088 0.920673 0.812891 0.812101
Precision_M 0.991982 0.974085 0.983487 0.968545 0.946676 0.951568 0.984351 0.996508
Recall_B 0.995338 0.984977 0.990443 0.981690 0.971562 0.973709 0.994056 0.998592
Recall_M 0.970784 0.938889 0.948039 0.928571 0.844314 0.857143 0.613137 0.606349
F1_B 0.989068 0.974728 0.980014 0.970153 0.941383 0.946246 0.894174 0.895406
F1_M 0.981233 0.955788 0.965282 0.947432 0.892434 0.901110 0.753563 0.750671
In [23]:
# Plotting all metrics for train and test data
fig, axs = plt.subplots(nrows=6, ncols=2, figsize=(10,24), sharey='row')
custom_palette = sns.color_palette("hls", 4)
sns.set_palette(custom_palette)

# Train Data Precision_B
fig.sca(axs[0][0])
g = sns.barplot(y=stratified_train.T['Precision_B'].values, x=stratified_train.T['Precision_B'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[0][0].set_title('Precision_B | Train Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Test Data Precision_B
fig.sca(axs[1][0])
g = sns.barplot(y=stratified_test.T['Precision_B'].values, x=stratified_test.T['Precision_B'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[1][0].set_title('Precision_B | Test Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Train Data Precision_M
fig.sca(axs[0][1])
g = sns.barplot(y=stratified_train.T['Precision_M'].values, x=stratified_train.T['Precision_M'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[0][1].set_title('Precision_M | Train Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Test Data Precision_M
fig.sca(axs[1][1])
g = sns.barplot(y=stratified_test.T['Precision_M'].values, x=stratified_test.T['Precision_M'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[1][1].set_title('Precision_M | Test Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Train Data Recall_B
fig.sca(axs[2][0])
g = sns.barplot(y=stratified_train.T['Recall_B'].values, x=stratified_train.T['Recall_B'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[2][0].set_title('Recall_B | Train Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Test Data Recall_B
fig.sca(axs[3][0])
g = sns.barplot(y=stratified_test.T['Recall_B'].values, x=stratified_test.T['Recall_B'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[3][0].set_title('Recall_B | Test Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Train Data Recall_M
fig.sca(axs[2][1])
g = sns.barplot(y=stratified_train.T['Recall_M'].values, x=stratified_train.T['Recall_M'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[2][1].set_title('Recall_M | Train Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Test Data Recall_M
fig.sca(axs[3][1])
g = sns.barplot(y=stratified_test.T['Recall_M'].values, x=stratified_test.T['Recall_M'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[3][1].set_title('Recall_M | Test Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Train Data F1_B
fig.sca(axs[4][0])
g = sns.barplot(y=stratified_train.T['F1_B'].values, x=stratified_train.T['F1_B'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[4][0].set_title('F1_B | Train Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Test Data F1_B
fig.sca(axs[5][0])
g = sns.barplot(y=stratified_test.T['F1_B'].values, x=stratified_test.T['F1_B'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[5][0].set_title('F1_B | Test Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Train Data F1_M
fig.sca(axs[4][1])
g = sns.barplot(y=stratified_train.T['F1_M'].values, x=stratified_train.T['F1_M'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[4][1].set_title('F1_M | Train Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

# Test Data F1_M
fig.sca(axs[5][1])
g = sns.barplot(y=stratified_test.T['F1_M'].values, x=stratified_test.T['F1_M'].index)
g.set(ylim=(0, 1), ylabel=None)
g.tick_params(labelrotation=15)
axs[5][1].set_title('F1_M | Test Data', pad=10, fontdict={'fontsize':14, 'fontweight':'bold'})

fig.tight_layout(h_pad=3)
plt.show()

Conclusions Regarding Model Performance

When comparing the Supervised and the Semi-Supervised methods, there is only a small loss in performance across all metrics.

A somewhat larger drop in performance can be observed when moving from Semi-Supervised to Unsupervised methods.

Yet, the most striking loss in performance can be found among the Unsupervised methods themselves, with Spectral Clustering obtaining quite poor results.

There is also consitency across all metrics, with the model never displaying a palpable disparity in performance among metrics.

Conclusion Regarding Class-Stratified Performance

Performance is very slightly higher for the slightly more common class (Benign), but the difference is indeed very minimal.

Given how the dataset is decently balanced, it is no surprise that the classes obtain similar performances in all class-stratified metrics for both train and test datasets.