DSCI 552 | Machine Learning for Data Science
Homework 7
Matheus Schmitz
USC ID: 5039286453
# Py Data Stack
import numpy as np
import pandas as pd
# Visualization
import matplotlib.pyplot as plt
# Scikit-Learn
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder
# Scikit-Image
from skimage.color import rgb2gray
from skimage import img_as_ubyte
# TensorFlow & Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Reshape, Softmax, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, CSVLogger, EarlyStopping
# Progress Bar
from tqdm import tqdm
# OS
import os
# Disable warnings
import warnings
warnings.filterwarnings("ignore")
# Making sure TensorFlow is properly working with the GPU
print('Available Devices:')
for device in tf.config.experimental.list_physical_devices():
    print(device)
print()
print(f'TensorFlow using GPU: {tf.test.is_gpu_available()}')
print(f'TensorFlow using CUDA: {tf.test.is_built_with_cuda()}')
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    print("Oh boy, there's no GPU, so prepare yourself for a long wait :(")
print()
try:
    !nvcc --version
except:
    print('ooops, watch out, something went wrong!')
print()
try:
    !nvidia-smi
except:
    print('ooops, watch out, something went wrong!')
Guide on loading the downloaded data: http://www.cs.toronto.edu/~kriz/cifar.html
Guide on loading the Cifar-10 dataset directly from Keras: https://www.tensorflow.org/api_docs/python/tf/keras/datasets/cifar10/load_data
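For reference, here is a minimal sketch (not used below, which relies on the Keras loader) of how the downloaded batch files could be loaded manually, following the format described in the first guide:
import pickle
# Each CIFAR-10 batch file is a pickled dict with keys b'data' (10000 x 3072 uint8) and b'labels'
def load_cifar_batch(file_path):
    with open(file_path, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    # Each row stores 1024 red, then 1024 green, then 1024 blue values for one 32x32 image
    images = batch[b'data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    labels = np.array(batch[b'labels'])
    return images, labels
# Example usage (assumes the extracted 'cifar-10-batches-py' folder sits next to this notebook):
# x_batch_1, y_batch_1 = load_cifar_batch('./cifar-10-batches-py/data_batch_1')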
# Downloading the Cifar-10 dataset from Keras
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
# Shape of x is: samples x img_width x img_height x color_channels
print(x_train.shape)
# Shape of y is: samples x class_encoding
print(y_train.shape)
# Find the label encoding for the bird class
cifar_10_classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
bird_encoding = cifar_10_classes.index('bird')
print(f'Bird encoding: {bird_encoding}')
# Create a boolean mask to extract only bird samples
mask_train = (y_train == bird_encoding).ravel()
mask_test = (y_test == bird_encoding).ravel()
# Filter train and test dataset
x_train_bird = x_train[mask_train]
y_train_bird = y_train[mask_train]
x_test_bird = x_test[mask_test]
y_test_bird = y_test[mask_test]
# Check results
print(f'x_train_bird.shape: {x_train_bird.shape}')
print(f'y_train_bird.shape: {y_train_bird.shape}')
print(f'x_test_bird.shape: {x_test_bird.shape}')
print(f'y_test_bird.shape: {y_test_bird.shape}')
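As a quick optional sanity check on the masks above, every retained label should equal the bird encoding:
# Optional sanity check: every retained label should be the bird class
assert (y_train_bird == bird_encoding).all()
assert (y_test_bird == bird_encoding).all()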
# Check that the filtered images are indeed birds
fig, axs = plt.subplots(ncols=5, nrows=2, figsize=(15,5))
# Loop through the axes and plot random images
for ax in axs.ravel():
    ax.imshow(x_train_bird[np.random.choice(x_train_bird.shape[0])])
    ax.set_xticks([], [])
    ax.set_yticks([], [])
plt.tight_layout()
# Fit K-Means on all pixels from all training images (rather than a random subsample)
NUM_COLORS = 4
# K-Means expects a 2D array of shape (n_samples, n_features), so each image is reshaped into (num_pixels, 3) RGB rows
KMEANS = KMeans(n_clusters=NUM_COLORS)
KMEANS.fit(x_train_bird.reshape(-1, 3))
cluster_centers = KMEANS.cluster_centers_
pred_clusters_train = KMEANS.predict(x_train_bird.reshape(-1, 3))
pred_clusters_test = KMEANS.predict(x_test_bird.reshape(-1, 3))
# Use cluster_centers and the predicted cluster of each pixel to convert the images to tetra-chrome
x_train_tetra = cluster_centers[pred_clusters_train].astype(np.uint8)
x_test_tetra = cluster_centers[pred_clusters_test].astype(np.uint8)
# Convert the tetra-chrome images back to the original image shape for visualization
x_train_tetra = x_train_tetra.reshape(-1, 32, 32, 3)
x_test_tetra = x_test_tetra.reshape(-1, 32, 32, 3)
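A small optional check that the quantization worked as intended: each tetra-chrome image should contain at most NUM_COLORS distinct colors.
# Optional check: a quantized image should use at most NUM_COLORS distinct colors
unique_colors = np.unique(x_train_tetra[0].reshape(-1, 3), axis=0)
print(f'Distinct colors in the first tetra-chrome image: {len(unique_colors)} (expected <= {NUM_COLORS})')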
# Check the images
fig, axs = plt.subplots(ncols=5, nrows=2, figsize=(15,5))
# Loop through axes and plot random images
for ax in axs.ravel():
    ax.imshow(x_train_tetra[np.random.choice(x_train_tetra.shape[0])])
    ax.set_xticks([], [])
    ax.set_yticks([], [])
plt.tight_layout()
# The K-Means cluster centers indeed have low contrast, so use the suggested high-contrast palette instead
contrast_colors = np.array([[0,0,128],[230,25,75],[170,255,195],[255,255,255]], dtype=np.uint8)
# Color the images by indexing the high-contrast palette with each pixel's predicted cluster
x_train_colored = contrast_colors[pred_clusters_train]
x_test_colored = contrast_colors[pred_clusters_test]
# Convert the colored images back to the original image shape for visualization
x_train_colored = x_train_colored.reshape(-1, 32, 32, 3)
x_test_colored = x_test_colored.reshape(-1, 32, 32, 3)
# Check the images
fig, axs = plt.subplots(ncols=5, nrows=2, figsize=(15,5))
# Loop through axes and plot random images
for ax in axs.ravel():
    ax.imshow(x_train_colored[np.random.choice(x_train_colored.shape[0])])
    ax.set_xticks([], [])
    ax.set_yticks([], [])
plt.tight_layout()
# Convert the images to grayscale
x_train_gray = np.array([rgb2gray(x_train_bird[i]) for i in range(len(x_train_bird))])
x_test_gray = np.array([rgb2gray(x_test_bird[i]) for i in range(len(x_test_bird))])
# Check the images
fig, axs = plt.subplots(ncols=5, nrows=2, figsize=(15,5))
# Loop through axes and plot random images
for ax in axs.ravel():
    ax.imshow(x_train_gray[np.random.choice(x_train_gray.shape[0])], cmap='gray')
    ax.set_xticks([], [])
    ax.set_yticks([], [])
plt.tight_layout()
# One-Hot Encode the clusters (tetra-chrome pixel color)
OHE = OneHotEncoder()
pred_clusters_train_ohe = OHE.fit_transform(pred_clusters_train.reshape(-1, 1))
pred_clusters_test_ohe = OHE.transform(pred_clusters_test.reshape(-1, 1))
# Reshape the pixel colors to match the image shape
pred_clusters_train_ohe = pred_clusters_train_ohe.reshape(-1,32*32*NUM_COLORS).toarray().astype(int)
pred_clusters_train_ohe = pred_clusters_train_ohe.reshape(-1,32,32,NUM_COLORS)
tetrachrome_color_train = pred_clusters_train_ohe.copy()
pred_clusters_test_ohe = pred_clusters_test_ohe.reshape(-1,32*32*NUM_COLORS).toarray().astype(int)
pred_clusters_test_ohe = pred_clusters_test_ohe.reshape(-1,32,32,NUM_COLORS)
tetrachrome_color_test = pred_clusters_test_ohe.copy()
# Shapes
print(f'pred_clusters_train_ohe.shape: {pred_clusters_train_ohe.shape}')
print(f'pred_clusters_test_ohe.shape: {pred_clusters_test_ohe.shape}')
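Optionally, the fitted encoder's categories can be inspected to confirm that the one-hot columns line up with the K-Means cluster indices 0..NUM_COLORS-1:
# Optional check: the one-hot columns should correspond to the cluster indices 0..NUM_COLORS-1
print(OHE.categories_)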
# Reshape the gray images to match the expected input shape of (32, 32, 1), where 1 is the number of channels (i.e. grayscale)
x_train_gray = x_train_gray.reshape(-1, 32, 32, 1)
x_test_gray = x_test_gray.reshape(-1, 32, 32, 1)
# And convert the grayscale images from floats in [0, 1] to uint8
x_train_gray = img_as_ubyte(x_train_gray)
x_test_gray = img_as_ubyte(x_test_gray)
# Shapes
print(f'x_train_gray.shape: {x_train_gray.shape}')
print(f'x_test_gray.shape: {x_test_gray.shape}')
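One more optional check before training: the grayscale inputs and one-hot targets should have matching sample counts, and every one-hot pixel should sum to exactly 1.
# Optional check: inputs and targets are aligned, and each pixel has exactly one color class
assert x_train_gray.shape[0] == tetrachrome_color_train.shape[0]
assert (tetrachrome_color_train.sum(axis=-1) == 1).all()
assert (tetrachrome_color_test.sum(axis=-1) == 1).all()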
# CNN
model = Sequential()
model.add(Conv2D(128, kernel_size=5, activation='relu', input_shape=(32, 32, 1), padding='same'))
model.add(MaxPool2D(pool_size=2, strides=2))
model.add(Conv2D(64, kernel_size=5, activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=2, strides=2))
model.add(Conv2D(32, kernel_size=5, activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=2, strides=2))
model.add(Flatten())
model.add(Dense(4096))  # 4096 = 32 * 32 * NUM_COLORS: one logit per pixel per tetra-chrome color
model.add(Reshape((32, 32, NUM_COLORS)))
model.add(Softmax())  # per-pixel probability distribution over the NUM_COLORS color classes
# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
loss='categorical_crossentropy',
metrics=['accuracy'])
# Model summary
model.summary()
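As a quick sketch to confirm the head behaves as intended, the Dense + Reshape + Softmax layers should turn the 4096 logits into one probability distribution over the NUM_COLORS classes per pixel (each pixel's probabilities summing to 1):
# Optional check: even untrained, the model outputs one probability distribution per pixel
sample_out = model.predict(x_train_gray[:1])
print(sample_out.shape)                           # (1, 32, 32, NUM_COLORS)
print(np.allclose(sample_out.sum(axis=-1), 1.0))  # True: per-pixel probabilities sum to 1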
# Set to True to continue training from the saved weights - THIS WILL GIVE AN ERROR IF THE NETWORK ARCHITECTURE WAS CHANGED
# Set to False to train the neural network from scratch - THIS WILL DISCARD ALL PREVIOUS TRAINING PROGRESS AND CAUSE WORSE PREDICTIONS
LOAD_TRAINED_WEIGHTS = True
if LOAD_TRAINED_WEIGHTS and os.path.exists('./checkpoints/model_k4.hdf5'):
    # Try loading the weights. This will fail if the model architecture changed
    try:
        # Load the best model weights
        model.load_weights('./checkpoints/model_k4.hdf5')
        # Objective Function
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        # Check the model
        model.summary()
        # Variable to guide the CSV log callback
        SUCCESSFUL_WEIGHT_LOAD = True
    except:
        SUCCESSFUL_WEIGHT_LOAD = False
        print('Could not load weights. Most likely the network architecture changed.')
else:
    SUCCESSFUL_WEIGHT_LOAD = False
EPOCHS = 50
BATCH_SIZE = 64
TRAIN_MODEL = True
if TRAIN_MODEL:
    # Define the directory for model checkpoints
    BACKUP_DIR = './checkpoints'
    if not os.path.exists(BACKUP_DIR):
        os.mkdir(BACKUP_DIR)
    # Define the file used to store the checkpoint
    BACKUP_FILE = os.path.join(BACKUP_DIR, 'model_k4.hdf5')
    # Callbacks
    checkpoint = ModelCheckpoint(BACKUP_FILE,
                                 monitor='val_loss',
                                 save_best_only=True,
                                 save_weights_only=True,
                                 verbose=0)
    plateauLRreduce = ReduceLROnPlateau(factor=0.5,
                                        patience=3,
                                        monitor='val_loss',
                                        min_lr=0.0000001,
                                        verbose=1)
    stopearly = EarlyStopping(monitor='val_loss',
                              patience=15,
                              verbose=1)
    logCSV = CSVLogger(filename='log_model_k4.csv',
                       separator=',',
                       append=(LOAD_TRAINED_WEIGHTS and SUCCESSFUL_WEIGHT_LOAD))
    model_callbacks = [checkpoint, plateauLRreduce, stopearly, logCSV]
    # Train the model and save its history
    model_history = model.fit(x_train_gray,
                              tetrachrome_color_train,
                              epochs=EPOCHS,
                              batch_size=BATCH_SIZE,
                              validation_split=0.2,
                              callbacks=model_callbacks,
                              verbose=1,
                              shuffle=True)
Model Accuracy, Loss and Learning Rate
# Read the log file
log_model_k4 = pd.read_csv('log_model_k4.csv')
# Create figure
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(15,5))
# Accuracy
fig.sca(axs[0])
g1 = plt.plot(log_model_k4.index, log_model_k4.accuracy, color='navy', label='Train')
g2 = plt.plot(log_model_k4.index, log_model_k4.val_accuracy, color='darkorange', label='Validation')
axs[0].set_ylim([0, 1])
axs[0].title.set_text('Accuracy')
axs[0].set_xlabel('Epoch')
axs[0].legend()
# Loss
fig.sca(axs[1])
g2 = plt.plot(log_model_k4.index, log_model_k4.loss, color='navy', label='Train')
g3 = plt.plot(log_model_k4.index, log_model_k4.val_loss, color='darkorange', label='Validation')
axs[1].title.set_text('Loss')
axs[1].set_xlabel('Epoch')
axs[1].legend()
# Learning Rate
fig.sca(axs[2])
axs[2].set_yscale('log')
g = plt.plot(log_model_k4.index, log_model_k4.lr, color='green')
axs[2].title.set_text('Learning Rate')
axs[2].set_xlabel('Epoch')
plt.tight_layout(w_pad=5)
performance_train = model.evaluate(x_train_gray, tetrachrome_color_train, return_dict=True)
performance_test = model.evaluate(x_test_gray, tetrachrome_color_test, return_dict=True)
print()
print('Train Dataset')
print(f'Loss: {performance_train["loss"]:.5f}')
print(f'Accuracy: {performance_train["accuracy"]:.5f}')
print()
print('Test Dataset')
print(f'Loss: {performance_test["loss"]:.5f}')
print(f'Accuracy: {performance_test["accuracy"]:.5f}')
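For context, a rough baseline sketch (not part of the assignment): always predicting the most common tetra-chrome color yields the per-pixel accuracy below, which the CNN should beat.
# Majority-color baseline: accuracy of always predicting the most frequent color class
color_counts = tetrachrome_color_test.sum(axis=(0, 1, 2))
print(f'Majority-color baseline accuracy (test): {color_counts.max() / color_counts.sum():.5f}')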
Model Colorized Images
# Use the model to predict the color class of each pixel in the test dataset
pred_test_colors = model.predict(x_test_gray)
# Flatten the predictions so each row is one pixel's probability distribution over the NUM_COLORS classes
pred_test_colors = pred_test_colors.reshape(-1, NUM_COLORS)
# Use each pixel's predicted class to look up its color in the high-contrast palette
colored_test_images = []
for k in tqdm(range(len(pred_test_colors))):
    colored_test_images.append([np.uint8(color) for color in contrast_colors[np.argmax(pred_test_colors[k])]])
# Reshape back into 32x32 RGB images
colored_test_images = np.asarray(np.reshape(colored_test_images, (-1, 32, 32, 3)))
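The same array can also be built without the Python loop; a vectorized equivalent as a sketch:
# Vectorized alternative: look up every pixel's argmax color in a single indexing operation
colored_test_images_vec = contrast_colors[np.argmax(pred_test_colors, axis=1)].reshape(-1, 32, 32, 3)
print(np.array_equal(colored_test_images, colored_test_images_vec))  # True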
Visualizing the First 10 Images
# Plot original images, tetra-chrome images, and images colored by the model
num_images = 10
fig, axs = plt.subplots(ncols=4, nrows=num_images, figsize=(15,num_images*3))
first_images = 0
# Loop through the axes rows and plot the first `num_images` test images in order
for axs_row in range(axs.shape[0]):
    # Set the image index
    img_index = first_images
    first_images += 1
    # Plot the original image
    axs[axs_row][0].imshow(x_test_bird[img_index])
    axs[axs_row][0].set_xticks([], [])
    axs[axs_row][0].set_yticks([], [])
    axs[axs_row][0].set_title('Original')
    # Plot the tetra-chrome image
    axs[axs_row][1].imshow(x_test_colored[img_index])
    axs[axs_row][1].set_xticks([], [])
    axs[axs_row][1].set_yticks([], [])
    axs[axs_row][1].set_title('Tetra-Chrome')
    # Plot the grayscale image (the model input)
    axs[axs_row][2].imshow(x_test_gray[img_index].reshape(32, 32), cmap='gray')
    axs[axs_row][2].set_xticks([], [])
    axs[axs_row][2].set_yticks([], [])
    axs[axs_row][2].set_title('Grayscale')
    # Plot the model-colored image
    axs[axs_row][3].imshow(colored_test_images[img_index])
    axs[axs_row][3].set_xticks([], [])
    axs[axs_row][3].set_yticks([], [])
    axs[axs_row][3].set_title('CNN Colored')
plt.tight_layout()
Visualizing 5 Random Images
# Plot original images, tetra-chrome images, and images colored by the model
num_images = 5
fig, axs = plt.subplots(ncols=4, nrows=num_images, figsize=(15,num_images*3))
# Loop through the axes rows and plot random test images
for axs_row in range(axs.shape[0]):
    # Pick a random image
    img_index = np.random.choice(x_test_bird.shape[0])
    # Plot the original image
    axs[axs_row][0].imshow(x_test_bird[img_index])
    axs[axs_row][0].set_xticks([], [])
    axs[axs_row][0].set_yticks([], [])
    axs[axs_row][0].set_title('Original')
    # Plot the tetra-chrome image
    axs[axs_row][1].imshow(x_test_colored[img_index])
    axs[axs_row][1].set_xticks([], [])
    axs[axs_row][1].set_yticks([], [])
    axs[axs_row][1].set_title('Tetra-Chrome')
    # Plot the grayscale image (the model input)
    axs[axs_row][2].imshow(x_test_gray[img_index].reshape(32, 32), cmap='gray')
    axs[axs_row][2].set_xticks([], [])
    axs[axs_row][2].set_yticks([], [])
    axs[axs_row][2].set_title('Grayscale')
    # Plot the model-colored image
    axs[axs_row][3].imshow(colored_test_images[img_index])
    axs[axs_row][3].set_xticks([], [])
    axs[axs_row][3].set_yticks([], [])
    axs[axs_row][3].set_title('CNN Colored')
plt.tight_layout()