CNNs for Image Colorization

DSCI 552 | Machine Learning for Data Science

Homework 7

Matheus Schmitz

In [1]:
# Py Data Stack
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt

# Scitkit-Learn
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder

# Scikit-Image
from skimage.color import rgb2gray
from skimage import img_as_ubyte

# Tensor Flow & Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Reshape, Softmax, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, CSVLogger, EarlyStopping

# Progress Bar
from tqdm import tqdm

# OS
import os

# Disable warnings
import warnings
In [2]:
# Making sure Tensor Flow is properly working with GPU
print('Available Devices:')
for device in tf.config.experimental.list_physical_devices():
print(f'TensorFlow using GPU: {tf.test.is_gpu_available()}')
print(f'TensorFlow using CUDA: {tf.test.is_built_with_cuda()}')
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
    print("Oh boy, there's no GPU, so prepare yourself for a long wait :(")
    !nvcc --version
    print('ooops, watch out, something went wrong!')
    print('ooops, watch out, something went wrong!')
WARNING:tensorflow:From <ipython-input-2-46d45dd0bd84>:6: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
(a) Dataset


Guide on loading the downloaded data:

Guide on loading the Cifar-10 dataset directly from Keras:

In [3]:
# Downloading the Cifar-10 dataset from Keras
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
In [4]:
# Shape of x is: samples x img_width x img_height x color_channels

# Shape of y is: samples x class_encoding
(50000, 32, 32, 3)
(50000, 1)

(b) Extract the Bird Class


In [5]:
# Find the label encoding for the bird class
cifar_10_classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
bird_encoding = cifar_10_classes.index('bird')
print(f'Bird encoding: {bird_encoding}')
Bird encoding: 2
In [6]:
# Create a boolean mask to extract only bird samples
mask_train = [True if i == bird_encoding else False for i in y_train]
mask_test = [True if i == bird_encoding else False for i in y_test]

# Filter train and test dataset
x_train_bird = x_train[mask_train]
y_train_bird = y_train[mask_train]
x_test_bird = x_test[mask_test]
y_test_bird = y_test[mask_test]

# Check results
print(f'x_train_bird.shape: {x_train_bird.shape}')
print(f'y_train_bird.shape: {y_train_bird.shape}')
print(f'x_test_bird.shape: {x_test_bird.shape}')
print(f'y_test_bird.shape: {y_test_bird.shape}')
x_train_bird.shape: (5000, 32, 32, 3)
y_train_bird.shape: (5000, 1)
x_test_bird.shape: (1000, 32, 32, 3)
y_test_bird.shape: (1000, 1)
In [7]:
# Check if images are birds
fig, axs = plt.subplots(ncols=5, nrows=2, figsize=(15,5))

# Loop through axes and plot random images
for ax in axs.ravel():
    ax.set_xticks([], [])  
    ax.set_yticks([], []) 


(c) Pixel Selection


In [8]:
# I'm opting to choose all pixels

(d) Tetra-Chrome Conversion


In [9]:

# K-Means requires the data to have 2 or less dimensions, so reshape is required
KMEANS = KMeans(n_clusters=NUM_COLORS), 3))
cluster_centers = KMEANS.cluster_centers_
pred_clusters_train = KMEANS.predict(x_train_bird.reshape(-1, 3))
pred_clusters_test = KMEANS.predict(x_test_bird.reshape(-1, 3))

# Use cluster_centers and the predicted cluster of each pixel to convert images to tetra-chrome
x_train_tetra = np.array([cluster_centers[pred_clusters_train[i]] for i in range(len(x_train_bird.reshape(-1, 3)))], dtype=np.uint8)
x_test_tetra = np.array([cluster_centers[pred_clusters_test[i]] for i in range(len(x_test_bird.reshape(-1, 3)))], dtype=np.uint8)

# Conver the tetra-chrome images back to the correct shape for visualization
x_train_tetra = x_train_tetra.reshape(-1, 32, 32, 3)
x_test_tetra = x_test_tetra.reshape(-1, 32, 32, 3)

# Check the images 
fig, axs = plt.subplots(ncols=5, nrows=2, figsize=(15,5))

# Loop through axes and plot random images
for ax in axs.ravel():
    ax.set_xticks([], [])  
    ax.set_yticks([], []) 

In [10]:
# Indeed the colors have low contrast, so I'll use the suggested colors for contrast
contrast_colors =  np.array([[0,0,128],[230,25,75],[170,255,195],[255,255,255]], dtype=np.uint8)

# Color the images
x_train_colored = np.array([contrast_colors[pred_clusters_train[i]] for i in range(len(x_train_bird.reshape(-1, 3)))], dtype=np.uint8)
x_test_colored = np.array([contrast_colors[pred_clusters_test[i]] for i in range(len(x_test_bird.reshape(-1, 3)))], dtype=np.uint8)

# Conver the tetra-chrome images back to the correct shape for visualization
x_train_colored = x_train_colored.reshape(-1, 32, 32, 3)
x_test_colored = x_test_colored.reshape(-1, 32, 32, 3)

# Check the images 
fig, axs = plt.subplots(ncols=5, nrows=2, figsize=(15,5))

# Loop through axes and plot random images
for ax in axs.ravel():
    ax.set_xticks([], [])  
    ax.set_yticks([], []) 


(e) Grayscale Images


In [11]:
# Conver images to grayscle
x_train_gray = np.array([rgb2gray(x_train_bird[i]) for i in range(len(x_train_bird))])
x_test_gray = np.array([rgb2gray(x_test_bird[i]) for i in range(len(x_test_bird))])

# Check the images 
fig, axs = plt.subplots(ncols=5, nrows=2, figsize=(15,5))

# Loop through axes and plot random images
for ax in axs.ravel():
    ax.imshow(x_train_gray[np.random.choice(x_train_gray.shape[0])], cmap='gray')
    ax.set_xticks([], [])  
    ax.set_yticks([], []) 


(f) Deep Convolutional Neural Network

2f.JPG 2_note7.JPG 2_note8.JPG 2_note9.JPG

In [12]:
# One-Hot Encode the clusters (tetra-chrome pixel color)
OHE = OneHotEncoder()
pred_clusters_train_ohe = OHE.fit_transform(pred_clusters_train.reshape(-1, 1))
pred_clusters_test_ohe = OHE.transform(pred_clusters_test.reshape(-1, 1))

# Reshape the pixel colors to match the image shape
pred_clusters_train_ohe = pred_clusters_train_ohe.reshape(-1,32*32*NUM_COLORS).toarray().astype(int)
pred_clusters_train_ohe = pred_clusters_train_ohe.reshape(-1,32,32,NUM_COLORS)
tetrachrome_color_train = pred_clusters_train_ohe.copy()
pred_clusters_test_ohe = pred_clusters_test_ohe.reshape(-1,32*32*NUM_COLORS).toarray().astype(int)
pred_clusters_test_ohe = pred_clusters_test_ohe.reshape(-1,32,32,NUM_COLORS)
tetrachrome_color_test = pred_clusters_test_ohe.copy()

# Shapes
print(f'pred_clusters_train_ohe.shape: {pred_clusters_train_ohe.shape}')
print(f'pred_clusters_test_ohe.shape: {pred_clusters_test_ohe.shape}')

# Reshape the gray images to match the expected input shape of (32, 32, 1), where 1 the the number of channels (aka grayscale)
x_train_gray = x_train_gray.reshape(-1, 32, 32, 1)
x_test_gray = x_test_gray.reshape(-1, 32, 32, 1)

# And convert the graysale images to uint8
x_train_gray = img_as_ubyte(x_train_gray)
x_test_gray = img_as_ubyte(x_test_gray)

# Shapes
print(f'x_train_gray.shape: {x_train_gray.shape}')
print(f'x_test_gray.shape: {x_test_gray.shape}')
pred_clusters_train_ohe.shape: (5000, 32, 32, 4)
pred_clusters_test_ohe.shape: (1000, 32, 32, 4)
x_train_gray.shape: (5000, 32, 32, 1)
x_test_gray.shape: (1000, 32, 32, 1)
In [21]:
model = Sequential()
model.add(Conv2D(128, kernel_size=5, activation='relu', input_shape=(32, 32, 1), padding='same'))
model.add(MaxPool2D(pool_size=2, strides=2))
model.add(Conv2D(64, kernel_size=5, activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=2, strides=2))
model.add(Conv2D(32, kernel_size=5, activation='relu', padding='same'))
model.add(MaxPool2D(pool_size=2, strides=2))
model.add(Reshape((32, 32, NUM_COLORS)))

# Compile the model

# Model summary
Model: "sequential_1"
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 32, 32, 128)       3328      
max_pooling2d_3 (MaxPooling2 (None, 16, 16, 128)       0         
conv2d_4 (Conv2D)            (None, 16, 16, 64)        204864    
max_pooling2d_4 (MaxPooling2 (None, 8, 8, 64)          0         
conv2d_5 (Conv2D)            (None, 8, 8, 32)          51232     
max_pooling2d_5 (MaxPooling2 (None, 4, 4, 32)          0         
flatten_1 (Flatten)          (None, 512)               0         
dense_1 (Dense)              (None, 4096)              2101248   
reshape_1 (Reshape)          (None, 32, 32, 4)         0         
softmax_1 (Softmax)          (None, 32, 32, 4)         0         
Total params: 2,360,672
Trainable params: 2,360,672
Non-trainable params: 0
In [22]:
# Set to true to continue the model's training using the pre-trained neurons - THIS WILL GIVE AN ERROR IF THE WINDOW_SIZE OR NEURONS WAS CHANGED
# Set to false to train the neural network from scratch - THIS WILL LOSE ALL PROGRESS AND CAUSE WORSE PREDICTIONS

if LOAD_TRAINED_WEIGHTS and os.path.exists('./checkpoints/model_k4.hdf5'):
    # Try loading weights. Will fail if the model structure changed
        # Load best model weights

        # Objective Function

        # Check model
        # Variable to guide the csv log callback
        print('Could not load weights. Most likely the network architecture changed.')

Could not load weights. Most likely the network architecture changed
In [23]:

    # Define directory for model checkpoints
    BACKUP_DIR = './checkpoints'
    if not os.path.exists(BACKUP_DIR):

    # Define file to store checkpoint
    BACKUP_FILE = os.path.join(BACKUP_DIR, 'model_k4.hdf5')

    # Callbacks
    checkpoint = ModelCheckpoint(BACKUP_FILE, 
    plateauLRreduce = ReduceLROnPlateau(factor = 0.5,
                                        patience = 3,
                                        min_lr = 0.0000001,
    stopearly = EarlyStopping(monitor='val_loss',
    logCSV = CSVLogger(filename='log_model_k4.csv',
                       append=(LOAD_TRAINED_WEIGHTS & SUCCESSFUL_WEIGHT_LOAD))
    model_callbacks = [checkpoint, plateauLRreduce, stopearly, logCSV]

    # Train model and save history
    model_history =,
Model Accuray, Loss and Learning Rate

In [24]:
# Read the log file
log_model_k4 = pd.read_csv('log_model_k4.csv')

# Create figure
fig, axs = plt.subplots(nrows=1, ncols=3, figsize=(15,5))

# Accuracy[0])
g1 = plt.plot(log_model_k4.index, log_model_k4.accuracy, color='navy', label='Train')
g2 = plt.plot(log_model_k4.index, log_model_k4.val_accuracy, color='darkorange', label='Test')
axs[0].set_ylim([0, 1])

# Loss[1])
g2 = plt.plot(log_model_k4.index, log_model_k4.loss, color='navy', label='Train')
g3 = plt.plot(log_model_k4.index, log_model_k4.val_loss, color='darkorange', label='Test')

# Learning Rate[2])
g = plt.plot(log_model_k4.index,, color='green')
axs[2].title.set_text('Learning Rate')

In [25]:
performance_train = model.evaluate(x_train_gray, tetrachrome_color_train, return_dict=True)
performance_test = model.evaluate(x_test_gray, tetrachrome_color_test, return_dict=True)

print('Train Dataset')
print(f'Loss: {performance_train["loss"]:.5f}')
print(f'Accuracy: {performance_train["accuracy"]:.5f}')
print('Test Dataset')
print(f'Loss: {performance_test["loss"]:.5f}')
print(f'Accuracy: {performance_test["accuracy"]:.5f}')
157/157 [==============================] - 1s 9ms/step - loss: 0.6957 - accuracy: 0.7069
32/32 [==============================] - 0s 8ms/step - loss: 0.9324 - accuracy: 0.6096

Train Dataset
Loss: 0.69566
Accuracy: 0.70685

Test Dataset
Loss: 0.93242
Accuracy: 0.60964

Model Colorized Images

In [26]:
# Use the model to predict the class of each pixel in the test dataset
pred_test_colors = model.predict(x_test_gray)

# Need to flatten pixels as the one-hot encoded arrays
pred_test_colors = pred_test_colors.reshape(-1, 4)

# Use the predicted pixel class to extract the pixel color
colored_test_images = []
for k in tqdm(range(len(pred_test_colors))):
    colored_test_images.append([np.uint8(color) for color in contrast_colors[np.argmax(pred_test_colors[k])]])
# Reshape
colored_test_images = np.asarray(np.reshape(colored_test_images, (-1, 32, 32, 3)))
100%|████████████████████████████| 1024000/1024000 [00:08<00:00, 118285.01it/s]

Vizualising First 10 Images

In [29]:
# Plot original images, tetra-chrome images, and images colored by the model
num_images = 10
fig, axs = plt.subplots(ncols=4, nrows=num_images, figsize=(15,num_images*3))

first_images = 0
# Loop through axes and plot random images
for axs_row in range(axs.shape[0]):
    # set image index
    img_index = first_images
    first_images += 1
    # Plot original image
    axs[axs_row][0].set_xticks([], [])  
    axs[axs_row][0].set_yticks([], [])
    # Plot tetra-chrome image
    axs[axs_row][1].set_xticks([], [])  
    axs[axs_row][1].set_yticks([], [])

    # Plot grayscale image (the model input)
    axs[axs_row][2].imshow(x_test_gray[img_index].reshape(32, 32), cmap='gray')
    axs[axs_row][2].set_xticks([], [])  
    axs[axs_row][2].set_yticks([], [])
    # Plot model colored image
    axs[axs_row][3].set_xticks([], [])  
    axs[axs_row][3].set_yticks([], [])
    axs[axs_row][3].set_title('CNN Colored')


Vizualising 5 Random Images

In [31]:
# Plot original images, tetra-chrome images, and images colored by the model
num_images = 5
fig, axs = plt.subplots(ncols=4, nrows=num_images, figsize=(15,num_images*3))

# Loop through axes and plot random images
for axs_row in range(axs.shape[0]):
    # Pick a random image
    img_index = np.random.choice(x_test_bird.shape[0])
    # Plot original image
    axs[axs_row][0].set_xticks([], [])  
    axs[axs_row][0].set_yticks([], [])
    # Plot tetra-chrome image
    axs[axs_row][1].set_xticks([], [])  
    axs[axs_row][1].set_yticks([], [])

    # Plot grayscale image (the model input)
    axs[axs_row][2].imshow(x_test_gray[img_index].reshape(32, 32), cmap='gray')
    axs[axs_row][2].set_xticks([], [])  
    axs[axs_row][2].set_yticks([], [])
    # Plot model colored image
    axs[axs_row][3].set_xticks([], [])  
    axs[axs_row][3].set_yticks([], [])
    axs[axs_row][3].set_title('CNN Colored')


The End!