Manually building my own neural network as a learning exercise
# Using nothing besides numpy!
import numpy as np
def initialize_parameters(input_layer_dims):
    # Dictionary for the parameters (weights + biases)
    parameters = {}
    # Number of layers (including the input layer)
    length = len(input_layer_dims)
    # Loop through the layers, skipping the input layer
    for i in range(1, length):
        # Initialize the weights matrix with small random values
        parameters['W' + str(i)] = np.random.randn(input_layer_dims[i], input_layer_dims[i-1]) * 0.01
        # Initialize the biases with zeros
        parameters['b' + str(i)] = np.zeros((input_layer_dims[i], 1))
    return parameters
initialize_parameters([3, 5])
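# Quick shape check (illustrative, names below are just for this check):
# for dims [3, 5] we expect W1 with shape (5, 3) and b1 with shape (5, 1)
params_check = initialize_parameters([3, 5])
print(params_check['W1'].shape, params_check['b1'].shape)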
def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    return A, Z
sigmoid(2.7)
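# Expected: (≈ 0.937, 2.7), since the cached Z is returned alongside the activation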
def relu(Z):
    A = np.maximum(0, Z)
    return A, Z
relu(-2.7)
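# Expected: (0.0, -2.7), since ReLU zeroes out negative inputs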
# Linear part of forward propagation: Z = W.A + b
# A is the matrix with the input activations
# W is the weights matrix
# b is the bias vector
def linear_activation(A, W, b):
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache
linear_activation([1, 2], [3, 4], [5])
# 1*3 + 2*4 + 5 = 16
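# Vectorized example (illustrative): two samples with three features each, a layer
# with two neurons, and per-neuron biases broadcast across the samples
A_demo = np.array([[1., 2.], [3., 4.], [5., 6.]])   # shape (3, 2): 3 features, 2 samples
W_demo = np.random.randn(2, 3)                      # shape (2, 3): 2 neurons, 3 inputs each
b_demo = np.zeros((2, 1))                           # shape (2, 1), broadcast over the samples
Z_demo, cache_demo = linear_activation(A_demo, W_demo, b_demo)
Z_demo.shape  # expected (2, 2): one row per neuron, one column per sample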
# Forward step for a single layer
def forward(A_prev, W, b, activation):
    # Linear step
    Z, linear_cache = linear_activation(A_prev, W, b)
    # Pass the result through the chosen activation function
    if activation == 'sigmoid':
        A, activation_cache = sigmoid(Z)
    elif activation == 'relu':
        A, activation_cache = relu(Z)
    cache = (linear_cache, activation_cache)
    return A, cache
forward([-2], [3], [6], 'sigmoid')
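# Expected: Z = 3*(-2) + 6 = 0 and sigmoid(0) = 0.5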
# Forward propagation
def forward_propagation(X, parameters):
    # List of cached values from each layer
    caches = []
    # Input data
    A = X
    # Number of layers (each layer has a W and a b entry in parameters)
    L = len(parameters) // 2
    # Loop through the hidden layers (ReLU activation)
    for i in range(1, L):
        # Store the previous activation
        A_prev = A
        # Run the forward step
        A, cache = forward(A_prev, parameters["W" + str(i)], parameters["b" + str(i)], activation = "relu")
        # Save the cache
        caches.append(cache)
    # Last layer output (sigmoid activation)
    A_last, cache = forward(A, parameters["W" + str(L)], parameters["b" + str(L)], activation = "sigmoid")
    # Save the cache
    caches.append(cache)
    return A_last, caches
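# Sanity check (illustrative): a 2-layer network (3 inputs -> 5 ReLU units -> 1 sigmoid output)
# run on 4 random samples; the names below are just for this check
params_demo = initialize_parameters([3, 5, 1])
X_demo = np.random.randn(3, 4)
A_last_demo, caches_demo = forward_propagation(X_demo, params_demo)
A_last_demo.shape  # expected (1, 4): one probability per sample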
# Binary Cross-Entropy Cost
def cost_function(A_last, Y):
    # Number of examples (columns of Y)
    m = Y.shape[1]
    # Binary cross-entropy between the true labels and the predictions
    cost = (-1 / m) * np.sum((Y * np.log(A_last)) + ((1 - Y) * np.log(1 - A_last)))
    # Squeeze the cost down to a scalar
    cost = np.squeeze(cost)
    return cost
cost_function(np.array([[0.9], [0.9]]), np.array([[1], [1]]))
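# With these shapes Y.shape[1] = 1, so m = 1 and the cost is -(log(0.9) + log(0.9)) ≈ 0.211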
# Backward derivative of the sigmoid function
def sigmoid_backward(da, Z):
    # Sigmoid of the cached Z
    s = 1 / (1 + np.exp(-Z))
    # Local derivative of the sigmoid: s * (1 - s)
    dg = s * (1 - s)
    # Chain rule: upstream gradient times local derivative
    dz = da * dg
    return dz
sigmoid_backward(0.5, 0.5)
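# Expected result ≈ 0.1175: sigmoid(0.5) ≈ 0.6225, so the local derivative is ≈ 0.6225 * (1 - 0.6225) ≈ 0.235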
# Backward derivative of the relu function
def relu_backward(da, Z):
    # Local derivative of ReLU: 1 where Z > 0, 0 elsewhere (matching the forward pass)
    dg = 1 * (Z > 0)
    # Chain rule: upstream gradient times local derivative
    dz = da * dg
    return dz
relu_backward(0.5, 0.5)
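# Expected result: 0.5, since the ReLU derivative is 1 for Z = 0.5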
# Linear part of back propagation
def linear_backward_function(dz, cache):
    # Get the values stored in the cache
    A_prev, W, b = cache
    # Number of examples
    m = A_prev.shape[1]
    # Gradient of the weights
    dW = (1 / m) * np.dot(dz, A_prev.T)
    # Gradient of the biases
    db = (1 / m) * np.sum(dz, axis = 1, keepdims = True)
    # Gradient propagated to the previous layer's activations
    dA_prev = np.dot(W.T, dz)
    return dA_prev, dW, db
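# Note on shapes: dW has the same shape as W, db the same shape as b, and dA_prev the same
# shape as A_prev, so the gradients can be plugged directly into the parameter update later on.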
def linear_activation_backward(dA, cache, activation):
    # Unpack the cache
    linear_cache, activation_cache = cache
    # ReLU layer
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward_function(dZ, linear_cache)
    # Sigmoid layer
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward_function(dZ, linear_cache)
    return dA_prev, dW, db
# Backward propagation algorithm (calculate gradients to update weights)
# AL = predicted value during forward
# Y = true value
def backward_propagation(AL, Y, caches):
    # Gradients dictionary
    grads = {}
    # Number of layers (one cache per layer)
    L = len(caches)
    # Number of examples
    m = AL.shape[1]
    # Make Y the same shape as AL
    Y = Y.reshape(AL.shape)
    # Derivative of the cost with respect to the final prediction AL (computed at the end of forward propagation)
    dAL = -((Y / AL) - ((1 - Y) / (1 - AL)))
    # Cache of the last layer
    current_cache = caches[L-1]
    # Gradients of the last layer: this is where the backward pass starts,
    # i.e. the prediction (sigmoid) layer
    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, activation = "sigmoid")
    # Loop backwards through the hidden (ReLU) layers
    for l in reversed(range(L-1)):
        # Cache of the current layer
        current_cache = caches[l]
        # Gradients of the current layer
        dA_prev, dW, db = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation = "relu")
        # Store the gradients under their layer-specific keys
        grads["dA" + str(l)] = dA_prev
        grads["dW" + str(l + 1)] = dW
        grads["db" + str(l + 1)] = db
    return grads
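# The grads dictionary ends up with one entry per layer: "dW1".."dWL" and "db1".."dbL"
# (same shapes as the corresponding parameters) plus the propagated activations "dA0".."dA(L-1)".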
# Update weights using the gradients
def update_parameters(parameters, grads, learning_rate):
    # Number of layers (the dictionary holds a W and a b per layer)
    L = len(parameters) // 2
    # Loop through the layers
    for l in range(L):
        # Update the weights
        parameters["W" + str(l + 1)] = parameters["W" + str(l + 1)] - (learning_rate * grads["dW" + str(l + 1)])
        # Update the biases
        parameters["b" + str(l + 1)] = parameters["b" + str(l + 1)] - (learning_rate * grads["db" + str(l + 1)])
    return parameters
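# Worked single-value example of the update rule: with learning_rate = 0.1,
# a weight of 1.0 and a gradient of 0.2 becomes 1.0 - 0.1 * 0.2 = 0.98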
# Complete Neural Network Model
def NN_model(X, Y, input_layer_dims, learning_rate = 0.005, num_iterations = 20):
    # List for the cost of each training epoch
    costs = []
    # Initialize the parameters
    parameters = initialize_parameters(input_layer_dims)
    # Loop over the training epochs
    for i in range(num_iterations):
        # Forward propagation
        AL, caches = forward_propagation(X, parameters)
        # Calculate the cost
        cost = cost_function(AL, Y)
        # Backward propagation
        gradients = backward_propagation(AL, Y, caches)
        # Update the parameters
        parameters = update_parameters(parameters, gradients, learning_rate)
        # Print the cost every 10 epochs
        if i % 10 == 0:
            print('Cost after ' + str(i) + ' epochs is ' + str(cost))
        # Log the cost of this epoch
        costs.append(cost)
    return parameters, costs
def predict(X, parameters):
    # Run a forward pass and return the sigmoid outputs (probabilities)
    AL, caches = forward_propagation(X, parameters)
    return AL
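# predict returns the raw sigmoid probabilities; they are thresholded at 0.5 further below
# to obtain the 0/1 class labels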
# Imports
import sklearn
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# Versions of the packages used in this Jupyter notebook
%reload_ext watermark
%watermark -v --iv
# Load the complete dataset object into a temporary variable
temp = load_breast_cancer()
type(temp)
# Visualize the object
temp
# Load the dataset
dataset = pd.DataFrame(columns = load_breast_cancer()["feature_names"], data = load_breast_cancer()["data"])
# Shape
dataset.shape
dataset.head()
# Check for missing values
dataset.isnull().any()
# Split off the target variable
target = load_breast_cancer()["target"]
type(target)
target
# Total samples in the benign class (target = 1)
np.count_nonzero(target == 1)
# Total samples in the malignant class (target = 0)
np.count_nonzero(target == 0)
# Extract labels
# Labels dictionary
labels = {}
# Target variable class names
target_names = load_breast_cancer()["target_names"]
# Map each class index to its name
for i in range(len(target_names)):
    labels.update({i:target_names[i]})
labels
# Prepare the predictor variables in X
X = np.array(dataset)
X
# Train test split
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size = 0.15, shuffle = True)
# Shape of training data
print(X_train.shape)
print(y_train.shape)
# Shape of test data
print(X_test.shape)
print(y_test.shape)
# Adjust the shape of the input data
X_train = X_train.T
X_test = X_test.T
print(X_train.shape)
print(X_test.shape)
# Adjust the shape of the output data
y_train = y_train.reshape(1, len(y_train))
y_test = y_test.reshape(1, len(y_test))
print(y_train.shape)
print(y_test.shape)
# Layer dimensions: number of neurons in each layer of the network
input_layer_dims = [X_train.shape[0], 50, 20, 5, 1]
input_layer_dims
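# Architecture: 30 input features (X_train.shape[0]) -> hidden ReLU layers with 50, 20 and 5 units -> 1 sigmoid output neuron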
# Train model
print("\nStarting training.\n")
parameters, cost = NN_model(X = X_train,
                            Y = y_train,
                            input_layer_dims = input_layer_dims,
                            num_iterations = 10000,
                            learning_rate = 0.005)
print("\nTraining finished.\n")
# Plot the training cost curve
plt.plot(cost)
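# Optional labels for the cost curve
plt.title('Training cost')
plt.xlabel('Epoch')
plt.ylabel('Cost')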
# Predictions with train data
y_pred_train = predict(X_train, parameters)
# Visualize predictions
y_pred_train
# Adjust shape of prediction and true value
y_pred_train = y_pred_train.reshape(-1)
y_train = y_train.reshape(-1)
y_pred_train > 0.5
# Convert predictions to the binary class value
# (0 or 1, using 0.5 as the threshold)
y_pred_train = 1 * (y_pred_train > 0.5)
y_pred_train
acc_train = sum(1 * (y_pred_train == y_train)) / len(y_pred_train) * 100
print("Accuracy with training data: " + str(acc_train))
print(classification_report(y_train, y_pred_train, target_names = ['Malignant', 'Benign']))
# Predictions with test data
y_pred_test = predict(X_test, parameters)
y_pred_test
# Adjust shapes
y_pred_test = y_pred_test.reshape(-1)
y_test = y_test.reshape(-1)
# Convert to binary
y_pred_test = 1 * (y_pred_test > 0.5)
y_pred_test
acc_test = sum(1 * (y_pred_test == y_test)) / len(y_pred_test) * 100
print("Accuracy with test data: " + str(acc_test))
print(classification_report(y_test, y_pred_test, target_names = ['Malignant', 'Benign']))