Distinguish Your Own Digits
Distinguish between the handwritten digits 3 and 8
%load_ext autoreload
%autoreload 2
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
In case you are using mnist for the first time, install it from the notebook with this command: !pip install mnist
To install it from the command line instead, use: pip install mnist
import mnist
# Load the MNIST training split (60,000 images with their digit labels).
train_images = mnist.train_images()
train_labels = mnist.train_labels()
# Bare tuple expression: displays the shapes in the notebook output.
train_images.shape, train_labels.shape
# Load the MNIST test split (10,000 images with labels).
test_images = mnist.test_images()
test_labels = mnist.test_labels()
test_images.shape, test_labels.shape
# Sanity-check a single example: print its label and render the image.
image_index = 7777 # You may select anything up to 60,000
print(train_labels[image_index])
plt.imshow(train_images[image_index], cmap='Greys')
# Restrict both splits to the two digits of interest (3 and 8).
keep_train = (train_labels == 3) | (train_labels == 8)
keep_test = (test_labels == 3) | (test_labels == 8)
X_train, y_train = train_images[keep_train], train_labels[keep_train]
X_test, y_test = test_images[keep_test], test_labels[keep_test]

# Scale pixel intensities from [0, 255] into the [0, 1] range.
X_train = X_train / 255.
X_test = X_test / 255.

# Binary targets: y = 1 marks a "3", y = 0 marks an "8".
y_train = (y_train == 3).astype(int)
y_test = (y_test == 3).astype(int)
X_train.shape, X_test.shape

# Flatten each 28x28 image into a 784-long feature vector.
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)
X_train.shape, X_test.shape
Building a classifier with kudzu. Kudzu is a library for creating neural network models; it is included in the '_notebooks' folder of this GitHub repo.
from kudzu.model import Model
from kudzu.layer import Affine, Sigmoid, Relu
from kudzu.loss import MSE
from kudzu.optim import GD
from kudzu.data import Data, Dataloader, Sampler
from kudzu.train import Learner
class Config:
    """Bare attribute bag for the training hyper-parameters."""
    pass


config = Config()
config.lr = 0.001        # learning rate for gradient descent
config.num_epochs = 200  # full passes over the training data
config.bs = 50           # minibatch size
# Wrap the training arrays for kudzu and set up loss, optimiser, and
# a shuffled minibatch dataloader.
data = Data(X_train, y_train.reshape(-1, 1))
loss = MSE()
opt = GD(config.lr)
sampler = Sampler(data, config.bs, shuffle=True)
dl = Dataloader(data, sampler)

# Plain aliases (targets reshaped to column vectors) for the callback.
train_x, train_y = X_train, y_train.reshape(-1, 1)
test_x, test_y = X_test, y_test.reshape(-1, 1)

# 784 -> 100 -> 100 -> 2 -> 1 network. The 2-unit affine produces a 2-D
# embedding used for visualisation below; the final sigmoid squashes the
# output into a probability.
layers = [
    Affine("first", 784, 100),
    Relu("first"),
    Affine("second", 100, 100),
    Relu("second"),
    Affine("third", 100, 2),
    Affine("final", 2, 1),
    Sigmoid("final"),
]
model_nn = Model(layers)
Created a subclass "ClfCallback" of the "Callback" class in callbacks.py, and imported it here.
from kudzu.callbacks import ClfCallback
# Train the neural network for config.num_epochs epochs over the
# minibatch dataloader.
learner_nn = Learner(loss, model_nn, opt, config.num_epochs)
# The callback records accuracies into acc_nn.accuracies and
# acc_nn.test_accuracies (plotted below) — presumably one entry per
# epoch; confirm in callbacks.py.
acc_nn = ClfCallback(learner_nn,config.bs,train_x,test_x,train_y,test_y)
learner_nn.set_callbacks([acc_nn])
learner_nn.train_loop(dl)
# Learning curves recorded during neural-network training.
plt.plot(acc_nn.accuracies, label="Train Accuracies")
plt.plot(acc_nn.test_accuracies, "r-", label="Test Accuracies")
plt.title("Classification by NN")
plt.legend(loc="lower right")
# Hints taken from TA sessions.
# Sub-model that stops before the last two layers (the final Affine and
# Sigmoid), so its output is the 2-D embedding from the "third" Affine.
model_vis = Model(layers[:-2])
The intuition behind these two graphs (one with low alpha and one with high) is to show the misclassified points of both classes clearly.
# Project the test set into the learned 2-D embedding and scatter it
# twice: translucent points (left) expose overlapping regions, opaque
# points (right) show the overall class separation.
vis = model_vis(test_x)
plt.figure(figsize=(16, 7))
for panel, opacity in ((1, 0.1), (2, 1.0)):
    plt.subplot(1, 2, panel)
    sc = plt.scatter(vis[:, 0], vis[:, 1], alpha=opacity,
                     c=test_y.ravel(), cmap="plasma")
    plt.legend(*sc.legend_elements(), loc="upper left", title="Classes")
# Sub-model of just the last Affine + Sigmoid: it maps a point in the
# 2-D embedding to a class probability.
model_prob = Model(layers[-2:])

# Build a 100x100 grid covering the embedding region.
xgrid = np.linspace(-4, 1, 100)
ygrid = np.linspace(-7.5, 7.5, 100)
xg, yg = np.meshgrid(xgrid, ygrid)

# Flatten the grid into an (n, 2) matrix — the final Affine expects two
# input columns — then reshape the predictions back onto the grid.
grid_points = np.column_stack((xg.ravel(), yg.ravel()))
probability_contour = model_prob(grid_points).reshape(100, 100)
# Overlay the probability contours on the embedded test points — once
# with translucent markers (top) and once opaque (bottom).
plt.figure(figsize=(12, 14))
for panel, opacity in ((1, 0.1), (2, 1.0)):
    plt.subplot(2, 1, panel)
    sc = plt.scatter(vis[:, 0], vis[:, 1], alpha=opacity,
                     c=y_test.ravel(), cmap="plasma")
    plt.legend(*sc.legend_elements(), loc="upper right", title="Classes")
    curves = plt.contour(xg, yg, probability_contour)
    plt.clabel(curves, inline=True)
    plt.title("Probability Contour that distinguishes one data class from the other")
# Logistic-regression baseline: a single 784 -> 1 affine map followed
# by a sigmoid, trained with the same loss, optimiser, dataloader, and
# epoch count as the neural network above.
layers_logreg = [Affine("first",784,1),Sigmoid("first")]
model_logreg = Model(layers_logreg)
learner_logreg = Learner(loss, model_logreg, opt, config.num_epochs)
# Same accuracy-tracking callback as used for the NN.
acc_logreg = ClfCallback(learner_logreg,config.bs,train_x,test_x,train_y,test_y)
learner_logreg.set_callbacks([acc_logreg])
learner_logreg.train_loop(dl)
# Learning curves recorded during logistic-regression training.
plt.plot(acc_logreg.accuracies, label="Train Accuracies")
plt.plot(acc_logreg.test_accuracies, "r-", label="Test Accuracies")
plt.ylim(0.6, 1)
plt.title("Classification by Logistic Regression")
plt.legend(loc="lower right")
# Side-by-side comparison of the two classifiers' learning curves.
plt.figure(figsize=(12, 5))
panels = (
    (1, acc_nn, "Classification by NN"),
    (2, acc_logreg, "Classification by Logistic Regression"),
)
for panel, cb, title in panels:
    plt.subplot(1, 2, panel)
    plt.plot(cb.accuracies, label="Train Accuracies")
    plt.plot(cb.test_accuracies, "r-", label="Test Accuracies")
    plt.ylim(0.6, 1)
    plt.title(title)
    plt.legend(loc="lower right")
From the graphs we can see that the NN gives a more accurate classifier than logistic regression, but looking at the NN graph, it seems to suffer from overfitting.
acc_nn.test_accuracies[-1],acc_logreg.test_accuracies[-1]
From the above accuracies we can say that:
- In the neural network model, we get around 51 false positives and false negatives out of 1984
- In the logistic regression model, we get around 70 false positives and false negatives out of 1984