# Neural Networks for Fashion MNIST on GPUs

In this notebook we will see how to create a neural network to build a model for multiclass classification. We will train our model on a GPU.

We will use
- PyTorch
- The `torchvision.datasets` submodule to download the Fashion MNIST dataset
- Split the dataset into a train and validation set using `random_split` from the `torch.utils.data` submodule
- We will create our own
 - Data loader wrapper class (`DeviceDataLoader`)
 - `nn.Module` subclass called `MultiClassifier`
 - `compute_accuracy` function
- We will build class member functions to help us compute the validation accuracies
- We will load our data and model onto the GPU
- We will see how to train the model by
 - Setting the epochs
 - Computing the loss
 - Calling the .backward() method to compute the gradients of the weights
- Compute the accuracy of our model on the test set

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# torch imports
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F

# torch.utils.data imports
from torch.utils.data import Dataset, DataLoader, random_split

# torchvision imports
from torchvision.datasets import FashionMNIST
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid

In [None]:
# Load Fashion MNIST via torchvision; FashionMNIST(...) returns a Dataset object
# and ToTensor() is the transform applied to each image.
# `dataset` is our training data (download=True requests the download)
dataset = FashionMNIST(
    root='fmnist/',
    download=True,
    transform=ToTensor(),
)
# `test_dataset` is our test data (train=False), read from the same root folder
test_dataset = FashionMNIST(
    root='fmnist/',
    train=False,
    transform=ToTensor(),
)

In [None]:
# Number of samples in the full training dataset (before the train/validation split)
dataset_size = len(dataset)
print("Training set size: ", dataset_size)

In [None]:
# Number of samples in the test dataset
test_dataset_size = len(test_dataset)
print("Testing set size: ", test_dataset_size)

In [None]:
# Get the class names of the training dataset; `classes` is indexed by the
# numeric label later on, and its length is the number of output classes
classes = dataset.classes
print("Classes in the dataset ", classes)
print("Number of classes: ", len(classes))

In [None]:
# Get the first image and label in the training dataset
img, label = dataset[0]
print("Image shape: ", img.shape )
# Note: this prints the label value itself (a numeric class index), not its Python type
print("Label type: ", label)
# The shape is (1, 28, 28)
# 1 indicates grayscale (a single channel)
# 28 row pixels
# 28 column pixels

In [None]:
# The image is already normalized: the ToTensor() transform yields float pixel
# values scaled to the [0, 1] range, so no further rescaling is applied here
print(img)

In [None]:
# Show the first training image
# imshow expects (pixel rows, pixel columns, channels), so move the channel axis last
image_hwc = img.permute(1, 2, 0)
plt.imshow(image_hwc, cmap='gist_gray')
print('Label (numeric):', label)
print('Label (textual):', classes[label])

In [None]:
# Hold out 10,000 samples of the training data for validation; the rest is used for training
val_size = 10000
train_size = len(dataset) - val_size

# Seed a dedicated generator so the random split is reproducible
generator = torch.Generator()
generator.manual_seed(42)
train_ds, val_ds = random_split(
    dataset,
    lengths=[train_size, val_size],
    generator=generator,
)
print("Length of training dataset: ", len(train_ds))
print("Length of validation dataset: ", len(val_ds))

In [None]:
# Build the train, validation, and test data loaders
# No custom Dataset classes are needed: torchvision already provides the datasets
batch_size = 128
# Validation and test batches are twice as large as training batches
eval_batch_size = batch_size * 2
# Only the training loader shuffles its samples
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_ds, batch_size=eval_batch_size, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=eval_batch_size, num_workers=4)

In [None]:
# Plot the images of the first training batch in a grid (16 images per row)
for images, _ in train_loader:
    print('images.shape:', images.shape)
    plt.figure(figsize=(16, 8))
    plt.axis('off')
    # make_grid returns a channel-first image; imshow wants the channel axis last
    grid = make_grid(images, nrow=16)
    plt.imshow(grid.permute(1, 2, 0))
    # Only the first batch is displayed
    break

In [None]:
# Create a function that computes the accuracy given outputs and labels
def compute_accuracy(outputs, labels):
    """Return the fraction of correct predictions in a batch as a 0-dim tensor.

    The predicted class of each row of `outputs` is the index of its largest
    value along dim 1. The number of predictions equal to `labels` is divided
    by the number of predictions.
    """
    preds = torch.max(outputs, dim=1).indices
    num_correct = (preds == labels).sum().item()
    return torch.tensor(num_correct / len(preds))

In [None]:
# Define our neural network architecture here
# Note the init method takes an input and output size
class MultiClassifier(nn.Module):
    """Feed-forward multiclass classifier with two hidden layers (16 and 32 units).

    `forward` returns raw, unnormalized class scores (logits). The loss used
    in the training and validation steps, `F.cross_entropy`, applies
    log-softmax internally, so the network must not apply softmax itself.
    Apply `F.softmax(model(x), dim=1)` to the output if class probabilities
    are needed; the arg-max (and therefore the accuracy) is the same either way.
    """

    def __init__(self, in_size, out_size):
        """Create the layers.

        Args:
            in_size: number of features of one flattened input sample.
            out_size: number of classes (size of the output layer).
        """
        super().__init__()
        # Hidden layer 1
        self.layer1 = nn.Linear(in_size, 16)
        # Hidden layer 2
        self.layer2 = nn.Linear(16, 32)
        # Output layer
        self.output = nn.Linear(32, out_size)

    def forward(self, x):
        """Return the logits for a batch `x`; all dims after the first are flattened."""
        x = torch.flatten(x, start_dim=1)  # Turn each image into a vector
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        # No softmax here: F.cross_entropy expects logits, and feeding it
        # probabilities would apply softmax twice and flatten the gradients.
        return self.output(x)

    # Define a member function to compute the loss for 1 training step
    def training_step(self, batch):
        """Return the cross-entropy loss of one `(images, labels)` batch."""
        images, labels = batch
        out = self(images)  # Generate predictions (logits)
        return F.cross_entropy(out, labels)  # Calculate loss

    # Define a member function to compute the loss and accuracy of a validation step
    def validation_step(self, batch):
        """Return a dict with the loss and accuracy of one validation batch.

        Keys: 'val_loss' (loss tensor, detached from the autograd graph) and
        'val_acc' (result of `compute_accuracy`).
        """
        images, labels = batch
        out = self(images)  # Generate predictions (logits)
        loss = F.cross_entropy(out, labels)  # Calculate loss
        acc = compute_accuracy(out, labels)  # Calculate accuracy
        # The validation loss is only reported, never back-propagated, so
        # detach it to avoid keeping one autograd graph alive per batch.
        return {'val_loss': loss.detach(), 'val_acc': acc}

    # Define a member function to compute the average batch loss and accuracy for the validation
    def validation_epoch_end(self, outputs):
        """Average the per-batch results of `validation_step`.

        Args:
            outputs: list of dicts with 'val_loss' and 'val_acc' tensors.

        Returns:
            Dict with the mean 'val_loss' and mean 'val_acc' as Python floats.
            Every batch has the same weight in the mean.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()  # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()  # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    # Define a member function to print out the validation loss and validation accuracy
    def epoch_end(self, epoch, result):
        """Print the validation loss and accuracy stored in `result` for `epoch`."""
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch, result['val_loss'], result['val_acc']))

In [None]:
# Check if CUDA (GPU support) is available; as the last expression of the
# cell, the resulting boolean is shown as the cell output
torch.cuda.is_available()

In [None]:
# Function to pick the default device
def get_default_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

In [None]:
# Select the device (GPU if available, else CPU) that the data loaders and
# the model are moved to in the cells below
device = get_default_device()
print("The default device is: ", device)

In [None]:
# Function to move data (e.g. tensors or model) to the chosen device (e.g. CUDA)
def to_device(data, device):
    """Move `data` to `device`.

    A list or tuple is processed element by element (recursively) and comes
    back as a list. Anything else is moved with
    `.to(device, non_blocking=True)`.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    return [to_device(item, device) for item in data]

In [None]:
# Class that moves every batch of a data loader to a device (e.g. CUDA)
class DeviceDataLoader():
    """Wrap a dataloader so each batch is moved to a device when iterated."""

    def __init__(self, dl, device):
        # Keep the wrapped loader and the target device for later iteration
        self.device = device
        self.dl = dl

    def __len__(self):
        """Number of batches of the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Yield the wrapped loader's batches, each moved to `self.device`."""
        yield from (to_device(batch, self.device) for batch in self.dl)

In [None]:
# Wrap the three data loaders so that their batches are moved to `device`
train_loader, val_loader, test_loader = (
    DeviceDataLoader(loader, device)
    for loader in (train_loader, val_loader, test_loader)
)

In [None]:
# Define a function to compute the validation metrics
def evaluate(model, val_loader):
    """Run `model.validation_step` on every batch and aggregate the results.

    Evaluation runs with gradient tracking disabled and with the model in
    eval mode: no autograd graph is needed for validation, and layers that
    behave differently in training should use their inference behaviour. The
    model's previous train/eval mode is restored afterwards, so a training
    loop can call this between epochs.

    Args:
        model: module providing `validation_step(batch)` and
            `validation_epoch_end(outputs)`.
        val_loader: iterable of batches.

    Returns:
        Whatever `model.validation_epoch_end` returns for the list of
        per-batch results.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = [model.validation_step(batch) for batch in val_loader]
    finally:
        # Put the model back into the mode the caller left it in
        model.train(was_training)
    return model.validation_epoch_end(outputs)

# Define a function to fit the model
# This wraps the training loop in a function call
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train `model` for `epochs` epochs and return the validation history.

    Args:
        epochs: number of passes over `train_loader`.
        lr: learning rate passed to the optimizer.
        model: module providing `training_step` and `epoch_end`.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches, passed to `evaluate`.
        opt_func: optimizer factory called as `opt_func(parameters, lr)`.

    Returns:
        List with one `evaluate` result per epoch.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch in range(epochs):
        # Training phase: one optimizer update per batch
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            # Clear the gradients so they do not accumulate into the next batch
            optimizer.zero_grad()
        # Validation phase
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch, epoch_result)
        history.append(epoch_result)
    return history

In [None]:
# Set the input layer and output layer dimensions
# The input size is pixel rows * pixel columns of one image
n_rows, n_cols = img.shape[1:]
input_size = n_rows * n_cols
# One output unit per class
num_classes = len(classes)

In [None]:
# Build the neural network for our input/output sizes and send it to `device`
# (the GPU when one is available)
mclf = MultiClassifier(in_size=input_size, out_size=num_classes)
to_device(mclf, device)

In [None]:
# Evaluate the model before training; this baseline becomes the first history entry
initial_result = evaluate(mclf, val_loader)
history = [initial_result]
history

In [None]:
# Train the model for 5 epochs with learning rate 0.5 and append the per-epoch results
history.extend(fit(epochs=5, lr=0.5, model=mclf, train_loader=train_loader, val_loader=val_loader))

In [None]:
# Compute the accuracy on the test set
test_batch_accuracies = []
test_batch_sizes = []
with torch.no_grad():
    for inputs, labels in test_loader:
        # Compute the model output here
        outputs = mclf(inputs)
        # Use compute accuracy function here
        test_batch_accuracies.append(compute_accuracy(outputs, labels))
        # Remember how many samples this accuracy was computed from
        test_batch_sizes.append(len(labels))

# Weight every batch accuracy by its batch size: the last batch can be smaller
# than the others, and a plain mean over batches would give its samples more
# influence than the rest.
batch_weights = torch.tensor(test_batch_sizes, dtype=torch.float32)
test_accuracy = (torch.stack(test_batch_accuracies) * batch_weights).sum() / batch_weights.sum()
print("Average test set accuracy: ", test_accuracy.item())

In [None]:
def compute_test_accuracy(model, test_loader):
    """Return the accuracy of `model` over all samples of `test_loader`.

    The result is the number of correct predictions divided by the number of
    samples. Counting samples rather than averaging per-batch accuracies
    keeps a smaller final batch from being over-weighted, so the value can
    differ slightly from a plain mean of batch accuracies.

    Args:
        model: callable mapping a batch of inputs to per-class scores; the
            predicted class is the index of the largest score along dim 1.
        test_loader: iterable of `(inputs, labels)` batches.

    Returns:
        The accuracy as a Python float.

    Raises:
        ValueError: if `test_loader` yields no samples.
    """
    num_correct = 0
    num_samples = 0
    # Gradients are not needed for evaluation
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            _, preds = torch.max(outputs, dim=1)
            num_correct += torch.sum(preds == labels).item()
            num_samples += len(labels)
    if num_samples == 0:
        raise ValueError("test_loader yielded no samples")
    return num_correct / num_samples

# Compute the test set accuracy of the trained model with the helper function and print it
acc = compute_test_accuracy(mclf, test_loader)
print("Average test set accuracy: ", acc)