{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cbfa01ce",
   "metadata": {},
   "source": [
    "# Neural Networks for Fashion MNIST on GPUS\n",
    "\n",
    "In this notebook we will see how to create a neural network to build a model for multiclass classification. We will train our model on a GPU.\n",
    "\n",
    "We will use\n",
    "- Pytorch\n",
    "- The `torchvision.datasets` submodule to download the Fashion MNIST dataset\n",
    "- Split the dataset into a train and validation set using `random_split` from the `torch.utils.data` submodule\n",
    "- We will create our own\n",
    "    - Dataloader classes\n",
    "    - Define our own nn.Module class called MultiClassifier\n",
    "    - compute_accuracy function\n",
    "- We will build class member functions to help us compute the validation accuracies\n",
    "- We will load our data and model onto the GPU\n",
    "- We will see how to train the model by\n",
    "    - Setting the epochs\n",
    "    - Computing the loss\n",
    "    - Calling the .backward() method to compute the gradients of the weights\n",
    "- Compute the accuracy of our model on the test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3ccdb3b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "# torch imports\n",
    "import torch\n",
    "import torchvision\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "\n",
    "# torch.utils.data imports\n",
    "from torch.utils.data import Dataset, DataLoader, random_split\n",
    "\n",
    "# torchvision imports\n",
    "from torchvision.datasets import FashionMNIST\n",
    "from torchvision.transforms import ToTensor\n",
    "from torchvision.utils import make_grid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b76be57",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the Fashion MNIST dataset\n",
    "# The FashionMNIST function returns an Object that is a Dataset class\n",
    "# dataset is our training data\n",
    "dataset = FashionMNIST(root='fmnist/', download=True, transform=ToTensor())\n",
    "# tes_dataset is our test data\n",
    "test_dataset = FashionMNIST(root='fmnist/', train=False, transform=ToTensor())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d7eaa736",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the size of the training data\n",
    "dataset_size = len(dataset)\n",
    "print(\"Training set size: \", dataset_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50a0c4d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the size of the test data\n",
    "test_dataset_size = len(test_dataset)\n",
    "print(\"Testing set size: \", test_dataset_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a28138ce",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the classes in the training dataset\n",
    "classes = dataset.classes\n",
    "print(\"Classes in the dataset \", classes)\n",
    "print(\"Number of classes: \", len(classes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "473efb77",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Get the first image and label in the training dataset\n",
    "img, label = dataset[0]\n",
    "print(\"Image shape: \", img.shape )\n",
    "print(\"Label type: \", label)\n",
    "# The shape is (1, 28, 28)\n",
    "# 1 indicates grayscale \n",
    "# 28 row pixels\n",
    "# 28 column pixels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63879c96",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The image is already normalized\n",
    "print(img)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c9a9cbf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the img\n",
    "# We permute the indices so that it is (pixel rows, pixel columns, scale)\n",
    "plt.imshow(img.permute((1, 2, 0)), cmap='gist_gray')\n",
    "print('Label (numeric):', label)\n",
    "print('Label (textual):', classes[label])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "106a1c19",
   "metadata": {},
   "outputs": [],
   "source": [
    "val_size = 10000\n",
    "train_size = len(dataset) - val_size\n",
    "\n",
    "generator = torch.Generator().manual_seed(42)\n",
    "train_ds, val_ds = random_split(dataset, [train_size, val_size], generator=generator)\n",
    "print(\"Length of training dataset: \", len(train_ds))\n",
    "print(\"Length of validation dataset: \", len(val_ds))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5808fe17",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create our train, validation, and test data loaders\n",
    "# We don't have to create dataset classes because torchvision handles this for us\n",
    "batch_size=128\n",
    "train_loader = DataLoader(train_ds, batch_size, shuffle=True, num_workers=4)\n",
    "val_loader = DataLoader(val_ds, batch_size*2, num_workers=4)\n",
    "test_loader = DataLoader(test_dataset, batch_size*2, num_workers=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4439ec36",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the images in a grid\n",
    "for images, _ in train_loader:\n",
    "    print('images.shape:', images.shape)\n",
    "    plt.figure(figsize=(16,8))\n",
    "    plt.axis('off')\n",
    "    plt.imshow(make_grid(images, nrow=16).permute((1, 2, 0)))\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5cd936c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a fucnction that computes the accuracy given outputs and labels \n",
    "def compute_accuracy(outputs, labels):\n",
    "    _, preds = torch.max(outputs, dim=1)\n",
    "    return torch.tensor(torch.sum(preds == labels).item() / len(preds))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2d61783",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define our neural network architecture here\n",
    "# Note the init method takes an input and output size\n",
    "class MultiClassifier(nn.Module):\n",
    "    def __init__(self, in_size, out_size):\n",
    "        super(MultiClassifier, self).__init__()\n",
    "        # Hidden layer 1\n",
    "        self.layer1 = nn.Linear(in_size, 16)\n",
    "        # Hidden layer 2\n",
    "        self.layer2 = nn.Linear(16, 32)\n",
    "        # Output layer\n",
    "        self.output = nn.Linear(32, out_size)\n",
    "        \n",
    "    def forward(self, x):\n",
    "        x = torch.flatten(x, start_dim=1) # Turn image to vector\n",
    "        x = F.relu(self.layer1(x))\n",
    "        x = F.relu(self.layer2(x))\n",
    "        x = F.softmax(self.output(x), dim=1)\n",
    "        return x\n",
    "    \n",
    "    \n",
    "    # Define a member function to compute the loss for 1 training step\n",
    "    def training_step(self, batch):\n",
    "        images, labels = batch \n",
    "        out = self(images)                  # Generate predictions\n",
    "        loss = F.cross_entropy(out, labels) # Calculate loss\n",
    "        return loss\n",
    "    \n",
    "    # Define a member functio nto compute the loss and accuracy of a validation step\n",
    "    def validation_step(self, batch):\n",
    "        images, labels = batch \n",
    "        out = self(images)                    # Generate predictions\n",
    "        loss = F.cross_entropy(out, labels)   # Calculate loss\n",
    "        acc = compute_accuracy(out, labels)           # Calculate accuracy\n",
    "        return {'val_loss': loss, 'val_acc': acc} # return a dictionary\n",
    "    \n",
    "    # Define a member function to compute the average batch loss and accuracy for the validation\n",
    "    def validation_epoch_end(self, outputs):\n",
    "        batch_losses = [x['val_loss'] for x in outputs]\n",
    "        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses\n",
    "        batch_accs = [x['val_acc'] for x in outputs]\n",
    "        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies\n",
    "        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()} # return a dictionary\n",
    "    \n",
    "    # Define a member function to pring out the validation loss and validation accuracy\n",
    "    def epoch_end(self, epoch, result):\n",
    "        print(\"Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}\".format(epoch, result['val_loss'], result['val_acc']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be669993",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Check if cuda is available\n",
    "torch.cuda.is_available()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f30ca983",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to set default device\n",
    "def get_default_device():\n",
    "    \"\"\"Pick GPU if available, else CPU\"\"\"\n",
    "    if torch.cuda.is_available():\n",
    "        return torch.device('cuda')\n",
    "    else:\n",
    "        return torch.device('cpu')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "856c6e90",
   "metadata": {},
   "outputs": [],
   "source": [
    "device = get_default_device()\n",
    "print(\"The default device is: \", device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b35bd52",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to move data (e.g. tensors or model) to GPU (CUDA) device\n",
    "def to_device(data, device):\n",
    "    \"\"\"Move tensor(s) to chosen device\"\"\"\n",
    "    if isinstance(data, (list, tuple)):\n",
    "        return [to_device(x, device) for x in data]\n",
    "    return data.to(device, non_blocking=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d28bf244",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Class to move data to GPU (CUDA) device\n",
    "class DeviceDataLoader():\n",
    "    \"\"\"Wrap a dataloader to move data to a device\"\"\"\n",
    "    def __init__(self, dl, device):\n",
    "        self.dl = dl\n",
    "        self.device = device\n",
    "        \n",
    "    def __iter__(self):\n",
    "        \"\"\"Yield a batch of data after moving it to device\"\"\"\n",
    "        for b in self.dl: \n",
    "            yield to_device(b, self.device)\n",
    "\n",
    "    def __len__(self):\n",
    "        \"\"\"Number of batches\"\"\"\n",
    "        return len(self.dl)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb2e2471",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the three data loaders\n",
    "train_loader = DeviceDataLoader(train_loader, device)\n",
    "val_loader = DeviceDataLoader(val_loader, device)\n",
    "test_loader = DeviceDataLoader(test_loader, device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ad72a8bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define a function to compute validation data\n",
    "def evaluate(model, val_loader):\n",
    "    outputs = [model.validation_step(batch) for batch in val_loader]\n",
    "    return model.validation_epoch_end(outputs)\n",
    "\n",
    "# Define a function to fit the model\n",
    "# This wraps a function  call around the loop\n",
    "def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):\n",
    "    history = []\n",
    "    optimizer = opt_func(model.parameters(), lr)\n",
    "    for epoch in range(epochs):\n",
    "        # Training Phase \n",
    "        for batch in train_loader:\n",
    "            loss = model.training_step(batch)\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "            optimizer.zero_grad()\n",
    "        # Validation phase\n",
    "        result = evaluate(model, val_loader)\n",
    "        model.epoch_end(epoch, result)\n",
    "        history.append(result)\n",
    "    return history"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35c58d97",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the input lyaer and output layer dimensions\n",
    "input_size = img.size(1)*img.size(2)\n",
    "num_classes = len(classes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba27e01f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instantiate the Neural Network class and send it to the GPU\n",
    "mclf = MultiClassifier(input_size, out_size=num_classes)\n",
    "to_device(mclf, device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c424ca67",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Evaluate the model before training\n",
    "history = [evaluate(mclf, val_loader)]\n",
    "history"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9f7961dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the model for 5 epochs\n",
    "history += fit(5, 0.5, mclf, train_loader, val_loader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8fdb464f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute the accuracy on the test set\n",
    "test_batch_accuracies =[]\n",
    "with torch.no_grad():\n",
    "    for data in test_loader:\n",
    "        # Get the inputs and labels here\n",
    "        inputs, labels = data\n",
    "        # Compute the model output here\n",
    "        outputs = mclf(inputs)\n",
    "        # Use compute accuracy function here\n",
    "        batch_accuracy = compute_accuracy(outputs, labels)\n",
    "        test_batch_accuracies.append(batch_accuracy)\n",
    "        \n",
    "test_accuracy = torch.stack(test_batch_accuracies).mean()\n",
    "print(\"Average test set accuracy: \", test_accuracy.item())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99b65869",
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_test_accuracy(model, test_loader):\n",
    "    test_batch_accuracies =[]\n",
    "    with torch.no_grad():\n",
    "        for data in test_loader:\n",
    "            # Get the inputs and labels here\n",
    "            inputs, labels = data\n",
    "            # Compute the model output here\n",
    "            outputs = model(inputs)\n",
    "            # Use compute accuracy function here\n",
    "            batch_accuracy = compute_accuracy(outputs, labels)\n",
    "            test_batch_accuracies.append(batch_accuracy)\n",
    "        test_accuracy = torch.stack(test_batch_accuracies).mean()\n",
    "    return test_accuracy.item()\n",
    "\n",
    "acc = compute_test_accuracy(mclf, test_loader)\n",
    "print(\"Average test set accuracy: \", acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c63d7e0",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}