{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cbbe4846",
   "metadata": {},
   "source": [
    "# Homework Pytorch Deep Learning Part 1\n",
    "The homework consists of two parts\n",
    "1. Implementing gradient descent for a logistic regression model implemented in Pytorch \n",
    "2. Creating a neural network classifier with 2 layers to compare against the logistic regression model \n",
    "\n",
    "We will again work with the `scikit-learn` breast cancer dataset.\n",
    "\n",
    "If you run this notebook on your own machine you need to have Pytorch installed.\n",
    "\n",
    "If you run this notebook on the SCC you can load the lateset Pytorch module."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90bb0f27",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Below are some of the imports needed for the homework\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "from sklearn.datasets import load_breast_cancer\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "from torch.utils.data import Dataset, DataLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a823ef2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the breast cancer dataset as a dataframe\n",
    "data = load_breast_cancer(as_frame=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c2d0ded",
   "metadata": {},
   "outputs": [],
   "source": [
    "##################\n",
    "### Question 1 ###\n",
    "##################\n",
    "# Import the test_train_split function \n",
    "# Split the dataset so that 75% of the dataset is for training and 25% is for testing \n",
    "# Use the StandardScaler to scale the data\n",
    "# Set the random state to 42\n",
    "# 1 point\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90f893a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "##################\n",
    "### Question 2 ###\n",
    "##################\n",
    "# Implement the Dataset class below and then instantiate\n",
    "# the traindata and a testdata objects using this class\n",
    "# Print out the features and label of the second element in traindata\n",
    "# 1 point for printing out the correct feature\n",
    "# 1 point for printing ou the correct target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c94fa97",
   "metadata": {},
   "outputs": [],
   "source": [
    "##################\n",
    "### Question 3 ###\n",
    "##################\n",
    "# Using a batch size of 8 instantiate a train and test Dataloader class\n",
    "# 1 point \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e776057",
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.manual_seed(42) # DO NOT CHANGE THIS SEED\n",
    "##################\n",
    "### Question 4 ###\n",
    "##################\n",
    "# Implement a nn.module subclass called LogisticRegression\n",
    "# Define a nn.Linear layer with the appropriate input and output dimensions\n",
    "# Apply the sigmoid function to the output of the nn.Linear layer in the forward method\n",
    "# 1 point for setting the layer correctly in init\n",
    "# 1 point for setting the forward propagation correctly with sigmoid function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "717ddb81",
   "metadata": {},
   "outputs": [],
   "source": [
    "##################\n",
    "### Question 5 ###\n",
    "##################\n",
    "# Create a LogisticRegression object called logr\n",
    "# Print the parameters\n",
    "# 1 point"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4dde514",
   "metadata": {},
   "outputs": [],
   "source": [
    "##################\n",
    "### Question 6 ###\n",
    "##################\n",
    "# Fill in the code below so that \n",
    "# the weights and the biases of the linear layer in the logr class\n",
    "# are updated using gradient descent with a learning rate lr=0.001\n",
    "# Print the losses out at each epoch\n",
    "# Save the accuracies and losses in a list and plot the accuracies and losses against the epochs\n",
    "# 1 point for correct learning rate\n",
    "# 1 point for implementing correct gradient descent\n",
    "# 1 point for printing correct losses\n",
    "# 1 point for correct plots\n",
    "\n",
    "criterion = nn.BCELoss()\n",
    "\n",
    "epochs=20\n",
    "losses = []\n",
    "accuracies = []\n",
    "correct, total = 0, 0\n",
    "\n",
    "# Set the learning rate here\n",
    "lr = pass\n",
    "\n",
    "for epoch in range(epochs):\n",
    "    for i, data in enumerate(trainloader, 0):\n",
    "        inputs, targets = data\n",
    "        \n",
    "        outputs = logr(inputs)\n",
    "        \n",
    "        # YOUR CODE GOES HERE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "012458a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "##################\n",
    "### Question 7 ###\n",
    "##################\n",
    "# Print the accuracy of this model on the test set\n",
    "# 1 point for implementing loop to calculate accuracy on test set\n",
    "# 1 point for correct accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "16bb0c7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.manual_seed(42) # DO NOT CHANGE THIS\n",
    "##################\n",
    "### Question 8 ###\n",
    "##################\n",
    "# Create a NeuralNetwork class with the following architecture\n",
    "# Input layer - you will need to determine the input size from the dataset\n",
    "# Hidden layer 1 - 20 neurons with RELU activation\n",
    "# Hidden layer 2 - 10 neurons with Tanh activation\n",
    "# Hidden layer 3 - 5 neuron with RELU activation\n",
    "# Output layer - Use the softmax activation function\n",
    "# Instantiate the neural network class with the name neural_network\n",
    "# Print the parameters of your network\n",
    "# 1 point for setting all the dimensions correctly in init\n",
    "# 1 point for setting the correct activation functions in forward method\n",
    "# 1 point for printing out the parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1988bd93",
   "metadata": {},
   "outputs": [],
   "source": [
    "##################\n",
    "### Question 9 ###\n",
    "##################\n",
    "# Train your neural network using the Adams optimizer\n",
    "# Use 10 epochs to train\n",
    "# Set the parameters for the Adams optimizer\n",
    "# learning rate = 0.01\n",
    "# betas = (0.8, 0.98)\n",
    "# Print out the loss and accuracy at each epoch\n",
    "# 1 point for setting epochs correctly\n",
    "# 1 point for setting the optimizer correctly\n",
    "# 4 points for implementing the training loop correctly\n",
    "#    - 1 for outputs, \n",
    "#    - 1 for loss, \n",
    "#    - 1 for getting prediction, total and correct, \n",
    "#    - 1 for backprop\n",
    "# 1 point for correct values printed during the training \n",
    "\n",
    "losses = []\n",
    "accuracies = []\n",
    "\n",
    "# Define the loss function\n",
    "criterion = nn.BCELoss()\n",
    "\n",
    "# Define the Adams optimizer\n",
    "optimizer = pass\n",
    "\n",
    "epochs = pass\n",
    "correct, total = 0, 0\n",
    "losses = []\n",
    "accuracies = []\n",
    "for epoch in range(epochs):\n",
    "    for i, data in enumerate(trainloader, 0):\n",
    "    # YOUR CODE GOES HERE\n",
    "    \n",
    "    print(\"epoch {}  loss : {:.5f}  accuracy : {:.5f}\".format(epoch, loss, acc))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c32f4353",
   "metadata": {},
   "outputs": [],
   "source": [
    "###################\n",
    "### Question 10 ###\n",
    "###################\n",
    "# Print the accuracy on the test set\n",
    "# Use scikit learn classification_report to evaulate the model performance\n",
    "# 1 point for filling in outputs correctly\n",
    "# 1 point for filling in predicted correctly\n",
    "# 1 point for correct classification_report\n",
    "\n",
    "from sklearn.metrics import classification_report\n",
    "\n",
    "predictions = []\n",
    "targets = []\n",
    "\n",
    "correct, total = 0, 0\n",
    "# No need to calculate gradients during inference\n",
    "with torch.no_grad():\n",
    "    for data in testloader:\n",
    "        inputs, labels = data\n",
    "        \n",
    "        # FILL IN THE CODE BELOW\n",
    "        outputs = pass \n",
    "        predicted = pass\n",
    "        \n",
    "        # This create a long list\n",
    "        predictions.extend(predicted.flatten().tolist())\n",
    "        targets.extend(labels.flatten().tolist())\n",
    "        \n",
    "        # CODE TO COMPUTE ACCURACY GOES HERE\n",
    "\n",
    "    print(f'Accuracy of the network on the {len(testdata)} test data: {100 * correct // total} %')\n",
    "\n",
    "# Add classification report here using predictions and targets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "92535f0b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}