# Using MLflow to track hyperparameter tuning
In this notebook we will use MLflow to track hyperparameter tuning of our previously trained neural network for the breast cancer dataset.

In [None]:
import mlflow as ml
import mlflow.pytorch
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from torch.utils.data import Dataset, DataLoader, random_split

In [None]:
# Load the breast cancer dataset as a dataframe
bc_dataset = load_breast_cancer(as_frame=True)

# X is a Pandas dataframe
# The columns are the features
X = bc_dataset["data"]

# y is a Pandas series with the target class labels
# (in scikit-learn's copy of this dataset: 0 - malignant, 1 - benign)
y = bc_dataset["target"]

# Using the train_test_split method we split 80% of the data into the X_train, y_train numpy arrays
# The remaining 20% is our X_test and y_test
X_train, X_test, y_train, y_test = train_test_split(X.to_numpy(), y.to_numpy(), test_size=0.20, random_state=10)

# Create a StandardScaler object
sc = StandardScaler()

# The StandardScaler standardizes features by removing the mean and scaling to unit variance
# This prevents features with larger variances from dominating
# Only the input features are scaled; the labels are left untouched
# Fit the scaler on the training split ONLY, then reuse those statistics for the test split.
# Re-fitting on X_test would scale the two splits differently and leak test-set
# statistics into the evaluation.
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Create dataset class
class Data(Dataset):
    """Map-style dataset that holds features and labels as float32 tensors.

    Parameters
    ----------
    X_train : numpy.ndarray
        Feature array; rows are indexed as samples.
    y_train : numpy.ndarray
        Label array; it is cast to float32 and given an extra axis at
        position 1, so 1-D labels become a column of shape (n_samples, 1).
    """

    def __init__(self, X_train, y_train):
        # Cast to float32 before wrapping: the original note here reports a
        # "expected scalar type Double but found Float" RuntimeError otherwise.
        features = X_train.astype(np.float32)
        # Labels are kept as float32 (not Long) and get a trailing axis; in this
        # notebook they are fed to F.binary_cross_entropy next to the model output.
        targets = y_train.astype(np.float32)

        self.X = torch.from_numpy(features)
        self.y = torch.unsqueeze(torch.from_numpy(targets), 1)
        self.len = self.X.shape[0]

    def __len__(self):
        """Return the number of samples (size of the first feature axis)."""
        return self.len

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.X[index], self.y[index]

# Wrap the scaled numpy arrays as torch datasets (each split is built exactly once)
traindata = Data(X_train, y_train)
testdata = Data(X_test, y_test)

# Hold out a fixed number of training samples for validation
val_size = 100
train_size = len(traindata) - val_size

# Seeded generator so the train/validation split is the same on every run
generator = torch.Generator().manual_seed(42)
train_ds, val_ds = random_split(traindata, [train_size, val_size], generator=generator)
print("Length of training dataset: ", len(train_ds))
print("Length of validation dataset: ", len(val_ds))

# Create dataloaders

batch_size = 4
trainloader = DataLoader(train_ds, batch_size=batch_size)
validationloader = DataLoader(val_ds, batch_size=batch_size)
testloader = DataLoader(testdata, batch_size=batch_size)

In [None]:
torch.manual_seed(42)


# Define the Neural Network class
class NeuralNetwork(torch.nn.Module):
    """Binary classifier with one hidden layer (Linear -> ReLU -> Linear -> sigmoid).

    Besides ``forward`` the class bundles a small training loop (``fit``), the
    per-batch / per-epoch metric helpers that loop uses, and ``score`` for
    evaluating accuracy on a held-out loader.
    """

    def __init__(self, input_dim, hidden_layer_dim, output_dim):
        """Create the two linear layers.

        Args:
            input_dim: number of input features.
            hidden_layer_dim: width of the hidden layer.
            output_dim: number of output units.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_dim, hidden_layer_dim)
        self.linear2 = nn.Linear(hidden_layer_dim, output_dim)

    def forward(self, x):
        """Return sigmoid outputs in (0, 1) for the input batch ``x``."""
        x = torch.relu(self.linear1(x))
        x = torch.sigmoid(self.linear2(x))
        return x

    def compute_accuracy(self, outputs, labels):
        """Return the fraction of rounded ``outputs`` equal to ``labels`` as a 0-dim tensor."""
        preds = torch.round(outputs)
        return torch.tensor(torch.sum(preds == labels).item() / len(preds))

    def training_step(self, inputs, labels):
        """Run a forward pass and return the binary cross-entropy loss for one batch."""
        out = self(inputs)  # Generate predictions
        loss = F.binary_cross_entropy(out, labels)  # Calculate loss
        return loss

    @staticmethod
    def _epoch_mean(outputs, key):
        """Average ``outputs[i][key]`` over an epoch, weighted by batch size.

        Each entry may carry an optional ``'batch_size'``; entries without it get
        weight 1, so a list with no sizes reduces to the plain mean. Weighting
        keeps a smaller final batch from counting as much as a full one.
        """
        values = torch.stack([out[key].detach() for out in outputs]).float()
        weights = torch.tensor(
            [float(out.get('batch_size', 1)) for out in outputs],
            dtype=values.dtype,
            device=values.device,
        )
        return ((values * weights).sum() / weights.sum()).item()

    def training_epoch_end(self, outputs):
        """Combine per-batch training results into ``{'train_loss', 'train_acc'}`` floats."""
        return {
            'train_loss': self._epoch_mean(outputs, 'train_loss'),
            'train_acc': self._epoch_mean(outputs, 'train_acc'),
        }

    def validation_step(self, batch):
        """Evaluate one ``(features, labels)`` batch.

        Returns a dict with the batch loss, batch accuracy and the batch size
        (the size lets the epoch-level average weight batches correctly).
        """
        features, labels = batch
        out = self(features)  # Generate predictions
        loss = F.binary_cross_entropy(out, labels)  # Calculate loss
        acc = self.compute_accuracy(out, labels)  # Calculate accuracy
        return {'val_loss': loss, 'val_acc': acc, 'batch_size': len(labels)}

    def validation_epoch_end(self, outputs):
        """Combine per-batch validation results into ``{'val_loss', 'val_acc'}`` floats."""
        return {
            'val_loss': self._epoch_mean(outputs, 'val_loss'),
            'val_acc': self._epoch_mean(outputs, 'val_acc'),
        }

    def epoch_end(self, epoch, train_result, val_result):
        """Print a one-line summary of the epoch's training and validation metrics."""
        print("Epoch [{}], train_loss: {:.4f}, train_acc: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, train_result['train_loss'], train_result['train_acc'], val_result['val_loss'], val_result['val_acc']))

    def fit(self, epochs, lr, train_loader, val_loader, opt_func):
        """Train the model and return the per-epoch metric history.

        Args:
            epochs: number of passes over ``train_loader``.
            lr: learning rate handed to ``opt_func``.
            train_loader: iterable of ``(inputs, labels)`` training batches.
            val_loader: iterable of ``(inputs, labels)`` validation batches.
            opt_func: optimizer factory called as ``opt_func(parameters, lr)``.

        Returns:
            A list with one dict per epoch holding ``train_loss``, ``train_acc``,
            ``val_loss`` and ``val_acc``.
        """
        history = []
        optimizer = opt_func(self.parameters(), lr)
        for epoch in range(epochs):
            # Training phase
            train_results = []
            for inputs, labels in train_loader:
                # Clear gradients first so nothing accumulated before this
                # step (e.g. from an earlier backward call) leaks into it.
                optimizer.zero_grad()
                loss = self.training_step(inputs, labels)
                loss.backward()
                optimizer.step()
                # Accuracy is measured after the update, as before, but without
                # building an autograd graph for the extra forward pass.
                with torch.no_grad():
                    acc = self.compute_accuracy(self(inputs), labels)
                train_results.append({
                    'train_loss': loss.detach(),  # drop the graph reference
                    'train_acc': acc,
                    'batch_size': len(labels),
                })
            train_result = self.training_epoch_end(train_results)
            # Validation phase: no gradients are needed
            with torch.no_grad():
                val_results = [self.validation_step(batch) for batch in val_loader]
            val_result = self.validation_epoch_end(val_results)
            self.epoch_end(epoch, train_result, val_result)
            history.append({**train_result, **val_result})
        return history

    def score(self, testloader):
        """Return the accuracy over every sample yielded by ``testloader``.

        Correct predictions are counted across all batches and divided by the
        total number of samples, so a smaller final batch is not over-weighted.

        Raises:
            ValueError: if ``testloader`` yields no samples.
        """
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in testloader:
                preds = torch.round(self(inputs))
                correct += torch.sum(preds == labels).item()
                total += len(preds)
        if total == 0:
            raise ValueError("testloader yielded no samples")
        return correct / total

In [None]:
# Layer sizes for the network: the input width is the number of columns in the
# scaled training matrix; the hidden and output widths are fixed constants.
input_dim, hidden_layer_dim, output_dim = X_train.shape[1], 4, 1

In [None]:
# Learning-rate sweep: train one model per learning rate and record each as its
# own MLflow run (parameter, final-epoch metrics, test accuracy, model artifact).
epochs = 10

lrs = [0.1, 0.01, 0.001, 0.0001]

opt_func = torch.optim.SGD

for lr in lrs:
    with ml.start_run():
        # Re-seed before building each model so every learning rate starts from
        # the same initial weights; otherwise the comparison mixes the effect of
        # lr with the effect of a different random initialisation.
        torch.manual_seed(42)
        clf = NeuralNetwork(input_dim, hidden_layer_dim, output_dim)
        # Log the hyperparameter up front so it is recorded even if training fails
        ml.log_param("lr", lr)
        print("Training model with lr: ", lr)
        history = clf.fit(epochs, lr, trainloader, validationloader, opt_func)
        score = clf.score(testloader)
        # history[-1] holds the metrics of the last epoch
        ml.log_metric("train acc", history[-1]["train_acc"])
        ml.log_metric("val acc", history[-1]["val_acc"])
        ml.log_metric("test acc", score)
        mlflow.pytorch.log_model(clf, "model")