{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Digits classification using Random Forest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.decomposition import PCA\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_digits\n",
    "\n",
    "# Step 1: Load the Olivetti Faces dataset\n",
    "digits = load_digits()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# set up the figure\n",
    "fig = plt.figure(figsize=(6, 6))  # figure size in inches\n",
    "fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)\n",
    "\n",
    "# plot the digits: each image is 8x8 pixels\n",
    "for i in range(64):\n",
    "    ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[])\n",
    "    ax.imshow(digits.images[i], cmap=plt.cm.binary, interpolation='nearest')\n",
    "    \n",
    "    # label the image with the target value\n",
    "    ax.text(0, 7, str(digits.target[i]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the Feature Matrix for model training\n",
    "X, y = digits.data, digits.target  # X = Images (flattened), y = Digits ID (target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Images are represented as numpy array\n",
    "X[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Step 2: Split the data into training and test sets\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "# Step 3: Train Random Forest without PCA\n",
    "rf_model = RandomForestClassifier(n_estimators=100, random_state=42)\n",
    "rf_model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Step 4: Predict and Evaluate\n",
    "y_pred = rf_model.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "\n",
    "print(\"\\nClassification Report (Random Forest):\")\n",
    "print(classification_report(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay\n",
    "\n",
    "# Print the model preformance as a Confusion Matrix\n",
    "print(\"Confusion Matrix (Random Forest):\")\n",
    "print(confusion_matrix(y_test, y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize the confusion matrix\n",
    "print(\"\\nConfusion Matrix (Random Forest):\")\n",
    "cm = confusion_matrix(y_test, y_pred, labels=rf_model.classes_)\n",
    "\n",
    "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=digits.target_names)\n",
    "disp.plot(cmap=plt.cm.Blues)\n",
    "plt.title(\"Confusion Matrix\")\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Explainability of Random Forest\n",
    "\n",
    "# Extract and plot feature importance\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Get feature importances from the trained Random Forest model\n",
    "importances = rf_model.feature_importances_\n",
    "\n",
    "# Print feature importances\n",
    "print(\"Feature Importances:\")\n",
    "print(importances)\n",
    "\n",
    "# Sort feature importances in descending order\n",
    "indices = np.argsort(importances)[::-1]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Plot feature importances\n",
    "plt.figure(figsize=(10, 5))\n",
    "plt.title(\"Feature Importance in Digits Classification (Random Forest)\")\n",
    "plt.bar(range(20), importances[indices[:20]], align=\"center\")  # Plot top 20 features\n",
    "plt.xticks(range(20), indices[:20], rotation=90)\n",
    "plt.xlabel(\"Feature Index\")\n",
    "plt.ylabel(\"Importance Score\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}