# Digits classification using Random Forest

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA


In [None]:
from sklearn.datasets import load_digits

# Step 1: Load the scikit-learn handwritten digits dataset
# (`digits.images` holds the pictures, `digits.target` the digit labels)
digits = load_digits()


In [None]:
# Create a 6x6-inch canvas and stretch the subplot grid to its edges,
# leaving only a thin gap between neighbouring thumbnails.
fig = plt.figure(figsize=(6, 6))
fig.subplots_adjust(
    left=0, right=1, bottom=0, top=1,
    hspace=0.05, wspace=0.05,
)

# Draw the first 64 samples on an 8x8 grid; every sample is itself an
# 8x8-pixel image, shown without axis ticks.
samples = zip(digits.images[:64], digits.target[:64])
for i, (image, label) in enumerate(samples):
    ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[])
    ax.imshow(image, cmap=plt.cm.binary, interpolation='nearest')

    # Write the true digit on top of its thumbnail at data position (0, 7)
    ax.text(0, 7, str(label))

In [None]:
# Build the inputs for model training:
#   X - feature matrix, one flattened image per row
#   y - target vector, the digit each image depicts
X = digits.data
y = digits.target

In [None]:
# Inspect the first sample: each image is represented as a flat NumPy array
X[0]

In [None]:

from sklearn.model_selection import train_test_split

# Step 2: Hold out 20% of the samples for testing; the fixed seed makes
# the shuffle (and therefore the split) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Step 3: Train a Random Forest directly on the raw pixels (no PCA).
# 100 trees, with a fixed seed so the fitted forest is reproducible.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
rf_model.fit(X_train, y_train)

In [None]:

# Step 4: Predict the digit labels of the held-out test samples
y_pred = rf_model.predict(X_test)

In [None]:
from sklearn.metrics import classification_report

# Compare the test-set predictions with the true labels and print the
# resulting per-class report under a heading.
print("\nClassification Report (Random Forest):")
report = classification_report(y_test, y_pred)
print(report)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Print the model performance as a confusion matrix
# (true labels vs. predicted labels on the test set)
print("Confusion Matrix (Random Forest):")
print(confusion_matrix(y_test, y_pred))

In [None]:
# Render the confusion matrix as a colour-coded plot
print("\nConfusion Matrix (Random Forest):")

# Fix the row/column order to the classes known to the fitted forest
cm = confusion_matrix(
    y_test,
    y_pred,
    labels=rf_model.classes_,
)

# Tick labels come from the dataset's own target names
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=digits.target_names,
)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.show()


In [None]:
# Explainability of Random Forest

# Pull the per-feature importance scores out of the fitted model
import numpy as np
import matplotlib.pyplot as plt

# One score per input feature, taken from the trained Random Forest
importances = rf_model.feature_importances_

# Dump the raw scores
print("Feature Importances:")
print(importances)

# Rank the features: argsort orders them from least to most important,
# so reverse that order to get the most important feature first.
ascending_order = np.argsort(importances)
indices = ascending_order[::-1]


In [None]:

# Bar chart of the 20 highest-ranked features, most important first
top_indices = indices[:20]
bar_positions = range(20)

plt.figure(figsize=(10, 5))
plt.title("Feature Importance in Digits Classification (Random Forest)")
plt.bar(bar_positions, importances[top_indices], align="center")
# Label every bar with the index of the feature it represents
plt.xticks(bar_positions, top_indices, rotation=90)
plt.xlabel("Feature Index")
plt.ylabel("Importance Score")
plt.show()