{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Face Recognition using KMeans Clustering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.datasets import fetch_olivetti_faces\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, adjusted_rand_score\n",
    "from scipy.stats import mode\n",
    "\n",
    "# Load the Olivetti Faces dataset\n",
    "faces = fetch_olivetti_faces(shuffle=True, random_state=42)\n",
    "X, y = faces.data, faces.target  # X = Flattened images, y = Person ID (target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "faces.data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Split the dataset into training and testing sets. Stratifying on the person ID\n",
    "# keeps each individual represented in both splits in the same 80/20 proportion.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, random_state=42, stratify=y\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Apply PCA for dimensionality reduction (optional, speeds up K-Means).\n",
    "# For data of this size scikit-learn's default solver choice can be the randomized\n",
    "# SVD, so the seed is pinned to keep the components identical across runs.\n",
    "pca = PCA(n_components=100, random_state=42)  # Reduce to 100 principal components\n",
    "X_train_pca = pca.fit_transform(X_train)  # fit the projection on the training split only\n",
    "X_test_pca = pca.transform(X_test)  # reuse the training projection for the test split\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Fit K-Means on the PCA-reduced training images\n",
    "k = 40  # one cluster per person, assuming the dataset holds 40 individuals\n",
    "kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)\n",
    "kmeans.fit(X_train_pca)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict Clusters for Test Set\n",
    "y_pred_clusters = kmeans.predict(X_test_pca)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def map_clusters_to_labels(y_true, clusters, predict_clusters=None):\n",
    "    \"\"\"Translate cluster ids into class labels by majority vote.\n",
    "\n",
    "    Each cluster id found in `clusters` is assigned the most frequent value of\n",
    "    `y_true` among its members (ties go to the smallest label).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    y_true : np.ndarray\n",
    "        Ground-truth labels aligned element-wise with `clusters`.\n",
    "    clusters : np.ndarray\n",
    "        Cluster ids used to learn the cluster -> label mapping.\n",
    "    predict_clusters : np.ndarray, optional\n",
    "        Cluster ids to translate. Defaults to `clusters`, i.e. the mapping is\n",
    "        applied to the same samples it was learned from.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    np.ndarray\n",
    "        One label per entry of `predict_clusters`; -1 marks cluster ids that\n",
    "        never occur in `clusters`.\n",
    "    \"\"\"\n",
    "    y_true = np.asarray(y_true)\n",
    "    clusters = np.asarray(clusters)\n",
    "    targets = clusters if predict_clusters is None else np.asarray(predict_clusters)\n",
    "    labels = np.full_like(targets, -1)\n",
    "    for cluster in np.unique(clusters):\n",
    "        # np.unique returns sorted values, so argmax picks the smallest label on ties\n",
    "        values, counts = np.unique(y_true[clusters == cluster], return_counts=True)\n",
    "        labels[targets == cluster] = values[np.argmax(counts)]\n",
    "    return labels\n",
    "\n",
    "\n",
    "# Learn the cluster -> person mapping from the training split only. Deriving it\n",
    "# from y_test would leak the test labels into the predictions being scored.\n",
    "y_pred_mapped = map_clusters_to_labels(y_train, kmeans.labels_, y_pred_clusters)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Score the clustering: ARI is computed on the raw cluster ids, accuracy on the\n",
    "# cluster ids after they have been mapped to person IDs\n",
    "ari = adjusted_rand_score(y_test, y_pred_clusters)\n",
    "accuracy = accuracy_score(y_test, y_pred_mapped)\n",
    "\n",
    "print(f\"Clustering Accuracy: {accuracy:.2f}\")\n",
    "print(f\"Adjusted Rand Index (ARI): {ari:.2f}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# Plot the first 15 test faces in a 3x5 grid, titled with predicted and true IDs\n",
    "fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(10, 6))\n",
    "for i, ax in enumerate(axes.ravel()):\n",
    "    face = X_test[i].reshape(64, 64)  # flattened vector back to a 64x64 image\n",
    "    ax.imshow(face, cmap='gray')\n",
    "    ax.set_title(f\"Pred: {y_pred_mapped[i]}\\nTrue: {y_test[i]}\")\n",
    "    ax.axis(\"off\")\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# What are some ways to improve this model's performance?"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}