{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Face Detection using KMeans Clustering\n",
    "\n",
    "Clusters the Olivetti face images with K-Means on PCA-reduced pixels, then scores the clusters against the true person IDs (accuracy after a majority-label mapping, plus the Adjusted Rand Index)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.datasets import fetch_olivetti_faces\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, adjusted_rand_score\n",
    "from scipy.stats import mode\n",
    "\n",
    "SEED = 42  # single seed reused for shuffling, splitting, PCA and K-Means\n",
    "\n",
    "# Load the Olivetti Faces dataset\n",
    "faces = fetch_olivetti_faces(shuffle=True, random_state=SEED)\n",
    "X, y = faces.data, faces.target  # X = flattened images, y = person ID (target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "faces.data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the dataset into training and testing sets.\n",
    "# stratify=y keeps every person represented in both splits, so the\n",
    "# cluster -> person mapping learned on the training split can cover all IDs.\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, random_state=SEED, stratify=y\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply PCA for dimensionality reduction (optional, for faster computation).\n",
    "# random_state pins the result in case the automatic solver selects randomized SVD.\n",
    "pca = PCA(n_components=100, random_state=SEED)  # reduce to 100 principal components\n",
    "X_train_pca = pca.fit_transform(X_train)\n",
    "X_test_pca = pca.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train K-Means clustering on the training data\n",
    "k = len(np.unique(y))  # one cluster per distinct person ID in the dataset\n",
    "kmeans = KMeans(n_clusters=k, random_state=SEED, n_init=10)\n",
    "kmeans.fit(X_train_pca)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict clusters for the test set\n",
    "y_pred_clusters = kmeans.predict(X_test_pca)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def majority_label(labels):\n",
    "    \"\"\"Return the most frequent value in `labels`; the smallest value wins ties.\"\"\"\n",
    "    values, counts = np.unique(labels, return_counts=True)\n",
    "    return values[np.argmax(counts)]\n",
    "\n",
    "\n",
    "def fit_cluster_label_map(y_true, clusters):\n",
    "    \"\"\"Learn a cluster -> label lookup from labelled samples.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    y_true : array-like of int\n",
    "        True label of each sample.\n",
    "    clusters : array-like of int\n",
    "        Cluster id assigned to each sample (same length as `y_true`).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        Maps every cluster id present in `clusters` to the majority true\n",
    "        label of the samples assigned to that cluster.\n",
    "    \"\"\"\n",
    "    y_true = np.asarray(y_true)\n",
    "    clusters = np.asarray(clusters)\n",
    "    return {c: majority_label(y_true[clusters == c]) for c in np.unique(clusters)}\n",
    "\n",
    "\n",
    "def map_clusters_to_labels(y_true, clusters):\n",
    "    \"\"\"Relabel each sample with the majority true label of its own cluster.\n",
    "\n",
    "    The mapping is derived from the very samples passed in, so scoring the\n",
    "    result against the same `y_true` is optimistic. For held-out evaluation,\n",
    "    learn the mapping with `fit_cluster_label_map` on training data instead.\n",
    "\n",
    "    Returns an array shaped and typed like `clusters`.\n",
    "    \"\"\"\n",
    "    y_true = np.asarray(y_true)\n",
    "    clusters = np.asarray(clusters)\n",
    "    labels = np.zeros_like(clusters)\n",
    "    for cluster, label in fit_cluster_label_map(y_true, clusters).items():\n",
    "        labels[clusters == cluster] = label\n",
    "    return labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Learn the cluster -> person mapping on the TRAINING split only, then apply it\n",
    "# to the test clusters. Deriving the mapping from y_test would leak the test\n",
    "# labels into the predictions and inflate the reported accuracy.\n",
    "cluster_to_label = fit_cluster_label_map(y_train, kmeans.labels_)\n",
    "\n",
    "# -1 marks a cluster id that never occurred in training (defensive fallback).\n",
    "y_pred_mapped = np.array([cluster_to_label.get(c, -1) for c in y_pred_clusters])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate clustering performance\n",
    "accuracy = accuracy_score(y_test, y_pred_mapped)\n",
    "ari = adjusted_rand_score(y_test, y_pred_clusters)\n",
    "\n",
    "print(f\"Clustering Accuracy: {accuracy:.2f}\")\n",
    "print(f\"Adjusted Rand Index (ARI): {ari:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize sample clustered faces (first 15 test images)\n",
    "fig, axes = plt.subplots(3, 5, figsize=(10, 6))\n",
    "for i, ax in enumerate(axes.flat):\n",
    "    ax.imshow(X_test[i].reshape(64, 64), cmap='gray')\n",
    "    ax.set_title(f\"Pred: {y_pred_mapped[i]}\\nTrue: {y_test[i]}\")\n",
    "    ax.axis(\"off\")\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Question:** What are some ways to improve this model's performance?"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}