# Face Clustering by Identity using KMeans (Olivetti Faces)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_olivetti_faces
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, adjusted_rand_score
from scipy.stats import mode

# Fetch the Olivetti Faces dataset, shuffled with a fixed seed for reproducibility
faces = fetch_olivetti_faces(shuffle=True, random_state=42)
X = faces.data    # flattened images, one row per face
y = faces.target  # person ID (target) of each face

In [None]:
# Dimensions of the flattened-image matrix (X is faces.data)
X.shape

In [None]:

# Hold out 20% of the images for testing. Stratifying on the person ID keeps
# the class proportions equal in both splits, so no individual ends up missing
# from (or dominating) the training or the test set by chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)


In [None]:

# Apply PCA for dimensionality reduction (optional; speeds up K-Means).
# random_state is pinned because scikit-learn may pick the randomized SVD
# solver for this data shape / n_components, which would otherwise make the
# projection differ between runs while every other step here is seeded.
pca = PCA(n_components=100, random_state=42)  # keep 100 principal components
X_train_pca = pca.fit_transform(X_train)  # fit the projection on training data only
X_test_pca = pca.transform(X_test)        # reuse the training projection for test data


In [None]:

# Fit K-Means on the PCA-reduced training images
k = 40  # one cluster per individual, assuming 40 individuals in the dataset
kmeans = KMeans(
    n_clusters=k,
    n_init=10,        # 10 restarts with different centroid seeds
    random_state=42,  # fixed seed for reproducibility
)
kmeans.fit(X_train_pca)



In [None]:
# Assign each PCA-projected test image to a cluster of the fitted K-Means model.
# The result holds cluster ids, not person IDs.
y_pred_clusters = kmeans.predict(X_test_pca)


In [None]:
from scipy.stats import mode

def map_clusters_to_labels(y_true, clusters):
    """Assign every sample the most frequent true label of its cluster.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground-truth label of each sample.
    clusters : array-like of shape (n_samples,)
        Cluster id of each sample, aligned with ``y_true``.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        For each sample, the majority label of the cluster it belongs to.
        The array has the dtype of ``y_true`` so non-integer labels are not
        truncated. Ties are resolved in favour of the smallest label.
    """
    # Accept plain lists as well as arrays; boolean masking needs ndarrays.
    y_true = np.asarray(y_true)
    clusters = np.asarray(clusters)

    labels = np.empty_like(y_true)
    for cluster in np.unique(clusters):
        mask = clusters == cluster
        # np.unique returns sorted values, and argmax picks the first maximum,
        # so a tie goes to the smallest label.
        values, counts = np.unique(y_true[mask], return_counts=True)
        labels[mask] = values[np.argmax(counts)]
    return labels


# Learn the cluster -> person mapping from the TRAINING labels only, then apply
# it to the test-set cluster assignments. Deriving the mapping from y_test
# would leak the test labels into the predictions and inflate the accuracy.
cluster_to_label = np.full(kmeans.n_clusters, -1, dtype=y_train.dtype)  # -1: no training samples in cluster
cluster_to_label[kmeans.labels_] = map_clusters_to_labels(y_train, kmeans.labels_)
y_pred_mapped = cluster_to_label[y_pred_clusters]


In [None]:

# Score the clustering against the true person IDs.
# ARI compares the raw cluster ids with the labels (it is invariant to how
# clusters are numbered); accuracy needs the clusters mapped to person IDs.
ari = adjusted_rand_score(labels_true=y_test, labels_pred=y_pred_clusters)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_mapped)

print(f"Clustering Accuracy: {accuracy:.2f}")
print(f"Adjusted Rand Index (ARI): {ari:.2f}")


In [None]:

# Show the first 15 test faces in a 3x5 grid with predicted and true person IDs
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(10, 6))
for i, ax in enumerate(axes.ravel()):
    face = X_test[i].reshape(64, 64)  # un-flatten the row back into a 64x64 image
    ax.imshow(face, cmap='gray')
    ax.set_title(f"Pred: {y_pred_mapped[i]}\nTrue: {y_test[i]}")
    ax.set_axis_off()

plt.tight_layout()
plt.show()


In [None]:
# What are some ways to improve this model's performance?