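'''
A small clustering test (k-means style iterations).

The stated goal is clustering with a Mahalanobis distance; the distance
implemented in this file is the squared Euclidean distance.
'''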

import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE

# Problem size
N = 18  # Number of samples (individuals)
d = 2  # Number of features per sample
K = 3  # Number of clusters

# Samples, drawn uniformly from [0, 1) in every dimension
X = np.random.random_sample((N, d))

# Initial centroids (model 0), drawn uniformly from [0, 1) as well
C = np.random.rand(K, d)

def dist(a, b):
    '''
    Squared Euclidean distance between ``a`` and ``b``.

    No square root is taken: the result is the sum of the squared
    component-wise differences.
    '''
    delta = np.abs(a - b)
    return np.sum(delta ** 2)

def plot_iteration(iteration, points, clusters, centers):
    '''
    Save a scatter plot of one clustering iteration as a PDF file.

    The figure is written to ``test_<iteration>.pdf`` in the current
    working directory; both axes are limited to the range [0, 1].

    iteration -- value inserted in the output file name.
    points    -- 2-D array; columns 0 and 1 are used as x and y coordinates.
    clusters  -- per-point values used to color the points (the colorbar is
                 built from this scatter).
    centers   -- iterable of (x, y) pairs, each drawn as a red '+' marker.
    '''
    # Create the figure together with its axes: the axes object is needed
    # for every drawing call below.
    fig, ax = plt.subplots()
    scatter = ax.scatter(points[:, 0], points[:, 1], c=clusters, s=50)
    for center_x, center_y in centers:
        ax.scatter(center_x, center_y, s=50, c='red', marker='+')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    fig.colorbar(scatter, ax=ax)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    fig.savefig("test_" + str(iteration) + ".pdf")
    # This function is called once per iteration; close the figure so open
    # figures do not pile up in memory.
    plt.close(fig)

# Main loop: alternate between assigning every sample to its nearest centroid
# and moving every centroid to the mean of its assigned samples, until no
# centroid moves any more.
MAX_ITERATIONS = 2000  # Safety cap: abort instead of looping forever
PLOT_EVERY = 1  # Save a debug figure every PLOT_EVERY iterations

end_algo = False
i = 0
while not end_algo:
    if i == MAX_ITERATIONS:
        # No convergence within the iteration budget: exit with status 1.
        raise SystemExit(1)
    print("Iteration: ", i)

    # Assignment step: distance from every sample to every centroid.
    distances = np.zeros((N, K))
    for n, sample in enumerate(X):
        for k, center in enumerate(C):
            distances[n, k] = dist(sample, center)
    closest_cluster = np.argmin(distances, axis=1)

    if i % PLOT_EVERY == 0:
        # -- Debug tool ----------------------
        # Samples and centroids are stacked so that a common embedding
        # (e.g. TSNE(n_components=2).fit_transform) could be applied to
        # both before plotting; the raw coordinates are used as is here.
        X_embedded = np.concatenate((X, C), axis=0)
        plot_iteration(
            i,
            X_embedded[:X.shape[0]],
            closest_cluster,
            X_embedded[X.shape[0]:]
        )
        # ------------------------------------

    # Update step: the run stops once no centroid has changed.
    end_algo = True
    for k in range(K):
        # Samples whose closest centroid is c_k.
        X_sub = X[closest_cluster == k]
        if X_sub.shape[0] == 0:
            # Empty cluster: the mean of no samples is NaN, and a NaN
            # centroid never compares equal to itself, which would prevent
            # convergence. Keep the previous centroid instead.
            continue
        C_new = np.mean(X_sub, axis=0)
        if not np.array_equal(C[k], C_new):
            end_algo = False
        C[k] = C_new
    i = i + 1

# Final figure: last assignment together with the converged centroids.
X_embedded = np.concatenate((X, C), axis=0)
plot_iteration(
    i,
    X_embedded[:X.shape[0]],
    closest_cluster,
    X_embedded[X.shape[0]:]
)