Commit cce036f22feef37d6c05c4b0757afbcbc15de45c

Authored by Mathias Quillot
1 parent a79344696d
Exists in master

Implementation of basic kmeans made by my hand

Showing 1 changed file with 85 additions and 0 deletions Inline Diff

bin/cluster_kmeans_ownmade.py
File was created 1 '''
2 A small clustering test: a hand-written k-means.
3 Intended to use a Mahalanobis distance; the current code uses the squared Euclidean distance.
4 '''
5
6 import matplotlib.pyplot as plt
7 import numpy as np
8 from sklearn.manifold import TSNE
9
N = 18  # Number of individuals (data points)
d = 2  # Number of dimensions (features per point)
K = 3  # Number of clusters

# Feature matrix: one row per point, values drawn uniformly from [0, 1).
X = np.random.random_sample((N, d))

# Initial centroids ("model 0"): one row per cluster, drawn the same way.
C = np.random.rand(K, d)
17
18
def dist(a, b):
    '''
    Squared Euclidean distance between a and b.

    Returns the sum of the squared component-wise differences; no square
    root is taken, so the value is the square of the Euclidean distance.
    '''
    gap = np.abs(a - b)
    squared_gap = np.power(gap, 2)
    return np.sum(squared_gap)
24
25
def plot_iteration(iteration, points, clusters, centers):
    '''
    Save a scatter plot of one clustering iteration as a PDF file.

    iteration -- value used to build the output file name
                 ("test_<iteration>.pdf", relative to the working directory).
    points    -- 2-D array; column 0 is plotted on x, column 1 on y.
    clusters  -- per-point values used to colour the points.
    centers   -- iterable of (x, y) pairs, each drawn as a red '+'.

    Both axes are fixed to the range [0, 1]. The figure is closed before
    returning so that repeated calls do not accumulate open figures.
    '''
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        scatter = ax.scatter(points[:, 0], points[:, 1], c=clusters, s=50)
        for center_x, center_y in centers:
            ax.scatter(center_x, center_y, s=50, c='red', marker='+')
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        fig.colorbar(scatter, ax=ax)
        ax.set_ylim(0, 1)
        ax.set_xlim(0, 1)
        fig.savefig("test_" + str(iteration) + ".pdf")
    finally:
        # Release the figure even if plotting or saving fails; this function
        # is called once per k-means iteration.
        plt.close(fig)
38
39
# K-means main loop: alternate between assigning every point to its closest
# centroid and moving every centroid to the mean of its assigned points,
# until no centroid moves.
end_algo = False
i = 0
while not end_algo:
    # Safety cap: stop with a non-zero exit status instead of looping forever.
    if i == 2000:
        raise SystemExit(1)
    print("Iteration: ", i)

    # Assignment step: distance from every point to every centroid.
    distances = np.zeros((N, K))
    for n in range(N):
        for k in range(K):
            distances[n, k] = dist(X[n], C[k])
    closest_cluster = np.argmin(distances, axis=1)

    # -- Debug tool ----------------------
    # Points and centroids are stacked into one array (a copy, so the plot
    # shows the centroids as they were before this iteration's update).
    # A 2-D projection of the stacked array (e.g. TSNE) could be substituted
    # here when d > 2.
    X_embedded = np.concatenate((X, C), axis=0)
    plot_iteration(
        i,
        X_embedded[:X.shape[0]],
        closest_cluster,
        X_embedded[X.shape[0]:]
    )
    # ------------------------------------

    # Update step: assume convergence until some centroid actually moves.
    end_algo = True
    for k in range(K):
        # Points currently assigned to centroid k.
        X_sub = X[closest_cluster == k]
        if X_sub.shape[0] == 0:
            # Empty cluster: the mean of no points is NaN, which would poison
            # the centroid and break the equality test below. Keep the
            # previous centroid instead.
            continue
        C_new = np.mean(X_sub, axis=0)
        # Exact comparison is intentional: unchanged assignments yield the
        # same mean computation and therefore identical values.
        if not np.array_equal(C[k], C_new):
            end_algo = False
        C[k] = C_new
    i = i + 1

# Final plot, using the arrays from the last iteration.
plot_iteration(
    i,
    X_embedded[:X.shape[0]],
    closest_cluster,
    X_embedded[X.shape[0]:]
)
86