Commit cce036f22feef37d6c05c4b0757afbcbc15de45c
1 parent
a79344696d
Exists in
master
Implementation of basic kmeans made by my hand
Showing 1 changed file with 85 additions and 0 deletions Side-by-side Diff
bin/cluster_kmeans_ownmade.py
1 | +''' | |
2 | +A small clustering test: a hand-written basic k-means. | |
3 | +Note: the distance used below is the squared Euclidean distance, not a Mahalanobis distance. | |
4 | +''' | |
5 | + | |
6 | +import matplotlib.pyplot as plt | |
7 | +import numpy as np | |
8 | +from sklearn.manifold import TSNE | |
9 | + | |
N = 18  # Number of individuals (data points)
d = 2  # Number of dimensions per point
K = 3  # Number of clusters

# Features: one random row of d values per individual.
X = np.random.random_sample((N, d))

# Model 0: the K initial centroids, also picked at random.
C = np.random.rand(K, d)
17 | + | |
18 | + | |
def dist(a, b):
    '''
    Squared Euclidean distance between a and b.

    Returns the sum of the squared component-wise differences; no square
    root is taken.
    '''
    delta = np.abs(a - b)
    squared = np.power(delta, 2)
    return squared.sum()
24 | + | |
25 | + | |
def plot_iteration(iteration, points, clusters, centers):
    '''
    Save a scatter plot of one k-means iteration to "test_<iteration>.pdf".

    iteration: value used only to build the output file name.
    points:    2-D array; column 0 is plotted as x, column 1 as y.
    clusters:  per-point values passed as the scatter colours (c=...).
    centers:   iterable of (x, y) pairs, each drawn as a red '+'.

    Both axes are fixed to the [0, 1] range. The figure is closed before
    returning so repeated calls do not accumulate open figures.
    '''
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        scatter = ax.scatter(points[:, 0], points[:, 1], c=clusters, s=50)
        for center_x, center_y in centers:
            ax.scatter(center_x, center_y, s=50, c='red', marker='+')
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        fig.colorbar(scatter, ax=ax)
        ax.set_ylim(0, 1)
        ax.set_xlim(0, 1)
        fig.savefig("test_" + str(iteration) + ".pdf")
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # this function is called once per iteration, so close it here.
        plt.close(fig)
38 | + | |
39 | + | |
MAX_ITERATIONS = 2000  # Safety cap so a non-converging run cannot loop forever
PLOT_EVERY = 1  # Debug: save a plot every PLOT_EVERY iterations

end_algo = False
i = 0
while not end_algo:
    if i == MAX_ITERATIONS:
        # Exits with status 1 and reports the reason on stderr.
        raise SystemExit(
            "k-means did not converge after %d iterations" % MAX_ITERATIONS
        )
    print("Iteration: ", i)

    # Assignment step: distance from every point to every centroid, then
    # the index of the nearest centroid for each point.
    distances = np.zeros((N, K))
    for n in range(N):
        for k in range(K):
            distances[n, k] = dist(X[n], C[k])
    closest_cluster = np.argmin(distances, axis=1)

    # -- Debug tool ----------------------
    # Points followed by centroids in a single array, so that a projection
    # can be applied to both at once before plotting, e.g.:
    # X_embedded = TSNE(n_components=2).fit_transform(np.concatenate((X, C), axis=0))
    X_embedded = np.concatenate((X, C), axis=0)
    if i % PLOT_EVERY == 0:
        plot_iteration(
            i,
            X_embedded[:X.shape[0]],
            closest_cluster,
            X_embedded[X.shape[0]:]
        )
    # ------------------------------------

    # Update step: move each centroid to the mean of its assigned points.
    # The algorithm stops once no centroid moved during a full pass.
    end_algo = True
    for k in range(K):
        # Subset of X whose closest centroid is c_k.
        X_sub = X[closest_cluster == k]
        if X_sub.shape[0] == 0:
            # Empty cluster: keep the previous centroid. The mean of an
            # empty set is NaN, and NaN never compares equal to itself, so
            # storing it would make the convergence test fail forever.
            continue
        C_new = np.mean(X_sub, axis=0)
        if not np.array_equal(C[k], C_new):
            end_algo = False
        C[k] = C_new
    i = i + 1

# Final state, saved under the next iteration number.
plot_iteration(
    i,
    X_embedded[:X.shape[0]],
    closest_cluster,
    X_embedded[X.shape[0]:]
)