Commit cce036f22feef37d6c05c4b0757afbcbc15de45c
1 parent
a79344696d
Exists in
master
Hand-written implementation of basic k-means
Showing 1 changed file with 85 additions and 0 deletions Inline Diff
bin/cluster_kmeans_ownmade.py
File was created | 1 | ''' | |
2 | A small clustering experiment (hand-written k-means), | ||
3 | intended for a Mahalanobis distance; dist() currently computes a squared Euclidean distance | ||
4 | ''' | ||
5 | |||
6 | import matplotlib.pyplot as plt | ||
7 | import numpy as np | ||
8 | from sklearn.manifold import TSNE | ||
9 | |||
# Problem size.
N = 18  # number of samples (individuals)
d = 2  # number of dimensions per sample
K = 3  # number of clusters

# Features: an (N, d) array of uniform random values in [0, 1).
X = np.random.random_sample((N, d))

# Initial model ("model 0"): a (K, d) array of uniform random centroids.
C = np.random.rand(K, d)
17 | |||
18 | |||
def dist(a, b):
    """Return the squared Euclidean distance between ``a`` and ``b``.

    No square root is taken, so the result is the *squared* distance.
    Squaring preserves ordering, so it can be used directly to pick the
    nearest centroid.

    Args:
        a: Real-valued array-like of coordinates.
        b: Real-valued array-like broadcastable against ``a``.

    Returns:
        The sum of the squared element-wise differences (a NumPy scalar).
    """
    delta = np.asarray(a) - np.asarray(b)
    # For real input the square is already non-negative, so no abs() needed.
    return np.sum(delta * delta)
24 | |||
25 | |||
def plot_iteration(iteration, points, clusters, centers):
    """Save a scatter plot of one clustering iteration as a PDF file.

    The figure is written to ``test_<iteration>.pdf`` (a relative path) and
    then closed, so calling this once per iteration does not accumulate
    open figures.

    Args:
        iteration: Value embedded in the output file name.
        points: 2-D array; column 0 is plotted on x and column 1 on y.
        clusters: Per-point values used to colour the points.
        centers: Iterable of ``(x, y)`` pairs, each drawn as a red ``+``.
    """
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        scatter = ax.scatter(points[:, 0], points[:, 1], c=clusters, s=50)
        for center_x, center_y in centers:
            ax.scatter(center_x, center_y, s=50, c='red', marker='+')
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        fig.colorbar(scatter, ax=ax)
        # Fixed limits keep successive iteration plots directly comparable.
        ax.set_ylim(0, 1)
        ax.set_xlim(0, 1)
        fig.savefig("test_" + str(iteration) + ".pdf")
    finally:
        # Release the figure even if drawing or saving failed.
        plt.close(fig)
38 | |||
39 | |||
# K-means main loop: assign every point to its nearest centroid, move each
# centroid to the mean of its points, and stop once no centroid moves.
end_algo = False
i = 0
while not end_algo:
    if i == 2000:
        # Safety net: give up with a non-zero status if there is still no
        # convergence. SystemExit is raised directly because the exit()
        # helper is injected by the site module and may be unavailable.
        raise SystemExit(1)
    print("Iteration: ", i)

    # Distance matrix: distances[n, k] is dist(point n, centroid k).
    distances = np.zeros((N, K))
    for n in range(N):
        for k in range(K):
            distances[n, k] = dist(X[n], C[k])
    closest_cluster = np.argmin(distances, axis=1)

    # Debug plot of the current assignment. The data are plotted as-is;
    # a projection step (e.g. TSNE) would be needed here if d were > 2.
    plot_iteration(i, X, closest_cluster, C)

    end_algo = True
    for k in range(K):
        # Points currently assigned to centroid k.
        X_sub = X[closest_cluster == k]
        if X_sub.shape[0] == 0:
            # Empty cluster: the mean of no points is NaN, which would never
            # compare equal and would block convergence. Keep the previous
            # centroid instead.
            continue
        C_new = X_sub.mean(axis=0)
        if not np.array_equal(C[k], C_new):
            # At least one centroid moved, so another pass is needed.
            end_algo = False
        C[k] = C_new
    i = i + 1

# Final state: the centroids did not move during the last pass.
plot_iteration(i, X, closest_cluster, C)
86 |