Commit 4309b4a340144c6dff7757892cd5539e89e20538

Authored by quillotm
1 parent 88d1d67e9d
Exists in master

Adding constrained Mahalanobis distance to help convergence

Showing 2 changed files with 23 additions and 12 deletions (side-by-side diff)

... ... @@ -17,7 +17,8 @@
17 17  
18 18 CLUSTERING_METHODS = {
19 19 "k-means": kmeans(),
20   - "k-means-mahalanobis": kmeansMahalanobis()
  20 + "k-means-mahalanobis": kmeansMahalanobis(),
  21 + "k-means-mahalanobis-constrained": kmeansMahalanobis(constrained=True)
21 22 }
22 23  
23 24 EVALUATION_METHODS = {
volia/clustering_modules/kmeans_mahalanobis.py
... ... @@ -8,13 +8,14 @@
8 8 from abstract_clustering import AbstractClustering
9 9  
10 10 class kmeansMahalanobis():
11   - def __init__(self):
  11 + def __init__(self, constrained: bool = False):
12 12 """
13 13  
14 14 """
15 15 self.C = None
16 16 self.L = None
17 17 self.K = None
  18 + self.constrained = constrained
18 19  
19 20 def predict(self, features):
20 21 """
... ... @@ -45,6 +46,7 @@
45 46 self.C = data["C"]
46 47 self.L = data["L"]
47 48 self.K = data["K"]
  49 + self.constrained = data["constrained"]
48 50  
49 51 def save(self, modelpath: str):
50 52 """
... ... @@ -55,7 +57,8 @@
55 57 data = {
56 58 "C": self.C,
57 59 "L": self.L,
58   - "K": self.K
  60 + "K": self.K,
  61 + "constrained": self.constrained
59 62 }
60 63 with open(modelpath, "wb") as f:
61 64 pickle.dump(data, f)
62 65  
... ... @@ -82,11 +85,11 @@
82 85  
83 86 def _dist(self, a, b, l):
84 87 '''
85   - Distance euclidienne
  88 + Distance euclidienne with mahalanobis
86 89 '''
87 90 a = np.reshape(a, (-1, 1))
88 91 b = np.reshape(b, (-1, 1))
89   - result = np.transpose(a - b).dot(l).dot(a-b)[0][0]
  92 + result = np.transpose(a - b).dot(l).dot(a - b)[0][0]
90 93 return result
91 94  
92 95 def _plot_iteration(self, iteration, points, clusters, centers):
93 96  
94 97  
... ... @@ -129,17 +132,18 @@
129 132 distances[n][k] = self._dist(X[n], self.C[k], self.L[k])
130 133  
131 134 closest_cluster = np.argmin(distances, axis=1)
  135 +
132 136 loss = np.sum(distances[np.arange(len(distances)), closest_cluster])
133 137 if debug:
134 138 print(f"loss {loss}")
135 139  
136 140  
137 141 # -- Debug tool ----------------------
138   - if debug and i % 10 == 0:
  142 + if debug and i % 1 == 0:
139 143 # TSNE if needed
140 144 X_embedded = np.concatenate((X, self.C), axis=0)
141 145 if d > 2:
142   - X_embedded = TSNE(n_components=2).fit_transform(np.concatenate((X, C), axis=0))
  146 + X_embedded = TSNE(n_components=2).fit_transform(np.concatenate((X, self.C), axis=0))
143 147  
144 148 # Then plot
145 149 self._plot_iteration(
146 150  
147 151  
148 152  
... ... @@ -151,22 +155,28 @@
151 155 # ------------------------------------
152 156  
153 157 old_c = self.C.copy()
154   - for k in range(K):
  158 + for k in range(self.K):
155 159 # Find subset of X with values closed to the centroid c_k.
156 160 X_sub = np.where(closest_cluster == k)
157 161 X_sub = np.take(X, X_sub[0], axis=0)
158 162 if X_sub.shape[0] == 0:
159 163 continue
160   - np.mean(X_sub, axis=0)
  164 +
161 165 C_new = np.mean(X_sub, axis=0)
162 166  
163 167 # -- COMPUTE NEW LAMBDA (here named K) --
164   - K_new = np.zeros((L.shape[1], L.shape[2]))
  168 + K_new = np.zeros((self.L.shape[1], self.L.shape[2]))
  169 + tmp = np.zeros((self.L.shape[1], self.L.shape[2]))
165 170 for x in X_sub:
166 171 x = np.reshape(x, (-1, 1))
167 172 c_tmp = np.reshape(C_new, (-1, 1))
168   - K_new = K_new + (x - c_tmp).dot((x - c_tmp).transpose())
169   - K_new = K_new / X_sub.shape[0]
  173 + #K_new = K_new + (x - c_tmp).dot((x - c_tmp).transpose())
  174 +
  175 + tmp = tmp + (x - c_tmp).dot((x - c_tmp).transpose())
  176 + if self.constrained:
  177 + K_new = (tmp / X_sub.shape[0]) / np.power(np.linalg.det((tmp / X_sub.shape[0])), 1/d)
  178 + else:
  179 + K_new = tmp / X_sub.shape[0]
170 180 K_new = np.linalg.pinv(K_new)
171 181  
172 182 #if end_algo and (not (self.C[k] == C_new).all()): # If the same stop