Commit 660d9960f95ade5bb2446df6177425097c9b71a9

Authored by quillotm
1 parent 78e6974959
Exists in master

Adding n_init parameters

Showing 2 changed files with 53 additions and 28 deletions.
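This commit threads a new n_init parameter from the command line down to the k-means-mahalanobis training loop: training now restarts ninit times from different random centroid seeds and keeps the lowest-loss run. Assuming the first changed file is the CLI entry point shown below (its path is not included in this view; file and directory names here are illustrative), a run exercising the new flag might look like:

    python clustering.py kmeans --features feats.json --lst train.lst -k 2 --kmax 8 --ninit 10 --mahalanobis --output exp/kmeans --debug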

import argparse
from os import path, mkdir
from utils import SubCommandRunner
from core.data import read_features, read_lst, read_labels
import numpy as np
from sklearn.cluster import KMeans
import pickle
from clustering_modules.kmeans import kmeans
from clustering_modules.kmeans_mahalanobis import kmeansMahalanobis

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import v_measure_score, homogeneity_score, completeness_score

import core.measures
import json


CLUSTERING_METHODS = {
    "k-means": kmeans(),
    "k-means-mahalanobis": kmeansMahalanobis()
}

EVALUATION_METHODS = {
    "entropy": core.measures.entropy_score,
    "purity": core.measures.purity_score,
    "v-measure": v_measure_score,
    "homogeneity": homogeneity_score,
    "completeness": completeness_score,
}


def disequilibrium_run():
    pass


def measure_run(measure: str, features: str, lst: str, truelabels: str, model: str, modeltype: str):
    """

    @param measure:
    @param features:
    @param lst:
    @param truelabels:
    @param model:
    @param modeltype:
    @return:
    """
    module = CLUSTERING_METHODS[modeltype]
    module.load(model)

    eval = {}
    for ms in measure:
        evaluation = EVALUATION_METHODS[ms]
        feats_dict = read_features(features)
        labels_dict = read_labels(truelabels)
        lst_dict = read_lst(lst)
        lst_keys = [key for key in lst_dict]
        feats = np.asarray([feats_dict[key] for key in lst_keys])
        Y_pred = module.predict(feats)
        Y_truth = [labels_dict[key][0] for key in lst_keys]

        le = LabelEncoder()
        le.fit(Y_truth)
        Y_truth = le.transform(Y_truth)

        eval[ms] = evaluation(Y_truth, Y_pred)

    print(json.dumps(eval))


def kmeans_run(features: str,
               lst: str,
               k: int,
               kmax: int,
               klist,
               maxiter: int,
               ninit: int,
               output: str,
               tol: float,
               debug: bool = False,
               mahalanobis: bool = False):
    """

    @param features: features file
    @param lst: list file
    @param k: k (kmin if kmax specified)
    @param kmax: maximum k to compute
    @param klist: list of k values to compute, ignore k value
    @param output: output file if kmax not specified, else, output directory
    @param mahalanobis: distance option of k-means.
    """
+    json_content = locals().copy()
+
+    def fit_model(k: int, output_file):
+        if debug:
+            print(f"Computing clustering with k={k}")
+        model = CLUSTERING_METHODS["k-means"]
+        if mahalanobis:
+            if debug:
+                print("Mahalanobis activated")
+            model = CLUSTERING_METHODS["k-means-mahalanobis"]
+        model.fit(X, k, tol, ninit, maxiter, debug)
+        model.save(output_file)
+        json_content["models"].append({
+            "model_file": output_file,
+            "k": k,
+        })
+
+    json_content["models"] = []
+
    # -- READ FILES --
    features_dict = read_features(features)
    lst_dict = read_lst(lst)
    X = np.asarray([features_dict[x] for x in lst_dict])

    # Exception cases
    if kmax is None and klist is None and path.isdir(output):
        raise Exception("The \"output\" is an existing directory while the system expects the path of a file.")

    if (kmax is not None or klist is not None) and path.isfile(output):
        raise Exception("The \"output\" is an existing file while the system expects the path of a directory.")

    # Mono value case
    if kmax is None and klist is None:
-        if debug:
-            print(f"Computing clustering with k={k}")
-        model = CLUSTERING_METHODS["k-means"]
-        if mahalanobis:
-            model = CLUSTERING_METHODS["k-means-mahalanobis"]
-        model.fit(X, k, tol, maxiter, debug)
-        model.save(output)
+        fit_model(k, output)

    # Multi values case with kmax
    if kmax is not None:
        if not path.isdir(output):
            mkdir(output)
        Ks = range(k, kmax + 1)
        for i in Ks:
-            model = CLUSTERING_METHODS["k-means"]
-            if mahalanobis:
-                model = CLUSTERING_METHODS["k-means-mahalanobis"]
-            model.fit(X, i, tol, maxiter, debug)
-            model.save(path.join(output, "clustering_" + str(i) + ".pkl"))
+            fit_model(i, path.join(output, "clustering_" + str(i) + ".pkl"))

    # Second multi values case with klist
    if klist is not None:
        if not path.isdir(output):
            mkdir(output)
        for k in klist:
            k = int(k)
-            model = CLUSTERING_METHODS["k-means"]
-            if mahalanobis:
-                model = CLUSTERING_METHODS["k-means-mahalanobis"]
-            model.fit(X, k, tol, maxiter, debug)
-            model.save(path.join(output, "clustering_" + str(k) + ".pkl"))
+            fit_model(k, path.join(output, "clustering_" + str(k) + ".pkl"))

-    # TODO: Output json to explain the end parameters like number of iteration, tol reached and stoped the process ?
-    # etc. (what distance, what parameters etc)
-    # TODO: Move example data into a directory.
-    # TODO: Add example receipts
+    print(json_content)
+    # TODO: compute loss with k-means mahalanobis.
    # TODO: n_init has to be taken into account for the mahalanobis case of the k-means algorithm.


if __name__ == "__main__":
    # Main parser
    parser = argparse.ArgumentParser(description="Clustering methods to apply")
    subparsers = parser.add_subparsers(title="action")

    # kmeans
    parser_kmeans = subparsers.add_parser(
        "kmeans", help="Compute clustering using k-means algorithm")

    parser_kmeans.add_argument("--features", required=True, type=str, help="Features file (works with list)")
    parser_kmeans.add_argument("--lst", required=True, type=str, help="List file (.lst)")
    parser_kmeans.add_argument("-k", default=2, type=int,
                               help="Number of clusters to compute. It is kmin if kmax is specified.")
    parser_kmeans.add_argument("--kmax", default=None, type=int, help="If specified, k is kmin.")
    parser_kmeans.add_argument("--klist", nargs="+",
                               help="List of k values to test. Like kmax, activates the multi-values mode.")
    parser_kmeans.add_argument("--maxiter",
                               type=int,
                               default=300,
                               help="Max number of iterations before stopping if not converging")
    parser_kmeans.add_argument("--ninit",
                               type=int,
                               default=10,
                               help="Number of times the k-means algorithm will be run with different centroid seeds.")
    parser_kmeans.add_argument("--tol",
                               type=float,
                               default=0.0001,
                               help="Convergence tolerance on the distance between centroids and their updates.")
    parser_kmeans.add_argument("--debug", action="store_true")
    parser_kmeans.add_argument("--output",
                               default=".kmeans",
                               help="Output file if only k is given; output directory if kmax or klist is specified.")
    parser_kmeans.add_argument("--mahalanobis", action="store_true")
    parser_kmeans.set_defaults(which="kmeans")

    # measure
    parser_measure = subparsers.add_parser(
        "measure", help="Compute clustering evaluation measures")

    parser_measure.add_argument("--measure",
                                required=True,
                                nargs="+",
                                choices=[key for key in EVALUATION_METHODS],
                                help="...")
    parser_measure.add_argument("--features", required=True, type=str, help="...")
    parser_measure.add_argument("--lst", required=True, type=str, help="...")
    parser_measure.add_argument("--truelabels", required=True, type=str, help="...")
    parser_measure.add_argument("--model", required=True, type=str, help="...")
    parser_measure.add_argument("--modeltype",
                                required=True,
                                choices=[key for key in CLUSTERING_METHODS],
                                help="type of model for learning")
    parser_measure.set_defaults(which="measure")

    # disequilibrium
    parser_disequilibrium = subparsers.add_parser(
        "disequilibrium", help="...")

    parser_disequilibrium.add_argument("--features", required=True, type=str, help="...")
    parser_disequilibrium.add_argument("--lstrain", required=True, type=str, help="...")
    parser_disequilibrium.add_argument("--lstest", required=True, type=str, help="...")
    parser_disequilibrium.add_argument("--model", required=True, type=str, help="...")
    parser_disequilibrium.add_argument("--model-type",
                                       required=True,
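The refactor above folds the three copies of the fit-and-save logic into the new fit_model helper, which also records each trained model in json_content, a copy of kmeans_run's arguments taken with locals() at function entry. As a rough sketch, the summary printed at the end of a --kmax run could look like the following (key names come from the diff; all values are invented):

json_content = {
    "features": "feats.json",
    "lst": "train.lst",
    "k": 2,
    "kmax": 4,
    "klist": None,
    "maxiter": 300,
    "ninit": 10,
    "output": "exp/kmeans",
    "tol": 0.0001,
    "debug": False,
    "mahalanobis": True,
    "models": [
        {"model_file": "exp/kmeans/clustering_2.pkl", "k": 2},
        {"model_file": "exp/kmeans/clustering_3.pkl", "k": 3},
        {"model_file": "exp/kmeans/clustering_4.pkl", "k": 4},
    ],
}

Note that print(json_content) emits the Python repr of this dict rather than strict JSON; json.dumps(json_content) would give the machine-readable output the removed TODO asked for.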
volia/clustering_modules/kmeans_mahalanobis.py


from sklearn.cluster import KMeans
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from abstract_clustering import AbstractClustering

class kmeansMahalanobis():
    def __init__(self):
        """

        """
        self.C = None
        self.L = None
        self.K = None

    def predict(self, features):
        """

        @param features:
        @return:
        """
        N = features.shape[0]
        distances = np.zeros((N, self.K))
        for n in range(N):
            for k in range(self.K):
                distances[n][k] = self._dist(features[n], self.C[k], self.L[k])
        closest_cluster = np.argmin(distances, axis=1)
        return closest_cluster

    def load(self, model_path):
        """

        @param model_path:
        @return:
        """
        data = None
        with open(model_path, "rb") as f:
            data = pickle.load(f)
        if data is None:
            raise Exception("The model could not be loaded")
        else:
            self.C = data["C"]
            self.L = data["L"]
            self.K = data["K"]

    def save(self, modelpath: str):
        """

        @param modelpath:
        @return:
        """
        data = {
            "C": self.C,
            "L": self.L,
            "K": self.K
        }
        with open(modelpath, "wb") as f:
            pickle.dump(data, f)

-    def fit(self, features, k: int, tol: float = 0.0001, maxiter: int=300, debug: bool=False):
-        self._train(features, k, tol, maxiter, debug)
+    def fit(self, features, k: int, tol: float, ninit: int, maxiter: int = 300, debug: bool = False):
+        results = []
+        for i in range(ninit):
+            results.append(self._train(features, k, tol, maxiter, debug))
+        losses = [v["loss"] for v in results]
+        best = results[losses.index(min(losses))]
+        if debug:
+            print(f"best: {best['loss']} loss")
+        self.C = best["C"]
+        self.L = best["L"]
+        self.K = best["K"]

    def _initialize_model(self, X, number_clusters):
        d = X.shape[1]
        C = X[np.random.choice(X.shape[0], number_clusters)]
        L = np.zeros((number_clusters, d, d))
        for k in range(number_clusters):
            L[k] = np.identity(d)
        return C, L

    def _dist(self, a, b, l):
        '''
        Mahalanobis-like distance: (a - b)^T l (a - b)
        '''
        a = np.reshape(a, (-1, 1))
        b = np.reshape(b, (-1, 1))
        result = np.transpose(a - b).dot(l).dot(a - b)[0][0]
        return result

    def _plot_iteration(self, iteration, points, clusters, centers):
        fig = plt.figure()
        ax = fig.add_subplot(111)
        scatter = ax.scatter(points[:, 0], points[:, 1], c=clusters, s=50)

        #for center in centers:
        #    ax.scatter(center[0], center[1], s=50, c='red', marker='+')
        ax.scatter(centers[:, 0], centers[:, 1], s=50, c='red', marker='+')

        ax.set_xlabel('x')
        ax.set_ylabel('y')
        plt.colorbar(scatter)
        #plt.ylim(0, 1)
        #plt.xlim(0, 1)
        plt.savefig("test_" + str(iteration) + ".pdf")

    def _train(self, features, K: int, tol: float, maxiter: int, debug: bool = False):
        X = features
        N = X.shape[0]
        d = X.shape[1]

-        X_embedded = None
        C, L = self._initialize_model(X, K)
        self.C = C
        self.L = L
        self.K = K

        end_algo = False
        i = 0
        while not end_algo:
            if debug:
                print("Iteration: ", i)

            # Compute the distance matrix
-            distances = np.zeros((N, K))
+            distances = np.zeros((N, self.K))

            for n in range(N):
                for k in range(self.K):
                    distances[n][k] = self._dist(X[n], self.C[k], self.L[k])
+
            closest_cluster = np.argmin(distances, axis=1)
+            loss = np.sum(distances[np.arange(len(distances)), closest_cluster])
+            if debug:
+                print(f"loss {loss}")

+
            # -- Debug tool ----------------------
            if debug and i % 10 == 0:
                # TSNE if needed
                X_embedded = np.concatenate((X, self.C), axis=0)
                if d > 2:
                    X_embedded = TSNE(n_components=2).fit_transform(np.concatenate((X, C), axis=0))

                # Then plot
                self._plot_iteration(
                    i,
                    X_embedded[:X.shape[0]],
                    closest_cluster,
                    X_embedded[X.shape[0]:]
                )
            # ------------------------------------

            old_c = self.C.copy()
            for k in range(K):
                # Find the subset of X with values close to the centroid c_k.
                X_sub = np.where(closest_cluster == k)
                X_sub = np.take(X, X_sub[0], axis=0)
                if X_sub.shape[0] == 0:
                    continue
                C_new = np.mean(X_sub, axis=0)

                # -- COMPUTE NEW LAMBDA (here named K) --
                K_new = np.zeros((L.shape[1], L.shape[2]))
                for x in X_sub:
                    x = np.reshape(x, (-1, 1))
                    c_tmp = np.reshape(C_new, (-1, 1))
                    K_new = K_new + (x - c_tmp).dot((x - c_tmp).transpose())
                K_new = K_new / X_sub.shape[0]
                K_new = np.linalg.pinv(K_new)

                #if end_algo and (not (self.C[k] == C_new).all()):  # If the same stop
                #    end_algo = False
                self.C[k] = C_new
                self.L[k] = K_new

+
            diff = np.sum(np.absolute((self.C - old_c) / old_c * 100))
            if diff > tol:
                end_algo = False
                if debug:
                    print(f"{diff}")
-            elif debug:
-                print(f"Tolerance threshold {tol} reached with diff {diff}")
+            else:
+                if debug:
+                    print(f"Tolerance threshold {tol} reached with diff {diff}")
                end_algo = True
+
            i = i + 1
            if i > maxiter:
                end_algo = True
                if debug:
                    print(f"Iteration {maxiter} reached")
+        return {
+            "loss": loss,
+            "C": self.C,
+            "K": self.K,
+            "L": self.L
+        }
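Together, the two changes implement a standard best-of-n-restarts scheme: _train() now returns its final loss alongside the model parameters, and fit() keeps the restart with the smallest loss. Below is a minimal, self-contained sketch of that pattern with illustrative names, using a plain squared-Euclidean loss in place of the Mahalanobis criterion:

import numpy as np

def train_once(X, k, rng):
    # Stand-in for kmeansMahalanobis._train: pick k random points as
    # centroids and report the clustering loss they induce.
    C = X[rng.choice(X.shape[0], k, replace=False)]
    d2 = ((X[:, None, :] - C[None, :, :]) ** 2).sum(axis=-1)  # (N, k) squared distances
    loss = d2.min(axis=1).sum()  # each point's distance to its closest centroid
    return {"loss": loss, "C": C}

def fit(X, k, ninit=10, seed=0):
    # Mirror of the new kmeansMahalanobis.fit(): run the trainer ninit
    # times and keep the lowest-loss restart.
    rng = np.random.default_rng(seed)
    results = [train_once(X, k, rng) for _ in range(ninit)]
    return min(results, key=lambda r: r["loss"])

best = fit(np.random.rand(200, 3), k=4)
print(best["loss"])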