Commit 4aa3a0ea73de5edd298638d217cc1ff337be95b1
1 parent
6bc3b63707
Exists in
master
Add --onlymeasures flag that allows the user to run the script without training n…
…ew clustering models. It only loads the already trained models and calculates the measures. Useful when you add new measures and you don't want to train the clustering models again.
Showing 1 changed file with 32 additions and 22 deletions Side-by-side Diff
scripts/evaluations/clustering.py
... | ... | @@ -30,28 +30,35 @@ |
30 | 30 | |
31 | 31 | def train_clustering(label_encoder, feats, classes, outdir): |
32 | 32 | num_classes = len(label_encoder.classes_) |
33 | + estimator = None | |
34 | + kmeans_filepath = os.path.join(outdir, f"{args.prefix}kmeans.pkl") | |
35 | + if args.onlymeasures: | |
36 | + print(f"Loading model: {kmeans_filepath}") | |
37 | + with open(kmeans_filepath, "rb") as f: | |
38 | + estimator = pickle.load(f) | |
39 | + else: | |
40 | + # Compute KMEANS clustering on data | |
41 | + print("Saving parameters") | |
42 | + kmeans_parameters = { | |
43 | + "n_clusters": num_classes, | |
44 | + "n_init": 100, | |
45 | + "tol": 10-6, | |
46 | + "algorithm": "elkan" | |
47 | + } | |
48 | + with open(os.path.join(outdir, f"{args.prefix}kmeans_parameters.json"), "w") as f: | |
49 | + json.dump(kmeans_parameters, f) | |
33 | 50 | |
34 | - # Compute KMEANS clustering on data | |
35 | - kmeans_parameters = { | |
36 | - "n_clusters": num_classes, | |
37 | - "n_init": 100, | |
38 | - "tol": 10-6, | |
39 | - "algorithm": "elkan" | |
40 | - } | |
41 | - with open(os.path.join(outdir, f"{args.prefix}kmeans_parameters.json"), "w") as f: | |
42 | - json.dump(kmeans_parameters, f) | |
51 | + # Fit the model and Save parameters | |
52 | + print(f"Fit the model: {kmeans_filepath}") | |
53 | + estimator = KMeans( | |
54 | + **kmeans_parameters | |
55 | + ) | |
56 | + estimator.fit(feats) | |
57 | + print(f"Kmeans: processed {estimator.n_iter_} iterations - intertia={estimator.inertia_}") | |
43 | 58 | |
44 | - # Save parameters | |
59 | + with open(kmeans_filepath, "wb") as f: | |
60 | + pickle.dump(estimator, f) | |
45 | 61 | |
46 | - estimator = KMeans( | |
47 | - **kmeans_parameters | |
48 | - ) | |
49 | - estimator.fit(feats) | |
50 | - print(f"Kmeans: processed {estimator.n_iter_} iterations - intertia={estimator.inertia_}") | |
51 | - | |
52 | - with open(os.path.join(outdir, f"{args.prefix}kmeans.pkl"), "wb") as f: | |
53 | - pickle.dump(estimator, f) | |
54 | - | |
55 | 62 | # contains distance to each cluster for each sample |
56 | 63 | dist_space = estimator.transform(feats) |
57 | 64 | predictions = np.argmin(dist_space, axis=1) |
... | ... | @@ -179,7 +186,10 @@ |
179 | 186 | parser.add_argument("--nmodels", |
180 | 187 | type=int, |
181 | 188 | default=1, |
182 | - help="specifies the number of models to train") | |
189 | + help="specifies the number of models to train") | |
190 | + parser.add_argument("--onlymeasures", | |
191 | + action='store_true', | |
192 | + help="Don't compute the clustering, compute only the measures") | |
183 | 193 | args = parser.parse_args() |
184 | 194 | |
185 | 195 | assert args.outdir |