Add --onlymeasures flag that allows the user to run the script without training n…

…ew clustering models. It only loads the already trained models and calculates the measures. Useful when you add new measures and you don't want to train clustering models again.

Add --onlymeasures flag that allows the user to run the script without training n…
…ew clustering models. It only loads the already trained models and calculates the measures. Useful when you add new measures and you don't want to train clustering models again.
Mathias
1 parent 6bc3b63707
Showing 1 changed file with 32 additions and 22 deletions Side-by-side Diff
scripts/evaluations/clustering.py
@@ -30,28 +30,35 @@
  
 def train_clustering(label_encoder, feats, classes, outdir):
     num_classes = len(label_encoder.classes_)
+    estimator = None
+    kmeans_filepath = os.path.join(outdir, f"{args.prefix}kmeans.pkl") 
+    if args.onlymeasures:
+        print(f"Loading model: {kmeans_filepath}")
+        with open(kmeans_filepath, "rb") as f:
+            estimator = pickle.load(f)
+    else:
+        # Compute KMEANS clustering on data
+        print("Saving parameters")
+        kmeans_parameters = {
+            "n_clusters": num_classes,
+            "n_init": 100,
+            "tol": 10-6,
+            "algorithm": "elkan"
+        }
+        with open(os.path.join(outdir, f"{args.prefix}kmeans_parameters.json"), "w") as f:
+            json.dump(kmeans_parameters, f)
  
-    # Compute KMEANS clustering on data
-    kmeans_parameters = {
-        "n_clusters": num_classes,
-        "n_init": 100,
-        "tol": 10-6,
-        "algorithm": "elkan"
-    }
-    with open(os.path.join(outdir, f"{args.prefix}kmeans_parameters.json"), "w") as f:
-        json.dump(kmeans_parameters, f)
+        # Fit the model and Save parameters
+        print(f"Fit the model: {kmeans_filepath}")
+        estimator = KMeans(
+            **kmeans_parameters
+        )
+        estimator.fit(feats)
+        print(f"Kmeans: processed {estimator.n_iter_} iterations - intertia={estimator.inertia_}")
  
-    # Save parameters
+        with open(kmeans_filepath, "wb") as f:
+            pickle.dump(estimator, f)
  
-    estimator = KMeans(
-        **kmeans_parameters
-    )
-    estimator.fit(feats)
-    print(f"Kmeans: processed {estimator.n_iter_} iterations - intertia={estimator.inertia_}")
-
-    with open(os.path.join(outdir, f"{args.prefix}kmeans.pkl"), "wb") as f:
-        pickle.dump(estimator, f)
-    
     # contains distance to each cluster for each sample
     dist_space = estimator.transform(feats)
     predictions = np.argmin(dist_space, axis=1)
@@ -179,7 +186,10 @@
     parser.add_argument("--nmodels",
                         type=int,
                         default=1,
-                        help="specifies the number of models to train")   
+                        help="specifies the number of models to train")
+    parser.add_argument("--onlymeasures",
+                        action='store_true',
+                        help="Don't compute the clustering, compute only the measures")
     args = parser.parse_args()
  
     assert args.outdir
...	...	@@ -30,28 +30,35 @@
30	30
31	31	def train_clustering(label_encoder, feats, classes, outdir):
32	32	num_classes = len(label_encoder.classes_)
	33	+ estimator = None
	34	+ kmeans_filepath = os.path.join(outdir, f"{args.prefix}kmeans.pkl")
	35	+ if args.onlymeasures:
	36	+ print(f"Loading model: {kmeans_filepath}")
	37	+ with open(kmeans_filepath, "rb") as f:
	38	+ estimator = pickle.load(f)
	39	+ else:
	40	+ # Compute KMEANS clustering on data
	41	+ print("Saving parameters")
	42	+ kmeans_parameters = {
	43	+ "n_clusters": num_classes,
	44	+ "n_init": 100,
	45	+ "tol": 10-6,
	46	+ "algorithm": "elkan"
	47	+ }
	48	+ with open(os.path.join(outdir, f"{args.prefix}kmeans_parameters.json"), "w") as f:
	49	+ json.dump(kmeans_parameters, f)
33	50
34		- # Compute KMEANS clustering on data
35		- kmeans_parameters = {
36		- "n_clusters": num_classes,
37		- "n_init": 100,
38		- "tol": 10-6,
39		- "algorithm": "elkan"
40		- }
41		- with open(os.path.join(outdir, f"{args.prefix}kmeans_parameters.json"), "w") as f:
42		- json.dump(kmeans_parameters, f)
	51	+ # Fit the model and Save parameters
	52	+ print(f"Fit the model: {kmeans_filepath}")
	53	+ estimator = KMeans(
	54	+ **kmeans_parameters
	55	+ )
	56	+ estimator.fit(feats)
	57	+ print(f"Kmeans: processed {estimator.n_iter_} iterations - intertia={estimator.inertia_}")
43	58
44		- # Save parameters
	59	+ with open(kmeans_filepath, "wb") as f:
	60	+ pickle.dump(estimator, f)
45	61
46		- estimator = KMeans(
47		- **kmeans_parameters
48		- )
49		- estimator.fit(feats)
50		- print(f"Kmeans: processed {estimator.n_iter_} iterations - intertia={estimator.inertia_}")
51		-
52		- with open(os.path.join(outdir, f"{args.prefix}kmeans.pkl"), "wb") as f:
53		- pickle.dump(estimator, f)
54		-
55	62	# contains distance to each cluster for each sample
56	63	dist_space = estimator.transform(feats)
57	64	predictions = np.argmin(dist_space, axis=1)
...	...	@@ -179,7 +186,10 @@
179	186	parser.add_argument("--nmodels",
180	187	type=int,
181	188	default=1,
182		- help="specifies the number of models to train")
	189	+ help="specifies the number of models to train")
	190	+ parser.add_argument("--onlymeasures",
	191	+ action='store_true',
	192	+ help="Don't compute the clustering, compute only the measures")
183	193	args = parser.parse_args()
184	194
185	195	assert args.outdir