Commit 4aa3a0ea73de5edd298638d217cc1ff337be95b1

Authored by Mathias
1 parent 6bc3b63707
Exists in master

Add --onlymeasures flag that allows the user to run the script without training n…

…ew clustering models. It only loads the already trained models and calculates the measures. Useful when you add new measures and you don't want to train the clustering models again.

Showing 1 changed file with 32 additions and 22 deletions Side-by-side Diff

scripts/evaluations/clustering.py
... ... @@ -30,28 +30,35 @@
30 30  
31 31 def train_clustering(label_encoder, feats, classes, outdir):
32 32 num_classes = len(label_encoder.classes_)
  33 + estimator = None
  34 + kmeans_filepath = os.path.join(outdir, f"{args.prefix}kmeans.pkl")
  35 + if args.onlymeasures:
  36 + print(f"Loading model: {kmeans_filepath}")
  37 + with open(kmeans_filepath, "rb") as f:
  38 + estimator = pickle.load(f)
  39 + else:
  40 + # Compute KMEANS clustering on data
  41 + print("Saving parameters")
  42 + kmeans_parameters = {
  43 + "n_clusters": num_classes,
  44 + "n_init": 100,
  45 + "tol": 10-6,
  46 + "algorithm": "elkan"
  47 + }
  48 + with open(os.path.join(outdir, f"{args.prefix}kmeans_parameters.json"), "w") as f:
  49 + json.dump(kmeans_parameters, f)
33 50  
34   - # Compute KMEANS clustering on data
35   - kmeans_parameters = {
36   - "n_clusters": num_classes,
37   - "n_init": 100,
38   - "tol": 10-6,
39   - "algorithm": "elkan"
40   - }
41   - with open(os.path.join(outdir, f"{args.prefix}kmeans_parameters.json"), "w") as f:
42   - json.dump(kmeans_parameters, f)
  51 + # Fit the model and Save parameters
  52 + print(f"Fit the model: {kmeans_filepath}")
  53 + estimator = KMeans(
  54 + **kmeans_parameters
  55 + )
  56 + estimator.fit(feats)
  57 + print(f"Kmeans: processed {estimator.n_iter_} iterations - intertia={estimator.inertia_}")
43 58  
44   - # Save parameters
  59 + with open(kmeans_filepath, "wb") as f:
  60 + pickle.dump(estimator, f)
45 61  
46   - estimator = KMeans(
47   - **kmeans_parameters
48   - )
49   - estimator.fit(feats)
50   - print(f"Kmeans: processed {estimator.n_iter_} iterations - intertia={estimator.inertia_}")
51   -
52   - with open(os.path.join(outdir, f"{args.prefix}kmeans.pkl"), "wb") as f:
53   - pickle.dump(estimator, f)
54   -
55 62 # contains distance to each cluster for each sample
56 63 dist_space = estimator.transform(feats)
57 64 predictions = np.argmin(dist_space, axis=1)
... ... @@ -179,7 +186,10 @@
179 186 parser.add_argument("--nmodels",
180 187 type=int,
181 188 default=1,
182   - help="specifies the number of models to train")
  189 + help="specifies the number of models to train")
  190 + parser.add_argument("--onlymeasures",
  191 + action='store_true',
  192 + help="Don't compute the clustering, compute only the measures")
183 193 args = parser.parse_args()
184 194  
185 195 assert args.outdir