Commit a9912f135f481a97c6113e5723b33d69de6a919d
1 parent 05afc43e54
Exists in master
We can now specify the model type as a parameter of the kmeans learning command. This makes the command more open to future evolution.
Showing 1 changed file with 14 additions and 10 deletions
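The core of the change is a name-to-model registry plus an argparse choices list derived from it: the kmeans subcommand now looks the model up by the value of --modeltype instead of toggling a --mahalanobis flag. Below is a minimal, self-contained sketch of that pattern; FakeKmeans is a placeholder standing in for the real kmeans()/kmeansMahalanobis()/kmeansMultidistance() classes from clustering_modules, which are not shown in this diff.

import argparse

class FakeKmeans:
    """Placeholder for the real clustering model classes."""
    def __init__(self, **kwargs):
        self.kwargs = kwargs

# Registry: one entry per selectable model type (mirrors CLUSTERING_METHODS).
CLUSTERING_METHODS = {
    "k-means": FakeKmeans(),
    "k-means-mahalanobis-constrained": FakeKmeans(constrained=True),
    "k-means-basic-cosine": FakeKmeans(distance="cosine"),
}

# Valid --modeltype values are derived from the registry keys (mirrors KMEANS_METHODS).
KMEANS_METHODS = [key for key in CLUSTERING_METHODS if key.startswith("k-means")]

parser = argparse.ArgumentParser()
parser.add_argument("--modeltype", required=True, choices=KMEANS_METHODS,
                    help="type of model for learning")
args = parser.parse_args(["--modeltype", "k-means-basic-cosine"])

# Dispatch is a plain dictionary lookup, as in fit_model() below.
model = CLUSTERING_METHODS[args.modeltype]
print(type(model).__name__, model.kwargs)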
volia/clustering.py
@@ -7,6 +7,7 @@
 import pickle
 from clustering_modules.kmeans import kmeans
 from clustering_modules.kmeans_mahalanobis import kmeansMahalanobis
+from clustering_modules.kmeans_multidistance import kmeansMultidistance
 
 from sklearn.preprocessing import LabelEncoder
 from sklearn.metrics import v_measure_score, homogeneity_score, completeness_score
 
@@ -18,9 +19,13 @@
 CLUSTERING_METHODS = {
     "k-means": kmeans(),
     "k-means-mahalanobis": kmeansMahalanobis(),
-    "k-means-mahalanobis-constrained": kmeansMahalanobis(constrained=True)
+    "k-means-mahalanobis-constrained": kmeansMahalanobis(constrained=True),
+    "k-means-basic-mahalanobis": kmeansMultidistance(distance="mahalanobis"),
+    "k-means-basic-cosine": kmeansMultidistance(distance="cosine")
 }
 
+KMEANS_METHODS = [key for key in CLUSTERING_METHODS if key.startswith("k-means")]
+
 EVALUATION_METHODS = {
     "entropy": core.measures.entropy_score,
     "purity": core.measures.purity_score,
@@ -77,8 +82,8 @@
            ninit: int,
            output: str,
            tol: float,
-           debug: bool = False,
-           mahalanobis: str = False):
+           modeltype: str,
+           debug: bool = False):
     """
 
     @param features: output features
@@ -94,11 +99,7 @@
     def fit_model(k: int, output_file):
         if debug:
             print(f"Computing clustering with k={k}")
-        model = CLUSTERING_METHODS["k-means"]
-        if mahalanobis:
-            if debug:
-                print("Mahalanobis activated")
-            model = CLUSTERING_METHODS["k-means-mahalanobis-constrained"]
+        model = CLUSTERING_METHODS[modeltype]
         model.fit(X, k, tol, ninit, maxiter, debug)
         model.save(output_file)
         json_content["models"].append({
@@ -193,7 +194,10 @@
     parser_kmeans.add_argument("--output",
                                default=".kmeans",
                                help="output file if only k. Output directory if multiple kmax specified.")
-    parser_kmeans.add_argument("--mahalanobis", action="store_true")
+    parser_kmeans.add_argument("--modeltype",
+                               required=True,
+                               choices=KMEANS_METHODS,
+                               help="type of model for learning")
     parser_kmeans.set_defaults(which="kmeans")
 
     # measure
@@ -223,7 +227,7 @@
     parser_disequilibrium.add_argument("--lstrain", required=True, type=str, help="...")
     parser_disequilibrium.add_argument("--lstest", required=True, type=str, help="...")
     parser_disequilibrium.add_argument("--model", required=True, type=str, help="...")
-    parser_disequilibrium.add_argument("--model-type",
+    parser_disequilibrium.add_argument("--modeltype",
                                        required=True,
                                        choices=["kmeans", "2", "3"],
                                        help="...")
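Because fit_model() only calls model.fit(X, k, tol, ninit, maxiter, debug) and model.save(output_file), adding a new variant amounts to implementing that interface and registering one more dictionary entry. The sketch below is hypothetical (kmeansEuclidean is an invented name, not part of the repository) and assumes scikit-learn as the backend and pickle for persistence, mirroring the imports already present in volia/clustering.py.

import pickle
from sklearn.cluster import KMeans

class kmeansEuclidean:
    """Hypothetical model that could be registered in CLUSTERING_METHODS."""

    def __init__(self):
        self.model = None

    def fit(self, features, k, tol, ninit, maxiter, debug=False):
        # Same call signature as used by fit_model() in the diff above.
        if debug:
            print(f"Fitting plain euclidean k-means with k={k}")
        self.model = KMeans(n_clusters=k, tol=tol, n_init=ninit, max_iter=maxiter)
        self.model.fit(features)

    def save(self, output_file):
        # Persist the fitted estimator; pickle is already imported by clustering.py.
        with open(output_file, "wb") as f:
            pickle.dump(self.model, f)

# Registering it would be one extra entry in the CLUSTERING_METHODS literal:
#     "k-means-euclidean": kmeansEuclidean(),
# and the new name would then appear automatically in the --modeltype choices
# because KMEANS_METHODS is derived from the registry keys.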