Commit 0c12dd8941fe2df65582721840c61d17a08a9c77
1 parent
b6841495fc
Exists in
master
usable script
Showing 1 changed file with 11 additions and 3 deletions Side-by-side Diff
bin/cluster_kmeans.py
| ... | ... | @@ -7,6 +7,7 @@ |
| 7 | 7 | import numpy as np |
| 8 | 8 | from sklearn.cluster import KMeans |
| 9 | 9 | from os import path |
| 10 | +from os import mkdir | |
| 10 | 11 | |
| 11 | 12 | import pickle |
| 12 | 13 | from data import read_file, index_by_id |
| ... | ... | @@ -18,6 +19,8 @@ |
| 18 | 19 | parser.add_argument("outdir", type=str, help="Output directory for k-means models") |
| 19 | 20 | parser.add_argument("--kmin", type=int, help="minimum k", default=2) |
| 20 | 21 | parser.add_argument("--kmax", type=int, help="maximum k", default=100) |
| 22 | +parser.add_argument("--allindir", type=bool, default=False, | |
| 23 | + help="all in same dir or separed ?") | |
| 21 | 24 | |
| 22 | 25 | args = vars(parser.parse_args()) |
| 23 | 26 | FEATURES = args["features"] |
| ... | ... | @@ -25,6 +28,7 @@ |
| 25 | 28 | OUTDIR = args["outdir"] |
| 26 | 29 | KMIN = args["kmin"] |
| 27 | 30 | KMAX = args["kmax"] |
| 31 | +ALLINDIR = args["allindir"] | |
| 28 | 32 | |
| 29 | 33 | # -- READ FILES -- |
| 30 | 34 | features = read_file(FEATURES) |
| 31 | 35 | |
| 32 | 36 | |
| ... | ... | @@ -32,11 +36,16 @@ |
| 32 | 36 | |
| 33 | 37 | lst = read_file(LST) |
| 34 | 38 | |
| 39 | +subdir = "" | |
| 35 | 40 | # -- TRANSFORM INTO NUMPY -- |
| 36 | 41 | X = np.asarray([feat_ind[x[0][0]][x[0][3]][1] for x in lst]) |
| 37 | - | |
| 38 | 42 | Ks = range(KMIN, KMAX+1) |
| 39 | 43 | for k in Ks: |
| 40 | 44 | kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X) |
| 41 | - pickle.dump(kmeans, open(path.join(OUTDIR, "clustering_" + str(k) + ".pkl"), "wb")) | |
| 45 | + if ALLINDIR is False: | |
| 46 | + subdir = str(k) | |
| 47 | + dirname=path.join(OUTDIR, subdir) | |
| 48 | + if not path.exists(dirname): | |
| 49 | + mkdir(dirname) | |
| 50 | + pickle.dump(kmeans, open(path.join(OUTDIR, subdir, "clustering_" + str(k) + ".pkl"), "wb")) |