Commit 0c12dd8941fe2df65582721840c61d17a08a9c77
1 parent
b6841495fc
Exists in
master
usable script
Showing 1 changed file with 11 additions and 3 deletions Side-by-side Diff
bin/cluster_kmeans.py
... | ... | @@ -7,6 +7,7 @@ |
7 | 7 | import numpy as np |
8 | 8 | from sklearn.cluster import KMeans |
9 | 9 | from os import path |
10 | +from os import mkdir | |
10 | 11 | |
11 | 12 | import pickle |
12 | 13 | from data import read_file, index_by_id |
... | ... | @@ -18,6 +19,8 @@ |
18 | 19 | parser.add_argument("outdir", type=str, help="Output directory for k-means models") |
19 | 20 | parser.add_argument("--kmin", type=int, help="minimum k", default=2) |
20 | 21 | parser.add_argument("--kmax", type=int, help="maximum k", default=100) |
22 | +parser.add_argument("--allindir", type=bool, default=False, | |
23 | + help="all in same dir or separed ?") | |
21 | 24 | |
22 | 25 | args = vars(parser.parse_args()) |
23 | 26 | FEATURES = args["features"] |
... | ... | @@ -25,6 +28,7 @@ |
25 | 28 | OUTDIR = args["outdir"] |
26 | 29 | KMIN = args["kmin"] |
27 | 30 | KMAX = args["kmax"] |
31 | +ALLINDIR = args["allindir"] | |
28 | 32 | |
29 | 33 | # -- READ FILES --
30 | 34 | features = read_file(FEATURES) |
31 | 35 | |
32 | 36 | |
... | ... | @@ -32,11 +36,16 @@ |
32 | 36 | |
33 | 37 | lst = read_file(LST) |
34 | 38 | |
39 | +subdir = "" | |
35 | 40 | # -- TRANSFORM INTO NUMPY -- |
36 | 41 | X = np.asarray([feat_ind[x[0][0]][x[0][3]][1] for x in lst]) |
37 | - | |
38 | 42 | Ks = range(KMIN, KMAX+1) |
39 | 43 | for k in Ks: |
40 | 44 | kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X) |
41 | - pickle.dump(kmeans, open(path.join(OUTDIR, "clustering_" + str(k) + ".pkl"), "wb")) | |
45 | + if ALLINDIR is False: | |
46 | + subdir = str(k) | |
47 | + dirname=path.join(OUTDIR, subdir) | |
48 | + if not path.exists(dirname): | |
49 | + mkdir(dirname) | |
50 | + pickle.dump(kmeans, open(path.join(OUTDIR, subdir, "clustering_" + str(k) + ".pkl"), "wb")) |