Commit 0c12dd8941fe2df65582721840c61d17a08a9c77

Authored by Mathias Quillot
1 parent b6841495fc
Exists in master

usable script

Showing 1 changed file with 11 additions and 3 deletions Side-by-side Diff

bin/cluster_kmeans.py
... ... @@ -7,6 +7,7 @@
7 7 import numpy as np
8 8 from sklearn.cluster import KMeans
9 9 from os import path
  10 +from os import mkdir
10 11  
11 12 import pickle
12 13 from data import read_file, index_by_id
... ... @@ -18,6 +19,8 @@
18 19 parser.add_argument("outdir", type=str, help="Output directory for k-means models")
19 20 parser.add_argument("--kmin", type=int, help="minimum k", default=2)
20 21 parser.add_argument("--kmax", type=int, help="maximum k", default=100)
  22 +parser.add_argument("--allindir", type=bool, default=False,
  23 + help="all in same dir or separed ?")
21 24  
22 25 args = vars(parser.parse_args())
23 26 FEATURES = args["features"]
... ... @@ -25,6 +28,7 @@
25 28 OUTDIR = args["outdir"]
26 29 KMIN = args["kmin"]
27 30 KMAX = args["kmax"]
  31 +ALLINDIR = args["allindir"]
28 32  
29 33 # -- READ FILES --
30 34 features = read_file(FEATURES)
31 35  
32 36  
... ... @@ -32,11 +36,16 @@
32 36  
33 37 lst = read_file(LST)
34 38  
  39 +subdir = ""
35 40 # -- TRANSFORM INTO NUMPY --
36 41 X = np.asarray([feat_ind[x[0][0]][x[0][3]][1] for x in lst])
37   -
38 42 Ks = range(KMIN, KMAX+1)
39 43 for k in Ks:
40 44 kmeans = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X)
41   - pickle.dump(kmeans, open(path.join(OUTDIR, "clustering_" + str(k) + ".pkl"), "wb"))
  45 + if ALLINDIR is False:
  46 + subdir = str(k)
  47 + dirname=path.join(OUTDIR, subdir)
  48 + if not path.exists(dirname):
  49 + mkdir(dirname)
  50 + pickle.dump(kmeans, open(path.join(OUTDIR, subdir, "clustering_" + str(k) + ".pkl"), "wb"))