Commit a9912f135f481a97c6113e5723b33d69de6a919d
1 parent 05afc43e54
Exists in master
We can now specify the modeltype as a parameter of the kmeans learning command. This makes the command more flexible for future evolution.
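For illustration (not part of the commit), the kmeans subcommand would now name the clustering backend explicitly instead of toggling the removed --mahalanobis flag. The feature, list and output paths below are placeholders, and the script is assumed to be run from the volia directory so its local imports resolve:

    python clustering.py kmeans --features feats.txt --lst train.lst -k 8 \
        --modeltype k-means-mahalanobis --output model_k8.pkl

Any CLUSTERING_METHODS key starting with "k-means" (collected in KMEANS_METHODS) is accepted for --modeltype, so supporting a new distance only requires registering one more dictionary entry rather than adding another boolean flag.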
Showing 1 changed file with 14 additions and 10 deletions
volia/clustering.py
 import argparse
 from os import path, mkdir
 from utils import SubCommandRunner
 from core.data import read_features, read_lst, read_labels, write_line
 import numpy as np
 from sklearn.cluster import KMeans
 import pickle
 from clustering_modules.kmeans import kmeans
 from clustering_modules.kmeans_mahalanobis import kmeansMahalanobis
+from clustering_modules.kmeans_multidistance import kmeansMultidistance
 
 from sklearn.preprocessing import LabelEncoder
 from sklearn.metrics import v_measure_score, homogeneity_score, completeness_score
 
 import core.measures
 import json
 
 
 CLUSTERING_METHODS = {
     "k-means": kmeans(),
     "k-means-mahalanobis": kmeansMahalanobis(),
-    "k-means-mahalanobis-constrained": kmeansMahalanobis(constrained=True)
+    "k-means-mahalanobis-constrained": kmeansMahalanobis(constrained=True),
+    "k-means-basic-mahalanobis": kmeansMultidistance(distance="mahalanobis"),
+    "k-means-basic-cosine": kmeansMultidistance(distance="cosine")
 }
 
+KMEANS_METHODS = [key for key in CLUSTERING_METHODS if key.startswith("k-means")]
+
 EVALUATION_METHODS = {
     "entropy": core.measures.entropy_score,
     "purity": core.measures.purity_score,
     "v-measure": v_measure_score,
     "homogeneity": homogeneity_score,
     "completeness": completeness_score,
 }
 
 
 def disequilibrium_run():
     pass
 
 
 def measure_run(measure: str, features: str, lst: str, truelabels: str, model: str, modeltype: str):
     """
 
     @param measure:
     @param features:
     @param lst:
     @param truelabels:
     @param model:
     @param modeltype:
     @return:
     """
     module = CLUSTERING_METHODS[modeltype]
     module.load(model)
 
     eval = {}
     for ms in measure:
         evaluation = EVALUATION_METHODS[ms]
         feats_dict = read_features(features)
         labels_dict = read_labels(truelabels)
         lst_dict = read_lst(lst)
         lst_keys = [key for key in lst_dict]
         feats = np.asarray([feats_dict[key] for key in lst_keys])
         Y_pred = module.predict(feats)
         Y_truth = [labels_dict[key][0] for key in lst_keys]
 
         le = LabelEncoder()
         le.fit(Y_truth)
         Y_truth = le.transform(Y_truth)
 
         eval[ms] = evaluation(Y_truth, Y_pred)
 
     print(json.dumps(eval))
 
 
 def kmeans_run(features: str,
                lst: str,
                k:int,
                kmax: int,
                klist,
                maxiter: int,
                ninit: int,
                output: str,
                tol: float,
-               debug: bool = False,
-               mahalanobis: str = False):
+               modeltype: str,
+               debug: bool = False):
     """
 
     @param features: output features
     @param lst: list file
     @param k: k (kmin if kmax specified)
     @param kmax: maximum k to compute
     @param klist: list of k values to compute, ignore k value
     @param output: output file if kmax not specified, else, output directory
     @param mahalanobis: distance option of k-means.
     """
     json_content = locals().copy()
 
     def fit_model(k: int, output_file):
         if debug:
             print(f"Computing clustering with k={k}")
-        model = CLUSTERING_METHODS["k-means"]
-        if mahalanobis:
-            if debug:
-                print("Mahalanobis activated")
-            model = CLUSTERING_METHODS["k-means-mahalanobis-constrained"]
+        model = CLUSTERING_METHODS[modeltype]
         model.fit(X, k, tol, ninit, maxiter, debug)
         model.save(output_file)
         json_content["models"].append({
             "model_file": output_file,
             "k": k,
         })
 
     json_content["models"] = []
 
     # -- READ FILES --
     features_dict = read_features(features)
     lst_dict = read_lst(lst)
     X = np.asarray([features_dict[x] for x in lst_dict])
 
     # Exception cases
     if kmax is None and klist is None and path.isdir(output):
         raise Exception("The \"output\" is an existing directory while the system is waiting the path of a file.")
 
     if (kmax is not None or klist is not None) and path.isfile(output):
         raise Exception("The \"output\" is an existing file while the system is waiting the path of a directory.")
 
     # Mono value case
     if kmax is None and klist is None:
         fit_model(k, output)
 
     # Multi values case with kmax
     if kmax is not None:
         if not path.isdir(output):
             mkdir(output)
         Ks = range(k, kmax + 1)
         for i in Ks:
             fit_model(i, path.join(output, "clustering_" + str(i) + ".pkl"))
 
     # Second multi values case with klist
     if klist is not None:
         if not path.isdir(output):
             mkdir(output)
         for k in klist:
             k = int(k)
             fit_model(k, path.join(output, "clustering_" + str(k) + ".pkl"))
 
     print(json.dumps(json_content))
 
 
 def extract_run(features, lst, model, modeltype, outfile):
     feats_dict = read_features(features)
     lst_dict = read_lst(lst)
     lst_keys = [key for key in lst_dict]
     feats = np.asarray([feats_dict[key] for key in lst_keys])
 
     module = CLUSTERING_METHODS[modeltype]
     module.load(model)
     Y_pred = module.predict(feats)
     with open(outfile, "w") as f:
         for i, key in enumerate(lst_keys):
             write_line(key, Y_pred[i], f)
     json_output = {
         "outfile": outfile
     }
     print(json.dumps(json_output))
 
 
 if __name__ == "__main__":
     # Main parser
     parser = argparse.ArgumentParser(description="Clustering methods to apply")
     subparsers = parser.add_subparsers(title="action")
 
     # kmeans
     parser_kmeans = subparsers.add_parser(
         "kmeans", help="Compute clustering using k-means algorithm")
 
     parser_kmeans.add_argument("--features", required=True, type=str, help="Features file (works with list)")
     parser_kmeans.add_argument("--lst", required=True, type=str, help="List file (.lst)")
     parser_kmeans.add_argument("-k", default=2, type=int,
                                help="number of clusters to compute. It is kmin if kmax is specified.")
     parser_kmeans.add_argument("--kmax", default=None, type=int, help="if specified, k is kmin.")
     parser_kmeans.add_argument("--klist", nargs="+",
                                help="List of k values to test. As kmax, activate the multi values mod.")
     parser_kmeans.add_argument("--maxiter",
                                type=int,
                                default=300,
                                help="Max number of iteration before stoping if not converging")
     parser_kmeans.add_argument("--ninit",
                                type=int,
                                default=10,
                                help="Number of time the k-means algorithm will be run with different centroid seeds.")
     parser_kmeans.add_argument("--tol",
                                type=float,
                                default=0.0001,
                                help="Tolerance to finish of distance between centroids and their updates.")
     parser_kmeans.add_argument("--debug", action="store_true")
     parser_kmeans.add_argument("--output",
                                default=".kmeans",
                                help="output file if only k. Output directory if multiple kmax specified.")
-    parser_kmeans.add_argument("--mahalanobis", action="store_true")
+    parser_kmeans.add_argument("--modeltype",
+                               required=True,
+                               choices=KMEANS_METHODS,
+                               help="type of model for learning")
     parser_kmeans.set_defaults(which="kmeans")
 
     # measure
     parser_measure = subparsers.add_parser(
         "measure", help="compute the entropy")
 
     parser_measure.add_argument("--measure",
                                 required=True,
                                 nargs="+",
                                 choices=[key for key in EVALUATION_METHODS],
                                 help="...")
     parser_measure.add_argument("--features", required=True, type=str, help="...")
     parser_measure.add_argument("--lst", required=True, type=str, help="...")
     parser_measure.add_argument("--truelabels", required=True, type=str, help="...")
     parser_measure.add_argument("--model", required=True, type=str, help="...")
     parser_measure.add_argument("--modeltype",
                                 required=True,
                                 choices=[key for key in CLUSTERING_METHODS],
                                 help="type of model for learning")
     parser_measure.set_defaults(which="measure")
 
     # disequilibrium
     parser_disequilibrium = subparsers.add_parser(
         "disequilibrium", help="...")
 
     parser_disequilibrium.add_argument("--features", required=True, type=str, help="...")
     parser_disequilibrium.add_argument("--lstrain", required=True, type=str, help="...")
     parser_disequilibrium.add_argument("--lstest", required=True, type=str, help="...")
     parser_disequilibrium.add_argument("--model", required=True, type=str, help="...")
-    parser_disequilibrium.add_argument("--model-type",
+    parser_disequilibrium.add_argument("--modeltype",
                                        required=True,
                                        choices=["kmeans", "2", "3"],
                                        help="...")
     parser_disequilibrium.set_defaults(which="disequilibrium")
 
     # Extract
     parser_extract = subparsers.add_parser(
         "extract", help="extract cluster labels")
 
     parser_extract.add_argument("--features", required=True, type=str, help="...")
     parser_extract.add_argument("--lst", required=True, type=str, help="...")
     parser_extract.add_argument("--model", required=True, type=str, help="...")
     parser_extract.add_argument("--modeltype",
                                 required=True,
                                 choices=[key for key in CLUSTERING_METHODS],
                                 help="type of model for learning")
     parser_extract.add_argument("--outfile", required=True, type=str, help="...")
     parser_extract.set_defaults(which="extract")
 
     # Parse
     args = parser.parse_args()
 
     # Run commands
     runner = SubCommandRunner({
         "kmeans": kmeans_run,
         "measure": measure_run,
         "disequilibrium": disequilibrium_run,
         "extract": extract_run