Commit d414b83e18cdc5d0313f6880349609082dc035c1
1 parent
7c16f9bfe8
Exists in
master
add Bottleneck MLP + script
Showing 16 changed files with 719 additions and 0 deletions Side-by-side Diff
- BOTTLENECK/01a-mlp_proj.py
- BOTTLENECK/02a-mlp_score_on_BN.py
- BOTTLENECK/02b-transfert_ae.py
- BOTTLENECK/02c-tsne_mlproj.py
- BOTTLENECK/03-mlp_score_on_transfert.py
- BOTTLENECK/04-accuracyscore.py
- BOTTLENECK/mlp.py
- BOTTLENECK/run01_do_alljson.sh
- BOTTLENECK/run02_mlpscore.sh
- BOTTLENECK/run02b-transfert.sh
- BOTTLENECK/run03_tsne_MLPtransfert.sh
- BOTTLENECK/run04-mlp_on_transfert.sh
- BOTTLENECK/run05_accuracy.sh
- BOTTLENECK/run_all.sh
- BOTTLENECK/run_one.sh
- BOTTLENECK/utils.py
BOTTLENECK/01a-mlp_proj.py
1 | + | |
2 | +# coding: utf-8 | |
3 | + | |
4 | +# In[2]: | |
5 | + | |
6 | +# Import | |
7 | +import gensim | |
8 | +from scipy import sparse | |
9 | +import itertools | |
10 | +from sklearn import preprocessing | |
11 | +from keras.models import Sequential | |
12 | +from keras.optimizers import SGD,Adam | |
13 | +from keras.layers.advanced_activations import ELU,PReLU | |
14 | +from keras.callbacks import ModelCheckpoint | |
15 | +from mlp import * | |
16 | +import sklearn.metrics | |
17 | +from sklearn.preprocessing import LabelBinarizer | |
18 | +import shelve | |
19 | +import pickle | |
20 | +from utils import * | |
21 | +import sys | |
22 | +import os | |
23 | +import json | |
24 | +# In[4]: | |
25 | + | |
26 | +infer_model=shelve.open("{}".format(sys.argv[2])) | |
27 | +in_dir = sys.argv[1] | |
28 | +#['ASR', 'TRS', 'LABEL'] | |
29 | +# In[6]: | |
30 | +if len(sys.argv) > 4 : | |
31 | + features_key = sys.argv[4] | |
32 | +else : | |
33 | + features_key = "LDA" | |
34 | +save_projection = True | |
35 | +json_conf =json.load(open(sys.argv[3])) | |
36 | +ae_conf = json_conf["mlp_proj"] | |
37 | + | |
38 | +hidden_size= ae_conf["hidden_size"] | |
39 | +input_activation = None | |
40 | +if ae_conf["input_activation"] == "elu": | |
41 | + print " ELU" | |
42 | + input_activation = PReLU() | |
43 | +else: | |
44 | + print " ELSE" | |
45 | + input_activation = ae_conf["input_activation"] | |
46 | +#input_activation=ae_conf["input_activation"] | |
47 | +output_activation=ae_conf["output_activation"] | |
48 | +loss=ae_conf["loss"] | |
49 | +epochs=ae_conf["epochs"] | |
50 | +batch_size=ae_conf["batch"] | |
51 | +patience=ae_conf["patience"] | |
52 | +dropouts=ae_conf["do"] | |
53 | +try: | |
54 | + k = ae_conf["sgd"] | |
55 | + if ae_conf["sgd"]["name"] == "adam": | |
56 | + sgd = Adam(lr=ae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | |
57 | + elif ae_conf["sgd"]["name"] == "sgd": | |
58 | + sgd = SGD(lr=ae_conf["sgd"]["lr"]) | |
59 | +except: | |
60 | + sgd = ae_conf["sgd"] | |
61 | + | |
62 | +mlp_conf = json_conf["mlp"] | |
63 | +mlp_h = mlp_conf["hidden_size"] | |
64 | +mlp_loss = mlp_conf["loss"] | |
65 | +mlp_dropouts = mlp_conf["do"] | |
66 | +mlp_epochs = mlp_conf["epochs"] | |
67 | +mlp_batch_size = mlp_conf["batch"] | |
68 | +mlp_input_activation=mlp_conf["input_activation"] | |
69 | +mlp_output_activation=mlp_conf["output_activation"] | |
70 | + | |
71 | +try: | |
72 | + k = mlp_conf["sgd"] | |
73 | + if mlp_conf["sgd"]["name"] == "adam": | |
74 | + mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | |
75 | + elif mlp_conf["sgd"]["name"] == "sgd": | |
76 | + mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"]) | |
77 | +except: | |
78 | + mlp_sgd = mlp_conf["sgd"] | |
79 | + | |
80 | + | |
81 | +name = json_conf["name"] | |
82 | +try : | |
83 | + os.mkdir("{}/{}".format(in_dir,name)) | |
84 | +except OSError : | |
85 | + pass | |
86 | +db = shelve.open("{}/{}/labels.shelve".format(in_dir,name)) | |
87 | +db["IDS"]=dict(infer_model["LABEL"]) | |
88 | +# | |
89 | +keys = infer_model[features_key].keys() | |
90 | +LABELS = {} | |
91 | +for mod in keys : | |
92 | + | |
93 | + int_labels_train = map(select,infer_model["LABEL"][mod]["TRAIN"]) | |
94 | + binarizer = LabelBinarizer() | |
95 | + y_train=binarizer.fit_transform(int_labels_train) | |
96 | + y_dev=binarizer.transform(map(select,infer_model["LABEL"][mod]["DEV"])) | |
97 | + y_test=binarizer.transform(map(select,infer_model["LABEL"][mod]["TEST"])) | |
98 | + LABELS[mod]= { "TRAIN":y_train , "DEV" : y_dev, "TEST" : y_test} | |
99 | + sumary,proj = train_mlp_proj(infer_model[features_key][mod]["TRAIN"].todense(),y_train, | |
100 | + infer_model[features_key][mod]["DEV"].todense(),y_dev, | |
101 | + infer_model[features_key][mod]["TEST"].todense(),y_test, | |
102 | + hidden_size ,sgd=sgd, | |
103 | + epochs=epochs, | |
104 | + patience=patience, | |
105 | + batch_size=batch_size, | |
106 | + input_activation=input_activation, | |
107 | + output_activation=output_activation, | |
108 | + dropouts=dropouts, | |
109 | + fit_verbose=1) | |
110 | + with open("{}/{}/{}_sum.txt".format(in_dir,name,mod),"w") as output_sum : | |
111 | + print >>output_sum, sumary | |
112 | + for num_lvl,level in enumerate(proj): | |
113 | + print len(level) | |
114 | + for num,corp_type in enumerate(["TRAIN","DEV","TEST"]): | |
115 | + pd = pandas.DataFrame(level[num]) | |
116 | + pd.to_hdf("{}/{}/MLP_proj_df.hdf".format(in_dir,name),"{}/lvl{}/{}".format(mod,num_lvl,corp_type)) | |
117 | +db["LABEL"] = LABELS | |
118 | +db.sync() | |
119 | +db.close() |
BOTTLENECK/02a-mlp_score_on_BN.py
#!/usr/bin/env python
# coding: utf-8
"""Train an MLP classifier on every bottleneck projection produced by 01a.

Usage:
    02a-mlp_score_on_BN.py <in_dir> <conf.json>

Reads <in_dir>/<name>/MLP_proj_df.hdf, trains one classifier per
(modality, layer) pair and stores the predicted labels in
<in_dir>/<name>/labels.shelve plus the training histories in
<in_dir>/<name>/02a_logs.shelve.
"""
# Imports
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
json_conf = json.load(open(sys.argv[2]))

mlp_conf = json_conf["mlp"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

# mlp_conf["sgd"] is either {"name": "adam"|"sgd", "lr": float} or a raw
# keras optimizer spec (e.g. a string) used as-is.  Replaces the previous
# bare ``except:`` that hid every error while probing the config.
sgd_conf = mlp_conf["sgd"]
if isinstance(sgd_conf, dict) and sgd_conf.get("name") == "adam":
    sgd = Adam(lr=sgd_conf["lr"])
elif isinstance(sgd_conf, dict) and sgd_conf.get("name") == "sgd":
    sgd = SGD(lr=sgd_conf["lr"])
else:
    sgd = sgd_conf
name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))
shelve_logs = shelve.open("{}/{}/02a_logs.shelve".format(in_dir, name))

hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir, name)
# `pandas`/`np` are expected to come from the star imports above.
proj_hdf = pandas.HDFStore(hdf_proj_path)
hdf_keys = proj_hdf.keys()
proj_hdf.close()
# HDF keys have the shape "/<mod>/<lvl>/<TRAIN|DEV|TEST>".
hdf_mods = set(x.split("/")[1] for x in hdf_keys)
hdf_lvl = set(x.split("/")[2] for x in hdf_keys)
hdf_crossval = set(x.split("/")[3] for x in hdf_keys)
print(hdf_mods)
print(hdf_lvl)
print(hdf_crossval)

labels_dict = {"origine": {}}
for lvl in hdf_lvl:
    labels_dict[lvl] = {}
    for mod in hdf_mods:
        labels_dict[lvl][mod] = {}

for mod in hdf_mods:
    for lvl in hdf_lvl:
        x_train = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TRAIN"))
        x_dev = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "DEV"))
        x_test = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TEST"))
        if x_train.shape[1] <= 8:
            # A projection this narrow is the softmax output layer: its
            # argmax already is a label prediction, no classifier needed.
            labels_dict["origine"]["TRAIN"] = np.argmax(x_train.values, axis=1)
            labels_dict["origine"]["DEV"] = np.argmax(x_dev.values, axis=1)
            labels_dict["origine"]["TEST"] = np.argmax(x_test.values, axis=1)
            continue
        y_train = db["LABEL"][mod]["TRAIN"]
        y_dev = db["LABEL"][mod]["DEV"]
        y_test = db["LABEL"][mod]["TEST"]

        print(x_train.shape)
        print(x_dev.shape)
        print(x_test.shape)
        print(y_train.shape)
        print(y_dev.shape)
        print(y_test.shape)
        pred, hist = train_mlp_pred(x_train.values, y_train,
                                    x_dev.values, y_dev,
                                    x_test.values, y_test,
                                    hidden_size, sgd=sgd,
                                    epochs=epochs,
                                    patience=patience,
                                    batch_size=batch_size,
                                    input_activation=input_activation,
                                    output_activation=output_activation,
                                    dropouts=dropouts,
                                    fit_verbose=1)
        shelve_logs["{}/{}".format(mod, lvl)] = hist
        labels_dict[lvl][mod]["TRAIN"] = np.argmax(pred[0], axis=1)
        labels_dict[lvl][mod]["DEV"] = np.argmax(pred[1], axis=1)
        labels_dict[lvl][mod]["TEST"] = np.argmax(pred[2], axis=1)

for lvl in hdf_lvl:
    db[lvl] = labels_dict[lvl]
shelve_logs.sync()
shelve_logs.close()
db.sync()
db.close()
BOTTLENECK/02b-transfert_ae.py
#!/usr/bin/env python
# coding: utf-8
"""Learn a transfer auto-encoder mapping ASR bottleneck projections to TRS ones.

Usage:
    02b-transfert_ae.py <in_dir> <conf.json>

For every projection level of <in_dir>/<name>/MLP_proj_df.hdf, trains an
auto-encoder with ASR features as input and TRS features as target, then
writes each layer's projection to <in_dir>/<name>/transfert_proj_df.hdf
under "<lvl>/layer<i>/<TRAIN|DEV|TEST>".
"""
# Imports
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import pandas as pd
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
json_conf = json.load(open(sys.argv[2]))

mlp_conf = json_conf["transfert"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

# mlp_conf["sgd"] is either {"name": "adam"|"sgd", "lr": float} or a raw
# keras optimizer spec used as-is.  Replaces the previous bare ``except:``.
sgd_conf = mlp_conf["sgd"]
if isinstance(sgd_conf, dict) and sgd_conf.get("name") == "adam":
    sgd = Adam(lr=sgd_conf["lr"])
elif isinstance(sgd_conf, dict) and sgd_conf.get("name") == "sgd":
    sgd = SGD(lr=sgd_conf["lr"])
else:
    sgd = sgd_conf
name = json_conf["name"]

# Consistently use the explicit `pd` alias (the original mixed it with the
# `pandas` name coming from star imports).
hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir, name)
proj_hdf = pd.HDFStore(hdf_proj_path)
hdf_keys = proj_hdf.keys()
proj_hdf.close()
# HDF keys have the shape "/<mod>/<lvl>/<TRAIN|DEV|TEST>".
hdf_mods = set(x.split("/")[1] for x in hdf_keys)
hdf_lvl = set(x.split("/")[2] for x in hdf_keys)
hdf_crossval = set(x.split("/")[3] for x in hdf_keys)
print(hdf_mods)
print(hdf_lvl)
print(hdf_crossval)

transfert_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir, name)
mod1, mod2 = "ASR", "TRS"
for lvl in hdf_lvl:
    x_train_ASR = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod1, lvl, "TRAIN"))
    x_dev_ASR = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod1, lvl, "DEV"))
    x_test_ASR = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod1, lvl, "TEST"))
    x_train_TRS = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod2, lvl, "TRAIN"))
    x_dev_TRS = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod2, lvl, "DEV"))
    x_test_TRS = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod2, lvl, "TEST"))

    if x_train_ASR.shape[1] <= 8:
        # The narrow softmax layer carries labels, not features: skip it.
        continue

    pred = train_ae(x_train_ASR.values,
                    x_dev_ASR.values,
                    x_test_ASR.values,
                    hidden_size, sgd=sgd,
                    y_train=x_train_TRS.values,
                    y_dev=x_dev_TRS.values,
                    y_test=x_test_TRS.values,
                    epochs=epochs,
                    patience=patience,
                    batch_size=batch_size,
                    input_activation=input_activation,
                    output_activation=output_activation,
                    dropouts=dropouts,
                    best_mod=True,
                    verbose=1)
    # pred: one entry per AE layer, each holding [TRAIN, DEV, TEST] arrays.
    for num_layer, layer in enumerate(pred):
        for split_idx, split in enumerate(["TRAIN", "DEV", "TEST"]):
            pd.DataFrame(layer[split_idx]).to_hdf(
                transfert_proj_path,
                "{}/layer{}/{}".format(lvl, num_layer, split))
BOTTLENECK/02c-tsne_mlproj.py
#!/usr/bin/env python
# coding: utf-8
"""Project the MLP and transfert bottleneck features to 2-D with t-SNE.

Usage:
    02c-tsne_mlproj.py <in_dir> <conf.json>

Writes the embeddings to <in_dir>/<name>/tsne_proj_df.hdf under
"MLP/<mod>/<lvl>/<split>" and "transfert/<lvl>/<layer>/<split>", where
split is TRAIN, DEV, TEST or CONCAT (the three splits stacked).
"""
# Imports
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import pandas as pd
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
from sklearn.manifold import TSNE
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
json_conf = json.load(open(sys.argv[2]))

# The configuration is parsed for parity with the other pipeline steps even
# though only the file paths actually matter for t-SNE.
mlp_conf = json_conf["transfert"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

# mlp_conf["sgd"] is either {"name": "adam"|"sgd", "lr": float} or a raw
# keras optimizer spec used as-is.  Replaces the previous bare ``except:``.
sgd_conf = mlp_conf["sgd"]
if isinstance(sgd_conf, dict) and sgd_conf.get("name") == "adam":
    sgd = Adam(lr=sgd_conf["lr"])
elif isinstance(sgd_conf, dict) and sgd_conf.get("name") == "sgd":
    sgd = SGD(lr=sgd_conf["lr"])
else:
    sgd = sgd_conf
name = json_conf["name"]


def _tsne_to_hdf(frame, out_path, key):
    """Fit a fresh 2-D t-SNE on frame's values and store it under key."""
    pd.DataFrame(TSNE().fit_transform(frame.values)).to_hdf(out_path, key=key)


print(" MLP")
hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir, name)
proj_hdf = pd.HDFStore(hdf_proj_path)
hdf_keys = proj_hdf.keys()
proj_hdf.close()
# HDF keys have the shape "/<mod>/<lvl>/<TRAIN|DEV|TEST>".
hdf_mods = set(x.split("/")[1] for x in hdf_keys)
hdf_lvl = set(x.split("/")[2] for x in hdf_keys)
hdf_crossval = set(x.split("/")[3] for x in hdf_keys)
print(hdf_mods)
print(hdf_lvl)
print(hdf_crossval)

tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir, name)
for mod in hdf_mods:
    for lvl in hdf_lvl:
        x_train = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TRAIN"))
        x_dev = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "DEV"))
        x_test = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TEST"))

        if x_train.shape[1] <= 8:
            # The narrow softmax layer carries labels, not features: skip.
            continue
        _tsne_to_hdf(x_train, tsne_proj_path, "MLP/{}/{}/{}".format(mod, lvl, "TRAIN"))
        _tsne_to_hdf(x_dev, tsne_proj_path, "MLP/{}/{}/{}".format(mod, lvl, "DEV"))
        _tsne_to_hdf(x_test, tsne_proj_path, "MLP/{}/{}/{}".format(mod, lvl, "TEST"))
        _tsne_to_hdf(pd.concat([x_train, x_dev, x_test]), tsne_proj_path,
                     "MLP/{}/{}/{}".format(mod, lvl, "CONCAT"))

print(" TRANSFERT")

hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir, name)
proj_hdf = pd.HDFStore(hdf_proj_path)
print(proj_hdf)
hdf_keys = proj_hdf.keys()
proj_hdf.close()
print(hdf_keys)
hdf_lvl = set(x.split("/")[1] for x in hdf_keys)
hdf_layer = set(x.split("/")[2] for x in hdf_keys)
hdf_crossval = set(x.split("/")[3] for x in hdf_keys)
print(hdf_lvl)
print(hdf_layer)
print(hdf_crossval)

tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir, name)
for lvl in hdf_lvl:
    for layer in hdf_layer:
        x_train = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TRAIN"))
        x_dev = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "DEV"))
        x_test = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TEST"))

        if x_train.shape[1] <= 8:
            continue
        # BUG FIX: these write keys previously reused the stale `mod`
        # variable left over from the MLP loop above
        # ("transfert/<mod>/<lvl>/..."), so every (lvl, layer) pair
        # overwrote the same HDF nodes.  The keys now mirror the read keys:
        # "transfert/<lvl>/<layer>/<split>".
        _tsne_to_hdf(x_train, tsne_proj_path, "transfert/{}/{}/{}".format(lvl, layer, "TRAIN"))
        _tsne_to_hdf(x_dev, tsne_proj_path, "transfert/{}/{}/{}".format(lvl, layer, "DEV"))
        _tsne_to_hdf(x_test, tsne_proj_path, "transfert/{}/{}/{}".format(lvl, layer, "TEST"))
        _tsne_to_hdf(pd.concat([x_train, x_dev, x_test]), tsne_proj_path,
                     "transfert/{}/{}/{}".format(lvl, layer, "CONCAT"))
BOTTLENECK/03-mlp_score_on_transfert.py
#!/usr/bin/env python
# coding: utf-8
"""Train an MLP classifier on each transfert projection produced by 02b.

Usage:
    03-mlp_score_on_transfert.py <in_dir> <conf.json>

Reads <in_dir>/<name>/transfert_proj_df.hdf, scores every (level, layer)
projection against the ASR labels and stores the predictions under the
"transfert" key of <in_dir>/<name>/labels.shelve; training histories go to
<in_dir>/<name>/03_logs.shelve.
"""
# Imports
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
json_conf = json.load(open(sys.argv[2]))

mlp_conf = json_conf["mlp"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

# mlp_conf["sgd"] is either {"name": "adam"|"sgd", "lr": float} or a raw
# keras optimizer spec used as-is.  Replaces the previous bare ``except:``.
sgd_conf = mlp_conf["sgd"]
if isinstance(sgd_conf, dict) and sgd_conf.get("name") == "adam":
    sgd = Adam(lr=sgd_conf["lr"])
elif isinstance(sgd_conf, dict) and sgd_conf.get("name") == "sgd":
    sgd = SGD(lr=sgd_conf["lr"])
else:
    sgd = sgd_conf
name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))
shelve_logs = shelve.open("{}/{}/03_logs.shelve".format(in_dir, name), writeback=True)

hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir, name)
# `pandas`/`np` are expected to come from the star imports above.
proj_hdf = pandas.HDFStore(hdf_proj_path)
hdf_keys = proj_hdf.keys()
print(hdf_keys)
proj_hdf.close()
# HDF keys have the shape "/<lvl>/<layer>/<TRAIN|DEV|TEST>".
hdf_lvl = set(x.split("/")[1] for x in hdf_keys)
hdf_layer = set(x.split("/")[2] for x in hdf_keys)
hdf_crossval = set(x.split("/")[3] for x in hdf_keys)
print(hdf_lvl)
print(hdf_crossval)

labels_dict = {}
for lvl in hdf_lvl:
    labels_dict[lvl] = {}
    for layer in hdf_layer:
        labels_dict[lvl][layer] = {}

for lvl in hdf_lvl:
    for layer in hdf_layer:
        x_train = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TRAIN"))
        x_dev = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "DEV"))
        x_test = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TEST"))

        # The transfert AE maps ASR onto TRS, so predictions are scored
        # against the ASR labels.
        y_train = db["LABEL"]["ASR"]["TRAIN"]
        y_dev = db["LABEL"]["ASR"]["DEV"]
        y_test = db["LABEL"]["ASR"]["TEST"]

        print(x_train.shape)
        print(x_dev.shape)
        print(x_test.shape)
        print(y_train.shape)
        print(y_dev.shape)
        print(y_test.shape)
        pred, hist = train_mlp_pred(x_train.values, y_train,
                                    x_dev.values, y_dev,
                                    x_test.values, y_test,
                                    hidden_size, sgd=sgd,
                                    epochs=epochs,
                                    patience=patience,
                                    batch_size=batch_size,
                                    input_activation=input_activation,
                                    output_activation=output_activation,
                                    dropouts=dropouts,
                                    fit_verbose=1)
        shelve_logs["{}/{}".format(lvl, layer)] = hist
        labels_dict[lvl][layer]["TRAIN"] = np.argmax(pred[0], axis=1)
        labels_dict[lvl][layer]["DEV"] = np.argmax(pred[1], axis=1)
        labels_dict[lvl][layer]["TEST"] = np.argmax(pred[2], axis=1)

db["transfert"] = labels_dict
shelve_logs.sync()
shelve_logs.close()
db.sync()
db.close()
BOTTLENECK/04-accuracyscore.py
#!/usr/bin/env python
# coding: utf-8
"""Print a CSV of train/dev/test accuracies for every stored prediction.

Usage:
    04-accuracyscore.py <in_dir> <conf.json>

Reads the predictions stored in <in_dir>/<name>/labels.shelve by steps
02a and 03 and scores them against the binarized ASR reference labels.
Output is CSV on stdout: name,MOD,level,train,dev,test.
"""
# Imports
import gensim
from scipy import sparse
import numpy as np
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
from sklearn import metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
json_conf = json.load(open(sys.argv[2]))

name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))

# Every key that is not a reserved entry is a projection level written by 02a.
# Robustness fix: the original unconditional remove() raised ValueError when
# an upstream step had not been run yet.
keys = sorted(db.keys())
for reserved in ("IDS", "transfert", "LABEL"):
    if reserved in keys:
        keys.remove(reserved)
mods = ["ASR", "TRS"]
# Reference labels are one-hot rows; argmax turns them into class indices.
ref_train = db["LABEL"]["ASR"]["TRAIN"]
ref_dev = db["LABEL"]["ASR"]["DEV"]
ref_test = db["LABEL"]["ASR"]["TEST"]

print("name,MOD,level,train,dev,test")
for mod in mods:
    for lvl in keys:
        if "TEST" in db[lvl][mod]:
            train_score = metrics.accuracy_score(np.argmax(ref_train, axis=1), db[lvl][mod]["TRAIN"])
            dev_score = metrics.accuracy_score(np.argmax(ref_dev, axis=1), db[lvl][mod]["DEV"])
            test_score = metrics.accuracy_score(np.argmax(ref_test, axis=1), db[lvl][mod]["TEST"])
        else:
            # Predictions missing for this (level, modality) pair.
            train_score = dev_score = test_score = "ERROR"
        print(",".join([name, mod, lvl, str(train_score), str(dev_score), str(test_score)]))

# Robustness fix: tolerate a missing "transfert" entry (step 03 not run).
transfert_labels = db.get("transfert", {})
for level in transfert_labels.keys():
    for layer in transfert_labels[level].keys():
        if "TRAIN" in transfert_labels[level][layer]:
            scores = transfert_labels[level][layer]
            train_score = metrics.accuracy_score(np.argmax(ref_train, axis=1), scores["TRAIN"])
            dev_score = metrics.accuracy_score(np.argmax(ref_dev, axis=1), scores["DEV"])
            test_score = metrics.accuracy_score(np.argmax(ref_test, axis=1), scores["TEST"])
            print(",".join([name, "transfert", level + "/" + layer,
                            str(train_score), str(dev_score), str(test_score)]))

db.close()
BOTTLENECK/mlp.py
1 | +../LDA/mlp.py |
BOTTLENECK/run01_do_alljson.sh
# Train the bottleneck MLP projection (01a) for every configuration,
# alternating between gpu0 and gpu1 from one run to the next.
device=gpu0
for conf in L0 L0do L1 L1do L2 L2do L3 L3do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=${device},floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/${conf}.json RAW
    if [ "${device}" = "gpu0" ] ; then device=gpu1 ; else device=gpu0 ; fi
done
BOTTLENECK/run02_mlpscore.sh
# Score an MLP classifier on the bottleneck projections (02a) for every
# configuration, alternating between gpu0 and gpu1.
device=gpu0
for conf in L0 L0do L1 L1do L2 L2do L3 L3do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=${device},floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/${conf}.json
    if [ "${device}" = "gpu0" ] ; then device=gpu1 ; else device=gpu0 ; fi
done
BOTTLENECK/run02b-transfert.sh
# Train the ASR->TRS transfert auto-encoder (02b) for every configuration,
# always on gpu0.
for conf in L0 L0do L1 L1do L2 L2do L3 L3do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/${conf}.json
done
BOTTLENECK/run03_tsne_MLPtransfert.sh
# Compute the t-SNE embeddings (02c) for every configuration, always on gpu0.
for conf in L0 L0do L1 L1do L2 L2do L3 L3do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/${conf}.json
done
BOTTLENECK/run04-mlp_on_transfert.sh
# Score an MLP on the transfert projections (03) for each configuration,
# alternating between gpu1 and gpu0.  The L0/L0do runs are disabled below.
#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1.json
device=gpu1
for conf in L1do L2 L2do L3 L3do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=${device},floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/${conf}.json
    if [ "${device}" = "gpu1" ] ; then device=gpu0 ; else device=gpu1 ; fi
done

#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0.json

#THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0do.json
BOTTLENECK/run05_accuracy.sh
# Print the accuracy CSV (04) for every configuration, alternating between
# gpu0 and gpu1.
device=gpu0
for conf in L1 L1do L2 L2do L3 L3do L0 L0do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=${device},floatX=float32 python 04-accuracyscore.py output_1 output_1/${conf}.json
    if [ "${device}" = "gpu0" ] ; then device=gpu1 ; else device=gpu0 ; fi
done
BOTTLENECK/run_all.sh
# Run the whole pipeline (run_one.sh) for every configuration of output_3,
# up to four at a time, two per GPU.

run_batch () {
    # Launch each given configuration in the background, alternating GPUs,
    # then wait for the whole batch to finish.
    device=gpu0
    for conf in "$@" ; do
        bash run_one.sh output_3 output_3/${conf}.json ${device} &
        if [ "${device}" = "gpu0" ] ; then device=gpu1 ; else device=gpu0 ; fi
    done
    wait
}

run_batch L0do L0 L1do L1
run_batch L2do L2 L3bndo L3ce1
run_batch L3ce L3do L3 L3sigmo
run_batch L4do L5do L6do L7do
# The final pair is launched without a trailing wait, as in the original.
bash run_one.sh output_3 output_3/MaxMLP.json gpu0 &
bash run_one.sh output_3 output_3/MinMLP.json gpu1 &
BOTTLENECK/run_one.sh
# Run the full bottleneck pipeline for a single configuration.
#   $1: output directory   $2: configuration JSON   $3: theano device (gpuN)
# Timing of the two training steps is appended to logs/<conf>_time and the
# final accuracy CSV is appended to $1/res.csv.
bn=$(basename $2)
theano=mode=FAST_RUN,device=$3,floatX=float32
time (THEANO_FLAGS=$theano python 01a-mlp_proj.py $1 Sparse_tfidf2.shelve $2 RAW) 2>> logs/${bn}_time ; echo MLP_$2 >> logs/${bn}_time
THEANO_FLAGS=$theano python 02a-mlp_score_on_BN.py $1 $2
THEANO_FLAGS=$theano python 02b-transfert_ae.py $1 $2
THEANO_FLAGS=$theano python 02c-tsne_mlproj.py $1 $2
time (THEANO_FLAGS=$theano python 03-mlp_score_on_transfert.py $1 $2) 2>> logs/${bn}_time ; echo transfert_$2 >> logs/${bn}_time
THEANO_FLAGS=$theano python 04-accuracyscore.py $1 $2 >> $1/res.csv
BOTTLENECK/utils.py
1 | +../utils.py |