Commit d414b83e18cdc5d0313f6880349609082dc035c1
1 parent
7c16f9bfe8
Exists in
master
add Bottleneck MLP + scripts
Showing 16 changed files with 719 additions and 0 deletions Inline Diff
- BOTTLENECK/01a-mlp_proj.py
- BOTTLENECK/02a-mlp_score_on_BN.py
- BOTTLENECK/02b-transfert_ae.py
- BOTTLENECK/02c-tsne_mlproj.py
- BOTTLENECK/03-mlp_score_on_transfert.py
- BOTTLENECK/04-accuracyscore.py
- BOTTLENECK/mlp.py
- BOTTLENECK/run01_do_alljson.sh
- BOTTLENECK/run02_mlpscore.sh
- BOTTLENECK/run02b-transfert.sh
- BOTTLENECK/run03_tsne_MLPtransfert.sh
- BOTTLENECK/run04-mlp_on_transfert.sh
- BOTTLENECK/run05_accuracy.sh
- BOTTLENECK/run_all.sh
- BOTTLENECK/run_one.sh
- BOTTLENECK/utils.py
BOTTLENECK/01a-mlp_proj.py
File was created | 1 | ||
2 | # coding: utf-8 | ||
3 | |||
4 | # In[2]: | ||
5 | |||
6 | # Import | ||
7 | import gensim | ||
8 | from scipy import sparse | ||
9 | import itertools | ||
10 | from sklearn import preprocessing | ||
11 | from keras.models import Sequential | ||
12 | from keras.optimizers import SGD,Adam | ||
13 | from keras.layers.advanced_activations import ELU,PReLU | ||
14 | from keras.callbacks import ModelCheckpoint | ||
15 | from mlp import * | ||
16 | import sklearn.metrics | ||
17 | from sklearn.preprocessing import LabelBinarizer | ||
18 | import shelve | ||
19 | import pickle | ||
20 | from utils import * | ||
21 | import sys | ||
22 | import os | ||
23 | import json | ||
24 | # In[4]: | ||
25 | |||
26 | infer_model=shelve.open("{}".format(sys.argv[2])) | ||
27 | in_dir = sys.argv[1] | ||
28 | #['ASR', 'TRS', 'LABEL'] | ||
29 | # In[6]: | ||
30 | if len(sys.argv) > 4 : | ||
31 | features_key = sys.argv[4] | ||
32 | else : | ||
33 | features_key = "LDA" | ||
34 | save_projection = True | ||
35 | json_conf =json.load(open(sys.argv[3])) | ||
36 | ae_conf = json_conf["mlp_proj"] | ||
37 | |||
38 | hidden_size= ae_conf["hidden_size"] | ||
39 | input_activation = None | ||
40 | if ae_conf["input_activation"] == "elu": | ||
41 | print " ELU" | ||
42 | input_activation = PReLU() | ||
43 | else: | ||
44 | print " ELSE" | ||
45 | input_activation = ae_conf["input_activation"] | ||
46 | #input_activation=ae_conf["input_activation"] | ||
47 | output_activation=ae_conf["output_activation"] | ||
48 | loss=ae_conf["loss"] | ||
49 | epochs=ae_conf["epochs"] | ||
50 | batch_size=ae_conf["batch"] | ||
51 | patience=ae_conf["patience"] | ||
52 | dropouts=ae_conf["do"] | ||
53 | try: | ||
54 | k = ae_conf["sgd"] | ||
55 | if ae_conf["sgd"]["name"] == "adam": | ||
56 | sgd = Adam(lr=ae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | ||
57 | elif ae_conf["sgd"]["name"] == "sgd": | ||
58 | sgd = SGD(lr=ae_conf["sgd"]["lr"]) | ||
59 | except: | ||
60 | sgd = ae_conf["sgd"] | ||
61 | |||
62 | mlp_conf = json_conf["mlp"] | ||
63 | mlp_h = mlp_conf["hidden_size"] | ||
64 | mlp_loss = mlp_conf["loss"] | ||
65 | mlp_dropouts = mlp_conf["do"] | ||
66 | mlp_epochs = mlp_conf["epochs"] | ||
67 | mlp_batch_size = mlp_conf["batch"] | ||
68 | mlp_input_activation=mlp_conf["input_activation"] | ||
69 | mlp_output_activation=mlp_conf["output_activation"] | ||
70 | |||
71 | try: | ||
72 | k = mlp_conf["sgd"] | ||
73 | if mlp_conf["sgd"]["name"] == "adam": | ||
74 | mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | ||
75 | elif mlp_conf["sgd"]["name"] == "sgd": | ||
76 | mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"]) | ||
77 | except: | ||
78 | mlp_sgd = mlp_conf["sgd"] | ||
79 | |||
80 | |||
81 | name = json_conf["name"] | ||
82 | try : | ||
83 | os.mkdir("{}/{}".format(in_dir,name)) | ||
84 | except OSError : | ||
85 | pass | ||
86 | db = shelve.open("{}/{}/labels.shelve".format(in_dir,name)) | ||
87 | db["IDS"]=dict(infer_model["LABEL"]) | ||
88 | # | ||
89 | keys = infer_model[features_key].keys() | ||
90 | LABELS = {} | ||
91 | for mod in keys : | ||
92 | |||
93 | int_labels_train = map(select,infer_model["LABEL"][mod]["TRAIN"]) | ||
94 | binarizer = LabelBinarizer() | ||
95 | y_train=binarizer.fit_transform(int_labels_train) | ||
96 | y_dev=binarizer.transform(map(select,infer_model["LABEL"][mod]["DEV"])) | ||
97 | y_test=binarizer.transform(map(select,infer_model["LABEL"][mod]["TEST"])) | ||
98 | LABELS[mod]= { "TRAIN":y_train , "DEV" : y_dev, "TEST" : y_test} | ||
99 | sumary,proj = train_mlp_proj(infer_model[features_key][mod]["TRAIN"].todense(),y_train, | ||
100 | infer_model[features_key][mod]["DEV"].todense(),y_dev, | ||
101 | infer_model[features_key][mod]["TEST"].todense(),y_test, | ||
102 | hidden_size ,sgd=sgd, | ||
103 | epochs=epochs, | ||
104 | patience=patience, | ||
105 | batch_size=batch_size, | ||
106 | input_activation=input_activation, | ||
107 | output_activation=output_activation, | ||
108 | dropouts=dropouts, | ||
109 | fit_verbose=1) | ||
110 | with open("{}/{}/{}_sum.txt".format(in_dir,name,mod),"w") as output_sum : | ||
111 | print >>output_sum, sumary | ||
112 | for num_lvl,level in enumerate(proj): | ||
113 | print len(level) | ||
114 | for num,corp_type in enumerate(["TRAIN","DEV","TEST"]): | ||
115 | pd = pandas.DataFrame(level[num]) | ||
116 | pd.to_hdf("{}/{}/MLP_proj_df.hdf".format(in_dir,name),"{}/lvl{}/{}".format(mod,num_lvl,corp_type)) | ||
117 | db["LABEL"] = LABELS | ||
118 | db.sync() | ||
119 | db.close() | ||
120 |
BOTTLENECK/02a-mlp_score_on_BN.py
File was created | 1 | ||
2 | # coding: utf-8 | ||
3 | |||
4 | # In[2]: | ||
5 | |||
6 | # Import | ||
7 | import gensim | ||
8 | from scipy import sparse | ||
9 | import itertools | ||
10 | from sklearn import preprocessing | ||
11 | from keras.models import Sequential | ||
12 | from keras.optimizers import SGD,Adam | ||
13 | from keras.layers.advanced_activations import ELU,PReLU | ||
14 | from keras.callbacks import ModelCheckpoint | ||
15 | from mlp import * | ||
16 | import sklearn.metrics | ||
17 | from sklearn.preprocessing import LabelBinarizer | ||
18 | import shelve | ||
19 | import pickle | ||
20 | from utils import * | ||
21 | import sys | ||
22 | import os | ||
23 | import json | ||
24 | # In[4]: | ||
25 | |||
26 | in_dir = sys.argv[1] | ||
27 | #['ASR', 'TRS', 'LABEL'] | ||
28 | # In[6]: | ||
29 | json_conf =json.load(open(sys.argv[2])) | ||
30 | |||
31 | mlp_conf = json_conf["mlp"] | ||
32 | hidden_size = mlp_conf["hidden_size"] | ||
33 | loss = mlp_conf["loss"] | ||
34 | patience = mlp_conf["patience"] | ||
35 | dropouts = mlp_conf["do"] | ||
36 | epochs = mlp_conf["epochs"] | ||
37 | batch_size = mlp_conf["batch"] | ||
38 | input_activation=mlp_conf["input_activation"] | ||
39 | output_activation=mlp_conf["output_activation"] | ||
40 | |||
41 | try: | ||
42 | k = mlp_conf["sgd"] | ||
43 | if mlp_conf["sgd"]["name"] == "adam": | ||
44 | sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | ||
45 | elif mlp_conf["sgd"]["name"] == "sgd": | ||
46 | sgd = SGD(lr=mlp_conf["sgd"]["lr"]) | ||
47 | except: | ||
48 | sgd = mlp_conf["sgd"] | ||
49 | name = json_conf["name"] | ||
50 | |||
51 | db = shelve.open("{}/{}/labels.shelve".format(in_dir,name)) | ||
52 | shelve_logs=shelve.open("{}/{}/02a_logs.shelve".format(in_dir,name)) | ||
53 | |||
54 | # | ||
55 | keys = db["LABEL"].keys() | ||
56 | proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name)) | ||
57 | hdf_keys = proj_hdf.keys() | ||
58 | proj_hdf.close() | ||
59 | hdf_mods = set([ x.split("/")[1] for x in hdf_keys ]) | ||
60 | hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ]) | ||
61 | hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ]) | ||
62 | print hdf_mods | ||
63 | print hdf_lvl | ||
64 | print hdf_crossval | ||
65 | |||
66 | hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name) | ||
67 | labels_dict = {"origine":{} } | ||
68 | logs = {} | ||
69 | for lvl in hdf_lvl : | ||
70 | labels_dict[lvl] = {} | ||
71 | for mod in hdf_mods: | ||
72 | labels_dict[lvl][mod] = {} | ||
73 | |||
74 | for mod in hdf_mods: | ||
75 | for lvl in hdf_lvl : | ||
76 | x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TRAIN")) | ||
77 | x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"DEV")) | ||
78 | x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TEST")) | ||
79 | if x_train.shape[1] <= 8 : | ||
80 | labels_dict["origine"]["TRAIN"] = np.argmax(x_train.values,axis=1) | ||
81 | labels_dict["origine"]["DEV"] = np.argmax(x_dev.values,axis=1) | ||
82 | labels_dict["origine"]["TEST"] = np.argmax(x_test.values,axis=1) | ||
83 | continue | ||
84 | y_train = db["LABEL"][mod]["TRAIN"] | ||
85 | y_dev = db["LABEL"][mod]["DEV"] | ||
86 | y_test = db["LABEL"][mod]["TEST"] | ||
87 | |||
88 | print x_train.shape | ||
89 | print x_dev.shape | ||
90 | print x_test.shape | ||
91 | print y_train.shape | ||
92 | print y_dev.shape | ||
93 | print y_test.shape | ||
94 | pred,hist = train_mlp_pred(x_train.values,y_train, | ||
95 | x_dev.values,y_dev, | ||
96 | x_test.values,y_test, | ||
97 | hidden_size ,sgd=sgd, | ||
98 | epochs=epochs, | ||
99 | patience=patience, | ||
100 | batch_size=batch_size, | ||
101 | input_activation=input_activation, | ||
102 | output_activation=output_activation, | ||
103 | dropouts=dropouts, | ||
104 | fit_verbose=1) | ||
105 | shelve_logs["{}/{}".format(mod,lvl)] = hist | ||
106 | labels_dict[lvl][mod]["TRAIN"] = np.argmax(pred[0],axis=1) | ||
107 | labels_dict[lvl][mod]["DEV"] = np.argmax(pred[1],axis=1) | ||
108 | labels_dict[lvl][mod]["TEST"] = np.argmax(pred[2],axis=1) | ||
109 | |||
110 | for lvl in hdf_lvl: | ||
111 | db[lvl] = labels_dict[lvl] | ||
112 | shelve_logs.sync() | ||
113 | shelve_logs.close() | ||
114 | db.sync() | ||
115 | db.close() | ||
116 |
BOTTLENECK/02b-transfert_ae.py
File was created | 1 | ||
2 | # coding: utf-8 | ||
3 | |||
4 | # In[2]: | ||
5 | |||
6 | # Import | ||
7 | import gensim | ||
8 | from scipy import sparse | ||
9 | import itertools | ||
10 | from sklearn import preprocessing | ||
11 | from keras.models import Sequential | ||
12 | from keras.optimizers import SGD,Adam | ||
13 | from keras.layers.advanced_activations import ELU,PReLU | ||
14 | from keras.callbacks import ModelCheckpoint | ||
15 | from mlp import * | ||
16 | import pandas as pd | ||
17 | import sklearn.metrics | ||
18 | from sklearn.preprocessing import LabelBinarizer | ||
19 | import shelve | ||
20 | import pickle | ||
21 | from utils import * | ||
22 | import sys | ||
23 | import os | ||
24 | import json | ||
25 | # In[4]: | ||
26 | |||
27 | in_dir = sys.argv[1] | ||
28 | #['ASR', 'TRS', 'LABEL'] | ||
29 | # In[6]: | ||
30 | json_conf =json.load(open(sys.argv[2])) | ||
31 | |||
32 | mlp_conf = json_conf["transfert"] | ||
33 | hidden_size = mlp_conf["hidden_size"] | ||
34 | loss = mlp_conf["loss"] | ||
35 | patience = mlp_conf["patience"] | ||
36 | dropouts = mlp_conf["do"] | ||
37 | epochs = mlp_conf["epochs"] | ||
38 | batch_size = mlp_conf["batch"] | ||
39 | input_activation=mlp_conf["input_activation"] | ||
40 | output_activation=mlp_conf["output_activation"] | ||
41 | |||
42 | try: | ||
43 | k = mlp_conf["sgd"] | ||
44 | if mlp_conf["sgd"]["name"] == "adam": | ||
45 | sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | ||
46 | elif mlp_conf["sgd"]["name"] == "sgd": | ||
47 | sgd = SGD(lr=mlp_conf["sgd"]["lr"]) | ||
48 | except: | ||
49 | sgd = mlp_conf["sgd"] | ||
50 | name = json_conf["name"] | ||
51 | |||
52 | # | ||
53 | proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name)) | ||
54 | hdf_keys = proj_hdf.keys() | ||
55 | proj_hdf.close() | ||
56 | hdf_mods = set([ x.split("/")[1] for x in hdf_keys ]) | ||
57 | hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ]) | ||
58 | hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ]) | ||
59 | print hdf_mods | ||
60 | print hdf_lvl | ||
61 | print hdf_crossval | ||
62 | |||
63 | hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name) | ||
64 | transfert_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name) | ||
65 | mod1,mod2 = "ASR","TRS" | ||
66 | for lvl in hdf_lvl : | ||
67 | x_train_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"TRAIN")) | ||
68 | x_dev_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"DEV")) | ||
69 | x_test_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"TEST")) | ||
70 | x_train_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"TRAIN")) | ||
71 | x_dev_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"DEV")) | ||
72 | x_test_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"TEST")) | ||
73 | |||
74 | if x_train_ASR.shape[1] <= 8 : | ||
75 | continue | ||
76 | |||
77 | pred = train_ae(x_train_ASR.values, | ||
78 | x_dev_ASR.values, | ||
79 | x_test_ASR.values, | ||
80 | hidden_size ,sgd=sgd, | ||
81 | y_train=x_train_TRS.values, | ||
82 | y_dev=x_dev_TRS.values, | ||
83 | y_test=x_test_TRS.values, | ||
84 | epochs=epochs, | ||
85 | patience=patience, | ||
86 | batch_size=batch_size, | ||
87 | input_activation=input_activation, | ||
88 | output_activation=output_activation, | ||
89 | dropouts=dropouts, | ||
90 | best_mod=True, | ||
91 | verbose=1) | ||
92 | for num_layer,layer in enumerate(pred): | ||
93 | transfert_train = pd.DataFrame(layer[0]) | ||
94 | transfert_dev = pd.DataFrame(layer[1]) | ||
95 | transfert_test = pd.DataFrame(layer[2]) | ||
96 | transfert_train.to_hdf(transfert_proj_path,"{}/{}/TRAIN".format(lvl,"layer"+str(num_layer))) | ||
97 | transfert_dev.to_hdf(transfert_proj_path,"{}/{}/DEV".format(lvl,"layer"+str(num_layer))) | ||
98 | transfert_test.to_hdf(transfert_proj_path,"{}/{}/TEST".format(lvl,"layer"+str(num_layer))) | ||
99 | |||
100 |
BOTTLENECK/02c-tsne_mlproj.py
File was created | 1 | ||
2 | # coding: utf-8 | ||
3 | |||
4 | # In[2]: | ||
5 | |||
6 | # Import | ||
7 | import gensim | ||
8 | from scipy import sparse | ||
9 | import itertools | ||
10 | from sklearn import preprocessing | ||
11 | from keras.models import Sequential | ||
12 | from keras.optimizers import SGD,Adam | ||
13 | from keras.layers.advanced_activations import ELU,PReLU | ||
14 | from keras.callbacks import ModelCheckpoint | ||
15 | from mlp import * | ||
16 | import pandas as pd | ||
17 | import sklearn.metrics | ||
18 | from sklearn.preprocessing import LabelBinarizer | ||
19 | from sklearn.manifold import TSNE | ||
20 | import shelve | ||
21 | import pickle | ||
22 | from utils import * | ||
23 | import sys | ||
24 | import os | ||
25 | import json | ||
26 | # In[4]: | ||
27 | |||
28 | in_dir = sys.argv[1] | ||
29 | #['ASR', 'TRS', 'LABEL'] | ||
30 | # In[6]: | ||
31 | json_conf =json.load(open(sys.argv[2])) | ||
32 | |||
33 | mlp_conf = json_conf["transfert"] | ||
34 | hidden_size = mlp_conf["hidden_size"] | ||
35 | loss = mlp_conf["loss"] | ||
36 | patience = mlp_conf["patience"] | ||
37 | dropouts = mlp_conf["do"] | ||
38 | epochs = mlp_conf["epochs"] | ||
39 | batch_size = mlp_conf["batch"] | ||
40 | input_activation=mlp_conf["input_activation"] | ||
41 | output_activation=mlp_conf["output_activation"] | ||
42 | |||
43 | try: | ||
44 | k = mlp_conf["sgd"] | ||
45 | if mlp_conf["sgd"]["name"] == "adam": | ||
46 | sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | ||
47 | elif mlp_conf["sgd"]["name"] == "sgd": | ||
48 | sgd = SGD(lr=mlp_conf["sgd"]["lr"]) | ||
49 | except: | ||
50 | sgd = mlp_conf["sgd"] | ||
51 | name = json_conf["name"] | ||
52 | |||
53 | # | ||
54 | print " MLP" | ||
55 | proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name)) | ||
56 | hdf_keys = proj_hdf.keys() | ||
57 | proj_hdf.close() | ||
58 | hdf_mods = set([ x.split("/")[1] for x in hdf_keys ]) | ||
59 | hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ]) | ||
60 | hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ]) | ||
61 | print hdf_mods | ||
62 | print hdf_lvl | ||
63 | print hdf_crossval | ||
64 | |||
65 | hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name) | ||
66 | tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir,name) | ||
67 | for mod in hdf_mods: | ||
68 | for lvl in hdf_lvl : | ||
69 | x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TRAIN")) | ||
70 | x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"DEV")) | ||
71 | x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TEST")) | ||
72 | |||
73 | if x_train.shape[1] <= 8 : | ||
74 | continue | ||
75 | tsne= TSNE() | ||
76 | tsne_train=tsne.fit_transform(x_train.values) | ||
77 | pd.DataFrame(tsne_train).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"TRAIN")) | ||
78 | tsne= TSNE() | ||
79 | tsne_dev=tsne.fit_transform(x_dev.values) | ||
80 | pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"DEV")) | ||
81 | tsne= TSNE() | ||
82 | tsne_test=tsne.fit_transform(x_test.values) | ||
83 | pd.DataFrame(tsne_test).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"TEST")) | ||
84 | tsne = TSNE() | ||
85 | tsne_all = tsne.fit_transform(pd.concat([x_train,x_dev,x_test]).values) | ||
86 | pd.DataFrame(tsne_all).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"CONCAT")) | ||
87 | |||
88 | print " TRANSFERT" | ||
89 | |||
90 | hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name) | ||
91 | proj_hdf = pandas.HDFStore(hdf_proj_path) | ||
92 | print proj_hdf | ||
93 | hdf_keys = proj_hdf.keys() | ||
94 | proj_hdf.close() | ||
95 | print hdf_keys | ||
96 | hdf_lvl = set([ x.split("/")[1] for x in hdf_keys ]) | ||
97 | hdf_layer = set( [ x.split("/")[2] for x in hdf_keys ]) | ||
98 | hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ]) | ||
99 | print hdf_lvl | ||
100 | print hdf_layer | ||
101 | print hdf_crossval | ||
102 | |||
103 | tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir,name) | ||
104 | for lvl in hdf_lvl : | ||
105 | for layer in hdf_layer: | ||
106 | x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TRAIN")) | ||
107 | x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"DEV")) | ||
108 | x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TEST")) | ||
109 | |||
110 | if x_train.shape[1] <= 8 : | ||
111 | continue | ||
112 | tsne= TSNE() | ||
113 | tsne_train=tsne.fit_transform(x_train.values) | ||
114 | pd.DataFrame(tsne_train).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"TRAIN")) | ||
115 | tsne= TSNE() | ||
116 | tsne_dev=tsne.fit_transform(x_dev.values) | ||
117 | pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"DEV")) | ||
118 | tsne= TSNE() | ||
119 | tsne_test=tsne.fit_transform(x_test.values) | ||
120 | pd.DataFrame(tsne_test).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"TEST")) | ||
121 | tsne = TSNE() | ||
122 | tsne_all = tsne.fit_transform(pd.concat([x_train,x_dev,x_test]).values) | ||
123 | pd.DataFrame(tsne_all).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"CONCAT")) | ||
124 |
BOTTLENECK/03-mlp_score_on_transfert.py
File was created | 1 | ||
2 | # coding: utf-8 | ||
3 | |||
4 | # In[2]: | ||
5 | |||
6 | # Import | ||
7 | import gensim | ||
8 | from scipy import sparse | ||
9 | import itertools | ||
10 | from sklearn import preprocessing | ||
11 | from keras.models import Sequential | ||
12 | from keras.optimizers import SGD,Adam | ||
13 | from keras.layers.advanced_activations import ELU,PReLU | ||
14 | from keras.callbacks import ModelCheckpoint | ||
15 | from mlp import * | ||
16 | import sklearn.metrics | ||
17 | from sklearn.preprocessing import LabelBinarizer | ||
18 | import shelve | ||
19 | import pickle | ||
20 | from utils import * | ||
21 | import sys | ||
22 | import os | ||
23 | import json | ||
24 | # In[4]: | ||
25 | |||
26 | in_dir = sys.argv[1] | ||
27 | #['ASR', 'TRS', 'LABEL'] | ||
28 | # In[6]: | ||
29 | json_conf =json.load(open(sys.argv[2])) | ||
30 | |||
31 | mlp_conf = json_conf["mlp"] | ||
32 | hidden_size = mlp_conf["hidden_size"] | ||
33 | loss = mlp_conf["loss"] | ||
34 | patience = mlp_conf["patience"] | ||
35 | dropouts = mlp_conf["do"] | ||
36 | epochs = mlp_conf["epochs"] | ||
37 | batch_size = mlp_conf["batch"] | ||
38 | input_activation=mlp_conf["input_activation"] | ||
39 | output_activation=mlp_conf["output_activation"] | ||
40 | |||
41 | try: | ||
42 | k = mlp_conf["sgd"] | ||
43 | if mlp_conf["sgd"]["name"] == "adam": | ||
44 | sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | ||
45 | elif mlp_conf["sgd"]["name"] == "sgd": | ||
46 | sgd = SGD(lr=mlp_conf["sgd"]["lr"]) | ||
47 | except: | ||
48 | sgd = mlp_conf["sgd"] | ||
49 | name = json_conf["name"] | ||
50 | |||
51 | db = shelve.open("{}/{}/labels.shelve".format(in_dir,name)) | ||
52 | shelve_logs=shelve.open("{}/{}/03_logs.shelve".format(in_dir,name),writeback=True) | ||
53 | |||
54 | # | ||
55 | keys = db["LABEL"].keys() | ||
56 | |||
57 | hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name) | ||
58 | proj_hdf = pandas.HDFStore(hdf_proj_path) | ||
59 | hdf_keys = proj_hdf.keys() | ||
60 | print hdf_keys | ||
61 | proj_hdf.close() | ||
62 | hdf_lvl = set([ x.split("/")[1] for x in hdf_keys ]) | ||
63 | hdf_layer = set( [ x.split("/")[2] for x in hdf_keys ]) | ||
64 | hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ]) | ||
65 | print hdf_lvl | ||
66 | print hdf_crossval | ||
67 | |||
68 | labels_dict = { } | ||
69 | logs = {} | ||
70 | for lvl in hdf_lvl : | ||
71 | labels_dict[lvl] = {} | ||
72 | for layer in hdf_layer: | ||
73 | labels_dict[lvl][layer] = {} | ||
74 | |||
75 | for lvl in hdf_lvl : | ||
76 | for layer in hdf_layer: | ||
77 | x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TRAIN")) | ||
78 | x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"DEV")) | ||
79 | x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer, "TEST")) | ||
80 | |||
81 | y_train = db["LABEL"]["ASR"]["TRAIN"] | ||
82 | y_dev = db["LABEL"]["ASR"]["DEV"] | ||
83 | y_test = db["LABEL"]["ASR"]["TEST"] | ||
84 | |||
85 | print x_train.shape | ||
86 | print x_dev.shape | ||
87 | print x_test.shape | ||
88 | print y_train.shape | ||
89 | print y_dev.shape | ||
90 | print y_test.shape | ||
91 | pred,hist = train_mlp_pred(x_train.values,y_train, | ||
92 | x_dev.values,y_dev, | ||
93 | x_test.values,y_test, | ||
94 | hidden_size ,sgd=sgd, | ||
95 | epochs=epochs, | ||
96 | patience=patience, | ||
97 | batch_size=batch_size, | ||
98 | input_activation=input_activation, | ||
99 | output_activation=output_activation, | ||
100 | dropouts=dropouts, | ||
101 | fit_verbose=1) | ||
102 | shelve_logs["{}/{}".format(lvl,layer)] = hist | ||
103 | labels_dict[lvl][layer]["TRAIN"] = np.argmax(pred[0],axis=1) | ||
104 | labels_dict[lvl][layer]["DEV"] = np.argmax(pred[1],axis=1) | ||
105 | labels_dict[lvl][layer]["TEST"] = np.argmax(pred[2],axis=1) | ||
106 | |||
107 | db["transfert"] = labels_dict | ||
108 | shelve_logs.sync() | ||
109 | shelve_logs.close() | ||
110 | db.sync() | ||
111 | db.close() | ||
112 |
BOTTLENECK/04-accuracyscore.py
File was created | 1 | ||
2 | # coding: utf-8 | ||
3 | |||
4 | # In[2]: | ||
5 | |||
6 | # Import | ||
7 | import gensim | ||
8 | from scipy import sparse | ||
9 | import numpy as np | ||
10 | import itertools | ||
11 | from sklearn import preprocessing | ||
12 | from keras.models import Sequential | ||
13 | from keras.optimizers import SGD,Adam | ||
14 | from keras.layers.advanced_activations import ELU,PReLU | ||
15 | from keras.callbacks import ModelCheckpoint | ||
16 | from mlp import * | ||
17 | from sklearn import metrics | ||
18 | from sklearn.preprocessing import LabelBinarizer | ||
19 | import shelve | ||
20 | import pickle | ||
21 | from utils import * | ||
22 | import sys | ||
23 | import os | ||
24 | import json | ||
25 | |||
26 | # In[4]: | ||
27 | |||
28 | in_dir = sys.argv[1] | ||
29 | #['ASR', 'TRS', 'LABEL'] | ||
30 | # In[6]: | ||
31 | json_conf =json.load(open(sys.argv[2])) | ||
32 | |||
33 | name = json_conf["name"] | ||
34 | |||
35 | db = shelve.open("{}/{}/labels.shelve".format(in_dir,name)) | ||
36 | # | ||
37 | keys = sorted(db.keys()) | ||
38 | keys.remove("IDS") | ||
39 | keys.remove("transfert") | ||
40 | keys.remove("LABEL") | ||
41 | mods = ["ASR", "TRS"] | ||
42 | ref_train = db["LABEL"]["ASR"]["TRAIN"] | ||
43 | ref_dev = db["LABEL"]["ASR"]["DEV"] | ||
44 | ref_test = db["LABEL"]["ASR"]["TEST"] | ||
45 | |||
46 | print "name,MOD,level,train,dev,test" | ||
47 | for mod in mods : | ||
48 | for lvl in keys : | ||
49 | if "TEST" in db[lvl][mod] : | ||
50 | train_score = metrics.accuracy_score(np.argmax(ref_train,axis=1),db[lvl][mod]["TRAIN"]) | ||
51 | dev_score = metrics.accuracy_score(np.argmax(ref_dev,axis=1),db[lvl][mod]["DEV"]) | ||
52 | test_score = metrics.accuracy_score(np.argmax(ref_test,axis=1),db[lvl][mod]["TEST"]) | ||
53 | else : | ||
54 | train_score = "ERROR" | ||
55 | dev_score = "ERROR" | ||
56 | test_score = "ERROR" | ||
57 | print ",".join([name,mod, lvl, str(train_score), str(dev_score) , str(test_score)]) | ||
58 | |||
59 | for level in db["transfert"].keys() : | ||
60 | for layer in db["transfert"][level].keys(): | ||
61 | if "TRAIN" in db["transfert"][level][layer].keys(): | ||
62 | |||
63 | train_score = metrics.accuracy_score(np.argmax(ref_train,axis=1),db["transfert"][level][layer]["TRAIN"]) | ||
64 | dev_score = metrics.accuracy_score(np.argmax(ref_dev,axis=1),db["transfert"][level][layer]["DEV"]) | ||
65 | test_score = metrics.accuracy_score(np.argmax(ref_test,axis=1),db["transfert"][level][layer]["TEST"]) | ||
66 | print ",".join([name,"transfert",level+"/"+layer, str(train_score), str(dev_score) , str(test_score)]) | ||
67 | |||
68 | db.close() | ||
69 |
BOTTLENECK/mlp.py
File was created | 1 | ../LDA/mlp.py |
BOTTLENECK/run01_do_alljson.sh
# Train the bottleneck MLP projections (01a) for every configuration,
# alternating between the two GPUs.
gpu=(gpu0 gpu1)
i=0
for conf in L0 L0do L1 L1do L2 L2do L3 L3do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=${gpu[$((i % 2))]},floatX=float32 \
        python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/${conf}.json RAW
    i=$((i + 1))
done
BOTTLENECK/run02_mlpscore.sh
# Score an MLP classifier on every bottleneck level (02a) for each
# configuration, alternating between the two GPUs.
gpu=(gpu0 gpu1)
i=0
for conf in L0 L0do L1 L1do L2 L2do L3 L3do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=${gpu[$((i % 2))]},floatX=float32 \
        python 02a-mlp_score_on_BN.py output_1 output_1/${conf}.json
    i=$((i + 1))
done
BOTTLENECK/run02b-transfert.sh
# Train the ASR->TRS transfer autoencoder (02b) for every configuration.
# All runs use gpu0, sequentially.
for conf in L0 L0do L1 L1do L2 L2do L3 L3do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/${conf}.json
done
BOTTLENECK/run03_tsne_MLPtransfert.sh
# Compute t-SNE embeddings (02c) for every configuration.
# All runs use gpu0, sequentially.
for conf in L0 L0do L1 L1do L2 L2do L3 L3do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/${conf}.json
done
BOTTLENECK/run04-mlp_on_transfert.sh
# Score an MLP on the transfer projections (03) for each configuration,
# alternating GPUs (starting on gpu1).  The L0 runs are excluded and kept
# commented out, exactly as in the original run list.
#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1.json
gpu=(gpu0 gpu1)
i=1
for conf in L1do L2 L2do L3 L3do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=${gpu[$((i % 2))]},floatX=float32 \
        python 03-mlp_score_on_transfert.py output_1 output_1/${conf}.json
    i=$((i + 1))
done

#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0.json

#THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0do.json
BOTTLENECK/run05_accuracy.sh
# Print the accuracy CSV (04) for every configuration, alternating GPUs.
gpu=(gpu0 gpu1)
i=0
for conf in L1 L1do L2 L2do L3 L3do L0 L0do ; do
    THEANO_FLAGS=mode=FAST_RUN,device=${gpu[$((i % 2))]},floatX=float32 \
        python 04-accuracyscore.py output_1 output_1/${conf}.json
    i=$((i + 1))
done
BOTTLENECK/run_all.sh
# Run the full pipeline for every configuration, four jobs per batch
# (two per GPU), synchronising with `wait` between batches.
launch () { bash run_one.sh output_3 output_3/$1.json $2 & }

launch L0do gpu0
launch L0 gpu1
launch L1do gpu0
launch L1 gpu1
wait
launch L2do gpu0
launch L2 gpu1
launch L3bndo gpu0
launch L3ce1 gpu1
wait
launch L3ce gpu0
launch L3do gpu1
launch L3 gpu0
launch L3sigmo gpu1
wait
launch L4do gpu0
launch L5do gpu1
launch L6do gpu0
launch L7do gpu1
wait
launch MaxMLP gpu0
launch MinMLP gpu1
BOTTLENECK/run_one.sh
# Run the whole bottleneck pipeline for one configuration.
#   $1 = output directory, $2 = json config path, $3 = theano device (gpuN)
out=$1
conf=$2
dev=$3
bn=$(basename $conf)
run () { THEANO_FLAGS=mode=FAST_RUN,device=$dev,floatX=float32 python "$@" ; }

time (run 01a-mlp_proj.py $out Sparse_tfidf2.shelve $conf RAW) 2>> logs/${bn}_time ; echo MLP_$conf >> logs/${bn}_time
run 02a-mlp_score_on_BN.py $out $conf
run 02b-transfert_ae.py $out $conf
run 02c-tsne_mlproj.py $out $conf
time (run 03-mlp_score_on_transfert.py $out $conf) 2>> logs/${bn}_time ; echo transfert_$conf >> logs/${bn}_time
run 04-accuracyscore.py $out $conf >> $out/res.csv
BOTTLENECK/utils.py
File was created | 1 | ../utils.py |