Compare View
Commits (3)
Showing 17 changed files
- BOTTLENECK/01a-mlp_proj.py
- BOTTLENECK/02a-mlp_score_on_BN.py
- BOTTLENECK/02b-transfert_ae.py
- BOTTLENECK/02c-tsne_mlproj.py
- BOTTLENECK/03-mlp_score_on_transfert.py
- BOTTLENECK/04-accuracyscore.py
- BOTTLENECK/mlp.py
- BOTTLENECK/run01_do_alljson.sh
- BOTTLENECK/run02_mlpscore.sh
- BOTTLENECK/run02b-transfert.sh
- BOTTLENECK/run03_tsne_MLPtransfert.sh
- BOTTLENECK/run04-mlp_on_transfert.sh
- BOTTLENECK/run05_accuracy.sh
- BOTTLENECK/run_all.sh
- BOTTLENECK/run_one.sh
- BOTTLENECK/utils.py
- LDA/mlp.py
BOTTLENECK/01a-mlp_proj.py
@@ -0,0 +1,119 @@
# coding: utf-8

# Usage: 01a-mlp_proj.py <in_dir> <features_shelve> <config.json> [features_key]

# Imports
import gensim
from scipy import sparse
import itertools
import pandas
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

infer_model = shelve.open("{}".format(sys.argv[2]))  # holds the keys ['ASR', 'TRS', 'LABEL']
in_dir = sys.argv[1]
if len(sys.argv) > 4:
    features_key = sys.argv[4]
else:
    features_key = "LDA"
save_projection = True
json_conf = json.load(open(sys.argv[3]))
ae_conf = json_conf["mlp_proj"]

hidden_size = ae_conf["hidden_size"]
input_activation = None
if ae_conf["input_activation"] == "elu":
    print " ELU"
    input_activation = PReLU()  # NB: instantiates PReLU even though the config value is "elu"
else:
    print " ELSE"
    input_activation = ae_conf["input_activation"]
output_activation = ae_conf["output_activation"]
loss = ae_conf["loss"]
epochs = ae_conf["epochs"]
batch_size = ae_conf["batch"]
patience = ae_conf["patience"]
dropouts = ae_conf["do"]
try:
    # "sgd" is either a dict like {"name": ..., "lr": ...} or a plain optimizer string
    if ae_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=ae_conf["sgd"]["lr"])
    elif ae_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=ae_conf["sgd"]["lr"])
except (TypeError, KeyError):
    sgd = ae_conf["sgd"]

mlp_conf = json_conf["mlp"]
mlp_h = mlp_conf["hidden_size"]
mlp_loss = mlp_conf["loss"]
mlp_dropouts = mlp_conf["do"]
mlp_epochs = mlp_conf["epochs"]
mlp_batch_size = mlp_conf["batch"]
mlp_input_activation = mlp_conf["input_activation"]
mlp_output_activation = mlp_conf["output_activation"]

try:
    if mlp_conf["sgd"]["name"] == "adam":
        mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])
    elif mlp_conf["sgd"]["name"] == "sgd":
        mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except (TypeError, KeyError):
    mlp_sgd = mlp_conf["sgd"]

name = json_conf["name"]
try:
    os.mkdir("{}/{}".format(in_dir, name))
except OSError:
    pass
db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))
db["IDS"] = dict(infer_model["LABEL"])

keys = infer_model[features_key].keys()
LABELS = {}
for mod in keys:
    int_labels_train = map(select, infer_model["LABEL"][mod]["TRAIN"])
    binarizer = LabelBinarizer()
    y_train = binarizer.fit_transform(int_labels_train)
    y_dev = binarizer.transform(map(select, infer_model["LABEL"][mod]["DEV"]))
    y_test = binarizer.transform(map(select, infer_model["LABEL"][mod]["TEST"]))
    LABELS[mod] = {"TRAIN": y_train, "DEV": y_dev, "TEST": y_test}
    summary, proj = train_mlp_proj(infer_model[features_key][mod]["TRAIN"].todense(), y_train,
                                   infer_model[features_key][mod]["DEV"].todense(), y_dev,
                                   infer_model[features_key][mod]["TEST"].todense(), y_test,
                                   hidden_size, sgd=sgd,
                                   epochs=epochs,
                                   patience=patience,
                                   batch_size=batch_size,
                                   input_activation=input_activation,
                                   output_activation=output_activation,
                                   dropouts=dropouts,
                                   fit_verbose=1)
    with open("{}/{}/{}_sum.txt".format(in_dir, name, mod), "w") as output_sum:
        print >>output_sum, summary
    for num_lvl, level in enumerate(proj):
        print len(level)
        for num, corp_type in enumerate(["TRAIN", "DEV", "TEST"]):
            df = pandas.DataFrame(level[num])  # renamed from `pd`, which shadowed the usual pandas alias
            df.to_hdf("{}/{}/MLP_proj_df.hdf".format(in_dir, name), "{}/lvl{}/{}".format(mod, num_lvl, corp_type))
db["LABEL"] = LABELS
db.sync()
db.close()
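The L*.json files consumed by this script are not part of the diff; from the reads above they carry a top-level name plus mlp_proj, mlp and (for the later scripts) transfert sections. A minimal sketch of a generator for such a file — only the key names are taken from the code, every value is illustrative:

# Hypothetical generator for the L*.json configs read by these scripts.
# Key names come from the code above; all sizes, rates and paths are illustrative.
import json

conf = {
    "name": "L1do",
    "mlp_proj": {
        "hidden_size": [1024, 24, 256],    # assumed bottleneck in the middle
        "input_activation": "tanh",
        "output_activation": "softmax",
        "loss": "categorical_crossentropy",
        "epochs": 1200,
        "batch": 16,
        "patience": 20,
        "do": [0.25, 0.25, 0.25, 0.25],    # one rate per layer boundary
        "sgd": {"name": "adam", "lr": 0.0001},
    },
    "mlp": {
        "hidden_size": [128],
        "input_activation": "relu",
        "output_activation": "softmax",
        "loss": "categorical_crossentropy",
        "epochs": 1200,
        "batch": 16,
        "patience": 20,
        "do": [0.25],
        "sgd": "rmsprop",                  # the plain-string form handled by the try/except above
    },
    "transfert": {
        "hidden_size": [24],
        "input_activation": "tanh",
        "output_activation": "tanh",
        "loss": "mse",
        "epochs": 500,
        "batch": 8,
        "patience": 20,
        "do": [],                          # no dropout
        "sgd": {"name": "sgd", "lr": 0.01},
    },
}

with open("output_1/L1do.json", "w") as f:
    json.dump(conf, f, indent=2)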
BOTTLENECK/02a-mlp_score_on_BN.py
@@ -0,0 +1,115 @@
# coding: utf-8

# Usage: 02a-mlp_score_on_BN.py <in_dir> <config.json>

# Imports
import gensim
from scipy import sparse
import itertools
import numpy as np
import pandas
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
json_conf = json.load(open(sys.argv[2]))

mlp_conf = json_conf["mlp"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

try:
    # "sgd" is either a dict like {"name": ..., "lr": ...} or a plain optimizer string
    if mlp_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=mlp_conf["sgd"]["lr"])
    elif mlp_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except (TypeError, KeyError):
    sgd = mlp_conf["sgd"]
name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))
shelve_logs = shelve.open("{}/{}/02a_logs.shelve".format(in_dir, name))

keys = db["LABEL"].keys()
proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir, name))
hdf_keys = proj_hdf.keys()
proj_hdf.close()
# keys look like "/<mod>/lvl<n>/<split>"
hdf_mods = set([x.split("/")[1] for x in hdf_keys])
hdf_lvl = set([x.split("/")[2] for x in hdf_keys])
hdf_crossval = set([x.split("/")[3] for x in hdf_keys])
print hdf_mods
print hdf_lvl
print hdf_crossval

hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir, name)
labels_dict = {"origine": {}}
logs = {}
for lvl in hdf_lvl:
    labels_dict[lvl] = {}
    for mod in hdf_mods:
        labels_dict[lvl][mod] = {}

for mod in hdf_mods:
    for lvl in hdf_lvl:
        x_train = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TRAIN"))
        x_dev = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "DEV"))
        x_test = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TEST"))
        if x_train.shape[1] <= 8:
            # very narrow frames are the network's output (label) layer, not a bottleneck:
            # keep their argmax as the "origine" labels instead of retraining on them
            labels_dict["origine"]["TRAIN"] = np.argmax(x_train.values, axis=1)
            labels_dict["origine"]["DEV"] = np.argmax(x_dev.values, axis=1)
            labels_dict["origine"]["TEST"] = np.argmax(x_test.values, axis=1)
            continue
        y_train = db["LABEL"][mod]["TRAIN"]
        y_dev = db["LABEL"][mod]["DEV"]
        y_test = db["LABEL"][mod]["TEST"]

        print x_train.shape
        print x_dev.shape
        print x_test.shape
        print y_train.shape
        print y_dev.shape
        print y_test.shape
        pred, hist = train_mlp_pred(x_train.values, y_train,
                                    x_dev.values, y_dev,
                                    x_test.values, y_test,
                                    hidden_size, sgd=sgd,
                                    epochs=epochs,
                                    patience=patience,
                                    batch_size=batch_size,
                                    input_activation=input_activation,
                                    output_activation=output_activation,
                                    dropouts=dropouts,
                                    fit_verbose=1)
        shelve_logs["{}/{}".format(mod, lvl)] = hist
        labels_dict[lvl][mod]["TRAIN"] = np.argmax(pred[0], axis=1)
        labels_dict[lvl][mod]["DEV"] = np.argmax(pred[1], axis=1)
        labels_dict[lvl][mod]["TEST"] = np.argmax(pred[2], axis=1)

for lvl in hdf_lvl:
    db[lvl] = labels_dict[lvl]
shelve_logs.sync()
shelve_logs.close()
db.sync()
db.close()
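This script and the following ones recover the modality, level and split of each stored projection by splitting the HDF5 key on "/". A small inspection sketch, assuming an existing store (paths are illustrative):

# Sketch: enumerate the /<mod>/lvl<n>/<split> keys of an MLP_proj_df.hdf store.
# Assumes the file exists; the path is illustrative.
import pandas

store = pandas.HDFStore("output_1/L1do/MLP_proj_df.hdf")
keys = store.keys()
store.close()

for key in sorted(keys):
    _, mod, lvl, split = key.split("/")   # e.g. "/ASR/lvl2/TRAIN"
    frame = pandas.read_hdf("output_1/L1do/MLP_proj_df.hdf", key=key)
    print mod, lvl, split, frame.shape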
BOTTLENECK/02b-transfert_ae.py
@@ -0,0 +1,99 @@
# coding: utf-8

# Usage: 02b-transfert_ae.py <in_dir> <config.json>

# Imports
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import pandas as pd
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
json_conf = json.load(open(sys.argv[2]))

mlp_conf = json_conf["transfert"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

try:
    # "sgd" is either a dict like {"name": ..., "lr": ...} or a plain optimizer string
    if mlp_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=mlp_conf["sgd"]["lr"])
    elif mlp_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except (TypeError, KeyError):
    sgd = mlp_conf["sgd"]
name = json_conf["name"]

proj_hdf = pd.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir, name))
hdf_keys = proj_hdf.keys()
proj_hdf.close()
# keys look like "/<mod>/lvl<n>/<split>"
hdf_mods = set([x.split("/")[1] for x in hdf_keys])
hdf_lvl = set([x.split("/")[2] for x in hdf_keys])
hdf_crossval = set([x.split("/")[3] for x in hdf_keys])
print hdf_mods
print hdf_lvl
print hdf_crossval

hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir, name)
transfert_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir, name)
mod1, mod2 = "ASR", "TRS"
for lvl in hdf_lvl:
    x_train_ASR = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod1, lvl, "TRAIN"))
    x_dev_ASR = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod1, lvl, "DEV"))
    x_test_ASR = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod1, lvl, "TEST"))
    x_train_TRS = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod2, lvl, "TRAIN"))
    x_dev_TRS = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod2, lvl, "DEV"))
    x_test_TRS = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod2, lvl, "TEST"))

    if x_train_ASR.shape[1] <= 8:
        # skip the output (label) layer, keep only bottleneck projections
        continue

    # autoencoder-shaped network trained to map ASR projections onto TRS projections
    pred = train_ae(x_train_ASR.values,
                    x_dev_ASR.values,
                    x_test_ASR.values,
                    hidden_size, sgd=sgd,
                    y_train=x_train_TRS.values,
                    y_dev=x_dev_TRS.values,
                    y_test=x_test_TRS.values,
                    epochs=epochs,
                    patience=patience,
                    batch_size=batch_size,
                    input_activation=input_activation,
                    output_activation=output_activation,
                    dropouts=dropouts,
                    best_mod=True,
                    verbose=1)
    for num_layer, layer in enumerate(pred):
        transfert_train = pd.DataFrame(layer[0])
        transfert_dev = pd.DataFrame(layer[1])
        transfert_test = pd.DataFrame(layer[2])
        transfert_train.to_hdf(transfert_proj_path, "{}/{}/TRAIN".format(lvl, "layer" + str(num_layer)))
        transfert_dev.to_hdf(transfert_proj_path, "{}/{}/DEV".format(lvl, "layer" + str(num_layer)))
        transfert_test.to_hdf(transfert_proj_path, "{}/{}/TEST".format(lvl, "layer" + str(num_layer)))
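train_ae is used here in a supervised way: the ASR-side projections are the inputs and the TRS-side projections of the same documents are the targets, so the "autoencoder" becomes an ASR-to-TRS mapping. A minimal Keras 1 sketch of that idea, with illustrative sizes (train_ae itself additionally stacks several layers and returns per-layer predictions):

# Minimal sketch of the ASR->TRS mapping behind train_ae (Keras 1 API).
# Shapes, activations and the optimizer are illustrative.
from keras.layers import Input, Dense
from keras.models import Model

x = Input(shape=(24,))                # ASR bottleneck projection
h = Dense(24, activation="tanh")(x)
y = Dense(24, activation="tanh")(h)   # target: the TRS projection of the same document
model = Model(input=x, output=y)
model.compile(optimizer="adam", loss="mse")
# model.fit(asr_train, trs_train, validation_data=(asr_dev, trs_dev),
#           nb_epoch=500, batch_size=8)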
BOTTLENECK/02c-tsne_mlproj.py
@@ -0,0 +1,123 @@
# coding: utf-8

# Usage: 02c-tsne_mlproj.py <in_dir> <config.json>

# Imports
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import pandas as pd
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
from sklearn.manifold import TSNE
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
json_conf = json.load(open(sys.argv[2]))

mlp_conf = json_conf["transfert"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

try:
    # "sgd" is either a dict like {"name": ..., "lr": ...} or a plain optimizer string
    if mlp_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=mlp_conf["sgd"]["lr"])
    elif mlp_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except (TypeError, KeyError):
    sgd = mlp_conf["sgd"]
name = json_conf["name"]

print " MLP"
proj_hdf = pd.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir, name))
hdf_keys = proj_hdf.keys()
proj_hdf.close()
hdf_mods = set([x.split("/")[1] for x in hdf_keys])
hdf_lvl = set([x.split("/")[2] for x in hdf_keys])
hdf_crossval = set([x.split("/")[3] for x in hdf_keys])
print hdf_mods
print hdf_lvl
print hdf_crossval

hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir, name)
tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir, name)
for mod in hdf_mods:
    for lvl in hdf_lvl:
        x_train = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TRAIN"))
        x_dev = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "DEV"))
        x_test = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TEST"))

        if x_train.shape[1] <= 8:
            continue
        tsne = TSNE()
        tsne_train = tsne.fit_transform(x_train.values)
        pd.DataFrame(tsne_train).to_hdf(tsne_proj_path, key="MLP/{}/{}/{}".format(mod, lvl, "TRAIN"))
        tsne = TSNE()
        tsne_dev = tsne.fit_transform(x_dev.values)
        pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path, key="MLP/{}/{}/{}".format(mod, lvl, "DEV"))
        tsne = TSNE()
        tsne_test = tsne.fit_transform(x_test.values)
        pd.DataFrame(tsne_test).to_hdf(tsne_proj_path, key="MLP/{}/{}/{}".format(mod, lvl, "TEST"))
        tsne = TSNE()
        tsne_all = tsne.fit_transform(pd.concat([x_train, x_dev, x_test]).values)
        pd.DataFrame(tsne_all).to_hdf(tsne_proj_path, key="MLP/{}/{}/{}".format(mod, lvl, "CONCAT"))

print " TRANSFERT"

hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir, name)
proj_hdf = pd.HDFStore(hdf_proj_path)
print proj_hdf
hdf_keys = proj_hdf.keys()
proj_hdf.close()
print hdf_keys
hdf_lvl = set([x.split("/")[1] for x in hdf_keys])
hdf_layer = set([x.split("/")[2] for x in hdf_keys])
hdf_crossval = set([x.split("/")[3] for x in hdf_keys])
print hdf_lvl
print hdf_layer
print hdf_crossval

tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir, name)
for lvl in hdf_lvl:
    for layer in hdf_layer:
        x_train = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TRAIN"))
        x_dev = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "DEV"))
        x_test = pd.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TEST"))

        if x_train.shape[1] <= 8:
            continue
        # fixed: these keys previously reused the stale `mod` variable from the loop above
        tsne = TSNE()
        tsne_train = tsne.fit_transform(x_train.values)
        pd.DataFrame(tsne_train).to_hdf(tsne_proj_path, key="transfert/{}/{}/{}".format(lvl, layer, "TRAIN"))
        tsne = TSNE()
        tsne_dev = tsne.fit_transform(x_dev.values)
        pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path, key="transfert/{}/{}/{}".format(lvl, layer, "DEV"))
        tsne = TSNE()
        tsne_test = tsne.fit_transform(x_test.values)
        pd.DataFrame(tsne_test).to_hdf(tsne_proj_path, key="transfert/{}/{}/{}".format(lvl, layer, "TEST"))
        tsne = TSNE()
        tsne_all = tsne.fit_transform(pd.concat([x_train, x_dev, x_test]).values)
        pd.DataFrame(tsne_all).to_hdf(tsne_proj_path, key="transfert/{}/{}/{}".format(lvl, layer, "CONCAT"))
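The stored 2-D projections are convenient to inspect visually. A plotting sketch, assuming matplotlib is available; the store key, paths and level name are illustrative:

# Sketch: visualise one stored t-SNE projection, coloured by the gold labels
# kept in labels.shelve. Paths and the "lvl2" level name are illustrative.
import shelve
import numpy as np
import pandas
import matplotlib.pyplot as plt

db = shelve.open("output_1/L1do/labels.shelve")
labels = np.argmax(db["LABEL"]["ASR"]["TRAIN"], axis=1)   # back from one-hot
db.close()

points = pandas.read_hdf("output_1/L1do/tsne_proj_df.hdf", key="MLP/ASR/lvl2/TRAIN")
plt.scatter(points[0], points[1], c=labels, s=5)
plt.savefig("tsne_ASR_lvl2_train.png")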
BOTTLENECK/03-mlp_score_on_transfert.py
@@ -0,0 +1,111 @@
# coding: utf-8

# Usage: 03-mlp_score_on_transfert.py <in_dir> <config.json>

# Imports
import gensim
from scipy import sparse
import itertools
import numpy as np
import pandas
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
json_conf = json.load(open(sys.argv[2]))

mlp_conf = json_conf["mlp"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

try:
    # "sgd" is either a dict like {"name": ..., "lr": ...} or a plain optimizer string
    if mlp_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=mlp_conf["sgd"]["lr"])
    elif mlp_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except (TypeError, KeyError):
    sgd = mlp_conf["sgd"]
name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))
shelve_logs = shelve.open("{}/{}/03_logs.shelve".format(in_dir, name), writeback=True)

keys = db["LABEL"].keys()

hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir, name)
proj_hdf = pandas.HDFStore(hdf_proj_path)
hdf_keys = proj_hdf.keys()
print hdf_keys
proj_hdf.close()
# keys look like "/<lvl>/layer<n>/<split>"
hdf_lvl = set([x.split("/")[1] for x in hdf_keys])
hdf_layer = set([x.split("/")[2] for x in hdf_keys])
hdf_crossval = set([x.split("/")[3] for x in hdf_keys])
print hdf_lvl
print hdf_crossval

labels_dict = {}
logs = {}
for lvl in hdf_lvl:
    labels_dict[lvl] = {}
    for layer in hdf_layer:
        labels_dict[lvl][layer] = {}

for lvl in hdf_lvl:
    for layer in hdf_layer:
        x_train = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TRAIN"))
        x_dev = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "DEV"))
        x_test = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TEST"))

        y_train = db["LABEL"]["ASR"]["TRAIN"]
        y_dev = db["LABEL"]["ASR"]["DEV"]
        y_test = db["LABEL"]["ASR"]["TEST"]

        print x_train.shape
        print x_dev.shape
        print x_test.shape
        print y_train.shape
        print y_dev.shape
        print y_test.shape
        pred, hist = train_mlp_pred(x_train.values, y_train,
                                    x_dev.values, y_dev,
                                    x_test.values, y_test,
                                    hidden_size, sgd=sgd,
                                    epochs=epochs,
                                    patience=patience,
                                    batch_size=batch_size,
                                    input_activation=input_activation,
                                    output_activation=output_activation,
                                    dropouts=dropouts,
                                    fit_verbose=1)
        shelve_logs["{}/{}".format(lvl, layer)] = hist
        labels_dict[lvl][layer]["TRAIN"] = np.argmax(pred[0], axis=1)
        labels_dict[lvl][layer]["DEV"] = np.argmax(pred[1], axis=1)
        labels_dict[lvl][layer]["TEST"] = np.argmax(pred[2], axis=1)

db["transfert"] = labels_dict
shelve_logs.sync()
shelve_logs.close()
db.sync()
db.close()
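After 01a, 02a and this script have run, labels.shelve aggregates everything 04-accuracyscore.py needs. A layout sketch (the shelve keys come from the scripts above; the concrete level and layer names are illustrative):

# Sketch: the layout labels.shelve has accumulated by this point.
# Paths and the "lvl2"/"layer1" names are illustrative.
import shelve

db = shelve.open("output_1/L1do/labels.shelve")
print db["IDS"].keys()                                  # raw label ids, from 01a
print db["LABEL"]["ASR"].keys()                         # ['TRAIN', 'DEV', 'TEST'] one-hot refs, from 01a
print db["lvl2"]["ASR"]["TEST"][:10]                    # bottleneck-MLP predictions, from 02a
print db["transfert"]["lvl2"]["layer1"]["TEST"][:10]    # transfer predictions, from this script
db.close()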
BOTTLENECK/04-accuracyscore.py
@@ -0,0 +1,68 @@
# coding: utf-8

# Usage: 04-accuracyscore.py <in_dir> <config.json>
# Prints CSV lines: name,MOD,level,train,dev,test

# Imports
import gensim
from scipy import sparse
import numpy as np
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
from sklearn import metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
json_conf = json.load(open(sys.argv[2]))

name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))

keys = sorted(db.keys())
keys.remove("IDS")
keys.remove("transfert")
keys.remove("LABEL")
mods = ["ASR", "TRS"]
ref_train = db["LABEL"]["ASR"]["TRAIN"]
ref_dev = db["LABEL"]["ASR"]["DEV"]
ref_test = db["LABEL"]["ASR"]["TEST"]

print "name,MOD,level,train,dev,test"
for mod in mods:
    for lvl in keys:
        if "TEST" in db[lvl][mod]:
            train_score = metrics.accuracy_score(np.argmax(ref_train, axis=1), db[lvl][mod]["TRAIN"])
            dev_score = metrics.accuracy_score(np.argmax(ref_dev, axis=1), db[lvl][mod]["DEV"])
            test_score = metrics.accuracy_score(np.argmax(ref_test, axis=1), db[lvl][mod]["TEST"])
        else:
            train_score = "ERROR"
            dev_score = "ERROR"
            test_score = "ERROR"
        print ",".join([name, mod, lvl, str(train_score), str(dev_score), str(test_score)])

for level in db["transfert"].keys():
    for layer in db["transfert"][level].keys():
        if "TRAIN" in db["transfert"][level][layer].keys():
            train_score = metrics.accuracy_score(np.argmax(ref_train, axis=1), db["transfert"][level][layer]["TRAIN"])
            dev_score = metrics.accuracy_score(np.argmax(ref_dev, axis=1), db["transfert"][level][layer]["DEV"])
            test_score = metrics.accuracy_score(np.argmax(ref_test, axis=1), db["transfert"][level][layer]["TEST"])
            print ",".join([name, "transfert", level + "/" + layer, str(train_score), str(dev_score), str(test_score)])

db.close()
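run_one.sh (below) appends this CSV output to $1/res.csv, including one header line per run. A sketch for loading the accumulated file and ranking configurations by dev accuracy, with the column names taken from the header printed above:

# Sketch: load the accumulated res.csv and rank configurations by dev accuracy.
# Assumes the file only contains lines printed by 04-accuracyscore.py.
import pandas

res = pandas.read_csv("output_1/res.csv",
                      names=["name", "MOD", "level", "train", "dev", "test"])
# drop the repeated header rows and failed runs before converting to float
res = res[(res["name"] != "name") & (res["dev"] != "ERROR")].copy()
res["dev"] = res["dev"].astype(float)
print res.sort_values("dev", ascending=False).head(10)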
BOTTLENECK/mlp.py
BOTTLENECK/run01_do_alljson.sh
@@ -0,0 +1,8 @@
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L0.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L0do.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L1.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L1do.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L2.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L2do.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L3.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L3do.json RAW
BOTTLENECK/run02_mlpscore.sh
@@ -0,0 +1,11 @@
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3do.json
BOTTLENECK/run02b-transfert.sh
@@ -0,0 +1,8 @@
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3do.json
BOTTLENECK/run03_tsne_MLPtransfert.sh
@@ -0,0 +1,8 @@
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3do.json
BOTTLENECK/run04-mlp_on_transfert.sh
@@ -0,0 +1,10 @@
#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3do.json

#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0.json
#THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0do.json
BOTTLENECK/run05_accuracy.sh
@@ -0,0 +1,8 @@
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0do.json
BOTTLENECK/run_all.sh
@@ -0,0 +1,22 @@
bash run_one.sh output_3 output_3/L0do.json gpu0 &
bash run_one.sh output_3 output_3/L0.json gpu1 &
bash run_one.sh output_3 output_3/L1do.json gpu0 &
bash run_one.sh output_3 output_3/L1.json gpu1 &
wait
bash run_one.sh output_3 output_3/L2do.json gpu0 &
bash run_one.sh output_3 output_3/L2.json gpu1 &
bash run_one.sh output_3 output_3/L3bndo.json gpu0 &
bash run_one.sh output_3 output_3/L3ce1.json gpu1 &
wait
bash run_one.sh output_3 output_3/L3ce.json gpu0 &
bash run_one.sh output_3 output_3/L3do.json gpu1 &
bash run_one.sh output_3 output_3/L3.json gpu0 &
bash run_one.sh output_3 output_3/L3sigmo.json gpu1 &
wait
bash run_one.sh output_3 output_3/L4do.json gpu0 &
bash run_one.sh output_3 output_3/L5do.json gpu1 &
bash run_one.sh output_3 output_3/L6do.json gpu0 &
bash run_one.sh output_3 output_3/L7do.json gpu1 &
wait
bash run_one.sh output_3 output_3/MaxMLP.json gpu0 &
bash run_one.sh output_3 output_3/MinMLP.json gpu1 &
BOTTLENECK/run_one.sh
@@ -0,0 +1,7 @@
bn=$(basename $2)
time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 01a-mlp_proj.py $1 Sparse_tfidf2.shelve $2 RAW) 2>> logs/${bn}_time ; echo MLP_$2 >> logs/${bn}_time
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02a-mlp_score_on_BN.py $1 $2
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02b-transfert_ae.py $1 $2
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02c-tsne_mlproj.py $1 $2
time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 03-mlp_score_on_transfert.py $1 $2) 2>> logs/${bn}_time ; echo transfert_$2 >> logs/${bn}_time
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 04-accuracyscore.py $1 $2 >> $1/res.csv
BOTTLENECK/utils.py
LDA/mlp.py
@@ -6,13 +6,15 @@ from keras.optimizers import SGD,Adam
 from keras.models import Sequential
 from keras.layers import Input, Dense, Dropout
 from keras.models import Model
+from keras.callbacks import ModelCheckpoint, EarlyStopping
 from keras.utils.layer_utils import layer_from_config
 from itertools import izip_longest
-
+import tempfile
+import shutil
 import pandas
 from collections import namedtuple
 from sklearn.metrics import accuracy_score as perf
-save_tuple= namedtuple("save_tuple",["pred_train","pred_dev","pred_test"])
+save_tuple = namedtuple("save_tuple", ["pred_train", "pred_dev", "pred_test"])
 
 
 def ft_dsae(train,dev,test,
@@ -74,12 +76,114 @@ def ft_dsae(train,dev,test,
         layers.append(Dense(y_train.shape[1],activation=output_activation)(layers[-1]))
         models = [Model(input=layers[0] , output=x) for x in layers[1:]]
         models[-1].compile(optimizer=sgd,loss=loss)
-        models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],validation_data=(dev,dev),verbose=verbose)
+        models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=[EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],validation_data=(dev,dev),verbose=verbose)
         predictions = [ [x.predict(y) for y in param_predict ] for x in models ]
         pred_by_level.append(predictions)
 
     return pred_by_level
 
+def train_mlp_proj(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,patience=20,test_verbose=0):
+
+    # best weights are checkpointed to a temporary folder and restored before predicting
+    tempfold = tempfile.mkdtemp()
+    model_tempfile = tempfold + "/model.hdf"
+
+    layers = [Input(shape=(x_train.shape[1],))]
+
+    for h in hidden_size:
+        print h
+        if dropouts:
+            d = dropouts.pop(0)  # NB: consumes the caller's list
+            if d > 0:
+                ldo = Dropout(d)(layers[-1])
+                print 'append'
+                layers.append(Dense(h, init=init, activation=input_activation)(ldo))
+        else:
+            print " append"
+            layers.append(Dense(h, init=init, activation=input_activation)(layers[-1]))
+
+    if dropouts:
+        d = dropouts.pop(0)
+        if d > 0:
+            ldo = Dropout(d)(layers[-1])
+            print "end"
+            layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(ldo))
+    else:
+        print "end"
+        layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(layers[-1]))
+
+    # one model per depth, all sharing the same input; only the deepest is trained
+    models = []
+    for l in layers[1:]:
+        models.append(Model(layers[0], l))
+    print "nb models : ", len(models), "h :", hidden_size, "layer", len(layers)
+    if not sgd:
+        sgd = SGD(lr=0.01, decay=0, momentum=0.9)
+
+    models[-1].compile(loss=loss, optimizer=sgd, metrics=['accuracy'])
+    callbacks = [ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose, save_best_only=True, save_weights_only=True, mode='auto'),
+                 EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose)]  # we could also try monitoring the loss
+    print models[-1].summary()
+    hist = models[-1].fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size, verbose=fit_verbose, validation_data=(x_dev, y_dev), callbacks=callbacks)
+    models[-1].load_weights(model_tempfile, by_name=False)
+    proj = []
+    for layer, model in enumerate(models):
+        proj.append((model.predict(x_train), model.predict(x_dev), model.predict(x_test)))
+
+    shutil.rmtree(tempfold)
+    return models[-1].summary(), proj  # NB: Keras's summary() prints to stdout and returns None
+
+
+def train_mlp_pred(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,patience=20,test_verbose=0):
+
+    tempfold = tempfile.mkdtemp()
+    model_tempfile = tempfold + "/model.hdf"
+
+    layers = [Input(shape=(x_train.shape[1],))]
+
+    for h in hidden_size:
+        if dropouts:
+            d = dropouts.pop(0)  # NB: consumes the caller's list
+            if d > 0:
+                ldo = Dropout(d)(layers[-1])
+                layers.append(Dense(h, init=init, activation=input_activation)(ldo))
+        else:
+            layers.append(Dense(h, init=init, activation=input_activation)(layers[-1]))
+
+    if dropouts:
+        d = dropouts.pop(0)
+        if d > 0:
+            ldo = Dropout(d)(layers[-1])
+            layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(ldo))
+    else:
+        layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(layers[-1]))
+
+    model = Model(layers[0], layers[-1])
+    if not sgd:
+        sgd = SGD(lr=0.01, decay=0, momentum=0.9)
+
+    model.compile(loss=loss, optimizer=sgd, metrics=['accuracy'])
+    callbacks = [ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose, save_best_only=True, save_weights_only=True, mode='auto'),
+                 EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose)]  # we could also try monitoring the loss
+    print model.summary()
+    hist = model.fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size, verbose=fit_verbose, validation_data=(x_dev, y_dev), callbacks=callbacks)
+    model.load_weights(model_tempfile, by_name=False)
+    pred = (model.predict(x_train), model.predict(x_dev), model.predict(x_test))
+
+    shutil.rmtree(tempfold)
+    return pred, hist
+
+
 def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,test_verbose=0,save_pred=False,keep_histo=False):
 
     layers = [Input(shape=(x_train.shape[1],))]
@@ -107,7 +211,7 @@ def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activa
         d = dropouts.pop(0)
         if d > 0 :
             layers.append(Dropout(d)(layers[-1]))
-
+    print y_train[2:10]  # debug: inspect a few one-hot targets
     layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(layers[-1]))
 
     model = Model(layers[0] , layers[-1])
@@ -147,7 +251,7 @@ def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activa
         res.append(hist)
     return res
 
-def train_ae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,verbose=1,patience=20,get_weights=False,set_weights=[]):
+def train_ae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,test_verbose=0,verbose=1,patience=20,get_weights=False,set_weights=[],best_mod=False):
 
     input_vect = Input(shape=(train.shape[1],))
 
@@ -193,7 +297,17 @@ def train_ae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dro
     models = [Model(input=previous[0] , output=x) for x in previous[1:]]
     print "MLP", sgd, loss
     models[-1].compile(optimizer=sgd,loss=loss)
-    models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],validation_data=(dev,dev),verbose=verbose)
+    cb = [EarlyStopping(monitor='val_loss', patience=patience, verbose=0)]
+    if best_mod:
+        tempfold = tempfile.mkdtemp()
+        model_tempfile = tempfold + "/model.hdf"
+        cb.append(ModelCheckpoint(model_tempfile, monitor='val_loss', verbose=test_verbose, save_best_only=True, save_weights_only=True, mode='auto'))
+
+    models[-1].summary()
+    models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=cb,validation_data=(dev,dev),verbose=verbose)  # NB: validates against the dev inputs even when explicit y targets are given
+    if best_mod:
+        models[-1].load_weights(model_tempfile)
+        shutil.rmtree(tempfold)
     param_predict = [ train, dev, test ]
     if predict_y :
         param_predict += [ y_train, y_dev ,y_test ]
199 | 313 | param_predict += [ y_train, y_dev ,y_test ] |