Compare View

Commits (3)

Changes

Showing 17 changed files

BOTTLENECK/01a-mlp_proj.py
File was created

# coding: utf-8

# Imports
import gensim
from scipy import sparse
import itertools
import numpy as np
import pandas
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

infer_model = shelve.open("{}".format(sys.argv[2]))
in_dir = sys.argv[1]
# ['ASR', 'TRS', 'LABEL']
if len(sys.argv) > 4:
    features_key = sys.argv[4]
else:
    features_key = "LDA"
save_projection = True
json_conf = json.load(open(sys.argv[3]))
ae_conf = json_conf["mlp_proj"]

hidden_size = ae_conf["hidden_size"]
input_activation = None
if ae_conf["input_activation"] == "elu":
    print " ELU"
    input_activation = PReLU()
else:
    print " ELSE"
    input_activation = ae_conf["input_activation"]
output_activation = ae_conf["output_activation"]
loss = ae_conf["loss"]
epochs = ae_conf["epochs"]
batch_size = ae_conf["batch"]
patience = ae_conf["patience"]
dropouts = ae_conf["do"]
# "sgd" is either a {"name": ..., "lr": ...} dict or a plain optimizer
# name (e.g. "rmsprop") passed through to Keras as-is.
try:
    if ae_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=ae_conf["sgd"]["lr"])
    elif ae_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=ae_conf["sgd"]["lr"])
except TypeError:
    sgd = ae_conf["sgd"]

mlp_conf = json_conf["mlp"]
mlp_h = mlp_conf["hidden_size"]
mlp_loss = mlp_conf["loss"]
mlp_dropouts = mlp_conf["do"]
mlp_epochs = mlp_conf["epochs"]
mlp_batch_size = mlp_conf["batch"]
mlp_input_activation = mlp_conf["input_activation"]
mlp_output_activation = mlp_conf["output_activation"]

try:
    if mlp_conf["sgd"]["name"] == "adam":
        mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])
    elif mlp_conf["sgd"]["name"] == "sgd":
        mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except TypeError:
    mlp_sgd = mlp_conf["sgd"]

name = json_conf["name"]
try:
    os.mkdir("{}/{}".format(in_dir, name))
except OSError:
    pass
db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))
db["IDS"] = dict(infer_model["LABEL"])

keys = infer_model[features_key].keys()
LABELS = {}
for mod in keys:
    int_labels_train = map(select, infer_model["LABEL"][mod]["TRAIN"])
    binarizer = LabelBinarizer()
    y_train = binarizer.fit_transform(int_labels_train)
    y_dev = binarizer.transform(map(select, infer_model["LABEL"][mod]["DEV"]))
    y_test = binarizer.transform(map(select, infer_model["LABEL"][mod]["TEST"]))
    LABELS[mod] = {"TRAIN": y_train, "DEV": y_dev, "TEST": y_test}
    summary, proj = train_mlp_proj(infer_model[features_key][mod]["TRAIN"].todense(), y_train,
                                   infer_model[features_key][mod]["DEV"].todense(), y_dev,
                                   infer_model[features_key][mod]["TEST"].todense(), y_test,
                                   hidden_size, sgd=sgd,
                                   epochs=epochs,
                                   patience=patience,
                                   batch_size=batch_size,
                                   input_activation=input_activation,
                                   output_activation=output_activation,
                                   dropouts=dropouts,
                                   fit_verbose=1)
    with open("{}/{}/{}_sum.txt".format(in_dir, name, mod), "w") as output_sum:
        print >>output_sum, summary
    # Save every intermediate projection: one HDF key per modality/level/split.
    for num_lvl, level in enumerate(proj):
        for num, corp_type in enumerate(["TRAIN", "DEV", "TEST"]):
            df = pandas.DataFrame(level[num])
            df.to_hdf("{}/{}/MLP_proj_df.hdf".format(in_dir, name),
                      "{}/lvl{}/{}".format(mod, num_lvl, corp_type))
db["LABEL"] = LABELS
db.sync()
db.close()

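For reference, a sketch of the JSON configuration these scripts read; the key set mirrors the accesses in 01a-mlp_proj.py and 02a-mlp_score_on_BN.py, but the values shown here are illustrative assumptions, not the settings used in the experiments:

    {
      "name": "L1",
      "mlp_proj": {
        "hidden_size": [1024, 24, 1024],
        "input_activation": "tanh",
        "output_activation": "softmax",
        "loss": "categorical_crossentropy",
        "epochs": 1200,
        "batch": 16,
        "patience": 20,
        "do": [0.25, 0.25, 0.25, 0.25],
        "sgd": {"name": "adam", "lr": 0.0001}
      },
      "mlp": { "...": "same keys as mlp_proj" },
      "transfert": { "...": "same keys, read by 02b and 02c" }
    }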
BOTTLENECK/02a-mlp_score_on_BN.py
File was created

# coding: utf-8

# Imports
import gensim
from scipy import sparse
import itertools
import numpy as np
import pandas
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
# ['ASR', 'TRS', 'LABEL']
json_conf = json.load(open(sys.argv[2]))

mlp_conf = json_conf["mlp"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

# "sgd" is either a {"name": ..., "lr": ...} dict or a plain optimizer name.
try:
    if mlp_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=mlp_conf["sgd"]["lr"])
    elif mlp_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except TypeError:
    sgd = mlp_conf["sgd"]
name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))
shelve_logs = shelve.open("{}/{}/02a_logs.shelve".format(in_dir, name))

keys = db["LABEL"].keys()
proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir, name))
hdf_keys = proj_hdf.keys()
proj_hdf.close()
# Keys have the form /<modality>/<level>/<split>.
hdf_mods = set([x.split("/")[1] for x in hdf_keys])
hdf_lvl = set([x.split("/")[2] for x in hdf_keys])
hdf_crossval = set([x.split("/")[3] for x in hdf_keys])
print hdf_mods
print hdf_lvl
print hdf_crossval

hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir, name)
labels_dict = {"origine": {}}
logs = {}
for lvl in hdf_lvl:
    labels_dict[lvl] = {}
    for mod in hdf_mods:
        labels_dict[lvl][mod] = {}

for mod in hdf_mods:
    for lvl in hdf_lvl:
        x_train = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TRAIN"))
        x_dev = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "DEV"))
        x_test = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TEST"))
        if x_train.shape[1] <= 8:
            # Output layer of the projection MLP: its argmax is already a label.
            labels_dict["origine"]["TRAIN"] = np.argmax(x_train.values, axis=1)
            labels_dict["origine"]["DEV"] = np.argmax(x_dev.values, axis=1)
            labels_dict["origine"]["TEST"] = np.argmax(x_test.values, axis=1)
            continue
        y_train = db["LABEL"][mod]["TRAIN"]
        y_dev = db["LABEL"][mod]["DEV"]
        y_test = db["LABEL"][mod]["TEST"]

        print x_train.shape, x_dev.shape, x_test.shape
        print y_train.shape, y_dev.shape, y_test.shape
        pred, hist = train_mlp_pred(x_train.values, y_train,
                                    x_dev.values, y_dev,
                                    x_test.values, y_test,
                                    hidden_size, sgd=sgd,
                                    epochs=epochs,
                                    patience=patience,
                                    batch_size=batch_size,
                                    input_activation=input_activation,
                                    output_activation=output_activation,
                                    dropouts=dropouts,
                                    fit_verbose=1)
        shelve_logs["{}/{}".format(mod, lvl)] = hist
        labels_dict[lvl][mod]["TRAIN"] = np.argmax(pred[0], axis=1)
        labels_dict[lvl][mod]["DEV"] = np.argmax(pred[1], axis=1)
        labels_dict[lvl][mod]["TEST"] = np.argmax(pred[2], axis=1)

for lvl in hdf_lvl:
    db[lvl] = labels_dict[lvl]
shelve_logs.sync()
shelve_logs.close()
db.sync()
db.close()

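The projections written by 01a and read back above live in MLP_proj_df.hdf under keys of the form /<modality>/lvl<n>/<split>. A minimal sketch of inspecting one of them (the path assumes the output_1/L1 run from the shell scripts below):

    import pandas
    x_train = pandas.read_hdf("output_1/L1/MLP_proj_df.hdf", key="/ASR/lvl0/TRAIN")
    print x_train.shape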
BOTTLENECK/02b-transfert_ae.py
File was created

# coding: utf-8

# Imports
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import pandas
import pandas as pd
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
# ['ASR', 'TRS', 'LABEL']
json_conf = json.load(open(sys.argv[2]))

mlp_conf = json_conf["transfert"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

# "sgd" is either a {"name": ..., "lr": ...} dict or a plain optimizer name.
try:
    if mlp_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=mlp_conf["sgd"]["lr"])
    elif mlp_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except TypeError:
    sgd = mlp_conf["sgd"]
name = json_conf["name"]

proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir, name))
hdf_keys = proj_hdf.keys()
proj_hdf.close()
hdf_mods = set([x.split("/")[1] for x in hdf_keys])
hdf_lvl = set([x.split("/")[2] for x in hdf_keys])
hdf_crossval = set([x.split("/")[3] for x in hdf_keys])
print hdf_mods
print hdf_lvl
print hdf_crossval

hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir, name)
transfert_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir, name)
# Learn a mapping from the ASR projections to the TRS projections.
mod1, mod2 = "ASR", "TRS"
for lvl in hdf_lvl:
    x_train_ASR = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod1, lvl, "TRAIN"))
    x_dev_ASR = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod1, lvl, "DEV"))
    x_test_ASR = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod1, lvl, "TEST"))
    x_train_TRS = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod2, lvl, "TRAIN"))
    x_dev_TRS = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod2, lvl, "DEV"))
    x_test_TRS = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod2, lvl, "TEST"))

    if x_train_ASR.shape[1] <= 8:
        continue

    pred = train_ae(x_train_ASR.values,
                    x_dev_ASR.values,
                    x_test_ASR.values,
                    hidden_size, sgd=sgd,
                    y_train=x_train_TRS.values,
                    y_dev=x_dev_TRS.values,
                    y_test=x_test_TRS.values,
                    epochs=epochs,
                    patience=patience,
                    batch_size=batch_size,
                    input_activation=input_activation,
                    output_activation=output_activation,
                    dropouts=dropouts,
                    best_mod=True,
                    verbose=1)
    for num_layer, layer in enumerate(pred):
        transfert_train = pd.DataFrame(layer[0])
        transfert_dev = pd.DataFrame(layer[1])
        transfert_test = pd.DataFrame(layer[2])
        transfert_train.to_hdf(transfert_proj_path, "{}/layer{}/TRAIN".format(lvl, num_layer))
        transfert_dev.to_hdf(transfert_proj_path, "{}/layer{}/DEV".format(lvl, num_layer))
        transfert_test.to_hdf(transfert_proj_path, "{}/layer{}/TEST".format(lvl, num_layer))

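The transferred representations are written under keys of the form /<level>/layer<n>/<split>; 02c-tsne_mlproj.py and 03-mlp_score_on_transfert.py split those keys the same way. A minimal read-back sketch (path assumed as above):

    import pandas
    x_train = pandas.read_hdf("output_1/L1/transfert_proj_df.hdf", key="/lvl0/layer0/TRAIN")
    print x_train.shape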
BOTTLENECK/02c-tsne_mlproj.py
File was created

# coding: utf-8

# Imports
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import pandas
import pandas as pd
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
from sklearn.manifold import TSNE
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
# ['ASR', 'TRS', 'LABEL']
json_conf = json.load(open(sys.argv[2]))

mlp_conf = json_conf["transfert"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

try:
    if mlp_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=mlp_conf["sgd"]["lr"])
    elif mlp_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except TypeError:
    sgd = mlp_conf["sgd"]
name = json_conf["name"]

# t-SNE on the MLP bottleneck projections
print " MLP"
proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir, name))
hdf_keys = proj_hdf.keys()
proj_hdf.close()
hdf_mods = set([x.split("/")[1] for x in hdf_keys])
hdf_lvl = set([x.split("/")[2] for x in hdf_keys])
hdf_crossval = set([x.split("/")[3] for x in hdf_keys])
print hdf_mods
print hdf_lvl
print hdf_crossval

hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir, name)
tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir, name)
for mod in hdf_mods:
    for lvl in hdf_lvl:
        x_train = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TRAIN"))
        x_dev = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "DEV"))
        x_test = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(mod, lvl, "TEST"))

        if x_train.shape[1] <= 8:
            continue
        tsne = TSNE()
        tsne_train = tsne.fit_transform(x_train.values)
        pd.DataFrame(tsne_train).to_hdf(tsne_proj_path, key="MLP/{}/{}/{}".format(mod, lvl, "TRAIN"))
        tsne = TSNE()
        tsne_dev = tsne.fit_transform(x_dev.values)
        pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path, key="MLP/{}/{}/{}".format(mod, lvl, "DEV"))
        tsne = TSNE()
        tsne_test = tsne.fit_transform(x_test.values)
        pd.DataFrame(tsne_test).to_hdf(tsne_proj_path, key="MLP/{}/{}/{}".format(mod, lvl, "TEST"))
        tsne = TSNE()
        tsne_all = tsne.fit_transform(pd.concat([x_train, x_dev, x_test]).values)
        pd.DataFrame(tsne_all).to_hdf(tsne_proj_path, key="MLP/{}/{}/{}".format(mod, lvl, "CONCAT"))

# t-SNE on the transferred (ASR -> TRS) representations
print " TRANSFERT"

hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir, name)
proj_hdf = pandas.HDFStore(hdf_proj_path)
print proj_hdf
hdf_keys = proj_hdf.keys()
proj_hdf.close()
print hdf_keys
hdf_lvl = set([x.split("/")[1] for x in hdf_keys])
hdf_layer = set([x.split("/")[2] for x in hdf_keys])
hdf_crossval = set([x.split("/")[3] for x in hdf_keys])
print hdf_lvl
print hdf_layer
print hdf_crossval

tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir, name)
for lvl in hdf_lvl:
    for layer in hdf_layer:
        x_train = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TRAIN"))
        x_dev = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "DEV"))
        x_test = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TEST"))

        if x_train.shape[1] <= 8:
            continue
        # Index the t-SNE outputs by lvl/layer so (lvl, layer) pairs do not
        # overwrite each other.
        tsne = TSNE()
        tsne_train = tsne.fit_transform(x_train.values)
        pd.DataFrame(tsne_train).to_hdf(tsne_proj_path, key="transfert/{}/{}/{}".format(lvl, layer, "TRAIN"))
        tsne = TSNE()
        tsne_dev = tsne.fit_transform(x_dev.values)
        pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path, key="transfert/{}/{}/{}".format(lvl, layer, "DEV"))
        tsne = TSNE()
        tsne_test = tsne.fit_transform(x_test.values)
        pd.DataFrame(tsne_test).to_hdf(tsne_proj_path, key="transfert/{}/{}/{}".format(lvl, layer, "TEST"))
        tsne = TSNE()
        tsne_all = tsne.fit_transform(pd.concat([x_train, x_dev, x_test]).values)
        pd.DataFrame(tsne_all).to_hdf(tsne_proj_path, key="transfert/{}/{}/{}".format(lvl, layer, "CONCAT"))

BOTTLENECK/03-mlp_score_on_transfert.py
File was created

# coding: utf-8

# Imports
import gensim
from scipy import sparse
import itertools
import numpy as np
import pandas
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
# ['ASR', 'TRS', 'LABEL']
json_conf = json.load(open(sys.argv[2]))

mlp_conf = json_conf["mlp"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

try:
    if mlp_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=mlp_conf["sgd"]["lr"])
    elif mlp_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except TypeError:
    sgd = mlp_conf["sgd"]
name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))
shelve_logs = shelve.open("{}/{}/03_logs.shelve".format(in_dir, name), writeback=True)

keys = db["LABEL"].keys()

hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir, name)
proj_hdf = pandas.HDFStore(hdf_proj_path)
hdf_keys = proj_hdf.keys()
print hdf_keys
proj_hdf.close()
hdf_lvl = set([x.split("/")[1] for x in hdf_keys])
hdf_layer = set([x.split("/")[2] for x in hdf_keys])
hdf_crossval = set([x.split("/")[3] for x in hdf_keys])
print hdf_lvl
print hdf_crossval

labels_dict = {}
logs = {}
for lvl in hdf_lvl:
    labels_dict[lvl] = {}
    for layer in hdf_layer:
        labels_dict[lvl][layer] = {}

for lvl in hdf_lvl:
    for layer in hdf_layer:
        x_train = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TRAIN"))
        x_dev = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "DEV"))
        x_test = pandas.read_hdf(hdf_proj_path, key="/{}/{}/{}".format(lvl, layer, "TEST"))

        y_train = db["LABEL"]["ASR"]["TRAIN"]
        y_dev = db["LABEL"]["ASR"]["DEV"]
        y_test = db["LABEL"]["ASR"]["TEST"]

        print x_train.shape, x_dev.shape, x_test.shape
        print y_train.shape, y_dev.shape, y_test.shape
        pred, hist = train_mlp_pred(x_train.values, y_train,
                                    x_dev.values, y_dev,
                                    x_test.values, y_test,
                                    hidden_size, sgd=sgd,
                                    epochs=epochs,
                                    patience=patience,
                                    batch_size=batch_size,
                                    input_activation=input_activation,
                                    output_activation=output_activation,
                                    dropouts=dropouts,
                                    fit_verbose=1)
        shelve_logs["{}/{}".format(lvl, layer)] = hist
        labels_dict[lvl][layer]["TRAIN"] = np.argmax(pred[0], axis=1)
        labels_dict[lvl][layer]["DEV"] = np.argmax(pred[1], axis=1)
        labels_dict[lvl][layer]["TEST"] = np.argmax(pred[2], axis=1)

db["transfert"] = labels_dict
shelve_logs.sync()
shelve_logs.close()
db.sync()
db.close()

BOTTLENECK/04-accuracyscore.py
File was created

# coding: utf-8

# Imports
import gensim
from scipy import sparse
import numpy as np
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from keras.layers.advanced_activations import ELU, PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
from sklearn import metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

in_dir = sys.argv[1]
# ['ASR', 'TRS', 'LABEL']
json_conf = json.load(open(sys.argv[2]))

name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))

keys = sorted(db.keys())
keys.remove("IDS")
keys.remove("transfert")
keys.remove("LABEL")
mods = ["ASR", "TRS"]
ref_train = db["LABEL"]["ASR"]["TRAIN"]
ref_dev = db["LABEL"]["ASR"]["DEV"]
ref_test = db["LABEL"]["ASR"]["TEST"]

# CSV on stdout: one line per (modality, level) and per transfer layer.
print "name,MOD,level,train,dev,test"
for mod in mods:
    for lvl in keys:
        if "TEST" in db[lvl][mod]:
            train_score = metrics.accuracy_score(np.argmax(ref_train, axis=1), db[lvl][mod]["TRAIN"])
            dev_score = metrics.accuracy_score(np.argmax(ref_dev, axis=1), db[lvl][mod]["DEV"])
            test_score = metrics.accuracy_score(np.argmax(ref_test, axis=1), db[lvl][mod]["TEST"])
        else:
            train_score = "ERROR"
            dev_score = "ERROR"
            test_score = "ERROR"
        print ",".join([name, mod, lvl, str(train_score), str(dev_score), str(test_score)])

for level in db["transfert"].keys():
    for layer in db["transfert"][level].keys():
        if "TRAIN" in db["transfert"][level][layer].keys():
            train_score = metrics.accuracy_score(np.argmax(ref_train, axis=1), db["transfert"][level][layer]["TRAIN"])
            dev_score = metrics.accuracy_score(np.argmax(ref_dev, axis=1), db["transfert"][level][layer]["DEV"])
            test_score = metrics.accuracy_score(np.argmax(ref_test, axis=1), db["transfert"][level][layer]["TEST"])
            print ",".join([name, "transfert", level + "/" + layer, str(train_score), str(dev_score), str(test_score)])

db.close()

BOTTLENECK/mlp.py
File was created
../LDA/mlp.py
BOTTLENECK/run01_do_alljson.sh
File was created
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L0.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L0do.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L1.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L1do.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L2.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L2do.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L3.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L3do.json RAW
BOTTLENECK/run02_mlpscore.sh
File was created
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3do.json
BOTTLENECK/run02b-transfert.sh
File was created
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3do.json
BOTTLENECK/run03_tsne_MLPtransfert.sh
File was created
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3do.json
BOTTLENECK/run04-mlp_on_transfert.sh
File was created
#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3do.json

#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0.json
#THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0do.json
BOTTLENECK/run05_accuracy.sh
File was created
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0do.json
BOTTLENECK/run_all.sh
File was created
bash run_one.sh output_3 output_3/L0do.json gpu0 &
bash run_one.sh output_3 output_3/L0.json gpu1 &
bash run_one.sh output_3 output_3/L1do.json gpu0 &
bash run_one.sh output_3 output_3/L1.json gpu1 &
wait
bash run_one.sh output_3 output_3/L2do.json gpu0 &
bash run_one.sh output_3 output_3/L2.json gpu1 &
bash run_one.sh output_3 output_3/L3bndo.json gpu0 &
bash run_one.sh output_3 output_3/L3ce1.json gpu1 &
wait
bash run_one.sh output_3 output_3/L3ce.json gpu0 &
bash run_one.sh output_3 output_3/L3do.json gpu1 &
bash run_one.sh output_3 output_3/L3.json gpu0 &
bash run_one.sh output_3 output_3/L3sigmo.json gpu1 &
wait
bash run_one.sh output_3 output_3/L4do.json gpu0 &
bash run_one.sh output_3 output_3/L5do.json gpu1 &
bash run_one.sh output_3 output_3/L6do.json gpu0 &
bash run_one.sh output_3 output_3/L7do.json gpu1 &
wait
bash run_one.sh output_3 output_3/MaxMLP.json gpu0 &
bash run_one.sh output_3 output_3/MinMLP.json gpu1 &
BOTTLENECK/run_one.sh
File was created
bn=$(basename $2)
time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 01a-mlp_proj.py $1 Sparse_tfidf2.shelve $2 RAW) 2>> logs/${bn}_time ; echo MLP_$2 >> logs/${bn}_time
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02a-mlp_score_on_BN.py $1 $2
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02b-transfert_ae.py $1 $2
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02c-tsne_mlproj.py $1 $2
time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 03-mlp_score_on_transfert.py $1 $2) 2>> logs/${bn}_time ; echo transfert_$2 >> logs/${bn}_time
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 04-accuracyscore.py $1 $2 >> $1/res.csv
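A typical invocation, following the pattern in run_all.sh (note that this script expects Sparse_tfidf2.shelve while run01_do_alljson.sh uses sparse_tfidf.shelve, so the corpus file name may need adjusting):

    mkdir -p logs
    bash run_one.sh output_1 output_1/L1.json gpu0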
BOTTLENECK/utils.py
File was created
../utils.py
LDA/mlp.py
File was modified

# -*- coding: utf-8 -*-
import keras
import numpy as np
#from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils.layer_utils import layer_from_config
from itertools import izip_longest
import tempfile
import shutil
import pandas
from collections import namedtuple
from sklearn.metrics import accuracy_score as perf

save_tuple = namedtuple("save_tuple", ["pred_train", "pred_dev", "pred_test"])


def ft_dsae(train, dev, test,
            y_train=None, y_dev=None, y_test=None,
            ae_hidden=[20], transfer_hidden=[20],
            start_weights=None, transfer_weights=None, end_weights=None,
            input_activation="tanh", output_activation="tanh",
            init="glorot_uniform",
            ae_dropouts=[None], transfer_do=[None],
            sgd="sgd", loss="mse", patience=5, verbose=0, epochs=5, batch_size=8):

    if not start_weights:
        start_weights = [None] * len(ae_hidden)
    if not transfer_weights:
        transfer_weights = [None] * len(transfer_hidden)
    if not end_weights:
        # one (empty) weight slot per autoencoder layer
        end_weights = [None] * len(ae_hidden)
    if not transfer_do:
        transfer_do = [0] * len(transfer_hidden)
    predict_y = True
    if y_train is None or y_dev is None or y_test is None:
        y_train = train
        y_dev = dev
        y_test = test
        predict_y = False
    param_predict = [train, dev, test]
    if predict_y:
        param_predict += [y_train, y_dev, y_test]

    pred_by_level = []  # holds the predictions for each transfer level
    layers = [Input(shape=(train.shape[1],))]
    for cpt in range(1, len(ae_hidden)):
        sizes = ae_hidden[:cpt] + transfer_hidden + ae_hidden[cpt:]
        weights = start_weights[:cpt] + transfer_weights[(cpt - 1)] + end_weights[cpt:]
        if len(ae_dropouts) == len(ae_hidden):
            do = ae_dropouts[:cpt] + transfer_do + ae_dropouts[cpt:]
        else:
            do = [0] * (len(ae_hidden) + len(transfer_hidden))
        for w in weights[:-1]:
            layers.append(Dense(w[1].shape[0], activation=input_activation, init=init, weights=w)(layers[-1]))
            if do:
                d = do.pop(0)
                if d > 0:
                    layers.append(Dropout(d)(layers[-1]))

        layers.append(Dense(y_train.shape[1], activation=output_activation)(layers[-1]))
        models = [Model(input=layers[0], output=x) for x in layers[1:]]
        models[-1].compile(optimizer=sgd, loss=loss)
        models[-1].fit(train, y_train, nb_epoch=epochs, batch_size=batch_size,
                       callbacks=[EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],
                       validation_data=(dev, dev), verbose=verbose)
        predictions = [[x.predict(y) for y in param_predict] for x in models]
        pred_by_level.append(predictions)

    return pred_by_level


def train_mlp_proj(x_train, y_train, x_dev, y_dev, x_test, y_test, hidden_size,
                   input_activation="relu", hidden_activation="relu", output_activation="softmax",
                   loss="mse", init="glorot_uniform", dropouts=None, sgd=None,
                   epochs=1200, batch_size=16, fit_verbose=1, patience=20, test_verbose=0):

    # Weights of the best epoch are checkpointed to a temporary file.
    tempfold = tempfile.mkdtemp()
    model_tempfile = tempfold + "/model.hdf"

    layers = [Input(shape=(x_train.shape[1],))]

    for h in hidden_size:
        if dropouts:
            d = dropouts.pop(0)
        else:
            d = 0
        if d > 0:
            ldo = Dropout(d)(layers[-1])
            layers.append(Dense(h, init=init, activation=input_activation)(ldo))
        else:
            layers.append(Dense(h, init=init, activation=input_activation)(layers[-1]))

    if dropouts:
        d = dropouts.pop(0)
    else:
        d = 0
    if d > 0:
        ldo = Dropout(d)(layers[-1])
        layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(ldo))
    else:
        layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(layers[-1]))

    # One model per depth, all sharing the same weights; only the deepest is trained.
    models = []
    for l in layers[1:]:
        models.append(Model(layers[0], l))
    print "nb models : ", len(models), "h :", hidden_size, "layers :", len(layers)
    if not sgd:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)

    models[-1].compile(loss=loss, optimizer=sgd, metrics=['accuracy'])
    callbacks = [ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose,
                                 save_best_only=True, save_weights_only=True, mode='auto'),
                 EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose)]  # monitoring the loss could be tried as well
    print models[-1].summary()
    hist = models[-1].fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size,
                          verbose=fit_verbose, validation_data=(x_dev, y_dev), callbacks=callbacks)
    models[-1].load_weights(model_tempfile, by_name=False)
    proj = []
    for num_layer, model in enumerate(models):
        proj.append((model.predict(x_train), model.predict(x_dev), model.predict(x_test)))

    shutil.rmtree(tempfold)
    return models[-1].summary(), proj


def train_mlp_pred(x_train, y_train, x_dev, y_dev, x_test, y_test, hidden_size,
                   input_activation="relu", hidden_activation="relu", output_activation="softmax",
                   loss="mse", init="glorot_uniform", dropouts=None, sgd=None,
                   epochs=1200, batch_size=16, fit_verbose=1, patience=20, test_verbose=0):

    tempfold = tempfile.mkdtemp()
    model_tempfile = tempfold + "/model.hdf"

    layers = [Input(shape=(x_train.shape[1],))]

    for h in hidden_size:
        if dropouts:
            d = dropouts.pop(0)
        else:
            d = 0
        if d > 0:
            ldo = Dropout(d)(layers[-1])
            layers.append(Dense(h, init=init, activation=input_activation)(ldo))
        else:
            layers.append(Dense(h, init=init, activation=input_activation)(layers[-1]))

    if dropouts:
        d = dropouts.pop(0)
    else:
        d = 0
    if d > 0:
        ldo = Dropout(d)(layers[-1])
        layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(ldo))
    else:
        layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(layers[-1]))

    model = Model(layers[0], layers[-1])
    if not sgd:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)

    model.compile(loss=loss, optimizer=sgd, metrics=['accuracy'])
    callbacks = [ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose,
                                 save_best_only=True, save_weights_only=True, mode='auto'),
                 EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose)]  # monitoring the loss could be tried as well
    print model.summary()
    hist = model.fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size,
                     verbose=fit_verbose, validation_data=(x_dev, y_dev), callbacks=callbacks)
    model.load_weights(model_tempfile, by_name=False)
    pred = (model.predict(x_train), model.predict(x_dev), model.predict(x_test))

    shutil.rmtree(tempfold)
    return pred, hist


def train_mlp(x_train, y_train, x_dev, y_dev, x_test, y_test, hidden_size,
              input_activation="relu", hidden_activation="relu", output_activation="softmax",
              loss="mse", init="glorot_uniform", dropouts=None, sgd=None,
              epochs=1200, batch_size=16, fit_verbose=1, test_verbose=0,
              save_pred=False, keep_histo=False):

    layers = [Input(shape=(x_train.shape[1],))]

    for h in hidden_size:
        if dropouts:
            d = dropouts.pop(0)
            if d > 0:
                layers.append(Dropout(d)(layers[-1]))
        layers.append(Dense(h, init=init, activation=input_activation)(layers[-1]))

    if dropouts:
        d = dropouts.pop(0)
        if d > 0:
            layers.append(Dropout(d)(layers[-1]))
    layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(layers[-1]))

    model = Model(layers[0], layers[-1])
    if not sgd:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)

    model.compile(loss=loss, optimizer=sgd, metrics=['accuracy'])

    # Train epoch by epoch and track accuracy on all three splits.
    scores_dev = []
    scores_test = []
    scores_train = []
    save = None
    for i in range(epochs):
        hist = model.fit(x_train, y_train, nb_epoch=1, batch_size=batch_size,
                         verbose=fit_verbose, validation_data=(x_dev, y_dev))
        pred_train = model.predict(x_train)
        pred_dev = model.predict(x_dev)
        pred_test = model.predict(x_test)

        scores_train.append(perf(np.argmax(y_train, axis=1), np.argmax(pred_train, axis=1)))
        scores_dev.append(perf(np.argmax(y_dev, axis=1), np.argmax(pred_dev, axis=1)))
        scores_test.append(perf(np.argmax(y_test, axis=1), np.argmax(pred_test, axis=1)))
        if fit_verbose:
            print "{} {} {} {}".format(i, scores_train[-1], scores_dev[-1], scores_test[-1])
        if save is None or (len(scores_dev) > 2 and scores_dev[-1] > scores_dev[-2]):
            save = save_tuple(pred_train, pred_dev, pred_test)
    arg_dev = np.argmax(scores_dev)
    best_dev = scores_dev[arg_dev]
    best_test = scores_test[arg_dev]
    max_test = np.max(scores_test)
    if fit_verbose:
        print " res : {} {} {}".format(best_dev, best_test, max_test)

    res = [scores_train, scores_dev, scores_test]
    if save_pred:
        res.append(save)
    if keep_histo:
        res.append(hist)
    return res


def train_ae(train, dev, test, hidden_sizes, y_train=None, y_dev=None, y_test=None,
             dropouts=None, input_activation="tanh", output_activation="tanh",
             loss="mse", sgd=None, epochs=500, batch_size=8, test_verbose=0, verbose=1,
             patience=20, get_weights=False, set_weights=[], best_mod=False):

    input_vect = Input(shape=(train.shape[1],))
    previous = [input_vect]

    if dropouts is None:
        dropouts = [0] * (len(hidden_sizes) + 1)
    if sgd is None:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)
    did_do = False
    if dropouts:
        d = dropouts.pop(0)
        if d:
            previous.append(Dropout(d)(previous[-1]))
            did_do = True

    for h_layer, weight_layer in izip_longest(hidden_sizes, set_weights, fillvalue=None):
        if weight_layer:
            w = weight_layer[0]
        else:
            w = None
        # Dropout layers are popped off `previous` so that they do not become
        # model outputs below; the Dense still connects through them via `p`.
        if did_do:
            p = previous.pop()
            did_do = False
        else:
            p = previous[-1]
        previous.append(Dense(h_layer, activation=input_activation, weights=w)(p))
        if dropouts:
            d = dropouts.pop(0)
            if d:
                previous.append(Dropout(d)(previous[-1]))
                did_do = True

    predict_y = True
    if y_train is None or y_dev is None or y_test is None:
        y_train = train
        y_dev = dev
        y_test = test
        predict_y = False
    previous.append(Dense(y_train.shape[1], activation=output_activation)(previous[-1]))
    models = [Model(input=previous[0], output=x) for x in previous[1:]]
    print "MLP", sgd, loss
    models[-1].compile(optimizer=sgd, loss=loss)
    cb = [EarlyStopping(monitor='val_loss', patience=patience, verbose=0)]
    if best_mod:
        tempfold = tempfile.mkdtemp()
        model_tempfile = tempfold + "/model.hdf"
        cb.append(ModelCheckpoint(model_tempfile, monitor='val_loss', verbose=test_verbose,
                                  save_best_only=True, save_weights_only=True, mode='auto'))

    models[-1].summary()
    models[-1].fit(train, y_train, nb_epoch=epochs, batch_size=batch_size,
                   callbacks=cb, validation_data=(dev, dev), verbose=verbose)
    if best_mod:
        models[-1].load_weights(model_tempfile)
        shutil.rmtree(tempfold)
    param_predict = [train, dev, test]
    if predict_y:
        param_predict += [y_train, y_dev, y_test]
    predictions = [[x.predict(y) for y in param_predict] for x in models]
    if get_weights:
        weights = [x.get_weights() for x in models[-1].layers if x.get_weights()]
        return (predictions, weights)
    else:
        return predictions


def train_sae(train, dev, test, hidden_sizes, y_train=None, y_dev=None, y_test=None,
              dropouts=None, input_activation="tanh", output_activation="tanh",
              loss="mse", sgd=None, epochs=500, batch_size=8, verbose=1, patience=20):

    weights = []
    predictions = [[(train, dev, test), ()]]
    ft_pred = []
    past_sizes = []

    for size in hidden_sizes:
        # Train one autoencoder layer on the previous layer's codes...
        res_pred, res_wght = train_ae(predictions[-1][-2][0], predictions[-1][-2][1], predictions[-1][-2][2],
                                      [size],
                                      dropouts=dropouts, input_activation=input_activation,
                                      output_activation=output_activation, loss=loss, sgd=sgd,
                                      epochs=epochs, batch_size=batch_size, verbose=verbose,
                                      patience=patience, get_weights=True)
        past_sizes.append(size)
        weights.append(res_wght)
        predictions.append(res_pred)
        # ...then fine-tune the whole stack initialised with the collected weights.
        res_ftpred = train_ae(train, dev, test, past_sizes,
                              y_train=y_train, y_dev=y_dev, y_test=y_test,
                              dropouts=dropouts,
                              input_activation=input_activation,
                              output_activation=output_activation,
                              loss=loss, sgd=sgd, epochs=epochs,
                              batch_size=batch_size, verbose=verbose, patience=patience,
                              set_weights=weights)
        ft_pred.append(res_ftpred)

    return (predictions[1:], ft_pred)
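A minimal smoke test for these helpers, with random data standing in for the real feature and label matrices (all shapes and hyperparameters here are arbitrary assumptions):

    import numpy as np
    from mlp import train_mlp

    x = np.random.rand(100, 50)
    y = np.zeros((100, 3))
    y[np.arange(100), np.random.randint(3, size=100)] = 1  # one-hot labels
    scores_train, scores_dev, scores_test = train_mlp(x, y, x, y, x, y,
                                                      hidden_size=[16],
                                                      dropouts=[0, 0],
                                                      loss="categorical_crossentropy",
                                                      epochs=2, batch_size=8,
                                                      fit_verbose=0)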