Commit 91aeb914f7a4a592c9645fb28e6f39f9a73116df

Authored by Killian
1 parent d1012a7a16
Exists in master

Add bottleneck MLP

Showing 1 changed file with 120 additions and 6 deletions Inline Diff

1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 import keras 2 import keras
3 import numpy as np 3 import numpy as np
4 #from keras.layers.core import Dense, Dropout, Activation 4 #from keras.layers.core import Dense, Dropout, Activation
5 from keras.optimizers import SGD,Adam 5 from keras.optimizers import SGD,Adam
6 from keras.models import Sequential 6 from keras.models import Sequential
7 from keras.layers import Input, Dense, Dropout 7 from keras.layers import Input, Dense, Dropout
8 from keras.models import Model 8 from keras.models import Model
9 from keras.callbacks import ModelCheckpoint, EarlyStopping
9 from keras.utils.layer_utils import layer_from_config 10 from keras.utils.layer_utils import layer_from_config
10 from itertools import izip_longest 11 from itertools import izip_longest
11 12 import tempfile
13 import shutil
12 import pandas 14 import pandas
13 from collections import namedtuple 15 from collections import namedtuple
14 from sklearn.metrics import accuracy_score as perf 16 from sklearn.metrics import accuracy_score as perf
# Record type bundling the train / dev / test predictions of one model.
save_tuple = namedtuple("save_tuple", "pred_train pred_dev pred_test")
16 18
17 19
def ft_dsae(train,dev,test,
            y_train=None,y_dev=None,y_test=None,
            ae_hidden=[20],transfer_hidden=[20],
            start_weights=None,transfer_weights=None,end_weights=None,
            input_activation="tanh", output_activation="tanh",
            init="glorot_uniform",
            ae_dropouts=[None], transfer_do=[None],
            sgd="sgd", loss="mse", patience=5, verbose=0, epochs=5, batch_size=8):
    """Fine-tune an auto-encoder with transfer layers spliced in at each depth.

    For every split point ``cpt`` in ``1 .. len(ae_hidden) - 1`` the weight
    list ``start_weights[:cpt] + transfer_weights[cpt - 1] + end_weights[cpt:]``
    is turned into a chain of Dense layers (all but the last weight entry),
    an output Dense layer is added, the deepest model is trained and every
    intermediate model is used to predict on the data.

    Parameters
    ----------
    train, dev, test : arrays fed as network inputs.
    y_train, y_dev, y_test : optional targets.  If any of them is ``None``
        the inputs are used as targets (plain auto-encoder).
    ae_hidden, transfer_hidden : hidden layer sizes; only their lengths are
        used here, the layer widths are read from the weight shapes.
    start_weights, transfer_weights, end_weights : pre-trained weights.
        NOTE(review): the ``None`` placeholders substituted when these are
        omitted cannot be concatenated / indexed further down, so in
        practice real weights appear to be required -- confirm with callers.
    ae_dropouts, transfer_do : dropout rates; ``None`` or ``0`` means no
        dropout for that position.
    sgd, loss, patience, verbose, epochs, batch_size : training settings
        forwarded to ``compile`` / ``fit`` / ``EarlyStopping``.

    Returns
    -------
    list
        One entry per split point; each entry is a list (one item per
        intermediate model) of lists of predictions over
        ``[train, dev, test]`` (plus ``[y_train, y_dev, y_test]`` when
        targets were supplied).

    The list-valued defaults in the signature are never mutated here.
    """
    if not start_weights:
        start_weights = [None] * len(ae_hidden)
    if not transfer_weights:
        transfer_weights = [None] * len(transfer_hidden)
    if not end_weights:
        # Sized like start_weights; the previous len(end_weights) raised a
        # TypeError whenever end_weights was left to its None default.
        end_weights = [None] * len(ae_hidden)
    if not transfer_do:
        transfer_do = [0] * len(transfer_hidden)

    predict_y = not (y_train is None or y_dev is None or y_test is None)
    if not predict_y:
        # No (complete) targets: reconstruct the inputs.
        y_train, y_dev, y_test = train, dev, test

    param_predict = [train, dev, test]
    if predict_y:
        param_predict += [y_train, y_dev, y_test]

    pred_by_level = []  # predictions for each transfer level
    # NOTE(review): `layers` is created once, so every split point keeps
    # stacking on top of the layers built for the previous one -- confirm
    # this is intended rather than a fresh network per level.
    layers = [Input(shape=(train.shape[1],))]
    for cpt in range(1, len(ae_hidden)):
        weights = start_weights[:cpt] + transfer_weights[cpt - 1] + end_weights[cpt:]
        if len(ae_dropouts) == len(ae_hidden):
            do = ae_dropouts[:cpt] + transfer_do + ae_dropouts[cpt:]
        else:
            do = [0] * (len(ae_hidden) + len(transfer_hidden))

        # The last weight entry is skipped; the output layer below is
        # created without pre-set weights.
        for w in weights[:-1]:
            layers.append(Dense(w[1].shape[0], activation=input_activation,
                                init=init, weights=w)(layers[-1]))
            if do:
                d = do.pop(0)
                if d is not None and d > 0:
                    layers.append(Dropout(d)(layers[-1]))

        layers.append(Dense(y_train.shape[1], activation=output_activation)(layers[-1]))
        models = [Model(input=layers[0], output=x) for x in layers[1:]]
        models[-1].compile(optimizer=sgd, loss=loss)
        # Validate against the same kind of target the model is trained on
        # (identical to the old (dev, dev) when no targets are supplied).
        models[-1].fit(train, y_train, nb_epoch=epochs, batch_size=batch_size,
                       callbacks=[EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],
                       validation_data=(dev, y_dev), verbose=verbose)
        predictions = [[x.predict(y) for y in param_predict] for x in models]
        pred_by_level.append(predictions)

    return pred_by_level
82 84
85 def train_mlp_proj(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,patience=20,test_verbose=0):
86
87 #model_tempfile=tempfile.mkstemp()
88 tempfold = tempfile.mkdtemp()
89 model_tempfile= tempfold+"/model.hdf"
90
91 layers = [Input(shape=(x_train.shape[1],))]
92
93 for h in hidden_size:
94 print h
95 if dropouts:
96 d = dropouts.pop(0)
97 if d > 0 :
98 ldo = Dropout(d)(layers[-1])
99 print 'append'
100 layers.append(Dense(h,init=init,activation=input_activation)(ldo))
101 else :
102 print " append"
103 layers.append(Dense(h,init=init,activation=input_activation)(layers[-1]))
104
105
106 if dropouts:
107 d = dropouts.pop(0)
108 if d > 0 :
109 ldo =Dropout(d)(layers[-1])
110 print "end"
111 layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(ldo))
112 else:
113 print "end"
114 layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(layers[-1]))
115
116 models = []
117 for l in layers[1:] :
118 models.append(Model(layers[0] , l))
119 print "nb models : ", len(models), "h :",hidden_size , "layer", len(layers)
120 if not sgd:
121 sgd = SGD(lr=0.01, decay=0, momentum=0.9)
122
123 models[-1].compile(loss=loss, optimizer=sgd,metrics=['accuracy'])
124 callbacks = [ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose, save_best_only=True, save_weights_only=True, mode='auto'),
125 EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose) ] # On pourrai essayer avec la loss aussi
126 print models[-1].summary()
127 hist=models[-1].fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size,verbose=fit_verbose,validation_data=(x_dev,y_dev),callbacks=callbacks)
128 models[-1].load_weights(model_tempfile, by_name=False)
129 proj = []
130 for layer,model in enumerate(models):
131 proj.append((model.predict(x_train),model.predict(x_dev),model.predict(x_test)))
132
133 shutil.rmtree(tempfold)
134 return models[-1].summary(),proj
135
136
137
138
139
def train_mlp_pred(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,patience=20,test_verbose=0):
    """Train an MLP and return its predictions with the best ``val_acc`` weights.

    One Dense layer is built per entry of ``hidden_size`` followed by an
    output Dense layer of width ``y_train.shape[1]``.  Training uses early
    stopping on ``val_acc``; the weights of the best ``val_acc`` epoch are
    restored from a temporary checkpoint before predicting.

    Parameters
    ----------
    x_train, y_train, x_dev, y_dev, x_test, y_test : data arrays.
    hidden_size : iterable of hidden layer widths.
    input_activation : activation of every hidden layer.
    hidden_activation : accepted for signature compatibility; unused.
    output_activation, loss, init : output activation, loss, initializer.
    dropouts : optional list of dropout rates, consumed one per Dense layer
        (hidden layers first, then the output layer); a rate <= 0 or a
        missing entry means no dropout.  The caller's list is not modified.
    sgd : optimizer; defaults to ``SGD(lr=0.01, decay=0, momentum=0.9)``.
    epochs, batch_size, fit_verbose, patience, test_verbose : training
        settings forwarded to ``fit`` and to the callbacks.

    Returns
    -------
    tuple
        ``(pred, hist)`` where ``pred`` is the triple of predictions on
        ``(x_train, x_dev, x_test)`` and ``hist`` is the value returned by
        ``fit``.
    """
    # Work on a private copy so the caller's list survives the call.
    pending = list(dropouts) if dropouts else []

    layers = [Input(shape=(x_train.shape[1],))]

    def _next_input():
        # Tensor feeding the next Dense layer: the previous layer, wrapped
        # in Dropout when a positive rate is still pending.
        if pending:
            rate = pending.pop(0)
            if rate is not None and rate > 0:
                return Dropout(rate)(layers[-1])
        return layers[-1]

    # A Dense layer is always added, with or without dropout in front of it.
    for h in hidden_size:
        layers.append(Dense(h, init=init, activation=input_activation)(_next_input()))
    layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(_next_input()))

    model = Model(layers[0], layers[-1])
    if not sgd:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)

    model.compile(loss=loss, optimizer=sgd, metrics=['accuracy'])
    model.summary()

    tempfold = tempfile.mkdtemp()
    try:
        model_tempfile = tempfold + "/model.hdf"
        # The validation loss could be monitored instead of the accuracy.
        callbacks = [
            ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose,
                            save_best_only=True, save_weights_only=True, mode='auto'),
            EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose),
        ]
        hist = model.fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size,
                         verbose=fit_verbose, validation_data=(x_dev, y_dev),
                         callbacks=callbacks)
        model.load_weights(model_tempfile, by_name=False)
        pred = (model.predict(x_train), model.predict(x_dev), model.predict(x_test))
    finally:
        # Remove the checkpoint directory even when training fails.
        shutil.rmtree(tempfold)

    return pred, hist
180
181
182
183
184
185
186
83 def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,test_verbose=0,save_pred=False,keep_histo=False): 187 def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,test_verbose=0,save_pred=False,keep_histo=False):
84 188
85 layers = [Input(shape=(x_train.shape[1],))] 189 layers = [Input(shape=(x_train.shape[1],))]
86 190
87 for h in hidden_size: 191 for h in hidden_size:
88 if dropouts: 192 if dropouts:
89 d = dropouts.pop(0) 193 d = dropouts.pop(0)
90 if d > 0 : 194 if d > 0 :
91 layers.append(Dropout(d)(layers[-1])) 195 layers.append(Dropout(d)(layers[-1]))
92 196
93 layers.append(Dense(h,init=init,activation=input_activation)(layers[-1])) 197 layers.append(Dense(h,init=init,activation=input_activation)(layers[-1]))
94 #if dropouts: 198 #if dropouts:
95 # drop_prob=dropouts.pop(0) 199 # drop_prob=dropouts.pop(0)
96 # if drop_prob > 0: 200 # if drop_prob > 0:
97 # model.add(Dropout(drop_prob)) 201 # model.add(Dropout(drop_prob))
98 202
99 #if dropouts: 203 #if dropouts:
100 # drop_prob=dropouts.pop(0) 204 # drop_prob=dropouts.pop(0)
101 # if drop_prob > 0: 205 # if drop_prob > 0:
102 # model.add(Dropout(drop_prob)) 206 # model.add(Dropout(drop_prob))
103 207
104 #if dropouts: 208 #if dropouts:
105 # model.add(Dropout(dropouts.pop(0))) 209 # model.add(Dropout(dropouts.pop(0)))
106 if dropouts: 210 if dropouts:
107 d = dropouts.pop(0) 211 d = dropouts.pop(0)
108 if d > 0 : 212 if d > 0 :
109 layers.append(Dropout(d)(layers[-1])) 213 layers.append(Dropout(d)(layers[-1]))
110 214 print y_train[2:10]
111 layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(layers[-1])) 215 layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(layers[-1]))
112 216
113 model = Model(layers[0] , layers[-1]) 217 model = Model(layers[0] , layers[-1])
114 if not sgd: 218 if not sgd:
115 sgd = SGD(lr=0.01, decay=0, momentum=0.9) 219 sgd = SGD(lr=0.01, decay=0, momentum=0.9)
116 220
117 model.compile(loss=loss, optimizer=sgd,metrics=['accuracy']) 221 model.compile(loss=loss, optimizer=sgd,metrics=['accuracy'])
118 222
119 scores_dev=[] 223 scores_dev=[]
120 scores_test=[] 224 scores_test=[]
121 scores_train=[] 225 scores_train=[]
122 save=None 226 save=None
123 for i in range(epochs): 227 for i in range(epochs):
124 hist=model.fit(x_train, y_train, nb_epoch=1, batch_size=batch_size,verbose=fit_verbose,validation_data=(x_dev,y_dev)) 228 hist=model.fit(x_train, y_train, nb_epoch=1, batch_size=batch_size,verbose=fit_verbose,validation_data=(x_dev,y_dev))
125 pred_train=model.predict(x_train) 229 pred_train=model.predict(x_train)
126 pred_dev=model.predict(x_dev) 230 pred_dev=model.predict(x_dev)
127 pred_test=model.predict(x_test) 231 pred_test=model.predict(x_test)
128 232
129 scores_train.append(perf(np.argmax(y_train,axis=1),np.argmax(pred_train,axis=1))) 233 scores_train.append(perf(np.argmax(y_train,axis=1),np.argmax(pred_train,axis=1)))
130 scores_dev.append(perf(np.argmax(y_dev,axis=1),np.argmax(pred_dev,axis=1))) 234 scores_dev.append(perf(np.argmax(y_dev,axis=1),np.argmax(pred_dev,axis=1)))
131 scores_test.append(perf(np.argmax(y_test,axis=1),np.argmax(pred_test,axis=1))) 235 scores_test.append(perf(np.argmax(y_test,axis=1),np.argmax(pred_test,axis=1)))
132 if fit_verbose : 236 if fit_verbose :
133 print "{} {} {} {}".format(i,scores_train[-1],scores_dev[-1],scores_test[-1]) 237 print "{} {} {} {}".format(i,scores_train[-1],scores_dev[-1],scores_test[-1])
134 if save is None or (len(scores_dev)>2 and scores_dev[-1] > scores_dev[-2]): 238 if save is None or (len(scores_dev)>2 and scores_dev[-1] > scores_dev[-2]):
135 save=save_tuple(pred_train,pred_dev,pred_test) 239 save=save_tuple(pred_train,pred_dev,pred_test)
136 arg_dev = np.argmax(scores_dev) 240 arg_dev = np.argmax(scores_dev)
137 best_dev=scores_dev[arg_dev] 241 best_dev=scores_dev[arg_dev]
138 best_test=scores_test[arg_dev] 242 best_test=scores_test[arg_dev]
139 max_test=np.max(scores_test) 243 max_test=np.max(scores_test)
140 if fit_verbose: 244 if fit_verbose:
141 print " res : {} {} {}".format(best_dev,best_test,max_test) 245 print " res : {} {} {}".format(best_dev,best_test,max_test)
142 246
143 res=[scores_train,scores_dev,scores_test] 247 res=[scores_train,scores_dev,scores_test]
144 if save_pred: 248 if save_pred:
145 res.append(save) 249 res.append(save)
146 if keep_histo: 250 if keep_histo:
147 res.append(hist) 251 res.append(hist)
148 return res 252 return res
149 253
150 def train_ae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,verbose=1,patience=20,get_weights=False,set_weights=[]): 254 def train_ae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,test_verbose=0,verbose=1,patience=20,get_weights=False,set_weights=[],best_mod=False):
151 255
152 input_vect = Input(shape=(train.shape[1],)) 256 input_vect = Input(shape=(train.shape[1],))
153 257
154 previous = [input_vect] 258 previous = [input_vect]
155 259
156 if dropouts is None: 260 if dropouts is None:
157 dropouts = [ 0 ] * (len(hidden_sizes) +1) 261 dropouts = [ 0 ] * (len(hidden_sizes) +1)
158 if sgd is None : 262 if sgd is None :
159 sgd = SGD(lr=0.01, decay=0, momentum=0.9) 263 sgd = SGD(lr=0.01, decay=0, momentum=0.9)
160 did_do = False 264 did_do = False
161 if dropouts : 265 if dropouts :
162 d = dropouts.pop(0) 266 d = dropouts.pop(0)
163 if d : 267 if d :
164 previous.append(Dropout(d)(previous[-1])) 268 previous.append(Dropout(d)(previous[-1]))
165 did_do = True 269 did_do = True
166 270
167 for h_layer,weight_layer in izip_longest(hidden_sizes,set_weights,fillvalue=None) : 271 for h_layer,weight_layer in izip_longest(hidden_sizes,set_weights,fillvalue=None) :
168 # ,weights=w 272 # ,weights=w
169 if weight_layer : 273 if weight_layer :
170 w = weight_layer[0] 274 w = weight_layer[0]
171 else : 275 else :
172 w = None 276 w = None
173 #print "ADD SIZE" , h_layer 277 #print "ADD SIZE" , h_layer
174 if did_do : 278 if did_do :
175 p = previous.pop() 279 p = previous.pop()
176 did_do = False 280 did_do = False
177 else : 281 else :
178 p = previous[-1] 282 p = previous[-1]
179 previous.append(Dense(h_layer,activation=input_activation,weights=w)(previous[-1])) 283 previous.append(Dense(h_layer,activation=input_activation,weights=w)(previous[-1]))
180 if dropouts: 284 if dropouts:
181 d = dropouts.pop(0) 285 d = dropouts.pop(0)
182 if d : 286 if d :
183 previous.append(Dropout(d)(previous[-1])) 287 previous.append(Dropout(d)(previous[-1]))
184 did_do = True 288 did_do = True
185 289
186 predict_y = True 290 predict_y = True
187 if y_train is None or y_dev is None or y_test is None : 291 if y_train is None or y_dev is None or y_test is None :
188 y_train = train 292 y_train = train
189 y_dev = dev 293 y_dev = dev
190 y_test = test 294 y_test = test
191 predict_y = False 295 predict_y = False
192 previous.append(Dense(y_train.shape[1],activation=output_activation)(previous[-1])) 296 previous.append(Dense(y_train.shape[1],activation=output_activation)(previous[-1]))
193 models = [Model(input=previous[0] , output=x) for x in previous[1:]] 297 models = [Model(input=previous[0] , output=x) for x in previous[1:]]
194 print "MLP", sgd, loss 298 print "MLP", sgd, loss
195 models[-1].compile(optimizer=sgd,loss=loss) 299 models[-1].compile(optimizer=sgd,loss=loss)
196 models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],validation_data=(dev,dev),verbose=verbose) 300 cb = [EarlyStopping(monitor='val_loss', patience=patience, verbose=0)]
301 if best_mod:
302 tempfold = tempfile.mkdtemp()
303 model_tempfile= tempfold+"/model.hdf"
304 cb.append( ModelCheckpoint(model_tempfile, monitor='val_loss', verbose=test_verbose, save_best_only=True, save_weights_only=True, mode='auto') )
305
306 models[-1].summary()
307 models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=cb,validation_data=(dev,dev),verbose=verbose)
308 if best_mod:
309 models[-1].load_weights(model_tempfile)
310 shutil.rmtree(tempfold)
197 param_predict = [ train, dev, test ] 311 param_predict = [ train, dev, test ]
198 if predict_y : 312 if predict_y :
199 param_predict += [ y_train, y_dev ,y_test ] 313 param_predict += [ y_train, y_dev ,y_test ]
200 predictions = [ [x.predict(y) for y in param_predict ] for x in models ] 314 predictions = [ [x.predict(y) for y in param_predict ] for x in models ]
201 if get_weights : 315 if get_weights :
202 weights = [ x.get_weights() for x in models[-1].layers if x.get_weights() ] 316 weights = [ x.get_weights() for x in models[-1].layers if x.get_weights() ]
203 return ( predictions , weights ) 317 return ( predictions , weights )
204 else : 318 else :
205 return predictions 319 return predictions
206 320
def train_sae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,verbose=1,patience=20):
    """Greedily train a stacked auto-encoder, fine-tuning after each new layer.

    For every size in ``hidden_sizes``:

    1. a single-layer auto-encoder is trained with ``train_ae`` on the
       second-to-last projection of the previous step (the raw data for the
       first step) and its weights are collected;
    2. the full stack built so far is fine-tuned with ``train_ae`` on the
       original data, initialised with the collected weights.

    Parameters
    ----------
    train, dev, test : input arrays.
    hidden_sizes : iterable of layer widths, one auto-encoder per entry.
    y_train, y_dev, y_test : optional targets, used for fine-tuning only.
    dropouts : optional list of dropout rates forwarded to every
        ``train_ae`` call.
    input_activation, output_activation, loss, sgd, epochs, batch_size,
    verbose, patience : forwarded unchanged to ``train_ae``.

    Returns
    -------
    tuple
        ``(layer_predictions, ft_pred)``: the predictions of each
        single-layer auto-encoder and of each fine-tuning run.
    """
    def _fresh_dropouts():
        # train_ae consumes its `dropouts` list with pop(0); hand each call
        # its own copy so later calls still see the requested rates.
        return None if dropouts is None else list(dropouts)

    weights = []
    # Seed entry shaped so that predictions[-1][-2] is the raw data triple.
    predictions = [[(train, dev, test), ()]]
    ft_pred = []
    past_sizes = []

    for size in hidden_sizes:
        prev_train, prev_dev, prev_test = predictions[-1][-2][:3]
        res_pred, res_wght = train_ae(prev_train, prev_dev, prev_test, [size],
                                      dropouts=_fresh_dropouts(),
                                      input_activation=input_activation,
                                      output_activation=output_activation,
                                      loss=loss, sgd=sgd,
                                      epochs=epochs, batch_size=batch_size,
                                      verbose=verbose,
                                      patience=patience, get_weights=True)
        past_sizes.append(size)
        weights.append(res_wght)
        predictions.append(res_pred)

        # Fine-tune the whole stack built so far on the original data.
        res_ftpred = train_ae(train, dev, test, past_sizes,
                              y_train=y_train, y_dev=y_dev, y_test=y_test,
                              dropouts=_fresh_dropouts(),
                              input_activation=input_activation,
                              output_activation=output_activation,
                              loss=loss, sgd=sgd, epochs=epochs,
                              batch_size=batch_size, verbose=verbose,
                              patience=patience,
                              set_weights=weights)
        ft_pred.append(res_ftpred)

    return (predictions[1:], ft_pred)
236 350
237 351
238 352