Commit 91aeb914f7a4a592c9645fb28e6f39f9a73116df
1 parent d1012a7a16
Exists in master
add Bottleneck MLP
Showing 1 changed file with 120 additions and 6 deletions
LDA/mlp.py
# -*- coding: utf-8 -*-
import keras
import numpy as np
#from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils.layer_utils import layer_from_config
from itertools import izip_longest
import tempfile
import shutil
import pandas
from collections import namedtuple
from sklearn.metrics import accuracy_score as perf

save_tuple = namedtuple("save_tuple", ["pred_train", "pred_dev", "pred_test"])


def ft_dsae(train, dev, test,
            y_train=None, y_dev=None, y_test=None,
            ae_hidden=[20], transfer_hidden=[20],
            start_weights=None, transfer_weights=None, end_weights=None,
            input_activation="tanh", output_activation="tanh",
            init="glorot_uniform",
            ae_dropouts=[None], transfer_do=[None],
            sgd="sgd", loss="mse", patience=5, verbose=0, epochs=5, batch_size=8):

    if not start_weights:
        start_weights = [None] * len(ae_hidden)
    if not transfer_weights:
        transfer_weights = [None] * len(transfer_hidden)
    if not end_weights:
        end_weights = [None] * len(ae_hidden)  # was len(end_weights): len(None) raises a TypeError
    if not transfer_do:
        transfer_do = [0] * len(transfer_hidden)
    predict_y = True
    if y_train is None or y_dev is None or y_test is None:
        y_train = train
        y_dev = dev
        y_test = test
        predict_y = False
    param_predict = [train, dev, test]
    if predict_y:
        param_predict += [y_train, y_dev, y_test]

    pred_by_level = []  # holds the predictions for each transfer level
    layers = [Input(shape=(train.shape[1],))]
    #for w in transfer_weights:
    #    print "TW", [[y.shape for y in x] for x in w]
    #print "SW", [[y.shape for y in x] for x in start_weights]
    #print "EW", [[y.shape for y in x] for x in end_weights]
    for cpt in range(1, len(ae_hidden)):
        #print ae_hidden, cpt
        #print cpt, "before"
        #print "before2", [[x.shape for x in y] for y in start_weights[:cpt]]
        #print "before3", [[x.shape for x in y] for y in transfer_weights[cpt]]
        #print "before4", [[x.shape for x in y] for y in end_weights[cpt:]]
        sizes = ae_hidden[:cpt] + transfer_hidden + ae_hidden[cpt:]
        weights = start_weights[:cpt] + transfer_weights[(cpt-1)] + end_weights[cpt:]
        #print "SIZES", sizes
        #print "AW", [[y.shape for y in x] for x in weights]
        #print "WEI", len(weights), [len(x) for x in weights]
        if len(ae_dropouts) == len(ae_hidden):
            do = ae_dropouts[:cpt] + transfer_do + ae_dropouts[cpt:]
        else:
            do = [0] * (len(ae_hidden) + len(transfer_hidden))
        for w in weights[:-1]:
            #print "STEP", size
            layers.append(Dense(w[1].shape[0], activation=input_activation, init=init, weights=w)(layers[-1]))
            if do:
                d = do.pop(0)
                if d > 0:
                    layers.append(Dropout(d)(layers[-1]))

        layers.append(Dense(y_train.shape[1], activation=output_activation)(layers[-1]))
        models = [Model(input=layers[0], output=x) for x in layers[1:]]
        models[-1].compile(optimizer=sgd, loss=loss)
        models[-1].fit(train, y_train, nb_epoch=epochs, batch_size=batch_size,
                       callbacks=[EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],
                       validation_data=(dev, dev), verbose=verbose)
        predictions = [[x.predict(y) for y in param_predict] for x in models]
        pred_by_level.append(predictions)

    return pred_by_level

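# Usage sketch (editor's illustration, not part of this commit): how ft_dsae
# might be called with pre-trained weight stacks. The dimensions, the random
# weight pairs and the helper below are hypothetical assumptions.
def _example_ft_dsae():
    import numpy as np
    rng = np.random.RandomState(0)
    train, dev, test = rng.rand(100, 50), rng.rand(20, 50), rng.rand(20, 50)

    def pair(n_in, n_out):  # one Dense weight pair [W, b]
        return [rng.randn(n_in, n_out) * 0.1, np.zeros(n_out)]

    # splice a 20-unit transfer layer into a [40, 30] auto-encoder stack
    start_weights = [pair(50, 40), pair(40, 30)]
    transfer_weights = [[pair(40, 20)]]          # one list of weight pairs per splice level
    end_weights = [pair(50, 40), pair(20, 30)]   # only end_weights[cpt:] is used
    return ft_dsae(train, dev, test,
                   ae_hidden=[40, 30], transfer_hidden=[20],
                   start_weights=start_weights, transfer_weights=transfer_weights,
                   end_weights=end_weights, epochs=2, batch_size=8)
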
def train_mlp_proj(x_train, y_train, x_dev, y_dev, x_test, y_test, hidden_size,
                   input_activation="relu", hidden_activation="relu", output_activation="softmax",
                   loss="mse", init="glorot_uniform", dropouts=None, sgd=None,
                   epochs=1200, batch_size=16, fit_verbose=1, patience=20, test_verbose=0):

    #model_tempfile = tempfile.mkstemp()
    tempfold = tempfile.mkdtemp()
    model_tempfile = tempfold + "/model.hdf"

    layers = [Input(shape=(x_train.shape[1],))]

    for h in hidden_size:
        print h
        # guard so a Dense layer is always added, with or without a dropout entry
        d = dropouts.pop(0) if dropouts else 0
        if d > 0:
            ldo = Dropout(d)(layers[-1])
            print 'append'
            layers.append(Dense(h, init=init, activation=input_activation)(ldo))
        else:
            print " append"
            layers.append(Dense(h, init=init, activation=input_activation)(layers[-1]))

    d = dropouts.pop(0) if dropouts else 0
    if d > 0:
        ldo = Dropout(d)(layers[-1])
        print "end"
        layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(ldo))
    else:
        print "end"
        layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(layers[-1]))

    models = []
    for l in layers[1:]:
        models.append(Model(layers[0], l))
    print "nb models : ", len(models), "h :", hidden_size, "layer", len(layers)
    if not sgd:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)

    models[-1].compile(loss=loss, optimizer=sgd, metrics=['accuracy'])
    callbacks = [ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose,
                                 save_best_only=True, save_weights_only=True, mode='auto'),
                 EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose)]  # we could also try monitoring the loss
    print models[-1].summary()
    hist = models[-1].fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size,
                          verbose=fit_verbose, validation_data=(x_dev, y_dev), callbacks=callbacks)
    models[-1].load_weights(model_tempfile, by_name=False)
    proj = []
    for layer, model in enumerate(models):
        proj.append((model.predict(x_train), model.predict(x_dev), model.predict(x_test)))

    shutil.rmtree(tempfold)
    return models[-1].summary(), proj


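# Usage sketch (editor's illustration, not part of this commit): train the
# classifier and collect per-depth projections. Shapes, the 4-class one-hot
# labels and the hyper-parameters are illustrative assumptions.
def _example_train_mlp_proj():
    import numpy as np
    rng = np.random.RandomState(0)
    x_train, x_dev, x_test = rng.rand(200, 30), rng.rand(50, 30), rng.rand(50, 30)
    y_train = np.eye(4)[rng.randint(0, 4, 200)]
    y_dev = np.eye(4)[rng.randint(0, 4, 50)]
    y_test = np.eye(4)[rng.randint(0, 4, 50)]
    summary, proj = train_mlp_proj(x_train, y_train, x_dev, y_dev, x_test, y_test,
                                   [16, 8], epochs=3, patience=2, fit_verbose=0)
    return proj  # one (train, dev, test) triple per model depth
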
def train_mlp_pred(x_train, y_train, x_dev, y_dev, x_test, y_test, hidden_size,
                   input_activation="relu", hidden_activation="relu", output_activation="softmax",
                   loss="mse", init="glorot_uniform", dropouts=None, sgd=None,
                   epochs=1200, batch_size=16, fit_verbose=1, patience=20, test_verbose=0):

    #model_tempfile = tempfile.mkstemp()
    tempfold = tempfile.mkdtemp()
    model_tempfile = tempfold + "/model.hdf"

    layers = [Input(shape=(x_train.shape[1],))]

    for h in hidden_size:
        # same guard as in train_mlp_proj: a Dense layer is always added
        d = dropouts.pop(0) if dropouts else 0
        if d > 0:
            ldo = Dropout(d)(layers[-1])
            layers.append(Dense(h, init=init, activation=input_activation)(ldo))
        else:
            layers.append(Dense(h, init=init, activation=input_activation)(layers[-1]))

    d = dropouts.pop(0) if dropouts else 0
    if d > 0:
        ldo = Dropout(d)(layers[-1])
        layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(ldo))
    else:
        layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(layers[-1]))

    model = Model(layers[0], layers[-1])
    if not sgd:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)

    model.compile(loss=loss, optimizer=sgd, metrics=['accuracy'])
    callbacks = [ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose,
                                 save_best_only=True, save_weights_only=True, mode='auto'),
                 EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose)]  # we could also try monitoring the loss
    print model.summary()
    hist = model.fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size,
                     verbose=fit_verbose, validation_data=(x_dev, y_dev), callbacks=callbacks)
    model.load_weights(model_tempfile, by_name=False)
    pred = (model.predict(x_train), model.predict(x_dev), model.predict(x_test))

    shutil.rmtree(tempfold)
    return pred, hist


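# Usage sketch (editor's illustration, not part of this commit): same data
# layout as above, but only the checkpointed model's predictions and the Keras
# fit history are returned. The dropout rates are illustrative assumptions.
def _example_train_mlp_pred():
    import numpy as np
    rng = np.random.RandomState(1)
    x_train, x_dev, x_test = rng.rand(200, 30), rng.rand(50, 30), rng.rand(50, 30)
    y_train = np.eye(4)[rng.randint(0, 4, 200)]
    y_dev = np.eye(4)[rng.randint(0, 4, 50)]
    y_test = np.eye(4)[rng.randint(0, 4, 50)]
    pred, hist = train_mlp_pred(x_train, y_train, x_dev, y_dev, x_test, y_test,
                                [16, 8], dropouts=[0.25, 0.25, 0.0],
                                epochs=3, patience=2, fit_verbose=0)
    pred_train, pred_dev, pred_test = pred
    return pred_dev, hist
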
def train_mlp(x_train, y_train, x_dev, y_dev, x_test, y_test, hidden_size,
              input_activation="relu", hidden_activation="relu", output_activation="softmax",
              loss="mse", init="glorot_uniform", dropouts=None, sgd=None,
              epochs=1200, batch_size=16, fit_verbose=1, test_verbose=0,
              save_pred=False, keep_histo=False):

    layers = [Input(shape=(x_train.shape[1],))]

    for h in hidden_size:
        if dropouts:
            d = dropouts.pop(0)
            if d > 0:
                layers.append(Dropout(d)(layers[-1]))

        layers.append(Dense(h, init=init, activation=input_activation)(layers[-1]))
        #if dropouts:
        #    drop_prob = dropouts.pop(0)
        #    if drop_prob > 0:
        #        model.add(Dropout(drop_prob))

    #if dropouts:
    #    drop_prob = dropouts.pop(0)
    #    if drop_prob > 0:
    #        model.add(Dropout(drop_prob))

    #if dropouts:
    #    model.add(Dropout(dropouts.pop(0)))
    if dropouts:
        d = dropouts.pop(0)
        if d > 0:
            layers.append(Dropout(d)(layers[-1]))
    print y_train[2:10]
    layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(layers[-1]))

    model = Model(layers[0], layers[-1])
    if not sgd:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)

    model.compile(loss=loss, optimizer=sgd, metrics=['accuracy'])

    scores_dev = []
    scores_test = []
    scores_train = []
    save = None
    for i in range(epochs):
        hist = model.fit(x_train, y_train, nb_epoch=1, batch_size=batch_size,
                         verbose=fit_verbose, validation_data=(x_dev, y_dev))
        pred_train = model.predict(x_train)
        pred_dev = model.predict(x_dev)
        pred_test = model.predict(x_test)

        scores_train.append(perf(np.argmax(y_train, axis=1), np.argmax(pred_train, axis=1)))
        scores_dev.append(perf(np.argmax(y_dev, axis=1), np.argmax(pred_dev, axis=1)))
        scores_test.append(perf(np.argmax(y_test, axis=1), np.argmax(pred_test, axis=1)))
        if fit_verbose:
            print "{} {} {} {}".format(i, scores_train[-1], scores_dev[-1], scores_test[-1])
        if save is None or (len(scores_dev) > 2 and scores_dev[-1] > scores_dev[-2]):
            save = save_tuple(pred_train, pred_dev, pred_test)
    arg_dev = np.argmax(scores_dev)
    best_dev = scores_dev[arg_dev]
    best_test = scores_test[arg_dev]
    max_test = np.max(scores_test)
    if fit_verbose:
        print " res : {} {} {}".format(best_dev, best_test, max_test)

    res = [scores_train, scores_dev, scores_test]
    if save_pred:
        res.append(save)
    if keep_histo:
        res.append(hist)
    return res

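# Usage sketch (editor's illustration, not part of this commit): epoch-by-epoch
# accuracy tracking on train/dev/test; the three score curves come back, plus
# the saved predictions or history when requested. Shapes are assumptions.
def _example_train_mlp():
    import numpy as np
    rng = np.random.RandomState(2)
    x_train, x_dev, x_test = rng.rand(200, 30), rng.rand(50, 30), rng.rand(50, 30)
    y_train = np.eye(4)[rng.randint(0, 4, 200)]
    y_dev = np.eye(4)[rng.randint(0, 4, 50)]
    y_test = np.eye(4)[rng.randint(0, 4, 50)]
    scores_train, scores_dev, scores_test = train_mlp(
        x_train, y_train, x_dev, y_dev, x_test, y_test,
        [16], epochs=3, fit_verbose=0)
    return scores_dev, scores_test
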
def train_ae(train, dev, test, hidden_sizes, y_train=None, y_dev=None, y_test=None,
             dropouts=None, input_activation="tanh", output_activation="tanh",
             loss="mse", sgd=None, epochs=500, batch_size=8, test_verbose=0, verbose=1,
             patience=20, get_weights=False, set_weights=[], best_mod=False):

    input_vect = Input(shape=(train.shape[1],))

    previous = [input_vect]

    if dropouts is None:
        dropouts = [0] * (len(hidden_sizes) + 1)
    if sgd is None:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)
    did_do = False
    if dropouts:
        d = dropouts.pop(0)
        if d:
            previous.append(Dropout(d)(previous[-1]))
            did_do = True

    for h_layer, weight_layer in izip_longest(hidden_sizes, set_weights, fillvalue=None):
        # ,weights=w
        if weight_layer:
            w = weight_layer[0]
        else:
            w = None
        #print "ADD SIZE", h_layer
        if did_do:
            p = previous.pop()
            did_do = False
        else:
            p = previous[-1]
        previous.append(Dense(h_layer, activation=input_activation, weights=w)(p))  # was previous[-1], which skipped the popped Dropout tensor
        if dropouts:
            d = dropouts.pop(0)
            if d:
                previous.append(Dropout(d)(previous[-1]))
                did_do = True

    predict_y = True
    if y_train is None or y_dev is None or y_test is None:
        y_train = train
        y_dev = dev
        y_test = test
        predict_y = False
    previous.append(Dense(y_train.shape[1], activation=output_activation)(previous[-1]))
    models = [Model(input=previous[0], output=x) for x in previous[1:]]
    print "MLP", sgd, loss
    models[-1].compile(optimizer=sgd, loss=loss)
    cb = [EarlyStopping(monitor='val_loss', patience=patience, verbose=0)]
    if best_mod:
        tempfold = tempfile.mkdtemp()
        model_tempfile = tempfold + "/model.hdf"
        cb.append(ModelCheckpoint(model_tempfile, monitor='val_loss', verbose=test_verbose,
                                  save_best_only=True, save_weights_only=True, mode='auto'))

    models[-1].summary()
    models[-1].fit(train, y_train, nb_epoch=epochs, batch_size=batch_size,
                   callbacks=cb, validation_data=(dev, dev), verbose=verbose)
    if best_mod:
        models[-1].load_weights(model_tempfile)
        shutil.rmtree(tempfold)
    param_predict = [train, dev, test]
    if predict_y:
        param_predict += [y_train, y_dev, y_test]
    predictions = [[x.predict(y) for y in param_predict] for x in models]
    if get_weights:
        weights = [x.get_weights() for x in models[-1].layers if x.get_weights()]
        return (predictions, weights)
    else:
        return predictions

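# Usage sketch (editor's illustration, not part of this commit): unsupervised
# auto-encoder training; one (train, dev, test) prediction triple per layer,
# plus the trained Dense weights for reuse. Sizes are illustrative assumptions.
def _example_train_ae():
    import numpy as np
    rng = np.random.RandomState(3)
    train, dev, test = rng.rand(100, 40), rng.rand(20, 40), rng.rand(20, 40)
    predictions, weights = train_ae(train, dev, test, [20, 10],
                                    epochs=3, patience=2, verbose=0,
                                    get_weights=True, best_mod=True)
    return predictions, weights
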
def train_sae(train, dev, test, hidden_sizes, y_train=None, y_dev=None, y_test=None,
              dropouts=None, input_activation="tanh", output_activation="tanh",
              loss="mse", sgd=None, epochs=500, batch_size=8, verbose=1, patience=20):

    weights = []
    predictions = [[(train, dev, test), ()]]
    ft_pred = []
    past_sizes = []

    for size in hidden_sizes:
        #print "DO size", size, "FROM", hidden_sizes
        res_pred, res_wght = train_ae(predictions[-1][-2][0], predictions[-1][-2][1], predictions[-1][-2][2], [size],
                                      dropouts=dropouts, input_activation=input_activation,
                                      output_activation=output_activation, loss=loss, sgd=sgd,
                                      epochs=epochs, batch_size=batch_size, verbose=verbose,
                                      patience=patience, get_weights=True)
        past_sizes.append(size)
        weights.append(res_wght)
        predictions.append(res_pred)
        #print "FINE TUNE"
        res_ftpred = train_ae(train, dev, test, past_sizes, y_train=y_train, y_dev=y_dev, y_test=y_test,
                              dropouts=dropouts,
                              input_activation=input_activation,
                              output_activation=output_activation,
                              loss=loss, sgd=sgd, epochs=epochs,
                              batch_size=batch_size, verbose=verbose, patience=patience,
                              set_weights=weights)
        ft_pred.append(res_ftpred)

    return (predictions[1:], ft_pred)

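# Usage sketch (editor's illustration, not part of this commit): greedy
# layer-wise pre-training of a [20, 10] stack, fine-tuning the accumulated
# stack after each stage. Data sizes are illustrative assumptions.
def _example_train_sae():
    import numpy as np
    rng = np.random.RandomState(4)
    train, dev, test = rng.rand(100, 40), rng.rand(20, 40), rng.rand(20, 40)
    pretrain_preds, ft_preds = train_sae(train, dev, test, [20, 10],
                                         epochs=3, patience=2, verbose=0)
    return pretrain_preds, ft_preds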