# -*- coding: utf-8 -*-
import keras
import numpy as np
from keras.optimizers import SGD,Adam
from keras.models import Sequential
from keras.layers import Input, Dense, Dropout
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping
from itertools import izip_longest
import tempfile
import shutil
from collections import namedtuple
from sklearn.metrics import accuracy_score as perf
save_tuple = namedtuple("save_tuple",["pred_train","pred_dev","pred_test"])


def ft_dsae(train,dev,test,
        y_train=None,y_dev=None,y_test=None,
        ae_hidden=[20],transfer_hidden=[20],
        start_weights=None,transfer_weights=None,end_weights=None,
        input_activation="tanh", output_activation="tanh",
        init="glorot_uniform",
        ae_dropouts=[None], transfer_do=[None],
        sgd="sgd", loss="mse", patience=5, verbose=0, epochs=5, batch_size=8):

    if not start_weights :
        start_weights = [ None ] * len(ae_hidden)
    if not transfer_weights :
        # one stack of (possibly absent) layer weights per insertion level
        transfer_weights = [ [None] * len(transfer_hidden) for _ in range(len(ae_hidden)) ]
    if not end_weights :
        end_weights = [ None ] * len(ae_hidden)
    if not transfer_do :
        transfer_do = [0] * len(transfer_hidden) 
    predict_y = True
    if  y_train is None or y_dev is None or y_test is None :
        y_train = train
        y_dev = dev
        y_test = test
        predict_y = False
    param_predict = [ train, dev, test ]
    if predict_y :
        param_predict += [ y_train, y_dev ,y_test ]

    pred_by_level = [] # holds the predictions for each transfer level
    for cpt in range(1,len(ae_hidden)):
        # build a fresh network for this level: the transfer layers are inserted
        # after the first `cpt` auto-encoder layers
        layers = [Input(shape=(train.shape[1],))]
        sizes = ae_hidden[:cpt] + transfer_hidden + ae_hidden[cpt:]
        weights = start_weights[:cpt] + transfer_weights[cpt-1] + end_weights[cpt:]
        if len(ae_dropouts) == len(ae_hidden):
            do = ae_dropouts[:cpt] + transfer_do + ae_dropouts[cpt:]
        else :
            do = [ 0 ] * (len(ae_hidden) + len(transfer_hidden))
        for size, w in zip(sizes[:-1], weights[:-1]):
            # `size` gives the layer width even when no pre-trained weights are supplied
            layers.append(Dense(size,activation=input_activation,init=init,weights=w)(layers[-1]))
            if do :
                d = do.pop(0)
                if d > 0 :
                    layers.append(Dropout(d)(layers[-1]))

        layers.append(Dense(y_train.shape[1],activation=output_activation)(layers[-1]))
        models = [Model(input=layers[0] , output=x) for x in layers[1:]]
        models[-1].compile(optimizer=sgd,loss=loss)
        models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,
                       callbacks=[EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],
                       validation_data=(dev,y_dev),verbose=verbose)
        predictions = [ [x.predict(y) for y in param_predict ] for x in models ]
        pred_by_level.append(predictions)
  
    return pred_by_level
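
# Illustrative usage sketch (not part of the original module; assumes the
# Keras 1.x API used throughout this file). Runs ft_dsae on random data with
# no pre-trained weight stacks, so every layer starts from its random
# initialisation; all shapes and hyper-parameters below are assumptions.
def _example_ft_dsae():
    rng = np.random.RandomState(0)
    train, dev, test = rng.rand(64, 30), rng.rand(16, 30), rng.rand(16, 30)
    # no y_* given: targets default to the inputs (pure auto-encoding mode)
    return ft_dsae(train, dev, test,
                   ae_hidden=[20, 20], transfer_hidden=[10],
                   epochs=2, batch_size=8, verbose=0)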

def train_mlp_proj(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,patience=20,test_verbose=0):

    #model_tempfile=tempfile.mkstemp()
    tempfold = tempfile.mkdtemp()
    model_tempfile= tempfold+"/model.hdf"
    
    layers = [Input(shape=(x_train.shape[1],))]

    if dropouts:
        dropouts = list(dropouts)  # copy so the pops below don't mutate the caller's list
    for h in hidden_size:
        previous = layers[-1]
        if dropouts:
            d = dropouts.pop(0)
            if d > 0 :
                previous = Dropout(d)(previous)
        # the hidden layer is added whether or not dropout was applied
        layers.append(Dense(h,init=init,activation=input_activation)(previous))

    previous = layers[-1]
    if dropouts:
        d = dropouts.pop(0)
        if d > 0 :
            previous = Dropout(d)(previous)
    layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(previous))
    
    models = [ Model(layers[0], l) for l in layers[1:] ]
    print "nb models : ", len(models), "h :", hidden_size, "layers :", len(layers)
    if not sgd:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)

    models[-1].compile(loss=loss, optimizer=sgd,metrics=['accuracy'])
    callbacks = [ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose, save_best_only=True, save_weights_only=True, mode='auto'),
                 EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose) ] # monitoring the loss instead could also be worth trying
    models[-1].summary()  # summary() prints directly and returns None
    hist=models[-1].fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size,verbose=fit_verbose,validation_data=(x_dev,y_dev),callbacks=callbacks)
    models[-1].load_weights(model_tempfile, by_name=False)
    proj = []
    for model in models:
        proj.append((model.predict(x_train),model.predict(x_dev),model.predict(x_test)))

    shutil.rmtree(tempfold)
    # summary() returns None, so return the training history alongside the projections
    return hist, proj
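
# Illustrative sketch (assumed shapes and hyper-parameters, not from the
# original code): trains a small classifier on random one-hot targets and
# returns the per-layer projections of train/dev/test.
def _example_train_mlp_proj():
    rng = np.random.RandomState(0)
    x_train, x_dev, x_test = rng.rand(64, 20), rng.rand(16, 20), rng.rand(16, 20)
    onehot = lambda n: np.eye(3)[rng.randint(3, size=n)]
    hist, proj = train_mlp_proj(x_train, onehot(64), x_dev, onehot(16),
                                x_test, onehot(16),
                                hidden_size=[10, 10], dropouts=[0.2, 0, 0],
                                epochs=3, patience=2, fit_verbose=0)
    return proj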





def train_mlp_pred(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,patience=20,test_verbose=0):

    #model_tempfile=tempfile.mkstemp()
    tempfold = tempfile.mkdtemp()
    model_tempfile= tempfold+"/model.hdf"
    
    layers = [Input(shape=(x_train.shape[1],))]

    if dropouts:
        dropouts = list(dropouts)  # copy so the pops below don't mutate the caller's list
    for h in hidden_size:
        previous = layers[-1]
        if dropouts:
            d = dropouts.pop(0)
            if d > 0 :
                previous = Dropout(d)(previous)
        # the hidden layer is added whether or not dropout was applied
        layers.append(Dense(h,init=init,activation=input_activation)(previous))

    previous = layers[-1]
    if dropouts:
        d = dropouts.pop(0)
        if d > 0 :
            previous = Dropout(d)(previous)
    layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(previous))
    
    model=Model(layers[0] , layers[-1])
    if not sgd:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)

    model.compile(loss=loss, optimizer=sgd,metrics=['accuracy'])
    callbacks = [ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose, save_best_only=True, save_weights_only=True, mode='auto'),
                 EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose) ] # monitoring the loss instead could also be worth trying
    model.summary()  # summary() prints directly and returns None
    hist=model.fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size,verbose=fit_verbose,validation_data=(x_dev,y_dev),callbacks=callbacks)
    model.load_weights(model_tempfile, by_name=False)
    pred=(model.predict(x_train),model.predict(x_dev),model.predict(x_test))

    shutil.rmtree(tempfold)
    return pred,hist
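
# Illustrative sketch (assumed shapes): same kind of stack as train_mlp_proj,
# but only the final model's best-checkpoint predictions and the training
# history are returned.
def _example_train_mlp_pred():
    rng = np.random.RandomState(1)
    x_train, x_dev, x_test = rng.rand(64, 20), rng.rand(16, 20), rng.rand(16, 20)
    onehot = lambda n: np.eye(2)[rng.randint(2, size=n)]
    (pred_train, pred_dev, pred_test), hist = train_mlp_pred(
        x_train, onehot(64), x_dev, onehot(16), x_test, onehot(16),
        hidden_size=[8], epochs=3, patience=2, fit_verbose=0)
    return pred_dev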







def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,test_verbose=0,save_pred=False,keep_histo=False):

    layers = [Input(shape=(x_train.shape[1],))]

    if dropouts:
        dropouts = list(dropouts)  # copy so the pops below don't mutate the caller's list
    for h in hidden_size:
        if dropouts:
            d = dropouts.pop(0)
            if d > 0 :
                layers.append(Dropout(d)(layers[-1]))

        layers.append(Dense(h,init=init,activation=input_activation)(layers[-1]))
    if dropouts:   
        d = dropouts.pop(0)
        if d > 0 :
            layers.append(Dropout(d)(layers[-1]))
    layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(layers[-1]))

    model =  Model(layers[0] , layers[-1])
    if not sgd:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)

    model.compile(loss=loss, optimizer=sgd,metrics=['accuracy'])

    scores_dev=[]
    scores_test=[]
    scores_train=[]
    save=None
    for i in range(epochs):
        hist=model.fit(x_train, y_train, nb_epoch=1, batch_size=batch_size,verbose=fit_verbose,validation_data=(x_dev,y_dev))
        pred_train=model.predict(x_train)
        pred_dev=model.predict(x_dev)
        pred_test=model.predict(x_test)

        scores_train.append(perf(np.argmax(y_train,axis=1),np.argmax(pred_train,axis=1)))
        scores_dev.append(perf(np.argmax(y_dev,axis=1),np.argmax(pred_dev,axis=1)))
        scores_test.append(perf(np.argmax(y_test,axis=1),np.argmax(pred_test,axis=1)))
        if fit_verbose :
            print "{} {} {} {}".format(i,scores_train[-1],scores_dev[-1],scores_test[-1])
        # keep the predictions of the best dev epoch seen so far
        if save is None or scores_dev[-1] > max(scores_dev[:-1]):
            save=save_tuple(pred_train,pred_dev,pred_test)
    arg_dev = np.argmax(scores_dev)
    best_dev=scores_dev[arg_dev]
    best_test=scores_test[arg_dev]
    max_test=np.max(scores_test)
    if fit_verbose:
        print " res : {} {} {}".format(best_dev,best_test,max_test)

    res=[scores_train,scores_dev,scores_test]
    if save_pred:
        res.append(save)
    if keep_histo:
        res.append(hist)
    return res
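
# Illustrative sketch (assumed data): tracks per-epoch accuracies on
# train/dev/test and recovers the predictions of the best dev epoch through
# save_pred=True.
def _example_train_mlp():
    rng = np.random.RandomState(2)
    x_train, x_dev, x_test = rng.rand(64, 20), rng.rand(16, 20), rng.rand(16, 20)
    onehot = lambda n: np.eye(2)[rng.randint(2, size=n)]
    scores_train, scores_dev, scores_test, save = train_mlp(
        x_train, onehot(64), x_dev, onehot(16), x_test, onehot(16),
        hidden_size=[8], epochs=3, fit_verbose=0, save_pred=True)
    print "best dev accuracy:", max(scores_dev)
    return save.pred_test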

def train_ae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,test_verbose=0,verbose=1,patience=20,get_weights=False,set_weights=[],best_mod=False):
     
    input_vect = Input(shape=(train.shape[1],))

    previous = [input_vect]

    if dropouts is None:
        dropouts = [ 0 ] * (len(hidden_sizes) +1)
    else:
        dropouts = list(dropouts)  # copy: the pops below must not empty the caller's list
    if sgd is None :
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)
    did_do = False
    if dropouts :
        d = dropouts.pop(0)
        if d :
            previous.append(Dropout(d)(previous[-1]))
            did_do = True

    for h_layer,weight_layer in izip_longest(hidden_sizes,set_weights,fillvalue=None) :
        if weight_layer :
            w = weight_layer[0]
        else :
            w = None
        # if the previous step appended a Dropout, pop it so that no intermediate
        # model is built on a Dropout output, but still feed the new Dense through it
        if did_do :
            p = previous.pop()
            did_do = False
        else :
            p = previous[-1]
        previous.append(Dense(h_layer,activation=input_activation,weights=w)(p))
        if dropouts:
            d = dropouts.pop(0)
            if d :
                previous.append(Dropout(d)(previous[-1]))
                did_do = True

    predict_y = True
    if y_train is None or  y_dev is None or y_test is None :
        y_train = train
        y_dev = dev
        y_test = test
        predict_y = False
    if did_do :
        p = previous.pop()
    else :
        p = previous[-1]
    previous.append(Dense(y_train.shape[1],activation=output_activation)(p))
    models = [Model(input=previous[0] , output=x) for x in previous[1:]]
    print "MLP", sgd, loss
    models[-1].compile(optimizer=sgd,loss=loss)
    cb = [EarlyStopping(monitor='val_loss', patience=patience, verbose=0)]
    if best_mod:
        tempfold = tempfile.mkdtemp()
        model_tempfile= tempfold+"/model.hdf"
        cb.append( ModelCheckpoint(model_tempfile, monitor='val_loss', verbose=test_verbose, save_best_only=True, save_weights_only=True, mode='auto') )

    models[-1].summary()
    models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=cb,validation_data=(dev,y_dev),verbose=verbose)
    if best_mod:
        models[-1].load_weights(model_tempfile)
        shutil.rmtree(tempfold)
    param_predict = [ train, dev, test ]
    if predict_y :
        param_predict += [ y_train, y_dev ,y_test ]
    predictions = [ [x.predict(y) for y in param_predict  ] for x in models ]
    if get_weights : 
        weights = [ x.get_weights()  for x in models[-1].layers if x.get_weights() ]
        return ( predictions , weights )
    else :
        return predictions
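
# Illustrative sketch (assumed shapes): a single-hidden-layer auto-encoder on
# random data; get_weights=True also returns the trained layer weights so they
# can later be fed back in through set_weights.
def _example_train_ae():
    rng = np.random.RandomState(3)
    train, dev, test = rng.rand(64, 30), rng.rand(16, 30), rng.rand(16, 30)
    predictions, weights = train_ae(train, dev, test, [10],
                                    epochs=2, batch_size=8, verbose=0,
                                    patience=2, get_weights=True)
    # predictions[-2] holds the 10-dimensional codes for (train, dev, test)
    return predictions[-2], weights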

def train_sae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,verbose=1,patience=20):

    weights = []
    # seed entry shaped so that predictions[-1][-2] always yields the newest hidden
    # representation (here, the raw (train, dev, test) inputs themselves)
    predictions = [[(train,dev,test),()]]
    ft_pred = []
    past_sizes = []


    for size in hidden_sizes :
        #print "DO size " , size , "FROM" , hidden_sizes
        res_pred, res_wght = train_ae(predictions[-1][-2][0], predictions[-1][-2][1],predictions[-1][-2][2],[size],
                                      dropouts=dropouts, input_activation=input_activation,
                                      output_activation=output_activation, loss=loss, sgd=sgd,
                                      epochs=epochs, batch_size=batch_size, verbose=verbose,
                                      patience=patience,get_weights=True)
        past_sizes.append(size)
        weights.append(res_wght)
        predictions.append(res_pred)
        #print "FINE TUNE "
        res_ftpred = train_ae(train,dev,test,past_sizes,y_train=y_train,y_dev=y_dev,y_test=y_test,
                              dropouts=dropouts,
                              input_activation=input_activation,
                              output_activation=output_activation,
                              loss=loss,sgd=sgd,epochs=epochs,
                              batch_size=batch_size,verbose=verbose,patience=patience,
                              set_weights=weights)
        ft_pred.append(res_ftpred)

    return ( predictions[1:] , ft_pred)
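
# Illustrative sketch (assumed shapes): greedy layer-wise pre-training of a
# two-layer stack; returns both the per-layer predictions and the fine-tuned
# predictions obtained after each depth.
def _example_train_sae():
    rng = np.random.RandomState(4)
    train, dev, test = rng.rand(64, 30), rng.rand(16, 30), rng.rand(16, 30)
    layer_pred, ft_pred = train_sae(train, dev, test, [20, 10],
                                    epochs=2, batch_size=8, verbose=0, patience=2)
    return ft_pred[-1]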