# mlp.py
# -*- coding: utf-8 -*-
import keras
import numpy as np
from keras.optimizers import SGD
from keras.models import Model
from keras.layers import Input, Dense, Dropout
from itertools import izip_longest

from collections import namedtuple
from sklearn.metrics import accuracy_score as perf

save_tuple = namedtuple("save_tuple", ["pred_train", "pred_dev", "pred_test"])


def ft_dsae(train,dev,test,
        y_train=None,y_dev=None,y_test=None,
        ae_hidden=[20],transfer_hidden=[20],
        start_weights=None,transfer_weights=None,end_weights=None,
        input_activation="tanh", output_activation="tanh",
        init="glorot_uniform",
        ae_dropouts=[None], transfer_do=[None],
        sgd="sgd", loss="mse", patience=5, verbose=0, epochs=5, batch_size=8):

    if not start_weights :
        start_weights = [ None ] * len(ae_hidden)
    if not transfer_weights :
        transfer_weights = [None ] * len(transfer_hidden)
    if not end_weights :
        end_weights = [ None ] * len(ae_hidden)
    if not transfer_do :
        transfer_do = [0] * len(transfer_hidden) 
    predict_y = True
    if  y_train is None or y_dev is None or y_test is None :
        y_train = train
        y_dev = dev
        y_test = test
        predict_y = False
    param_predict = [ train, dev, test ]
    if predict_y :
        param_predict += [ y_train, y_dev ,y_test ]

    pred_by_level = [] # holds the predictions for each transfer level
    for cpt in range(1,len(ae_hidden)):
        # Rebuild the graph from the input layer for every transfer level, so
        # each level is trained independently instead of being stacked on top
        # of the previous level's output layer.
        layers = [Input(shape=(train.shape[1],))]
        sizes = ae_hidden[:cpt] + transfer_hidden + ae_hidden[cpt:]
        weights =  start_weights[:cpt] + transfer_weights[(cpt-1)] + end_weights[cpt:]
        #print "SIZES", sizes
        #print "AW",[ [ y.shape for y in x ]  for x in weights] 
        #print "WEI", len(weights) , [ len(x) for x in weights ]
        if len(ae_dropouts) == len(ae_hidden):
            do = ae_dropouts[:cpt] + transfer_do + ae_dropouts[cpt:]
        else:
            do = [ 0 ] * (len(ae_hidden) + len(transfer_hidden))
        for size, w in zip(sizes, weights[:-1]):
            layers.append(Dense(size, activation=input_activation, init=init,
                                weights=w)(layers[-1]))
            if do :
                d = do.pop(0)
                if d > 0 : 
                    layers.append(Dropout(d)(layers[-1]))
               
        layers.append(Dense(y_train.shape[1],activation=output_activation)(layers[-1]))
        models = [Model(input=layers[0], output=x) for x in layers[1:]]
        models[-1].compile(optimizer=sgd, loss=loss)
        models[-1].fit(train, y_train, nb_epoch=epochs, batch_size=batch_size,
                       callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss",
                                                                patience=patience, verbose=0)],
                       validation_data=(dev, y_dev), verbose=verbose)
        predictions = [ [x.predict(y) for y in param_predict  ] for x in models ]
        pred_by_level.append(predictions)
  
    return pred_by_level

def train_mlp(x_train, y_train, x_dev, y_dev, x_test, y_test, hidden_size,
              input_activation="relu", hidden_activation="relu",
              output_activation="softmax", loss="mse", init="glorot_uniform",
              dropouts=None, sgd=None, epochs=1200, batch_size=16,
              fit_verbose=1, test_verbose=0, save_pred=False, keep_histo=False):
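    """Train a plain MLP and track train/dev/test accuracy after every epoch.

    Returns [scores_train, scores_dev, scores_test], plus the predictions of
    the best-dev epoch if save_pred is set, and the last Keras history object
    if keep_histo is set.
    """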

    layers = [Input(shape=(x_train.shape[1],))]
    if dropouts:
        dropouts = list(dropouts)  # copy: the pop() calls below must not mutate the caller's list

    for i, h in enumerate(hidden_size):
        if dropouts:
            d = dropouts.pop(0)
            if d > 0:
                layers.append(Dropout(d)(layers[-1]))
        # First hidden layer uses input_activation, deeper ones hidden_activation.
        act = input_activation if i == 0 else hidden_activation
        layers.append(Dense(h, init=init, activation=act)(layers[-1]))

    if dropouts:   
        d = dropouts.pop(0)
        if d > 0 :
            layers.append(Dropout(d)(layers[-1]))

    layers.append(Dense(y_train.shape[1], activation=output_activation, init=init)(layers[-1]))

    model = Model(input=layers[0], output=layers[-1])
    if not sgd:
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)

    model.compile(loss=loss, optimizer=sgd,metrics=['accuracy'])

    scores_dev=[]
    scores_test=[]
    scores_train=[]
    save=None
    for i in range(epochs):
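        # Fit one epoch at a time so train/dev/test accuracy can be logged
        # after every epoch.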
        hist=model.fit(x_train, y_train, nb_epoch=1, batch_size=batch_size,verbose=fit_verbose,validation_data=(x_dev,y_dev))
        pred_train=model.predict(x_train)
        pred_dev=model.predict(x_dev)
        pred_test=model.predict(x_test)

        scores_train.append(perf(np.argmax(y_train,axis=1),np.argmax(pred_train,axis=1)))
        scores_dev.append(perf(np.argmax(y_dev,axis=1),np.argmax(pred_dev,axis=1)))
        scores_test.append(perf(np.argmax(y_test,axis=1),np.argmax(pred_test,axis=1)))
        if fit_verbose :
            print "{} {} {} {}".format(i,scores_train[-1],scores_dev[-1],scores_test[-1])
        # Keep the predictions from the epoch with the best dev score so far.
        if save is None or (len(scores_dev) > 1 and scores_dev[-1] > max(scores_dev[:-1])):
            save=save_tuple(pred_train,pred_dev,pred_test)
    arg_dev = np.argmax(scores_dev)
    best_dev=scores_dev[arg_dev]
    best_test=scores_test[arg_dev]
    max_test=np.max(scores_test)
    if fit_verbose:
        print " res : {} {} {}".format(best_dev,best_test,max_test)

    res=[scores_train,scores_dev,scores_test]
    if save_pred:
        res.append(save)
    if keep_histo:
        res.append(hist)
    return res

def train_ae(train, dev, test, hidden_sizes, y_train=None, y_dev=None, y_test=None,
             dropouts=None, input_activation="tanh", output_activation="tanh",
             loss="mse", sgd=None, epochs=500, batch_size=8, verbose=1,
             patience=20, get_weights=False, set_weights=[]):
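    """Train a (possibly pre-initialised) autoencoder-style MLP.

    If the y_* targets are omitted, the network is trained to reconstruct its
    input. Returns the predictions of every sub-model (one per layer), and
    the trained weights as well when get_weights is set.
    """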
     
    input_vect = Input(shape=(train.shape[1],))

    previous = [input_vect]

    if dropouts is None:
        dropouts = [ 0 ] * (len(hidden_sizes) + 1)
    else:
        dropouts = list(dropouts)  # copy: the pop() calls below must not mutate the caller's list
    if sgd is None : 
        sgd = SGD(lr=0.01, decay=0, momentum=0.9)
    did_do = False
    if dropouts :
        d = dropouts.pop(0)
        if d :
            previous.append(Dropout(d)(previous[-1]))
            did_do = True

    for h_layer, weight_layer in izip_longest(hidden_sizes, set_weights, fillvalue=None):
        w = weight_layer[0] if weight_layer else None
        if did_do:
            # Connect the next Dense to the dropout layer, but pop it from
            # `previous` so no sub-model is built with a Dropout as its output.
            p = previous.pop()
            did_do = False
        else:
            p = previous[-1]
        previous.append(Dense(h_layer, activation=input_activation, weights=w)(p))
        if dropouts:
            d = dropouts.pop(0)
            if d:
                previous.append(Dropout(d)(previous[-1]))
                did_do = True

    predict_y = True
    if y_train is None or  y_dev is None or y_test is None :
        y_train = train
        y_dev = dev
        y_test = test
        predict_y = False
    previous.append(Dense(y_train.shape[1],activation=output_activation)(previous[-1]))
    models = [Model(input=previous[0], output=x) for x in previous[1:]]
    print "AE", sgd, loss
    models[-1].compile(optimizer=sgd, loss=loss)
    models[-1].fit(train, y_train, nb_epoch=epochs, batch_size=batch_size,
                   callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss",
                                                            patience=patience, verbose=0)],
                   validation_data=(dev, y_dev), verbose=verbose)
    param_predict = [ train, dev, test ]
    if predict_y :
        param_predict += [ y_train, y_dev ,y_test ]
    predictions = [ [x.predict(y) for y in param_predict  ] for x in models ]
    if get_weights : 
        weights = [ x.get_weights()  for x in models[-1].layers if x.get_weights() ]
        return ( predictions , weights )
    else :
        return predictions

def train_sae(train, dev, test, hidden_sizes, y_train=None, y_dev=None, y_test=None,
              dropouts=None, input_activation="tanh", output_activation="tanh",
              loss="mse", sgd=None, epochs=500, batch_size=8, verbose=1, patience=20):
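    """Greedily train a stacked autoencoder (SAE).

    Each hidden layer is pre-trained as a single-layer autoencoder on the
    representation produced by the previous one; after every new layer the
    whole stack is retrained (fine-tuned) end to end. Returns the per-layer
    pre-training predictions and the fine-tuning predictions.
    """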

    weights = []
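    # predictions is seeded so that predictions[-1][-2] below always yields
    # the (train, dev, test) activations of the deepest hidden layer trained
    # so far; for the first autoencoder that is the raw data itself.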
    predictions = [[(train,dev,test),()]]
    ft_pred = []
    past_sizes = []


    for size in hidden_sizes :
        #print "DO size " , size , "FROM" , hidden_sizes
        res_pred, res_wght = train_ae(predictions[-1][-2][0], predictions[-1][-2][1],predictions[-1][-2][2],[size],
                                      dropouts=dropouts, input_activation=input_activation,
                                      output_activation=output_activation, loss=loss, sgd=sgd,
                                      epochs=epochs, batch_size=batch_size, verbose=verbose,
                                      patience=patience,get_weights=True)
        past_sizes.append(size)
        weights.append(res_wght)
        predictions.append(res_pred)
        #print "FINE TUNE "
        res_ftpred = train_ae(train,dev,test,past_sizes,y_train=y_train,y_dev=y_dev,y_test=y_test,
                              dropouts=dropouts,
                              input_activation=input_activation,
                              output_activation=output_activation,
                              loss=loss,sgd=sgd,epochs=epochs,
                              batch_size=batch_size,verbose=verbose,patience=patience,
                              set_weights=weights)
        ft_pred.append(res_ftpred)

    return (predictions[1:], ft_pred)
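

if __name__ == "__main__":
    # Minimal smoke test on synthetic data. This is an illustrative sketch
    # only: the shapes, sizes and hyper-parameters below are arbitrary
    # placeholders, not settings from any experiment.
    rng = np.random.RandomState(0)
    x = rng.rand(90, 10)                               # 90 samples, 10 features
    y = np.zeros((90, 3))                              # 3 balanced one-hot classes
    y[np.arange(90), np.repeat(np.arange(3), 30)] = 1

    scores_train, scores_dev, scores_test = train_mlp(
        x[:60], y[:60], x[60:75], y[60:75], x[75:], y[75:],
        hidden_size=[16], epochs=5, fit_verbose=0)
    print "dev accuracy per epoch:", scores_dev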