Killian / decodopr

Blame view

LDA/04d-mmf_dsae.py 9.16 KB
  
  # coding: utf-8
  
  # In[2]:
  
  # Import
  import gensim
  from scipy import sparse
  import itertools
  from sklearn import preprocessing
  from keras.models import Sequential
  from keras.optimizers import SGD,Adam
  from mlp import *
  import mlp
  import sklearn.metrics
  import shelve
  import pickle
  from utils import *
  import sys
  import os
  import json
  # In[4]:
  
  # Command line: <in_dir> <infer_model shelve>
  #   in_dir      -- directory under which the DSAE_<name> output directory is created
  #   infer_model -- shelve read below through its "LDA" and "LABEL" entries
  # Fail with a usage message instead of a bare IndexError when arguments are missing.
  if len(sys.argv) < 3:
      sys.exit("usage: {} IN_DIR INFER_MODEL_SHELVE".format(sys.argv[0]))
  in_dir = sys.argv[1]
  infer_model = shelve.open(sys.argv[2])
  #['ASR', 'TRS', 'LABEL']
  # In[6]:
  
  # Hyper-parameters of the per-modality autoencoders (AE); they are handed
  # to train_ae / ft_dsae further down.
  hidden_size = [100, 100]
  input_activation = "relu"
  output_activation = "relu"
  loss = "mse"
  epochs = 1000
  batch_size = 1
  patience = 20
  # One dropout value per entry of hidden_size.
  do_do = [0.25] * len(hidden_size)
  # Optimizer. Alternatives left by the author: SGD(lr=0.00001,nesterov=False),
  # 'rmsprop', SGD(lr=0.001, momentum=0.9, nesterov=True).
  sgd = Adam(lr=0.00001)
  # Tag stored in the run description: the "name" entry of the optimizer
  # config, or the value itself when it has no get_config attribute
  # (e.g. an optimizer given as a plain string).
  try:
      sgd_repr = sgd.get_config()["name"]
  except AttributeError:
      sgd_repr = sgd
  
  # Hyper-parameters of the transfer network (trained below with ASR
  # representations as input and TRS representations as target).
  trans_hidden_size = [300, 300]
  trans_input_activation = "relu"
  trans_output_activation = "relu"
  trans_loss = "mse"
  trans_epochs = 1000
  trans_batch_size = 8
  trans_patience = 20
  # One dropout value per entry of trans_hidden_size.
  trans_do = [0.25] * len(trans_hidden_size)
  # Optimizer. Alternatives left by the author: SGD(lr=0.00001,nesterov=False),
  # 'rmsprop', Adam(lr=0.00001), SGD(lr=0.001, momentum=0.9, nesterov=True).
  trans_sgd = Adam(lr=0.0001)
  # Tag stored in the run description: the "name" entry of the optimizer
  # config, or the value itself when it has no get_config attribute.
  try:
      trans_sgd_repr = trans_sgd.get_config()["name"]
  except AttributeError:
      trans_sgd_repr = trans_sgd
  
  
  
  # Description of the AE configuration; dumped to JSON later and used to
  # build the output directory name.
  ae = {
      "h1": "_".join(map(str, hidden_size)),
      "inside_activation": input_activation,
      "out_activation": output_activation,
      "do_dropout": "_".join(map(str, do_do)),
      "loss": loss,
      "epochs": epochs,
      "batch_size": batch_size,
      "patience": patience,
      "sgd": sgd_repr,
  }
  # NOTE: the values are joined in dict iteration order, which under Python 2
  # is not the order written above. Keep this dict literal (same keys, same
  # order) unchanged, otherwise the directory name of existing runs may change.
  name = "_".join(map(str, ae.values()))
  
  # Description of the transfer-network configuration (dumped to JSON later).
  trans = {
      "h1": "_".join(map(str, trans_hidden_size)),
      "inside_activation": trans_input_activation,
      "out_activation": trans_output_activation,
      "do_dropout": "_".join(map(str, trans_do)),
      "loss": trans_loss,
      "epochs": trans_epochs,
      "batch_size": trans_batch_size,
      "patience": trans_patience,
      "sgd": trans_sgd_repr,
  }
  
  # Hyper-parameters of the MLP classifiers trained on each representation
  # (handed to train_mlp further down).
  mlp_h = [300, 300]
  mlp_loss = "categorical_crossentropy"
  mlp_dropouts = [0, 0, 0, 0]
  mlp_sgd = Adam(0.0001)
  mlp_epochs = 1000
  mlp_batch_size = 8
  mlp_input_activation = "relu"
  mlp_output_activation = "softmax"

  # Tag stored in the run description: the "name" entry of the optimizer
  # config, or the value itself when it has no get_config attribute.
  try:
      mlp_sgd_repr = mlp_sgd.get_config()["name"]
  except AttributeError:
      mlp_sgd_repr = mlp_sgd
  
  
  
  # Description of the MLP configuration (dumped to JSON later).
  # Bound to mlp_desc rather than mlp so the module imported with
  # `import mlp` at the top of the file is not overwritten by a dict.
  mlp_desc = {
      "h1": "_".join([str(x) for x in mlp_h]),
      "inside_activation": mlp_input_activation,
      "out_activation": mlp_output_activation,
      "do_dropout": "_".join([str(x) for x in mlp_dropouts]),
      "loss": mlp_loss,
      "epochs": mlp_epochs,
      "batch_size": mlp_batch_size,
      "sgd": mlp_sgd_repr,
  }

  # Full run description; the JSON keys are unchanged ("ae", "trans", "mlp").
  params = {"ae": ae, "trans": trans, "mlp": mlp_desc}
  # All outputs of this run go to <in_dir>/DSAE_<name>/.
  out_dir = "{}/DSAE_{}".format(in_dir,name)
  try:
      os.mkdir(out_dir)
  except OSError:
      # An already existing directory is expected on re-runs; any other
      # failure (missing parent, permissions, ...) is reported right here
      # instead of being silently ignored.
      if not os.path.isdir(out_dir):
          raise
  # writeback=True: the nested dicts stored in db are mutated in place later
  # and only written back on sync/close.
  db = shelve.open("{}/ae_model.shelve".format(out_dir),writeback=True)
  # Save the run description next to the results; the with-block makes sure
  # the file is flushed and closed.
  with open("{}/ae_model.json".format(out_dir),"w") as params_file:
      json.dump(params, params_file, indent=4)
  
  keys = ["ASR","TRS"]
  
  
  
  db["DSAE"] = {}
  
  db["DSAEFT"] = {}
  mod = "ASR"
  res_tuple_ASR = train_ae(infer_model["LDA"][mod]["TRAIN"],
                           infer_model["LDA"][mod]["DEV"],
                           infer_model["LDA"][mod]["TEST"],
                           hidden_size,dropouts=do_do,
                           patience = patience,sgd=sgd,
                           input_activation=input_activation,
                           output_activation=output_activation,loss=loss,epochs=epochs,
                           batch_size=batch_size,verbose=0,get_weights=True)
  mlp_res_list = []
  for layer in res_tuple_ASR[0]: 
      mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
                                    layer[1],infer_model["LABEL"][mod]["DEV"],
                                    layer[2],infer_model["LABEL"][mod]["TEST"],
                                    mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
                                    sgd=mlp_sgd,epochs=mlp_epochs,
                                    output_activation=mlp_output_activation,
                                    input_activation=mlp_input_activation,
                                    batch_size=mlp_batch_size,fit_verbose=0))
  
  db["DSAE"][mod] = mlp_res_list
  mod = "TRS"
  print hidden_size
  res_tuple_TRS = train_ae(infer_model["LDA"][mod]["TRAIN"],
                           infer_model["LDA"][mod]["DEV"],
                           infer_model["LDA"][mod]["TEST"],
                           hidden_size,dropouts=do_do,
                           sgd=sgd,input_activation=input_activation,
                           output_activation=output_activation,loss=loss,epochs=epochs,
                           batch_size=batch_size,patience=patience,
                           verbose=0,get_weights=True)
  
  mlp_res_list = []
  for layer in res_tuple_TRS[0]: 
      mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
                                    layer[1],infer_model["LABEL"][mod]["DEV"],
                                    layer[2],infer_model["LABEL"][mod]["TEST"],
                                    mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
                                    sgd=mlp_sgd,epochs=mlp_epochs,
                                    output_activation=mlp_output_activation,
                                    input_activation=mlp_input_activation,
                                    batch_size=mlp_batch_size,fit_verbose=0))
  
  db["DSAE"][mod] = mlp_res_list
  
  
  
  transfert = []
  
  print " get weight trans" 
  
  for asr_pred, trs_pred in zip(res_tuple_ASR[0], res_tuple_TRS[0]):
      print "ASR", [ x.shape for x in asr_pred]
  
      print "TRS", [ x.shape for x in trs_pred]
      print
  
  for asr_pred, trs_pred in zip(res_tuple_ASR[0], res_tuple_TRS[0]):
      print "ASR", [ x.shape for x in asr_pred]
  
      print "TRS", [ x.shape for x in trs_pred]
      transfert.append( train_ae(asr_pred[0],
                                 asr_pred[1],
                                 asr_pred[2],
                                 trans_hidden_size,
                                 dropouts=trans_do,
                                 y_train = trs_pred[0],
                                 y_dev=trs_pred[1],
                                 y_test = trs_pred[2],
                                 patience = trans_patience,sgd=trans_sgd,
                                 input_activation=trans_input_activation,
                                 output_activation=trans_output_activation,
                                 loss=trans_loss,
                                 epochs=trans_epochs,
                                 batch_size=trans_batch_size,verbose=0,get_weights=True) )
  mod = "ASR"
  mlp_res_bylvl = []
  print " MLP on transfert "
  for level, w  in transfert  :
      mlp_res_list = []
      for layer in level :
          mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
                                        layer[1],infer_model["LABEL"][mod]["DEV"],
                                        layer[2],infer_model["LABEL"][mod]["TEST"],
                                        mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
                                        sgd=mlp_sgd,epochs=mlp_epochs,
                                        output_activation=mlp_output_activation,
                                        input_activation=mlp_input_activation,
                                        batch_size=mlp_batch_size,fit_verbose=0))
      mlp_res_bylvl.append(mlp_res_list)
  db["DSAE"]["transfert"] = mlp_res_bylvl
  
  
  print " FT " 
  WA = res_tuple_ASR[1]
  print "WA", len(WA), [ len(x) for x in WA]
  WT = res_tuple_TRS[1]
  
  print "WT", len(WT), [ len(x) for x in WT]
  Wtr = [ x[1] for x in transfert]
  
  print "Wtr", len(Wtr), [ len(x) for x in Wtr],[ len(x[1]) for x in Wtr]
  
  ft_res = ft_dsae(infer_model["LDA"]["ASR"]["TRAIN"],
                   infer_model["LDA"]["ASR"]["DEV"],
                   infer_model["LDA"]["ASR"]["TEST"],
                   y_train=infer_model["LDA"]["TRS"]["TRAIN"],
                   y_dev=infer_model["LDA"]["TRS"]["DEV"],
                   y_test=infer_model["LDA"]["TRS"]["TEST"],
                   ae_hidden = hidden_size,
                   transfer_hidden = trans_hidden_size,
                   start_weights = WA,
                   transfer_weights = Wtr,
                   end_weights = WT,
                   input_activation = input_activation,
                   output_activation = output_activation,
                   ae_dropouts= do_do,
                   transfer_do = trans_do,
                   sgd =  sgd,
                   loss = loss ,
                   patience = patience,
                   batch_size = batch_size,
                   epochs= epochs)
  mlps_by_lvls= []
  for level  in ft_res  :
      mlp_res_list = []
      for layer in level :
          mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
                                        layer[1],infer_model["LABEL"][mod]["DEV"],
                                        layer[2],infer_model["LABEL"][mod]["TEST"],
                                        mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
                                        sgd=mlp_sgd,epochs=mlp_epochs,
                                        output_activation=mlp_output_activation,
                                        input_activation=mlp_input_activation,
                                        batch_size=mlp_batch_size,fit_verbose=0))
      mlps_by_lvls.append(mlp_res_list)
  
  
  db["DSAEFT"]["transfert"] = mlps_by_lvls
  
  db.close()