# 04d-mmf_dsae.py 9.11 KB
# coding: utf-8

# In[2]:

# Import
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD,Adam
from mlp import *
import mlp
import sklearn.metrics
import shelve
import pickle

from utils import *
import sys
import os
import json
# In[4]:

infer_model=shelve.open("{}".format(sys.argv[2]))
in_dir = sys.argv[1]
#['ASR', 'TRS', 'LABEL']
# In[6]:
if len(sys.argv) > 4 :
    features_key = sys.argv[4]
else :
    features_key = "LDA"

# Parse the JSON experiment configuration; the context manager releases the
# file handle as soon as parsing is finished.
with open(sys.argv[3]) as conf_file:
    json_conf = json.load(conf_file)

# Settings taken from the "dsae" section; they are passed to train_ae and
# ft_dsae further down.
dsae_conf = json_conf["dsae"]

hidden_size = dsae_conf["hidden_size"]
input_activation = dsae_conf["input_activation"]
output_activation = dsae_conf["output_activation"]
loss = dsae_conf["loss"]
epochs = dsae_conf["epochs"]
batch_size = dsae_conf["batch"]
patience = dsae_conf["patience"]
do_do = dsae_conf["do"]

# The "sgd" entry is either a dict {"name": "adam" | "sgd", "lr": <rate>},
# which is turned into the matching keras.optimizers object, or any other
# JSON value (e.g. an optimizer name string), which is passed through as is.
sgd_conf = dsae_conf["sgd"]
if isinstance(sgd_conf, dict):
    if sgd_conf["name"] == "adam":
        sgd = Adam(lr=sgd_conf["lr"])
    elif sgd_conf["name"] == "sgd":
        sgd = SGD(lr=sgd_conf["lr"])
    else:
        # Fail here with a clear message instead of leaving `sgd` unbound.
        raise ValueError(
            "unsupported dsae optimizer name: {!r}".format(sgd_conf["name"]))
else:
    sgd = sgd_conf

# Settings taken from the "transform" sub-section of "dsae"; they are passed
# to the train_ae calls that map ASR outputs onto TRS outputs.
trans_conf = json_conf["dsae"]["transform"]
trans_hidden_size = trans_conf["hidden_size"]
trans_input_activation = trans_conf["input_activation"]
trans_output_activation = trans_conf["output_activation"]
trans_loss = trans_conf["loss"]
trans_epochs = trans_conf["epochs"]
trans_batch_size = trans_conf["batch"]
trans_patience = trans_conf["patience"]
trans_do = trans_conf["do"]

# The "sgd" entry is either a dict {"name": "adam" | "sgd", "lr": <rate>},
# which is turned into the matching keras.optimizers object, or any other
# JSON value (e.g. an optimizer name string), which is passed through as is.
# The previous `except e:` clause referenced an undefined name, so the
# pass-through case raised NameError instead of falling back.
trans_sgd_conf = trans_conf["sgd"]
if isinstance(trans_sgd_conf, dict):
    if trans_sgd_conf["name"] == "adam":
        trans_sgd = Adam(lr=trans_sgd_conf["lr"])
    elif trans_sgd_conf["name"] == "sgd":
        trans_sgd = SGD(lr=trans_sgd_conf["lr"])
    else:
        # Fail here with a clear message instead of leaving `trans_sgd` unbound.
        raise ValueError(
            "unsupported transform optimizer name: {!r}".format(
                trans_sgd_conf["name"]))
else:
    trans_sgd = trans_sgd_conf


# Settings taken from the "mlp" section; they are passed to every train_mlp
# call further down.
mlp_conf = json_conf["mlp"]
mlp_h = mlp_conf["hidden_size"]
mlp_loss = mlp_conf["loss"]
mlp_dropouts = mlp_conf["do"]
mlp_epochs = mlp_conf["epochs"]
mlp_batch_size = mlp_conf["batch"]
mlp_input_activation = mlp_conf["input_activation"]
mlp_output_activation = mlp_conf["output_activation"]

# The "sgd" entry is either a dict {"name": "adam" | "sgd", "lr": <rate>},
# which is turned into the matching keras.optimizers object, or any other
# JSON value (e.g. an optimizer name string), which is passed through as is.
mlp_sgd_conf = mlp_conf["sgd"]
if isinstance(mlp_sgd_conf, dict):
    if mlp_sgd_conf["name"] == "adam":
        mlp_sgd = Adam(lr=mlp_sgd_conf["lr"])
    elif mlp_sgd_conf["name"] == "sgd":
        mlp_sgd = SGD(lr=mlp_sgd_conf["lr"])
    else:
        # Fail here with a clear message instead of leaving `mlp_sgd` unbound.
        raise ValueError(
            "unsupported mlp optimizer name: {!r}".format(mlp_sgd_conf["name"]))
else:
    mlp_sgd = mlp_sgd_conf


# Results of this run are stored under <in_dir>/<name>.
name = json_conf["name"]
out_dir = "{}/{}".format(in_dir, name)
try:
    os.mkdir(out_dir)
except OSError:
    # An already existing directory is expected on re-runs; any other failure
    # (missing parent, permissions, a plain file in the way) is re-raised so
    # it is not mistaken for a shelve error on the next line.
    if not os.path.isdir(out_dir):
        raise
# writeback=True keeps fetched entries cached, so the nested assignments of
# the form db["DSAE"][...] = ... made later are written out when db is closed.
db = shelve.open("{}/{}/ae_model.shelve".format(in_dir, name), writeback=True)
#

keys = ["ASR", "TRS"]

# Result containers filled in by this section and the following ones.
db["DSAE"] = {}
db["DSAEFT"] = {}

# For each modality, in order: run train_ae on its TRAIN/DEV/TEST features,
# then run train_mlp on every element of the first item of the result and
# store the list of MLP results under db["DSAE"][<modality>].
ae_results = {}
for mod in keys:
    ae_results[mod] = train_ae(
        infer_model[features_key][mod]["TRAIN"],
        infer_model[features_key][mod]["DEV"],
        infer_model[features_key][mod]["TEST"],
        hidden_size,
        dropouts=do_do,
        patience=patience,
        sgd=sgd,
        input_activation=input_activation,
        output_activation=output_activation,
        loss=loss,
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
        get_weights=True,
    )

    # Each `layer` is indexed 0/1/2 and paired with the TRAIN/DEV/TEST labels.
    mlp_res_list = [
        train_mlp(
            layer[0], infer_model['LABEL'][mod]["TRAIN"],
            layer[1], infer_model["LABEL"][mod]["DEV"],
            layer[2], infer_model["LABEL"][mod]["TEST"],
            mlp_h,
            loss=mlp_loss,
            dropouts=mlp_dropouts,
            sgd=mlp_sgd,
            epochs=mlp_epochs,
            output_activation=mlp_output_activation,
            input_activation=mlp_input_activation,
            batch_size=mlp_batch_size,
            fit_verbose=0,
        )
        for layer in ae_results[mod][0]
    ]
    db["DSAE"][mod] = mlp_res_list

# The later sections refer to the two results by these names.
res_tuple_ASR = ae_results["ASR"]
res_tuple_TRS = ae_results["TRS"]



transfert = []

print(" get weight trans")

# Walk the ASR and TRS train_ae outputs in lockstep: the ASR element supplies
# the inputs (indices 0/1/2 -> train/dev/test) and the TRS element at the same
# position supplies the targets.
for asr_pred, trs_pred in zip(res_tuple_ASR[0], res_tuple_TRS[0]):
    transfer_result = train_ae(
        asr_pred[0],
        asr_pred[1],
        asr_pred[2],
        trans_hidden_size,
        dropouts=trans_do,
        y_train=trs_pred[0],
        y_dev=trs_pred[1],
        y_test=trs_pred[2],
        patience=trans_patience,
        sgd=trans_sgd,
        input_activation=trans_input_activation,
        output_activation=trans_output_activation,
        loss=trans_loss,
        epochs=trans_epochs,
        batch_size=trans_batch_size,
        verbose=0,
        get_weights=True,
    )
    transfert.append(transfer_result)

# The MLPs below are trained against the ASR labels.
mod = "ASR"
mlp_res_bylvl = []
print(" MLP on transfert ")
# Every entry of `transfert` is unpacked as a pair; only its first item is
# used here, the second one is read later through `transfert` itself.
for level, w in transfert:
    mlp_res_list = [
        train_mlp(
            layer[0], infer_model['LABEL'][mod]["TRAIN"],
            layer[1], infer_model["LABEL"][mod]["DEV"],
            layer[2], infer_model["LABEL"][mod]["TEST"],
            mlp_h,
            loss=mlp_loss,
            dropouts=mlp_dropouts,
            sgd=mlp_sgd,
            epochs=mlp_epochs,
            output_activation=mlp_output_activation,
            input_activation=mlp_input_activation,
            batch_size=mlp_batch_size,
            fit_verbose=0,
        )
        for layer in level
    ]
    mlp_res_bylvl.append(mlp_res_list)
db["DSAE"]["transfert"] = mlp_res_bylvl


print(" FT ")
# Second item of each earlier train_ae result (requested with
# get_weights=True); these are handed to ft_dsae as its initial weights.
WA = res_tuple_ASR[1]
WT = res_tuple_TRS[1]
Wtr = [x[1] for x in transfert]

# NOTE(review): presumably ft_dsae fine-tunes the stacked ASR -> transfer ->
# TRS network end to end -- confirm against its definition.
ft_res = ft_dsae(infer_model[features_key]["ASR"]["TRAIN"],
                 infer_model[features_key]["ASR"]["DEV"],
                 infer_model[features_key]["ASR"]["TEST"],
                 y_train=infer_model[features_key]["TRS"]["TRAIN"],
                 y_dev=infer_model[features_key]["TRS"]["DEV"],
                 y_test=infer_model[features_key]["TRS"]["TEST"],
                 ae_hidden=hidden_size,
                 transfer_hidden=trans_hidden_size,
                 start_weights=WA,
                 transfer_weights=Wtr,
                 end_weights=WT,
                 input_activation=input_activation,
                 output_activation=output_activation,
                 ae_dropouts=do_do,
                 transfer_do=trans_do,
                 sgd=sgd,
                 loss=loss,
                 patience=patience,
                 batch_size=batch_size,
                 epochs=epochs)

# Train one MLP per element of every level returned by ft_dsae; `mod` still
# holds the modality selected earlier in the script, which picks the labels.
mlps_by_lvls = []
for level in ft_res:
    mlp_res_list = [
        train_mlp(layer[0], infer_model['LABEL'][mod]["TRAIN"],
                  layer[1], infer_model["LABEL"][mod]["DEV"],
                  layer[2], infer_model["LABEL"][mod]["TEST"],
                  mlp_h, loss=mlp_loss, dropouts=mlp_dropouts,
                  sgd=mlp_sgd, epochs=mlp_epochs,
                  output_activation=mlp_output_activation,
                  input_activation=mlp_input_activation,
                  batch_size=mlp_batch_size, fit_verbose=0)
        for layer in level
    ]
    mlps_by_lvls.append(mlp_res_list)


db["DSAEFT"]["transfert"] = mlps_by_lvls

# Closing db writes the cached (writeback) entries out; the input shelf is
# closed as well so its underlying file is released.
db.close()
infer_model.close()