# 04e-mm_vae.py 6.4 KB
# coding: utf-8
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD,Adam
from mlp import *
from vae import *
import sklearn.metrics
import shelve
import pickle
from utils import *
import sys
import os
import json
# In[4]:

# Command line: IN_DIR INFER_MODEL_SHELVE CONFIG_JSON [FEATURES_KEY]
# Fail early with a usage message instead of a bare IndexError.
if len(sys.argv) < 4:
    sys.exit("usage: {} IN_DIR INFER_MODEL_SHELVE CONFIG_JSON [FEATURES_KEY]".format(sys.argv[0]))

# Shelve holding the input features and the labels; this script only reads it.
infer_model = shelve.open(sys.argv[2])
# Directory under which the per-experiment result folder is created.
in_dir = sys.argv[1]
#['ASR', 'TRS', 'LABEL']
# Key of the feature set to read from the shelve; "LDA" when not given.
features_key = sys.argv[4] if len(sys.argv) > 4 else "LDA"

# When True, the VAE projections of every layer are dumped to HDF files.
save_projection = True

# Use a context manager so the configuration file handle is closed right
# after parsing (the original left it to the garbage collector).
with open(sys.argv[3]) as conf_file:
    json_conf = json.load(conf_file)
# Hyper-parameters of the variational auto-encoder ("vae" section of the
# JSON configuration).
vae_conf = json_conf["vae"]

hidden_size = vae_conf["hidden_size"]
input_activation = vae_conf["input_activation"]
output_activation = vae_conf["output_activation"]
epochs = vae_conf["epochs"]
batch = vae_conf["batch"]
patience = vae_conf["patience"]
latent_dim = vae_conf["latent"]

# The "sgd" entry is either a mapping {"name": "adam" | "sgd", "lr": <rate>}
# or any other value, which is handed to train_vae unchanged (presumably a
# Keras optimizer identifier such as "rmsprop" -- confirm against train_vae).
# The original wrapped this in a bare ``except:`` and left ``sgd`` undefined
# for an unknown optimizer name; unknown names are now rejected explicitly.
_vae_opt_conf = vae_conf["sgd"]
if isinstance(_vae_opt_conf, dict):
    _vae_opt_classes = {"adam": Adam, "sgd": SGD}
    _vae_opt_name = _vae_opt_conf["name"]
    if _vae_opt_name not in _vae_opt_classes:
        raise ValueError(
            "unsupported vae optimizer name: {!r} (expected one of {})".format(
                _vae_opt_name, sorted(_vae_opt_classes)))
    sgd = _vae_opt_classes[_vae_opt_name](lr=_vae_opt_conf["lr"])
else:
    sgd = _vae_opt_conf

# Hyper-parameters of the MLP classifier ("mlp" section of the JSON
# configuration).
mlp_conf = json_conf["mlp"]
mlp_h = mlp_conf["hidden_size"]
mlp_loss = mlp_conf["loss"]
mlp_dropouts = mlp_conf["do"]
mlp_epochs = mlp_conf["epochs"]
mlp_batch_size = mlp_conf["batch"]
mlp_input_activation = mlp_conf["input_activation"]
mlp_output_activation = mlp_conf["output_activation"]

# The "sgd" entry is either a mapping {"name": "adam" | "sgd", "lr": <rate>}
# or any other value, which is handed to train_mlp unchanged (presumably a
# Keras optimizer identifier such as "rmsprop" -- confirm against train_mlp).
# The original wrapped this in a bare ``except:`` and left ``mlp_sgd``
# undefined for an unknown optimizer name; unknown names are now rejected
# explicitly.
_mlp_opt_conf = mlp_conf["sgd"]
if isinstance(_mlp_opt_conf, dict):
    _mlp_opt_classes = {"adam": Adam, "sgd": SGD}
    _mlp_opt_name = _mlp_opt_conf["name"]
    if _mlp_opt_name not in _mlp_opt_classes:
        raise ValueError(
            "unsupported mlp optimizer name: {!r} (expected one of {})".format(
                _mlp_opt_name, sorted(_mlp_opt_classes)))
    mlp_sgd = _mlp_opt_classes[_mlp_opt_name](lr=_mlp_opt_conf["lr"])
else:
    mlp_sgd = _mlp_opt_conf


# Experiment name; results are written under "<in_dir>/<name>/".
name = json_conf["name"]

try:
    print("make folder ")
    os.mkdir("{}/{}".format(in_dir, name))
except OSError:
    # An already existing folder is fine and is reused. Any other failure
    # (permissions, missing parent directory, ...) is re-raised here rather
    # than surfacing later as an obscure error when the shelve is opened.
    if not os.path.isdir("{}/{}".format(in_dir, name)):
        raise
    print("folder not maked")


# Explicit import: the original used the ``pandas`` name without importing
# it in this file, relying on one of the star imports to provide it.
import pandas


def _save_projections(layer, nb, tag):
    """Dump the TRAIN/DEV/TEST projections of one VAE layer to an HDF file.

    ``layer`` is indexed as (train, dev, test).  Columns whose sum over the
    TRAIN projection is zero are dropped, and the same column selection is
    applied to DEV and TEST so the three frames stay aligned.  The file is
    "<in_dir>/<name>/VAE_<nb>_<tag>_df.hdf", with one key per split.
    """
    path = "{}/{}/VAE_{}_{}_df.hdf".format(in_dir, name, nb, tag)
    train_df = pandas.DataFrame(layer[0])
    kept_columns = (train_df.sum(axis=0) != 0)
    train_df.loc[:, kept_columns].to_hdf(path, "TRAIN")
    for index, split in ((1, "DEV"), (2, "TEST")):
        pandas.DataFrame(layer[index]).loc[:, kept_columns].to_hdf(path, split)


def _train_classifier(train, dev, test, split_labels):
    """Train the MLP classifier with the hyper-parameters of the "mlp" config.

    ``split_labels`` maps "TRAIN"/"DEV"/"TEST" to the labels of each split.
    Returns whatever ``train_mlp`` returns.

    Every classifier of the script goes through this helper so that they all
    share the same settings.  Two deviations from the original calls:
    * ``mlp_input_activation`` is used; the original read it from the config
      but passed the VAE's ``input_activation`` to every ``train_mlp`` call.
    * ``loss=mlp_loss`` is passed for the baseline classifier too; the
      original omitted it for that call only.
    """
    return train_mlp(train, split_labels["TRAIN"],
                     dev, split_labels["DEV"],
                     test, split_labels["TEST"],
                     mlp_h, loss=mlp_loss, dropouts=mlp_dropouts, sgd=mlp_sgd,
                     epochs=mlp_epochs,
                     output_activation=mlp_output_activation,
                     input_activation=mlp_input_activation,
                     batch_size=mlp_batch_size, fit_verbose=0)


def _evaluate_layers(layers, tag, split_labels):
    """Train one classifier per element of ``layers`` (a train_vae result).

    Each element is indexed as (train, dev, test).  When ``save_projection``
    is set, the projections are first written to disk under ``tag``.
    Returns the list of classifier results, in layer order.
    """
    results = []
    for nb, layer in enumerate(layers):
        if save_projection:
            _save_projections(layer, nb, tag)
        results.append(_train_classifier(layer[0], layer[1], layer[2], split_labels))
    return results


# Read each shelve entry once: every access to a shelve key unpickles the
# whole stored object again.
features = infer_model[features_key]
labels = infer_model["LABEL"]
keys = features.keys()

db = shelve.open("{}/{}/ae_model.shelve".format(in_dir, name), writeback=True)
try:
    db["LABEL"] = labels
    db["VAE"] = {}
    db[features_key] = {}

    for mod in keys:
        mod_features = features[mod]

        # Baseline: classifier trained directly on the input features.
        db[features_key][mod] = _train_classifier(mod_features["TRAIN"],
                                                  mod_features["DEV"],
                                                  mod_features["TEST"],
                                                  labels[mod])

        # VAE trained on this modality alone, then one classifier per
        # element returned by train_vae.
        res = train_vae(mod_features["TRAIN"], mod_features["DEV"], mod_features["TEST"],
                        hidden_size=hidden_size[0],
                        latent_dim=latent_dim, sgd=sgd,
                        input_activation=input_activation,
                        output_activation=output_activation,
                        nb_epochs=epochs, batch_size=batch)
        db["VAE"][mod] = _evaluate_layers(res, mod, labels[mod])

    if "ASR" in keys and "TRS" in keys:
        # Cross-modal VAE: ASR features as input, TRS features as targets.
        # Results are stored under "SPE"; labels come from the ASR modality.
        mod = "ASR"
        mod2 = "TRS"
        res = train_vae(features[mod]["TRAIN"],
                        features[mod]["DEV"],
                        features[mod]["TEST"],
                        hidden_size=hidden_size[0],
                        sgd=sgd, input_activation=input_activation,
                        output_activation=output_activation,
                        latent_dim=latent_dim,
                        nb_epochs=epochs,
                        batch_size=batch,
                        y_train=features[mod2]["TRAIN"],
                        y_dev=features[mod2]["DEV"],
                        y_test=features[mod2]["TEST"])
        db["VAE"]["SPE"] = _evaluate_layers(res, "SPE", labels[mod])

    db.sync()
finally:
    # Close both shelves even when training fails; closing the writeback
    # shelve also flushes whatever results were collected so far.
    db.close()
    infer_model.close()