# 04b-mmf_mini_ae.py
# coding: utf-8

# In[2]:

# Import
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD,Adam
from keras.layers.advanced_activations import ELU,PReLU
from mlp import *    # train_mlp / train_ae are provided by the local mlp and utils modules
import sklearn.metrics
import shelve
import pickle
import pandas        # used below to dump layer projections to HDF files
from utils import *
import sys
import os
import json
# In[4]:
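
# Usage (inferred from the sys.argv reads below; argument names are descriptive only):
#   python 04b-mmf_mini_ae.py <in_dir> <infer_model_shelve> <conf.json> [features_key]
#   sys.argv[1]: working directory, sys.argv[2]: shelve holding features and labels,
#   sys.argv[3]: JSON experiment configuration, sys.argv[4]: optional feature key (default "LDA")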

infer_model = shelve.open(sys.argv[2])
in_dir = sys.argv[1]
# the shelve holds "LABEL" plus, under each feature key, the modalities (e.g. 'ASR', 'TRS')
# In[6]:
if len(sys.argv) > 4:
    features_key = sys.argv[4]
else:
    features_key = "LDA"
save_projection = True
json_conf = json.load(open(sys.argv[3]))
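# The JSON configuration is expected to look roughly like the sketch below
# (the keys are the ones read in this script; the values are purely illustrative):
# {
#   "name": "exp_ae",
#   "ae":  {"hidden_size": [100, 50], "input_activation": "tanh", "output_activation": "tanh",
#           "loss": "mse", "epochs": 100, "batch": 8, "patience": 20, "do": [0.25, 0.25],
#           "sgd": {"name": "adam", "lr": 0.0001}},
#   "mlp": {"hidden_size": [60], "input_activation": "tanh", "output_activation": "softmax",
#           "loss": "categorical_crossentropy", "epochs": 200, "batch": 8, "do": [0.25],
#           "sgd": {"name": "sgd", "lr": 0.01}}
# }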
ae_conf = json_conf["ae"]

hidden_size = ae_conf["hidden_size"]
input_activation = None
print(ae_conf["input_activation"])
if ae_conf["input_activation"] == "elu":
    print(" ELU")
    # note: a PReLU advanced-activation layer is used when the config asks for "elu"
    input_activation = PReLU()
else:
    print(" ELSE")
    input_activation = ae_conf["input_activation"]
#input_activation=ae_conf["input_activation"]
output_activation = ae_conf["output_activation"]
loss = ae_conf["loss"]
epochs = ae_conf["epochs"]
batch = ae_conf["batch"]
patience = ae_conf["patience"]  # patience value passed to train_ae (presumably early stopping)
do_do = ae_conf["do"]           # dropout rates for the autoencoder layers
# The optimizer can be given either as a dict {"name": ..., "lr": ...} or directly as a
# Keras optimizer string; the except branch falls back to the string form.
try:
    if ae_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=ae_conf["sgd"]["lr"])
    elif ae_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=ae_conf["sgd"]["lr"])
except (TypeError, KeyError):
    sgd = ae_conf["sgd"]

mlp_conf = json_conf["mlp"]
mlp_h = mlp_conf["hidden_size"]
mlp_loss = mlp_conf["loss"]
mlp_dropouts = mlp_conf["do"]
mlp_epochs = mlp_conf["epochs"]
mlp_batch_size = mlp_conf["batch"]
mlp_input_activation = mlp_conf["input_activation"]
mlp_output_activation = mlp_conf["output_activation"]

# Same optimizer convention as for the autoencoder above.
try:
    if mlp_conf["sgd"]["name"] == "adam":
        mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])
    elif mlp_conf["sgd"]["name"] == "sgd":
        mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except (TypeError, KeyError):
    mlp_sgd = mlp_conf["sgd"]


name = json_conf["name"]
try:
    os.mkdir("{}/{}".format(in_dir, name))
except OSError:
    pass  # the output directory may already exist
db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True)
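# Layout of the results shelve written below:
#   db["LABEL"]          : labels copied from the input shelve
#   db[features_key][mod]: MLP results on the raw features of modality mod
#   db["AE"][mod]        : per-layer MLP results on the autoencoder projections
#   db["AE"]["SPE"]      : same, for the cross-modal ASR->TRS autoencoder (if both are present)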
db["LABEL"]=infer_model["LABEL"]
#
keys = infer_model[features_key].keys()

db["AE"] = {}
db[features_key] = {}
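
# For each modality present under the feature key (e.g. "ASR" and "TRS"):
#   1. train a reference MLP classifier on the raw features,
#   2. train a stacked autoencoder on those features,
#   3. train an MLP on the projection produced by every autoencoder layer.
# train_ae is assumed to return, for each hidden layer, a (TRAIN, DEV, TEST) triple of projections.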
for mod in keys:
    print(infer_model[features_key][mod]["TRAIN"].shape)
    print(infer_model[features_key][mod]["DEV"].shape)
    print(infer_model[features_key][mod]["TEST"].shape)

    # reference MLP classifier trained directly on the raw features of this modality
    db[features_key][mod] = train_mlp(infer_model[features_key][mod]["TRAIN"], infer_model["LABEL"][mod]["TRAIN"],
                            infer_model[features_key][mod]["DEV"], infer_model["LABEL"][mod]["DEV"],
                            infer_model[features_key][mod]["TEST"], infer_model["LABEL"][mod]["TEST"],
                            mlp_h, loss=mlp_loss, sgd=mlp_sgd,
                            epochs=mlp_epochs,
                            batch_size=mlp_batch_size,
                            input_activation=mlp_input_activation,
                            output_activation=mlp_output_activation,
                            dropouts=mlp_dropouts,
                            fit_verbose=0)
    print(input_activation)
    # stacked autoencoder trained on the features of this modality
    res = train_ae(infer_model[features_key][mod]["TRAIN"],
                   infer_model[features_key][mod]["DEV"],
                   infer_model[features_key][mod]["TEST"],
                   hidden_size, patience=patience, sgd=sgd,
                   dropouts=do_do, input_activation=input_activation, output_activation=output_activation,
                   loss=loss, epochs=epochs, batch_size=batch, verbose=0)
    mlp_res_list = []
    for nb, layer in enumerate(res):
        if save_projection:
            # save the TRAIN/DEV/TEST projections of this layer to HDF, dropping the
            # columns that are entirely zero on the training set (dead units)
            pd = pandas.DataFrame(layer[0])
            col_count = (pd.sum(axis=0) != 0)
            pd = pd.loc[:, col_count]
            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir, name, nb, mod), "TRAIN")
            pd = pandas.DataFrame(layer[1])
            pd = pd.loc[:, col_count]
            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir, name, nb, mod), "DEV")
            pd = pandas.DataFrame(layer[2])
            pd = pd.loc[:, col_count]
            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir, name, nb, mod), "TEST")
            del pd
        # MLP classifier trained on this layer's projection
        mlp_res_list.append(train_mlp(layer[0], infer_model["LABEL"][mod]["TRAIN"],
                                      layer[1], infer_model["LABEL"][mod]["DEV"],
                                      layer[2], infer_model["LABEL"][mod]["TEST"],
                                      mlp_h, loss=mlp_loss, dropouts=mlp_dropouts, sgd=mlp_sgd, epochs=mlp_epochs,
                                      output_activation=mlp_output_activation,
                                      input_activation=mlp_input_activation,
                                      batch_size=mlp_batch_size, fit_verbose=0))
    db["AE"][mod] = mlp_res_list

if "ASR" in keys and "TRS" in keys:
    mod = "ASR"
    mod2= "TRS"
    mlp_res_list=[]

    # cross-modal autoencoder: ASR features as input, TRS features as reconstruction targets
    res = train_ae(infer_model[features_key][mod]["TRAIN"],
                   infer_model[features_key][mod]["DEV"],
                   infer_model[features_key][mod]["TEST"],
                   hidden_size, dropouts=do_do, patience=patience,
                   sgd=sgd, input_activation=input_activation, output_activation=output_activation,
                   loss=loss, epochs=epochs,
                   batch_size=batch,
                   y_train=infer_model[features_key][mod2]["TRAIN"],
                   y_dev=infer_model[features_key][mod2]["DEV"],
                   y_test=infer_model[features_key][mod2]["TEST"])

    for nb, layer in enumerate(res):
        if save_projection:
            # save the TRAIN/DEV/TEST projections of this cross-modal layer under the "SPE"
            # name, dropping columns that are entirely zero on the training set
            pd = pandas.DataFrame(layer[0])
            col_count = (pd.sum(axis=0) != 0)
            pd = pd.loc[:, col_count]
            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir, name, nb, "SPE"), "TRAIN")
            pd = pandas.DataFrame(layer[1])
            pd = pd.loc[:, col_count]
            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir, name, nb, "SPE"), "DEV")
            pd = pandas.DataFrame(layer[2])
            pd = pd.loc[:, col_count]
            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir, name, nb, "SPE"), "TEST")
            del pd

        # MLP classifier trained on this layer's projection, same protocol as above
        mlp_res_list.append(train_mlp(layer[0], infer_model["LABEL"][mod]["TRAIN"],
                                      layer[1], infer_model["LABEL"][mod]["DEV"],
                                      layer[2], infer_model["LABEL"][mod]["TEST"],
                                      mlp_h, loss=mlp_loss, dropouts=mlp_dropouts, sgd=mlp_sgd, epochs=mlp_epochs,
                                      output_activation=mlp_output_activation,
                                      input_activation=mlp_input_activation,
                                      batch_size=mlp_batch_size, fit_verbose=0))

    db["AE"]["SPE"] = mlp_res_list

db.sync()
db.close()