# 02a-mlp_score_on_BN.py
# coding: utf-8

# In[2]:

# Import
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD,Adam
from keras.layers.advanced_activations import ELU,PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json
# In[4]:

# Command line: argv[1] is the input directory, argv[2] the JSON config path.
in_dir = sys.argv[1]
#['ASR', 'TRS', 'LABEL']
# In[6]:
# Read the configuration inside a context manager so the file handle is
# closed as soon as parsing finishes (the previous bare open() leaked it).
with open(sys.argv[2]) as conf_file:
    json_conf = json.load(conf_file)

# Hyper-parameters of the MLP, all taken from the "mlp" section of the config.
mlp_conf = json_conf["mlp"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation = mlp_conf["input_activation"]
output_activation = mlp_conf["output_activation"]

# Build the optimizer handed to train_mlp_pred from the "sgd" config entry.
# Two forms are accepted:
#   * a mapping with "name" ("adam" or "sgd") and "lr" -> optimizer instance
#   * anything without a "name" entry (e.g. a plain string) -> passed through
#     unchanged.
sgd_conf = mlp_conf["sgd"]
try:
    optimizer_name = sgd_conf["name"]
except (KeyError, TypeError):
    # KeyError: mapping without "name"; TypeError: not a mapping at all
    # (indexing a string with "name").
    optimizer_name = None

optimizer_classes = {"adam": Adam, "sgd": SGD}
if optimizer_name is None:
    sgd = sgd_conf
elif optimizer_name in optimizer_classes:
    # A missing "lr" raises KeyError here instead of silently falling back
    # to the raw mapping.
    sgd = optimizer_classes[optimizer_name](lr=sgd_conf["lr"])
else:
    # Previously an unrecognised name left `sgd` undefined and the script
    # only failed with a NameError once training started.
    raise ValueError(
        "unsupported optimizer name {!r} in mlp.sgd (expected one of {})".format(
            optimizer_name, sorted(optimizer_classes)))
name = json_conf["name"]

# Shelves living under <in_dir>/<name>/: `db` is read for the "LABEL" entry
# and later receives the predicted labels; `shelve_logs` receives one
# training history per modality/level pair.
db = shelve.open("{}/{}/labels.shelve".format(in_dir, name))
shelve_logs = shelve.open("{}/{}/02a_logs.shelve".format(in_dir, name))

# Not used further in this section; the lookup raises KeyError right away
# if the shelve has no "LABEL" entry.
keys = db["LABEL"].keys()

# Single definition of the projection file path, shared by the key discovery
# below and by the later read_hdf calls.
hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir, name)

# Only the key listing is needed here; close the store even if keys() fails.
proj_hdf = pandas.HDFStore(hdf_proj_path)
try:
    hdf_keys = proj_hdf.keys()
finally:
    proj_hdf.close()

# Keys are read as "/<mod>/<lvl>/<crossval>": index 0 of the split is the
# empty string before the leading slash.
split_keys = [hdf_key.split("/") for hdf_key in hdf_keys]
hdf_mods = set(parts[1] for parts in split_keys)
hdf_lvl = set(parts[2] for parts in split_keys)
hdf_crossval = set(parts[3] for parts in split_keys)
print(hdf_mods)
print(hdf_lvl)
print(hdf_crossval)
# labels_dict[level][modality] starts as an empty dict for every level and
# modality found in the HDF file. The "origine" entry is seeded first, so a
# level that happens to be called "origine" replaces it.
labels_dict = {"origine": {}}
logs = {}
labels_dict.update({
    level_name: {mod_name: {} for mod_name in hdf_mods}
    for level_name in hdf_lvl
})

# Frames with at most this many columns are not fed to the MLP; their argmax
# is stored under labels_dict["origine"] instead.
# NOTE(review): 8 presumably equals the number of classes -- confirm.
LABEL_WIDTH_THRESHOLD = 8

SPLIT_NAMES = ("TRAIN", "DEV", "TEST")
HDF_KEY_TEMPLATE = "/{}/{}/{}"

# For every modality/level pair: load the three projected splits, train an
# MLP on them and keep the arg-max class of its predictions for each split.
for mod in hdf_mods:
    for lvl in hdf_lvl:
        x_train, x_dev, x_test = [
            pandas.read_hdf(hdf_proj_path,
                            key=HDF_KEY_TEMPLATE.format(mod, lvl, split_name))
            for split_name in SPLIT_NAMES
        ]

        if x_train.shape[1] <= LABEL_WIDTH_THRESHOLD:
            # Narrow frame: record its arg-max columns and skip training.
            for split_name, frame in zip(SPLIT_NAMES, (x_train, x_dev, x_test)):
                labels_dict["origine"][split_name] = np.argmax(frame.values, axis=1)
            continue

        # Fetch the shelve entry once per iteration; every db["LABEL"] access
        # re-reads the whole entry from disk.
        mod_labels = db["LABEL"][mod]
        y_train = mod_labels["TRAIN"]
        y_dev = mod_labels["DEV"]
        y_test = mod_labels["TEST"]

        # Shapes are printed in the same order as before: inputs, then targets.
        for array in (x_train, x_dev, x_test, y_train, y_dev, y_test):
            print(array.shape)

        pred, hist = train_mlp_pred(x_train.values, y_train,
                                    x_dev.values, y_dev,
                                    x_test.values, y_test,
                                    hidden_size, sgd=sgd,
                                    epochs=epochs,
                                    patience=patience,
                                    batch_size=batch_size,
                                    input_activation=input_activation,
                                    output_activation=output_activation,
                                    dropouts=dropouts,
                                    fit_verbose=1)

        # Keep the training history, keyed by "<mod>/<lvl>".
        shelve_logs["{}/{}".format(mod, lvl)] = hist

        # pred[0], pred[1], pred[2] are used as the TRAIN, DEV and TEST outputs.
        for position, split_name in enumerate(SPLIT_NAMES):
            labels_dict[lvl][mod][split_name] = np.argmax(pred[position], axis=1)

# Store the predicted labels of every level in the labels shelve.
# NOTE(review): only level names are written here, so labels_dict["origine"]
# reaches the shelve only if "origine" is itself a level -- confirm intended.
for level_name in hdf_lvl:
    db[level_name] = labels_dict[level_name]

# Flush and close both shelves: the training logs first, then the labels.
for store in (shelve_logs, db):
    store.sync()
    store.close()