From d414b83e18cdc5d0313f6880349609082dc035c1 Mon Sep 17 00:00:00 2001
From: Killian
Date: Mon, 10 Oct 2016 11:59:15 +0200
Subject: [PATCH] add Bottleneck MLP + script

---
Note: the payload below was edited by hand after `git format-patch`
(02c-tsne_mlproj.py transfert keys, narrowed `except` clauses, comments).
Line counts per hunk are unchanged; the blob ids on the `index` lines are stale.

 BOTTLENECK/01a-mlp_proj.py              | 119 ++++++++++++++++++++++++++++++
 BOTTLENECK/02a-mlp_score_on_BN.py       | 115 +++++++++++++++++++++++++++++
 BOTTLENECK/02b-transfert_ae.py          |  99 +++++++++++++++++++++++++
 BOTTLENECK/02c-tsne_mlproj.py           | 123 ++++++++++++++++++++++++++++++++
 BOTTLENECK/03-mlp_score_on_transfert.py | 111 ++++++++++++++++++++++++++++
 BOTTLENECK/04-accuracyscore.py          |  68 ++++++++++++++++++
 BOTTLENECK/mlp.py                       |   1 +
 BOTTLENECK/run01_do_alljson.sh          |   8 +++
 BOTTLENECK/run02_mlpscore.sh            |  11 +++
 BOTTLENECK/run02b-transfert.sh          |   8 +++
 BOTTLENECK/run03_tsne_MLPtransfert.sh   |   8 +++
 BOTTLENECK/run04-mlp_on_transfert.sh    |  10 +++
 BOTTLENECK/run05_accuracy.sh            |   8 +++
 BOTTLENECK/run_all.sh                   |  22 ++++++
 BOTTLENECK/run_one.sh                   |   7 ++
 BOTTLENECK/utils.py                     |   1 +
 16 files changed, 719 insertions(+)
 create mode 100644 BOTTLENECK/01a-mlp_proj.py
 create mode 100644 BOTTLENECK/02a-mlp_score_on_BN.py
 create mode 100644 BOTTLENECK/02b-transfert_ae.py
 create mode 100644 BOTTLENECK/02c-tsne_mlproj.py
 create mode 100644 BOTTLENECK/03-mlp_score_on_transfert.py
 create mode 100644 BOTTLENECK/04-accuracyscore.py
 create mode 120000 BOTTLENECK/mlp.py
 create mode 100644 BOTTLENECK/run01_do_alljson.sh
 create mode 100644 BOTTLENECK/run02_mlpscore.sh
 create mode 100644 BOTTLENECK/run02b-transfert.sh
 create mode 100644 BOTTLENECK/run03_tsne_MLPtransfert.sh
 create mode 100644 BOTTLENECK/run04-mlp_on_transfert.sh
 create mode 100644 BOTTLENECK/run05_accuracy.sh
 create mode 100644 BOTTLENECK/run_all.sh
 create mode 100644 BOTTLENECK/run_one.sh
 create mode 120000 BOTTLENECK/utils.py

diff --git a/BOTTLENECK/01a-mlp_proj.py b/BOTTLENECK/01a-mlp_proj.py
new file mode 100644
index 0000000..d133fc4
--- /dev/null
+++ b/BOTTLENECK/01a-mlp_proj.py
@@ -0,0 +1,119 @@
+
+# coding: utf-8
+
+# Run train_mlp_proj on each modality and store every returned level in MLP_proj_df.hdf
+
+# Import
+import gensim
+from scipy import sparse
+import itertools
+from sklearn import preprocessing
+from keras.models import Sequential
+from keras.optimizers import SGD,Adam
+from keras.layers.advanced_activations import ELU,PReLU
+from keras.callbacks import ModelCheckpoint
+from mlp import *
+import sklearn.metrics
+from sklearn.preprocessing import LabelBinarizer
+import shelve
+import pickle
+from utils import *
+import sys
+import os
+import json
+# Usage: 01a-mlp_proj.py <in_dir> <features shelve> <json conf> [features key]
+
+infer_model=shelve.open("{}".format(sys.argv[2]))
+in_dir = sys.argv[1]
+#['ASR', 'TRS', 'LABEL']
+# Optional 4th argument selects the features entry of the shelve (default "LDA")
+if len(sys.argv) > 4 :
+    features_key = sys.argv[4]
+else :
+    features_key = "LDA"
+save_projection = True
+json_conf =json.load(open(sys.argv[3]))
+ae_conf = json_conf["mlp_proj"]
+
+hidden_size= ae_conf["hidden_size"]
+input_activation = None
+if ae_conf["input_activation"] == "elu":
+    print " ELU"
+    input_activation = PReLU()  # NOTE(review): conf says "elu" but PReLU is instantiated -- confirm intent
+else:
+    print " ELSE"
+    input_activation = ae_conf["input_activation"]
+#input_activation=ae_conf["input_activation"]
+output_activation=ae_conf["output_activation"]
+loss=ae_conf["loss"]
+epochs=ae_conf["epochs"]
+batch_size=ae_conf["batch"]
+patience=ae_conf["patience"]
+dropouts=ae_conf["do"]
+try:
+    k = ae_conf["sgd"]
+    if ae_conf["sgd"]["name"] == "adam":
+        sgd = Adam(lr=ae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
+    elif ae_conf["sgd"]["name"] == "sgd":
+        sgd = SGD(lr=ae_conf["sgd"]["lr"])
+except (TypeError, KeyError):  # "sgd" is not a complete name/lr mapping: hand the raw value over
+    sgd = ae_conf["sgd"]
+
+mlp_conf = json_conf["mlp"]
+mlp_h = mlp_conf["hidden_size"]
+mlp_loss = mlp_conf["loss"]
+mlp_dropouts = mlp_conf["do"]
+mlp_epochs = mlp_conf["epochs"]
+mlp_batch_size = mlp_conf["batch"]
+mlp_input_activation=mlp_conf["input_activation"]
+mlp_output_activation=mlp_conf["output_activation"]
+
+try:
+    k = mlp_conf["sgd"]
+    if mlp_conf["sgd"]["name"] == "adam":
+        mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
+    elif mlp_conf["sgd"]["name"] == "sgd":
+        mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"])
+except (TypeError, KeyError):  # same fallback as above for the "mlp" section
+    mlp_sgd = mlp_conf["sgd"]
+
+
+name = json_conf["name"]
+try :
+    os.mkdir("{}/{}".format(in_dir,name))
+except OSError :
+    pass
+db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
+db["IDS"]=dict(infer_model["LABEL"])
+# One run per modality listed under the chosen features key
+keys = infer_model[features_key].keys()
+LABELS = {}
+for mod in keys :
+
+    int_labels_train = map(select,infer_model["LABEL"][mod]["TRAIN"])
+    binarizer = LabelBinarizer()
+    y_train=binarizer.fit_transform(int_labels_train)
+    y_dev=binarizer.transform(map(select,infer_model["LABEL"][mod]["DEV"]))
+    y_test=binarizer.transform(map(select,infer_model["LABEL"][mod]["TEST"]))
+    LABELS[mod]= { "TRAIN":y_train , "DEV" : y_dev, "TEST" : y_test}
+    sumary,proj = train_mlp_proj(infer_model[features_key][mod]["TRAIN"].todense(),y_train,
+                                 infer_model[features_key][mod]["DEV"].todense(),y_dev,
+                                 infer_model[features_key][mod]["TEST"].todense(),y_test,
+                                 hidden_size ,sgd=sgd,
+                                 epochs=epochs,
+                                 patience=patience,
+                                 batch_size=batch_size,
+                                 input_activation=input_activation,
+                                 output_activation=output_activation,
+                                 dropouts=dropouts,
+                                 fit_verbose=1)
+    with open("{}/{}/{}_sum.txt".format(in_dir,name,mod),"w") as output_sum :
+        print >>output_sum, sumary
+    for num_lvl,level in enumerate(proj):
+        print len(level)
+        for num,corp_type in enumerate(["TRAIN","DEV","TEST"]):
+            pd = pandas.DataFrame(level[num])  # NOTE(review): pandas is not imported here, presumably re-exported by "from mlp import *"
+            pd.to_hdf("{}/{}/MLP_proj_df.hdf".format(in_dir,name),"{}/lvl{}/{}".format(mod,num_lvl,corp_type))
+db["LABEL"] = LABELS
+db.sync()
+db.close()
diff --git a/BOTTLENECK/02a-mlp_score_on_BN.py b/BOTTLENECK/02a-mlp_score_on_BN.py
new file mode 100644
index 0000000..a64f3a3
--- /dev/null
+++ b/BOTTLENECK/02a-mlp_score_on_BN.py
@@ -0,0 +1,115 @@
+
+# coding: utf-8
+
+# Run train_mlp_pred on each stored projection level and keep the argmax predictions
+
+# Import
+import gensim
+from scipy import sparse
+import itertools
+from sklearn import preprocessing
+from keras.models import Sequential
+from keras.optimizers import SGD,Adam
+from keras.layers.advanced_activations import ELU,PReLU
+from keras.callbacks import ModelCheckpoint
+from mlp import *
+import sklearn.metrics
+from sklearn.preprocessing import LabelBinarizer
+import shelve
+import pickle
+from utils import *
+import sys
+import os
+import json
+# Usage: 02a-mlp_score_on_BN.py <in_dir> <json conf>
+
+in_dir = sys.argv[1]
+#['ASR', 'TRS', 'LABEL']
+# Hyper-parameters come from the "mlp" section of the json conf
+json_conf =json.load(open(sys.argv[2]))
+
+mlp_conf = json_conf["mlp"]
+hidden_size = mlp_conf["hidden_size"]
+loss = mlp_conf["loss"]
+patience = mlp_conf["patience"]
+dropouts = mlp_conf["do"]
+epochs = mlp_conf["epochs"]
+batch_size = mlp_conf["batch"]
+input_activation=mlp_conf["input_activation"]
+output_activation=mlp_conf["output_activation"]
+
+try:
+    k = mlp_conf["sgd"]
+    if mlp_conf["sgd"]["name"] == "adam":
+        sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
+    elif mlp_conf["sgd"]["name"] == "sgd":
+        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
+except (TypeError, KeyError):  # "sgd" is not a complete name/lr mapping: hand the raw value over
+    sgd = mlp_conf["sgd"]
+name = json_conf["name"]
+
+db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
+shelve_logs=shelve.open("{}/{}/02a_logs.shelve".format(in_dir,name))
+
+# HDF keys are /<mod>/<lvl>/<TRAIN|DEV|TEST>, as written by 01a-mlp_proj.py
+keys = db["LABEL"].keys()
+proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))  # NOTE(review): pandas/np are not imported here, presumably re-exported by the star imports
+hdf_keys = proj_hdf.keys()
+proj_hdf.close()
+hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
+hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
+hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
+print hdf_mods
+print hdf_lvl
+print hdf_crossval
+
+hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
+labels_dict = {"origine":{} }
+logs = {}
+for lvl in hdf_lvl :
+    labels_dict[lvl] = {}
+    for mod in hdf_mods:
+        labels_dict[lvl][mod] = {}
+
+for mod in hdf_mods:
+    for lvl in hdf_lvl :
+        x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TRAIN"))
+        x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"DEV"))
+        x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TEST"))
+        if x_train.shape[1] <= 8 :  # presumably the class-score level, used as predictions directly -- TODO confirm
+            labels_dict["origine"]["TRAIN"] = np.argmax(x_train.values,axis=1)
+            labels_dict["origine"]["DEV"] = np.argmax(x_dev.values,axis=1)
+            labels_dict["origine"]["TEST"] = np.argmax(x_test.values,axis=1)
+            continue
+        y_train = db["LABEL"][mod]["TRAIN"]
+        y_dev = db["LABEL"][mod]["DEV"]
+        y_test = db["LABEL"][mod]["TEST"]
+
+        print x_train.shape
+        print x_dev.shape
+        print x_test.shape
+        print y_train.shape
+        print y_dev.shape
+        print y_test.shape
+        pred,hist = train_mlp_pred(x_train.values,y_train,
+                                   x_dev.values,y_dev,
+                                   x_test.values,y_test,
+                                   hidden_size ,sgd=sgd,
+                                   epochs=epochs,
+                                   patience=patience,
+                                   batch_size=batch_size,
+                                   input_activation=input_activation,
+                                   output_activation=output_activation,
+                                   dropouts=dropouts,
+                                   fit_verbose=1)
+        shelve_logs["{}/{}".format(mod,lvl)] = hist
+        labels_dict[lvl][mod]["TRAIN"] = np.argmax(pred[0],axis=1)
+        labels_dict[lvl][mod]["DEV"] = np.argmax(pred[1],axis=1)
+        labels_dict[lvl][mod]["TEST"] = np.argmax(pred[2],axis=1)
+
+for lvl in hdf_lvl:
+    db[lvl] = labels_dict[lvl]
+shelve_logs.sync()
+shelve_logs.close()
+db.sync()
+db.close()
diff --git a/BOTTLENECK/02b-transfert_ae.py b/BOTTLENECK/02b-transfert_ae.py
new file mode 100644
index 0000000..34f4cfa
--- /dev/null
+++ b/BOTTLENECK/02b-transfert_ae.py
@@ -0,0 +1,99 @@
+
+# coding: utf-8
+
+# Run train_ae from the ASR projections to the TRS projections and store each returned layer
+
+# Import
+import gensim
+from scipy import sparse
+import itertools
+from sklearn import preprocessing
+from keras.models import Sequential
+from keras.optimizers import SGD,Adam
+from keras.layers.advanced_activations import ELU,PReLU
+from keras.callbacks import ModelCheckpoint
+from mlp import *
+import pandas as pd
+import sklearn.metrics
+from sklearn.preprocessing import LabelBinarizer
+import shelve
+import pickle
+from utils import *
+import sys
+import os
+import json
+# Usage: 02b-transfert_ae.py <in_dir> <json conf>
+
+in_dir = sys.argv[1]
+#['ASR', 'TRS', 'LABEL']
+# Hyper-parameters come from the "transfert" section of the json conf
+json_conf =json.load(open(sys.argv[2]))
+
+mlp_conf = json_conf["transfert"]
+hidden_size = mlp_conf["hidden_size"]
+loss = mlp_conf["loss"]
+patience = mlp_conf["patience"]
+dropouts = mlp_conf["do"]
+epochs = mlp_conf["epochs"]
+batch_size = mlp_conf["batch"]
+input_activation=mlp_conf["input_activation"]
+output_activation=mlp_conf["output_activation"]
+
+try:
+    k = mlp_conf["sgd"]
+    if mlp_conf["sgd"]["name"] == "adam":
+        sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
+    elif mlp_conf["sgd"]["name"] == "sgd":
+        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
+except (TypeError, KeyError):  # "sgd" is not a complete name/lr mapping: hand the raw value over
+    sgd = mlp_conf["sgd"]
+name = json_conf["name"]
+
+# HDF keys are /<mod>/<lvl>/<TRAIN|DEV|TEST>, as written by 01a-mlp_proj.py
+proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))
+hdf_keys = proj_hdf.keys()
+proj_hdf.close()
+hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
+hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
+hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
+print hdf_mods
+print hdf_lvl
+print hdf_crossval
+
+hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
+transfert_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
+mod1,mod2 = "ASR","TRS"
+for lvl in hdf_lvl :
+    x_train_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"TRAIN"))
+    x_dev_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"DEV"))
+    x_test_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"TEST"))
+    x_train_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"TRAIN"))
+    x_dev_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"DEV"))
+    x_test_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"TEST"))
+
+    if x_train_ASR.shape[1] <= 8 :
+        continue
+
+    pred = train_ae(x_train_ASR.values,
+                    x_dev_ASR.values,
+                    x_test_ASR.values,
+                    hidden_size ,sgd=sgd,
+                    y_train=x_train_TRS.values,
+                    y_dev=x_dev_TRS.values,
+                    y_test=x_test_TRS.values,
+                    epochs=epochs,
+                    patience=patience,
+                    batch_size=batch_size,
+                    input_activation=input_activation,
+                    output_activation=output_activation,
+                    dropouts=dropouts,
+                    best_mod=True,
+                    verbose=1)
+    for num_layer,layer in enumerate(pred):
+        transfert_train = pd.DataFrame(layer[0])
+        transfert_dev = pd.DataFrame(layer[1])
+        transfert_test = pd.DataFrame(layer[2])
+        transfert_train.to_hdf(transfert_proj_path,"{}/{}/TRAIN".format(lvl,"layer"+str(num_layer)))
+        transfert_dev.to_hdf(transfert_proj_path,"{}/{}/DEV".format(lvl,"layer"+str(num_layer)))
+        transfert_test.to_hdf(transfert_proj_path,"{}/{}/TEST".format(lvl,"layer"+str(num_layer)))
+
diff --git a/BOTTLENECK/02c-tsne_mlproj.py b/BOTTLENECK/02c-tsne_mlproj.py
new file mode 100644
index 0000000..bb7e988
--- /dev/null
+++ b/BOTTLENECK/02c-tsne_mlproj.py
@@ -0,0 +1,123 @@
+
+# coding: utf-8
+
+# t-SNE of the MLP projections and of the transfert layers, stored in tsne_proj_df.hdf
+
+# Import
+import gensim
+from scipy import sparse
+import itertools
+from sklearn import preprocessing
+from keras.models import Sequential
+from keras.optimizers import SGD,Adam
+from keras.layers.advanced_activations import ELU,PReLU
+from keras.callbacks import ModelCheckpoint
+from mlp import *
+import pandas as pd
+import sklearn.metrics
+from sklearn.preprocessing import LabelBinarizer
+from sklearn.manifold import TSNE
+import shelve
+import pickle
+from utils import *
+import sys
+import os
+import json
+# Usage: 02c-tsne_mlproj.py <in_dir> <json conf>
+
+in_dir = sys.argv[1]
+#['ASR', 'TRS', 'LABEL']
+# NOTE(review): the "transfert" hyper-parameters loaded below are not used by this script
+json_conf =json.load(open(sys.argv[2]))
+
+mlp_conf = json_conf["transfert"]
+hidden_size = mlp_conf["hidden_size"]
+loss = mlp_conf["loss"]
+patience = mlp_conf["patience"]
+dropouts = mlp_conf["do"]
+epochs = mlp_conf["epochs"]
+batch_size = mlp_conf["batch"]
+input_activation=mlp_conf["input_activation"]
+output_activation=mlp_conf["output_activation"]
+
+try:
+    k = mlp_conf["sgd"]
+    if mlp_conf["sgd"]["name"] == "adam":
+        sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
+    elif mlp_conf["sgd"]["name"] == "sgd":
+        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
+except (TypeError, KeyError):  # "sgd" is not a complete name/lr mapping: hand the raw value over
+    sgd = mlp_conf["sgd"]
+name = json_conf["name"]
+
+# First pass: MLP projections, HDF keys /<mod>/<lvl>/<TRAIN|DEV|TEST>
+print " MLP"
+proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))
+hdf_keys = proj_hdf.keys()
+proj_hdf.close()
+hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
+hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
+hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
+print hdf_mods
+print hdf_lvl
+print hdf_crossval
+
+hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
+tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir,name)
+for mod in hdf_mods:
+    for lvl in hdf_lvl :
+        x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TRAIN"))
+        x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"DEV"))
+        x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TEST"))
+
+        if x_train.shape[1] <= 8 :
+            continue
+        tsne= TSNE()
+        tsne_train=tsne.fit_transform(x_train.values)
+        pd.DataFrame(tsne_train).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"TRAIN"))
+        tsne= TSNE()
+        tsne_dev=tsne.fit_transform(x_dev.values)
+        pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"DEV"))
+        tsne= TSNE()
+        tsne_test=tsne.fit_transform(x_test.values)
+        pd.DataFrame(tsne_test).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"TEST"))
+        tsne = TSNE()
+        tsne_all = tsne.fit_transform(pd.concat([x_train,x_dev,x_test]).values)
+        pd.DataFrame(tsne_all).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"CONCAT"))
+
+print " TRANSFERT"
+
+hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
+proj_hdf = pandas.HDFStore(hdf_proj_path)
+print proj_hdf
+hdf_keys = proj_hdf.keys()
+proj_hdf.close()
+print hdf_keys
+hdf_lvl = set([ x.split("/")[1] for x in hdf_keys ])
+hdf_layer = set( [ x.split("/")[2] for x in hdf_keys ])
+hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
+print hdf_lvl
+print hdf_layer
+print hdf_crossval
+
+tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir,name)
+for lvl in hdf_lvl :
+    for layer in hdf_layer:
+        x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TRAIN"))
+        x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"DEV"))
+        x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TEST"))
+
+        if x_train.shape[1] <= 8 :
+            continue
+        tsne= TSNE()
+        tsne_train=tsne.fit_transform(x_train.values)
+        pd.DataFrame(tsne_train).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(lvl,layer,"TRAIN"))  # keyed by lvl/layer (was a stale `mod`), so layers no longer overwrite each other
+        tsne= TSNE()
+        tsne_dev=tsne.fit_transform(x_dev.values)
+        pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(lvl,layer,"DEV"))
+        tsne= TSNE()
+        tsne_test=tsne.fit_transform(x_test.values)
+        pd.DataFrame(tsne_test).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(lvl,layer,"TEST"))
+        tsne = TSNE()
+        tsne_all = tsne.fit_transform(pd.concat([x_train,x_dev,x_test]).values)
+        pd.DataFrame(tsne_all).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(lvl,layer,"CONCAT"))
diff --git a/BOTTLENECK/03-mlp_score_on_transfert.py b/BOTTLENECK/03-mlp_score_on_transfert.py
new file mode 100644
index 0000000..dd9a99c
--- /dev/null
+++ b/BOTTLENECK/03-mlp_score_on_transfert.py
@@ -0,0 +1,111 @@
+
+# coding: utf-8
+
+# Run train_mlp_pred on each transfert layer against the ASR labels and keep the argmax predictions
+
+# Import
+import gensim
+from scipy import sparse
+import itertools
+from sklearn import preprocessing
+from keras.models import Sequential
+from keras.optimizers import SGD,Adam
+from keras.layers.advanced_activations import ELU,PReLU
+from keras.callbacks import ModelCheckpoint
+from mlp import *
+import sklearn.metrics
+from sklearn.preprocessing import LabelBinarizer
+import shelve
+import pickle
+from utils import *
+import sys
+import os
+import json
+# Usage: 03-mlp_score_on_transfert.py <in_dir> <json conf>
+
+in_dir = sys.argv[1]
+#['ASR', 'TRS', 'LABEL']
+# Hyper-parameters come from the "mlp" section of the json conf
+json_conf =json.load(open(sys.argv[2]))
+
+mlp_conf = json_conf["mlp"]
+hidden_size = mlp_conf["hidden_size"]
+loss = mlp_conf["loss"]
+patience = mlp_conf["patience"]
+dropouts = mlp_conf["do"]
+epochs = mlp_conf["epochs"]
+batch_size = mlp_conf["batch"]
+input_activation=mlp_conf["input_activation"]
+output_activation=mlp_conf["output_activation"]
+
+try:
+    k = mlp_conf["sgd"]
+    if mlp_conf["sgd"]["name"] == "adam":
+        sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
+    elif mlp_conf["sgd"]["name"] == "sgd":
+        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
+except (TypeError, KeyError):  # "sgd" is not a complete name/lr mapping: hand the raw value over
+    sgd = mlp_conf["sgd"]
+name = json_conf["name"]
+
+db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
+shelve_logs=shelve.open("{}/{}/03_logs.shelve".format(in_dir,name),writeback=True)
+
+# HDF keys are /<lvl>/<layer>/<TRAIN|DEV|TEST>, as written by 02b-transfert_ae.py
+keys = db["LABEL"].keys()
+
+hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
+proj_hdf = pandas.HDFStore(hdf_proj_path)
+hdf_keys = proj_hdf.keys()
+print hdf_keys
+proj_hdf.close()
+hdf_lvl = set([ x.split("/")[1] for x in hdf_keys ])
+hdf_layer = set( [ x.split("/")[2] for x in hdf_keys ])
+hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
+print hdf_lvl
+print hdf_crossval
+
+labels_dict = { }
+logs = {}
+for lvl in hdf_lvl :
+    labels_dict[lvl] = {}
+    for layer in hdf_layer:
+        labels_dict[lvl][layer] = {}
+
+for lvl in hdf_lvl :
+    for layer in hdf_layer:
+        x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TRAIN"))
+        x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"DEV"))
+        x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer, "TEST"))
+
+        y_train = db["LABEL"]["ASR"]["TRAIN"]
+        y_dev = db["LABEL"]["ASR"]["DEV"]
+        y_test = db["LABEL"]["ASR"]["TEST"]
+
+        print x_train.shape
+        print x_dev.shape
+        print x_test.shape
+        print y_train.shape
+        print y_dev.shape
+        print y_test.shape
+        pred,hist = train_mlp_pred(x_train.values,y_train,
+                                   x_dev.values,y_dev,
+                                   x_test.values,y_test,
+                                   hidden_size ,sgd=sgd,
+                                   epochs=epochs,
+                                   patience=patience,
+                                   batch_size=batch_size,
+                                   input_activation=input_activation,
+                                   output_activation=output_activation,
+                                   dropouts=dropouts,
+                                   fit_verbose=1)
+        shelve_logs["{}/{}".format(lvl,layer)] = hist
+        labels_dict[lvl][layer]["TRAIN"] = np.argmax(pred[0],axis=1)
+        labels_dict[lvl][layer]["DEV"] = np.argmax(pred[1],axis=1)
+        labels_dict[lvl][layer]["TEST"] = np.argmax(pred[2],axis=1)
+
+db["transfert"] = labels_dict
+shelve_logs.sync()
+shelve_logs.close()
+db.sync()
+db.close()
diff --git a/BOTTLENECK/04-accuracyscore.py b/BOTTLENECK/04-accuracyscore.py
new file mode 100644
index 0000000..09a496f
--- /dev/null
+++ b/BOTTLENECK/04-accuracyscore.py
@@ -0,0 +1,68 @@
+
+# coding: utf-8
+
+# Print one csv row of train/dev/test accuracy per prediction set stored in labels.shelve
+
+# Import
+import gensim
+from scipy import sparse
+import numpy as np
+import itertools
+from sklearn import preprocessing
+from keras.models import Sequential
+from keras.optimizers import SGD,Adam
+from keras.layers.advanced_activations import ELU,PReLU
+from keras.callbacks import ModelCheckpoint
+from mlp import *
+from sklearn import metrics
+from sklearn.preprocessing import LabelBinarizer
+import shelve
+import pickle
+from utils import *
+import sys
+import os
+import json
+
+# Usage: 04-accuracyscore.py <in_dir> <json conf>
+
+in_dir = sys.argv[1]
+#['ASR', 'TRS', 'LABEL']
+# Only the "name" entry of the json conf is used here
+json_conf =json.load(open(sys.argv[2]))
+
+name = json_conf["name"]
+
+db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
+# Keys left after the removals are the projection levels written by 02a-mlp_score_on_BN.py
+keys = sorted(db.keys())
+keys.remove("IDS")
+keys.remove("transfert")
+keys.remove("LABEL")
+mods = ["ASR", "TRS"]
+ref_train = db["LABEL"]["ASR"]["TRAIN"]
+ref_dev = db["LABEL"]["ASR"]["DEV"]
+ref_test = db["LABEL"]["ASR"]["TEST"]
+
+print "name,MOD,level,train,dev,test"
+for mod in mods :
+    for lvl in keys :
+        if "TEST" in db[lvl][mod] :
+            train_score = metrics.accuracy_score(np.argmax(ref_train,axis=1),db[lvl][mod]["TRAIN"])
+            dev_score = metrics.accuracy_score(np.argmax(ref_dev,axis=1),db[lvl][mod]["DEV"])
+            test_score = metrics.accuracy_score(np.argmax(ref_test,axis=1),db[lvl][mod]["TEST"])
+        else :
+            train_score = "ERROR"
+            dev_score = "ERROR"
+            test_score = "ERROR"
+        print ",".join([name,mod, lvl, str(train_score), str(dev_score) , str(test_score)])
+
+for level in db["transfert"].keys() :
+    for layer in db["transfert"][level].keys():
+        if "TRAIN" in db["transfert"][level][layer].keys():
+
+            train_score = metrics.accuracy_score(np.argmax(ref_train,axis=1),db["transfert"][level][layer]["TRAIN"])
+            dev_score = metrics.accuracy_score(np.argmax(ref_dev,axis=1),db["transfert"][level][layer]["DEV"])
+            test_score = metrics.accuracy_score(np.argmax(ref_test,axis=1),db["transfert"][level][layer]["TEST"])
+            print ",".join([name,"transfert",level+"/"+layer, str(train_score), str(dev_score) , str(test_score)])
+
+db.close()
diff --git a/BOTTLENECK/mlp.py b/BOTTLENECK/mlp.py
new file mode 120000
index 0000000..73d1e54
--- /dev/null
+++ b/BOTTLENECK/mlp.py
@@ -0,0 +1 @@
+../LDA/mlp.py
\ No newline at end of file
diff --git a/BOTTLENECK/run01_do_alljson.sh b/BOTTLENECK/run01_do_alljson.sh
new file mode 100644
index 0000000..9a5992d
--- /dev/null
+++ b/BOTTLENECK/run01_do_alljson.sh
@@ -0,0 +1,8 @@
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L0.json RAW
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L0do.json RAW
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L1.json RAW
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L1do.json RAW
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L2.json RAW
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L2do.json RAW
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L3.json RAW
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L3do.json RAW
diff --git a/BOTTLENECK/run02_mlpscore.sh b/BOTTLENECK/run02_mlpscore.sh
new file mode 100644
index 0000000..ddfa375
--- /dev/null
+++ b/BOTTLENECK/run02_mlpscore.sh
@@ -0,0 +1,11 @@
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3do.json
+
+
+
diff --git a/BOTTLENECK/run02b-transfert.sh b/BOTTLENECK/run02b-transfert.sh
new file mode 100644
index 0000000..91537cc
--- /dev/null
+++ b/BOTTLENECK/run02b-transfert.sh
@@ -0,0 +1,8 @@
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3do.json
diff --git a/BOTTLENECK/run03_tsne_MLPtransfert.sh b/BOTTLENECK/run03_tsne_MLPtransfert.sh
new file mode 100644
index 0000000..2c8d147
--- /dev/null
+++ b/BOTTLENECK/run03_tsne_MLPtransfert.sh
@@ -0,0 +1,8 @@
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3do.json
diff --git a/BOTTLENECK/run04-mlp_on_transfert.sh b/BOTTLENECK/run04-mlp_on_transfert.sh
new file mode 100644
index 0000000..2d35bad
--- /dev/null
+++ b/BOTTLENECK/run04-mlp_on_transfert.sh
@@ -0,0 +1,10 @@
+#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3do.json
+
+#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0.json
+
+#THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0do.json
diff --git a/BOTTLENECK/run05_accuracy.sh b/BOTTLENECK/run05_accuracy.sh
new file mode 100644
index 0000000..fec5cf6
--- /dev/null
+++ b/BOTTLENECK/run05_accuracy.sh
@@ -0,0 +1,8 @@
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3do.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0.json
+THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0do.json
diff --git a/BOTTLENECK/run_all.sh b/BOTTLENECK/run_all.sh
new file mode 100644
index 0000000..285c3a7
--- /dev/null
+++ b/BOTTLENECK/run_all.sh
@@ -0,0 +1,22 @@
+bash run_one.sh output_3 output_3/L0do.json gpu0 &
+bash run_one.sh output_3 output_3/L0.json gpu1 &
+bash run_one.sh output_3 output_3/L1do.json gpu0 &
+bash run_one.sh output_3 output_3/L1.json gpu1 &
+wait
+bash run_one.sh output_3 output_3/L2do.json gpu0 &
+bash run_one.sh output_3 output_3/L2.json gpu1 &
+bash run_one.sh output_3 output_3/L3bndo.json gpu0 &
+bash run_one.sh output_3 output_3/L3ce1.json gpu1 &
+wait
+bash run_one.sh output_3 output_3/L3ce.json gpu0 &
+bash run_one.sh output_3 output_3/L3do.json gpu1 &
+bash run_one.sh output_3 output_3/L3.json gpu0 &
+bash run_one.sh output_3 output_3/L3sigmo.json gpu1 &
+wait
+bash run_one.sh output_3 output_3/L4do.json gpu0 &
+bash run_one.sh output_3 output_3/L5do.json gpu1 &
+bash run_one.sh output_3 output_3/L6do.json gpu0 &
+bash run_one.sh output_3 output_3/L7do.json gpu1 &
+wait
+bash run_one.sh output_3 output_3/MaxMLP.json gpu0 &
+bash run_one.sh output_3 output_3/MinMLP.json gpu1 &
diff --git a/BOTTLENECK/run_one.sh b/BOTTLENECK/run_one.sh
new file mode 100644
index 0000000..20743ca
--- /dev/null
+++ b/BOTTLENECK/run_one.sh
@@ -0,0 +1,7 @@
+bn=$(basename $2)
+time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 01a-mlp_proj.py $1 Sparse_tfidf2.shelve $2 RAW) 2>> logs/${bn}_time ; echo MLP_$2 >> logs/${bn}_time
+THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02a-mlp_score_on_BN.py $1 $2
+THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02b-transfert_ae.py $1 $2
+THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02c-tsne_mlproj.py $1 $2
+time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 03-mlp_score_on_transfert.py $1 $2) 2>> logs/${bn}_time ; echo transfert_$2 >> logs/${bn}_time
+THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 04-accuracyscore.py $1 $2 >> $1/res.csv
diff --git a/BOTTLENECK/utils.py b/BOTTLENECK/utils.py
new file mode 120000
index 0000000..50fbc6d
--- /dev/null
+++ b/BOTTLENECK/utils.py
@@ -0,0 +1 @@
+../utils.py
\ No newline at end of file
-- 
1.8.2.3