Commit d414b83e18cdc5d0313f6880349609082dc035c1

Authored by Killian
1 parent 7c16f9bfe8
Exists in master

add Bottleneck MLP + scripts

Showing 16 changed files with 719 additions and 0 deletions Side-by-side Diff

BOTTLENECK/01a-mlp_proj.py
  1 +
  2 +# coding: utf-8
  3 +
  4 +# In[2]:
  5 +
  6 +# Import
  7 +import gensim
  8 +from scipy import sparse
  9 +import itertools
  10 +from sklearn import preprocessing
  11 +from keras.models import Sequential
  12 +from keras.optimizers import SGD,Adam
  13 +from keras.layers.advanced_activations import ELU,PReLU
  14 +from keras.callbacks import ModelCheckpoint
  15 +from mlp import *
  16 +import sklearn.metrics
  17 +from sklearn.preprocessing import LabelBinarizer
  18 +import shelve
  19 +import pickle
  20 +from utils import *
  21 +import sys
  22 +import os
  23 +import json
  24 +# In[4]:
  25 +
  26 +infer_model=shelve.open("{}".format(sys.argv[2]))
  27 +in_dir = sys.argv[1]
  28 +#['ASR', 'TRS', 'LABEL']
  29 +# In[6]:
  30 +if len(sys.argv) > 4 :
  31 + features_key = sys.argv[4]
  32 +else :
  33 + features_key = "LDA"
  34 +save_projection = True
  35 +json_conf =json.load(open(sys.argv[3]))
  36 +ae_conf = json_conf["mlp_proj"]
  37 +
  38 +hidden_size= ae_conf["hidden_size"]
  39 +input_activation = None
  40 +if ae_conf["input_activation"] == "elu":
  41 + print " ELU"
  42 + input_activation = PReLU()
  43 +else:
  44 + print " ELSE"
  45 + input_activation = ae_conf["input_activation"]
  46 +#input_activation=ae_conf["input_activation"]
  47 +output_activation=ae_conf["output_activation"]
  48 +loss=ae_conf["loss"]
  49 +epochs=ae_conf["epochs"]
  50 +batch_size=ae_conf["batch"]
  51 +patience=ae_conf["patience"]
  52 +dropouts=ae_conf["do"]
  53 +try:
  54 + k = ae_conf["sgd"]
  55 + if ae_conf["sgd"]["name"] == "adam":
  56 + sgd = Adam(lr=ae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
  57 + elif ae_conf["sgd"]["name"] == "sgd":
  58 + sgd = SGD(lr=ae_conf["sgd"]["lr"])
  59 +except:
  60 + sgd = ae_conf["sgd"]
  61 +
  62 +mlp_conf = json_conf["mlp"]
  63 +mlp_h = mlp_conf["hidden_size"]
  64 +mlp_loss = mlp_conf["loss"]
  65 +mlp_dropouts = mlp_conf["do"]
  66 +mlp_epochs = mlp_conf["epochs"]
  67 +mlp_batch_size = mlp_conf["batch"]
  68 +mlp_input_activation=mlp_conf["input_activation"]
  69 +mlp_output_activation=mlp_conf["output_activation"]
  70 +
  71 +try:
  72 + k = mlp_conf["sgd"]
  73 + if mlp_conf["sgd"]["name"] == "adam":
  74 + mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
  75 + elif mlp_conf["sgd"]["name"] == "sgd":
  76 + mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"])
  77 +except:
  78 + mlp_sgd = mlp_conf["sgd"]
  79 +
  80 +
  81 +name = json_conf["name"]
  82 +try :
  83 + os.mkdir("{}/{}".format(in_dir,name))
  84 +except OSError :
  85 + pass
  86 +db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
  87 +db["IDS"]=dict(infer_model["LABEL"])
  88 +#
  89 +keys = infer_model[features_key].keys()
  90 +LABELS = {}
  91 +for mod in keys :
  92 +
  93 + int_labels_train = map(select,infer_model["LABEL"][mod]["TRAIN"])
  94 + binarizer = LabelBinarizer()
  95 + y_train=binarizer.fit_transform(int_labels_train)
  96 + y_dev=binarizer.transform(map(select,infer_model["LABEL"][mod]["DEV"]))
  97 + y_test=binarizer.transform(map(select,infer_model["LABEL"][mod]["TEST"]))
  98 + LABELS[mod]= { "TRAIN":y_train , "DEV" : y_dev, "TEST" : y_test}
  99 + sumary,proj = train_mlp_proj(infer_model[features_key][mod]["TRAIN"].todense(),y_train,
  100 + infer_model[features_key][mod]["DEV"].todense(),y_dev,
  101 + infer_model[features_key][mod]["TEST"].todense(),y_test,
  102 + hidden_size ,sgd=sgd,
  103 + epochs=epochs,
  104 + patience=patience,
  105 + batch_size=batch_size,
  106 + input_activation=input_activation,
  107 + output_activation=output_activation,
  108 + dropouts=dropouts,
  109 + fit_verbose=1)
  110 + with open("{}/{}/{}_sum.txt".format(in_dir,name,mod),"w") as output_sum :
  111 + print >>output_sum, sumary
  112 + for num_lvl,level in enumerate(proj):
  113 + print len(level)
  114 + for num,corp_type in enumerate(["TRAIN","DEV","TEST"]):
  115 + pd = pandas.DataFrame(level[num])
  116 + pd.to_hdf("{}/{}/MLP_proj_df.hdf".format(in_dir,name),"{}/lvl{}/{}".format(mod,num_lvl,corp_type))
  117 +db["LABEL"] = LABELS
  118 +db.sync()
  119 +db.close()
BOTTLENECK/02a-mlp_score_on_BN.py

# coding: utf-8

# 02a-mlp_score_on_BN.py -- train a scoring MLP on every bottleneck
# projection produced by 01a and store the predicted labels.
#
# Usage: python 02a-mlp_score_on_BN.py <in_dir> <conf.json>
#   Reads  <in_dir>/<name>/MLP_proj_df.hdf (keys /<mod>/<lvl>/<split>)
#   and    <in_dir>/<name>/labels.shelve  ("LABEL" table from 01a).
#   Writes predictions back into labels.shelve and training histories into
#   02a_logs.shelve.
#
# NOTE(review): Python 2 script; train_mlp_pred(), np and pandas come from
# the star imports of mlp/utils -- confirm there.

# In[2]:

# Import
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD,Adam
from keras.layers.advanced_activations import ELU,PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json
# In[4]:

in_dir = sys.argv[1]
#['ASR', 'TRS', 'LABEL']
# In[6]:
json_conf =json.load(open(sys.argv[2]))

# Scoring-MLP hyper-parameters ("mlp" section of the config).
mlp_conf = json_conf["mlp"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation=mlp_conf["input_activation"]
output_activation=mlp_conf["output_activation"]

# "sgd" is either {"name": "adam"|"sgd", "lr": ...} or a plain value passed
# straight to Keras; the bare except falls back to the raw value.
try:
    k = mlp_conf["sgd"]
    if mlp_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
    elif mlp_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except:
    sgd = mlp_conf["sgd"]
name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
shelve_logs=shelve.open("{}/{}/02a_logs.shelve".format(in_dir,name))

#
keys = db["LABEL"].keys()
# List the HDF keys once, then reopen per read via pandas.read_hdf.
proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))
hdf_keys = proj_hdf.keys()
proj_hdf.close()
# Keys look like /<mod>/<lvl>/<split>.
hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
print hdf_mods
print hdf_lvl
print hdf_crossval

hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
labels_dict = {"origine":{} }
logs = {}
for lvl in hdf_lvl :
    labels_dict[lvl] = {}
    for mod in hdf_mods:
        labels_dict[lvl][mod] = {}

for mod in hdf_mods:
    for lvl in hdf_lvl :
        x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TRAIN"))
        x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"DEV"))
        x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TEST"))
        # Levels with <= 8 columns are taken to be the output layer: their
        # argmax IS the predicted label ("origine"), no extra MLP needed.
        # NOTE(review): "origine" is keyed per split only, so the last mod
        # processed wins -- confirm that is acceptable.
        if x_train.shape[1] <= 8 :
            labels_dict["origine"]["TRAIN"] = np.argmax(x_train.values,axis=1)
            labels_dict["origine"]["DEV"] = np.argmax(x_dev.values,axis=1)
            labels_dict["origine"]["TEST"] = np.argmax(x_test.values,axis=1)
            continue
        y_train = db["LABEL"][mod]["TRAIN"]
        y_dev = db["LABEL"][mod]["DEV"]
        y_test = db["LABEL"][mod]["TEST"]

        print x_train.shape
        print x_dev.shape
        print x_test.shape
        print y_train.shape
        print y_dev.shape
        print y_test.shape
        # pred = (train, dev, test) class-probability matrices; hist = the
        # Keras training history for logging.
        pred,hist = train_mlp_pred(x_train.values,y_train,
                                   x_dev.values,y_dev,
                                   x_test.values,y_test,
                                   hidden_size ,sgd=sgd,
                                   epochs=epochs,
                                   patience=patience,
                                   batch_size=batch_size,
                                   input_activation=input_activation,
                                   output_activation=output_activation,
                                   dropouts=dropouts,
                                   fit_verbose=1)
        shelve_logs["{}/{}".format(mod,lvl)] = hist
        labels_dict[lvl][mod]["TRAIN"] = np.argmax(pred[0],axis=1)
        labels_dict[lvl][mod]["DEV"] = np.argmax(pred[1],axis=1)
        labels_dict[lvl][mod]["TEST"] = np.argmax(pred[2],axis=1)

# Persist one shelve entry per bottleneck level.
for lvl in hdf_lvl:
    db[lvl] = labels_dict[lvl]
shelve_logs.sync()
shelve_logs.close()
db.sync()
db.close()
BOTTLENECK/02b-transfert_ae.py

# coding: utf-8

# 02b-transfert_ae.py -- train a "transfer" autoencoder that maps the ASR
# bottleneck projections onto the TRS ones, and dump the per-layer outputs.
#
# Usage: python 02b-transfert_ae.py <in_dir> <conf.json>
#   Reads  <in_dir>/<name>/MLP_proj_df.hdf     (keys /<mod>/<lvl>/<split>)
#   Writes <in_dir>/<name>/transfert_proj_df.hdf (keys <lvl>/layer<i>/<split>)
#
# NOTE(review): Python 2 script; train_ae() and pandas come from the star
# imports of mlp/utils -- confirm there.

# In[2]:

# Import
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD,Adam
from keras.layers.advanced_activations import ELU,PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import pandas as pd
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json
# In[4]:

in_dir = sys.argv[1]
#['ASR', 'TRS', 'LABEL']
# In[6]:
json_conf =json.load(open(sys.argv[2]))

# Autoencoder hyper-parameters ("transfert" section of the config).
mlp_conf = json_conf["transfert"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation=mlp_conf["input_activation"]
output_activation=mlp_conf["output_activation"]

# "sgd" is either {"name": "adam"|"sgd", "lr": ...} or a plain value passed
# straight to Keras; the bare except falls back to the raw value.
try:
    k = mlp_conf["sgd"]
    if mlp_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
    elif mlp_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except:
    sgd = mlp_conf["sgd"]
name = json_conf["name"]

#
proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))
hdf_keys = proj_hdf.keys()
proj_hdf.close()
# Keys look like /<mod>/<lvl>/<split>.
hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
print hdf_mods
print hdf_lvl
print hdf_crossval

hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
transfert_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
# Source -> target modalities of the transfer: ASR features in, TRS out.
mod1,mod2 = "ASR","TRS"
for lvl in hdf_lvl :
    x_train_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"TRAIN"))
    x_dev_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"DEV"))
    x_test_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"TEST"))
    x_train_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"TRAIN"))
    x_dev_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"DEV"))
    x_test_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"TEST"))

    # Levels with <= 8 dimensions are the output layers; skip them.
    if x_train_ASR.shape[1] <= 8 :
        continue

    # pred: one (train, dev, test) triple per autoencoder layer.
    pred = train_ae(x_train_ASR.values,
                    x_dev_ASR.values,
                    x_test_ASR.values,
                    hidden_size ,sgd=sgd,
                    y_train=x_train_TRS.values,
                    y_dev=x_dev_TRS.values,
                    y_test=x_test_TRS.values,
                    epochs=epochs,
                    patience=patience,
                    batch_size=batch_size,
                    input_activation=input_activation,
                    output_activation=output_activation,
                    dropouts=dropouts,
                    best_mod=True,
                    verbose=1)
    # Store each layer's projections under <lvl>/layer<i>/<split>.
    for num_layer,layer in enumerate(pred):
        transfert_train = pd.DataFrame(layer[0])
        transfert_dev = pd.DataFrame(layer[1])
        transfert_test = pd.DataFrame(layer[2])
        transfert_train.to_hdf(transfert_proj_path,"{}/{}/TRAIN".format(lvl,"layer"+str(num_layer)))
        transfert_dev.to_hdf(transfert_proj_path,"{}/{}/DEV".format(lvl,"layer"+str(num_layer)))
        transfert_test.to_hdf(transfert_proj_path,"{}/{}/TEST".format(lvl,"layer"+str(num_layer)))

BOTTLENECK/02c-tsne_mlproj.py
  1 +
  2 +# coding: utf-8
  3 +
  4 +# In[2]:
  5 +
  6 +# Import
  7 +import gensim
  8 +from scipy import sparse
  9 +import itertools
  10 +from sklearn import preprocessing
  11 +from keras.models import Sequential
  12 +from keras.optimizers import SGD,Adam
  13 +from keras.layers.advanced_activations import ELU,PReLU
  14 +from keras.callbacks import ModelCheckpoint
  15 +from mlp import *
  16 +import pandas as pd
  17 +import sklearn.metrics
  18 +from sklearn.preprocessing import LabelBinarizer
  19 +from sklearn.manifold import TSNE
  20 +import shelve
  21 +import pickle
  22 +from utils import *
  23 +import sys
  24 +import os
  25 +import json
  26 +# In[4]:
  27 +
  28 +in_dir = sys.argv[1]
  29 +#['ASR', 'TRS', 'LABEL']
  30 +# In[6]:
  31 +json_conf =json.load(open(sys.argv[2]))
  32 +
  33 +mlp_conf = json_conf["transfert"]
  34 +hidden_size = mlp_conf["hidden_size"]
  35 +loss = mlp_conf["loss"]
  36 +patience = mlp_conf["patience"]
  37 +dropouts = mlp_conf["do"]
  38 +epochs = mlp_conf["epochs"]
  39 +batch_size = mlp_conf["batch"]
  40 +input_activation=mlp_conf["input_activation"]
  41 +output_activation=mlp_conf["output_activation"]
  42 +
  43 +try:
  44 + k = mlp_conf["sgd"]
  45 + if mlp_conf["sgd"]["name"] == "adam":
  46 + sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
  47 + elif mlp_conf["sgd"]["name"] == "sgd":
  48 + sgd = SGD(lr=mlp_conf["sgd"]["lr"])
  49 +except:
  50 + sgd = mlp_conf["sgd"]
  51 +name = json_conf["name"]
  52 +
  53 +#
  54 +print " MLP"
  55 +proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))
  56 +hdf_keys = proj_hdf.keys()
  57 +proj_hdf.close()
  58 +hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
  59 +hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
  60 +hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
  61 +print hdf_mods
  62 +print hdf_lvl
  63 +print hdf_crossval
  64 +
  65 +hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
  66 +tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir,name)
  67 +for mod in hdf_mods:
  68 + for lvl in hdf_lvl :
  69 + x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TRAIN"))
  70 + x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"DEV"))
  71 + x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TEST"))
  72 +
  73 + if x_train.shape[1] <= 8 :
  74 + continue
  75 + tsne= TSNE()
  76 + tsne_train=tsne.fit_transform(x_train.values)
  77 + pd.DataFrame(tsne_train).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"TRAIN"))
  78 + tsne= TSNE()
  79 + tsne_dev=tsne.fit_transform(x_dev.values)
  80 + pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"DEV"))
  81 + tsne= TSNE()
  82 + tsne_test=tsne.fit_transform(x_test.values)
  83 + pd.DataFrame(tsne_test).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"TEST"))
  84 + tsne = TSNE()
  85 + tsne_all = tsne.fit_transform(pd.concat([x_train,x_dev,x_test]).values)
  86 + pd.DataFrame(tsne_all).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"CONCAT"))
  87 +
  88 +print " TRANSFERT"
  89 +
  90 +hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
  91 +proj_hdf = pandas.HDFStore(hdf_proj_path)
  92 +print proj_hdf
  93 +hdf_keys = proj_hdf.keys()
  94 +proj_hdf.close()
  95 +print hdf_keys
  96 +hdf_lvl = set([ x.split("/")[1] for x in hdf_keys ])
  97 +hdf_layer = set( [ x.split("/")[2] for x in hdf_keys ])
  98 +hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
  99 +print hdf_lvl
  100 +print hdf_layer
  101 +print hdf_crossval
  102 +
  103 +tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir,name)
  104 +for lvl in hdf_lvl :
  105 + for layer in hdf_layer:
  106 + x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TRAIN"))
  107 + x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"DEV"))
  108 + x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TEST"))
  109 +
  110 + if x_train.shape[1] <= 8 :
  111 + continue
  112 + tsne= TSNE()
  113 + tsne_train=tsne.fit_transform(x_train.values)
  114 + pd.DataFrame(tsne_train).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"TRAIN"))
  115 + tsne= TSNE()
  116 + tsne_dev=tsne.fit_transform(x_dev.values)
  117 + pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"DEV"))
  118 + tsne= TSNE()
  119 + tsne_test=tsne.fit_transform(x_test.values)
  120 + pd.DataFrame(tsne_test).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"TEST"))
  121 + tsne = TSNE()
  122 + tsne_all = tsne.fit_transform(pd.concat([x_train,x_dev,x_test]).values)
  123 + pd.DataFrame(tsne_all).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"CONCAT"))
BOTTLENECK/03-mlp_score_on_transfert.py

# coding: utf-8

# 03-mlp_score_on_transfert.py -- train a scoring MLP on each transfer-AE
# projection produced by 02b and store the predicted labels.
#
# Usage: python 03-mlp_score_on_transfert.py <in_dir> <conf.json>
#   Reads  <in_dir>/<name>/transfert_proj_df.hdf (keys /<lvl>/<layer>/<split>)
#   and    <in_dir>/<name>/labels.shelve ("LABEL" table from 01a).
#   Writes predictions under db["transfert"] and histories into 03_logs.shelve.
#
# NOTE(review): Python 2 script; train_mlp_pred(), np and pandas come from
# the star imports of mlp/utils -- confirm there.

# In[2]:

# Import
import gensim
from scipy import sparse
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD,Adam
from keras.layers.advanced_activations import ELU,PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
import sklearn.metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json
# In[4]:

in_dir = sys.argv[1]
#['ASR', 'TRS', 'LABEL']
# In[6]:
json_conf =json.load(open(sys.argv[2]))

# Scoring-MLP hyper-parameters ("mlp" section of the config).
mlp_conf = json_conf["mlp"]
hidden_size = mlp_conf["hidden_size"]
loss = mlp_conf["loss"]
patience = mlp_conf["patience"]
dropouts = mlp_conf["do"]
epochs = mlp_conf["epochs"]
batch_size = mlp_conf["batch"]
input_activation=mlp_conf["input_activation"]
output_activation=mlp_conf["output_activation"]

# "sgd" is either {"name": "adam"|"sgd", "lr": ...} or a plain value passed
# straight to Keras; the bare except falls back to the raw value.
try:
    k = mlp_conf["sgd"]
    if mlp_conf["sgd"]["name"] == "adam":
        sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
    elif mlp_conf["sgd"]["name"] == "sgd":
        sgd = SGD(lr=mlp_conf["sgd"]["lr"])
except:
    sgd = mlp_conf["sgd"]
name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
shelve_logs=shelve.open("{}/{}/03_logs.shelve".format(in_dir,name),writeback=True)

#
keys = db["LABEL"].keys()

hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
proj_hdf = pandas.HDFStore(hdf_proj_path)
hdf_keys = proj_hdf.keys()
print hdf_keys
proj_hdf.close()
# Keys look like /<lvl>/<layer>/<split>.
hdf_lvl = set([ x.split("/")[1] for x in hdf_keys ])
hdf_layer = set( [ x.split("/")[2] for x in hdf_keys ])
hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
print hdf_lvl
print hdf_crossval

labels_dict = { }
logs = {}
for lvl in hdf_lvl :
    labels_dict[lvl] = {}
    for layer in hdf_layer:
        labels_dict[lvl][layer] = {}

for lvl in hdf_lvl :
    for layer in hdf_layer:
        x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TRAIN"))
        x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"DEV"))
        x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer, "TEST"))

        # The transfer projections target TRS from ASR input; scoring uses
        # the ASR label split (same utterance order assumed -- TODO confirm).
        y_train = db["LABEL"]["ASR"]["TRAIN"]
        y_dev = db["LABEL"]["ASR"]["DEV"]
        y_test = db["LABEL"]["ASR"]["TEST"]

        print x_train.shape
        print x_dev.shape
        print x_test.shape
        print y_train.shape
        print y_dev.shape
        print y_test.shape
        # pred = (train, dev, test) class-probability matrices; hist = the
        # Keras training history for logging.
        pred,hist = train_mlp_pred(x_train.values,y_train,
                                   x_dev.values,y_dev,
                                   x_test.values,y_test,
                                   hidden_size ,sgd=sgd,
                                   epochs=epochs,
                                   patience=patience,
                                   batch_size=batch_size,
                                   input_activation=input_activation,
                                   output_activation=output_activation,
                                   dropouts=dropouts,
                                   fit_verbose=1)
        shelve_logs["{}/{}".format(lvl,layer)] = hist
        labels_dict[lvl][layer]["TRAIN"] = np.argmax(pred[0],axis=1)
        labels_dict[lvl][layer]["DEV"] = np.argmax(pred[1],axis=1)
        labels_dict[lvl][layer]["TEST"] = np.argmax(pred[2],axis=1)

db["transfert"] = labels_dict
shelve_logs.sync()
shelve_logs.close()
db.sync()
db.close()
BOTTLENECK/04-accuracyscore.py

# coding: utf-8

# 04-accuracyscore.py -- print CSV accuracy scores for every stored
# prediction (per bottleneck level / modality, then per transfer layer),
# compared against the reference ASR labels.
#
# Usage: python 04-accuracyscore.py <in_dir> <conf.json>   (reads labels.shelve)
# Output (stdout): name,MOD,level,train,dev,test
#
# NOTE(review): Python 2 script; assumes 01a/02a/02b/03 have populated the
# shelve ("IDS", "LABEL", per-level entries, "transfert").

# In[2]:

# Import
import gensim
from scipy import sparse
import numpy as np
import itertools
from sklearn import preprocessing
from keras.models import Sequential
from keras.optimizers import SGD,Adam
from keras.layers.advanced_activations import ELU,PReLU
from keras.callbacks import ModelCheckpoint
from mlp import *
from sklearn import metrics
from sklearn.preprocessing import LabelBinarizer
import shelve
import pickle
from utils import *
import sys
import os
import json

# In[4]:

in_dir = sys.argv[1]
#['ASR', 'TRS', 'LABEL']
# In[6]:
json_conf =json.load(open(sys.argv[2]))

name = json_conf["name"]

db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
#
# Remaining keys after removal are the bottleneck levels written by 02a.
keys = sorted(db.keys())
keys.remove("IDS")
keys.remove("transfert")
keys.remove("LABEL")
mods = ["ASR", "TRS"]
# Reference labels: one-hot matrices from 01a, argmax'd below to class ids.
ref_train = db["LABEL"]["ASR"]["TRAIN"]
ref_dev = db["LABEL"]["ASR"]["DEV"]
ref_test = db["LABEL"]["ASR"]["TEST"]

print "name,MOD,level,train,dev,test"
for mod in mods :
    for lvl in keys :
        # Entries missing a TEST split (e.g. skipped output layers) are
        # reported as ERROR rather than crashing the report.
        if "TEST" in db[lvl][mod] :
            train_score = metrics.accuracy_score(np.argmax(ref_train,axis=1),db[lvl][mod]["TRAIN"])
            dev_score = metrics.accuracy_score(np.argmax(ref_dev,axis=1),db[lvl][mod]["DEV"])
            test_score = metrics.accuracy_score(np.argmax(ref_test,axis=1),db[lvl][mod]["TEST"])
        else :
            train_score = "ERROR"
            dev_score = "ERROR"
            test_score = "ERROR"
        print ",".join([name,mod, lvl, str(train_score), str(dev_score) , str(test_score)])

# Same report for the transfer-AE predictions written by 03.
for level in db["transfert"].keys() :
    for layer in db["transfert"][level].keys():
        if "TRAIN" in db["transfert"][level][layer].keys():

            train_score = metrics.accuracy_score(np.argmax(ref_train,axis=1),db["transfert"][level][layer]["TRAIN"])
            dev_score = metrics.accuracy_score(np.argmax(ref_dev,axis=1),db["transfert"][level][layer]["DEV"])
            test_score = metrics.accuracy_score(np.argmax(ref_test,axis=1),db["transfert"][level][layer]["TEST"])
            print ",".join([name,"transfert",level+"/"+layer, str(train_score), str(dev_score) , str(test_score)])

db.close()
  1 +../LDA/mlp.py
BOTTLENECK/run01_do_alljson.sh
# Train the bottleneck MLP projections (stage 01a) for every layer config:
# the plain config runs on gpu0, its dropout ("do") variant on gpu1.
# Runs are sequential, in the same order as before: L0, L0do, ..., L3, L3do.
for cfg in L0 L1 L2 L3 ; do
    THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/${cfg}.json RAW
    THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/${cfg}do.json RAW
done
BOTTLENECK/run02_mlpscore.sh
  1 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0.json
  2 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0do.json
  3 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1.json
  4 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1do.json
  5 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2.json
  6 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2do.json
  7 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3.json
  8 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3do.json
BOTTLENECK/run02b-transfert.sh
  1 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0.json
  2 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0do.json
  3 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1.json
  4 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1do.json
  5 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2.json
  6 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2do.json
  7 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3.json
  8 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3do.json
BOTTLENECK/run03_tsne_MLPtransfert.sh
  1 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0.json
  2 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0do.json
  3 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1.json
  4 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1do.json
  5 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2.json
  6 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2do.json
  7 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3.json
  8 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3do.json
BOTTLENECK/run04-mlp_on_transfert.sh
  1 +#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1.json
  2 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1do.json
  3 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2.json
  4 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2do.json
  5 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3.json
  6 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3do.json
  7 +
  8 +#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0.json
  9 +
  10 +#THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0do.json
BOTTLENECK/run05_accuracy.sh
  1 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1.json
  2 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1do.json
  3 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2.json
  4 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2do.json
  5 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3.json
  6 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3do.json
  7 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0.json
  8 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0do.json
BOTTLENECK/run_all.sh
  1 +bash run_one.sh output_3 output_3/L0do.json gpu0 &
  2 +bash run_one.sh output_3 output_3/L0.json gpu1 &
  3 +bash run_one.sh output_3 output_3/L1do.json gpu0 &
  4 +bash run_one.sh output_3 output_3/L1.json gpu1 &
  5 +wait
  6 +bash run_one.sh output_3 output_3/L2do.json gpu0 &
  7 +bash run_one.sh output_3 output_3/L2.json gpu1 &
  8 +bash run_one.sh output_3 output_3/L3bndo.json gpu0 &
  9 +bash run_one.sh output_3 output_3/L3ce1.json gpu1 &
  10 +wait
  11 +bash run_one.sh output_3 output_3/L3ce.json gpu0 &
  12 +bash run_one.sh output_3 output_3/L3do.json gpu1 &
  13 +bash run_one.sh output_3 output_3/L3.json gpu0 &
  14 +bash run_one.sh output_3 output_3/L3sigmo.json gpu1 &
  15 +wait
  16 +bash run_one.sh output_3 output_3/L4do.json gpu0 &
  17 +bash run_one.sh output_3 output_3/L5do.json gpu1 &
  18 +bash run_one.sh output_3 output_3/L6do.json gpu0 &
  19 +bash run_one.sh output_3 output_3/L7do.json gpu1 &
  20 +wait
  21 +bash run_one.sh output_3 output_3/MaxMLP.json gpu0 &
  22 +bash run_one.sh output_3 output_3/MinMLP.json gpu1 &
BOTTLENECK/run_one.sh
# Run the full bottleneck pipeline for one configuration.
#   $1 = output directory, $2 = JSON config path, $3 = Theano device (gpuN)
# Stages: 01a projection -> 02a BN scoring -> 02b transfer AE -> 02c t-SNE
# -> 03 transfer scoring -> 04 accuracy report (appended to $1/res.csv).
# The 01a and 03 stages are timed; timings go to logs/<config-basename>_time.
bn=$(basename $2)
time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 01a-mlp_proj.py $1 Sparse_tfidf2.shelve $2 RAW) 2>> logs/${bn}_time ; echo MLP_$2 >> logs/${bn}_time
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02a-mlp_score_on_BN.py $1 $2
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02b-transfert_ae.py $1 $2
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02c-tsne_mlproj.py $1 $2
time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 03-mlp_score_on_transfert.py $1 $2) 2>> logs/${bn}_time ; echo transfert_$2 >> logs/${bn}_time
THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 04-accuracyscore.py $1 $2 >> $1/res.csv
  1 +../utils.py