Compare View

Commits (3)

Showing 17 changed files

BOTTLENECK/01a-mlp_proj.py
... ... @@ -0,0 +1,119 @@
  1 +
  2 +# coding: utf-8
  3 +
  4 +# In[2]:
  5 +
  6 +# Import
  7 +import gensim
  8 +from scipy import sparse
  9 +import itertools
  10 +from sklearn import preprocessing
  11 +from keras.models import Sequential
  12 +from keras.optimizers import SGD,Adam
  13 +from keras.layers.advanced_activations import ELU,PReLU
  14 +from keras.callbacks import ModelCheckpoint
  15 +from mlp import *
  16 +import sklearn.metrics
  17 +from sklearn.preprocessing import LabelBinarizer
  18 +import shelve
  19 +import pickle
  20 +from utils import *
  21 +import sys
  22 +import os
  23 +import json
     +import pandas  # used below for DataFrame / to_hdf
  24 +# In[4]:
  25 +
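     +# usage: python 01a-mlp_proj.py <in_dir> <features.shelve> <conf.json> [features_key]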
  26 +infer_model = shelve.open(sys.argv[2])
  27 +in_dir = sys.argv[1]
  28 +#['ASR', 'TRS', 'LABEL']
  29 +# In[6]:
  30 +if len(sys.argv) > 4 :
  31 + features_key = sys.argv[4]
  32 +else :
  33 + features_key = "LDA"
  34 +save_projection = True
  35 +json_conf = json.load(open(sys.argv[3]))
  36 +ae_conf = json_conf["mlp_proj"]
  37 +
  38 +hidden_size= ae_conf["hidden_size"]
  39 +input_activation = None
  40 +if ae_conf["input_activation"] == "elu":
  41 + print " ELU"
  42 + input_activation = PReLU()  # NB: instantiates PReLU even though the config key says "elu"
  43 +else:
  44 + print " ELSE"
  45 + input_activation = ae_conf["input_activation"]
  47 +output_activation=ae_conf["output_activation"]
  48 +loss=ae_conf["loss"]
  49 +epochs=ae_conf["epochs"]
  50 +batch_size=ae_conf["batch"]
  51 +patience=ae_conf["patience"]
  52 +dropouts=ae_conf["do"]
  53 +try:
  54 + k = ae_conf["sgd"] # raises TypeError when "sgd" is a plain string
  55 + if ae_conf["sgd"]["name"] == "adam":
  56 + sgd = Adam(lr=ae_conf["sgd"]["lr"])
  57 + elif ae_conf["sgd"]["name"] == "sgd":
  58 + sgd = SGD(lr=ae_conf["sgd"]["lr"])
  59 +except (KeyError, TypeError):
  60 + sgd = ae_conf["sgd"] # optimizer given as a plain name string
  61 +
  62 +mlp_conf = json_conf["mlp"]
  63 +mlp_h = mlp_conf["hidden_size"]
  64 +mlp_loss = mlp_conf["loss"]
  65 +mlp_dropouts = mlp_conf["do"]
  66 +mlp_epochs = mlp_conf["epochs"]
  67 +mlp_batch_size = mlp_conf["batch"]
  68 +mlp_input_activation=mlp_conf["input_activation"]
  69 +mlp_output_activation=mlp_conf["output_activation"]
  70 +
  71 +try:
  72 + k = mlp_conf["sgd"] # raises TypeError when "sgd" is a plain string
  73 + if mlp_conf["sgd"]["name"] == "adam":
  74 + mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])
  75 + elif mlp_conf["sgd"]["name"] == "sgd":
  76 + mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"])
  77 +except (KeyError, TypeError):
  78 + mlp_sgd = mlp_conf["sgd"] # optimizer given as a plain name string
  79 +
  80 +
  81 +name = json_conf["name"]
  82 +try :
  83 + os.mkdir("{}/{}".format(in_dir,name))
  84 +except OSError :
  85 + pass
  86 +db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
  87 +db["IDS"]=dict(infer_model["LABEL"])
  88 +#
  89 +keys = infer_model[features_key].keys()
  90 +LABELS = {}
  91 +for mod in keys :
  92 +
  93 + int_labels_train = map(select,infer_model["LABEL"][mod]["TRAIN"])
  94 + binarizer = LabelBinarizer()
  95 + y_train=binarizer.fit_transform(int_labels_train)
  96 + y_dev=binarizer.transform(map(select,infer_model["LABEL"][mod]["DEV"]))
  97 + y_test=binarizer.transform(map(select,infer_model["LABEL"][mod]["TEST"]))
  98 + LABELS[mod]= { "TRAIN":y_train , "DEV" : y_dev, "TEST" : y_test}
  99 + summary,proj = train_mlp_proj(infer_model[features_key][mod]["TRAIN"].todense(),y_train,
  100 + infer_model[features_key][mod]["DEV"].todense(),y_dev,
  101 + infer_model[features_key][mod]["TEST"].todense(),y_test,
  102 + hidden_size ,sgd=sgd,
  103 + epochs=epochs,
  104 + patience=patience,
  105 + batch_size=batch_size,
  106 + input_activation=input_activation,
  107 + output_activation=output_activation,
  108 + dropouts=dropouts,
  109 + fit_verbose=1)
  110 + with open("{}/{}/{}_sum.txt".format(in_dir,name,mod),"w") as output_sum :
  111 + print >>output_sum, summary
  112 + for num_lvl,level in enumerate(proj):
  113 + print len(level) # debug: one entry per split (train/dev/test)
  114 + for num,corp_type in enumerate(["TRAIN","DEV","TEST"]):
  115 + df = pandas.DataFrame(level[num])
  116 + df.to_hdf("{}/{}/MLP_proj_df.hdf".format(in_dir,name),"{}/lvl{}/{}".format(mod,num_lvl,corp_type))
  117 +db["LABEL"] = LABELS
  118 +db.sync()
  119 +db.close()
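
For reference, a configuration file of the kind these scripts load (output_1/L1do.json and friends) could look like the sketch below. Only the key names and overall shape are taken from the code in this diff; every value is an illustrative assumption, not a recorded experimental setting.

{
  "name": "L1do",
  "mlp_proj": {
    "hidden_size": [1024, 512, 256],
    "input_activation": "relu",
    "output_activation": "softmax",
    "loss": "categorical_crossentropy",
    "epochs": 1200,
    "batch": 16,
    "patience": 20,
    "do": [0.25, 0.25, 0.25, 0.25],
    "sgd": {"name": "adam", "lr": 0.0001}
  },
  "mlp": {
    "hidden_size": [128],
    "loss": "categorical_crossentropy",
    "patience": 20,
    "do": [0.25, 0.25],
    "epochs": 1200,
    "batch": 16,
    "input_activation": "relu",
    "output_activation": "softmax",
    "sgd": {"name": "sgd", "lr": 0.01}
  },
  "transfert": {
    "hidden_size": [512, 256, 512],
    "loss": "mse",
    "patience": 20,
    "do": [0, 0, 0, 0],
    "epochs": 500,
    "batch": 8,
    "input_activation": "tanh",
    "output_activation": "tanh",
    "sgd": {"name": "adam", "lr": 0.0001}
  }
}
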
BOTTLENECK/02a-mlp_score_on_BN.py
... ... @@ -0,0 +1,115 @@
  1 +
  2 +# coding: utf-8
  3 +
  4 +# In[2]:
  5 +
  6 +# Import
  7 +import gensim
  8 +from scipy import sparse
  9 +import itertools
  10 +from sklearn import preprocessing
  11 +from keras.models import Sequential
  12 +from keras.optimizers import SGD,Adam
  13 +from keras.layers.advanced_activations import ELU,PReLU
  14 +from keras.callbacks import ModelCheckpoint
  15 +from mlp import *
  16 +import sklearn.metrics
  17 +from sklearn.preprocessing import LabelBinarizer
  18 +import shelve
  19 +import pickle
  20 +from utils import *
  21 +import sys
  22 +import os
  23 +import json
     +import pandas  # used below for HDFStore / read_hdf
     +import numpy as np  # used below for argmax
  24 +# In[4]:
  25 +
  26 +in_dir = sys.argv[1]
  27 +#['ASR', 'TRS', 'LABEL']
  28 +# In[6]:
  29 +json_conf = json.load(open(sys.argv[2]))
  30 +
  31 +mlp_conf = json_conf["mlp"]
  32 +hidden_size = mlp_conf["hidden_size"]
  33 +loss = mlp_conf["loss"]
  34 +patience = mlp_conf["patience"]
  35 +dropouts = mlp_conf["do"]
  36 +epochs = mlp_conf["epochs"]
  37 +batch_size = mlp_conf["batch"]
  38 +input_activation=mlp_conf["input_activation"]
  39 +output_activation=mlp_conf["output_activation"]
  40 +
  41 +try:
  42 + k = mlp_conf["sgd"] # raises TypeError when "sgd" is a plain string
  43 + if mlp_conf["sgd"]["name"] == "adam":
  44 + sgd = Adam(lr=mlp_conf["sgd"]["lr"])
  45 + elif mlp_conf["sgd"]["name"] == "sgd":
  46 + sgd = SGD(lr=mlp_conf["sgd"]["lr"])
  47 +except (KeyError, TypeError):
  48 + sgd = mlp_conf["sgd"] # optimizer given as a plain name string
  49 +name = json_conf["name"]
  50 +
  51 +db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
  52 +shelve_logs=shelve.open("{}/{}/02a_logs.shelve".format(in_dir,name))
  53 +
  54 +#
  55 +keys = db["LABEL"].keys()
  56 +proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))
  57 +hdf_keys = proj_hdf.keys()
  58 +proj_hdf.close()
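     +# store keys look like "/<mod>/lvl<n>/<TRAIN|DEV|TEST>", so split("/") yields
     +# ["", mod, lvl, split] and indices 1..3 below pick out each field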
  59 +hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
  60 +hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
  61 +hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
  62 +print hdf_mods
  63 +print hdf_lvl
  64 +print hdf_crossval
  65 +
  66 +hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
  67 +labels_dict = {"origine":{} }
  68 +logs = {}
  69 +for lvl in hdf_lvl :
  70 + labels_dict[lvl] = {}
  71 + for mod in hdf_mods:
  72 + labels_dict[lvl][mod] = {}
  73 +
  74 +for mod in hdf_mods:
  75 + for lvl in hdf_lvl :
  76 + x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TRAIN"))
  77 + x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"DEV"))
  78 + x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TEST"))
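     + # a projection with <= 8 columns is presumably the softmax output layer itself;
     + # keep its argmax as the baseline ("origine") prediction instead of rescoring it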
  79 + if x_train.shape[1] <= 8 :
  80 + labels_dict["origine"]["TRAIN"] = np.argmax(x_train.values,axis=1)
  81 + labels_dict["origine"]["DEV"] = np.argmax(x_dev.values,axis=1)
  82 + labels_dict["origine"]["TEST"] = np.argmax(x_test.values,axis=1)
  83 + continue
  84 + y_train = db["LABEL"][mod]["TRAIN"]
  85 + y_dev = db["LABEL"][mod]["DEV"]
  86 + y_test = db["LABEL"][mod]["TEST"]
  87 +
  88 + print x_train.shape
  89 + print x_dev.shape
  90 + print x_test.shape
  91 + print y_train.shape
  92 + print y_dev.shape
  93 + print y_test.shape
  94 + pred,hist = train_mlp_pred(x_train.values,y_train,
  95 + x_dev.values,y_dev,
  96 + x_test.values,y_test,
  97 + hidden_size ,sgd=sgd,
  98 + epochs=epochs,
  99 + patience=patience,
  100 + batch_size=batch_size,
  101 + input_activation=input_activation,
  102 + output_activation=output_activation,
  103 + dropouts=dropouts,
  104 + fit_verbose=1)
  105 + shelve_logs["{}/{}".format(mod,lvl)] = hist
  106 + labels_dict[lvl][mod]["TRAIN"] = np.argmax(pred[0],axis=1)
  107 + labels_dict[lvl][mod]["DEV"] = np.argmax(pred[1],axis=1)
  108 + labels_dict[lvl][mod]["TEST"] = np.argmax(pred[2],axis=1)
  109 +
  110 +for lvl in hdf_lvl:
  111 + db[lvl] = labels_dict[lvl]
  112 +shelve_logs.sync()
  113 +shelve_logs.close()
  114 +db.sync()
  115 +db.close()
BOTTLENECK/02b-transfert_ae.py
... ... @@ -0,0 +1,99 @@
  1 +
  2 +# coding: utf-8
  3 +
  4 +# In[2]:
  5 +
  6 +# Import
  7 +import gensim
  8 +from scipy import sparse
  9 +import itertools
  10 +from sklearn import preprocessing
  11 +from keras.models import Sequential
  12 +from keras.optimizers import SGD,Adam
  13 +from keras.layers.advanced_activations import ELU,PReLU
  14 +from keras.callbacks import ModelCheckpoint
  15 +from mlp import *
  16 +import pandas as pd
  17 +import sklearn.metrics
  18 +from sklearn.preprocessing import LabelBinarizer
  19 +import shelve
  20 +import pickle
  21 +from utils import *
  22 +import sys
  23 +import os
  24 +import json
     +import pandas  # the bare "pandas." calls below need this alongside "pd"
  25 +# In[4]:
  26 +
  27 +in_dir = sys.argv[1]
  28 +#['ASR', 'TRS', 'LABEL']
  29 +# In[6]:
  30 +json_conf = json.load(open(sys.argv[2]))
  31 +
  32 +mlp_conf = json_conf["transfert"]
  33 +hidden_size = mlp_conf["hidden_size"]
  34 +loss = mlp_conf["loss"]
  35 +patience = mlp_conf["patience"]
  36 +dropouts = mlp_conf["do"]
  37 +epochs = mlp_conf["epochs"]
  38 +batch_size = mlp_conf["batch"]
  39 +input_activation=mlp_conf["input_activation"]
  40 +output_activation=mlp_conf["output_activation"]
  41 +
  42 +try:
  43 + k = mlp_conf["sgd"] # raises TypeError when "sgd" is a plain string
  44 + if mlp_conf["sgd"]["name"] == "adam":
  45 + sgd = Adam(lr=mlp_conf["sgd"]["lr"])
  46 + elif mlp_conf["sgd"]["name"] == "sgd":
  47 + sgd = SGD(lr=mlp_conf["sgd"]["lr"])
  48 +except (KeyError, TypeError):
  49 + sgd = mlp_conf["sgd"] # optimizer given as a plain name string
  50 +name = json_conf["name"]
  51 +
  52 +#
  53 +proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))
  54 +hdf_keys = proj_hdf.keys()
  55 +proj_hdf.close()
  56 +hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
  57 +hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
  58 +hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
  59 +print hdf_mods
  60 +print hdf_lvl
  61 +print hdf_crossval
  62 +
  63 +hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
  64 +transfert_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
  65 +mod1,mod2 = "ASR","TRS"
  66 +for lvl in hdf_lvl :
  67 + x_train_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"TRAIN"))
  68 + x_dev_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"DEV"))
  69 + x_test_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"TEST"))
  70 + x_train_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"TRAIN"))
  71 + x_dev_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"DEV"))
  72 + x_test_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"TEST"))
  73 +
  74 + if x_train_ASR.shape[1] <= 8 :
  75 + continue
  76 +
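     + # train a bottleneck network that maps the ASR-side projection onto the
     + # corresponding TRS-side projection (a "transfer" autoencoder)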
  77 + pred = train_ae(x_train_ASR.values,
  78 + x_dev_ASR.values,
  79 + x_test_ASR.values,
  80 + hidden_size ,sgd=sgd,
  81 + y_train=x_train_TRS.values,
  82 + y_dev=x_dev_TRS.values,
  83 + y_test=x_test_TRS.values,
  84 + epochs=epochs,
  85 + patience=patience,
  86 + batch_size=batch_size,
  87 + input_activation=input_activation,
  88 + output_activation=output_activation,
  89 + dropouts=dropouts,
  90 + best_mod=True,
  91 + verbose=1)
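     + # pred holds, for each encoder layer, the (train, dev, test) activations;
     + # store each under "<lvl>/layer<n>/<SPLIT>" for 03-mlp_score_on_transfert.py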
  92 + for num_layer,layer in enumerate(pred):
  93 + transfert_train = pd.DataFrame(layer[0])
  94 + transfert_dev = pd.DataFrame(layer[1])
  95 + transfert_test = pd.DataFrame(layer[2])
  96 + transfert_train.to_hdf(transfert_proj_path,"{}/{}/TRAIN".format(lvl,"layer"+str(num_layer)))
  97 + transfert_dev.to_hdf(transfert_proj_path,"{}/{}/DEV".format(lvl,"layer"+str(num_layer)))
  98 + transfert_test.to_hdf(transfert_proj_path,"{}/{}/TEST".format(lvl,"layer"+str(num_layer)))
  99 +
BOTTLENECK/02c-tsne_mlproj.py
... ... @@ -0,0 +1,123 @@
  1 +
  2 +# coding: utf-8
  3 +
  4 +# In[2]:
  5 +
  6 +# Import
  7 +import gensim
  8 +from scipy import sparse
  9 +import itertools
  10 +from sklearn import preprocessing
  11 +from keras.models import Sequential
  12 +from keras.optimizers import SGD,Adam
  13 +from keras.layers.advanced_activations import ELU,PReLU
  14 +from keras.callbacks import ModelCheckpoint
  15 +from mlp import *
  16 +import pandas as pd
  17 +import sklearn.metrics
  18 +from sklearn.preprocessing import LabelBinarizer
  19 +from sklearn.manifold import TSNE
  20 +import shelve
  21 +import pickle
  22 +from utils import *
  23 +import sys
  24 +import os
  25 +import json
     +import pandas  # the bare "pandas." calls below need this alongside "pd"
  26 +# In[4]:
  27 +
  28 +in_dir = sys.argv[1]
  29 +#['ASR', 'TRS', 'LABEL']
  30 +# In[6]:
  31 +json_conf = json.load(open(sys.argv[2]))
  32 +
  33 +mlp_conf = json_conf["transfert"]
  34 +hidden_size = mlp_conf["hidden_size"]
  35 +loss = mlp_conf["loss"]
  36 +patience = mlp_conf["patience"]
  37 +dropouts = mlp_conf["do"]
  38 +epochs = mlp_conf["epochs"]
  39 +batch_size = mlp_conf["batch"]
  40 +input_activation=mlp_conf["input_activation"]
  41 +output_activation=mlp_conf["output_activation"]
  42 +
  43 +try:
  44 + k = mlp_conf["sgd"] # raises TypeError when "sgd" is a plain string
  45 + if mlp_conf["sgd"]["name"] == "adam":
  46 + sgd = Adam(lr=mlp_conf["sgd"]["lr"])
  47 + elif mlp_conf["sgd"]["name"] == "sgd":
  48 + sgd = SGD(lr=mlp_conf["sgd"]["lr"])
  49 +except (KeyError, TypeError):
  50 + sgd = mlp_conf["sgd"] # optimizer given as a plain name string
  51 +name = json_conf["name"]
  52 +
  53 +#
  54 +print " MLP"
  55 +proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))
  56 +hdf_keys = proj_hdf.keys()
  57 +proj_hdf.close()
  58 +hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
  59 +hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
  60 +hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
  61 +print hdf_mods
  62 +print hdf_lvl
  63 +print hdf_crossval
  64 +
  65 +hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
  66 +tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir,name)
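     +# note: each split gets its own TSNE() fit, so the three embeddings are not
     +# directly comparable; the extra CONCAT fit embeds all splits jointly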
  67 +for mod in hdf_mods:
  68 + for lvl in hdf_lvl :
  69 + x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TRAIN"))
  70 + x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"DEV"))
  71 + x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TEST"))
  72 +
  73 + if x_train.shape[1] <= 8 :
  74 + continue
  75 + tsne= TSNE()
  76 + tsne_train=tsne.fit_transform(x_train.values)
  77 + pd.DataFrame(tsne_train).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"TRAIN"))
  78 + tsne= TSNE()
  79 + tsne_dev=tsne.fit_transform(x_dev.values)
  80 + pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"DEV"))
  81 + tsne= TSNE()
  82 + tsne_test=tsne.fit_transform(x_test.values)
  83 + pd.DataFrame(tsne_test).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"TEST"))
  84 + tsne = TSNE()
  85 + tsne_all = tsne.fit_transform(pd.concat([x_train,x_dev,x_test]).values)
  86 + pd.DataFrame(tsne_all).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"CONCAT"))
  87 +
  88 +print " TRANSFERT"
  89 +
  90 +hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
  91 +proj_hdf = pandas.HDFStore(hdf_proj_path)
  92 +print proj_hdf
  93 +hdf_keys = proj_hdf.keys()
  94 +proj_hdf.close()
  95 +print hdf_keys
  96 +hdf_lvl = set([ x.split("/")[1] for x in hdf_keys ])
  97 +hdf_layer = set( [ x.split("/")[2] for x in hdf_keys ])
  98 +hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
  99 +print hdf_lvl
  100 +print hdf_layer
  101 +print hdf_crossval
  102 +
  103 +tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir,name)
  104 +for lvl in hdf_lvl :
  105 + for layer in hdf_layer:
  106 + x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TRAIN"))
  107 + x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"DEV"))
  108 + x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TEST"))
  109 +
  110 + if x_train.shape[1] <= 8 :
  111 + continue
  112 + tsne = TSNE()
  113 + tsne_train = tsne.fit_transform(x_train.values)
  114 + pd.DataFrame(tsne_train).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(lvl,layer,"TRAIN"))
  115 + tsne = TSNE()
  116 + tsne_dev = tsne.fit_transform(x_dev.values)
  117 + pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(lvl,layer,"DEV"))
  118 + tsne = TSNE()
  119 + tsne_test = tsne.fit_transform(x_test.values)
  120 + pd.DataFrame(tsne_test).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(lvl,layer,"TEST"))
  121 + tsne = TSNE()
  122 + tsne_all = tsne.fit_transform(pd.concat([x_train,x_dev,x_test]).values)
  123 + pd.DataFrame(tsne_all).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(lvl,layer,"CONCAT"))
BOTTLENECK/03-mlp_score_on_transfert.py
... ... @@ -0,0 +1,111 @@
  1 +
  2 +# coding: utf-8
  3 +
  4 +# In[2]:
  5 +
  6 +# Import
  7 +import gensim
  8 +from scipy import sparse
  9 +import itertools
  10 +from sklearn import preprocessing
  11 +from keras.models import Sequential
  12 +from keras.optimizers import SGD,Adam
  13 +from keras.layers.advanced_activations import ELU,PReLU
  14 +from keras.callbacks import ModelCheckpoint
  15 +from mlp import *
  16 +import sklearn.metrics
  17 +from sklearn.preprocessing import LabelBinarizer
  18 +import shelve
  19 +import pickle
  20 +from utils import *
  21 +import sys
  22 +import os
  23 +import json
     +import pandas  # used below for HDFStore / read_hdf
     +import numpy as np  # used below for argmax
  24 +# In[4]:
  25 +
  26 +in_dir = sys.argv[1]
  27 +#['ASR', 'TRS', 'LABEL']
  28 +# In[6]:
  29 +json_conf = json.load(open(sys.argv[2]))
  30 +
  31 +mlp_conf = json_conf["mlp"]
  32 +hidden_size = mlp_conf["hidden_size"]
  33 +loss = mlp_conf["loss"]
  34 +patience = mlp_conf["patience"]
  35 +dropouts = mlp_conf["do"]
  36 +epochs = mlp_conf["epochs"]
  37 +batch_size = mlp_conf["batch"]
  38 +input_activation=mlp_conf["input_activation"]
  39 +output_activation=mlp_conf["output_activation"]
  40 +
  41 +try:
  42 + k = mlp_conf["sgd"] # raises TypeError when "sgd" is a plain string
  43 + if mlp_conf["sgd"]["name"] == "adam":
  44 + sgd = Adam(lr=mlp_conf["sgd"]["lr"])
  45 + elif mlp_conf["sgd"]["name"] == "sgd":
  46 + sgd = SGD(lr=mlp_conf["sgd"]["lr"])
  47 +except (KeyError, TypeError):
  48 + sgd = mlp_conf["sgd"] # optimizer given as a plain name string
  49 +name = json_conf["name"]
  50 +
  51 +db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
  52 +shelve_logs=shelve.open("{}/{}/03_logs.shelve".format(in_dir,name),writeback=True)
  53 +
  54 +#
  55 +keys = db["LABEL"].keys()
  56 +
  57 +hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
  58 +proj_hdf = pandas.HDFStore(hdf_proj_path)
  59 +hdf_keys = proj_hdf.keys()
  60 +print hdf_keys
  61 +proj_hdf.close()
  62 +hdf_lvl = set([ x.split("/")[1] for x in hdf_keys ])
  63 +hdf_layer = set( [ x.split("/")[2] for x in hdf_keys ])
  64 +hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
  65 +print hdf_lvl
  66 +print hdf_crossval
  67 +
  68 +labels_dict = { }
  69 +logs = {}
  70 +for lvl in hdf_lvl :
  71 + labels_dict[lvl] = {}
  72 + for layer in hdf_layer:
  73 + labels_dict[lvl][layer] = {}
  74 +
  75 +for lvl in hdf_lvl :
  76 + for layer in hdf_layer:
  77 + x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TRAIN"))
  78 + x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"DEV"))
  79 + x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer, "TEST"))
  80 +
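     + # the transfer projections were computed from ASR inputs, hence the ASR labels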
  81 + y_train = db["LABEL"]["ASR"]["TRAIN"]
  82 + y_dev = db["LABEL"]["ASR"]["DEV"]
  83 + y_test = db["LABEL"]["ASR"]["TEST"]
  84 +
  85 + print x_train.shape
  86 + print x_dev.shape
  87 + print x_test.shape
  88 + print y_train.shape
  89 + print y_dev.shape
  90 + print y_test.shape
  91 + pred,hist = train_mlp_pred(x_train.values,y_train,
  92 + x_dev.values,y_dev,
  93 + x_test.values,y_test,
  94 + hidden_size ,sgd=sgd,
  95 + epochs=epochs,
  96 + patience=patience,
  97 + batch_size=batch_size,
  98 + input_activation=input_activation,
  99 + output_activation=output_activation,
  100 + dropouts=dropouts,
  101 + fit_verbose=1)
  102 + shelve_logs["{}/{}".format(lvl,layer)] = hist
  103 + labels_dict[lvl][layer]["TRAIN"] = np.argmax(pred[0],axis=1)
  104 + labels_dict[lvl][layer]["DEV"] = np.argmax(pred[1],axis=1)
  105 + labels_dict[lvl][layer]["TEST"] = np.argmax(pred[2],axis=1)
  106 +
  107 +db["transfert"] = labels_dict
  108 +shelve_logs.sync()
  109 +shelve_logs.close()
  110 +db.sync()
  111 +db.close()
BOTTLENECK/04-accuracyscore.py
... ... @@ -0,0 +1,68 @@
  1 +
  2 +# coding: utf-8
  3 +
  4 +# In[2]:
  5 +
  6 +# Import
  7 +import gensim
  8 +from scipy import sparse
  9 +import numpy as np
  10 +import itertools
  11 +from sklearn import preprocessing
  12 +from keras.models import Sequential
  13 +from keras.optimizers import SGD,Adam
  14 +from keras.layers.advanced_activations import ELU,PReLU
  15 +from keras.callbacks import ModelCheckpoint
  16 +from mlp import *
  17 +from sklearn import metrics
  18 +from sklearn.preprocessing import LabelBinarizer
  19 +import shelve
  20 +import pickle
  21 +from utils import *
  22 +import sys
  23 +import os
  24 +import json
  25 +
  26 +# In[4]:
  27 +
  28 +in_dir = sys.argv[1]
  29 +#['ASR', 'TRS', 'LABEL']
  30 +# In[6]:
  31 +json_conf = json.load(open(sys.argv[2]))
  32 +
  33 +name = json_conf["name"]
  34 +
  35 +db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
  36 +#
  37 +keys = sorted(db.keys())
  38 +keys.remove("IDS")
  39 +keys.remove("transfert")
  40 +keys.remove("LABEL")
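     +# the remaining keys are the per-level label entries ("origine", "lvl0", ...)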
  41 +mods = ["ASR", "TRS"]
  42 +ref_train = db["LABEL"]["ASR"]["TRAIN"]
  43 +ref_dev = db["LABEL"]["ASR"]["DEV"]
  44 +ref_test = db["LABEL"]["ASR"]["TEST"]
  45 +
  46 +print "name,MOD,level,train,dev,test"
  47 +for mod in mods :
  48 + for lvl in keys :
  49 + if "TEST" in db[lvl][mod] :
  50 + train_score = metrics.accuracy_score(np.argmax(ref_train,axis=1),db[lvl][mod]["TRAIN"])
  51 + dev_score = metrics.accuracy_score(np.argmax(ref_dev,axis=1),db[lvl][mod]["DEV"])
  52 + test_score = metrics.accuracy_score(np.argmax(ref_test,axis=1),db[lvl][mod]["TEST"])
  53 + else :
  54 + train_score = "ERROR"
  55 + dev_score = "ERROR"
  56 + test_score = "ERROR"
  57 + print ",".join([name,mod, lvl, str(train_score), str(dev_score) , str(test_score)])
  58 +
  59 +for level in db["transfert"].keys() :
  60 + for layer in db["transfert"][level].keys():
  61 + if "TRAIN" in db["transfert"][level][layer].keys():
  62 +
  63 + train_score = metrics.accuracy_score(np.argmax(ref_train,axis=1),db["transfert"][level][layer]["TRAIN"])
  64 + dev_score = metrics.accuracy_score(np.argmax(ref_dev,axis=1),db["transfert"][level][layer]["DEV"])
  65 + test_score = metrics.accuracy_score(np.argmax(ref_test,axis=1),db["transfert"][level][layer]["TEST"])
  66 + print ",".join([name,"transfert",level+"/"+layer, str(train_score), str(dev_score) , str(test_score)])
  67 +
  68 +db.close()
BOTTLENECK/mlp.py (symlink)
... ... @@ -0,0 +1 @@
  1 +../LDA/mlp.py
\ No newline at end of file
BOTTLENECK/run01_do_alljson.sh
... ... @@ -0,0 +1,8 @@
  1 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L0.json RAW
  2 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L0do.json RAW
  3 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L1.json RAW
  4 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L1do.json RAW
  5 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L2.json RAW
  6 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L2do.json RAW
  7 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L3.json RAW
  8 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L3do.json RAW
BOTTLENECK/run02_mlpscore.sh
... ... @@ -0,0 +1,11 @@
  1 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0.json
  2 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0do.json
  3 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1.json
  4 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1do.json
  5 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2.json
  6 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2do.json
  7 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3.json
  8 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3do.json
  9 +
  10 +
  11 +
BOTTLENECK/run02b-transfert.sh
... ... @@ -0,0 +1,8 @@
  1 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0.json
  2 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0do.json
  3 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1.json
  4 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1do.json
  5 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2.json
  6 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2do.json
  7 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3.json
  8 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3do.json
BOTTLENECK/run03_tsne_MLPtransfert.sh
... ... @@ -0,0 +1,8 @@
  1 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0.json
  2 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0do.json
  3 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1.json
  4 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1do.json
  5 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2.json
  6 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2do.json
  7 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3.json
  8 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3do.json
BOTTLENECK/run04-mlp_on_transfert.sh
... ... @@ -0,0 +1,10 @@
  1 +#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1.json
  2 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1do.json
  3 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2.json
  4 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2do.json
  5 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3.json
  6 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3do.json
  7 +
  8 +#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0.json
  9 +
  10 +#THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0do.json
BOTTLENECK/run05_accuracy.sh
... ... @@ -0,0 +1,8 @@
  1 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1.json
  2 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1do.json
  3 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2.json
  4 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2do.json
  5 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3.json
  6 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3do.json
  7 +THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0.json
  8 +THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0do.json
BOTTLENECK/run_all.sh
... ... @@ -0,0 +1,22 @@
  1 +bash run_one.sh output_3 output_3/L0do.json gpu0 &
  2 +bash run_one.sh output_3 output_3/L0.json gpu1 &
  3 +bash run_one.sh output_3 output_3/L1do.json gpu0 &
  4 +bash run_one.sh output_3 output_3/L1.json gpu1 &
  5 +wait
  6 +bash run_one.sh output_3 output_3/L2do.json gpu0 &
  7 +bash run_one.sh output_3 output_3/L2.json gpu1 &
  8 +bash run_one.sh output_3 output_3/L3bndo.json gpu0 &
  9 +bash run_one.sh output_3 output_3/L3ce1.json gpu1 &
  10 +wait
  11 +bash run_one.sh output_3 output_3/L3ce.json gpu0 &
  12 +bash run_one.sh output_3 output_3/L3do.json gpu1 &
  13 +bash run_one.sh output_3 output_3/L3.json gpu0 &
  14 +bash run_one.sh output_3 output_3/L3sigmo.json gpu1 &
  15 +wait
  16 +bash run_one.sh output_3 output_3/L4do.json gpu0 &
  17 +bash run_one.sh output_3 output_3/L5do.json gpu1 &
  18 +bash run_one.sh output_3 output_3/L6do.json gpu0 &
  19 +bash run_one.sh output_3 output_3/L7do.json gpu1 &
  20 +wait
  21 +bash run_one.sh output_3 output_3/MaxMLP.json gpu0 &
  22 +bash run_one.sh output_3 output_3/MinMLP.json gpu1 &
BOTTLENECK/run_one.sh
... ... @@ -0,0 +1,7 @@
  1 +bn=$(basename "$2")
  2 +time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 01a-mlp_proj.py $1 Sparse_tfidf2.shelve $2 RAW) 2>> logs/${bn}_time ; echo MLP_$2 >> logs/${bn}_time
  3 +THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02a-mlp_score_on_BN.py $1 $2
  4 +THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02b-transfert_ae.py $1 $2
  5 +THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02c-tsne_mlproj.py $1 $2
  6 +time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 03-mlp_score_on_transfert.py $1 $2) 2>> logs/${bn}_time ; echo transfert_$2 >> logs/${bn}_time
  7 +THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 04-accuracyscore.py $1 $2 >> $1/res.csv
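
Assuming a logs/ directory exists (run_one.sh appends its timing output there), a single configuration can be pushed through the whole pipeline in one call, as run_all.sh does; the paths below are illustrative:

mkdir -p logs
bash run_one.sh output_1 output_1/L1do.json gpu0
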
BOTTLENECK/utils.py (symlink)
... ... @@ -0,0 +1 @@
  1 +../utils.py
\ No newline at end of file
LDA/mlp.py
... ... @@ -6,13 +6,15 @@ from keras.optimizers import SGD,Adam
6 6 from keras.models import Sequential
7 7 from keras.layers import Input, Dense, Dropout
8 8 from keras.models import Model
  9 +from keras.callbacks import ModelCheckpoint, EarlyStopping
9 10 from keras.utils.layer_utils import layer_from_config
10 11 from itertools import izip_longest
11   -
  12 +import tempfile
  13 +import shutil
12 14 import pandas
13 15 from collections import namedtuple
14 16 from sklearn.metrics import accuracy_score as perf
15   -save_tuple= namedtuple("save_tuple",["pred_train","pred_dev","pred_test"])
  17 +save_tuple = namedtuple("save_tuple",["pred_train","pred_dev","pred_test"])
16 18  
17 19  
18 20 def ft_dsae(train,dev,test,
... ... @@ -74,12 +76,114 @@ def ft_dsae(train,dev,test,
74 76 layers.append(Dense(y_train.shape[1],activation=output_activation)(layers[-1]))
75 77 models = [Model(input=layers[0] , output=x) for x in layers[1:]]
76 78 models[-1].compile(optimizer=sgd,loss=loss)
77   - models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],validation_data=(dev,dev),verbose=verbose)
  79 + models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=[EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],validation_data=(dev,dev),verbose=verbose)
78 80 predictions = [ [x.predict(y) for y in param_predict ] for x in models ]
79 81 pred_by_level.append(predictions)
80 82  
81 83 return pred_by_level
82 84  
  85 +def train_mlp_proj(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,patience=20,test_verbose=0):
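     + # Builds a stack of Dense (+ optional Dropout) layers, trains the deepest
     + # model with val_acc checkpointing and early stopping, restores the best
     + # weights, then returns (model summary, per-layer (train, dev, test) projections).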
  86 +
  88 + tempfold = tempfile.mkdtemp()
  89 + model_tempfile= tempfold+"/model.hdf"
  90 +
  91 + layers = [Input(shape=(x_train.shape[1],))]
  92 +
  93 + for h in hidden_size:
  94 + # one Dense block per hidden size, with an optional Dropout in front
  95 + if dropouts:
  96 + d = dropouts.pop(0)
  97 + if d > 0 :
  98 + ldo = Dropout(d)(layers[-1])
  100 + layers.append(Dense(h,init=init,activation=input_activation)(ldo))
  101 + else :
  103 + layers.append(Dense(h,init=init,activation=input_activation)(layers[-1]))
  104 +
  105 +
  106 + # output layer, again with an optional Dropout in front
  107 + if dropouts:
  108 + d = dropouts.pop(0)
  109 + if d > 0 :
  110 + ldo = Dropout(d)(layers[-1])
  111 + layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(ldo))
  112 + else:
  113 + layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(layers[-1]))
  115 +
  116 + models = []
  117 + for l in layers[1:] :
  118 + models.append(Model(layers[0] , l))
  119 + print "nb models : ", len(models), "h :",hidden_size , "layer", len(layers)
  120 + if not sgd:
  121 + sgd = SGD(lr=0.01, decay=0, momentum=0.9)
  122 +
  123 + models[-1].compile(loss=loss, optimizer=sgd,metrics=['accuracy'])
  124 + callbacks = [ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose, save_best_only=True, save_weights_only=True, mode='auto'),
  125 + EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose) ] # we could also try monitoring the loss
  126 + models[-1].summary()  # summary() prints directly and returns None
  127 + hist=models[-1].fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size,verbose=fit_verbose,validation_data=(x_dev,y_dev),callbacks=callbacks)
  128 + models[-1].load_weights(model_tempfile, by_name=False)
  129 + proj = []
  130 + for layer,model in enumerate(models):
  131 + proj.append((model.predict(x_train),model.predict(x_dev),model.predict(x_test)))
  132 +
  133 + shutil.rmtree(tempfold)
  134 + return models[-1].summary(),proj # NB: summary() returns None, so callers get (None, proj)
  135 +
  136 +
  137 +
  138 +
  139 +
  140 +def train_mlp_pred(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,patience=20,test_verbose=0):
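     + # Same layer construction as train_mlp_proj, but keeps only the full network
     + # and returns its (train, dev, test) predictions together with the fit history.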
  141 +
  143 + tempfold = tempfile.mkdtemp()
  144 + model_tempfile= tempfold+"/model.hdf"
  145 +
  146 + layers = [Input(shape=(x_train.shape[1],))]
  147 +
  148 + for h in hidden_size:
  149 + if dropouts:
  150 + d = dropouts.pop(0)
  151 + if d > 0 :
  152 + ldo = Dropout(d)(layers[-1])
  153 + layers.append(Dense(h,init=init,activation=input_activation)(ldo))
  154 + else :
  155 + layers.append(Dense(h,init=init,activation=input_activation)(layers[-1]))
  156 +
  157 +
  158 + if dropouts:
  159 + d = dropouts.pop(0)
  160 + if d > 0 :
  161 + ldo =Dropout(d)(layers[-1])
  162 + layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(ldo))
  163 + else:
  164 + layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(layers[-1]))
  165 +
  166 + model=Model(layers[0] , layers[-1])
  167 + if not sgd:
  168 + sgd = SGD(lr=0.01, decay=0, momentum=0.9)
  169 +
  170 + model.compile(loss=loss, optimizer=sgd,metrics=['accuracy'])
  171 + callbacks = [ModelCheckpoint(model_tempfile, monitor='val_acc', verbose=test_verbose, save_best_only=True, save_weights_only=True, mode='auto'),
  172 + EarlyStopping(monitor='val_acc', patience=patience, verbose=test_verbose) ] # we could also try monitoring the loss
  173 + model.summary()  # summary() prints directly and returns None
  174 + hist=model.fit(x_train, y_train, nb_epoch=epochs, batch_size=batch_size,verbose=fit_verbose,validation_data=(x_dev,y_dev),callbacks=callbacks)
  175 + model.load_weights(model_tempfile, by_name=False)
  176 + pred=(model.predict(x_train),model.predict(x_dev),model.predict(x_test))
  177 +
  178 + shutil.rmtree(tempfold)
  179 + return pred,hist
  180 +
  181 +
  182 +
  183 +
  184 +
  185 +
  186 +
83 187 def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,test_verbose=0,save_pred=False,keep_histo=False):
84 188  
85 189 layers = [Input(shape=(x_train.shape[1],))]
... ... @@ -107,7 +211,7 @@ def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activa
107 211 d = dropouts.pop(0)
108 212 if d > 0 :
109 213 layers.append(Dropout(d)(layers[-1]))
110   -
  214 + print y_train[2:10] # debug: peek at a few training targets
111 215 layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(layers[-1]))
112 216  
113 217 model = Model(layers[0] , layers[-1])
... ... @@ -147,7 +251,7 @@ def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activa
147 251 res.append(hist)
148 252 return res
149 253  
150   -def train_ae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,verbose=1,patience=20,get_weights=False,set_weights=[]):
  254 +def train_ae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,test_verbose=0,verbose=1,patience=20,get_weights=False,set_weights=[],best_mod=False):
151 255  
152 256 input_vect = Input(shape=(train.shape[1],))
153 257  
... ... @@ -193,7 +297,17 @@ def train_ae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dro
193 297 models = [Model(input=previous[0] , output=x) for x in previous[1:]]
194 298 print "MLP", sgd, loss
195 299 models[-1].compile(optimizer=sgd,loss=loss)
196   - models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],validation_data=(dev,dev),verbose=verbose)
  300 + cb = [EarlyStopping(monitor='val_loss', patience=patience, verbose=0)]
  301 + if best_mod:
  302 + tempfold = tempfile.mkdtemp()
  303 + model_tempfile= tempfold+"/model.hdf"
  304 + cb.append( ModelCheckpoint(model_tempfile, monitor='val_loss', verbose=test_verbose, save_best_only=True, save_weights_only=True, mode='auto') )
  305 +
  306 + models[-1].summary()
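    +# NB: validation_data is (dev, dev); in transfer mode (y_* supplied) early
    +# stopping therefore monitors dev self-reconstruction, not the TRS targets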
  307 + models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=cb,validation_data=(dev,dev),verbose=verbose)
  308 + if best_mod:
  309 + models[-1].load_weights(model_tempfile)
  310 + shutil.rmtree(tempfold)
197 311 param_predict = [ train, dev, test ]
198 312 if predict_y :
199 313 param_predict += [ y_train, y_dev ,y_test ]