From 2af8e57f4e1ebcfdd5ba9d3e8963c4853e472982 Mon Sep 17 00:00:00 2001
From: Killian
Date: Fri, 22 Jul 2016 11:10:31 +0200
Subject: [PATCH] change all

---
 LDA/04a-mmdf.py        |  15 ++++---
 LDA/04b-mmf_mini_ae.py | 111 ++++++++++++++++++++++++++++++++++---------------
 LDA/04c-mmf_sae.py     | 101 +++++++++++++++++++++++++++++---------------
 LDA/04d-mmf_dsae.py    |  28 +++++++------
 LDA/04e-mm_vae.py      | 104 +++++++++++++++++++++++++++++----------------
 LDA/05-lts_scoring.py  |  80 +++++++++++++++++++++++++++++++++++
 LDA/mlp.py             |   1 -
 LDA/utils.py           |  59 ++++++++++++++++++++++++++
 LDA/vae.py             |  55 +++++++++++++++++++++--
 9 files changed, 430 insertions(+), 124 deletions(-)
 create mode 100644 LDA/05-lts_scoring.py

diff --git a/LDA/04a-mmdf.py b/LDA/04a-mmdf.py
index 8c49391..a891987 100644
--- a/LDA/04a-mmdf.py
+++ b/LDA/04a-mmdf.py
@@ -31,7 +31,10 @@ from gensim.models import LdaModel
 #db=shelve.open("SPELIKE_MLP_DB.shelve",writeback=True)
 origin_corps=shelve.open("{}".format(sys.argv[2]))
 in_dir = sys.argv[1]
-
+if len(sys.argv) > 3 :
+    features_key = sys.argv[3]
+else :
+    features_key = "LDA"
 
 out_db=shelve.open("{}/mlp_scores.shelve".format(in_dir),writeback=True)
 
@@ -40,16 +43,16 @@ mlp_loss = "categorical_crossentropy"
 mlp_dropouts = [0.25]* len(mlp_h)
 mlp_sgd = Adam(lr=0.0001)
 mlp_epochs = 3000
-mlp_batch_size = 1
+mlp_batch_size = 5
 mlp_input_activation = "relu"
 mlp_output_activation="softmax"
 
 ress = []
-for key in ["TRS", "ASR"] :
+for key in origin_corps[features_key].keys() :
 
-    res=mlp.train_mlp(origin_corps["LDA"][key]["TRAIN"],origin_corps["LABEL"][key]["TRAIN"],
-                      origin_corps["LDA"][key]["DEV"],origin_corps["LABEL"][key]["DEV"],
-                      origin_corps["LDA"][key]["TEST"],origin_corps["LABEL"][key]["TEST"],
+    res=mlp.train_mlp(origin_corps[features_key][key]["TRAIN"],origin_corps["LABEL"][key]["TRAIN"],
+                      origin_corps[features_key][key]["DEV"],origin_corps["LABEL"][key]["DEV"],
+                      origin_corps[features_key][key]["TEST"],origin_corps["LABEL"][key]["TEST"],
                       mlp_h,dropouts=mlp_dropouts,sgd=mlp_sgd,
                       epochs=mlp_epochs,
                       batch_size=mlp_batch_size,
diff --git a/LDA/04b-mmf_mini_ae.py b/LDA/04b-mmf_mini_ae.py
index dc52788..b500b0c 100644
--- a/LDA/04b-mmf_mini_ae.py
+++ b/LDA/04b-mmf_mini_ae.py
@@ -10,6 +10,7 @@ import itertools
 from sklearn import preprocessing
 from keras.models import Sequential
 from keras.optimizers import SGD,Adam
+from keras.layers.advanced_activations import ELU,PReLU
 from mlp import *
 import sklearn.metrics
 import shelve
@@ -24,12 +25,24 @@ infer_model=shelve.open("{}".format(sys.argv[2]))
 in_dir = sys.argv[1]
 #['ASR', 'TRS', 'LABEL']
 # In[6]:
-
+if len(sys.argv) > 4 :
+    features_key = sys.argv[4]
+else :
+    features_key = "LDA"
+save_projection = True
 json_conf =json.load(open(sys.argv[3]))
 ae_conf = json_conf["ae"]
 
 hidden_size= ae_conf["hidden_size"]
-input_activation=ae_conf["input_activation"]
+input_activation = None
+print ae_conf["input_activation"]
+if ae_conf["input_activation"] == "elu":
+    print " ELU"
+    input_activation = PReLU()
+else:
+    print " ELSE"
+    input_activation = ae_conf["input_activation"]
+#input_activation=ae_conf["input_activation"]
 output_activation=ae_conf["output_activation"]
 loss=ae_conf["loss"]
 epochs=ae_conf["epochs"]
@@ -72,14 +85,18 @@ except:
 db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True)
 db["LABEL"]=infer_model["LABEL"]
 #
-keys = ["ASR","TRS"]
+keys = infer_model[features_key].keys()
 
 db["AE"] = {}
-db["LDA"] = {}
+db[features_key] = {}
 for mod in keys :
-    db["LDA"][mod] = train_mlp(infer_model["LDA"][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
-                               infer_model["LDA"][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
-                               infer_model["LDA"][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
+    print infer_model[features_key][mod]["TRAIN"].shape
+    print infer_model[features_key][mod]["DEV"].shape
+    print infer_model[features_key][mod]["TEST"].shape
+
+    db[features_key][mod] = train_mlp(infer_model[features_key][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
+                                      infer_model[features_key][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
+                                      infer_model[features_key][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
                                mlp_h ,sgd=mlp_sgd,
                                epochs=mlp_epochs,
                                batch_size=mlp_batch_size,
@@ -87,13 +104,25 @@ for mod in keys :
                                output_activation=mlp_output_activation,
                                dropouts=mlp_dropouts,
                                fit_verbose=0)
-
-    res=train_ae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"],
+    print input_activation
+    res=train_ae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],infer_model[features_key][mod]["TEST"],
                  hidden_size,patience = patience,sgd=sgd,
                  dropouts=do_do,input_activation=input_activation,output_activation=output_activation,
                  loss=loss,epochs=epochs,batch_size=batch,verbose=0)
     mlp_res_list=[]
-    for layer in res :
+    for nb,layer in enumerate(res) :
+        if save_projection:
+            pd = pandas.DataFrame(layer[0])
+            col_count = (pd.sum(axis=0) != 0)
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TRAIN")
+            pd = pandas.DataFrame(layer[1])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"DEV")
+            pd = pandas.DataFrame(layer[2])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TEST")
+            del pd
         mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
                                       layer[1],infer_model["LABEL"][mod]["DEV"],
                                       layer[2],infer_model["LABEL"][mod]["TEST"],
@@ -103,30 +132,44 @@ for mod in keys :
                                       batch_size=mlp_batch_size,fit_verbose=0))
     db["AE"][mod]=mlp_res_list
 
-mod = "ASR"
-mod2= "TRS"
-mlp_res_list=[]
-
-res = train_ae(infer_model["LDA"][mod]["TRAIN"],
-               infer_model["LDA"][mod]["DEV"],
-               infer_model["LDA"][mod]["TEST"],
-               hidden_size,dropouts=do_do,patience = patience,
-               sgd=sgd,input_activation=input_activation,output_activation=output_activation,loss=loss,epochs=epochs,
-               batch_size=batch,
-               y_train=infer_model["LDA"][mod]["TRAIN"],
-               y_dev=infer_model["LDA"][mod2]["DEV"],
-               y_test=infer_model["LDA"][mod2]["TEST"])
-
-for layer in res :
-    mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
-                                  layer[1],infer_model["LABEL"][mod]["DEV"],
-                                  layer[2],infer_model["LABEL"][mod]["TEST"],
-                                  mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
-                                  output_activation=mlp_output_activation,
-                                  input_activation=input_activation,
-                                  batch_size=mlp_batch_size,fit_verbose=0))
-
-db["AE"]["SPE"] = mlp_res_list
+if "ASR" in keys and "TRS" in keys:
+    mod = "ASR"
+    mod2= "TRS"
+    mlp_res_list=[]
+
+    res = train_ae(infer_model[features_key][mod]["TRAIN"],
+                   infer_model[features_key][mod]["DEV"],
+                   infer_model[features_key][mod]["TEST"],
+                   hidden_size,dropouts=do_do,patience = patience,
+                   sgd=sgd,input_activation=input_activation,output_activation=output_activation,loss=loss,epochs=epochs,
+                   batch_size=batch,
+                   y_train=infer_model[features_key][mod]["TRAIN"],
+                   y_dev=infer_model[features_key][mod2]["DEV"],
+                   y_test=infer_model[features_key][mod2]["TEST"])
+
+    for nb,layer in enumerate(res) :
+        if save_projection:
+            pd = pandas.DataFrame(layer[0])
+            col_count= (pd.sum(axis=0) != 0)
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TRAIN")
+            pd = pandas.DataFrame(layer[1])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"DEV")
+            pd = pandas.DataFrame(layer[2])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TEST")
+            del pd
+
+        mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
+                                      layer[1],infer_model["LABEL"][mod]["DEV"],
+                                      layer[2],infer_model["LABEL"][mod]["TEST"],
+                                      mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
+                                      output_activation=mlp_output_activation,
+                                      input_activation=input_activation,
+                                      batch_size=mlp_batch_size,fit_verbose=0))
+
+    db["AE"]["SPE"] = mlp_res_list
 
 db.sync()
 db.close()
diff --git a/LDA/04c-mmf_sae.py b/LDA/04c-mmf_sae.py
index 1130cac..fc51a57 100644
--- a/LDA/04c-mmf_sae.py
+++ b/LDA/04c-mmf_sae.py
@@ -23,6 +23,11 @@ import json
 infer_model=shelve.open("{}".format(sys.argv[2]))
 in_dir = sys.argv[1]
 
+if len(sys.argv) > 4 :
+    features_key = sys.argv[4]
+else :
+    features_key = "LDA"
+save_projection = True
 #['ASR', 'TRS', 'LABEL']
 # In[6]:
 json_conf =json.load(open(sys.argv[3]))
@@ -47,13 +52,13 @@ except :
     sgd = sae_conf["sgd"]
 
 name = json_conf["name"]
+print name
 try:
     os.mkdir("{}/{}".format(in_dir,name))
 except:
     pass
 db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True)
 #
-keys = ["ASR","TRS"]
 
 mlp_conf = json_conf["mlp"]
 mlp_h = mlp_conf["hidden_size"]
 mlp_loss = mlp_conf["loss"]
@@ -72,23 +77,38 @@ try:
 except :
     mlp_sgd = mlp_conf["sgd"]
 
-
+keys = infer_model[features_key].keys()
 db["SAE"] = {}
 
 db["SAEFT"] = {}
 for mod in keys :
-    res_tuple=train_sae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],
-                        infer_model["LDA"][mod]["TEST"],
+    res_tuple=train_sae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],
+                        infer_model[features_key][mod]["TEST"],
                         hidden_size,dropouts=do_do,
                         patience = "patience",sgd=sgd,input_activation="tanh",
                         output_activation="tanh",loss=loss,epochs=epochs,
                         batch_size=batch,verbose=0)
     #print len(res), [len(x) for x in res[0]], [ len(x) for x in res[1]]
-    for name , levels in zip(["SAE","SAEFT"],res_tuple):
+    for i, levels in zip(["SAE","SAEFT"],res_tuple):
         mlp_res_by_level = []
-        for res in levels:
+        for lvl,res in enumerate(levels):
             mlp_res_list=[]
             for nb,layer in enumerate(res) :
+                if save_projection:
+                    pd = pandas.DataFrame(layer[0])
+                    col_count= (pd.sum(axis=0) != 0)
+                    pd = pd.loc[:,col_count]
+                    hdffile = "{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,mod)
+                    print hdffile
+                    pd.to_hdf(hdffile,"TRAIN")
+                    pd = pandas.DataFrame(layer[1])
+                    pd = pd.loc[:,col_count]
+                    pd.to_hdf(hdffile,"DEV")
+                    pd = pandas.DataFrame(layer[2])
+                    pd = pd.loc[:,col_count]
+                    pd.to_hdf(hdffile,"TEST")
+                    del pd
+
                 mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
                                               layer[1],infer_model["LABEL"][mod]["DEV"],
                                               layer[2],infer_model["LABEL"][mod]["TEST"],
@@ -96,33 +116,48 @@ for mod in keys :
                                               sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,
                                               fit_verbose=0))
             mlp_res_by_level.append(mlp_res_list)
-        db[name][mod]=mlp_res_by_level
-
-mod = "ASR"
-mod2= "TRS"
-res_tuple = train_sae(infer_model["LDA"][mod]["TRAIN"],
-                      infer_model["LDA"][mod]["DEV"],
-                      infer_model["LDA"][mod]["TEST"],
-                      hidden_size,dropouts=[0],patience="patience",
-                      sgd=sgd,input_activation=input_activation,output_activation=input_activation,
-                      loss=loss,epochs=epochs,batch_size=batch,
-                      y_train=infer_model["LDA"][mod2]["TRAIN"],
-                      y_dev=infer_model["LDA"][mod2]["DEV"],
-                      y_test=infer_model["LDA"][mod2]["TEST"])
-
-for name , levels in zip(["SAE","SAEFT"],res_tuple):
-    mlp_res_by_level = []
-    for res in levels :
-        mlp_res_list=[]
-        for layer in res :
-            mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
-                                          layer[1],infer_model["LABEL"][mod]["DEV"],layer[2],
-                                          infer_model["LABEL"][mod]["TEST"],
-                                          mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
-                                          sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,
-                                          fit_verbose=0))
-        mlp_res_by_level.append(mlp_res_list)
-    db[name]["SPE"] = mlp_res_by_level
+        db[i][mod]=mlp_res_by_level
+
+
+if "ASR" in keys and "TRS" in keys :
+    mod = "ASR"
+    mod2= "TRS"
+    res_tuple = train_sae(infer_model[features_key][mod]["TRAIN"],
+                          infer_model[features_key][mod]["DEV"],
+                          infer_model[features_key][mod]["TEST"],
+                          hidden_size,dropouts=[0],patience="patience",
+                          sgd=sgd,input_activation=input_activation,output_activation=input_activation,
+                          loss=loss,epochs=epochs,batch_size=batch,
+                          y_train=infer_model[features_key][mod2]["TRAIN"],
+                          y_dev=infer_model[features_key][mod2]["DEV"],
+                          y_test=infer_model[features_key][mod2]["TEST"])
+
+    for i , levels in zip(["SAE","SAEFT"],res_tuple):
+        mlp_res_by_level = []
+        for lvl,res in enumerate(levels) :
+            mlp_res_list=[]
+            for nb,layer in enumerate(res) :
+                if save_projection:
+                    pd = pandas.DataFrame(layer[0])
+                    col_count= (pd.sum(axis=0) != 0)
+                    pd = pd.loc[:,col_count]
+                    pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"TRAIN")
+                    pd = pandas.DataFrame(layer[1])
+                    pd = pd.loc[:,col_count]
+                    pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"DEV")
+                    pd = pandas.DataFrame(layer[2])
+                    pd = pd.loc[:,col_count]
+                    pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"TEST")
+                    del pd
+
+                mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
+                                              layer[1],infer_model["LABEL"][mod]["DEV"],layer[2],
+                                              infer_model["LABEL"][mod]["TEST"],
+                                              mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
+                                              sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,
+                                              fit_verbose=0))
+            mlp_res_by_level.append(mlp_res_list)
+        db[i]["SPE"] = mlp_res_by_level
 
 db.sync()
 db.close()
diff --git a/LDA/04d-mmf_dsae.py b/LDA/04d-mmf_dsae.py
index d768f9b..401aa2d 100644
--- a/LDA/04d-mmf_dsae.py
+++ b/LDA/04d-mmf_dsae.py
@@ -26,6 +26,10 @@ infer_model=shelve.open("{}".format(sys.argv[2]))
 in_dir = sys.argv[1]
 #['ASR', 'TRS', 'LABEL']
 # In[6]:
+if len(sys.argv) > 4 :
+    features_key = sys.argv[4]
+else :
+    features_key = "LDA"
 
 json_conf =json.load(open(sys.argv[3]))
 
@@ -101,9 +105,9 @@ db["DSAE"] = {}
 
 db["DSAEFT"] = {}
 mod = "ASR"
-res_tuple_ASR = train_ae(infer_model["LDA"][mod]["TRAIN"],
-                         infer_model["LDA"][mod]["DEV"],
-                         infer_model["LDA"][mod]["TEST"],
+res_tuple_ASR = train_ae(infer_model[features_key][mod]["TRAIN"],
+                         infer_model[features_key][mod]["DEV"],
+                         infer_model[features_key][mod]["TEST"],
                          hidden_size,dropouts=do_do,
                          patience = patience,sgd=sgd,
                          input_activation=input_activation,
@@ -122,9 +126,9 @@ for layer in res_tuple_ASR[0]:
 db["DSAE"][mod] = mlp_res_list
 
 mod = "TRS"
-res_tuple_TRS = train_ae(infer_model["LDA"][mod]["TRAIN"],
-                         infer_model["LDA"][mod]["DEV"],
-                         infer_model["LDA"][mod]["TEST"],
+res_tuple_TRS = train_ae(infer_model[features_key][mod]["TRAIN"],
+                         infer_model[features_key][mod]["DEV"],
+                         infer_model[features_key][mod]["TEST"],
                          hidden_size,dropouts=do_do,
                          sgd=sgd,input_activation=input_activation,
                          output_activation=output_activation,loss=loss,epochs=epochs,
@@ -202,12 +206,12 @@ Wtr = [ x[1] for x in transfert]
 
 #print "Wtr", len(Wtr), [ len(x) for x in Wtr],[ len(x[1]) for x in Wtr]
 
-ft_res = ft_dsae(infer_model["LDA"]["ASR"]["TRAIN"],
-                 infer_model["LDA"]["ASR"]["DEV"],
-                 infer_model["LDA"]["ASR"]["TEST"],
-                 y_train=infer_model["LDA"]["TRS"]["TRAIN"],
-                 y_dev=infer_model["LDA"]["TRS"]["DEV"],
-                 y_test=infer_model["LDA"]["TRS"]["TEST"],
+ft_res = ft_dsae(infer_model[features_key]["ASR"]["TRAIN"],
+                 infer_model[features_key]["ASR"]["DEV"],
+                 infer_model[features_key]["ASR"]["TEST"],
+                 y_train=infer_model[features_key]["TRS"]["TRAIN"],
+                 y_dev=infer_model[features_key]["TRS"]["DEV"],
+                 y_test=infer_model[features_key]["TRS"]["TEST"],
                  ae_hidden = hidden_size,
                  transfer_hidden = trans_hidden_size,
                  start_weights = WA,
diff --git a/LDA/04e-mm_vae.py b/LDA/04e-mm_vae.py
index 7818868..60ec159 100644
--- a/LDA/04e-mm_vae.py
+++ b/LDA/04e-mm_vae.py
@@ -21,7 +21,12 @@ infer_model=shelve.open("{}".format(sys.argv[2]))
 in_dir = sys.argv[1]
 #['ASR', 'TRS', 'LABEL']
 # In[6]:
+if len(sys.argv) > 4 :
+    features_key = sys.argv[4]
+else :
+    features_key = "LDA"
+save_projection = True
 
 json_conf =json.load(open(sys.argv[3]))
 
 vae_conf = json_conf["vae"]
@@ -63,10 +68,11 @@ except:
 
 name = json_conf["name"]
 
-
-try:
+try :
+    print "make folder "
     os.mkdir("{}/{}".format(in_dir,name))
 except:
+    print "folder not created"
     pass
 
@@ -74,15 +80,16 @@ db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True)
 db["LABEL"]=infer_model["LABEL"]
 #
-keys = ["ASR","TRS"]
+
+keys = infer_model[features_key].keys()
 
 db["VAE"] = {}
-db["LDA"] = {}
+db[features_key] = {}
 for mod in keys :
     #print mod
-    db["LDA"][mod] = train_mlp(infer_model["LDA"][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
-                               infer_model["LDA"][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
-                               infer_model["LDA"][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
+    db[features_key][mod] = train_mlp(infer_model[features_key][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
+                                      infer_model[features_key][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
+                                      infer_model[features_key][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
                                mlp_h ,sgd=mlp_sgd,
                                epochs=mlp_epochs,
                                batch_size=mlp_batch_size,
@@ -91,13 +98,26 @@ for mod in keys :
                                dropouts=mlp_dropouts,
                                fit_verbose=0)
 
-    res=train_vae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"],
+    res=train_vae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],infer_model[features_key][mod]["TEST"],
                   hidden_size=hidden_size[0],
                   latent_dim=latent_dim,sgd=sgd,
                   input_activation=input_activation,output_activation=output_activation,
                   nb_epochs=epochs,batch_size=batch)
     mlp_res_list=[]
-    for layer in res :
+    for nb,layer in enumerate(res) :
+        if save_projection:
+            pd = pandas.DataFrame(layer[0])
+            col_count = (pd.sum(axis=0) != 0)
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TRAIN")
+            pd = pandas.DataFrame(layer[1])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"DEV")
+            pd = pandas.DataFrame(layer[2])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TEST")
+            del pd
+
         mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
                                       layer[1],infer_model["LABEL"][mod]["DEV"],
                                       layer[2],infer_model["LABEL"][mod]["TEST"],
@@ -107,32 +127,46 @@ for mod in keys :
                                       batch_size=mlp_batch_size,fit_verbose=0))
     db["VAE"][mod]=mlp_res_list
 
-mod = "ASR"
-mod2= "TRS"
-mlp_res_list=[]
-
-res = train_vae(infer_model["LDA"][mod]["TRAIN"],
-                infer_model["LDA"][mod]["DEV"],
-                infer_model["LDA"][mod]["TEST"],
-                hidden_size=hidden_size[0],
-                sgd=sgd,input_activation=input_activation,output_activation=output_activation,
-                latent_dim=latent_dim,
-                nb_epochs=epochs,
-                batch_size=batch,
-                y_train=infer_model["LDA"][mod2]["TRAIN"],
-                y_dev=infer_model["LDA"][mod2]["DEV"],
-                y_test=infer_model["LDA"][mod2]["TEST"])
-
-for layer in res :
-    mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
-                                  layer[1],infer_model["LABEL"][mod]["DEV"],
-                                  layer[2],infer_model["LABEL"][mod]["TEST"],
-                                  mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
-                                  output_activation=mlp_output_activation,
-                                  input_activation=input_activation,
-                                  batch_size=mlp_batch_size,fit_verbose=0))
-
-db["VAE"]["SPE"] = mlp_res_list
+if "ASR" in keys and "TRS" in keys :
+    mod = "ASR"
+    mod2= "TRS"
+    mlp_res_list=[]
+
+    res = train_vae(infer_model[features_key][mod]["TRAIN"],
+                    infer_model[features_key][mod]["DEV"],
+                    infer_model[features_key][mod]["TEST"],
+                    hidden_size=hidden_size[0],
+                    sgd=sgd,input_activation=input_activation,output_activation=output_activation,
+                    latent_dim=latent_dim,
+                    nb_epochs=epochs,
+                    batch_size=batch,
+                    y_train=infer_model[features_key][mod2]["TRAIN"],
+                    y_dev=infer_model[features_key][mod2]["DEV"],
+                    y_test=infer_model[features_key][mod2]["TEST"])
+
+    for nb,layer in enumerate(res) :
+        if save_projection:
+            pd = pandas.DataFrame(layer[0])
+            col_count = (pd.sum(axis=0) != 0)
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TRAIN")
+            pd = pandas.DataFrame(layer[1])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"DEV")
+            pd = pandas.DataFrame(layer[2])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TEST")
+            del pd
+
+        mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
+                                      layer[1],infer_model["LABEL"][mod]["DEV"],
+                                      layer[2],infer_model["LABEL"][mod]["TEST"],
+                                      mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
+                                      output_activation=mlp_output_activation,
+                                      input_activation=input_activation,
+                                      batch_size=mlp_batch_size,fit_verbose=0))
+
+    db["VAE"]["SPE"] = mlp_res_list
 
 db.sync()
 db.close()
diff --git a/LDA/05-lts_scoring.py b/LDA/05-lts_scoring.py
new file mode 100644
index 0000000..9585d75
--- /dev/null
+++ b/LDA/05-lts_scoring.py
@@ -0,0 +1,80 @@
+import sys
+import shelve
+import pickle
+from utils import *
+import sys
+import os
+import json
+import glob
+import tempfile
+import pandas
+import subprocess
+from subprocess import CalledProcessError
+import shutil
+import numpy
+
+in_dir = sys.argv[1]
+json_conf =json.load(open(sys.argv[2]))
+name = json_conf["name"]
+
+ae_m = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name))
+y_train=numpy.argmax(ae_m["LABEL"]["ASR"]["TRAIN"],axis=1)
+_,ytr_path=tempfile.mkstemp()
+ytr_open= open(ytr_path,"w")
+for el in y_train:
+    print >>ytr_open, el
+ytr_open.close()
+
+y_dev=numpy.argmax(ae_m["LABEL"]["ASR"]["DEV"],axis=1)
+_,yd_path=tempfile.mkstemp()
+yd_open = open(yd_path,"w")
+for el in y_dev:
+    print >>yd_open, el
+yd_open.close()
+
+y_test=numpy.argmax(ae_m["LABEL"]["ASR"]["TEST"],axis=1)
+_,yte_path=tempfile.mkstemp()
+yte_open=open(yte_path,"w")
+for el in y_test:
+    print >>yte_open, el
+yte_open.close()
+
+hdfs_files=glob.glob("{}/{}/*.hdf".format(in_dir,name))
+temp_dir=tempfile.mkdtemp()
+out_file=open("{}/{}/malaha_res.txt".format(in_dir,name),"a")
+
+for hdf in hdfs_files:
+    print >>out_file, "Start ---------------------------------------------------"
+    print >>out_file, hdf
+    x_train = pandas.read_hdf(hdf,"TRAIN")
+    x_train.to_csv("{}/xtrain.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False)
+    x_train = pandas.read_hdf(hdf,"DEV")
+    x_train.to_csv("{}/xdev.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False)
+    x_train = pandas.read_hdf(hdf,"TEST")
+    x_train.to_csv("{}/xtest.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False)
+    try :
+        resdev=subprocess.check_output(['Rscript',
+                                        '/home/laboinfo/janod/WorkingDir/erreur_traduction/Author_Topic_Decoda/estimate.R',
+                                        "{}/xtrain.dat".format(temp_dir),
+                                        "{}/xdev.dat".format(temp_dir),
+                                        ytr_path,yd_path])
+
+        restest=subprocess.check_output(['Rscript',
+                                         '/home/laboinfo/janod/WorkingDir/erreur_traduction/Author_Topic_Decoda/estimate.R',
+                                         "{}/xtrain.dat".format(temp_dir),
+                                         "{}/xtest.dat".format(temp_dir),
+                                         ytr_path,yte_path])
+
+        print >>out_file, resdev
+        print >>out_file, hdf
+        print >>out_file, restest
+    except CalledProcessError:
+        print >>out_file, "FAILED"
+        print >>out_file, hdf
+    print >>out_file, "End ---------------------------------------------------"
+
+shutil.rmtree(temp_dir)
+os.remove(ytr_path)
+os.remove(yd_path)
+os.remove(yte_path)
diff --git a/LDA/mlp.py b/LDA/mlp.py
index 7e8e2cb..c83db76 100755
--- a/LDA/mlp.py
+++ b/LDA/mlp.py
@@ -82,7 +82,6 @@ def ft_dsae(train,dev,test,
 
 def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,test_verbose=0,save_pred=False,keep_histo=False):
-
     layers = [Input(shape=(x_train.shape[1],))]
 
     for h in hidden_size:
diff --git a/LDA/utils.py b/LDA/utils.py
index c901e37..627c87e 100644
--- a/LDA/utils.py
+++ b/LDA/utils.py
@@ -1,6 +1,10 @@
 # -*- coding: utf-8 -*-
 import nltk
 import re
+import codecs
+import numpy as np
+import sqlite3
+
 pattern = ur"\d+(?:\.\d+)?\s*%?|\w{1,2}'||[\wéàèùêôûâòìîç]+|[^\w\s]"
 rer_b = re.compile(ur" r e r(?: e r)? b ")
 rer_c = re.compile(ur" r e r(?: e r)? c |r e r( e r)? c' est | rer c' est")
@@ -43,3 +47,58 @@ def select(elm):
 
 def select_mmf(elm):
     return int(elm.split("_")[0])
+
+def get_score(table):
+    mx_train = np.max(table[0])
+    argmx_dev = np.argmax(table[1])
+    mx_dev = table[1][argmx_dev]
+    best_test = table[2][argmx_dev]
+    mx_test = np.max(table[2])
+    print """\tmax train : {}
+    \tmax dev : {}
+    \tmax test : {} - best test : {}
+    \t best epochs : {}""".format(mx_train,mx_dev,mx_test,best_test,argmx_dev)
+    return mx_train,mx_dev,mx_test,best_test,argmx_dev
+
+class WeightedWordsList :
+    @staticmethod
+    def get_key(wtuple):
+        return wtuple[1]
+    @staticmethod
+    def get_okey(wtuple):
+        return wtuple[1][1]
+
+
+    def __init__(self,file_path):
+        self.wlist = codecs.open(file_path,"r","utf8").readlines()
+        self.wlist = [x.strip().split(':') for x in self.wlist ]
+        self.wlist = [ (x, float(y)) for x,y in self.wlist ]
+        self.wdict = {}
+        for x,y in self.wlist:
+            self.wdict[x.encode("utf8")] = y
+
+    def select_best(self,word_list,length=5):
+        scored_word = []
+        for w in word_list:
+            w = w.encode("utf8")
+            if w not in self.wdict :
+                continue
+
+            if len(scored_word) < length:
+                scored_word.append((w,self.wdict[w]))
+            else :
+                w_min= min(enumerate(scored_word),key=WeightedWordsList.get_okey)
+                w_curr = (w, self.wdict[w])
+                if w_min[1][1] < w_curr[1]:
+                    del scored_word[w_min[0]]
+                    scored_word.append(w_curr)
+                    w_min = min(enumerate(scored_word),key=WeightedWordsList.get_okey)
+                    while len(scored_word) > length and w_min[1][1] < w_curr[1] :
+                        del scored_word[w_min[0]]
+                        w_min = min(enumerate(scored_word),key=WeightedWordsList.get_okey)
+                elif w_min[1][1] == w_curr[1]:
+                    scored_word.append(w_curr)
+        return [ w[0] for w in scored_word ]
+
+
+
+
diff --git a/LDA/vae.py b/LDA/vae.py
index b846e53..4a8f858 100644
--- a/LDA/vae.py
+++ b/LDA/vae.py
@@ -16,14 +16,60 @@ from keras.models import Model
 from keras import backend as K
 from keras import objectives
 from keras.datasets import mnist
+from keras.callbacks import EarlyStopping,Callback
+import warnings
 
 import pandas
 import shelve
 import pickle
 
-
-
+class ZeroStopping(Callback):
+    '''Stop training when a monitored quantity has stopped improving.
+    # Arguments
+        monitor: quantity to be monitored.
+        patience: number of epochs with no improvement
+            after which training will be stopped.
+        verbose: verbosity mode.
+        mode: one of {auto, min, max}. In 'min' mode,
+            training will stop when the quantity
+            monitored has stopped decreasing; in 'max'
+            mode it will stop when the quantity
+            monitored has stopped increasing.
+    '''
+    def __init__(self, monitor='val_loss', verbose=0, mode='auto', thresh = 0):
+        super(ZeroStopping, self).__init__()
+
+        self.monitor = monitor
+        self.verbose = verbose
+        self.thresh = thresh  # stop as soon as the monitored value crosses this threshold
+
+        if mode not in ['auto', 'min', 'max']:
+            warnings.warn('EarlyStopping mode %s is unknown, '
+                          'fallback to auto mode.' % (mode),
+                          RuntimeWarning)
+            mode = 'auto'
+
+        if mode == 'min':
+            self.monitor_op = np.less
+        elif mode == 'max':
+            self.monitor_op = np.greater
+        else:
+            if 'acc' in self.monitor:
+                self.monitor_op = np.greater
+            else:
+                self.monitor_op = np.less
+
+    def on_epoch_end(self, epoch, logs={}):
+        current = logs.get(self.monitor)
+        if current is None:
+            warnings.warn('Zero stopping requires %s available!' %
+                          (self.monitor), RuntimeWarning)
+            return
+
+        if self.monitor_op(current, self.thresh):
+            self.best = current
+            self.model.stop_training = True
 
 #batch_size = 16
 #original_dim = 784
@@ -82,8 +128,11 @@ def train_vae(x_train,x_dev,x_test,y_train=None,y_dev=None,y_test=None,hidden_si
     vae.fit(x_train, y_train,
             shuffle=True,
             nb_epoch=nb_epochs,
+            verbose = 1,
             batch_size=batch_size,
-            validation_data=(x_dev, y_dev))
+            validation_data=(x_dev, y_dev),
+            callbacks = [ZeroStopping(monitor='val_loss', thresh=0, verbose=0, mode='min')]
+            )
 
     # build a model to project inputs on the latent space
     encoder = Model(x, z_mean)
-- 
1.8.2.3
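
Notes on the patch:

Every 04* script now takes an optional trailing command-line argument naming which feature table of the shelve to read (sys.argv[3] for 04a-mmdf.py, sys.argv[4] for the others), falling back to "LDA" when it is absent, so the old invocations keep working. A hedged invocation sketch; the shelve, config, and "W2V" key names are placeholders, not files from this repo:

    python 04b-mmf_mini_ae.py out_dir corpus.shelve conf.json W2V
    python 04b-mmf_mini_ae.py out_dir corpus.shelve conf.json        # reads "LDA" as before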
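
The save_projection blocks added to 04b, 04c and 04e all repeat one pattern: build a column mask from the TRAIN projection (dropping dimensions that are zero on every training example), apply that same mask to DEV and TEST, and store the three splits under one HDF5 file, which 05-lts_scoring.py later picks up by globbing *.hdf. A minimal sketch of that pattern; the helper name save_projection_hdf is illustrative, and pandas.to_hdf needs PyTables installed:

    import pandas

    def save_projection_hdf(layer, hdf_path):
        # layer is a (train, dev, test) tuple of 2D arrays
        train = pandas.DataFrame(layer[0])
        col_mask = (train.sum(axis=0) != 0)  # keep only columns active on TRAIN
        train.loc[:, col_mask].to_hdf(hdf_path, "TRAIN")
        pandas.DataFrame(layer[1]).loc[:, col_mask].to_hdf(hdf_path, "DEV")
        pandas.DataFrame(layer[2]).loc[:, col_mask].to_hdf(hdf_path, "TEST")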
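
utils.get_score summarizes a (train, dev, test) table of per-epoch scores: it takes the epoch where DEV peaks and reports the TEST score at that same epoch ("best test") next to the overall TEST maximum, which guards against cherry-picking the test curve. A tiny worked example with made-up accuracies:

    import numpy as np
    from utils import get_score

    table = [np.array([0.90, 0.95, 0.99]),   # train accuracy per epoch
             np.array([0.70, 0.80, 0.75]),   # dev
             np.array([0.65, 0.72, 0.74])]   # test
    # DEV peaks at epoch 1, so get_score(table) returns
    # (0.99, 0.80, 0.74, 0.72, 1): "best test" is 0.72, not the 0.74 maximum.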
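
05-lts_scoring.py shells out to R once per split: each projection is dumped to a whitespace-separated .dat file, handed to Rscript together with the label files, and a CalledProcessError is logged as FAILED instead of aborting the loop. A condensed sketch of that call pattern (Python 2, like the rest of the repo); the rscript path is the hard-coded estimate.R in the script, and run_estimate is an illustrative name:

    import subprocess
    from subprocess import CalledProcessError

    def run_estimate(rscript, xtrain, xeval, ytrain, yeval, log):
        # writes the R output (or FAILED) to the already-open log file
        try:
            res = subprocess.check_output(['Rscript', rscript,
                                           xtrain, xeval, ytrain, yeval])
            print >>log, res
        except CalledProcessError:
            print >>log, "FAILED"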