Commit 2af8e57f4e1ebcfdd5ba9d3e8963c4853e472982

Authored by Killian
1 parent e5108393c8
Exists in master

change all

Showing 9 changed files with 428 additions and 124 deletions Side-by-side Diff

... ... @@ -31,8 +31,11 @@
31 31 #db=shelve.open("SPELIKE_MLP_DB.shelve",writeback=True)
32 32 origin_corps=shelve.open("{}".format(sys.argv[2]))
33 33 in_dir = sys.argv[1]
  34 +if len(sys.argv) > 3 :
  35 + features_key = sys.argv[3]
  36 +else :
  37 + features_key = "LDA"
34 38  
35   -
36 39 out_db=shelve.open("{}/mlp_scores.shelve".format(in_dir),writeback=True)
37 40  
38 41 mlp_h = [ 250, 250 ]
39 42  
40 43  
... ... @@ -40,16 +43,16 @@
40 43 mlp_dropouts = [0.25]* len(mlp_h)
41 44 mlp_sgd = Adam(lr=0.0001)
42 45 mlp_epochs = 3000
43   -mlp_batch_size = 1
  46 +mlp_batch_size = 5
44 47 mlp_input_activation = "relu"
45 48 mlp_output_activation="softmax"
46 49  
47 50 ress = []
48   -for key in ["TRS", "ASR"] :
  51 +for key in origin_corps[features_key].keys() :
49 52  
50   - res=mlp.train_mlp(origin_corps["LDA"][key]["TRAIN"],origin_corps["LABEL"][key]["TRAIN"],
51   - origin_corps["LDA"][key]["DEV"],origin_corps["LABEL"][key]["DEV"],
52   - origin_corps["LDA"][key]["TEST"],origin_corps["LABEL"][key]["TEST"],
  53 + res=mlp.train_mlp(origin_corps[features_key][key]["TRAIN"],origin_corps["LABEL"][key]["TRAIN"],
  54 + origin_corps[features_key][key]["DEV"],origin_corps["LABEL"][key]["DEV"],
  55 + origin_corps[features_key][key]["TEST"],origin_corps["LABEL"][key]["TEST"],
53 56 mlp_h,dropouts=mlp_dropouts,sgd=mlp_sgd,
54 57 epochs=mlp_epochs,
55 58 batch_size=mlp_batch_size,
LDA/04b-mmf_mini_ae.py
... ... @@ -10,6 +10,7 @@
10 10 from sklearn import preprocessing
11 11 from keras.models import Sequential
12 12 from keras.optimizers import SGD,Adam
  13 +from keras.layers.advanced_activations import ELU,PReLU
13 14 from mlp import *
14 15 import sklearn.metrics
15 16 import shelve
16 17  
... ... @@ -24,12 +25,24 @@
24 25 in_dir = sys.argv[1]
25 26 #['ASR', 'TRS', 'LABEL']
26 27 # In[6]:
27   -
  28 +if len(sys.argv) > 4 :
  29 + features_key = sys.argv[4]
  30 +else :
  31 + features_key = "LDA"
  32 +save_projection = True
28 33 json_conf =json.load(open(sys.argv[3]))
29 34 ae_conf = json_conf["ae"]
30 35  
31 36 hidden_size= ae_conf["hidden_size"]
32   -input_activation=ae_conf["input_activation"]
  37 +input_activation = None
  38 +print ae_conf["input_activation"]
  39 +if ae_conf["input_activation"] == "elu":
  40 + print " ELU"
  41 + input_activation = PReLU()
  42 +else:
  43 + print " ELSE"
  44 + input_activation = ae_conf["input_activation"]
  45 +#input_activation=ae_conf["input_activation"]
33 46 output_activation=ae_conf["output_activation"]
34 47 loss=ae_conf["loss"]
35 48 epochs=ae_conf["epochs"]
36 49  
37 50  
... ... @@ -72,14 +85,18 @@
72 85 db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True)
73 86 db["LABEL"]=infer_model["LABEL"]
74 87 #
75   -keys = ["ASR","TRS"]
  88 +keys = infer_model[features_key].keys()
76 89  
77 90 db["AE"] = {}
78   -db["LDA"] = {}
  91 +db[features_key] = {}
79 92 for mod in keys :
80   - db["LDA"][mod] = train_mlp(infer_model["LDA"][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
81   - infer_model["LDA"][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
82   - infer_model["LDA"][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
  93 + print infer_model[features_key][mod]["TRAIN"].shape
  94 + print infer_model[features_key][mod]["DEV"].shape
  95 + print infer_model[features_key][mod]["TEST"].shape
  96 +
  97 + db[features_key][mod] = train_mlp(infer_model[features_key][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
  98 + infer_model[features_key][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
  99 + infer_model[features_key][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
83 100 mlp_h ,sgd=mlp_sgd,
84 101 epochs=mlp_epochs,
85 102 batch_size=mlp_batch_size,
86 103  
... ... @@ -87,13 +104,25 @@
87 104 output_activation=mlp_output_activation,
88 105 dropouts=mlp_dropouts,
89 106 fit_verbose=0)
90   -
91   - res=train_ae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"],
  107 + print input_activation
  108 + res=train_ae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],infer_model[features_key][mod]["TEST"],
92 109 hidden_size,patience = patience,sgd=sgd,
93 110 dropouts=do_do,input_activation=input_activation,output_activation=output_activation,
94 111 loss=loss,epochs=epochs,batch_size=batch,verbose=0)
95 112 mlp_res_list=[]
96   - for layer in res :
  113 + for nb,layer in enumerate(res) :
  114 + if save_projection:
  115 + pd = pandas.DataFrame(layer[0])
  116 + col_count = (pd.sum(axis=0) != 0)
  117 + pd = pd.loc[:,col_count]
  118 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TRAIN")
  119 + pd = pandas.DataFrame(layer[1])
  120 + pd = pd.loc[:,col_count]
  121 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"DEV")
  122 + pd = pandas.DataFrame(layer[2])
  123 + pd = pd.loc[:,col_count]
  124 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TEST")
  125 + del pd
97 126 mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
98 127 layer[1],infer_model["LABEL"][mod]["DEV"],
99 128 layer[2],infer_model["LABEL"][mod]["TEST"],
100 129  
101 130  
102 131  
... ... @@ -103,30 +132,44 @@
103 132 batch_size=mlp_batch_size,fit_verbose=0))
104 133 db["AE"][mod]=mlp_res_list
105 134  
106   -mod = "ASR"
107   -mod2= "TRS"
108   -mlp_res_list=[]
  135 +if "ASR" in keys and "TRS" in keys:
  136 + mod = "ASR"
  137 + mod2= "TRS"
  138 + mlp_res_list=[]
109 139  
110   -res = train_ae(infer_model["LDA"][mod]["TRAIN"],
111   - infer_model["LDA"][mod]["DEV"],
112   - infer_model["LDA"][mod]["TEST"],
113   - hidden_size,dropouts=do_do,patience = patience,
114   - sgd=sgd,input_activation=input_activation,output_activation=output_activation,loss=loss,epochs=epochs,
115   - batch_size=batch,
116   - y_train=infer_model["LDA"][mod]["TRAIN"],
117   - y_dev=infer_model["LDA"][mod2]["DEV"],
118   - y_test=infer_model["LDA"][mod2]["TEST"])
  140 + res = train_ae(infer_model[features_key][mod]["TRAIN"],
  141 + infer_model[features_key][mod]["DEV"],
  142 + infer_model[features_key][mod]["TEST"],
  143 + hidden_size,dropouts=do_do,patience = patience,
  144 + sgd=sgd,input_activation=input_activation,output_activation=output_activation,loss=loss,epochs=epochs,
  145 + batch_size=batch,
  146 + y_train=infer_model[features_key][mod2]["TRAIN"],
  147 + y_dev=infer_model[features_key][mod2]["DEV"],
  148 + y_test=infer_model[features_key][mod2]["TEST"])
119 149  
120   -for layer in res :
121   - mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
122   - layer[1],infer_model["LABEL"][mod]["DEV"],
123   - layer[2],infer_model["LABEL"][mod]["TEST"],
124   - mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
125   - output_activation=mlp_output_activation,
126   - input_activation=input_activation,
127   - batch_size=mlp_batch_size,fit_verbose=0))
  150 + for nb,layer in enumerate(res) :
  151 + if save_projection:
  152 + pd = pandas.DataFrame(layer[0])
  153 + col_count= (pd.sum(axis=0) != 0)
  154 + pd = pd.loc[:,col_count]
  155 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TRAIN")
  156 + pd = pandas.DataFrame(layer[1])
  157 + pd = pd.loc[:,col_count]
  158 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"DEV")
  159 + pd = pandas.DataFrame(layer[2])
  160 + pd = pd.loc[:,col_count]
  161 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TEST")
  162 + del pd
128 163  
129   -db["AE"]["SPE"] = mlp_res_list
  164 + mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
  165 + layer[1],infer_model["LABEL"][mod]["DEV"],
  166 + layer[2],infer_model["LABEL"][mod]["TEST"],
  167 + mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
  168 + output_activation=mlp_output_activation,
  169 + input_activation=input_activation,
  170 + batch_size=mlp_batch_size,fit_verbose=0))
  171 +
  172 + db["AE"]["SPE"] = mlp_res_list
130 173  
131 174 db.sync()
132 175 db.close()
... ... @@ -23,6 +23,11 @@
23 23  
24 24 infer_model=shelve.open("{}".format(sys.argv[2]))
25 25 in_dir = sys.argv[1]
  26 +if len(sys.argv) > 4 :
  27 + features_key = sys.argv[4]
  28 +else :
  29 + features_key = "LDA"
  30 +save_projection = True
26 31 #['ASR', 'TRS', 'LABEL']
27 32 # In[6]:
28 33 json_conf =json.load(open(sys.argv[3]))
29 34  
... ... @@ -47,13 +52,13 @@
47 52 sgd = sae_conf["sgd"]
48 53  
49 54 name = json_conf["name"]
  55 +print name
50 56 try:
51 57 os.mkdir("{}/{}".format(in_dir,name))
52 58 except:
53 59 pass
54 60 db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True)
55 61 #
56   -keys = ["ASR","TRS"]
57 62 mlp_conf = json_conf["mlp"]
58 63 mlp_h = mlp_conf["hidden_size"]
59 64 mlp_loss = mlp_conf["loss"]
60 65  
61 66  
62 67  
63 68  
... ... @@ -72,23 +77,38 @@
72 77 except :
73 78 mlp_sgd = mlp_conf["sgd"]
74 79  
75   -
  80 +keys = infer_model[features_key].keys()
76 81 db["SAE"] = {}
77 82  
78 83 db["SAEFT"] = {}
79 84 for mod in keys :
80   - res_tuple=train_sae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],
81   - infer_model["LDA"][mod]["TEST"],
  85 + res_tuple=train_sae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],
  86 + infer_model[features_key][mod]["TEST"],
82 87 hidden_size,dropouts=do_do,
83 88 patience = "patience",sgd=sgd,input_activation="tanh",
84 89 output_activation="tanh",loss=loss,epochs=epochs,
85 90 batch_size=batch,verbose=0)
86 91 #print len(res), [len(x) for x in res[0]], [ len(x) for x in res[1]]
87   - for name , levels in zip(["SAE","SAEFT"],res_tuple):
  92 + for i, levels in zip(["SAE","SAEFT"],res_tuple):
88 93 mlp_res_by_level = []
89   - for res in levels:
  94 + for lvl,res in enumerate(levels):
90 95 mlp_res_list=[]
91 96 for nb,layer in enumerate(res) :
  97 + if save_projection:
  98 + pd = pandas.DataFrame(layer[0])
  99 + col_count= (pd.sum(axis=0) != 0)
  100 + pd = pd.loc[:,col_count]
  101 + hdffile = "{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,mod)
  102 + print hdffile
  103 + pd.to_hdf(hdffile,"TRAIN")
  104 + pd = pandas.DataFrame(layer[1])
  105 + pd = pd.loc[:,col_count]
  106 + pd.to_hdf(hdffile,"DEV")
  107 + pd = pandas.DataFrame(layer[2])
  108 + pd = pd.loc[:,col_count]
  109 + pd.to_hdf(hdffile,"TEST")
  110 + del pd
  111 +
92 112 mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
93 113 layer[1],infer_model["LABEL"][mod]["DEV"],
94 114 layer[2],infer_model["LABEL"][mod]["TEST"],
95 115  
96 116  
... ... @@ -96,33 +116,48 @@
96 116 sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,
97 117 fit_verbose=0))
98 118 mlp_res_by_level.append(mlp_res_list)
99   - db[name][mod]=mlp_res_by_level
  119 + db[i][mod]=mlp_res_by_level
100 120  
101   -mod = "ASR"
102   -mod2= "TRS"
103   -res_tuple = train_sae(infer_model["LDA"][mod]["TRAIN"],
104   - infer_model["LDA"][mod]["DEV"],
105   - infer_model["LDA"][mod]["TEST"],
106   - hidden_size,dropouts=[0],patience="patience",
107   - sgd=sgd,input_activation=input_activation,output_activation=input_activation,
108   - loss=loss,epochs=epochs,batch_size=batch,
109   - y_train=infer_model["LDA"][mod2]["TRAIN"],
110   - y_dev=infer_model["LDA"][mod2]["DEV"],
111   - y_test=infer_model["LDA"][mod2]["TEST"])
112 121  
113   -for name , levels in zip(["SAE","SAEFT"],res_tuple):
114   - mlp_res_by_level = []
115   - for res in levels :
116   - mlp_res_list=[]
117   - for layer in res :
118   - mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
119   - layer[1],infer_model["LABEL"][mod]["DEV"],layer[2],
120   - infer_model["LABEL"][mod]["TEST"],
121   - mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
122   - sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,
123   - fit_verbose=0))
124   - mlp_res_by_level.append(mlp_res_list)
125   - db[name]["SPE"] = mlp_res_by_level
  122 +if "ASR" in keys and "TRS" in keys :
  123 + mod = "ASR"
  124 + mod2= "TRS"
  125 + res_tuple = train_sae(infer_model[features_key][mod]["TRAIN"],
  126 + infer_model[features_key][mod]["DEV"],
  127 + infer_model[features_key][mod]["TEST"],
  128 + hidden_size,dropouts=[0],patience="patience",
  129 + sgd=sgd,input_activation=input_activation,output_activation=input_activation,
  130 + loss=loss,epochs=epochs,batch_size=batch,
  131 + y_train=infer_model[features_key][mod2]["TRAIN"],
  132 + y_dev=infer_model[features_key][mod2]["DEV"],
  133 + y_test=infer_model[features_key][mod2]["TEST"])
  134 +
  135 + for i , levels in zip(["SAE","SAEFT"],res_tuple):
  136 + mlp_res_by_level = []
  137 + for lvl,res in enumerate(levels) :
  138 + mlp_res_list=[]
  139 + for nb,layer in enumerate(res) :
  140 + if save_projection:
  141 + pd = pandas.DataFrame(layer[0])
  142 + col_count= (pd.sum(axis=0) != 0)
  143 + pd = pd.loc[:,col_count]
  144 + pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"TRAIN")
  145 + pd = pandas.DataFrame(layer[1])
  146 + pd = pd.loc[:,col_count]
  147 + pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"DEV")
  148 + pd = pandas.DataFrame(layer[2])
  149 + pd = pd.loc[:,col_count]
  150 + pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"TEST")
  151 + del pd
  152 +
  153 + mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
  154 + layer[1],infer_model["LABEL"][mod]["DEV"],layer[2],
  155 + infer_model["LABEL"][mod]["TEST"],
  156 + mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
  157 + sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,
  158 + fit_verbose=0))
  159 + mlp_res_by_level.append(mlp_res_list)
  160 + db[i]["SPE"] = mlp_res_by_level
126 161  
127 162 db.sync()
128 163 db.close()
... ... @@ -26,6 +26,10 @@
26 26 in_dir = sys.argv[1]
27 27 #['ASR', 'TRS', 'LABEL']
28 28 # In[6]:
  29 +if len(sys.argv) > 4 :
  30 + features_key = sys.argv[4]
  31 +else :
  32 + features_key = "LDA"
29 33  
30 34 json_conf =json.load(open(sys.argv[3]))
31 35  
... ... @@ -101,9 +105,9 @@
101 105  
102 106 db["DSAEFT"] = {}
103 107 mod = "ASR"
104   -res_tuple_ASR = train_ae(infer_model["LDA"][mod]["TRAIN"],
105   - infer_model["LDA"][mod]["DEV"],
106   - infer_model["LDA"][mod]["TEST"],
  108 +res_tuple_ASR = train_ae(infer_model[features_key][mod]["TRAIN"],
  109 + infer_model[features_key][mod]["DEV"],
  110 + infer_model[features_key][mod]["TEST"],
107 111 hidden_size,dropouts=do_do,
108 112 patience = patience,sgd=sgd,
109 113 input_activation=input_activation,
... ... @@ -122,9 +126,9 @@
122 126  
123 127 db["DSAE"][mod] = mlp_res_list
124 128 mod = "TRS"
125   -res_tuple_TRS = train_ae(infer_model["LDA"][mod]["TRAIN"],
126   - infer_model["LDA"][mod]["DEV"],
127   - infer_model["LDA"][mod]["TEST"],
  129 +res_tuple_TRS = train_ae(infer_model[features_key][mod]["TRAIN"],
  130 + infer_model[features_key][mod]["DEV"],
  131 + infer_model[features_key][mod]["TEST"],
128 132 hidden_size,dropouts=do_do,
129 133 sgd=sgd,input_activation=input_activation,
130 134 output_activation=output_activation,loss=loss,epochs=epochs,
... ... @@ -202,12 +206,12 @@
202 206  
203 207 #print "Wtr", len(Wtr), [ len(x) for x in Wtr],[ len(x[1]) for x in Wtr]
204 208  
205   -ft_res = ft_dsae(infer_model["LDA"]["ASR"]["TRAIN"],
206   - infer_model["LDA"]["ASR"]["DEV"],
207   - infer_model["LDA"]["ASR"]["TEST"],
208   - y_train=infer_model["LDA"]["TRS"]["TRAIN"],
209   - y_dev=infer_model["LDA"]["TRS"]["DEV"],
210   - y_test=infer_model["LDA"]["TRS"]["TEST"],
  209 +ft_res = ft_dsae(infer_model[features_key]["ASR"]["TRAIN"],
  210 + infer_model[features_key]["ASR"]["DEV"],
  211 + infer_model[features_key]["ASR"]["TEST"],
  212 + y_train=infer_model[features_key]["TRS"]["TRAIN"],
  213 + y_dev=infer_model[features_key]["TRS"]["DEV"],
  214 + y_test=infer_model[features_key]["TRS"]["TEST"],
211 215 ae_hidden = hidden_size,
212 216 transfer_hidden = trans_hidden_size,
213 217 start_weights = WA,
... ... @@ -21,7 +21,12 @@
21 21 in_dir = sys.argv[1]
22 22 #['ASR', 'TRS', 'LABEL']
23 23 # In[6]:
  24 +if len(sys.argv) > 4 :
  25 + features_key = sys.argv[4]
  26 +else :
  27 + features_key = "LDA"
24 28  
  29 +save_projection = True
25 30 json_conf =json.load(open(sys.argv[3]))
26 31 vae_conf = json_conf["vae"]
27 32  
28 33  
... ... @@ -63,10 +68,11 @@
63 68  
64 69 name = json_conf["name"]
65 70  
66   -
67   -try:
  71 +try :
  72 + print "make folder "
68 73 os.mkdir("{}/{}".format(in_dir,name))
69 74 except:
  75 + print "folder not created"
70 76 pass
71 77  
72 78  
73 79  
74 80  
75 81  
... ... @@ -74,15 +80,16 @@
74 80 db["LABEL"]=infer_model["LABEL"]
75 81 #
76 82  
77   -keys = ["ASR","TRS"]
78 83  
  84 +keys = infer_model[features_key].keys()
  85 +
79 86 db["VAE"] = {}
80   -db["LDA"] = {}
  87 +db[features_key] = {}
81 88 for mod in keys :
82 89 #print mod
83   - db["LDA"][mod] = train_mlp(infer_model["LDA"][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
84   - infer_model["LDA"][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
85   - infer_model["LDA"][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
  90 + db[features_key][mod] = train_mlp(infer_model[features_key][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
  91 + infer_model[features_key][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
  92 + infer_model[features_key][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
86 93 mlp_h ,sgd=mlp_sgd,
87 94 epochs=mlp_epochs,
88 95 batch_size=mlp_batch_size,
89 96  
... ... @@ -91,13 +98,26 @@
91 98 dropouts=mlp_dropouts,
92 99 fit_verbose=0)
93 100  
94   - res=train_vae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"],
  101 + res=train_vae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],infer_model[features_key][mod]["TEST"],
95 102 hidden_size=hidden_size[0],
96 103 latent_dim=latent_dim,sgd=sgd,
97 104 input_activation=input_activation,output_activation=output_activation,
98 105 nb_epochs=epochs,batch_size=batch)
99 106 mlp_res_list=[]
100   - for layer in res :
  107 + for nb,layer in enumerate(res) :
  108 + if save_projection:
  109 + pd = pandas.DataFrame(layer[0])
  110 + col_count = (pd.sum(axis=0) != 0)
  111 + pd = pd.loc[:,col_count]
  112 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TRAIN")
  113 + pd = pandas.DataFrame(layer[1])
  114 + pd = pd.loc[:,col_count]
  115 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"DEV")
  116 + pd = pandas.DataFrame(layer[2])
  117 + pd = pd.loc[:,col_count]
  118 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TEST")
  119 + del pd
  120 +
101 121 mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
102 122 layer[1],infer_model["LABEL"][mod]["DEV"],
103 123 layer[2],infer_model["LABEL"][mod]["TEST"],
104 124  
105 125  
106 126  
... ... @@ -107,32 +127,46 @@
107 127 batch_size=mlp_batch_size,fit_verbose=0))
108 128 db["VAE"][mod]=mlp_res_list
109 129  
110   -mod = "ASR"
111   -mod2= "TRS"
112   -mlp_res_list=[]
  130 +if "ASR" in keys and "TRS" in keys :
  131 + mod = "ASR"
  132 + mod2= "TRS"
  133 + mlp_res_list=[]
113 134  
114   -res = train_vae(infer_model["LDA"][mod]["TRAIN"],
115   - infer_model["LDA"][mod]["DEV"],
116   - infer_model["LDA"][mod]["TEST"],
117   - hidden_size=hidden_size[0],
118   - sgd=sgd,input_activation=input_activation,output_activation=output_activation,
119   - latent_dim=latent_dim,
120   - nb_epochs=epochs,
121   - batch_size=batch,
122   - y_train=infer_model["LDA"][mod2]["TRAIN"],
123   - y_dev=infer_model["LDA"][mod2]["DEV"],
124   - y_test=infer_model["LDA"][mod2]["TEST"])
  135 + res = train_vae(infer_model[features_key][mod]["TRAIN"],
  136 + infer_model[features_key][mod]["DEV"],
  137 + infer_model[features_key][mod]["TEST"],
  138 + hidden_size=hidden_size[0],
  139 + sgd=sgd,input_activation=input_activation,output_activation=output_activation,
  140 + latent_dim=latent_dim,
  141 + nb_epochs=epochs,
  142 + batch_size=batch,
  143 + y_train=infer_model[features_key][mod2]["TRAIN"],
  144 + y_dev=infer_model[features_key][mod2]["DEV"],
  145 + y_test=infer_model[features_key][mod2]["TEST"])
125 146  
126   -for layer in res :
127   - mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
128   - layer[1],infer_model["LABEL"][mod]["DEV"],
129   - layer[2],infer_model["LABEL"][mod]["TEST"],
130   - mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
131   - output_activation=mlp_output_activation,
132   - input_activation=input_activation,
133   - batch_size=mlp_batch_size,fit_verbose=0))
  147 + for nb,layer in enumerate(res) :
  148 + if save_projection:
  149 + pd = pandas.DataFrame(layer[0])
  150 + col_count = (pd.sum(axis=0) != 0)
  151 + pd = pd.loc[:,col_count]
  152 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TRAIN")
  153 + pd = pandas.DataFrame(layer[1])
  154 + pd = pd.loc[:,col_count]
  155 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"DEV")
  156 + pd = pandas.DataFrame(layer[2])
  157 + pd = pd.loc[:,col_count]
  158 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TEST")
  159 + del pd
134 160  
135   -db["VAE"]["SPE"] = mlp_res_list
  161 + mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
  162 + layer[1],infer_model["LABEL"][mod]["DEV"],
  163 + layer[2],infer_model["LABEL"][mod]["TEST"],
  164 + mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
  165 + output_activation=mlp_output_activation,
  166 + input_activation=input_activation,
  167 + batch_size=mlp_batch_size,fit_verbose=0))
  168 +
  169 + db["VAE"]["SPE"] = mlp_res_list
136 170  
137 171 db.sync()
138 172 db.close()
LDA/05-lts_scoring.py
  1 +import sys
  2 +import shelve
  3 +import pickle
  4 +from utils import *
  5 +import sys
  6 +import os
  7 +import json
  8 +import glob
  9 +import tempfile
  10 +import pandas
  11 +import subprocess
  12 +from subprocess import CalledProcessError
  13 +import shutil
  14 +import numpy
  15 +
  16 +in_dir = sys.argv[1]
  17 +json_conf =json.load(open(sys.argv[2]))
  18 +name = json_conf["name"]
  19 +
  20 +ae_m = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name))
  21 +y_train=numpy.argmax(ae_m["LABEL"]["ASR"]["TRAIN"],axis=1)
  22 +_,ytr_path=tempfile.mkstemp()
  23 +ytr_open= open(ytr_path,"w")
  24 +for el in y_train:
  25 + print >>ytr_open, el
  26 +ytr_open.close()
  27 +
  28 +y_dev=numpy.argmax(ae_m["LABEL"]["ASR"]["DEV"],axis=1)
  29 +_,yd_path=tempfile.mkstemp()
  30 +yd_open = open(yd_path,"w")
  31 +for el in y_dev:
  32 + print >>yd_open, el
  33 +yd_open.close()
  34 +
  35 +y_test=numpy.argmax(ae_m["LABEL"]["ASR"]["TEST"],axis=1)
  36 +_,yte_path=tempfile.mkstemp()
  37 +yte_open=open(yte_path,"w")
  38 +for el in y_test:
  39 + print >>yte_open, el
  40 +yte_open.close()
  41 +
  42 +hdfs_files=glob.glob("{}/{}/*.hdf".format(in_dir,name))
  43 +temp_dir=tempfile.mkdtemp()
  44 +out_file=open("{}/{}/malaha_res.txt".format(in_dir,name),"a")
  45 +
  46 +for hdf in hdfs_files:
  47 + print >>out_file, "Start ---------------------------------------------------"
  48 + print >>out_file, hdf
  49 + x_train = pandas.read_hdf(hdf,"TRAIN")
  50 + x_train.to_csv("{}/xtrain.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False)
  51 + x_train = pandas.read_hdf(hdf,"DEV")
  52 + x_train.to_csv("{}/xdev.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False)
  53 + x_train = pandas.read_hdf(hdf,"TEST")
  54 + x_train.to_csv("{}/xtest.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False)
  55 + try :
  56 + resdev=subprocess.check_output(['Rscript',
  57 + '/home/laboinfo/janod/WorkingDir/erreur_traduction/Author_Topic_Decoda/estimate.R',
  58 + "{}/xtrain.dat".format(temp_dir),
  59 + "{}/xdev.dat".format(temp_dir),
  60 + ytr_path,yd_path])
  61 +
  62 + restest=subprocess.check_output(['Rscript',
  63 + '/home/laboinfo/janod/WorkingDir/erreur_traduction/Author_Topic_Decoda/estimate.R',
  64 + "{}/xtrain.dat".format(temp_dir),
  65 + "{}/xtest.dat".format(temp_dir),
  66 + ytr_path,yte_path])
  67 +
  68 + print >>out_file, resdev
  69 + print >>out_file, hdf
  70 + print >>out_file, restest
  71 + except CalledProcessError:
  72 + print >>out_file, "FAILED"
  73 + print >>out_file, hdf
  74 + print >>out_file, "End ---------------------------------------------------"
  75 +
  76 +shutil.rmtree(temp_dir)
  77 +os.remove(ytr_path)
  78 +os.remove(yd_path)
  79 +os.remove(yte_path)
... ... @@ -82,7 +82,6 @@
82 82  
83 83 def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,test_verbose=0,save_pred=False,keep_histo=False):
84 84  
85   -
86 85 layers = [Input(shape=(x_train.shape[1],))]
87 86  
88 87 for h in hidden_size:
1 1 # -*- coding: utf-8 -*-
2 2 import nltk
3 3 import re
  4 +import codecs
  5 +import numpy as np
  6 +import sqlite3
  7 +
4 8 pattern = ur"\d+(?:\.\d+)?\s*%?|\w{1,2}'|<unk>|[\wéàèùêôûâòìîç]+|[^\w\s]"
5 9 rer_b = re.compile(ur" r e r(?: e r)? b ")
6 10 rer_c = re.compile(ur" r e r(?: e r)? c |r e r( e r)? c' est | rer c' est")
... ... @@ -43,4 +47,55 @@
43 47  
44 48 def select_mmf(elm):
45 49 return int(elm.split("_")[0])
  50 +
  51 +def get_score(table):
  52 + mx_train = np.max(table[0])
  53 + argmx_dev = np.argmax(table[1])
  54 + mx_dev = table[1][argmx_dev]
  55 + best_test = table[2][argmx_dev]
  56 + mx_test = np.max(table[2])
  57 + print """\tmax train : {}
  58 + \tmax dev : {}
  59 + \tmax test : {} - best test : {}
  60 + \t best epochs : {}""".format(mx_train,mx_dev,mx_test,best_test,argmx_dev)
  61 + return mx_train,mx_dev,mx_test,best_test,argmx_dev
  62 +class WeightedWordsList :
  63 + @staticmethod
  64 + def get_key(wtuple):
  65 + return wtuple[1]
  66 + @staticmethod
  67 + def get_okey(wtuple):
  68 + return wtuple[1][1]
  69 +
  70 +
  71 + def __init__(self,file_path):
  72 + self.wlist = codecs.open(file_path,"r","utf8").readlines()
  73 + self.wlist = [x.strip().split(':') for x in self.wlist ]
  74 + self.wlist = [ (x, float(y)) for x,y in self.wlist ]
  75 + self.wdict = {}
  76 + for x,y in self.wlist:
  77 + self.wdict[x.encode("utf8")] = y
  78 +
  79 + def select_best(self,word_list,lenght=5):
  80 + scored_word = []
  81 + for w in word_list:
  82 + w = w.encode("utf8")
  83 + if w not in self.wdict :
  84 + continue
  85 +
  86 + if len(scored_word) < lenght:
  87 + scored_word.append((w,self.wdict[w]))
  88 + else :
  89 + w_min= min(enumerate(scored_word),key=WeightedWordsList.get_okey)
  90 + w_curr = (w, self.wdict[w])
  91 + if w_min[1][1] < w_curr[1]:
  92 + del scored_word[w_min[0]]
  93 + scored_word.append(w_curr)
  94 + w_min = min(enumerate(scored_word),key=WeightedWordsList.get_okey)
  95 + while len(scored_word) > lenght and w_min[1][1] < w_curr[1] :
  96 + del scored_word[w_min[0]]
  97 + w_min = min(enumerate(scored_word),key=WeightedWordsList.get_okey)
  98 + elif w_min[1][1] == w_curr[1]:
  99 + scored_word.append(w_curr)
  100 + return [ w[0] for w in scored_word ]
... ... @@ -16,15 +16,59 @@
16 16 from keras import backend as K
17 17 from keras import objectives
18 18 from keras.datasets import mnist
  19 +from keras.callbacks import EarlyStopping,Callback
19 20  
20 21 import pandas
21 22 import shelve
22 23 import pickle
23 24  
24 25  
  26 +class ZeroStopping(Callback):
  27 + '''Stop training when a monitored quantity has stopped improving.
  28 + # Arguments
  29 + monitor: quantity to be monitored.
  30 + patience: number of epochs with no improvement
  31 + after which training will be stopped.
  32 + verbose: verbosity mode.
  33 + mode: one of {auto, min, max}. In 'min' mode,
  34 + training will stop when the quantity
  35 + monitored has stopped decreasing; in 'max'
  36 + mode it will stop when the quantity
  37 + monitored has stopped increasing.
  38 + '''
  39 + def __init__(self, monitor='val_loss', verbose=0, mode='auto', thresh = 0):
  40 + super(ZeroStopping, self).__init__()
25 41  
  42 + self.monitor = monitor
  43 + self.verbose = verbose
  44 + self.thresh = thresh # is a rythme
26 45  
  46 + if mode not in ['auto', 'min', 'max']:
  47 + warnings.warn('EarlyStopping mode %s is unknown, '
  48 + 'fallback to auto mode.' % (self.mode),
  49 + RuntimeWarning)
  50 + mode = 'auto'
27 51  
  52 + if mode == 'min':
  53 + self.monitor_op = np.less
  54 + elif mode == 'max':
  55 + self.monitor_op = np.greater
  56 + else:
  57 + if 'acc' in self.monitor:
  58 + self.monitor_op = np.greater
  59 + else:
  60 + self.monitor_op = np.less
  61 +
  62 + def on_epoch_end(self, epoch, logs={}):
  63 + current = logs.get(self.monitor)
  64 + if current is None:
  65 + warnings.warn('Zero stopping requires %s available!' %
  66 + (self.monitor), RuntimeWarning)
  67 +
  68 + if self.monitor_op(current, self.thresh):
  69 + self.best = current
  70 + self.model.stop_training = True
  71 +
28 72 #batch_size = 16
29 73 #original_dim = 784
30 74 #latent_dim = 2
31 75  
... ... @@ -82,8 +126,11 @@
82 126 vae.fit(x_train, y_train,
83 127 shuffle=True,
84 128 nb_epoch=nb_epochs,
  129 + verbose = 1,
85 130 batch_size=batch_size,
86   - validation_data=(x_dev, y_dev))
  131 + validation_data=(x_dev, y_dev),
  132 + callbacks = [ZeroStopping(monitor='val_loss', thresh=0, verbose=0, mode='min')]
  133 + )
87 134  
88 135 # build a model to project inputs on the latent space
89 136 encoder = Model(x, z_mean)