Commit 2af8e57f4e1ebcfdd5ba9d3e8963c4853e472982

Authored by Killian
1 parent e5108393c8
Exists in master

change all

Showing 9 changed files with 428 additions and 124 deletions Side-by-side Diff

... ... @@ -31,8 +31,11 @@
31 31 #db=shelve.open("SPELIKE_MLP_DB.shelve",writeback=True)
32 32 origin_corps=shelve.open("{}".format(sys.argv[2]))
33 33 in_dir = sys.argv[1]
  34 +if len(sys.argv) > 3 :
  35 + features_key = sys.argv[3]
  36 +else :
  37 + features_key = "LDA"
34 38  
35   -
36 39 out_db=shelve.open("{}/mlp_scores.shelve".format(in_dir),writeback=True)
37 40  
38 41 mlp_h = [ 250, 250 ]
39 42  
40 43  
... ... @@ -40,16 +43,16 @@
40 43 mlp_dropouts = [0.25]* len(mlp_h)
41 44 mlp_sgd = Adam(lr=0.0001)
42 45 mlp_epochs = 3000
43   -mlp_batch_size = 1
  46 +mlp_batch_size = 5
44 47 mlp_input_activation = "relu"
45 48 mlp_output_activation="softmax"
46 49  
47 50 ress = []
48   -for key in ["TRS", "ASR"] :
  51 +for key in origin_corps[features_key].keys() :
49 52  
50   - res=mlp.train_mlp(origin_corps["LDA"][key]["TRAIN"],origin_corps["LABEL"][key]["TRAIN"],
51   - origin_corps["LDA"][key]["DEV"],origin_corps["LABEL"][key]["DEV"],
52   - origin_corps["LDA"][key]["TEST"],origin_corps["LABEL"][key]["TEST"],
  53 + res=mlp.train_mlp(origin_corps[features_key][key]["TRAIN"],origin_corps["LABEL"][key]["TRAIN"],
  54 + origin_corps[features_key][key]["DEV"],origin_corps["LABEL"][key]["DEV"],
  55 + origin_corps[features_key][key]["TEST"],origin_corps["LABEL"][key]["TEST"],
53 56 mlp_h,dropouts=mlp_dropouts,sgd=mlp_sgd,
54 57 epochs=mlp_epochs,
55 58 batch_size=mlp_batch_size,
LDA/04b-mmf_mini_ae.py
... ... @@ -10,6 +10,7 @@
10 10 from sklearn import preprocessing
11 11 from keras.models import Sequential
12 12 from keras.optimizers import SGD,Adam
  13 +from keras.layers.advanced_activations import ELU,PReLU
13 14 from mlp import *
14 15 import sklearn.metrics
15 16 import shelve
16 17  
... ... @@ -24,12 +25,24 @@
24 25 in_dir = sys.argv[1]
25 26 #['ASR', 'TRS', 'LABEL']
26 27 # In[6]:
27   -
  28 +if len(sys.argv) > 4 :
  29 + features_key = sys.argv[4]
  30 +else :
  31 + features_key = "LDA"
  32 +save_projection = True
28 33 json_conf =json.load(open(sys.argv[3]))
29 34 ae_conf = json_conf["ae"]
30 35  
31 36 hidden_size= ae_conf["hidden_size"]
32   -input_activation=ae_conf["input_activation"]
  37 +input_activation = None
  38 +print ae_conf["input_activation"]
  39 +if ae_conf["input_activation"] == "elu":
  40 + print " ELU"
  41 + input_activation = PReLU()
  42 +else:
  43 + print " ELSE"
  44 + input_activation = ae_conf["input_activation"]
  45 +#input_activation=ae_conf["input_activation"]
33 46 output_activation=ae_conf["output_activation"]
34 47 loss=ae_conf["loss"]
35 48 epochs=ae_conf["epochs"]
36 49  
37 50  
... ... @@ -72,14 +85,18 @@
72 85 db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True)
73 86 db["LABEL"]=infer_model["LABEL"]
74 87 #
75   -keys = ["ASR","TRS"]
  88 +keys = infer_model[features_key].keys()
76 89  
77 90 db["AE"] = {}
78   -db["LDA"] = {}
  91 +db[features_key] = {}
79 92 for mod in keys :
80   - db["LDA"][mod] = train_mlp(infer_model["LDA"][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
81   - infer_model["LDA"][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
82   - infer_model["LDA"][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
  93 + print infer_model[features_key][mod]["TRAIN"].shape
  94 + print infer_model[features_key][mod]["DEV"].shape
  95 + print infer_model[features_key][mod]["TEST"].shape
  96 +
  97 + db[features_key][mod] = train_mlp(infer_model[features_key][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
  98 + infer_model[features_key][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
  99 + infer_model[features_key][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
83 100 mlp_h ,sgd=mlp_sgd,
84 101 epochs=mlp_epochs,
85 102 batch_size=mlp_batch_size,
86 103  
... ... @@ -87,13 +104,25 @@
87 104 output_activation=mlp_output_activation,
88 105 dropouts=mlp_dropouts,
89 106 fit_verbose=0)
90   -
91   - res=train_ae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"],
  107 + print input_activation
  108 + res=train_ae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],infer_model[features_key][mod]["TEST"],
92 109 hidden_size,patience = patience,sgd=sgd,
93 110 dropouts=do_do,input_activation=input_activation,output_activation=output_activation,
94 111 loss=loss,epochs=epochs,batch_size=batch,verbose=0)
95 112 mlp_res_list=[]
96   - for layer in res :
  113 + for nb,layer in enumerate(res) :
  114 + if save_projection:
  115 + pd = pandas.DataFrame(layer[0])
  116 + col_count = (pd.sum(axis=0) != 0)
  117 + pd = pd.loc[:,col_count]
  118 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TRAIN")
  119 + pd = pandas.DataFrame(layer[1])
  120 + pd = pd.loc[:,col_count]
  121 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"DEV")
  122 + pd = pandas.DataFrame(layer[2])
  123 + pd = pd.loc[:,col_count]
  124 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TEST")
  125 + del pd
97 126 mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
98 127 layer[1],infer_model["LABEL"][mod]["DEV"],
99 128 layer[2],infer_model["LABEL"][mod]["TEST"],
100 129  
101 130  
102 131  
... ... @@ -103,30 +132,44 @@
103 132 batch_size=mlp_batch_size,fit_verbose=0))
104 133 db["AE"][mod]=mlp_res_list
105 134  
106   -mod = "ASR"
107   -mod2= "TRS"
108   -mlp_res_list=[]
  135 +if "ASR" in keys and "TRS" in keys:
  136 + mod = "ASR"
  137 + mod2= "TRS"
  138 + mlp_res_list=[]
109 139  
110   -res = train_ae(infer_model["LDA"][mod]["TRAIN"],
111   - infer_model["LDA"][mod]["DEV"],
112   - infer_model["LDA"][mod]["TEST"],
113   - hidden_size,dropouts=do_do,patience = patience,
114   - sgd=sgd,input_activation=input_activation,output_activation=output_activation,loss=loss,epochs=epochs,
115   - batch_size=batch,
116   - y_train=infer_model["LDA"][mod]["TRAIN"],
117   - y_dev=infer_model["LDA"][mod2]["DEV"],
118   - y_test=infer_model["LDA"][mod2]["TEST"])
  140 + res = train_ae(infer_model[features_key][mod]["TRAIN"],
  141 + infer_model[features_key][mod]["DEV"],
  142 + infer_model[features_key][mod]["TEST"],
  143 + hidden_size,dropouts=do_do,patience = patience,
  144 + sgd=sgd,input_activation=input_activation,output_activation=output_activation,loss=loss,epochs=epochs,
  145 + batch_size=batch,
  146 + y_train=infer_model[features_key][mod2]["TRAIN"],
  147 + y_dev=infer_model[features_key][mod2]["DEV"],
  148 + y_test=infer_model[features_key][mod2]["TEST"])
119 149  
120   -for layer in res :
121   - mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
122   - layer[1],infer_model["LABEL"][mod]["DEV"],
123   - layer[2],infer_model["LABEL"][mod]["TEST"],
124   - mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
125   - output_activation=mlp_output_activation,
126   - input_activation=input_activation,
127   - batch_size=mlp_batch_size,fit_verbose=0))
  150 + for nb,layer in enumerate(res) :
  151 + if save_projection:
  152 + pd = pandas.DataFrame(layer[0])
  153 + col_count= (pd.sum(axis=0) != 0)
  154 + pd = pd.loc[:,col_count]
  155 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TRAIN")
  156 + pd = pandas.DataFrame(layer[1])
  157 + pd = pd.loc[:,col_count]
  158 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"DEV")
  159 + pd = pandas.DataFrame(layer[2])
  160 + pd = pd.loc[:,col_count]
  161 + pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TEST")
  162 + del pd
128 163  
129   -db["AE"]["SPE"] = mlp_res_list
  164 + mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
  165 + layer[1],infer_model["LABEL"][mod]["DEV"],
  166 + layer[2],infer_model["LABEL"][mod]["TEST"],
  167 + mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
  168 + output_activation=mlp_output_activation,
  169 + input_activation=input_activation,
  170 + batch_size=mlp_batch_size,fit_verbose=0))
  171 +
  172 + db["AE"]["SPE"] = mlp_res_list
130 173  
131 174 db.sync()
132 175 db.close()
... ... @@ -23,6 +23,11 @@
23 23  
24 24 infer_model=shelve.open("{}".format(sys.argv[2]))
25 25 in_dir = sys.argv[1]
  26 +if len(sys.argv) > 4 :
  27 + features_key = sys.argv[4]
  28 +else :
  29 + features_key = "LDA"
  30 +save_projection = True
26 31 #['ASR', 'TRS', 'LABEL']
27 32 # In[6]:
28 33 json_conf =json.load(open(sys.argv[3]))
29 34  
... ... @@ -47,13 +52,13 @@
47 52 sgd = sae_conf["sgd"]
48 53  
49 54 name = json_conf["name"]
  55 +print name
50 56 try:
51 57 os.mkdir("{}/{}".format(in_dir,name))
52 58 except:
53 59 pass
54 60 db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True)
55 61 #
56   -keys = ["ASR","TRS"]
57 62 mlp_conf = json_conf["mlp"]
58 63 mlp_h = mlp_conf["hidden_size"]
59 64 mlp_loss = mlp_conf["loss"]
60 65  
61 66  
62 67  
63 68  
... ... @@ -72,23 +77,38 @@
72 77 except :
73 78 mlp_sgd = mlp_conf["sgd"]
74 79  
75   -
  80 +keys = infer_model[features_key].keys()
76 81 db["SAE"] = {}
77 82  
78 83 db["SAEFT"] = {}
79 84 for mod in keys :
80   - res_tuple=train_sae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],
81   - infer_model["LDA"][mod]["TEST"],
  85 + res_tuple=train_sae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],
  86 + infer_model[features_key][mod]["TEST"],
82 87 hidden_size,dropouts=do_do,
83 88 patience = "patience",sgd=sgd,input_activation="tanh",
84 89 output_activation="tanh",loss=loss,epochs=epochs,
85 90 batch_size=batch,verbose=0)
86 91 #print len(res), [len(x) for x in res[0]], [ len(x) for x in res[1]]
87   - for name , levels in zip(["SAE","SAEFT"],res_tuple):
  92 + for i, levels in zip(["SAE","SAEFT"],res_tuple):
88 93 mlp_res_by_level = []
89   - for res in levels:
  94 + for lvl,res in enumerate(levels):
90 95 mlp_res_list=[]
91 96 for nb,layer in enumerate(res) :
  97 + if save_projection:
  98 + pd = pandas.DataFrame(layer[0])
  99 + col_count= (pd.sum(axis=0) != 0)
  100 + pd = pd.loc[:,col_count]
  101 + hdffile = "{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,mod)
  102 + print hdffile
  103 + pd.to_hdf(hdffile,"TRAIN")
  104 + pd = pandas.DataFrame(layer[1])
  105 + pd = pd.loc[:,col_count]
  106 + pd.to_hdf(hdffile,"DEV")
  107 + pd = pandas.DataFrame(layer[2])
  108 + pd = pd.loc[:,col_count]
  109 + pd.to_hdf(hdffile,"TEST")
  110 + del pd
  111 +
92 112 mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
93 113 layer[1],infer_model["LABEL"][mod]["DEV"],
94 114 layer[2],infer_model["LABEL"][mod]["TEST"],
95 115  
96 116  
... ... @@ -96,33 +116,48 @@
96 116 sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,
97 117 fit_verbose=0))
98 118 mlp_res_by_level.append(mlp_res_list)
99   - db[name][mod]=mlp_res_by_level
  119 + db[i][mod]=mlp_res_by_level
100 120  
101   -mod = "ASR"
102   -mod2= "TRS"
103   -res_tuple = train_sae(infer_model["LDA"][mod]["TRAIN"],
104   - infer_model["LDA"][mod]["DEV"],
105   - infer_model["LDA"][mod]["TEST"],
106   - hidden_size,dropouts=[0],patience="patience",
107   - sgd=sgd,input_activation=input_activation,output_activation=input_activation,
108   - loss=loss,epochs=epochs,batch_size=batch,
109   - y_train=infer_model["LDA"][mod2]["TRAIN"],
110   - y_dev=infer_model["LDA"][mod2]["DEV"],
111   - y_test=infer_model["LDA"][mod2]["TEST"])
112 121  
113   -for name , levels in zip(["SAE","SAEFT"],res_tuple):
114   - mlp_res_by_level = []
115   - for res in levels :
116   - mlp_res_list=[]
117   - for layer in res :
118   - mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
119   - layer[1],infer_model["LABEL"][mod]["DEV"],layer[2],
120   - infer_model["LABEL"][mod]["TEST"],
121   - mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
122   - sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,
123   - fit_verbose=0))
124   - mlp_res_by_level.append(mlp_res_list)
125   - db[name]["SPE"] = mlp_res_by_level
  122 +if "ASR" in keys and "TRS" in keys :
  123 + mod = "ASR"
  124 + mod2= "TRS"
  125 + res_tuple = train_sae(infer_model[features_key][mod]["TRAIN"],
  126 + infer_model[features_key][mod]["DEV"],
  127 + infer_model[features_key][mod]["TEST"],
  128 + hidden_size,dropouts=[0],patience="patience",
  129 + sgd=sgd,input_activation=input_activation,output_activation=input_activation,
  130 + loss=loss,epochs=epochs,batch_size=batch,
  131 + y_train=infer_model[features_key][mod2]["TRAIN"],
  132 + y_dev=infer_model[features_key][mod2]["DEV"],
  133 + y_test=infer_model[features_key][mod2]["TEST"])
  134 +
  135 + for i , levels in zip(["SAE","SAEFT"],res_tuple):
  136 + mlp_res_by_level = []
  137 + for lvl,res in enumerate(levels) :
  138 + mlp_res_list=[]
  139 + for nb,layer in enumerate(res) :
  140 + if save_projection:
  141 + pd = pandas.DataFrame(layer[0])
  142 + col_count= (pd.sum(axis=0) != 0)
  143 + pd = pd.loc[:,col_count]
  144 + pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"TRAIN")
  145 + pd = pandas.DataFrame(layer[1])
  146 + pd = pd.loc[:,col_count]
  147 + pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"DEV")
  148 + pd = pandas.DataFrame(layer[2])
  149 + pd = pd.loc[:,col_count]
  150 + pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"TEST")
  151 + del pd
  152 +
  153 + mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
  154 + layer[1],infer_model["LABEL"][mod]["DEV"],layer[2],
  155 + infer_model["LABEL"][mod]["TEST"],
  156 + mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
  157 + sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,
  158 + fit_verbose=0))
  159 + mlp_res_by_level.append(mlp_res_list)
  160 + db[i]["SPE"] = mlp_res_by_level
126 161  
127 162 db.sync()
128 163 db.close()
... ... @@ -26,6 +26,10 @@
26 26 in_dir = sys.argv[1]
27 27 #['ASR', 'TRS', 'LABEL']
28 28 # In[6]:
  29 +if len(sys.argv) > 4 :
  30 + features_key = sys.argv[4]
  31 +else :
  32 + features_key = "LDA"
29 33  
30 34 json_conf =json.load(open(sys.argv[3]))
31 35  
... ... @@ -101,9 +105,9 @@
101 105  
102 106 db["DSAEFT"] = {}
103 107 mod = "ASR"
104   -res_tuple_ASR = train_ae(infer_model["LDA"][mod]["TRAIN"],
105   - infer_model["LDA"][mod]["DEV"],
106   - infer_model["LDA"][mod]["TEST"],
  108 +res_tuple_ASR = train_ae(infer_model[features_key][mod]["TRAIN"],
  109 + infer_model[features_key][mod]["DEV"],
  110 + infer_model[features_key][mod]["TEST"],
107 111 hidden_size,dropouts=do_do,
108 112 patience = patience,sgd=sgd,
109 113 input_activation=input_activation,
... ... @@ -122,9 +126,9 @@
122 126  
123 127 db["DSAE"][mod] = mlp_res_list
124 128 mod = "TRS"
125   -res_tuple_TRS = train_ae(infer_model["LDA"][mod]["TRAIN"],
126   - infer_model["LDA"][mod]["DEV"],
127   - infer_model["LDA"][mod]["TEST"],
  129 +res_tuple_TRS = train_ae(infer_model[features_key][mod]["TRAIN"],
  130 + infer_model[features_key][mod]["DEV"],
  131 + infer_model[features_key][mod]["TEST"],
128 132 hidden_size,dropouts=do_do,
129 133 sgd=sgd,input_activation=input_activation,
130 134 output_activation=output_activation,loss=loss,epochs=epochs,
... ... @@ -202,12 +206,12 @@
202 206  
203 207 #print "Wtr", len(Wtr), [ len(x) for x in Wtr],[ len(x[1]) for x in Wtr]
204 208  
205   -ft_res = ft_dsae(infer_model["LDA"]["ASR"]["TRAIN"],
206   - infer_model["LDA"]["ASR"]["DEV"],
207   - infer_model["LDA"]["ASR"]["TEST"],
208   - y_train=infer_model["LDA"]["TRS"]["TRAIN"],
209   - y_dev=infer_model["LDA"]["TRS"]["DEV"],
210   - y_test=infer_model["LDA"]["TRS"]["TEST"],
  209 +ft_res = ft_dsae(infer_model[features_key]["ASR"]["TRAIN"],
  210 + infer_model[features_key]["ASR"]["DEV"],
  211 + infer_model[features_key]["ASR"]["TEST"],
  212 + y_train=infer_model[features_key]["TRS"]["TRAIN"],
  213 + y_dev=infer_model[features_key]["TRS"]["DEV"],
  214 + y_test=infer_model[features_key]["TRS"]["TEST"],
211 215 ae_hidden = hidden_size,
212 216 transfer_hidden = trans_hidden_size,
213 217 start_weights = WA,
... ... @@ -21,7 +21,12 @@
21 21 in_dir = sys.argv[1]
22 22 #['ASR', 'TRS', 'LABEL']
23 23 # In[6]:
  24 +if len(sys.argv) > 4 :
  25 + features_key = sys.argv[4]
  26 +else :
  27 + features_key = "LDA"
24 28  
  29 +save_projection = True
25 30 json_conf =json.load(open(sys.argv[3]))
26 31 vae_conf = json_conf["vae"]
27 32  
28 33  
... ... @@ -63,10 +68,11 @@
63 68  
64 69 name = json_conf["name"]
65 70  
66   -
67   -try:
  71 +try :
  72 + print "make folder "
68 73 os.mkdir("{}/{}".format(in_dir,name))
69 74 except:
  75 + print "folder not created"
70 76 pass
71 77  
72 78  
73 79  
74 80  
75 81  
... ... @@ -74,15 +80,16 @@
74 80 db["LABEL"]=infer_model["LABEL"]
75 81 #
76 82  
77   -keys = ["ASR","TRS"]
78 83  
  84 +keys = infer_model[features_key].keys()
  85 +
79 86 db["VAE"] = {}
80   -db["LDA"] = {}
  87 +db[features_key] = {}
81 88 for mod in keys :
82 89 #print mod
83   - db["LDA"][mod] = train_mlp(infer_model["LDA"][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
84   - infer_model["LDA"][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
85   - infer_model["LDA"][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
  90 + db[features_key][mod] = train_mlp(infer_model[features_key][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
  91 + infer_model[features_key][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
  92 + infer_model[features_key][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
86 93 mlp_h ,sgd=mlp_sgd,
87 94 epochs=mlp_epochs,
88 95 batch_size=mlp_batch_size,
89 96  
... ... @@ -91,13 +98,26 @@
91 98 dropouts=mlp_dropouts,
92 99 fit_verbose=0)
93 100  
94   - res=train_vae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"],
  101 + res=train_vae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],infer_model[features_key][mod]["TEST"],
95 102 hidden_size=hidden_size[0],
96 103 latent_dim=latent_dim,sgd=sgd,
97 104 input_activation=input_activation,output_activation=output_activation,
98 105 nb_epochs=epochs,batch_size=batch)
99 106 mlp_res_list=[]
100   - for layer in res :
  107 + for nb,layer in enumerate(res) :
  108 + if save_projection:
  109 + pd = pandas.DataFrame(layer[0])
  110 + col_count = (pd.sum(axis=0) != 0)
  111 + pd = pd.loc[:,col_count]
  112 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TRAIN")
  113 + pd = pandas.DataFrame(layer[1])
  114 + pd = pd.loc[:,col_count]
  115 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"DEV")
  116 + pd = pandas.DataFrame(layer[2])
  117 + pd = pd.loc[:,col_count]
  118 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TEST")
  119 + del pd
  120 +
101 121 mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
102 122 layer[1],infer_model["LABEL"][mod]["DEV"],
103 123 layer[2],infer_model["LABEL"][mod]["TEST"],
104 124  
105 125  
106 126  
... ... @@ -107,32 +127,46 @@
107 127 batch_size=mlp_batch_size,fit_verbose=0))
108 128 db["VAE"][mod]=mlp_res_list
109 129  
110   -mod = "ASR"
111   -mod2= "TRS"
112   -mlp_res_list=[]
  130 +if "ASR" in keys and "TRS" in keys :
  131 + mod = "ASR"
  132 + mod2= "TRS"
  133 + mlp_res_list=[]
113 134  
114   -res = train_vae(infer_model["LDA"][mod]["TRAIN"],
115   - infer_model["LDA"][mod]["DEV"],
116   - infer_model["LDA"][mod]["TEST"],
117   - hidden_size=hidden_size[0],
118   - sgd=sgd,input_activation=input_activation,output_activation=output_activation,
119   - latent_dim=latent_dim,
120   - nb_epochs=epochs,
121   - batch_size=batch,
122   - y_train=infer_model["LDA"][mod2]["TRAIN"],
123   - y_dev=infer_model["LDA"][mod2]["DEV"],
124   - y_test=infer_model["LDA"][mod2]["TEST"])
  135 + res = train_vae(infer_model[features_key][mod]["TRAIN"],
  136 + infer_model[features_key][mod]["DEV"],
  137 + infer_model[features_key][mod]["TEST"],
  138 + hidden_size=hidden_size[0],
  139 + sgd=sgd,input_activation=input_activation,output_activation=output_activation,
  140 + latent_dim=latent_dim,
  141 + nb_epochs=epochs,
  142 + batch_size=batch,
  143 + y_train=infer_model[features_key][mod2]["TRAIN"],
  144 + y_dev=infer_model[features_key][mod2]["DEV"],
  145 + y_test=infer_model[features_key][mod2]["TEST"])
125 146  
126   -for layer in res :
127   - mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
128   - layer[1],infer_model["LABEL"][mod]["DEV"],
129   - layer[2],infer_model["LABEL"][mod]["TEST"],
130   - mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
131   - output_activation=mlp_output_activation,
132   - input_activation=input_activation,
133   - batch_size=mlp_batch_size,fit_verbose=0))
  147 + for nb,layer in enumerate(res) :
  148 + if save_projection:
  149 + pd = pandas.DataFrame(layer[0])
  150 + col_count = (pd.sum(axis=0) != 0)
  151 + pd = pd.loc[:,col_count]
  152 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TRAIN")
  153 + pd = pandas.DataFrame(layer[1])
  154 + pd = pd.loc[:,col_count]
  155 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"DEV")
  156 + pd = pandas.DataFrame(layer[2])
  157 + pd = pd.loc[:,col_count]
  158 + pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TEST")
  159 + del pd
134 160  
135   -db["VAE"]["SPE"] = mlp_res_list
  161 + mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
  162 + layer[1],infer_model["LABEL"][mod]["DEV"],
  163 + layer[2],infer_model["LABEL"][mod]["TEST"],
  164 + mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
  165 + output_activation=mlp_output_activation,
  166 + input_activation=input_activation,
  167 + batch_size=mlp_batch_size,fit_verbose=0))
  168 +
  169 + db["VAE"]["SPE"] = mlp_res_list
136 170  
137 171 db.sync()
138 172 db.close()
LDA/05-lts_scoring.py
  1 +import sys
  2 +import shelve
  3 +import pickle
  4 +from utils import *
  5 +import sys
  6 +import os
  7 +import json
  8 +import glob
  9 +import tempfile
  10 +import pandas
  11 +import subprocess
  12 +from subprocess import CalledProcessError
  13 +import shutil
  14 +import numpy
  15 +
  16 +in_dir = sys.argv[1]
  17 +json_conf =json.load(open(sys.argv[2]))
  18 +name = json_conf["name"]
  19 +
  20 +ae_m = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name))
  21 +y_train=numpy.argmax(ae_m["LABEL"]["ASR"]["TRAIN"],axis=1)
  22 +_,ytr_path=tempfile.mkstemp()
  23 +ytr_open= open(ytr_path,"w")
  24 +for el in y_train:
  25 + print >>ytr_open, el
  26 +ytr_open.close()
  27 +
  28 +y_dev=numpy.argmax(ae_m["LABEL"]["ASR"]["DEV"],axis=1)
  29 +_,yd_path=tempfile.mkstemp()
  30 +yd_open = open(yd_path,"w")
  31 +for el in y_dev:
  32 + print >>yd_open, el
  33 +yd_open.close()
  34 +
  35 +y_test=numpy.argmax(ae_m["LABEL"]["ASR"]["TEST"],axis=1)
  36 +_,yte_path=tempfile.mkstemp()
  37 +yte_open=open(yte_path,"w")
  38 +for el in y_test:
  39 + print >>yte_open, el
  40 +yte_open.close()
  41 +
  42 +hdfs_files=glob.glob("{}/{}/*.hdf".format(in_dir,name))
  43 +temp_dir=tempfile.mkdtemp()
  44 +out_file=open("{}/{}/malaha_res.txt".format(in_dir,name),"a")
  45 +
  46 +for hdf in hdfs_files:
  47 + print >>out_file, "Start ---------------------------------------------------"
  48 + print >>out_file, hdf
  49 + x_train = pandas.read_hdf(hdf,"TRAIN")
  50 + x_train.to_csv("{}/xtrain.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False)
  51 + x_train = pandas.read_hdf(hdf,"DEV")
  52 + x_train.to_csv("{}/xdev.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False)
  53 + x_train = pandas.read_hdf(hdf,"TEST")
  54 + x_train.to_csv("{}/xtest.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False)
  55 + try :
  56 + resdev=subprocess.check_output(['Rscript',
  57 + '/home/laboinfo/janod/WorkingDir/erreur_traduction/Author_Topic_Decoda/estimate.R',
  58 + "{}/xtrain.dat".format(temp_dir),
  59 + "{}/xdev.dat".format(temp_dir),
  60 + ytr_path,yd_path])
  61 +
  62 + restest=subprocess.check_output(['Rscript',
  63 + '/home/laboinfo/janod/WorkingDir/erreur_traduction/Author_Topic_Decoda/estimate.R',
  64 + "{}/xtrain.dat".format(temp_dir),
  65 + "{}/xtest.dat".format(temp_dir),
  66 + ytr_path,yte_path])
  67 +
  68 + print >>out_file, resdev
  69 + print >>out_file, hdf
  70 + print >>out_file, restest
  71 + except CalledProcessError:
  72 + print >>out_file, "FAILED"
  73 + print >>out_file, hdf
  74 + print >>out_file, "End ---------------------------------------------------"
  75 +
  76 +shutil.rmtree(temp_dir)
  77 +os.remove(ytr_path)
  78 +os.remove(yd_path)
  79 +os.remove(yte_path)
... ... @@ -82,7 +82,6 @@
82 82  
83 83 def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,test_verbose=0,save_pred=False,keep_histo=False):
84 84  
85   -
86 85 layers = [Input(shape=(x_train.shape[1],))]
87 86  
88 87 for h in hidden_size:
1 1 # -*- coding: utf-8 -*-
2 2 import nltk
3 3 import re
  4 +import codecs
  5 +import numpy as np
  6 +import sqlite3
  7 +
4 8 pattern = ur"\d+(?:\.\d+)?\s*%?|\w{1,2}'|<unk>|[\wéàèùêôûâòìîç]+|[^\w\s]"
5 9 rer_b = re.compile(ur" r e r(?: e r)? b ")
6 10 rer_c = re.compile(ur" r e r(?: e r)? c |r e r( e r)? c' est | rer c' est")
... ... @@ -43,4 +47,55 @@
43 47  
44 48 def select_mmf(elm):
45 49 return int(elm.split("_")[0])
  50 +
  51 +def get_score(table):
  52 + mx_train = np.max(table[0])
  53 + argmx_dev = np.argmax(table[1])
  54 + mx_dev = table[1][argmx_dev]
  55 + best_test = table[2][argmx_dev]
  56 + mx_test = np.max(table[2])
  57 + print """\tmax train : {}
  58 + \tmax dev : {}
  59 + \tmax test : {} - best test : {}
  60 + \t best epochs : {}""".format(mx_train,mx_dev,mx_test,best_test,argmx_dev)
  61 + return mx_train,mx_dev,mx_test,best_test,argmx_dev
  62 +class WeightedWordsList :
  63 + @staticmethod
  64 + def get_key(wtuple):
  65 + return wtuple[1]
  66 + @staticmethod
  67 + def get_okey(wtuple):
  68 + return wtuple[1][1]
  69 +
  70 +
  71 + def __init__(self,file_path):
  72 + self.wlist = codecs.open(file_path,"r","utf8").readlines()
  73 + self.wlist = [x.strip().split(':') for x in self.wlist ]
  74 + self.wlist = [ (x, float(y)) for x,y in self.wlist ]
  75 + self.wdict = {}
  76 + for x,y in self.wlist:
  77 + self.wdict[x.encode("utf8")] = y
  78 +
  79 + def select_best(self,word_list,lenght=5):
  80 + scored_word = []
  81 + for w in word_list:
  82 + w = w.encode("utf8")
  83 + if w not in self.wdict :
  84 + continue
  85 +
  86 + if len(scored_word) < lenght:
  87 + scored_word.append((w,self.wdict[w]))
  88 + else :
  89 + w_min= min(enumerate(scored_word),key=WeightedWordsList.get_okey)
  90 + w_curr = (w, self.wdict[w])
  91 + if w_min[1][1] < w_curr[1]:
  92 + del scored_word[w_min[0]]
  93 + scored_word.append(w_curr)
  94 + w_min = min(enumerate(scored_word),key=WeightedWordsList.get_okey)
  95 + while len(scored_word) > lenght and w_min[1][1] < w_curr[1] :
  96 + del scored_word[w_min[0]]
  97 + w_min = min(enumerate(scored_word),key=WeightedWordsList.get_okey)
  98 + elif w_min[1][1] == w_curr[1]:
  99 + scored_word.append(w_curr)
  100 + return [ w[0] for w in scored_word ]
... ... @@ -16,15 +16,59 @@
16 16 from keras import backend as K
17 17 from keras import objectives
18 18 from keras.datasets import mnist
  19 +from keras.callbacks import EarlyStopping,Callback
19 20  
20 21 import pandas
21 22 import shelve
22 23 import pickle
23 24  
24 25  
  26 +class ZeroStopping(Callback):
  27 + '''Stop training when a monitored quantity has stopped improving.
  28 + # Arguments
  29 + monitor: quantity to be monitored.
  30 + patience: number of epochs with no improvement
  31 + after which training will be stopped.
  32 + verbose: verbosity mode.
  33 + mode: one of {auto, min, max}. In 'min' mode,
  34 + training will stop when the quantity
  35 + monitored has stopped decreasing; in 'max'
  36 + mode it will stop when the quantity
  37 + monitored has stopped increasing.
  38 + '''
  39 + def __init__(self, monitor='val_loss', verbose=0, mode='auto', thresh = 0):
  40 + super(ZeroStopping, self).__init__()
25 41  
  42 + self.monitor = monitor
  43 + self.verbose = verbose
  44 + self.thresh = thresh # is a rythme
26 45  
  46 + if mode not in ['auto', 'min', 'max']:
  47 + warnings.warn('EarlyStopping mode %s is unknown, '
  48 + 'fallback to auto mode.' % (self.mode),
  49 + RuntimeWarning)
  50 + mode = 'auto'
27 51  
  52 + if mode == 'min':
  53 + self.monitor_op = np.less
  54 + elif mode == 'max':
  55 + self.monitor_op = np.greater
  56 + else:
  57 + if 'acc' in self.monitor:
  58 + self.monitor_op = np.greater
  59 + else:
  60 + self.monitor_op = np.less
  61 +
  62 + def on_epoch_end(self, epoch, logs={}):
  63 + current = logs.get(self.monitor)
  64 + if current is None:
  65 + warnings.warn('Zero stopping requires %s available!' %
  66 + (self.monitor), RuntimeWarning)
  67 +
  68 + if self.monitor_op(current, self.thresh):
  69 + self.best = current
  70 + self.model.stop_training = True
  71 +
28 72 #batch_size = 16
29 73 #original_dim = 784
30 74 #latent_dim = 2
31 75  
... ... @@ -82,8 +126,11 @@
82 126 vae.fit(x_train, y_train,
83 127 shuffle=True,
84 128 nb_epoch=nb_epochs,
  129 + verbose = 1,
85 130 batch_size=batch_size,
86   - validation_data=(x_dev, y_dev))
  131 + validation_data=(x_dev, y_dev),
  132 + callbacks = [ZeroStopping(monitor='val_loss', thresh=0, verbose=0, mode='min')]
  133 + )
87 134  
88 135 # build a model to project inputs on the latent space
89 136 encoder = Model(x, z_mean)