Commit 2af8e57f4e1ebcfdd5ba9d3e8963c4853e472982 ("change all")
Parent: e5108393c8
Branch: master
Showing 9 changed files with 428 additions and 124 deletions
LDA/04a-mmdf.py
 # coding: utf-8

 # In[29]:

 # Import
 import itertools
 import shelve
 import pickle
 import numpy
 import scipy
 from scipy import sparse
 import scipy.sparse
 import scipy.io
 from mlp import *
 import mlp
 import sys
 import utils
 import dill
 from collections import Counter
 from gensim.models import LdaModel

 # In[3]:

 #30_50_50_150_0.0001

 # In[4]:

 #db=shelve.open("SPELIKE_MLP_DB.shelve",writeback=True)
 origin_corps=shelve.open("{}".format(sys.argv[2]))
 in_dir = sys.argv[1]
+if len(sys.argv) > 3 :
+    features_key = sys.argv[3]
+else :
+    features_key = "LDA"

 out_db=shelve.open("{}/mlp_scores.shelve".format(in_dir),writeback=True)

 mlp_h = [ 250, 250 ]
 mlp_loss = "categorical_crossentropy"
 mlp_dropouts = [0.25]* len(mlp_h)
 mlp_sgd = Adam(lr=0.0001)
 mlp_epochs = 3000
-mlp_batch_size = 1
+mlp_batch_size = 5
 mlp_input_activation = "relu"
 mlp_output_activation="softmax"

 ress = []
-for key in ["TRS", "ASR"] :
+for key in origin_corps[features_key].keys() :

-    res=mlp.train_mlp(origin_corps["LDA"][key]["TRAIN"],origin_corps["LABEL"][key]["TRAIN"],
-                      origin_corps["LDA"][key]["DEV"],origin_corps["LABEL"][key]["DEV"],
-                      origin_corps["LDA"][key]["TEST"],origin_corps["LABEL"][key]["TEST"],
+    res=mlp.train_mlp(origin_corps[features_key][key]["TRAIN"],origin_corps["LABEL"][key]["TRAIN"],
+                      origin_corps[features_key][key]["DEV"],origin_corps["LABEL"][key]["DEV"],
+                      origin_corps[features_key][key]["TEST"],origin_corps["LABEL"][key]["TEST"],
                       mlp_h,dropouts=mlp_dropouts,sgd=mlp_sgd,
                       epochs=mlp_epochs,
                       batch_size=mlp_batch_size,
                       save_pred=False,keep_histo=False,
                       loss="categorical_crossentropy",fit_verbose=0)
     arg_best=[]
     dev_best=[]
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0
     arg_best.append(numpy.argmax(res[1]))
     dev_best.append(res[1][arg_best[-1]])
     res[1][arg_best[-1]]=0

     test_best =[ res[2][x] for x in arg_best ]
     test_max = numpy.max(res[2])
     out_db[key]=(res,(dev_best,test_best,test_max))
     ress.append((key,dev_best,test_best,test_max))
 print sys.argv[2]
 for el in ress :
     print el
 out_db.close()
 origin_corps.close()
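Note: the twelve unrolled argmax/zero-out blocks above record the twelve best dev-epoch scores (and the matching test scores) by repeatedly taking the maximum and zeroing it out. For reference, a compact equivalent; this is a sketch only, not part of the commit, k_best_dev is a hypothetical helper, and tie-breaking can differ from the unrolled version:

    import numpy

    def k_best_dev(dev_scores, test_scores, k=12):
        # indices of the k highest dev scores, best first
        scores = numpy.array(dev_scores, dtype=float)
        arg_best = list(numpy.argsort(scores)[::-1][:k])
        dev_best = [scores[i] for i in arg_best]
        test_best = [test_scores[i] for i in arg_best]
        return arg_best, dev_best, test_best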
LDA/04b-mmf_mini_ae.py
 # coding: utf-8

 # In[2]:

 # Import
 import gensim
 from scipy import sparse
 import itertools
 from sklearn import preprocessing
 from keras.models import Sequential
 from keras.optimizers import SGD,Adam
+from keras.layers.advanced_activations import ELU,PReLU
 from mlp import *
 import sklearn.metrics
 import shelve
 import pickle
 from utils import *
 import sys
 import os
 import json
+import pandas
 # In[4]:

 infer_model=shelve.open("{}".format(sys.argv[2]))
 in_dir = sys.argv[1]
 #['ASR', 'TRS', 'LABEL']
 # In[6]:
+if len(sys.argv) > 4 :
+    features_key = sys.argv[4]
+else :
+    features_key = "LDA"
+save_projection = True
 json_conf =json.load(open(sys.argv[3]))
 ae_conf = json_conf["ae"]

 hidden_size= ae_conf["hidden_size"]
-input_activation=ae_conf["input_activation"]
+input_activation = None
+print ae_conf["input_activation"]
+if ae_conf["input_activation"] == "elu":
+    print " ELU"
+    input_activation = PReLU()
+else:
+    print " ELSE"
+    input_activation = ae_conf["input_activation"]
+#input_activation=ae_conf["input_activation"]
 output_activation=ae_conf["output_activation"]
 loss=ae_conf["loss"]
 epochs=ae_conf["epochs"]
 batch=ae_conf["batch"]
 patience=ae_conf["patience"]
 do_do=ae_conf["do"]
 try:
     k = ae_conf["sgd"]
     if ae_conf["sgd"]["name"] == "adam":
         sgd = Adam(lr=ae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
     elif ae_conf["sgd"]["name"] == "sgd":
         sgd = SGD(lr=ae_conf["sgd"]["lr"])
 except:
     sgd = ae_conf["sgd"]

 mlp_conf = json_conf["mlp"]
 mlp_h = mlp_conf["hidden_size"]
 mlp_loss = mlp_conf["loss"]
 mlp_dropouts = mlp_conf["do"]
 mlp_epochs = mlp_conf["epochs"]
 mlp_batch_size = mlp_conf["batch"]
 mlp_input_activation=mlp_conf["input_activation"]
 mlp_output_activation=mlp_conf["output_activation"]

 try:
     k = mlp_conf["sgd"]
     if mlp_conf["sgd"]["name"] == "adam":
         mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
     elif mlp_conf["sgd"]["name"] == "sgd":
         mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"])
 except:
     mlp_sgd = mlp_conf["sgd"]


 name = json_conf["name"]
 try:
     os.mkdir("{}/{}".format(in_dir,name))
 except:
     pass
 db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True)
 db["LABEL"]=infer_model["LABEL"]
 #
-keys = ["ASR","TRS"]
+keys = infer_model[features_key].keys()

 db["AE"] = {}
-db["LDA"] = {}
+db[features_key] = {}
 for mod in keys :
+    print infer_model[features_key][mod]["TRAIN"].shape
+    print infer_model[features_key][mod]["DEV"].shape
+    print infer_model[features_key][mod]["TEST"].shape
+
-    db["LDA"][mod] = train_mlp(infer_model["LDA"][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
-                               infer_model["LDA"][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
-                               infer_model["LDA"][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
+    db[features_key][mod] = train_mlp(infer_model[features_key][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"],
+                                      infer_model[features_key][mod]["DEV"],infer_model["LABEL"][mod]["DEV"],
+                                      infer_model[features_key][mod]["TEST"],infer_model["LABEL"][mod]["TEST"],
                                mlp_h ,sgd=mlp_sgd,
                                epochs=mlp_epochs,
                                batch_size=mlp_batch_size,
                                input_activation=mlp_input_activation,
                                output_activation=mlp_output_activation,
                                dropouts=mlp_dropouts,
                                fit_verbose=0)
+    print input_activation
-    res=train_ae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"],
+    res=train_ae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],infer_model[features_key][mod]["TEST"],
                  hidden_size,patience = patience,sgd=sgd,
                  dropouts=do_do,input_activation=input_activation,output_activation=output_activation,
                  loss=loss,epochs=epochs,batch_size=batch,verbose=0)
     mlp_res_list=[]
-    for layer in res :
+    for nb,layer in enumerate(res) :
+        if save_projection:
+            pd = pandas.DataFrame(layer[0])
+            col_count = (pd.sum(axis=0) != 0)
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TRAIN")
+            pd = pandas.DataFrame(layer[1])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"DEV")
+            pd = pandas.DataFrame(layer[2])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TEST")
+            del pd
         mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
                                       layer[1],infer_model["LABEL"][mod]["DEV"],
                                       layer[2],infer_model["LABEL"][mod]["TEST"],
                                       mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
                                       output_activation=mlp_output_activation,
                                       input_activation=mlp_input_activation,
                                       batch_size=mlp_batch_size,fit_verbose=0))
     db["AE"][mod]=mlp_res_list

-mod = "ASR"
-mod2= "TRS"
-mlp_res_list=[]
+if "ASR" in keys and "TRS" in keys:
+    mod = "ASR"
+    mod2= "TRS"
+    mlp_res_list=[]

-res = train_ae(infer_model["LDA"][mod]["TRAIN"],
-               infer_model["LDA"][mod]["DEV"],
-               infer_model["LDA"][mod]["TEST"],
-               hidden_size,dropouts=do_do,patience = patience,
-               sgd=sgd,input_activation=input_activation,output_activation=output_activation,loss=loss,epochs=epochs,
-               batch_size=batch,
-               y_train=infer_model["LDA"][mod]["TRAIN"],
-               y_dev=infer_model["LDA"][mod2]["DEV"],
-               y_test=infer_model["LDA"][mod2]["TEST"])
+    res = train_ae(infer_model[features_key][mod]["TRAIN"],
+                   infer_model[features_key][mod]["DEV"],
+                   infer_model[features_key][mod]["TEST"],
+                   hidden_size,dropouts=do_do,patience = patience,
+                   sgd=sgd,input_activation=input_activation,output_activation=output_activation,loss=loss,epochs=epochs,
+                   batch_size=batch,
+                   y_train=infer_model[features_key][mod]["TRAIN"],
+                   y_dev=infer_model[features_key][mod2]["DEV"],
+                   y_test=infer_model[features_key][mod2]["TEST"])

-for layer in res :
-    mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
-                                  layer[1],infer_model["LABEL"][mod]["DEV"],
-                                  layer[2],infer_model["LABEL"][mod]["TEST"],
-                                  mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
-                                  output_activation=mlp_output_activation,
-                                  input_activation=input_activation,
-                                  batch_size=mlp_batch_size,fit_verbose=0))
-
-db["AE"]["SPE"] = mlp_res_list
+    for nb,layer in enumerate(res) :
+        if save_projection:
+            pd = pandas.DataFrame(layer[0])
+            col_count= (pd.sum(axis=0) != 0)
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TRAIN")
+            pd = pandas.DataFrame(layer[1])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"DEV")
+            pd = pandas.DataFrame(layer[2])
+            pd = pd.loc[:,col_count]
+            pd.to_hdf("{}/{}/AE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TEST")
+            del pd
+        mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
+                                      layer[1],infer_model["LABEL"][mod]["DEV"],
+                                      layer[2],infer_model["LABEL"][mod]["TEST"],
+                                      mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,
+                                      output_activation=mlp_output_activation,
+                                      input_activation=input_activation,
+                                      batch_size=mlp_batch_size,fit_verbose=0))

+    db["AE"]["SPE"] = mlp_res_list

 db.sync()
 db.close()
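Note on the new activation handling above: in the Keras generation this code targets, advanced activations such as ELU and PReLU are layer objects rather than activation strings, hence the added import and the branch on ae_conf["input_activation"]. A sketch of the same mapping as a helper (assumption: the elu-to-PReLU swap simply mirrors the script above, whose intent the commit does not explain):

    from keras.layers.advanced_activations import ELU, PReLU

    def resolve_activation(conf_value):
        # plain strings such as "tanh" or "relu" pass through unchanged;
        # the "elu" keyword is replaced by a PReLU() layer object,
        # exactly as 04b-mmf_mini_ae.py does above
        if conf_value == "elu":
            return PReLU()
        return conf_value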
LDA/04c-mmf_sae.py
 # coding: utf-8

 # In[2]:

 # Import
 import gensim
 from scipy import sparse
 import itertools
 from sklearn import preprocessing
 from keras.models import Sequential
 from keras.optimizers import SGD,Adam
 from mlp import *
 import mlp
 import sklearn.metrics
 import shelve
 import pickle
 from utils import *
 import sys
 import os
 import json
+import pandas
 # In[4]:

 infer_model=shelve.open("{}".format(sys.argv[2]))
 in_dir = sys.argv[1]
+if len(sys.argv) > 4 :
+    features_key = sys.argv[4]
+else :
+    features_key = "LDA"
+save_projection = True
 #['ASR', 'TRS', 'LABEL']
 # In[6]:
 json_conf =json.load(open(sys.argv[3]))
 sae_conf = json_conf["sae"]

 hidden_size= sae_conf["hidden_size"]
 input_activation=sae_conf["input_activation"]
 output_activation=sae_conf["output_activation"]
 loss=sae_conf["loss"]
 epochs=sae_conf["epochs"]
 batch=sae_conf["batch"]
 patience=sae_conf["patience"]
 do_do=sae_conf["do"]

 try:
     k = sae_conf["sgd"]
     if sae_conf["sgd"]["name"] == "adam":
         sgd = Adam(lr=sae_conf["sgd"]["lr"])
     elif sae_conf["sgd"]["name"] == "sgd":
         sgd = SGD(lr=sae_conf["sgd"]["lr"])
 except :
     sgd = sae_conf["sgd"]

 name = json_conf["name"]
+print name
 try:
     os.mkdir("{}/{}".format(in_dir,name))
 except:
     pass
 db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True)
 #
-keys = ["ASR","TRS"]
 mlp_conf = json_conf["mlp"]
 mlp_h = mlp_conf["hidden_size"]
 mlp_loss = mlp_conf["loss"]
 mlp_dropouts = mlp_conf["do"]
 mlp_epochs = mlp_conf["epochs"]
 mlp_batch_size = mlp_conf["batch"]
 mlp_input_activation=mlp_conf["input_activation"]
 mlp_output_activation=mlp_conf["output_activation"]

 try:
     k = mlp_conf["sgd"]
     if mlp_conf["sgd"]["name"] == "adam":
         mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])
     elif mlp_conf["sgd"]["name"] == "sgd" :
         mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"])
 except :
     mlp_sgd = mlp_conf["sgd"]

+keys = infer_model[features_key].keys()
 db["SAE"] = {}

 db["SAEFT"] = {}
 for mod in keys :
-    res_tuple=train_sae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],
-                        infer_model["LDA"][mod]["TEST"],
+    res_tuple=train_sae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],
+                        infer_model[features_key][mod]["TEST"],
                         hidden_size,dropouts=do_do,
                         patience = "patience",sgd=sgd,input_activation="tanh",
                         output_activation="tanh",loss=loss,epochs=epochs,
                         batch_size=batch,verbose=0)
     #print len(res), [len(x) for x in res[0]], [ len(x) for x in res[1]]
-    for name , levels in zip(["SAE","SAEFT"],res_tuple):
+    for i, levels in zip(["SAE","SAEFT"],res_tuple):
         mlp_res_by_level = []
-        for res in levels:
+        for lvl,res in enumerate(levels):
             mlp_res_list=[]
             for nb,layer in enumerate(res) :
+                if save_projection:
+                    pd = pandas.DataFrame(layer[0])
+                    col_count= (pd.sum(axis=0) != 0)
+                    pd = pd.loc[:,col_count]
+                    hdffile = "{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,mod)
+                    print hdffile
+                    pd.to_hdf(hdffile,"TRAIN")
+                    pd = pandas.DataFrame(layer[1])
+                    pd = pd.loc[:,col_count]
+                    pd.to_hdf(hdffile,"DEV")
+                    pd = pandas.DataFrame(layer[2])
+                    pd = pd.loc[:,col_count]
+                    pd.to_hdf(hdffile,"TEST")
+                    del pd
                 mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
                                               layer[1],infer_model["LABEL"][mod]["DEV"],
                                               layer[2],infer_model["LABEL"][mod]["TEST"],
                                               mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
                                               sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,
                                               fit_verbose=0))
             mlp_res_by_level.append(mlp_res_list)
-        db[name][mod]=mlp_res_by_level
+        db[i][mod]=mlp_res_by_level

-mod = "ASR"
-mod2= "TRS"
-res_tuple = train_sae(infer_model["LDA"][mod]["TRAIN"],
-                      infer_model["LDA"][mod]["DEV"],
-                      infer_model["LDA"][mod]["TEST"],
-                      hidden_size,dropouts=[0],patience="patience",
-                      sgd=sgd,input_activation=input_activation,output_activation=input_activation,
-                      loss=loss,epochs=epochs,batch_size=batch,
-                      y_train=infer_model["LDA"][mod2]["TRAIN"],
-                      y_dev=infer_model["LDA"][mod2]["DEV"],
-                      y_test=infer_model["LDA"][mod2]["TEST"])
+if "ASR" in keys and "TRS" in keys :
+    mod = "ASR"
+    mod2= "TRS"
+    res_tuple = train_sae(infer_model[features_key][mod]["TRAIN"],
+                          infer_model[features_key][mod]["DEV"],
+                          infer_model[features_key][mod]["TEST"],
+                          hidden_size,dropouts=[0],patience="patience",
+                          sgd=sgd,input_activation=input_activation,output_activation=input_activation,
+                          loss=loss,epochs=epochs,batch_size=batch,
+                          y_train=infer_model[features_key][mod2]["TRAIN"],
+                          y_dev=infer_model[features_key][mod2]["DEV"],
+                          y_test=infer_model[features_key][mod2]["TEST"])

-for name , levels in zip(["SAE","SAEFT"],res_tuple):
-    mlp_res_by_level = []
-    for res in levels :
-        mlp_res_list=[]
-        for layer in res :
-            mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"],
-                                          layer[1],infer_model["LABEL"][mod]["DEV"],layer[2],
-                                          infer_model["LABEL"][mod]["TEST"],
-                                          mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
-                                          sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,
-                                          fit_verbose=0))
-        mlp_res_by_level.append(mlp_res_list)
-    db[name]["SPE"] = mlp_res_by_level
+    for i , levels in zip(["SAE","SAEFT"],res_tuple):
+        mlp_res_by_level = []
+        for lvl,res in enumerate(levels) :
+            mlp_res_list=[]
+            for nb,layer in enumerate(res) :
+                if save_projection:
+                    pd = pandas.DataFrame(layer[0])
+                    col_count= (pd.sum(axis=0) != 0)
+                    pd = pd.loc[:,col_count]
+                    pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"TRAIN")
+                    pd = pandas.DataFrame(layer[1])
+                    pd = pd.loc[:,col_count]
+                    pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"DEV")
+                    pd = pandas.DataFrame(layer[2])
+                    pd = pd.loc[:,col_count]
+                    pd.to_hdf("{}/{}/{}_{}_{}_{}_df.hdf".format(in_dir,name,i,lvl,nb,"SPE"),"TEST")
+                    del pd
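Note: the try/except block that turns the "sgd" entry of the JSON config into a Keras optimizer is repeated in every script touched by this commit. A sketch of one way it could be factored out (make_optimizer is a hypothetical helper, not part of the commit; it assumes the same lr= signature the scripts already use):

    from keras.optimizers import SGD, Adam

    def make_optimizer(conf):
        # conf is either an optimizer name Keras accepts directly
        # (e.g. "rmsprop") or a dict such as {"name": "adam", "lr": 0.0001}
        if isinstance(conf, dict):
            if conf["name"] == "adam":
                return Adam(lr=conf["lr"])
            if conf["name"] == "sgd":
                return SGD(lr=conf["lr"])
        return conf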
LDA/04d-mmf_dsae.py
 # coding: utf-8

 # In[2]:

 # Import
 import gensim
 from scipy import sparse
 import itertools
 from sklearn import preprocessing
 from keras.models import Sequential
 from keras.optimizers import SGD,Adam
 from mlp import *
 import mlp
 import sklearn.metrics
 import shelve
 import pickle

 from utils import *
 import sys
 import os
 import json
 # In[4]:

 infer_model=shelve.open("{}".format(sys.argv[2]))
 in_dir = sys.argv[1]
 #['ASR', 'TRS', 'LABEL']
 # In[6]:
+if len(sys.argv) > 4 :
+    features_key = sys.argv[4]
+else :
+    features_key = "LDA"

 json_conf =json.load(open(sys.argv[3]))

 dsae_conf = json_conf["dsae"]

 hidden_size= dsae_conf["hidden_size"]
 input_activation=dsae_conf["input_activation"]
 output_activation=dsae_conf["output_activation"]
 loss=dsae_conf["loss"]
 epochs=dsae_conf["epochs"]
 batch_size=dsae_conf["batch"]
 patience=dsae_conf["patience"]
 do_do=dsae_conf["do"]
 try:
     k = dsae_conf["sgd"]
     if dsae_conf["sgd"]["name"] == "adam":
         sgd = Adam(lr=dsae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
     elif dsae_conf["sgd"]["name"] == "sgd":
         sgd = SGD(lr=dsae_conf["sgd"]["lr"])
 except:
     sgd = dsae_conf["sgd"]

 trans_conf = json_conf["dsae"]["transform"]
 trans_hidden_size=trans_conf["hidden_size"]
 trans_input_activation=trans_conf["input_activation"]
 trans_output_activation=trans_conf["output_activation"]
 trans_loss=trans_conf["loss"]
 trans_epochs=trans_conf["epochs"]
 trans_batch_size=trans_conf["batch"]
 trans_patience=trans_conf["patience"]
 trans_do=trans_conf["do"]
 try:
     k = trans_conf["sgd"]
     if trans_conf["sgd"]["name"] == "adam":
         trans_sgd = Adam(lr=trans_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
     elif trans_conf["sgd"]["name"] == "sgd":
         trans_sgd = SGD(lr=trans_conf["sgd"]["lr"])
 except :
     trans_sgd = trans_conf["sgd"]


 mlp_conf = json_conf["mlp"]
 mlp_h = mlp_conf["hidden_size"]
 mlp_loss = mlp_conf["loss"]
 mlp_dropouts = mlp_conf["do"]
 mlp_epochs = mlp_conf["epochs"]
 mlp_batch_size = mlp_conf["batch"]
 mlp_input_activation=mlp_conf["input_activation"]
 mlp_output_activation=mlp_conf["output_activation"]
 try:
     k = mlp_conf["sgd"]
     if mlp_conf["sgd"]["name"] == "adam":
         mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
     elif mlp_conf["sgd"]["name"] == "sgd":
         mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"])
 except:
     mlp_sgd = mlp_conf["sgd"]


 name = json_conf["name"]
 try:
     os.mkdir("{}/{}".format(in_dir,name))
 except:
     pass
 db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True)
 #

 keys = ["ASR","TRS"]

 db["DSAE"] = {}

 db["DSAEFT"] = {}
 mod = "ASR"
-res_tuple_ASR = train_ae(infer_model["LDA"][mod]["TRAIN"],
-                         infer_model["LDA"][mod]["DEV"],
-                         infer_model["LDA"][mod]["TEST"],
+res_tuple_ASR = train_ae(infer_model[features_key][mod]["TRAIN"],
+                         infer_model[features_key][mod]["DEV"],
+                         infer_model[features_key][mod]["TEST"],
                          hidden_size,dropouts=do_do,
                          patience = patience,sgd=sgd,
                          input_activation=input_activation,
                          output_activation=output_activation,loss=loss,epochs=epochs,
                          batch_size=batch_size,verbose=0,get_weights=True)
 mlp_res_list = []
 for layer in res_tuple_ASR[0]:
     mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
                                   layer[1],infer_model["LABEL"][mod]["DEV"],
                                   layer[2],infer_model["LABEL"][mod]["TEST"],
                                   mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
                                   sgd=mlp_sgd,epochs=mlp_epochs,
                                   output_activation=mlp_output_activation,
                                   input_activation=mlp_input_activation,
                                   batch_size=mlp_batch_size,fit_verbose=0))

 db["DSAE"][mod] = mlp_res_list
 mod = "TRS"
-res_tuple_TRS = train_ae(infer_model["LDA"][mod]["TRAIN"],
-                         infer_model["LDA"][mod]["DEV"],
-                         infer_model["LDA"][mod]["TEST"],
+res_tuple_TRS = train_ae(infer_model[features_key][mod]["TRAIN"],
+                         infer_model[features_key][mod]["DEV"],
+                         infer_model[features_key][mod]["TEST"],
                          hidden_size,dropouts=do_do,
                          sgd=sgd,input_activation=input_activation,
                          output_activation=output_activation,loss=loss,epochs=epochs,
                          batch_size=batch_size,patience=patience,
                          verbose=0,get_weights=True)

 mlp_res_list = []
 for layer in res_tuple_TRS[0]:
     mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
                                   layer[1],infer_model["LABEL"][mod]["DEV"],
                                   layer[2],infer_model["LABEL"][mod]["TEST"],
                                   mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
                                   sgd=mlp_sgd,epochs=mlp_epochs,
                                   output_activation=mlp_output_activation,
                                   input_activation=mlp_input_activation,
                                   batch_size=mlp_batch_size,fit_verbose=0))

 db["DSAE"][mod] = mlp_res_list


 transfert = []

 print " get weight trans"

 #for asr_pred, trs_pred in zip(res_tuple_ASR[0], res_tuple_TRS[0]):
 #    print "ASR", [ x.shape for x in asr_pred]
 #    print "TRS", [ x.shape for x in trs_pred]

 for asr_pred, trs_pred in zip(res_tuple_ASR[0], res_tuple_TRS[0]):
     #    print "ASR", [ x.shape for x in asr_pred]
     #    print "TRS", [ x.shape for x in trs_pred]
     #    print " TRANS SGD", trans_sgd
     transfert.append( train_ae(asr_pred[0],
                                asr_pred[1],
                                asr_pred[2],
                                trans_hidden_size,
                                dropouts=trans_do,
                                y_train = trs_pred[0],
                                y_dev=trs_pred[1],
                                y_test = trs_pred[2],
                                patience = trans_patience,sgd=trans_sgd,
                                input_activation=trans_input_activation,
                                output_activation=trans_output_activation,
                                loss=trans_loss,
                                epochs=trans_epochs,
                                batch_size=trans_batch_size,verbose=0,get_weights=True) )
 mod = "ASR"
 mlp_res_bylvl = []
 print " MLP on transfert "
 for level, w in transfert :
     mlp_res_list = []
     for layer in level :
         mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
                                       layer[1],infer_model["LABEL"][mod]["DEV"],
                                       layer[2],infer_model["LABEL"][mod]["TEST"],
                                       mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
                                       sgd=mlp_sgd,epochs=mlp_epochs,
                                       output_activation=mlp_output_activation,
                                       input_activation=mlp_input_activation,
                                       batch_size=mlp_batch_size,fit_verbose=0))
     mlp_res_bylvl.append(mlp_res_list)
 db["DSAE"]["transfert"] = mlp_res_bylvl


 print " FT "
 WA = res_tuple_ASR[1]
 #print "WA", len(WA), [ len(x) for x in WA]
 WT = res_tuple_TRS[1]
 #print "WT", len(WT), [ len(x) for x in WT]
 Wtr = [ x[1] for x in transfert]
 #print "Wtr", len(Wtr), [ len(x) for x in Wtr],[ len(x[1]) for x in Wtr]

-ft_res = ft_dsae(infer_model["LDA"]["ASR"]["TRAIN"],
-                 infer_model["LDA"]["ASR"]["DEV"],
-                 infer_model["LDA"]["ASR"]["TEST"],
-                 y_train=infer_model["LDA"]["TRS"]["TRAIN"],
-                 y_dev=infer_model["LDA"]["TRS"]["DEV"],
-                 y_test=infer_model["LDA"]["TRS"]["TEST"],
+ft_res = ft_dsae(infer_model[features_key]["ASR"]["TRAIN"],
+                 infer_model[features_key]["ASR"]["DEV"],
+                 infer_model[features_key]["ASR"]["TEST"],
+                 y_train=infer_model[features_key]["TRS"]["TRAIN"],
+                 y_dev=infer_model[features_key]["TRS"]["DEV"],
+                 y_test=infer_model[features_key]["TRS"]["TEST"],
                  ae_hidden = hidden_size,
                  transfer_hidden = trans_hidden_size,
                  start_weights = WA,
                  transfer_weights = Wtr,
                  end_weights = WT,
                  input_activation = input_activation,
                  output_activation = output_activation,
                  ae_dropouts= do_do,
                  transfer_do = trans_do,
                  sgd = sgd,
                  loss = loss ,
                  patience = patience,
                  batch_size = batch_size,
                  epochs= epochs)
 mlps_by_lvls= []
 for level in ft_res :
     mlp_res_list = []
     for layer in level :
         mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"],
                                       layer[1],infer_model["LABEL"][mod]["DEV"],
                                       layer[2],infer_model["LABEL"][mod]["TEST"],
                                       mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,
                                       sgd=mlp_sgd,epochs=mlp_epochs,
                                       output_activation=mlp_output_activation,
                                       input_activation=mlp_input_activation,
                                       batch_size=mlp_batch_size,fit_verbose=0))
     mlps_by_lvls.append(mlp_res_list)


 db["DSAEFT"]["transfert"] = mlps_by_lvls

 db.close()
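Note: all of these scripts read the same shelve layout, features under infer_model[features_key][modality][split] and one-hot labels under infer_model["LABEL"][modality][split], with modalities like "ASR"/"TRS" and splits "TRAIN"/"DEV"/"TEST". A minimal sketch of a compatible shelve (dimensions and class count are invented for illustration):

    import shelve
    import numpy

    db = shelve.open("toy_corpus.shelve", writeback=True)
    db["LDA"] = {}
    db["LABEL"] = {}
    for mod in ["ASR", "TRS"]:
        db["LDA"][mod] = {}
        db["LABEL"][mod] = {}
        for split, n in [("TRAIN", 100), ("DEV", 20), ("TEST", 20)]:
            # feature matrix: one row per document
            db["LDA"][mod][split] = numpy.random.rand(n, 30)
            # one-hot labels for a hypothetical 6-class task
            db["LABEL"][mod][split] = numpy.eye(6)[numpy.random.randint(0, 6, n)]
    db.sync()
    db.close()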
LDA/04e-mm_vae.py
1 | 1 | ||
2 | # coding: utf-8 | 2 | # coding: utf-8 |
3 | import gensim | 3 | import gensim |
4 | from scipy import sparse | 4 | from scipy import sparse |
5 | import itertools | 5 | import itertools |
6 | from sklearn import preprocessing | 6 | from sklearn import preprocessing |
7 | from keras.models import Sequential | 7 | from keras.models import Sequential |
8 | from keras.optimizers import SGD,Adam | 8 | from keras.optimizers import SGD,Adam |
9 | from mlp import * | 9 | from mlp import * |
10 | from vae import * | 10 | from vae import * |
11 | import sklearn.metrics | 11 | import sklearn.metrics |
12 | import shelve | 12 | import shelve |
13 | import pickle | 13 | import pickle |
14 | from utils import * | 14 | from utils import * |
15 | import sys | 15 | import sys |
16 | import os | 16 | import os |
17 | import json | 17 | import json |
18 | # In[4]: | 18 | # In[4]: |
19 | 19 | ||
20 | infer_model=shelve.open("{}".format(sys.argv[2])) | 20 | infer_model=shelve.open("{}".format(sys.argv[2])) |
21 | in_dir = sys.argv[1] | 21 | in_dir = sys.argv[1] |
22 | #['ASR', 'TRS', 'LABEL'] | 22 | #['ASR', 'TRS', 'LABEL'] |
23 | # In[6]: | 23 | # In[6]: |
24 | if len(sys.argv) > 4 : | ||
25 | features_key = sys.argv[4] | ||
26 | else : | ||
27 | features_key = "LDA" | ||
24 | 28 | ||
29 | save_projection = True | ||
25 | json_conf =json.load(open(sys.argv[3])) | 30 | json_conf =json.load(open(sys.argv[3])) |
26 | vae_conf = json_conf["vae"] | 31 | vae_conf = json_conf["vae"] |
27 | 32 | ||
28 | hidden_size= vae_conf["hidden_size"] | 33 | hidden_size= vae_conf["hidden_size"] |
29 | input_activation=vae_conf["input_activation"] | 34 | input_activation=vae_conf["input_activation"] |
30 | output_activation=vae_conf["output_activation"] | 35 | output_activation=vae_conf["output_activation"] |
31 | epochs=vae_conf["epochs"] | 36 | epochs=vae_conf["epochs"] |
32 | batch=vae_conf["batch"] | 37 | batch=vae_conf["batch"] |
33 | patience=vae_conf["patience"] | 38 | patience=vae_conf["patience"] |
34 | latent_dim = vae_conf["latent"] | 39 | latent_dim = vae_conf["latent"] |
35 | try: | 40 | try: |
36 | k = vae_conf["sgd"] | 41 | k = vae_conf["sgd"] |
37 | if vae_conf["sgd"]["name"] == "adam": | 42 | if vae_conf["sgd"]["name"] == "adam": |
38 | sgd = Adam(lr=vae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | 43 | sgd = Adam(lr=vae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) |
39 | elif vae_conf["sgd"]["name"] == "sgd": | 44 | elif vae_conf["sgd"]["name"] == "sgd": |
40 | sgd = SGD(lr=vae_conf["sgd"]["lr"]) | 45 | sgd = SGD(lr=vae_conf["sgd"]["lr"]) |
41 | except: | 46 | except: |
42 | sgd = vae_conf["sgd"] | 47 | sgd = vae_conf["sgd"] |
43 | 48 | ||
44 | mlp_conf = json_conf["mlp"] | 49 | mlp_conf = json_conf["mlp"] |
45 | mlp_h = mlp_conf["hidden_size"] | 50 | mlp_h = mlp_conf["hidden_size"] |
46 | mlp_loss = mlp_conf["loss"] | 51 | mlp_loss = mlp_conf["loss"] |
47 | mlp_dropouts = mlp_conf["do"] | 52 | mlp_dropouts = mlp_conf["do"] |
48 | mlp_epochs = mlp_conf["epochs"] | 53 | mlp_epochs = mlp_conf["epochs"] |
49 | mlp_batch_size = mlp_conf["batch"] | 54 | mlp_batch_size = mlp_conf["batch"] |
50 | mlp_input_activation=mlp_conf["input_activation"] | 55 | mlp_input_activation=mlp_conf["input_activation"] |
51 | mlp_output_activation=mlp_conf["output_activation"] | 56 | mlp_output_activation=mlp_conf["output_activation"] |
52 | 57 | ||
53 | 58 | ||
54 | try: | 59 | try: |
55 | k = mlp_conf["sgd"] | 60 | k = mlp_conf["sgd"] |
56 | if mlp_conf["sgd"]["name"] == "adam": | 61 | if mlp_conf["sgd"]["name"] == "adam": |
57 | mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | 62 | mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) |
58 | elif mlp_conf["sgd"]["name"] == "sgd": | 63 | elif mlp_conf["sgd"]["name"] == "sgd": |
59 | mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"]) | 64 | mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"]) |
60 | except: | 65 | except: |
61 | mlp_sgd = mlp_conf["sgd"] | 66 | mlp_sgd = mlp_conf["sgd"] |
62 | 67 | ||
63 | 68 | ||
64 | name = json_conf["name"] | 69 | name = json_conf["name"] |
65 | 70 | ||
66 | 71 | try : | |
67 | try: | 72 | print "make folder " |
68 | os.mkdir("{}/{}".format(in_dir,name)) | 73 | os.mkdir("{}/{}".format(in_dir,name)) |
69 | except: | 74 | except: |
75 | print "folder not maked" | ||
70 | pass | 76 | pass |
71 | 77 | ||
72 | 78 | ||
73 | db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True) | 79 | db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True) |
74 | db["LABEL"]=infer_model["LABEL"] | 80 | db["LABEL"]=infer_model["LABEL"] |
75 | # | 81 | # |
76 | 82 | ||
77 | keys = ["ASR","TRS"] | ||
78 | 83 | ||
84 | keys = infer_model[features_key].keys() | ||
85 | |||
79 | db["VAE"] = {} | 86 | db["VAE"] = {} |
80 | db["LDA"] = {} | 87 | db[features_key] = {} |
81 | for mod in keys : | 88 | for mod in keys : |
82 | #print mod | 89 | #print mod |
83 | db["LDA"][mod] = train_mlp(infer_model["LDA"][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"], | 90 | db[features_key][mod] = train_mlp(infer_model[features_key][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"], |
84 | infer_model["LDA"][mod]["DEV"],infer_model["LABEL"][mod]["DEV"], | 91 | infer_model[features_key][mod]["DEV"],infer_model["LABEL"][mod]["DEV"], |
85 | infer_model["LDA"][mod]["TEST"],infer_model["LABEL"][mod]["TEST"], | 92 | infer_model[features_key][mod]["TEST"],infer_model["LABEL"][mod]["TEST"], |
86 | mlp_h ,sgd=mlp_sgd, | 93 | mlp_h ,sgd=mlp_sgd, |
87 | epochs=mlp_epochs, | 94 | epochs=mlp_epochs, |
88 | batch_size=mlp_batch_size, | 95 | batch_size=mlp_batch_size, |
89 | input_activation=input_activation, | 96 | input_activation=input_activation, |
90 | output_activation=mlp_output_activation, | 97 | output_activation=mlp_output_activation, |
91 | dropouts=mlp_dropouts, | 98 | dropouts=mlp_dropouts, |
92 | fit_verbose=0) | 99 | fit_verbose=0) |
93 | 100 | ||
94 | res=train_vae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"], | 101 | res=train_vae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],infer_model[features_key][mod]["TEST"], |
95 | hidden_size=hidden_size[0], | 102 | hidden_size=hidden_size[0], |
96 | latent_dim=latent_dim,sgd=sgd, | 103 | latent_dim=latent_dim,sgd=sgd, |
97 | input_activation=input_activation,output_activation=output_activation, | 104 | input_activation=input_activation,output_activation=output_activation, |
98 | nb_epochs=epochs,batch_size=batch) | 105 | nb_epochs=epochs,batch_size=batch) |
99 | mlp_res_list=[] | 106 | mlp_res_list=[] |
100 | for layer in res : | 107 | for nb,layer in enumerate(res) : |
108 | if save_projection: | ||
109 | pd = pandas.DataFrame(layer[0]) | ||
110 | col_count = (pd.sum(axis=0) != 0) | ||
111 | pd = pd.loc[:,cyyol_count] | ||
112 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TRAIN") | ||
113 | pd = pandas.DataFrame(layer[1]) | ||
114 | pd = pd.loc[:,col_count] | ||
115 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"DEV") | ||
116 | pd = pandas.DataFrame(layer[2]) | ||
117 | pd = pd.loc[:,col_count] | ||
118 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TEST") | ||
119 | del pd | ||
120 | |||
101 | mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"], | 121 | mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"], |
102 | layer[1],infer_model["LABEL"][mod]["DEV"], | 122 | layer[1],infer_model["LABEL"][mod]["DEV"], |
103 | layer[2],infer_model["LABEL"][mod]["TEST"], | 123 | layer[2],infer_model["LABEL"][mod]["TEST"], |
104 | mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs, | 124 | mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs, |
105 | output_activation=mlp_output_activation, | 125 | output_activation=mlp_output_activation, |
106 | input_activation=input_activation, | 126 | input_activation=input_activation, |
107 | batch_size=mlp_batch_size,fit_verbose=0)) | 127 | batch_size=mlp_batch_size,fit_verbose=0)) |
108 | db["VAE"][mod]=mlp_res_list | 128 | db["VAE"][mod]=mlp_res_list |
109 | 129 | ||
110 | mod = "ASR" | 130 | if "ASR" in keys and "TRS" in keys : |
111 | mod2= "TRS" | 131 | mod = "ASR" |
112 | mlp_res_list=[] | 132 | mod2= "TRS" |
133 | mlp_res_list=[] | ||
113 | 134 | ||
114 | res = train_vae(infer_model["LDA"][mod]["TRAIN"], | 135 | res = train_vae(infer_model[features_key][mod]["TRAIN"], |
115 | infer_model["LDA"][mod]["DEV"], | 136 | infer_model[features_key][mod]["DEV"], |
116 | infer_model["LDA"][mod]["TEST"], | 137 | infer_model[features_key][mod]["TEST"], |
117 | hidden_size=hidden_size[0], | 138 | hidden_size=hidden_size[0], |
118 | sgd=sgd,input_activation=input_activation,output_activation=output_activation, | 139 | sgd=sgd,input_activation=input_activation,output_activation=output_activation, |
119 | latent_dim=latent_dim, | 140 | latent_dim=latent_dim, |
120 | nb_epochs=epochs, | 141 | nb_epochs=epochs, |
121 | batch_size=batch, | 142 | batch_size=batch, |
122 | y_train=infer_model["LDA"][mod2]["TRAIN"], | 143 | y_train=infer_model[features_key][mod2]["TRAIN"], |
123 | y_dev=infer_model["LDA"][mod2]["DEV"], | 144 | y_dev=infer_model[features_key][mod2]["DEV"], |
124 | y_test=infer_model["LDA"][mod2]["TEST"]) | 145 | y_test=infer_model[features_key][mod2]["TEST"]) |
125 | 146 | ||
126 | for layer in res : | 147 | for nb,layer in enumerate(res) : |
127 | mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"], | 148 | if save_projection: |
128 | layer[1],infer_model["LABEL"][mod]["DEV"], | 149 | pd = pandas.DataFrame(layer[0]) |
129 | layer[2],infer_model["LABEL"][mod]["TEST"], | 150 | col_count = (pd.sum(axis=0) != 0) |
130 | mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs, | 151 | pd = pd.loc[:,col_count] |
131 | output_activation=mlp_output_activation, | 152 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TRAIN") |
132 | input_activation=input_activation, | 153 | pd = pandas.DataFrame(layer[1]) |
133 | batch_size=mlp_batch_size,fit_verbose=0)) | 154 | pd = pd.loc[:,col_count] |
155 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"DEV") | ||
156 | pd = pandas.DataFrame(layer[2]) | ||
157 | pd = pd.loc[:,col_count] | ||
158 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TEST") | ||
159 | del pd | ||
134 | 160 | ||
135 | db["VAE"]["SPE"] = mlp_res_list | 161 | mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"], |
162 | layer[1],infer_model["LABEL"][mod]["DEV"], | ||
163 | layer[2],infer_model["LABEL"][mod]["TEST"], | ||
164 | mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs, | ||
165 | output_activation=mlp_output_activation, | ||
166 | input_activation=input_activation, | ||
167 | batch_size=mlp_batch_size,fit_verbose=0)) | ||
168 | |||
169 | db["VAE"]["SPE"] = mlp_res_list | ||
136 | 170 | ||
137 | db.sync() | 171 | db.sync() |
138 | db.close() | 172 | db.close() |
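A minimal sketch of reading these stored results back, assuming the shelve layout written above (the db path is hypothetical; get_score is the helper added to LDA/utils.py later in this diff):

    import shelve
    from utils import get_score

    db = shelve.open("in_dir/name/mlp_scores.shelve")  # hypothetical path
    for mod in db["VAE"]:                  # e.g. "ASR", "TRS", "SPE"
        for res in db["VAE"][mod]:         # one train_mlp result per VAE layer
            get_score(res[:3])             # [train, dev, test] accuracy curves
    db.close()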
LDA/05-lts_scoring.py
File was created | 1 | import sys | |
2 | import shelve | ||
3 | import pickle | ||
4 | from utils import * | ||
5 | import sys | ||
6 | import os | ||
7 | import json | ||
8 | import glob | ||
9 | import tempfile | ||
10 | import pandas | ||
11 | import subprocess | ||
12 | from subprocess import CalledProcessError | ||
13 | import shutil | ||
14 | import numpy | ||
15 | |||
16 | in_dir = sys.argv[1] | ||
17 | json_conf =json.load(open(sys.argv[2])) | ||
18 | name = json_conf["name"] | ||
19 | |||
20 | ae_m = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name)) | ||
21 | y_train=numpy.argmax(ae_m["LABEL"]["ASR"]["TRAIN"],axis=1) | ||
22 | _,ytr_path=tempfile.mkstemp() | ||
23 | ytr_open= open(ytr_path,"w") | ||
24 | for el in y_train: | ||
25 | print >>ytr_open, el | ||
26 | ytr_open.close() | ||
27 | |||
28 | y_dev=numpy.argmax(ae_m["LABEL"]["ASR"]["DEV"],axis=1) | ||
29 | _,yd_path=tempfile.mkstemp() | ||
30 | yd_open = open(yd_path,"w") | ||
31 | for el in y_dev: | ||
32 | print >>yd_open, el | ||
33 | yd_open.close() | ||
34 | |||
35 | y_test=numpy.argmax(ae_m["LABEL"]["ASR"]["TEST"],axis=1) | ||
36 | _,yte_path=tempfile.mkstemp() | ||
37 | yte_open=open(yte_path,"w") | ||
38 | for el in y_test: | ||
39 | print >>yte_open, el | ||
40 | yte_open.close() | ||
41 | |||
42 | hdfs_files=glob.glob("{}/{}/*.hdf".format(in_dir,name)) | ||
43 | temp_dir=tempfile.mkdtemp() | ||
44 | out_file=open("{}/{}/malaha_res.txt".format(in_dir,name),"a") | ||
45 | |||
46 | for hdf in hdfs_files: | ||
47 | print >>out_file, "Start ---------------------------------------------------" | ||
48 | print >>out_file, hdf | ||
49 | x_train = pandas.read_hdf(hdf,"TRAIN") | ||
50 | x_train.to_csv("{}/xtrain.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False) | ||
51 | x_train = pandas.read_hdf(hdf,"DEV") | ||
52 | x_train.to_csv("{}/xdev.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False) | ||
53 | x_train = pandas.read_hdf(hdf,"TEST") | ||
54 | x_train.to_csv("{}/xtest.dat".format(temp_dir),sep=" ",header=False,index=False, index_label=False) | ||
55 | try : | ||
56 | resdev=subprocess.check_output(['Rscript', | ||
57 | '/home/laboinfo/janod/WorkingDir/erreur_traduction/Author_Topic_Decoda/estimate.R', | ||
58 | "{}/xtrain.dat".format(temp_dir), | ||
59 | "{}/xdev.dat".format(temp_dir), | ||
60 | ytr_path,yd_path]) | ||
61 | |||
62 | restest=subprocess.check_output(['Rscript', | ||
63 | '/home/laboinfo/janod/WorkingDir/erreur_traduction/Author_Topic_Decoda/estimate.R', | ||
64 | "{}/xtrain.dat".format(temp_dir), | ||
65 | "{}/xtest.dat".format(temp_dir), | ||
66 | ytr_path,yte_path]) | ||
67 | |||
68 | print >>out_file, resdev | ||
69 | print >>out_file, hdf | ||
70 | print >>out_file, restest | ||
71 | except CalledProcessError: | ||
72 | print >>out_file, "FAILED" | ||
73 | print >>out_file, hdf | ||
74 | print >>out_file, "End ---------------------------------------------------" | ||
75 | |||
76 | shutil.rmtree(temp_dir) | ||
77 | os.remove(ytr_path) | ||
78 | os.remove(yd_path) | ||
79 | os.remove(yte_path) | ||
80 | |||
81 |
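The three label-dump loops above are equivalent to a single helper built on numpy.savetxt, which writes one value per line; a sketch reusing the ae_m shelve opened above (the helper name is illustrative):

    def dump_labels(one_hot):
        # write argmax class indices, one per line, to a temp file
        _, path = tempfile.mkstemp()
        numpy.savetxt(path, numpy.argmax(one_hot, axis=1), fmt="%d")
        return path

    ytr_path = dump_labels(ae_m["LABEL"]["ASR"]["TRAIN"])
    yd_path = dump_labels(ae_m["LABEL"]["ASR"]["DEV"])
    yte_path = dump_labels(ae_m["LABEL"]["ASR"]["TEST"])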
LDA/mlp.py
1 | # -*- coding: utf-8 -*- | 1 | # -*- coding: utf-8 -*- |
2 | import keras | 2 | import keras |
3 | import numpy as np | 3 | import numpy as np |
4 | #from keras.layers.core import Dense, Dropout, Activation | 4 | #from keras.layers.core import Dense, Dropout, Activation |
5 | from keras.optimizers import SGD,Adam | 5 | from keras.optimizers import SGD,Adam |
6 | from keras.models import Sequential | 6 | from keras.models import Sequential |
7 | from keras.layers import Input, Dense, Dropout | 7 | from keras.layers import Input, Dense, Dropout |
8 | from keras.models import Model | 8 | from keras.models import Model |
9 | from keras.utils.layer_utils import layer_from_config | 9 | from keras.utils.layer_utils import layer_from_config |
10 | from itertools import izip_longest | 10 | from itertools import izip_longest |
11 | 11 | ||
12 | import pandas | 12 | import pandas |
13 | from collections import namedtuple | 13 | from collections import namedtuple |
14 | from sklearn.metrics import accuracy_score as perf | 14 | from sklearn.metrics import accuracy_score as perf |
15 | save_tuple= namedtuple("save_tuple",["pred_train","pred_dev","pred_test"]) | 15 | save_tuple= namedtuple("save_tuple",["pred_train","pred_dev","pred_test"]) |
16 | 16 | ||
17 | 17 | ||
18 | def ft_dsae(train,dev,test, | 18 | def ft_dsae(train,dev,test, |
19 | y_train=None,y_dev=None,y_test=None, | 19 | y_train=None,y_dev=None,y_test=None, |
20 | ae_hidden=[20],transfer_hidden=[20], | 20 | ae_hidden=[20],transfer_hidden=[20], |
21 | start_weights=None,transfer_weights=None,end_weights=None, | 21 | start_weights=None,transfer_weights=None,end_weights=None, |
22 | input_activation="tanh", output_activation="tanh", | 22 | input_activation="tanh", output_activation="tanh", |
23 | init="glorot_uniform", | 23 | init="glorot_uniform", |
24 | ae_dropouts=[None], transfer_do=[None], | 24 | ae_dropouts=[None], transfer_do=[None], |
25 | sgd="sgd", loss="mse", patience=5, verbose=0, epochs=5, batch_size=8): | 25 | sgd="sgd", loss="mse", patience=5, verbose=0, epochs=5, batch_size=8): |
26 | 26 | ||
27 | if not start_weights : | 27 | if not start_weights : |
28 | start_weights = [ None ] * len(ae_hidden) | 28 | start_weights = [ None ] * len(ae_hidden) |
29 | if not transfer_weights : | 29 | if not transfer_weights : |
30 | transfer_weights = [None ] * len(transfer_hidden) | 30 | transfer_weights = [None ] * len(transfer_hidden) |
31 | if not end_weights : | 31 | if not end_weights : |
32 | end_weights = [ None ] * len(ae_hidden) | 32 | end_weights = [ None ] * len(ae_hidden) |
33 | if not transfer_do : | 33 | if not transfer_do : |
34 | transfer_do = [0] * len(transfer_hidden) | 34 | transfer_do = [0] * len(transfer_hidden) |
35 | predict_y = True | 35 | predict_y = True |
36 | if y_train is None or y_dev is None or y_test is None : | 36 | if y_train is None or y_dev is None or y_test is None : |
37 | y_train = train | 37 | y_train = train |
38 | y_dev = dev | 38 | y_dev = dev |
39 | y_test = test | 39 | y_test = test |
40 | predict_y = False | 40 | predict_y = False |
41 | param_predict = [ train, dev, test ] | 41 | param_predict = [ train, dev, test ] |
42 | if predict_y : | 42 | if predict_y : |
43 | param_predict += [ y_train, y_dev ,y_test ] | 43 | param_predict += [ y_train, y_dev ,y_test ] |
44 | 44 | ||
45 | pred_by_level = [] # Holds the predictions for each transfer level | 45 | pred_by_level = [] # Holds the predictions for each transfer level |
46 | layers = [Input(shape=(train.shape[1],))] | 46 | layers = [Input(shape=(train.shape[1],))] |
47 | #for w in transfer_weights: | 47 | #for w in transfer_weights: |
48 | #print "TW",[ [ y.shape for y in x ] for x in w] | 48 | #print "TW",[ [ y.shape for y in x ] for x in w] |
49 | #print "SW",[ [ y.shape for y in x] for x in start_weights] | 49 | #print "SW",[ [ y.shape for y in x] for x in start_weights] |
50 | #print "EW",[ [ y.shape for y in x ] for x in end_weights] | 50 | #print "EW",[ [ y.shape for y in x ] for x in end_weights] |
51 | for cpt in range(1,len(ae_hidden)): | 51 | for cpt in range(1,len(ae_hidden)): |
52 | #print ae_hidden,cpt | 52 | #print ae_hidden,cpt |
53 | #print cpt, "before" | 53 | #print cpt, "before" |
54 | #print "before2", [ [ x.shape for x in y] for y in start_weights[:cpt] ] | 54 | #print "before2", [ [ x.shape for x in y] for y in start_weights[:cpt] ] |
55 | #print "before3", [ [ x.shape for x in y] for y in transfer_weights[cpt]] | 55 | #print "before3", [ [ x.shape for x in y] for y in transfer_weights[cpt]] |
56 | #print "before4", [ [ x.shape for x in y] for y in end_weights[cpt:]] | 56 | #print "before4", [ [ x.shape for x in y] for y in end_weights[cpt:]] |
57 | sizes = ae_hidden[:cpt] + transfer_hidden + ae_hidden[cpt:] | 57 | sizes = ae_hidden[:cpt] + transfer_hidden + ae_hidden[cpt:] |
58 | weights = start_weights[:cpt] + transfer_weights[(cpt-1)] + end_weights[cpt:] | 58 | weights = start_weights[:cpt] + transfer_weights[(cpt-1)] + end_weights[cpt:] |
59 | #print "SIZES", sizes | 59 | #print "SIZES", sizes |
60 | #print "AW",[ [ y.shape for y in x ] for x in weights] | 60 | #print "AW",[ [ y.shape for y in x ] for x in weights] |
61 | #print "WEI", len(weights) , [ len(x) for x in weights ] | 61 | #print "WEI", len(weights) , [ len(x) for x in weights ] |
62 | if len(ae_dropouts) == len(ae_hidden): | 62 | if len(ae_dropouts) == len(ae_hidden): |
63 | do = ae_dropouts[:cpt] + transfer_do + ae_dropouts[cpt:] | 63 | do = ae_dropouts[:cpt] + transfer_do + ae_dropouts[cpt:] |
64 | else : | 64 | else : |
65 | do = [ 0 ] * (len(ae_hidden) + len(transfer_hidden)) | 65 | do = [ 0 ] * (len(ae_hidden) + len(transfer_hidden)) |
66 | for w in weights[:-1]: | 66 | for w in weights[:-1]: |
67 | #print "STEP", size | 67 | #print "STEP", size |
68 | layers.append(Dense(w[1].shape[0],activation=input_activation,init=init,weights=w)(layers[-1])) | 68 | layers.append(Dense(w[1].shape[0],activation=input_activation,init=init,weights=w)(layers[-1])) |
69 | if do : | 69 | if do : |
70 | d = do.pop(0) | 70 | d = do.pop(0) |
71 | if d > 0 : | 71 | if d > 0 : |
72 | layers.append(Dropout(d)(layers[-1])) | 72 | layers.append(Dropout(d)(layers[-1])) |
73 | 73 | ||
74 | layers.append(Dense(y_train.shape[1],activation=output_activation)(layers[-1])) | 74 | layers.append(Dense(y_train.shape[1],activation=output_activation)(layers[-1])) |
75 | models = [Model(input=layers[0] , output=x) for x in layers[1:]] | 75 | models = [Model(input=layers[0] , output=x) for x in layers[1:]] |
76 | models[-1].compile(optimizer=sgd,loss=loss) | 76 | models[-1].compile(optimizer=sgd,loss=loss) |
77 | models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],validation_data=(dev,y_dev),verbose=verbose) | 77 | models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],validation_data=(dev,y_dev),verbose=verbose) |
78 | predictions = [ [x.predict(y) for y in param_predict ] for x in models ] | 78 | predictions = [ [x.predict(y) for y in param_predict ] for x in models ] |
79 | pred_by_level.append(predictions) | 79 | pred_by_level.append(predictions) |
80 | 80 | ||
81 | return pred_by_level | 81 | return pred_by_level |
82 | 82 | ||
83 | def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,test_verbose=0,save_pred=False,keep_histo=False): | 83 | def train_mlp(x_train,y_train,x_dev,y_dev,x_test,y_test,hidden_size,input_activation="relu",hidden_activation="relu",output_activation="softmax",loss="mse",init="glorot_uniform",dropouts=None,sgd=None,epochs=1200,batch_size=16,fit_verbose=1,test_verbose=0,save_pred=False,keep_histo=False): |
84 | 84 | ||
85 | |||
86 | layers = [Input(shape=(x_train.shape[1],))] | 85 | layers = [Input(shape=(x_train.shape[1],))] |
87 | 86 | ||
88 | for h in hidden_size: | 87 | for h in hidden_size: |
89 | if dropouts: | 88 | if dropouts: |
90 | d = dropouts.pop(0) | 89 | d = dropouts.pop(0) |
91 | if d > 0 : | 90 | if d > 0 : |
92 | layers.append(Dropout(d)(layers[-1])) | 91 | layers.append(Dropout(d)(layers[-1])) |
93 | 92 | ||
94 | layers.append(Dense(h,init=init,activation=input_activation)(layers[-1])) | 93 | layers.append(Dense(h,init=init,activation=input_activation)(layers[-1])) |
95 | #if dropouts: | 94 | #if dropouts: |
96 | # drop_prob=dropouts.pop(0) | 95 | # drop_prob=dropouts.pop(0) |
97 | # if drop_prob > 0: | 96 | # if drop_prob > 0: |
98 | # model.add(Dropout(drop_prob)) | 97 | # model.add(Dropout(drop_prob)) |
99 | 98 | ||
100 | #if dropouts: | 99 | #if dropouts: |
101 | # drop_prob=dropouts.pop(0) | 100 | # drop_prob=dropouts.pop(0) |
102 | # if drop_prob > 0: | 101 | # if drop_prob > 0: |
103 | # model.add(Dropout(drop_prob)) | 102 | # model.add(Dropout(drop_prob)) |
104 | 103 | ||
105 | #if dropouts: | 104 | #if dropouts: |
106 | # model.add(Dropout(dropouts.pop(0))) | 105 | # model.add(Dropout(dropouts.pop(0))) |
107 | if dropouts: | 106 | if dropouts: |
108 | d = dropouts.pop(0) | 107 | d = dropouts.pop(0) |
109 | if d > 0 : | 108 | if d > 0 : |
110 | layers.append(Dropout(d)(layers[-1])) | 109 | layers.append(Dropout(d)(layers[-1])) |
111 | 110 | ||
112 | layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(layers[-1])) | 111 | layers.append(Dense( y_train.shape[1],activation=output_activation,init=init)(layers[-1])) |
113 | 112 | ||
114 | model = Model(layers[0] , layers[-1]) | 113 | model = Model(layers[0] , layers[-1]) |
115 | if not sgd: | 114 | if not sgd: |
116 | sgd = SGD(lr=0.01, decay=0, momentum=0.9) | 115 | sgd = SGD(lr=0.01, decay=0, momentum=0.9) |
117 | 116 | ||
118 | model.compile(loss=loss, optimizer=sgd,metrics=['accuracy']) | 117 | model.compile(loss=loss, optimizer=sgd,metrics=['accuracy']) |
119 | 118 | ||
120 | scores_dev=[] | 119 | scores_dev=[] |
121 | scores_test=[] | 120 | scores_test=[] |
122 | scores_train=[] | 121 | scores_train=[] |
123 | save=None | 122 | save=None |
124 | for i in range(epochs): | 123 | for i in range(epochs): |
125 | hist=model.fit(x_train, y_train, nb_epoch=1, batch_size=batch_size,verbose=fit_verbose,validation_data=(x_dev,y_dev)) | 124 | hist=model.fit(x_train, y_train, nb_epoch=1, batch_size=batch_size,verbose=fit_verbose,validation_data=(x_dev,y_dev)) |
126 | pred_train=model.predict(x_train) | 125 | pred_train=model.predict(x_train) |
127 | pred_dev=model.predict(x_dev) | 126 | pred_dev=model.predict(x_dev) |
128 | pred_test=model.predict(x_test) | 127 | pred_test=model.predict(x_test) |
129 | 128 | ||
130 | scores_train.append(perf(np.argmax(y_train,axis=1),np.argmax(pred_train,axis=1))) | 129 | scores_train.append(perf(np.argmax(y_train,axis=1),np.argmax(pred_train,axis=1))) |
131 | scores_dev.append(perf(np.argmax(y_dev,axis=1),np.argmax(pred_dev,axis=1))) | 130 | scores_dev.append(perf(np.argmax(y_dev,axis=1),np.argmax(pred_dev,axis=1))) |
132 | scores_test.append(perf(np.argmax(y_test,axis=1),np.argmax(pred_test,axis=1))) | 131 | scores_test.append(perf(np.argmax(y_test,axis=1),np.argmax(pred_test,axis=1))) |
133 | if fit_verbose : | 132 | if fit_verbose : |
134 | print "{} {} {} {}".format(i,scores_train[-1],scores_dev[-1],scores_test[-1]) | 133 | print "{} {} {} {}".format(i,scores_train[-1],scores_dev[-1],scores_test[-1]) |
135 | if save is None or (len(scores_dev)>2 and scores_dev[-1] > scores_dev[-2]): | 134 | if save is None or (len(scores_dev)>2 and scores_dev[-1] > scores_dev[-2]): |
136 | save=save_tuple(pred_train,pred_dev,pred_test) | 135 | save=save_tuple(pred_train,pred_dev,pred_test) |
137 | arg_dev = np.argmax(scores_dev) | 136 | arg_dev = np.argmax(scores_dev) |
138 | best_dev=scores_dev[arg_dev] | 137 | best_dev=scores_dev[arg_dev] |
139 | best_test=scores_test[arg_dev] | 138 | best_test=scores_test[arg_dev] |
140 | max_test=np.max(scores_test) | 139 | max_test=np.max(scores_test) |
141 | if fit_verbose: | 140 | if fit_verbose: |
142 | print " res : {} {} {}".format(best_dev,best_test,max_test) | 141 | print " res : {} {} {}".format(best_dev,best_test,max_test) |
143 | 142 | ||
144 | res=[scores_train,scores_dev,scores_test] | 143 | res=[scores_train,scores_dev,scores_test] |
145 | if save_pred: | 144 | if save_pred: |
146 | res.append(save) | 145 | res.append(save) |
147 | if keep_histo: | 146 | if keep_histo: |
148 | res.append(hist) | 147 | res.append(hist) |
149 | return res | 148 | return res |
150 | 149 | ||
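A minimal usage sketch for train_mlp on synthetic data (labels must be one-hot, as the np.argmax scoring above assumes; the hyper-parameters are illustrative):

    import numpy as np
    from keras.optimizers import Adam
    from mlp import train_mlp

    x_tr, x_de, x_te = [np.random.rand(n, 50) for n in (100, 20, 20)]
    y_tr, y_de, y_te = [np.eye(2)[np.random.randint(2, size=n)] for n in (100, 20, 20)]
    scores_train, scores_dev, scores_test = train_mlp(
        x_tr, y_tr, x_de, y_de, x_te, y_te, [250, 250],
        loss="categorical_crossentropy", dropouts=[0.25, 0.25],
        sgd=Adam(lr=0.0001), epochs=10, batch_size=8, fit_verbose=0)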
151 | def train_ae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,verbose=1,patience=20,get_weights=False,set_weights=[]): | 150 | def train_ae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,verbose=1,patience=20,get_weights=False,set_weights=[]): |
152 | 151 | ||
153 | input_vect = Input(shape=(train.shape[1],)) | 152 | input_vect = Input(shape=(train.shape[1],)) |
154 | 153 | ||
155 | previous = [input_vect] | 154 | previous = [input_vect] |
156 | 155 | ||
157 | if dropouts is None: | 156 | if dropouts is None: |
158 | dropouts = [ 0 ] * (len(hidden_sizes) +1) | 157 | dropouts = [ 0 ] * (len(hidden_sizes) +1) |
159 | if sgd is None : | 158 | if sgd is None : |
160 | sgd = SGD(lr=0.01, decay=0, momentum=0.9) | 159 | sgd = SGD(lr=0.01, decay=0, momentum=0.9) |
161 | did_do = False | 160 | did_do = False |
162 | if dropouts : | 161 | if dropouts : |
163 | d = dropouts.pop(0) | 162 | d = dropouts.pop(0) |
164 | if d : | 163 | if d : |
165 | previous.append(Dropout(d)(previous[-1])) | 164 | previous.append(Dropout(d)(previous[-1])) |
166 | did_do = True | 165 | did_do = True |
167 | 166 | ||
168 | for h_layer,weight_layer in izip_longest(hidden_sizes,set_weights,fillvalue=None) : | 167 | for h_layer,weight_layer in izip_longest(hidden_sizes,set_weights,fillvalue=None) : |
169 | # ,weights=w | 168 | # ,weights=w |
170 | if weight_layer : | 169 | if weight_layer : |
171 | w = weight_layer[0] | 170 | w = weight_layer[0] |
172 | else : | 171 | else : |
173 | w = None | 172 | w = None |
174 | #print "ADD SIZE" , h_layer | 173 | #print "ADD SIZE" , h_layer |
175 | if did_do : | 174 | if did_do : |
176 | p = previous.pop() | 175 | p = previous.pop() |
177 | did_do = False | 176 | did_do = False |
178 | else : | 177 | else : |
179 | p = previous[-1] | 178 | p = previous[-1] |
180 | previous.append(Dense(h_layer,activation=input_activation,weights=w)(p)) | 179 | previous.append(Dense(h_layer,activation=input_activation,weights=w)(p)) |
181 | if dropouts: | 180 | if dropouts: |
182 | d = dropouts.pop(0) | 181 | d = dropouts.pop(0) |
183 | if d : | 182 | if d : |
184 | previous.append(Dropout(d)(previous[-1])) | 183 | previous.append(Dropout(d)(previous[-1])) |
185 | did_do = True | 184 | did_do = True |
186 | 185 | ||
187 | predict_y = True | 186 | predict_y = True |
188 | if y_train is None or y_dev is None or y_test is None : | 187 | if y_train is None or y_dev is None or y_test is None : |
189 | y_train = train | 188 | y_train = train |
190 | y_dev = dev | 189 | y_dev = dev |
191 | y_test = test | 190 | y_test = test |
192 | predict_y = False | 191 | predict_y = False |
193 | previous.append(Dense(y_train.shape[1],activation=output_activation)(previous[-1])) | 192 | previous.append(Dense(y_train.shape[1],activation=output_activation)(previous[-1])) |
194 | models = [Model(input=previous[0] , output=x) for x in previous[1:]] | 193 | models = [Model(input=previous[0] , output=x) for x in previous[1:]] |
195 | print "MLP", sgd, loss | 194 | print "MLP", sgd, loss |
196 | models[-1].compile(optimizer=sgd,loss=loss) | 195 | models[-1].compile(optimizer=sgd,loss=loss) |
197 | models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],validation_data=(dev,y_dev),verbose=verbose) | 196 | models[-1].fit(train,y_train,nb_epoch=epochs,batch_size=batch_size,callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, verbose=0)],validation_data=(dev,y_dev),verbose=verbose) |
198 | param_predict = [ train, dev, test ] | 197 | param_predict = [ train, dev, test ] |
199 | if predict_y : | 198 | if predict_y : |
200 | param_predict += [ y_train, y_dev ,y_test ] | 199 | param_predict += [ y_train, y_dev ,y_test ] |
201 | predictions = [ [x.predict(y) for y in param_predict ] for x in models ] | 200 | predictions = [ [x.predict(y) for y in param_predict ] for x in models ] |
202 | if get_weights : | 201 | if get_weights : |
203 | weights = [ x.get_weights() for x in models[-1].layers if x.get_weights() ] | 202 | weights = [ x.get_weights() for x in models[-1].layers if x.get_weights() ] |
204 | return ( predictions , weights ) | 203 | return ( predictions , weights ) |
205 | else : | 204 | else : |
206 | return predictions | 205 | return predictions |
207 | 206 | ||
208 | def train_sae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,verbose=1,patience=20): | 207 | def train_sae(train,dev,test,hidden_sizes,y_train=None,y_dev=None,y_test=None,dropouts=None,input_activation="tanh",output_activation="tanh",loss="mse",sgd=None,epochs=500,batch_size=8,verbose=1,patience=20): |
209 | 208 | ||
210 | weights = [] | 209 | weights = [] |
211 | predictions = [[(train,dev,test),()]] | 210 | predictions = [[(train,dev,test),()]] |
212 | ft_pred = [] | 211 | ft_pred = [] |
213 | past_sizes = [] | 212 | past_sizes = [] |
214 | 213 | ||
215 | 214 | ||
216 | for size in hidden_sizes : | 215 | for size in hidden_sizes : |
217 | #print "DO size " , size , "FROM" , hidden_sizes | 216 | #print "DO size " , size , "FROM" , hidden_sizes |
218 | res_pred, res_wght = train_ae(predictions[-1][-2][0], predictions[-1][-2][1],predictions[-1][-2][2],[size], | 217 | res_pred, res_wght = train_ae(predictions[-1][-2][0], predictions[-1][-2][1],predictions[-1][-2][2],[size], |
219 | dropouts=dropouts, input_activation=input_activation, | 218 | dropouts=dropouts, input_activation=input_activation, |
220 | output_activation=output_activation, loss=loss, sgd=sgd, | 219 | output_activation=output_activation, loss=loss, sgd=sgd, |
221 | epochs=epochs, batch_size=batch_size, verbose=verbose, | 220 | epochs=epochs, batch_size=batch_size, verbose=verbose, |
222 | patience=patience,get_weights=True) | 221 | patience=patience,get_weights=True) |
223 | past_sizes.append(size) | 222 | past_sizes.append(size) |
224 | weights.append(res_wght) | 223 | weights.append(res_wght) |
225 | predictions.append(res_pred) | 224 | predictions.append(res_pred) |
226 | #print "FINE TUNE " | 225 | #print "FINE TUNE " |
227 | res_ftpred = train_ae(train,dev,test,past_sizes,y_train=y_train,y_dev=y_dev,y_test=y_test, | 226 | res_ftpred = train_ae(train,dev,test,past_sizes,y_train=y_train,y_dev=y_dev,y_test=y_test, |
228 | dropouts=dropouts, | 227 | dropouts=dropouts, |
229 | input_activation=input_activation, | 228 | input_activation=input_activation, |
230 | output_activation=output_activation, | 229 | output_activation=output_activation, |
231 | loss=loss,sgd=sgd,epochs=epochs, | 230 | loss=loss,sgd=sgd,epochs=epochs, |
232 | batch_size=batch_size,verbose=verbose,patience=patience, | 231 | batch_size=batch_size,verbose=verbose,patience=patience, |
233 | set_weights=weights) | 232 | set_weights=weights) |
234 | ft_pred.append(res_ftpred) | 233 | ft_pred.append(res_ftpred) |
235 | 234 | ||
236 | return ( predictions[1:] , ft_pred) | 235 | return ( predictions[1:] , ft_pred) |
237 | 236 | ||
238 | 237 | ||
239 | 238 |
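For the stacked setup, train_sae greedily trains one autoencoder per entry of hidden_sizes, feeding each level the previous level's projections, then fine-tunes the whole stack with the accumulated weights. A sketch on synthetic data, with default (no) dropout:

    import numpy as np
    from mlp import train_sae

    x_tr, x_de, x_te = [np.random.rand(n, 50) for n in (100, 20, 20)]
    layer_preds, ft_preds = train_sae(
        x_tr, x_de, x_te, [40, 25, 12],
        input_activation="tanh", output_activation="tanh",
        loss="mse", sgd="adam", epochs=5, batch_size=8, verbose=0, patience=3)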
LDA/utils.py
1 | # -*- coding: utf-8 -*- | 1 | # -*- coding: utf-8 -*- |
2 | import nltk | 2 | import nltk |
3 | import re | 3 | import re |
4 | import codecs | ||
5 | import numpy as np | ||
6 | import sqlite3 | ||
7 | |||
4 | pattern = ur"\d+(?:\.\d+)?\s*%?|\w{1,2}'|<unk>|[\wรฉร รจรนรชรดรปรขรฒรฌรฎรง]+|[^\w\s]" | 8 | pattern = ur"\d+(?:\.\d+)?\s*%?|\w{1,2}'|<unk>|[\wรฉร รจรนรชรดรปรขรฒรฌรฎรง]+|[^\w\s]" |
5 | rer_b = re.compile(ur" r e r(?: e r)? b ") | 9 | rer_b = re.compile(ur" r e r(?: e r)? b ") |
6 | rer_c = re.compile(ur" r e r(?: e r)? c |r e r( e r)? c' est | rer c' est") | 10 | rer_c = re.compile(ur" r e r(?: e r)? c |r e r( e r)? c' est | rer c' est") |
7 | rer = re.compile(ur" (e )?r e r(?: e r)? |re r( e r)? |rer e r | r e rer | r e r | r e rer |r( e r)+ ") | 11 | rer = re.compile(ur" (e )?r e r(?: e r)? |re r( e r)? |rer e r | r e rer | r e r | r e rer |r( e r)+ ") |
8 | sncf = re.compile(ur" s n c f ") | 12 | sncf = re.compile(ur" s n c f ") |
9 | jusq = re.compile(ur" jusqu ' ") | 13 | jusq = re.compile(ur" jusqu ' ") |
10 | ratp = re.compile(ur" r a t(?: p)? ") | 14 | ratp = re.compile(ur" r a t(?: p)? ") |
11 | quel = re.compile(ur" quelqu ' ") | 15 | quel = re.compile(ur" quelqu ' ") |
12 | space = re.compile(ur" +") | 16 | space = re.compile(ur" +") |
13 | tok2 = nltk.RegexpTokenizer(pattern,flags=re.UNICODE ) | 17 | tok2 = nltk.RegexpTokenizer(pattern,flags=re.UNICODE ) |
14 | # (?x)\d+(\.\d+)?\s*%| \w'| \w+| [^\w\s] | 18 | # (?x)\d+(\.\d+)?\s*%| \w'| \w+| [^\w\s] |
15 | 19 | ||
16 | def preproc(line): | 20 | def preproc(line): |
17 | # print 1,line.encode('utf8') | 21 | # print 1,line.encode('utf8') |
18 | line = space.subn(u" ",line)[0] | 22 | line = space.subn(u" ",line)[0] |
19 | line = rer_b.subn(u" rer b ",line)[0] | 23 | line = rer_b.subn(u" rer b ",line)[0] |
20 | line = rer_c.subn(u" rer c ",line)[0] | 24 | line = rer_c.subn(u" rer c ",line)[0] |
21 | line = rer.subn(u" rer ",line)[0] | 25 | line = rer.subn(u" rer ",line)[0] |
22 | line = sncf.subn(u" sncf ",line)[0] | 26 | line = sncf.subn(u" sncf ",line)[0] |
23 | line = ratp.subn(u" ratp ",line)[0] | 27 | line = ratp.subn(u" ratp ",line)[0] |
24 | line = jusq.subn(u" jusqu' ",line)[0] | 28 | line = jusq.subn(u" jusqu' ",line)[0] |
25 | line = quel.subn(u" quelqu' ",line)[0] | 29 | line = quel.subn(u" quelqu' ",line)[0] |
26 | line = space.subn(u" ",line)[0] | 30 | line = space.subn(u" ",line)[0] |
27 | # print 2,line.encode('utf8') | 31 | # print 2,line.encode('utf8') |
28 | return line.lower() | 32 | return line.lower() |
29 | 33 | ||
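A quick illustration of the normalisation above, which collapses spelled-out acronyms in the call transcripts before tokenisation:

    # -*- coding: utf-8 -*-
    from utils import preproc, tok2

    line = u"je prends le r e r b jusqu ' à la gare s n c f ."
    print tok2.tokenize(preproc(line))
    # preproc first yields u"je prends le rer b jusqu' à la gare sncf ."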
30 | def yield_corpus(df_list): | 34 | def yield_corpus(df_list): |
31 | for corpus in df_list: | 35 | for corpus in df_list: |
32 | for id,doc in corpus.iterrows(): | 36 | for id,doc in corpus.iterrows(): |
33 | try: | 37 | try: |
34 | a = tok2.tokenize(preproc(doc[2].decode("utf-8"))) | 38 | a = tok2.tokenize(preproc(doc[2].decode("utf-8"))) |
35 | # print 3, " ".join(a).encode("utf8") | 39 | # print 3, " ".join(a).encode("utf8") |
36 | yield a | 40 | yield a |
37 | except: | 41 | except: |
38 | print doc[2] | 42 | print doc[2] |
39 | raise | 43 | raise |
40 | def select(elm): | 44 | def select(elm): |
41 | return int(elm.split("_")[-1]) | 45 | return int(elm.split("_")[-1]) |
42 | 46 | ||
43 | 47 | ||
44 | def select_mmf(elm): | 48 | def select_mmf(elm): |
45 | return int(elm.split("_")[0]) | 49 | return int(elm.split("_")[0]) |
50 | |||
51 | def get_score(table): | ||
52 | mx_train = np.max(table[0]) | ||
53 | argmx_dev = np.argmax(table[1]) | ||
54 | mx_dev = table[1][argmx_dev] | ||
55 | best_test = table[2][argmx_dev] | ||
56 | mx_test = np.max(table[2]) | ||
57 | print """\tmax train : {} | ||
58 | \tmax dev : {} | ||
59 | \tmax test : {} - best test : {} | ||
60 | \t best epochs : {}""".format(mx_train,mx_dev,mx_test,best_test,argmx_dev) | ||
61 | return mx_train,mx_dev,mx_test,best_test,argmx_dev | ||
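get_score expects the [scores_train, scores_dev, scores_test] triple returned by mlp.train_mlp and reports the test accuracy at the best dev epoch, for example:

    table = [[0.90, 0.95], [0.70, 0.72], [0.65, 0.60]]  # per-epoch accuracies
    mx_train, mx_dev, mx_test, best_test, best_epoch = get_score(table)
    # the best dev epoch is 1, so best_test is 0.60 even though max test is 0.65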
62 | class WeightedWordsList : | ||
63 | @staticmethod | ||
64 | def get_key(wtuple): | ||
65 | return wtuple[1] | ||
66 | @staticmethod | ||
67 | def get_okey(wtuple): | ||
68 | return wtuple[1][1] | ||
69 | |||
70 | |||
71 | def __init__(self,file_path): | ||
72 | self.wlist = codecs.open(file_path,"r","utf8").readlines() | ||
73 | self.wlist = [x.strip().split(':') for x in self.wlist ] | ||
74 | self.wlist = [ (x, float(y)) for x,y in self.wlist ] | ||
75 | self.wdict = {} | ||
76 | for x,y in self.wlist: | ||
77 | self.wdict[x.encode("utf8")] = y | ||
78 | |||
79 | def select_best(self,word_list,length=5): | ||
80 | scored_word = [] | ||
81 | for w in word_list: | ||
82 | w = w.encode("utf8") | ||
83 | if w not in self.wdict : | ||
84 | continue | ||
85 | |||
86 | if len(scored_word) < length: | ||
87 | scored_word.append((w,self.wdict[w])) | ||
88 | else : | ||
89 | w_min= min(enumerate(scored_word),key=WeightedWordsList.get_okey) | ||
90 | w_curr = (w, self.wdict[w]) | ||
91 | if w_min[1][1] < w_curr[1]: | ||
92 | del scored_word[w_min[0]] | ||
93 | scored_word.append(w_curr) | ||
94 | w_min = min(enumerate(scored_word),key=WeightedWordsList.get_okey) | ||
95 | while len(scored_word) > length and w_min[1][1] < w_curr[1] : | ||
96 | del scored_word[w_min[0]] | ||
97 | w_min = min(enumerate(scored_word),key=WeightedWordsList.get_okey) | ||
98 | elif w_min[1][1] == w_curr[1]: | ||
99 | scored_word.append(w_curr) | ||
100 | return [ w[0] for w in scored_word ] | ||
46 | 101 | ||
102 | |||
103 | |||
104 | |||
105 |
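WeightedWordsList loads a word:weight lexicon and select_best keeps the highest-weighted words of a list. A sketch with a hypothetical lexicon file:

    # -*- coding: utf-8 -*-
    from utils import WeightedWordsList

    # weights.txt holds one "word:weight" pair per line, e.g. "gare:2.5"
    wwl = WeightedWordsList("weights.txt")
    top = wwl.select_best([u"gare", u"rer", u"bonjour"], length=2)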
LDA/vae.py
1 | '''This script demonstrates how to build a variational autoencoder with Keras. | 1 | '''This script demonstrates how to build a variational autoencoder with Keras. |
2 | Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114 | 2 | Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114 |
3 | ''' | 3 | ''' |
4 | 4 | ||
5 | import itertools | 5 | import itertools |
6 | import sys | 6 | import sys |
7 | import json | 7 | import json |
8 | 8 | ||
9 | import numpy as np | 9 | import numpy as np |
10 | import matplotlib.pyplot as plt | 10 | import matplotlib.pyplot as plt |
11 | from scipy import sparse | 11 | from scipy import sparse |
12 | import scipy.io | 12 | import scipy.io |
13 | 13 | ||
14 | from keras.layers import Input, Dense, Lambda | 14 | from keras.layers import Input, Dense, Lambda |
15 | from keras.models import Model | 15 | from keras.models import Model |
16 | from keras import backend as K | 16 | from keras import backend as K |
17 | from keras import objectives | 17 | from keras import objectives |
18 | from keras.datasets import mnist | 18 | from keras.datasets import mnist |
19 | from keras.callbacks import EarlyStopping,Callback | ||
20 | import warnings  # used by ZeroStopping below | ||
19 | 20 | ||
20 | import pandas | 21 | import pandas |
21 | import shelve | 22 | import shelve |
22 | import pickle | 23 | import pickle |
23 | 24 | ||
24 | 25 | ||
26 | class ZeroStopping(Callback): | ||
27 | '''Stop training as soon as a monitored quantity crosses a fixed threshold. | ||
28 | # Arguments | ||
29 | monitor: quantity to be monitored. | ||
30 | thresh: threshold on the monitored quantity; | ||
31 | training stops once it is reached. | ||
32 | verbose: verbosity mode. | ||
33 | mode: one of {auto, min, max}. In 'min' mode, | ||
34 | training will stop when the quantity | ||
35 | monitored drops below the threshold; in 'max' | ||
36 | mode it will stop when it exceeds | ||
37 | the threshold. | ||
38 | ''' | ||
39 | def __init__(self, monitor='val_loss', verbose=0, mode='auto', thresh = 0): | ||
40 | super(ZeroStopping, self).__init__() | ||
25 | 41 | ||
42 | self.monitor = monitor | ||
43 | self.verbose = verbose | ||
44 | self.thresh = thresh # stopping threshold for the monitored quantity | ||
26 | 45 | ||
46 | if mode not in ['auto', 'min', 'max']: | ||
47 | warnings.warn('EarlyStopping mode %s is unknown, ' | ||
48 | 'fallback to auto mode.' % (mode), | ||
49 | RuntimeWarning) | ||
50 | mode = 'auto' | ||
27 | 51 | ||
52 | if mode == 'min': | ||
53 | self.monitor_op = np.less | ||
54 | elif mode == 'max': | ||
55 | self.monitor_op = np.greater | ||
56 | else: | ||
57 | if 'acc' in self.monitor: | ||
58 | self.monitor_op = np.greater | ||
59 | else: | ||
60 | self.monitor_op = np.less | ||
61 | |||
62 | def on_epoch_end(self, epoch, logs={}): | ||
63 | current = logs.get(self.monitor) | ||
64 | if current is None: | ||
65 | warnings.warn('Zero stopping requires %s available!' % | ||
66 | (self.monitor), RuntimeWarning) | ||
67 | return  # nothing to compare against this epoch | ||
68 | if self.monitor_op(current, self.thresh): | ||
69 | self.best = current | ||
70 | self.model.stop_training = True | ||
71 | |||
28 | #batch_size = 16 | 72 | #batch_size = 16 |
29 | #original_dim = 784 | 73 | #original_dim = 784 |
30 | #latent_dim = 2 | 74 | #latent_dim = 2 |
31 | #intermediate_dim = 128 | 75 | #intermediate_dim = 128 |
32 | #epsilon_std = 0.01 | 76 | #epsilon_std = 0.01 |
33 | #nb_epoch = 40 | 77 | #nb_epoch = 40 |
34 | 78 | ||
35 | 79 | ||
36 | 80 | ||
37 | 81 | ||
38 | def train_vae(x_train,x_dev,x_test,y_train=None,y_dev=None,y_test=None,hidden_size=80,latent_dim=12,batch_size=8,nb_epochs=10,sgd="rmsprop",input_activation = "relu",output_activation = "sigmoid",epsilon_std=0.01): | 82 | def train_vae(x_train,x_dev,x_test,y_train=None,y_dev=None,y_test=None,hidden_size=80,latent_dim=12,batch_size=8,nb_epochs=10,sgd="rmsprop",input_activation = "relu",output_activation = "sigmoid",epsilon_std=0.01): |
39 | 83 | ||
40 | 84 | ||
41 | 85 | ||
42 | def sampling(args): | 86 | def sampling(args): |
43 | z_mean, z_log_std = args | 87 | z_mean, z_log_std = args |
44 | epsilon = K.random_normal(shape=(batch_size, latent_dim), | 88 | epsilon = K.random_normal(shape=(batch_size, latent_dim), |
45 | mean=0., std=epsilon_std) | 89 | mean=0., std=epsilon_std) |
46 | return z_mean + K.exp(z_log_std) * epsilon | 90 | return z_mean + K.exp(z_log_std) * epsilon |
47 | 91 | ||
48 | def vae_loss(x, x_decoded_mean): | 92 | def vae_loss(x, x_decoded_mean): |
49 | xent_loss = objectives.binary_crossentropy(x, x_decoded_mean) | 93 | xent_loss = objectives.binary_crossentropy(x, x_decoded_mean) |
50 | kl_loss = - 0.5 * K.mean(1 + z_log_std - K.square(z_mean) - K.exp(z_log_std), axis=-1) | 94 | kl_loss = - 0.5 * K.mean(1 + z_log_std - K.square(z_mean) - K.exp(z_log_std), axis=-1) |
51 | return xent_loss + kl_loss | 95 | return xent_loss + kl_loss |
52 | 96 | ||
53 | original_dim = x_train.shape[1] | 97 | original_dim = x_train.shape[1] |
54 | 98 | ||
55 | 99 | ||
56 | x = Input(batch_shape=(batch_size, original_dim)) | 100 | x = Input(batch_shape=(batch_size, original_dim)) |
57 | h = Dense(hidden_size, activation=input_activation)(x) | 101 | h = Dense(hidden_size, activation=input_activation)(x) |
58 | z_mean = Dense(latent_dim)(h) | 102 | z_mean = Dense(latent_dim)(h) |
59 | z_log_std = Dense(latent_dim)(h) | 103 | z_log_std = Dense(latent_dim)(h) |
60 | 104 | ||
61 | 105 | ||
62 | # note that "output_shape" isn't necessary with the TensorFlow backend | 106 | # note that "output_shape" isn't necessary with the TensorFlow backend |
63 | # so you could write `Lambda(sampling)([z_mean, z_log_std])` | 107 | # so you could write `Lambda(sampling)([z_mean, z_log_std])` |
64 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_std]) | 108 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_std]) |
65 | 109 | ||
66 | # we instantiate these layers separately so as to reuse them later | 110 | # we instantiate these layers separately so as to reuse them later |
67 | decoder_h = Dense(hidden_size, activation=input_activation) | 111 | decoder_h = Dense(hidden_size, activation=input_activation) |
68 | decoder_mean = Dense(original_dim, activation=output_activation) | 112 | decoder_mean = Dense(original_dim, activation=output_activation) |
69 | h_decoded = decoder_h(z) | 113 | h_decoded = decoder_h(z) |
70 | x_decoded_mean = decoder_mean(h_decoded) | 114 | x_decoded_mean = decoder_mean(h_decoded) |
71 | 115 | ||
72 | 116 | ||
73 | vae = Model(x, x_decoded_mean) | 117 | vae = Model(x, x_decoded_mean) |
74 | vae.compile(optimizer=sgd, loss=vae_loss) | 118 | vae.compile(optimizer=sgd, loss=vae_loss) |
75 | 119 | ||
76 | # train the VAE on MNIST digits | 120 | # train the VAE on MNIST digits |
77 | if y_train is None or y_dev is None or y_test is None : | 121 | if y_train is None or y_dev is None or y_test is None : |
78 | y_train = x_train | 122 | y_train = x_train |
79 | y_dev = x_dev | 123 | y_dev = x_dev |
80 | y_test = x_test | 124 | y_test = x_test |
81 | 125 | ||
82 | vae.fit(x_train, y_train, | 126 | vae.fit(x_train, y_train, |
83 | shuffle=True, | 127 | shuffle=True, |
84 | nb_epoch=nb_epochs, | 128 | nb_epoch=nb_epochs, |
129 | verbose = 1, | ||
85 | batch_size=batch_size, | 130 | batch_size=batch_size, |
86 | validation_data=(x_dev, y_dev)) | 131 | validation_data=(x_dev, y_dev), |
132 | callbacks = [ZeroStopping(monitor='val_loss', thresh=0, verbose=0, mode='min')] | ||
133 | ) | ||
87 | 134 | ||
88 | # build a model to project inputs on the latent space | 135 | # build a model to project inputs on the latent space |
89 | encoder = Model(x, z_mean) | 136 | encoder = Model(x, z_mean) |
90 | pred_train = encoder.predict(x_train, batch_size=batch_size) | 137 | pred_train = encoder.predict(x_train, batch_size=batch_size) |
91 | pred_dev = encoder.predict(x_dev, batch_size=batch_size) | 138 | pred_dev = encoder.predict(x_dev, batch_size=batch_size) |
92 | pred_test = encoder.predict(x_test,batch_size=batch_size) | 139 | pred_test = encoder.predict(x_test,batch_size=batch_size) |
93 | return [ [ pred_train, pred_dev, pred_test ] ] | 140 | return [ [ pred_train, pred_dev, pred_test ] ] |
94 | # display a 2D plot of the digit classes in the latent space | 141 | # display a 2D plot of the digit classes in the latent space |
95 | #x_test_encoded = encoder.predict(x_test, batch_size=batch_size) | 142 | #x_test_encoded = encoder.predict(x_test, batch_size=batch_size) |
96 | # build a digit generator that can sample from the learned distribution | 143 | # build a digit generator that can sample from the learned distribution |
97 | #decoder_input = Input(shape=(latent_dim,)) | 144 | #decoder_input = Input(shape=(latent_dim,)) |
98 | #_h_decoded = decoder_h(decoder_input) | 145 | #_h_decoded = decoder_h(decoder_input) |
99 | #_x_decoded_mean = decoder_mean(_h_decoded) | 146 | #_x_decoded_mean = decoder_mean(_h_decoded) |
100 | #generator = Model(decoder_input, _x_decoded_mean) | 147 | #generator = Model(decoder_input, _x_decoded_mean) |
101 | #x_decoded = generator.predict(z_sample) | 148 | #x_decoded = generator.predict(z_sample) |
102 | 149 | ||
103 | 150 |
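A minimal end-to-end sketch of train_vae on random data (the scripts above pass LDA topic features, optionally with the other modality's features as the y_* targets). Because the model is built with a fixed batch_shape, the row counts must be divisible by batch_size:

    import numpy as np
    from vae import train_vae

    x_tr, x_de, x_te = [np.random.rand(n, 50) for n in (96, 32, 32)]
    [[z_tr, z_de, z_te]] = train_vae(x_tr, x_de, x_te,
                                     hidden_size=80, latent_dim=12,
                                     batch_size=8, nb_epochs=5)
    print z_tr.shape  # (96, 12) latent projections, fed to train_mlp above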