Commit d414b83e18cdc5d0313f6880349609082dc035c1

Authored by Killian
1 parent 7c16f9bfe8
Exists in master

add Bottleneck MLP + scripts

Showing 16 changed files with 719 additions and 0 deletions Inline Diff

BOTTLENECK/01a-mlp_proj.py
File was created 1
2 # coding: utf-8
3
4 # In[2]:
5
6 # Import
7 import gensim
8 from scipy import sparse
9 import itertools
10 from sklearn import preprocessing
11 from keras.models import Sequential
12 from keras.optimizers import SGD,Adam
13 from keras.layers.advanced_activations import ELU,PReLU
14 from keras.callbacks import ModelCheckpoint
15 from mlp import *
16 import sklearn.metrics
17 from sklearn.preprocessing import LabelBinarizer
18 import shelve
19 import pickle
20 from utils import *
21 import sys
22 import os
23 import json
24 # In[4]:
25
26 infer_model=shelve.open("{}".format(sys.argv[2]))
27 in_dir = sys.argv[1]
28 #['ASR', 'TRS', 'LABEL']
29 # In[6]:
30 if len(sys.argv) > 4 :
31 features_key = sys.argv[4]
32 else :
33 features_key = "LDA"
34 save_projection = True
35 json_conf =json.load(open(sys.argv[3]))
36 ae_conf = json_conf["mlp_proj"]
37
38 hidden_size= ae_conf["hidden_size"]
39 input_activation = None
40 if ae_conf["input_activation"] == "elu":
41 print " ELU"
42 input_activation = PReLU()
43 else:
44 print " ELSE"
45 input_activation = ae_conf["input_activation"]
46 #input_activation=ae_conf["input_activation"]
47 output_activation=ae_conf["output_activation"]
48 loss=ae_conf["loss"]
49 epochs=ae_conf["epochs"]
50 batch_size=ae_conf["batch"]
51 patience=ae_conf["patience"]
52 dropouts=ae_conf["do"]
53 try:
54 k = ae_conf["sgd"]
55 if ae_conf["sgd"]["name"] == "adam":
56 sgd = Adam(lr=ae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
57 elif ae_conf["sgd"]["name"] == "sgd":
58 sgd = SGD(lr=ae_conf["sgd"]["lr"])
59 except:
60 sgd = ae_conf["sgd"]
61
62 mlp_conf = json_conf["mlp"]
63 mlp_h = mlp_conf["hidden_size"]
64 mlp_loss = mlp_conf["loss"]
65 mlp_dropouts = mlp_conf["do"]
66 mlp_epochs = mlp_conf["epochs"]
67 mlp_batch_size = mlp_conf["batch"]
68 mlp_input_activation=mlp_conf["input_activation"]
69 mlp_output_activation=mlp_conf["output_activation"]
70
71 try:
72 k = mlp_conf["sgd"]
73 if mlp_conf["sgd"]["name"] == "adam":
74 mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
75 elif mlp_conf["sgd"]["name"] == "sgd":
76 mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"])
77 except:
78 mlp_sgd = mlp_conf["sgd"]
79
80
81 name = json_conf["name"]
82 try :
83 os.mkdir("{}/{}".format(in_dir,name))
84 except OSError :
85 pass
86 db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
87 db["IDS"]=dict(infer_model["LABEL"])
88 #
89 keys = infer_model[features_key].keys()
90 LABELS = {}
91 for mod in keys :
92
93 int_labels_train = map(select,infer_model["LABEL"][mod]["TRAIN"])
94 binarizer = LabelBinarizer()
95 y_train=binarizer.fit_transform(int_labels_train)
96 y_dev=binarizer.transform(map(select,infer_model["LABEL"][mod]["DEV"]))
97 y_test=binarizer.transform(map(select,infer_model["LABEL"][mod]["TEST"]))
98 LABELS[mod]= { "TRAIN":y_train , "DEV" : y_dev, "TEST" : y_test}
99 sumary,proj = train_mlp_proj(infer_model[features_key][mod]["TRAIN"].todense(),y_train,
100 infer_model[features_key][mod]["DEV"].todense(),y_dev,
101 infer_model[features_key][mod]["TEST"].todense(),y_test,
102 hidden_size ,sgd=sgd,
103 epochs=epochs,
104 patience=patience,
105 batch_size=batch_size,
106 input_activation=input_activation,
107 output_activation=output_activation,
108 dropouts=dropouts,
109 fit_verbose=1)
110 with open("{}/{}/{}_sum.txt".format(in_dir,name,mod),"w") as output_sum :
111 print >>output_sum, sumary
112 for num_lvl,level in enumerate(proj):
113 print len(level)
114 for num,corp_type in enumerate(["TRAIN","DEV","TEST"]):
115 pd = pandas.DataFrame(level[num])
116 pd.to_hdf("{}/{}/MLP_proj_df.hdf".format(in_dir,name),"{}/lvl{}/{}".format(mod,num_lvl,corp_type))
117 db["LABEL"] = LABELS
118 db.sync()
119 db.close()
120
BOTTLENECK/02a-mlp_score_on_BN.py
File was created 1
2 # coding: utf-8
3
4 # In[2]:
5
6 # Import
7 import gensim
8 from scipy import sparse
9 import itertools
10 from sklearn import preprocessing
11 from keras.models import Sequential
12 from keras.optimizers import SGD,Adam
13 from keras.layers.advanced_activations import ELU,PReLU
14 from keras.callbacks import ModelCheckpoint
15 from mlp import *
16 import sklearn.metrics
17 from sklearn.preprocessing import LabelBinarizer
18 import shelve
19 import pickle
20 from utils import *
21 import sys
22 import os
23 import json
24 # In[4]:
25
26 in_dir = sys.argv[1]
27 #['ASR', 'TRS', 'LABEL']
28 # In[6]:
29 json_conf =json.load(open(sys.argv[2]))
30
31 mlp_conf = json_conf["mlp"]
32 hidden_size = mlp_conf["hidden_size"]
33 loss = mlp_conf["loss"]
34 patience = mlp_conf["patience"]
35 dropouts = mlp_conf["do"]
36 epochs = mlp_conf["epochs"]
37 batch_size = mlp_conf["batch"]
38 input_activation=mlp_conf["input_activation"]
39 output_activation=mlp_conf["output_activation"]
40
41 try:
42 k = mlp_conf["sgd"]
43 if mlp_conf["sgd"]["name"] == "adam":
44 sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
45 elif mlp_conf["sgd"]["name"] == "sgd":
46 sgd = SGD(lr=mlp_conf["sgd"]["lr"])
47 except:
48 sgd = mlp_conf["sgd"]
49 name = json_conf["name"]
50
51 db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
52 shelve_logs=shelve.open("{}/{}/02a_logs.shelve".format(in_dir,name))
53
54 #
55 keys = db["LABEL"].keys()
56 proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))
57 hdf_keys = proj_hdf.keys()
58 proj_hdf.close()
59 hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
60 hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
61 hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
62 print hdf_mods
63 print hdf_lvl
64 print hdf_crossval
65
66 hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
67 labels_dict = {"origine":{} }
68 logs = {}
69 for lvl in hdf_lvl :
70 labels_dict[lvl] = {}
71 for mod in hdf_mods:
72 labels_dict[lvl][mod] = {}
73
74 for mod in hdf_mods:
75 for lvl in hdf_lvl :
76 x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TRAIN"))
77 x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"DEV"))
78 x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TEST"))
79 if x_train.shape[1] <= 8 :
80 labels_dict["origine"]["TRAIN"] = np.argmax(x_train.values,axis=1)
81 labels_dict["origine"]["DEV"] = np.argmax(x_dev.values,axis=1)
82 labels_dict["origine"]["TEST"] = np.argmax(x_test.values,axis=1)
83 continue
84 y_train = db["LABEL"][mod]["TRAIN"]
85 y_dev = db["LABEL"][mod]["DEV"]
86 y_test = db["LABEL"][mod]["TEST"]
87
88 print x_train.shape
89 print x_dev.shape
90 print x_test.shape
91 print y_train.shape
92 print y_dev.shape
93 print y_test.shape
94 pred,hist = train_mlp_pred(x_train.values,y_train,
95 x_dev.values,y_dev,
96 x_test.values,y_test,
97 hidden_size ,sgd=sgd,
98 epochs=epochs,
99 patience=patience,
100 batch_size=batch_size,
101 input_activation=input_activation,
102 output_activation=output_activation,
103 dropouts=dropouts,
104 fit_verbose=1)
105 shelve_logs["{}/{}".format(mod,lvl)] = hist
106 labels_dict[lvl][mod]["TRAIN"] = np.argmax(pred[0],axis=1)
107 labels_dict[lvl][mod]["DEV"] = np.argmax(pred[1],axis=1)
108 labels_dict[lvl][mod]["TEST"] = np.argmax(pred[2],axis=1)
109
110 for lvl in hdf_lvl:
111 db[lvl] = labels_dict[lvl]
112 shelve_logs.sync()
113 shelve_logs.close()
114 db.sync()
115 db.close()
116
BOTTLENECK/02b-transfert_ae.py
File was created 1
2 # coding: utf-8
3
4 # In[2]:
5
6 # Import
7 import gensim
8 from scipy import sparse
9 import itertools
10 from sklearn import preprocessing
11 from keras.models import Sequential
12 from keras.optimizers import SGD,Adam
13 from keras.layers.advanced_activations import ELU,PReLU
14 from keras.callbacks import ModelCheckpoint
15 from mlp import *
16 import pandas as pd
17 import sklearn.metrics
18 from sklearn.preprocessing import LabelBinarizer
19 import shelve
20 import pickle
21 from utils import *
22 import sys
23 import os
24 import json
25 # In[4]:
26
27 in_dir = sys.argv[1]
28 #['ASR', 'TRS', 'LABEL']
29 # In[6]:
30 json_conf =json.load(open(sys.argv[2]))
31
32 mlp_conf = json_conf["transfert"]
33 hidden_size = mlp_conf["hidden_size"]
34 loss = mlp_conf["loss"]
35 patience = mlp_conf["patience"]
36 dropouts = mlp_conf["do"]
37 epochs = mlp_conf["epochs"]
38 batch_size = mlp_conf["batch"]
39 input_activation=mlp_conf["input_activation"]
40 output_activation=mlp_conf["output_activation"]
41
42 try:
43 k = mlp_conf["sgd"]
44 if mlp_conf["sgd"]["name"] == "adam":
45 sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
46 elif mlp_conf["sgd"]["name"] == "sgd":
47 sgd = SGD(lr=mlp_conf["sgd"]["lr"])
48 except:
49 sgd = mlp_conf["sgd"]
50 name = json_conf["name"]
51
52 #
53 proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))
54 hdf_keys = proj_hdf.keys()
55 proj_hdf.close()
56 hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
57 hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
58 hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
59 print hdf_mods
60 print hdf_lvl
61 print hdf_crossval
62
63 hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
64 transfert_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
65 mod1,mod2 = "ASR","TRS"
66 for lvl in hdf_lvl :
67 x_train_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"TRAIN"))
68 x_dev_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"DEV"))
69 x_test_ASR = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod1,lvl,"TEST"))
70 x_train_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"TRAIN"))
71 x_dev_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"DEV"))
72 x_test_TRS = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod2,lvl,"TEST"))
73
74 if x_train_ASR.shape[1] <= 8 :
75 continue
76
77 pred = train_ae(x_train_ASR.values,
78 x_dev_ASR.values,
79 x_test_ASR.values,
80 hidden_size ,sgd=sgd,
81 y_train=x_train_TRS.values,
82 y_dev=x_dev_TRS.values,
83 y_test=x_test_TRS.values,
84 epochs=epochs,
85 patience=patience,
86 batch_size=batch_size,
87 input_activation=input_activation,
88 output_activation=output_activation,
89 dropouts=dropouts,
90 best_mod=True,
91 verbose=1)
92 for num_layer,layer in enumerate(pred):
93 transfert_train = pd.DataFrame(layer[0])
94 transfert_dev = pd.DataFrame(layer[1])
95 transfert_test = pd.DataFrame(layer[2])
96 transfert_train.to_hdf(transfert_proj_path,"{}/{}/TRAIN".format(lvl,"layer"+str(num_layer)))
97 transfert_dev.to_hdf(transfert_proj_path,"{}/{}/DEV".format(lvl,"layer"+str(num_layer)))
98 transfert_test.to_hdf(transfert_proj_path,"{}/{}/TEST".format(lvl,"layer"+str(num_layer)))
99
100
BOTTLENECK/02c-tsne_mlproj.py
File was created 1
2 # coding: utf-8
3
4 # In[2]:
5
6 # Import
7 import gensim
8 from scipy import sparse
9 import itertools
10 from sklearn import preprocessing
11 from keras.models import Sequential
12 from keras.optimizers import SGD,Adam
13 from keras.layers.advanced_activations import ELU,PReLU
14 from keras.callbacks import ModelCheckpoint
15 from mlp import *
16 import pandas as pd
17 import sklearn.metrics
18 from sklearn.preprocessing import LabelBinarizer
19 from sklearn.manifold import TSNE
20 import shelve
21 import pickle
22 from utils import *
23 import sys
24 import os
25 import json
26 # In[4]:
27
28 in_dir = sys.argv[1]
29 #['ASR', 'TRS', 'LABEL']
30 # In[6]:
31 json_conf =json.load(open(sys.argv[2]))
32
33 mlp_conf = json_conf["transfert"]
34 hidden_size = mlp_conf["hidden_size"]
35 loss = mlp_conf["loss"]
36 patience = mlp_conf["patience"]
37 dropouts = mlp_conf["do"]
38 epochs = mlp_conf["epochs"]
39 batch_size = mlp_conf["batch"]
40 input_activation=mlp_conf["input_activation"]
41 output_activation=mlp_conf["output_activation"]
42
43 try:
44 k = mlp_conf["sgd"]
45 if mlp_conf["sgd"]["name"] == "adam":
46 sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
47 elif mlp_conf["sgd"]["name"] == "sgd":
48 sgd = SGD(lr=mlp_conf["sgd"]["lr"])
49 except:
50 sgd = mlp_conf["sgd"]
51 name = json_conf["name"]
52
53 #
54 print " MLP"
55 proj_hdf = pandas.HDFStore("{}/{}/MLP_proj_df.hdf".format(in_dir,name))
56 hdf_keys = proj_hdf.keys()
57 proj_hdf.close()
58 hdf_mods = set([ x.split("/")[1] for x in hdf_keys ])
59 hdf_lvl = set( [ x.split("/")[2] for x in hdf_keys ])
60 hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
61 print hdf_mods
62 print hdf_lvl
63 print hdf_crossval
64
65 hdf_proj_path = "{}/{}/MLP_proj_df.hdf".format(in_dir,name)
66 tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir,name)
67 for mod in hdf_mods:
68 for lvl in hdf_lvl :
69 x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TRAIN"))
70 x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"DEV"))
71 x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(mod,lvl,"TEST"))
72
73 if x_train.shape[1] <= 8 :
74 continue
75 tsne= TSNE()
76 tsne_train=tsne.fit_transform(x_train.values)
77 pd.DataFrame(tsne_train).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"TRAIN"))
78 tsne= TSNE()
79 tsne_dev=tsne.fit_transform(x_dev.values)
80 pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"DEV"))
81 tsne= TSNE()
82 tsne_test=tsne.fit_transform(x_test.values)
83 pd.DataFrame(tsne_test).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"TEST"))
84 tsne = TSNE()
85 tsne_all = tsne.fit_transform(pd.concat([x_train,x_dev,x_test]).values)
86 pd.DataFrame(tsne_all).to_hdf(tsne_proj_path,key="MLP/{}/{}/{}".format(mod,lvl,"CONCAT"))
87
88 print " TRANSFERT"
89
90 hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
91 proj_hdf = pandas.HDFStore(hdf_proj_path)
92 print proj_hdf
93 hdf_keys = proj_hdf.keys()
94 proj_hdf.close()
95 print hdf_keys
96 hdf_lvl = set([ x.split("/")[1] for x in hdf_keys ])
97 hdf_layer = set( [ x.split("/")[2] for x in hdf_keys ])
98 hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
99 print hdf_lvl
100 print hdf_layer
101 print hdf_crossval
102
103 tsne_proj_path = "{}/{}/tsne_proj_df.hdf".format(in_dir,name)
104 for lvl in hdf_lvl :
105 for layer in hdf_layer:
106 x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TRAIN"))
107 x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"DEV"))
108 x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TEST"))
109
110 if x_train.shape[1] <= 8 :
111 continue
112 tsne= TSNE()
113 tsne_train=tsne.fit_transform(x_train.values)
114 pd.DataFrame(tsne_train).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"TRAIN"))
115 tsne= TSNE()
116 tsne_dev=tsne.fit_transform(x_dev.values)
117 pd.DataFrame(tsne_dev).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"DEV"))
118 tsne= TSNE()
119 tsne_test=tsne.fit_transform(x_test.values)
120 pd.DataFrame(tsne_test).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"TEST"))
121 tsne = TSNE()
122 tsne_all = tsne.fit_transform(pd.concat([x_train,x_dev,x_test]).values)
123 pd.DataFrame(tsne_all).to_hdf(tsne_proj_path,key="transfert/{}/{}/{}".format(mod,lvl,"CONCAT"))
124
BOTTLENECK/03-mlp_score_on_transfert.py
File was created 1
2 # coding: utf-8
3
4 # In[2]:
5
6 # Import
7 import gensim
8 from scipy import sparse
9 import itertools
10 from sklearn import preprocessing
11 from keras.models import Sequential
12 from keras.optimizers import SGD,Adam
13 from keras.layers.advanced_activations import ELU,PReLU
14 from keras.callbacks import ModelCheckpoint
15 from mlp import *
16 import sklearn.metrics
17 from sklearn.preprocessing import LabelBinarizer
18 import shelve
19 import pickle
20 from utils import *
21 import sys
22 import os
23 import json
24 # In[4]:
25
26 in_dir = sys.argv[1]
27 #['ASR', 'TRS', 'LABEL']
28 # In[6]:
29 json_conf =json.load(open(sys.argv[2]))
30
31 mlp_conf = json_conf["mlp"]
32 hidden_size = mlp_conf["hidden_size"]
33 loss = mlp_conf["loss"]
34 patience = mlp_conf["patience"]
35 dropouts = mlp_conf["do"]
36 epochs = mlp_conf["epochs"]
37 batch_size = mlp_conf["batch"]
38 input_activation=mlp_conf["input_activation"]
39 output_activation=mlp_conf["output_activation"]
40
41 try:
42 k = mlp_conf["sgd"]
43 if mlp_conf["sgd"]["name"] == "adam":
44 sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
45 elif mlp_conf["sgd"]["name"] == "sgd":
46 sgd = SGD(lr=mlp_conf["sgd"]["lr"])
47 except:
48 sgd = mlp_conf["sgd"]
49 name = json_conf["name"]
50
51 db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
52 shelve_logs=shelve.open("{}/{}/03_logs.shelve".format(in_dir,name),writeback=True)
53
54 #
55 keys = db["LABEL"].keys()
56
57 hdf_proj_path = "{}/{}/transfert_proj_df.hdf".format(in_dir,name)
58 proj_hdf = pandas.HDFStore(hdf_proj_path)
59 hdf_keys = proj_hdf.keys()
60 print hdf_keys
61 proj_hdf.close()
62 hdf_lvl = set([ x.split("/")[1] for x in hdf_keys ])
63 hdf_layer = set( [ x.split("/")[2] for x in hdf_keys ])
64 hdf_crossval = set([ x.split("/")[3] for x in hdf_keys ])
65 print hdf_lvl
66 print hdf_crossval
67
68 labels_dict = { }
69 logs = {}
70 for lvl in hdf_lvl :
71 labels_dict[lvl] = {}
72 for layer in hdf_layer:
73 labels_dict[lvl][layer] = {}
74
75 for lvl in hdf_lvl :
76 for layer in hdf_layer:
77 x_train = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"TRAIN"))
78 x_dev = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer,"DEV"))
79 x_test = pandas.read_hdf(hdf_proj_path,key="/{}/{}/{}".format(lvl,layer, "TEST"))
80
81 y_train = db["LABEL"]["ASR"]["TRAIN"]
82 y_dev = db["LABEL"]["ASR"]["DEV"]
83 y_test = db["LABEL"]["ASR"]["TEST"]
84
85 print x_train.shape
86 print x_dev.shape
87 print x_test.shape
88 print y_train.shape
89 print y_dev.shape
90 print y_test.shape
91 pred,hist = train_mlp_pred(x_train.values,y_train,
92 x_dev.values,y_dev,
93 x_test.values,y_test,
94 hidden_size ,sgd=sgd,
95 epochs=epochs,
96 patience=patience,
97 batch_size=batch_size,
98 input_activation=input_activation,
99 output_activation=output_activation,
100 dropouts=dropouts,
101 fit_verbose=1)
102 shelve_logs["{}/{}".format(lvl,layer)] = hist
103 labels_dict[lvl][layer]["TRAIN"] = np.argmax(pred[0],axis=1)
104 labels_dict[lvl][layer]["DEV"] = np.argmax(pred[1],axis=1)
105 labels_dict[lvl][layer]["TEST"] = np.argmax(pred[2],axis=1)
106
107 db["transfert"] = labels_dict
108 shelve_logs.sync()
109 shelve_logs.close()
110 db.sync()
111 db.close()
112
BOTTLENECK/04-accuracyscore.py
File was created 1
2 # coding: utf-8
3
4 # In[2]:
5
6 # Import
7 import gensim
8 from scipy import sparse
9 import numpy as np
10 import itertools
11 from sklearn import preprocessing
12 from keras.models import Sequential
13 from keras.optimizers import SGD,Adam
14 from keras.layers.advanced_activations import ELU,PReLU
15 from keras.callbacks import ModelCheckpoint
16 from mlp import *
17 from sklearn import metrics
18 from sklearn.preprocessing import LabelBinarizer
19 import shelve
20 import pickle
21 from utils import *
22 import sys
23 import os
24 import json
25
26 # In[4]:
27
28 in_dir = sys.argv[1]
29 #['ASR', 'TRS', 'LABEL']
30 # In[6]:
31 json_conf =json.load(open(sys.argv[2]))
32
33 name = json_conf["name"]
34
35 db = shelve.open("{}/{}/labels.shelve".format(in_dir,name))
36 #
37 keys = sorted(db.keys())
38 keys.remove("IDS")
39 keys.remove("transfert")
40 keys.remove("LABEL")
41 mods = ["ASR", "TRS"]
42 ref_train = db["LABEL"]["ASR"]["TRAIN"]
43 ref_dev = db["LABEL"]["ASR"]["DEV"]
44 ref_test = db["LABEL"]["ASR"]["TEST"]
45
46 print "name,MOD,level,train,dev,test"
47 for mod in mods :
48 for lvl in keys :
49 if "TEST" in db[lvl][mod] :
50 train_score = metrics.accuracy_score(np.argmax(ref_train,axis=1),db[lvl][mod]["TRAIN"])
51 dev_score = metrics.accuracy_score(np.argmax(ref_dev,axis=1),db[lvl][mod]["DEV"])
52 test_score = metrics.accuracy_score(np.argmax(ref_test,axis=1),db[lvl][mod]["TEST"])
53 else :
54 train_score = "ERROR"
55 dev_score = "ERROR"
56 test_score = "ERROR"
57 print ",".join([name,mod, lvl, str(train_score), str(dev_score) , str(test_score)])
58
59 for level in db["transfert"].keys() :
60 for layer in db["transfert"][level].keys():
61 if "TRAIN" in db["transfert"][level][layer].keys():
62
63 train_score = metrics.accuracy_score(np.argmax(ref_train,axis=1),db["transfert"][level][layer]["TRAIN"])
64 dev_score = metrics.accuracy_score(np.argmax(ref_dev,axis=1),db["transfert"][level][layer]["DEV"])
65 test_score = metrics.accuracy_score(np.argmax(ref_test,axis=1),db["transfert"][level][layer]["TEST"])
66 print ",".join([name,"transfert",level+"/"+layer, str(train_score), str(dev_score) , str(test_score)])
67
68 db.close()
69
File was created 1 ../LDA/mlp.py
BOTTLENECK/run01_do_alljson.sh
File was created
# Train the bottleneck MLP projections (01a) for every layer config on
# output_1, alternating configs between the two GPUs.
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L0.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L0do.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L1.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L1do.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L2.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L2do.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L3.json RAW
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 01a-mlp_proj.py output_1 sparse_tfidf.shelve output_1/L3do.json RAW
BOTTLENECK/run02_mlpscore.sh
File was created
# Score an MLP on each bottleneck projection (02a) for every layer config,
# alternating configs between the two GPUs.
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L0do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 02a-mlp_score_on_BN.py output_1 output_1/L3do.json
BOTTLENECK/run02b-transfert.sh
File was created
# Train the ASR->TRS transfer autoencoder (02b) for every layer config,
# all on gpu0 (runs sequentially).
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L0do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02b-transfert_ae.py output_1 output_1/L3do.json
BOTTLENECK/run03_tsne_MLPtransfert.sh
File was created
# Compute t-SNE embeddings of the MLP and transfer projections (02c) for
# every layer config, all on gpu0 (runs sequentially).
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L0do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 02c-tsne_mlproj.py output_1 output_1/L3do.json
BOTTLENECK/run04-mlp_on_transfert.sh
File was created
# Score an MLP on the transfer-AE projections (03) for the L1-L3 configs,
# alternating between the two GPUs.  L0 runs are kept but disabled.
#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L3do.json

#THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0.json

#THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 03-mlp_score_on_transfert.py output_1 output_1/L0do.json
BOTTLENECK/run05_accuracy.sh
File was created
# Print the accuracy CSV (04) for every layer config, alternating between
# the two GPUs.
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L1do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L2do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L3do.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu0,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0.json
THEANO_FLAGS=mode=FAST_RUN,device=gpu1,floatX=float32 python 04-accuracyscore.py output_1 output_1/L0do.json
BOTTLENECK/run_all.sh
File was created
# Run the full pipeline (run_one.sh) for every config on output_3, two
# background jobs per GPU, in batches of four separated by wait barriers.
bash run_one.sh output_3 output_3/L0do.json gpu0 &
bash run_one.sh output_3 output_3/L0.json gpu1 &
bash run_one.sh output_3 output_3/L1do.json gpu0 &
bash run_one.sh output_3 output_3/L1.json gpu1 &
wait
bash run_one.sh output_3 output_3/L2do.json gpu0 &
bash run_one.sh output_3 output_3/L2.json gpu1 &
bash run_one.sh output_3 output_3/L3bndo.json gpu0 &
bash run_one.sh output_3 output_3/L3ce1.json gpu1 &
wait
bash run_one.sh output_3 output_3/L3ce.json gpu0 &
bash run_one.sh output_3 output_3/L3do.json gpu1 &
bash run_one.sh output_3 output_3/L3.json gpu0 &
bash run_one.sh output_3 output_3/L3sigmo.json gpu1 &
wait
bash run_one.sh output_3 output_3/L4do.json gpu0 &
bash run_one.sh output_3 output_3/L5do.json gpu1 &
bash run_one.sh output_3 output_3/L6do.json gpu0 &
bash run_one.sh output_3 output_3/L7do.json gpu1 &
wait
bash run_one.sh output_3 output_3/MaxMLP.json gpu0 &
bash run_one.sh output_3 output_3/MinMLP.json gpu1 &
BOTTLENECK/run_one.sh
File was created 1 bn=$(basename $2)
2 time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 01a-mlp_proj.py $1 Sparse_tfidf2.shelve $2 RAW) 2>> logs/${bn}_time ; echo MLP_$2 >> logs/${bn}_time
3 THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02a-mlp_score_on_BN.py $1 $2
4 THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02b-transfert_ae.py $1 $2
5 THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 02c-tsne_mlproj.py $1 $2
6 time (THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 03-mlp_score_on_transfert.py $1 $2) 2>> logs/${bn}_time ; echo transfert_$2 >> logs/${bn}_time
7 THEANO_FLAGS=mode=FAST_RUN,device=$3,floatX=float32 python 04-accuracyscore.py $1 $2 >> $1/res.csv
8
File was created 1 ../utils.py