#!/usr/bin/env python
# coding: utf-8
# In[2]:
# Import
import itertools
import json
import os
import pickle
import shelve
import sys

import gensim
import pandas
import sklearn.metrics
from keras.callbacks import ModelCheckpoint
from keras.layers.advanced_activations import ELU, PReLU
from keras.models import Sequential
from keras.optimizers import SGD, Adam
from scipy import sparse
from sklearn import preprocessing
from sklearn.preprocessing import LabelBinarizer

from mlp import *
from utils import *
# In[4]:
# CLI: argv[1] = working directory, argv[2] = shelve file holding the
# features and labels, argv[3] = JSON experiment config,
# argv[4] (optional) = feature key inside the shelve (default "LDA").
infer_model = shelve.open("{}".format(sys.argv[2]))
in_dir = sys.argv[1]
# Shelve is expected to contain keys such as 'ASR', 'TRS', 'LABEL'
# In[6]:
if len(sys.argv) > 4:
    features_key = sys.argv[4]
else:
    features_key = "LDA"
save_projection = True
# Use a context manager so the config file handle is closed promptly
# (the original json.load(open(...)) leaked it).
with open(sys.argv[3]) as conf_file:
    json_conf = json.load(conf_file)
ae_conf = json_conf["mlp_proj"]
hidden_size= ae_conf["hidden_size"]
input_activation = None
if ae_conf["input_activation"] == "elu":
print " ELU"
input_activation = PReLU()
else:
print " ELSE"
input_activation = ae_conf["input_activation"]
#input_activation=ae_conf["input_activation"]
output_activation=ae_conf["output_activation"]
loss=ae_conf["loss"]
epochs=ae_conf["epochs"]
batch_size=ae_conf["batch"]
patience=ae_conf["patience"]
dropouts=ae_conf["do"]
try:
k = ae_conf["sgd"]
if ae_conf["sgd"]["name"] == "adam":
sgd = Adam(lr=ae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True)
elif ae_conf["sgd"]["name"] == "sgd":
sgd = SGD(lr=ae_conf["sgd"]["lr"])
except:
sgd = ae_conf["sgd"]
# Hyper-parameters of the downstream classifier MLP ("mlp" section).
mlp_conf = json_conf["mlp"]
mlp_h = mlp_conf["hidden_size"]
mlp_loss = mlp_conf["loss"]
mlp_dropouts = mlp_conf["do"]
mlp_epochs = mlp_conf["epochs"]
mlp_batch_size = mlp_conf["batch"]
mlp_input_activation = mlp_conf["input_activation"]
mlp_output_activation = mlp_conf["output_activation"]
# Optimizer resolution mirrors the "mlp_proj" section: dict config builds a
# configured Adam/SGD, any non-dict value is passed straight to Keras.
# Narrowed from a bare `except:` to the two exceptions that can occur here.
try:
    mlp_sgd_conf = mlp_conf["sgd"]
    if mlp_sgd_conf["name"] == "adam":
        mlp_sgd = Adam(lr=mlp_sgd_conf["lr"])
    elif mlp_sgd_conf["name"] == "sgd":
        mlp_sgd = SGD(lr=mlp_sgd_conf["lr"])
except (KeyError, TypeError):
    mlp_sgd = mlp_conf["sgd"]
# Create the experiment output directory and open the label shelve in it.
name = json_conf["name"]
out_dir = "{}/{}".format(in_dir, name)
try:
    os.mkdir(out_dir)
except OSError:
    # Directory already exists -- best-effort creation, keep going.
    pass
db = shelve.open("{}/labels.shelve".format(out_dir))
# Snapshot the raw label mapping alongside the binarized labels saved later.
db["IDS"] = dict(infer_model["LABEL"])
#
keys = infer_model[features_key].keys()
LABELS = {}
for mod in keys :
int_labels_train = map(select,infer_model["LABEL"][mod]["TRAIN"])
binarizer = LabelBinarizer()
y_train=binarizer.fit_transform(int_labels_train)
y_dev=binarizer.transform(map(select,infer_model["LABEL"][mod]["DEV"]))
y_test=binarizer.transform(map(select,infer_model["LABEL"][mod]["TEST"]))
LABELS[mod]= { "TRAIN":y_train , "DEV" : y_dev, "TEST" : y_test}
sumary,proj = train_mlp_proj(infer_model[features_key][mod]["TRAIN"].todense(),y_train,
infer_model[features_key][mod]["DEV"].todense(),y_dev,
infer_model[features_key][mod]["TEST"].todense(),y_test,
hidden_size ,sgd=sgd,
epochs=epochs,
patience=patience,
batch_size=batch_size,
input_activation=input_activation,
output_activation=output_activation,
dropouts=dropouts,
fit_verbose=1)
with open("{}/{}/{}_sum.txt".format(in_dir,name,mod),"w") as output_sum :
print >>output_sum, sumary
for num_lvl,level in enumerate(proj):
print len(level)
for num,corp_type in enumerate(["TRAIN","DEV","TEST"]):
pd = pandas.DataFrame(level[num])
pd.to_hdf("{}/{}/MLP_proj_df.hdf".format(in_dir,name),"{}/lvl{}/{}".format(mod,num_lvl,corp_type))
db["LABEL"] = LABELS
db.sync()
db.close()