Commit d1012a7a1689588ac0d1e4a716497562663c14c2
1 parent: ee9023b1c9
Exists in: master
update LDA/.py
Showing 7 changed files with 8 additions and 289 deletions
LDA/00-mmf_make_features.py
| 1 | import sys | 1 | import sys |
| 2 | import os | 2 | import os |
| 3 | 3 | ||
| 4 | import pandas | 4 | import pandas |
| 5 | import numpy | 5 | import numpy |
| 6 | import shelve | 6 | import shelve |
| 7 | 7 | ||
| 8 | from sklearn.preprocessing import LabelBinarizer | 8 | from sklearn.preprocessing import LabelBinarizer |
| 9 | 9 | ||
| 10 | from utils import select_mmf as select | 10 | from utils import select_mmf as select |
| 11 | 11 | ||
| 12 | input_dir = sys.argv[1] # Top-level directory containing ASR and TRS | 12 | input_dir = sys.argv[1] # Top-level directory containing ASR and TRS |
| 13 | level = sys.argv[2] # desired LDA size ( -5) | 13 | level = sys.argv[2] # desired LDA size ( -5) |
| 14 | output_dir = sys.argv[3] | 14 | output_dir = sys.argv[3] |
| 15 | 15 | ||
| 16 | lb=LabelBinarizer() | 16 | lb=LabelBinarizer() |
| 17 | #y_train=lb.fit_transform([utils.select(ligneid) for ligneid in origin_corps["LABEL"]["TRAIN"]]) | 17 | #y_train=lb.fit_transform([utils.select(ligneid) for ligneid in origin_corps["LABEL"]["TRAIN"]]) |
| 18 | 18 | ||
| 19 | 19 | ||
| 20 | data = shelve.open("{}/mmf_{}.shelve".format(output_dir,level),writeback=True) | 20 | data = shelve.open("{}/mmf_{}.shelve".format(output_dir,level),writeback=True) |
| 21 | data["LABEL"]= {} | 21 | data["LABEL"]= {} |
| 22 | data["LDA"] = {"ASR":{},"TRS":{}} | 22 | data["LDA"] = {"ASR":{},"TRS":{}} |
| 23 | for mod in ["ASR", "TRS" ]: | 23 | for mod in ["ASR", "TRS" ]: |
| 24 | train = pandas.read_table("{}/{}/train_{}.ssv".format(input_dir, mod, level), sep=" ", header=None ) | 24 | train = pandas.read_table("{}/{}/train_{}.tab".format(input_dir, mod, level), sep=" ", header=None ) |
| 25 | dev = pandas.read_table("{}/{}/dev_{}.ssv".format(input_dir, mod, level), sep=" ", header=None ) | 25 | dev = pandas.read_table("{}/{}/dev_{}.tab".format(input_dir, mod, level), sep=" ", header=None ) |
| 26 | test = pandas.read_table("{}/{}/test_{}.ssv".format(input_dir, mod, level), sep=" ", header=None ) | 26 | test = pandas.read_table("{}/{}/test_{}.tab".format(input_dir, mod, level), sep=" ", header=None ) |
| 27 | 27 | ||
| 28 | y_train = train.iloc[:,0].apply(select) | 28 | y_train = train.iloc[:,0].apply(select) |
| 29 | y_dev = dev.iloc[:,0].apply(select) | 29 | y_dev = dev.iloc[:,0].apply(select) |
| 30 | y_test = test.iloc[:,0].apply(select) | 30 | y_test = test.iloc[:,0].apply(select) |
| 31 | lb.fit(y_train) | 31 | lb.fit(y_train) |
| 32 | data["LABEL"][mod]={"TRAIN":lb.transform(y_train),"DEV":lb.transform(y_dev), "TEST": lb.transform(y_test)} | 32 | data["LABEL"][mod]={"TRAIN":lb.transform(y_train),"DEV":lb.transform(y_dev), "TEST": lb.transform(y_test)} |
| 33 | 33 | ||
| 34 | # data["LDA"][mod]={'ASR':[]} | 34 | # data["LDA"][mod]={'ASR':[]} |
| 35 | print data["LDA"][mod] | ||
| 36 | print train.values | 35 | print train.values |
| 37 | data["LDA"][mod]["TRAIN"]=train.iloc[:,1:-1].values | 36 | data["LDA"][mod]["TRAIN"]=train.iloc[:,1:-1].values |
| 38 | data["LDA"][mod]["DEV"]=dev.iloc[:,1:-1].values | 37 | data["LDA"][mod]["DEV"]=dev.iloc[:,1:-1].values |
| 39 | data["LDA"][mod]["TEST"]=test.iloc[:,1:-1].values | 38 | data["LDA"][mod]["TEST"]=test.iloc[:,1:-1].values |
| 40 | 39 | ||
| 40 | print data["LDA"][mod]["TRAIN"].shape | ||
| 41 | data.sync() | 41 | data.sync() |
| 42 | data.close() | 42 | data.close() |
LDA/02-lda_split.py
| 1 | import gensim | File was deleted | |
| 2 | import os | ||
| 3 | import sys | ||
| 4 | import pickle | ||
| 5 | from gensim.models.ldamodel import LdaModel | ||
| 6 | from gensim.models.ldamulticore import LdaMulticore | ||
| 7 | from collections import Counter | ||
| 8 | import numpy as np | ||
| 9 | import codecs | ||
| 10 | import shelve | ||
| 11 | import logging | ||
| 12 | |||
| 13 | def calc_perp(in_dir,train): | ||
| 14 | name = in_dir.split("/")[-1] | ||
| 15 | # s40_it1_sw50_a0.01_e0.1_p6_c1000 | ||
| 16 | sw_size = int(name.split("_")[2][2:]) | ||
| 17 | |||
| 18 | logging.warning(" go {} ".format(name)) | ||
| 19 | |||
| 20 | |||
| 21 | logging.warning("Redo Vocab and stop") | ||
| 22 | asr_count=Counter([ x for y in train["ASR_wid"]["TRAIN"] for x in y]) | ||
| 23 | trs_count=Counter([ x for y in train["TRS_wid"]["TRAIN"] for x in y]) | ||
| 24 | asr_sw = [ x[0] for x in asr_count.most_common(sw_size) ] | ||
| 25 | trs_sw = [ x[0] for x in trs_count.most_common(sw_size) ] | ||
| 26 | stop_words=set(asr_sw) | set(trs_sw) | ||
| 27 | |||
| 28 | logging.warning("TRS to be done") | ||
| 29 | entry = Query() | ||
| 30 | value=db.search(entry.name == name) | ||
| 31 | if len(value) > 0 : | ||
| 32 | logging.warning("{} already done".format(name)) | ||
| 33 | return | ||
| 34 | |||
| 35 | dev_trs=[ [ (x,y) for x,y in Counter(z).items() if x not in stop_words] for z in train["TRS_wid"]["DEV"]] | ||
| 36 | lda_trs = LdaModel.load("{}/lda_trs.model".format(in_dir)) | ||
| 37 | perp_trs = lda_trs.log_perplexity(dev_trs) | ||
| 38 | logging.warning("ASR to be done") | ||
| 39 | dev_asr = [ [ (x,y) for x,y in Counter(z).items() if x not in stop_words] for z in train["ASR_wid"]["DEV"]] | ||
| 40 | lda_asr = LdaModel.load("{}/lda_asr.model".format(in_dir)) | ||
| 41 | perp_asr = lda_asr.log_perplexity(dev_asr) | ||
| 42 | logging.warning("ASR saving") | ||
| 43 | res_dict = {"name" : name, "asr" : perp_asr, "trs" : perp_trs} | ||
| 44 | return res_dict | ||
| 45 | |||
| 46 | |||
| 47 | |||
| 48 | |||
| 49 | def train_lda(out_dir,train,name,size,it,sw_size,alpha,eta,passes,chunk): | ||
| 50 | output_dir = "{}/s{}_it{}_sw{}_a{}_e{}_p{}_c{}".format(out_dir,size,it,sw_size,alpha,eta,passes,chunk) | ||
| 51 | os.mkdir(output_dir) | ||
| 52 | logging.info(output_dir+" to be done") | ||
| 53 | asr_count=Counter([ x for y in train["ASR_wid"]["TRAIN"] for x in y]) | ||
| 54 | trs_count=Counter([ x for y in train["TRS_wid"]["TRAIN"] for x in y]) | ||
| 55 | asr_sw = [ x[0] for x in asr_count.most_common(sw_size) ] | ||
| 56 | trs_sw = [ x[0] for x in trs_count.most_common(sw_size) ] | ||
| 57 | stop_words=set(asr_sw) | set(trs_sw) | ||
| 58 | |||
| 59 | logging.info("TRS to be done") | ||
| 60 | |||
| 61 | lda_trs = LdaModel(corpus=[ [ (x,y) for x,y in Counter(z).items() if x not in stop_words] for z in train["TRS_wid"]["TRAIN"]], id2word=train["vocab"], num_topics=int(size), chunksize=1000,iterations=it) | ||
| 62 | |||
| 63 | logging.info("ASR to be done") | ||
| 64 | lda_asr = LdaModel(corpus=[ [ (x,y) for x,y in Counter(z).items() if x not in stop_words] for z in train["ASR_wid"]["TRAIN"]], id2word=train["vocab"], num_topics=int(size), chunksize=1000,iterations=it) | ||
| 65 | |||
| 66 | #logger.info("ASR saving") | ||
| 67 | #lda_asr.save("{}/lda_asr.model".format(output_dir,name,size,it)) | ||
| 68 | #lda_trs.save("{}/lda_trs.model".format(output_dir,name,size,it)) | ||
| 69 | |||
| 70 | |||
| 71 | out_file_asr=codecs.open("{}/asr_wordTopic.txt".format(output_dir),"w","utf-8") | ||
| 72 | out_file_trs=codecs.open("{}/trs_wordTopic.txt".format(output_dir),"w","utf-8") | ||
| 73 | |||
| 74 | dico = train["vocab"] | ||
| 75 | print >>out_file_asr, ",\t".join( [ dico[x] for x in range(len(train["vocab"]))]) | ||
| 76 | for line in lda_asr.expElogbeta: | ||
| 77 | nline = line / np.sum(line) | ||
| 78 | print >>out_file_asr, ",\t".join( str(x) for x in nline) | ||
| 79 | out_file_asr.close() | ||
| 80 | |||
| 81 | print >>out_file_trs, ",\t".join( [ dico[x] for x in range(len(train["vocab"]))]) | ||
| 82 | for line in lda_trs.expElogbeta: | ||
| 83 | nline = line / np.sum(line) | ||
| 84 | print >>out_file_trs, ",\t".join( str(x) for x in nline) | ||
| 85 | out_file_trs.close() | ||
| 86 | |||
| 87 | K = lda_asr.num_topics | ||
| 88 | topicWordProbMat = lda_asr.print_topics(K,10) | ||
| 89 | out_file_asr=codecs.open("{}/asr_best10.txt".format(output_dir),"w","utf-8") | ||
| 90 | for i in topicWordProbMat: | ||
| 91 | print >>out_file_asr,i | ||
| 92 | out_file_asr.close() | ||
| 93 | |||
| 94 | K = lda_trs.num_topics | ||
| 95 | topicWordProbMat = lda_trs.print_topics(K,10) | ||
| 96 | out_file_trs=codecs.open("{}/trs_best10.txt".format(output_dir),"w","utf-8") | ||
| 97 | for i in topicWordProbMat: | ||
| 98 | print >>out_file_trs,i | ||
| 99 | out_file_trs.close() | ||
| 100 | |||
| 101 | if __name__ == "__main__": | ||
| 102 | logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.WARNING) | ||
| 103 | |||
| 104 | input_shelve = sys.argv[1] | ||
| 105 | output_dir = sys.argv[2] | ||
| 106 | size = [ int(x) for x in sys.argv[3].split("_")] | ||
| 107 | workers = int(sys.argv[4]) | ||
| 108 | name = sys.argv[5] | ||
| 109 | it = [ int(x) for x in sys.argv[6].split("_")] | ||
| 110 | sw_size = [ int(x) for x in sys.argv[7].split("_")] | ||
| 111 | alpha = ["auto" , "symmetric"] + [ float(x) for x in sys.argv[8].split("_")] | ||
| 112 | eta = ["auto"] + [ float(x) for x in sys.argv[9].split("_")] | ||
| 113 | passes = [ int(x) for x in sys.argv[10].split("_")] | ||
| 114 | chunk = [ int(x) for x in sys.argv[11].split("_")] | ||
| 115 | |||
| 116 | #train=pickle.load(open("{}/newsgroup_bow_train.pk".format(input_dir))) | ||
| 117 | train = shelve.open(input_shelve) | ||
| 118 | out_dir = "{}/{}".format(output_dir,name) | ||
| 119 | os.mkdir(out_dir) | ||
| 120 | |||
| 121 | for s in size: | ||
| 122 | for i in it : | ||
| 123 | for sw in sw_size: | ||
| 124 | for a in alpha: | ||
| 125 | for e in eta: | ||
| 126 | for p in passes: | ||
| 127 | for c in chunk: | ||
| 128 | train_lda(out_dir,train,name,s,i,sw,a,e,p,c) | ||
| 129 | 1 | import gensim | |
| 130 | 2 | import os |
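Each run of train_lda writes into a directory named s{size}_it{it}_sw{sw_size}_a{alpha}_e{eta}_p{passes}_c{chunk}, and calc_perp recovers the stop-word count by parsing that name. A small sketch of the round trip, using the example name from the comment in calc_perp:

# Recovering hyperparameters from a run-directory name; the format is the one
# built in train_lda, the example value comes from the comment in calc_perp.
name = "s40_it1_sw50_a0.01_e0.1_p6_c1000"
fields = name.split("_")
size = int(fields[0][1:])        # "s40"  -> 40 topics
iterations = int(fields[1][2:])  # "it1"  -> 1 LDA iteration
sw_size = int(fields[2][2:])     # "sw50" -> 50 stop words, as parsed in calc_perp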
LDA/02b-lda_order.py
| 1 | import gensim | File was deleted | |
| 2 | import os | ||
| 3 | import sys | ||
| 4 | import pickle | ||
| 5 | from gensim.models.ldamodel import LdaModel | ||
| 6 | from gensim.models.ldamulticore import LdaMulticore | ||
| 7 | from collections import Counter | ||
| 8 | import numpy as np | ||
| 9 | import codecs | ||
| 10 | import shelve | ||
| 11 | import logging | ||
| 12 | import dill | ||
| 13 | from tinydb import TinyDB, where, Query | ||
| 14 | import time | ||
| 15 | from joblib import Parallel, delayed | ||
| 16 | |||
| 17 | def calc_perp(models,train): | ||
| 18 | |||
| 19 | |||
| 20 | stop_words=models[1] | ||
| 21 | name = models[0] | ||
| 22 | |||
| 23 | logging.warning(" go {} ".format(name)) | ||
| 24 | logging.warning("TRS to be done") | ||
| 25 | entry = Query() | ||
| 26 | value=db.search(entry.name == name) | ||
| 27 | if len(value) > 0 : | ||
| 28 | logging.warning("{} already done".format(name)) | ||
| 29 | return | ||
| 30 | |||
| 31 | dev_trs=[ [ (x,y) for x,y in Counter(z).items() if x not in stop_words] for z in train["TRS_wid"]["DEV"]] | ||
| 32 | lda_trs = models[2] | ||
| 33 | perp_trs = lda_trs.log_perplexity(dev_trs) | ||
| 34 | |||
| 35 | logging.warning("ASR to be done") | ||
| 36 | dev_asr = [ [ (x,y) for x,y in Counter(z).items() if x not in stop_words] for z in train["ASR_wid"]["DEV"]] | ||
| 37 | lda_asr = models[5] | ||
| 38 | perp_asr = lda_asr.log_perplexity(dev_asr) | ||
| 39 | logging.warning("ASR saving") | ||
| 40 | res_dict = {"name" : name, "asr" : perp_asr, "trs" : perp_trs } | ||
| 41 | return res_dict | ||
| 42 | |||
| 43 | |||
| 44 | |||
| 45 | |||
| 46 | def train_lda(out_dir,train,size,it,sw_size,alpha,eta,passes,chunk): | ||
| 47 | name = "s{}_it{}_sw{}_a{}_e{}_p{}_c{}".format(size,it,sw_size,alpha,eta,passes,chunk) | ||
| 48 | logging.warning(name) | ||
| 49 | deep_out_dir = out_dir+"/"+name | ||
| 50 | if os.path.isdir(deep_out_dir): | ||
| 51 | logging.error(name+" already done") | ||
| 52 | return | ||
| 53 | logging.warning(name+" to be done") | ||
| 54 | asr_count=Counter([ x for y in train["ASR_wid"]["TRAIN"] for x in y]) | ||
| 55 | trs_count=Counter([ x for y in train["TRS_wid"]["TRAIN"] for x in y]) | ||
| 56 | asr_sw = [ x[0] for x in asr_count.most_common(sw_size) ] | ||
| 57 | trs_sw = [ x[0] for x in trs_count.most_common(sw_size) ] | ||
| 58 | stop_words=set(asr_sw) | set(trs_sw) | ||
| 59 | |||
| 60 | logging.warning("TRS to be done") | ||
| 61 | |||
| 62 | lda_trs = LdaModel(corpus=[ [ (x,y) for x,y in Counter(z).items() if x not in stop_words] for z in train["TRS_wid"]["TRAIN"]], id2word=train["vocab"], num_topics=int(size), chunksize=chunk,iterations=it,alpha=alpha,eta=eta,passes=passes) | ||
| 63 | |||
| 64 | logging.warning("ASR to be done") | ||
| 65 | lda_asr = LdaModel(corpus=[ [ (x,y) for x,y in Counter(z).items() if x not in stop_words] for z in train["ASR_wid"]["TRAIN"]], id2word=train["vocab"], num_topics=int(size), chunksize=chunk,iterations=it,alpha=alpha,eta=eta,passes=passes) | ||
| 66 | |||
| 67 | dico = train["vocab"] | ||
| 68 | word_list = [ dico[x] for x in range(len(train["vocab"]))] | ||
| 69 | asr_probs = [] | ||
| 70 | for line in lda_asr.expElogbeta: | ||
| 71 | nline = line / np.sum(line) | ||
| 72 | asr_probs.append([ str(x) for x in nline]) | ||
| 73 | trs_probs = [] | ||
| 74 | for line in lda_trs.expElogbeta: | ||
| 75 | nline = line / np.sum(line) | ||
| 76 | trs_probs.append([str(x) for x in nline]) | ||
| 77 | |||
| 78 | K = lda_asr.num_topics | ||
| 79 | topicWordProbMat_asr = lda_asr.print_topics(K,10) | ||
| 80 | |||
| 81 | K = lda_trs.num_topics | ||
| 82 | topicWordProbMat_trs = lda_trs.print_topics(K,10) | ||
| 83 | os.mkdir(deep_out_dir) | ||
| 84 | dill.dump([x for x in stop_words],open(deep_out_dir+"/stopwords.dill","w")) | ||
| 85 | lda_asr.save(deep_out_dir+"/lda_asr.model") | ||
| 86 | lda_trs.save(deep_out_dir+"/lda_trs.model") | ||
| 87 | dill.dump([x for x in asr_probs],open(deep_out_dir+"/lda_asr_probs.dill","w")) | ||
| 88 | dill.dump([x for x in trs_probs],open(deep_out_dir+"/lda_trs_probs.dill","w")) | ||
| 89 | |||
| 90 | return [name, stop_words, lda_asr , asr_probs , topicWordProbMat_asr, lda_trs, trs_probs, topicWordProbMat_trs] | ||
| 91 | |||
| 92 | def train_one(name,train,s,i,sw,a,e,p,c): | ||
| 93 | st=time.time() | ||
| 94 | logging.warning(" ; ".join([str(x) for x in [s,i,sw,a,e,p,c]])) | ||
| 95 | models = train_lda(name,train,s,i,sw,a,e,p,c) | ||
| 96 | if models: | ||
| 97 | m = calc_perp(models,train) | ||
| 98 | #dill.dump(models,open("{}/{}.dill".format(name,models[0]),"wb")) | ||
| 99 | else : | ||
| 100 | m = None | ||
| 101 | e = time.time() | ||
| 102 | logging.warning("fin en : {}".format(e-st)) | ||
| 103 | return m | ||
| 104 | |||
| 105 | |||
| 106 | |||
| 107 | |||
| 108 | if __name__ == "__main__": | ||
| 109 | logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.WARNING) | ||
| 110 | |||
| 111 | input_shelve = sys.argv[1] | ||
| 112 | db_path = sys.argv[2] | ||
| 113 | size = [ int(x) for x in sys.argv[3].split("_")] | ||
| 114 | workers = int(sys.argv[4]) | ||
| 115 | name = sys.argv[5] | ||
| 116 | it = [ int(x) for x in sys.argv[6].split("_")] | ||
| 117 | sw_size = [ int(x) for x in sys.argv[7].split("_")] | ||
| 118 | if sys.argv[8] != "None" : | ||
| 119 | alpha = [ "symmetric", "auto" ] + [ float(x) for x in sys.argv[8].split("_")] | ||
| 120 | eta = ["auto"] + [ float(x) for x in sys.argv[9].split("_")] | ||
| 121 | else : | ||
| 122 | alpha = ["symmetric"] | ||
| 123 | eta = ["auto"] | ||
| 124 | passes = [ int(x) for x in sys.argv[10].split("_")] | ||
| 125 | chunk = [ int(x) for x in sys.argv[11].split("_")] | ||
| 126 | |||
| 127 | #train=pickle.load(open("{}/newsgroup_bow_train.pk".format(input_dir))) | ||
| 128 | train = shelve.open(input_shelve) | ||
| 129 | try : | ||
| 130 | os.mkdir(name) | ||
| 131 | except : | ||
| 132 | logging.warning(" folder already existe " ) | ||
| 133 | db = TinyDB(db_path) | ||
| 134 | nb_model = len(passes) * len(chunk) * len(it) * len(sw_size) * len(alpha) * len(eta) * len(size) | ||
| 135 | logging.warning(" hey will train {} models ".format(nb_model)) | ||
| 136 | |||
| 137 | args_list=[] | ||
| 138 | for p in passes: | ||
| 139 | for c in chunk: | ||
| 140 | for i in it : | ||
| 141 | for sw in sw_size: | ||
| 142 | for a in alpha: | ||
| 143 | for e in eta: | ||
| 144 | for s in size: | ||
| 145 | args_list.append((name,train,s,i,sw,a,e,p,c)) | ||
| 146 | res_list= Parallel(n_jobs=15)(delayed(train_one)(*args) for args in args_list) | ||
| 147 | for m in res_list : | ||
| 148 | if m is not None : db.insert(m) # train_one returns None for runs already recorded | ||
| 149 | |||
| 150 | 1 | import gensim |
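calc_perp skips any model whose name already appears in the TinyDB results database, and __main__ inserts one record per trained model. A minimal sketch of that lookup/insert pattern; the database path and the perplexity values are illustrative only:

from tinydb import TinyDB, Query

db = TinyDB("results.json")  # hypothetical path; the script takes it as sys.argv[2]
entry = Query()
name = "s40_it1_sw50_a0.01_e0.1_p6_c1000"
if not db.search(entry.name == name):
    db.insert({"name": name, "asr": -7.2, "trs": -6.9})  # dummy perplexity values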
LDA/04b-mini_ae.py
| 1 | 1 | ||
| 2 | # coding: utf-8 | 2 | # coding: utf-8 |
| 3 | 3 | ||
| 4 | # In[2]: | 4 | # In[2]: |
| 5 | 5 | ||
| 6 | # Import | 6 | # Import |
| 7 | import gensim | 7 | import gensim |
| 8 | from scipy import sparse | 8 | from scipy import sparse |
| 9 | import itertools | 9 | import itertools |
| 10 | from sklearn import preprocessing | 10 | from sklearn import preprocessing |
| 11 | from keras.models import Sequential | 11 | from keras.models import Sequential |
| 12 | from keras.optimizers import SGD,Adam | 12 | from keras.optimizers import SGD,Adam |
| 13 | from mlp import * | 13 | from mlp import * |
| 14 | import mlp | 14 | import mlp |
| 15 | import sklearn.metrics | 15 | import sklearn.metrics |
| 16 | import shelve | 16 | import shelve |
| 17 | import pickle | 17 | import pickle |
| 18 | from utils import * | 18 | from utils import * |
| 19 | import sys | 19 | import sys |
| 20 | import os | 20 | import os |
| 21 | import json | 21 | import json |
| 22 | # In[4]: | 22 | # In[4]: |
| 23 | 23 | ||
| 24 | sparse_model=shelve.open("{}".format(sys.argv[2])) | 24 | sparse_model=shelve.open("{}".format(sys.argv[2])) |
| 25 | in_dir = sys.argv[1] | 25 | in_dir = sys.argv[1] |
| 26 | infer_model=shelve.open("{}/infer.shelve".format(in_dir)) | 26 | infer_model=shelve.open("{}/infer.shelve".format(in_dir)) |
| 27 | #['ASR', 'TRS', 'LABEL'] | 27 | #['ASR', 'TRS', 'LABEL'] |
| 28 | # In[6]: | 28 | # In[6]: |
| 29 | ASR=sparse_model["ASR_wid"] | 29 | ASR=sparse_model["ASR_wid"] |
| 30 | TRS=sparse_model["TRS_wid"] | 30 | TRS=sparse_model["TRS_wid"] |
| 31 | LABEL=sparse_model["LABEL"] | 31 | LABEL=sparse_model["LABEL"] |
| 32 | 32 | ||
| 33 | 33 | ||
| 34 | hidden_size=40 | 34 | hidden_size=40 |
| 35 | input_activation="tanh" | 35 | input_activation="tanh" |
| 36 | out_activation="tanh" | 36 | out_activation="tanh" |
| 37 | loss="mse" | 37 | loss="mse" |
| 38 | epochs=500 | 38 | epochs=500 |
| 39 | batch=1 | 39 | batch=1 |
| 40 | patience=60 | 40 | patience=60 |
| 41 | do_do=False | 41 | do_do=False |
| 42 | sgd = Adam(lr=0.00001)#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | 42 | sgd = Adam(lr=0.00001)#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) |
| 43 | try : | 43 | try : |
| 44 | sgd_repr=sgd.get_config()["name"] | 44 | sgd_repr=sgd.get_config()["name"] |
| 45 | except AttributeError : | 45 | except AttributeError : |
| 46 | sgd_repr=sgd | 46 | sgd_repr=sgd |
| 47 | 47 | ||
| 48 | params={ "h1" : hidden_size, | 48 | params={ "h1" : hidden_size, |
| 49 | "inside_activation" : input_activation, | 49 | "inside_activation" : input_activation, |
| 50 | "out_activation" : out_activation, | 50 | "out_activation" : out_activation, |
| 51 | "do_dropout": do_do, | 51 | "do_dropout": do_do, |
| 52 | "loss" : loss, | 52 | "loss" : loss, |
| 53 | "epochs" : epochs , | 53 | "epochs" : epochs , |
| 54 | "batch_size" : batch, | 54 | "batch_size" : batch, |
| 55 | "patience" : patience, | 55 | "patience" : patience, |
| 56 | "sgd" : sgd_repr} | 56 | "sgd" : sgd_repr} |
| 57 | name = "_".join([ str(x) for x in params.values()]) | 57 | name = "_".join([ str(x) for x in params.values()]) |
| 58 | try: | 58 | try: |
| 59 | os.mkdir("{}/{}".format(in_dir,name)) | 59 | os.mkdir("{}/{}".format(in_dir,name)) |
| 60 | except: | 60 | except: |
| 61 | pass | 61 | pass |
| 62 | db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True) | 62 | db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True) |
| 63 | db["params"] = params | 63 | db["params"] = params |
| 64 | db["LABEL"]=LABEL | 64 | db["LABEL"]=LABEL |
| 65 | # | 65 | # |
| 66 | json.dump(params, | 66 | json.dump(params, |
| 67 | open("{}/{}/ae_model.json".format(in_dir,name),"w"), | 67 | open("{}/{}/ae_model.json".format(in_dir,name),"w"), |
| 68 | indent=4) | 68 | indent=4) |
| 69 | 69 | ||
| 70 | keys = ["ASR","TRS"] | 70 | keys = ["ASR","TRS"] |
| 71 | 71 | ||
| 72 | mlp_h = [ 40 , 25 , 40] | 72 | mlp_h = [ 512 , 1024 , 2048] |
| 73 | mlp_loss ="categorical_crossentropy" | 73 | mlp_loss ="categorical_crossentropy" |
| 74 | mlp_dropouts = [0,0,0,0] | 74 | mlp_dropouts = [0,0,0,0] |
| 75 | mlp_sgd = Adam(0.0001) | 75 | mlp_sgd = Adam(0.0001) |
| 76 | mlp_epochs = 200 | 76 | mlp_epochs = 200 |
| 77 | mlp_batch_size = 8 | 77 | mlp_batch_size = 8 |
| 78 | 78 | ||
| 79 | db["AE"] = {} | 79 | db["AE"] = {} |
| 80 | for mod in keys : | 80 | for mod in keys : |
| 81 | res=train_ae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"],[params["h1"]],patience = params["patience"],sgd=sgd,in_activation="tanh",out_activation="tanh",loss=loss,epochs=epochs,batch_size=batch,verbose=0) | 81 | res=train_ae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"],[params["h1"]],patience = params["patience"],sgd=sgd,in_activation="tanh",out_activation="tanh",loss=loss,epochs=epochs,batch_size=batch,verbose=0) |
| 82 | mlp_res_list=[] | 82 | mlp_res_list=[] |
| 83 | for layer in res : | 83 | for layer in res : |
| 84 | mlp_res_list.append(train_mlp(layer[0],LABEL["TRAIN"],layer[1],LABEL["DEV"],layer[2],LABEL["TEST"],mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,fit_verbose=0)) | 84 | mlp_res_list.append(train_mlp(layer[0],LABEL["TRAIN"],layer[1],LABEL["DEV"],layer[2],LABEL["TEST"],mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,fit_verbose=0)) |
| 85 | db["AE"][mod]=mlp_res_list | 85 | db["AE"][mod]=mlp_res_list |
| 86 | 86 | ||
| 87 | mod = "ASR" | 87 | mod = "ASR" |
| 88 | mod2= "TRS" | 88 | mod2= "TRS" |
| 89 | mlp_res_list=[] | 89 | mlp_res_list=[] |
| 90 | 90 | ||
| 91 | res = train_ae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"],[params["h1"]],dropouts=[0],patience = params["patience"],sgd=sgd,in_activation="tanh",out_activation="tanh",loss=loss,epochs=epochs,batch_size=batch,y_train=infer_model["LDA"][mod]["TRAIN"],y_dev=infer_model["LDA"][mod2]["DEV"],y_test=infer_model["LDA"][mod2]["TEST"]) | 91 | res = train_ae(infer_model["LDA"][mod]["TRAIN"],infer_model["LDA"][mod]["DEV"],infer_model["LDA"][mod]["TEST"],[params["h1"]],dropouts=[0],patience = params["patience"],sgd=sgd,in_activation="tanh",out_activation="tanh",loss=loss,epochs=epochs,batch_size=batch,y_train=infer_model["LDA"][mod]["TRAIN"],y_dev=infer_model["LDA"][mod2]["DEV"],y_test=infer_model["LDA"][mod2]["TEST"]) |
| 92 | for layer in res : | 92 | for layer in res : |
| 93 | mlp_res_list.append(train_mlp(layer[0],LABEL["TRAIN"],layer[1],LABEL["DEV"],layer[2],LABEL["TEST"],mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,fit_verbose=0)) | 93 | mlp_res_list.append(train_mlp(layer[0],LABEL["TRAIN"],layer[1],LABEL["DEV"],layer[2],LABEL["TEST"],mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs,batch_size=mlp_batch_size,fit_verbose=0)) |
| 94 | 94 | ||
| 95 | db["AE"]["SPE"] = mlp_res_list | 95 | db["AE"]["SPE"] = mlp_res_list |
| 96 | 96 | ||
| 97 | 97 | ||
| 98 | db.close() | 98 | db.close() |
| 99 | 99 |
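Results are written to a shelve inside a directory named by the underscore-joined parameter values. A sketch of reading them back; in_dir and the name string below are placeholders:

import shelve

name = "40_tanh_tanh_..."  # hypothetical: "_".join(str(x) for x in params.values())
db = shelve.open("output_v1/test2/{}/ae_model.shelve".format(name))  # hypothetical in_dir
print db["params"]
print db["AE"]["ASR"]  # one train_mlp result per autoencoder layer
print db["AE"]["SPE"]  # ASR-to-TRS autoencoder results
db.close()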
LDA/04e-mm_vae.py
| 1 | 1 | ||
| 2 | # coding: utf-8 | 2 | # coding: utf-8 |
| 3 | import gensim | 3 | import gensim |
| 4 | from scipy import sparse | 4 | from scipy import sparse |
| 5 | import itertools | 5 | import itertools |
| 6 | from sklearn import preprocessing | 6 | from sklearn import preprocessing |
| 7 | from keras.models import Sequential | 7 | from keras.models import Sequential |
| 8 | from keras.optimizers import SGD,Adam | 8 | from keras.optimizers import SGD,Adam |
| 9 | from mlp import * | 9 | from mlp import * |
| 10 | from vae import * | 10 | from vae import * |
| 11 | import sklearn.metrics | 11 | import sklearn.metrics |
| 12 | import shelve | 12 | import shelve |
| 13 | import pickle | 13 | import pickle |
| 14 | from utils import * | 14 | from utils import * |
| 15 | import sys | 15 | import sys |
| 16 | import os | 16 | import os |
| 17 | import json | 17 | import json |
| 18 | # In[4]: | 18 | # In[4]: |
| 19 | 19 | ||
| 20 | infer_model=shelve.open("{}".format(sys.argv[2])) | 20 | infer_model=shelve.open("{}".format(sys.argv[2])) |
| 21 | in_dir = sys.argv[1] | 21 | in_dir = sys.argv[1] |
| 22 | #['ASR', 'TRS', 'LABEL'] | 22 | #['ASR', 'TRS', 'LABEL'] |
| 23 | # In[6]: | 23 | # In[6]: |
| 24 | if len(sys.argv) > 4 : | 24 | if len(sys.argv) > 4 : |
| 25 | features_key = sys.argv[4] | 25 | features_key = sys.argv[4] |
| 26 | else : | 26 | else : |
| 27 | features_key = "LDA" | 27 | features_key = "LDA" |
| 28 | 28 | ||
| 29 | save_projection = True | 29 | save_projection = True |
| 30 | json_conf =json.load(open(sys.argv[3])) | 30 | json_conf =json.load(open(sys.argv[3])) |
| 31 | vae_conf = json_conf["vae"] | 31 | vae_conf = json_conf["vae"] |
| 32 | 32 | ||
| 33 | hidden_size= vae_conf["hidden_size"] | 33 | hidden_size= vae_conf["hidden_size"] |
| 34 | input_activation=vae_conf["input_activation"] | 34 | input_activation=vae_conf["input_activation"] |
| 35 | output_activation=vae_conf["output_activation"] | 35 | output_activation=vae_conf["output_activation"] |
| 36 | epochs=vae_conf["epochs"] | 36 | epochs=vae_conf["epochs"] |
| 37 | batch=vae_conf["batch"] | 37 | batch=vae_conf["batch"] |
| 38 | patience=vae_conf["patience"] | 38 | patience=vae_conf["patience"] |
| 39 | latent_dim = vae_conf["latent"] | 39 | latent_dim = vae_conf["latent"] |
| 40 | try: | 40 | try: |
| 41 | k = vae_conf["sgd"] | 41 | k = vae_conf["sgd"] |
| 42 | if vae_conf["sgd"]["name"] == "adam": | 42 | if vae_conf["sgd"]["name"] == "adam": |
| 43 | sgd = Adam(lr=vae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | 43 | sgd = Adam(lr=vae_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) |
| 44 | elif vae_conf["sgd"]["name"] == "sgd": | 44 | elif vae_conf["sgd"]["name"] == "sgd": |
| 45 | sgd = SGD(lr=vae_conf["sgd"]["lr"]) | 45 | sgd = SGD(lr=vae_conf["sgd"]["lr"]) |
| 46 | except: | 46 | except: |
| 47 | sgd = vae_conf["sgd"] | 47 | sgd = vae_conf["sgd"] |
| 48 | 48 | ||
| 49 | mlp_conf = json_conf["mlp"] | 49 | mlp_conf = json_conf["mlp"] |
| 50 | mlp_h = mlp_conf["hidden_size"] | 50 | mlp_h = mlp_conf["hidden_size"] |
| 51 | mlp_loss = mlp_conf["loss"] | 51 | mlp_loss = mlp_conf["loss"] |
| 52 | mlp_dropouts = mlp_conf["do"] | 52 | mlp_dropouts = mlp_conf["do"] |
| 53 | mlp_epochs = mlp_conf["epochs"] | 53 | mlp_epochs = mlp_conf["epochs"] |
| 54 | mlp_batch_size = mlp_conf["batch"] | 54 | mlp_batch_size = mlp_conf["batch"] |
| 55 | mlp_input_activation=mlp_conf["input_activation"] | 55 | mlp_input_activation=mlp_conf["input_activation"] |
| 56 | mlp_output_activation=mlp_conf["output_activation"] | 56 | mlp_output_activation=mlp_conf["output_activation"] |
| 57 | 57 | ||
| 58 | 58 | ||
| 59 | try: | 59 | try: |
| 60 | k = mlp_conf["sgd"] | 60 | k = mlp_conf["sgd"] |
| 61 | if mlp_conf["sgd"]["name"] == "adam": | 61 | if mlp_conf["sgd"]["name"] == "adam": |
| 62 | mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) | 62 | mlp_sgd = Adam(lr=mlp_conf["sgd"]["lr"])#SGD(lr=0.00001,nesterov=False) #'rmsprop'# Adam(lr=0.00001)#SGD(lr=0.001, momentum=0.9, nesterov=True) |
| 63 | elif mlp_conf["sgd"]["name"] == "sgd": | 63 | elif mlp_conf["sgd"]["name"] == "sgd": |
| 64 | mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"]) | 64 | mlp_sgd = SGD(lr=mlp_conf["sgd"]["lr"]) |
| 65 | except: | 65 | except: |
| 66 | mlp_sgd = mlp_conf["sgd"] | 66 | mlp_sgd = mlp_conf["sgd"] |
| 67 | 67 | ||
| 68 | 68 | ||
| 69 | name = json_conf["name"] | 69 | name = json_conf["name"] |
| 70 | 70 | ||
| 71 | try : | 71 | try : |
| 72 | print "make folder " | 72 | print "make folder " |
| 73 | os.mkdir("{}/{}".format(in_dir,name)) | 73 | os.mkdir("{}/{}".format(in_dir,name)) |
| 74 | except: | 74 | except: |
| 75 | print "folder not maked" | 75 | print "folder not maked" |
| 76 | pass | 76 | pass |
| 77 | 77 | ||
| 78 | 78 | ||
| 79 | db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True) | 79 | db = shelve.open("{}/{}/ae_model.shelve".format(in_dir,name),writeback=True) |
| 80 | db["LABEL"]=infer_model["LABEL"] | 80 | db["LABEL"]=infer_model["LABEL"] |
| 81 | # | 81 | # |
| 82 | 82 | ||
| 83 | 83 | ||
| 84 | keys = infer_model[features_key].keys() | 84 | keys = infer_model[features_key].keys() |
| 85 | 85 | ||
| 86 | db["VAE"] = {} | 86 | db["VAE"] = {} |
| 87 | db[features_key] = {} | 87 | db[features_key] = {} |
| 88 | for mod in keys : | 88 | for mod in keys : |
| 89 | #print mod | 89 | #print mod |
| 90 | db[features_key][mod] = train_mlp(infer_model[features_key][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"], | 90 | db[features_key][mod] = train_mlp(infer_model[features_key][mod]["TRAIN"],infer_model["LABEL"][mod]["TRAIN"], |
| 91 | infer_model[features_key][mod]["DEV"],infer_model["LABEL"][mod]["DEV"], | 91 | infer_model[features_key][mod]["DEV"],infer_model["LABEL"][mod]["DEV"], |
| 92 | infer_model[features_key][mod]["TEST"],infer_model["LABEL"][mod]["TEST"], | 92 | infer_model[features_key][mod]["TEST"],infer_model["LABEL"][mod]["TEST"], |
| 93 | mlp_h ,sgd=mlp_sgd, | 93 | mlp_h ,sgd=mlp_sgd, |
| 94 | epochs=mlp_epochs, | 94 | epochs=mlp_epochs, |
| 95 | batch_size=mlp_batch_size, | 95 | batch_size=mlp_batch_size, |
| 96 | input_activation=input_activation, | 96 | input_activation=input_activation, |
| 97 | output_activation=mlp_output_activation, | 97 | output_activation=mlp_output_activation, |
| 98 | dropouts=mlp_dropouts, | 98 | dropouts=mlp_dropouts, |
| 99 | fit_verbose=0) | 99 | fit_verbose=0) |
| 100 | 100 | ||
| 101 | res=train_vae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],infer_model[features_key][mod]["TEST"], | 101 | res=train_vae(infer_model[features_key][mod]["TRAIN"],infer_model[features_key][mod]["DEV"],infer_model[features_key][mod]["TEST"], |
| 102 | hidden_size=hidden_size[0], | 102 | hidden_size=hidden_size[0], |
| 103 | latent_dim=latent_dim,sgd=sgd, | 103 | latent_dim=latent_dim,sgd=sgd, |
| 104 | input_activation=input_activation,output_activation=output_activation, | 104 | input_activation=input_activation,output_activation=output_activation, |
| 105 | nb_epochs=epochs,batch_size=batch) | 105 | nb_epochs=epochs,batch_size=batch) |
| 106 | mlp_res_list=[] | 106 | mlp_res_list=[] |
| 107 | for nb,layer in enumerate(res) : | 107 | for nb,layer in enumerate(res) : |
| 108 | if save_projection: | 108 | if save_projection: |
| 109 | pd = pandas.DataFrame(layer[0]) | 109 | pd = pandas.DataFrame(layer[0]) |
| 110 | col_count = (pd.sum(axis=0) != 0) | 110 | col_count = (pd.sum(axis=0) != 0) |
| 111 | pd = pd.loc[:,cyyol_count] | 111 | pd = pd.loc[:,col_count] |
| 112 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TRAIN") | 112 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TRAIN") |
| 113 | pd = pandas.DataFrame(layer[1]) | 113 | pd = pandas.DataFrame(layer[1]) |
| 114 | pd = pd.loc[:,col_count] | 114 | pd = pd.loc[:,col_count] |
| 115 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"DEV") | 115 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"DEV") |
| 116 | pd = pandas.DataFrame(layer[2]) | 116 | pd = pandas.DataFrame(layer[2]) |
| 117 | pd = pd.loc[:,col_count] | 117 | pd = pd.loc[:,col_count] |
| 118 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TEST") | 118 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,mod),"TEST") |
| 119 | del pd | 119 | del pd |
| 120 | 120 | ||
| 121 | mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"], | 121 | mlp_res_list.append(train_mlp(layer[0],infer_model['LABEL'][mod]["TRAIN"], |
| 122 | layer[1],infer_model["LABEL"][mod]["DEV"], | 122 | layer[1],infer_model["LABEL"][mod]["DEV"], |
| 123 | layer[2],infer_model["LABEL"][mod]["TEST"], | 123 | layer[2],infer_model["LABEL"][mod]["TEST"], |
| 124 | mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs, | 124 | mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs, |
| 125 | output_activation=mlp_output_activation, | 125 | output_activation=mlp_output_activation, |
| 126 | input_activation=input_activation, | 126 | input_activation=input_activation, |
| 127 | batch_size=mlp_batch_size,fit_verbose=0)) | 127 | batch_size=mlp_batch_size,fit_verbose=0)) |
| 128 | db["VAE"][mod]=mlp_res_list | 128 | db["VAE"][mod]=mlp_res_list |
| 129 | 129 | ||
| 130 | if "ASR" in keys and "TRS" in keys : | 130 | if "ASR" in keys and "TRS" in keys : |
| 131 | mod = "ASR" | 131 | mod = "ASR" |
| 132 | mod2= "TRS" | 132 | mod2= "TRS" |
| 133 | mlp_res_list=[] | 133 | mlp_res_list=[] |
| 134 | 134 | ||
| 135 | res = train_vae(infer_model[features_key][mod]["TRAIN"], | 135 | res = train_vae(infer_model[features_key][mod]["TRAIN"], |
| 136 | infer_model[features_key][mod]["DEV"], | 136 | infer_model[features_key][mod]["DEV"], |
| 137 | infer_model[features_key][mod]["TEST"], | 137 | infer_model[features_key][mod]["TEST"], |
| 138 | hidden_size=hidden_size[0], | 138 | hidden_size=hidden_size[0], |
| 139 | sgd=sgd,input_activation=input_activation,output_activation=output_activation, | 139 | sgd=sgd,input_activation=input_activation,output_activation=output_activation, |
| 140 | latent_dim=latent_dim, | 140 | latent_dim=latent_dim, |
| 141 | nb_epochs=epochs, | 141 | nb_epochs=epochs, |
| 142 | batch_size=batch, | 142 | batch_size=batch, |
| 143 | y_train=infer_model[features_key][mod2]["TRAIN"], | 143 | y_train=infer_model[features_key][mod2]["TRAIN"], |
| 144 | y_dev=infer_model[features_key][mod2]["DEV"], | 144 | y_dev=infer_model[features_key][mod2]["DEV"], |
| 145 | y_test=infer_model[features_key][mod2]["TEST"]) | 145 | y_test=infer_model[features_key][mod2]["TEST"]) |
| 146 | 146 | ||
| 147 | for nb,layer in enumerate(res) : | 147 | for nb,layer in enumerate(res) : |
| 148 | if save_projection: | 148 | if save_projection: |
| 149 | pd = pandas.DataFrame(layer[0]) | 149 | pd = pandas.DataFrame(layer[0]) |
| 150 | col_count = (pd.sum(axis=0) != 0) | 150 | col_count = (pd.sum(axis=0) != 0) |
| 151 | pd = pd.loc[:,col_count] | 151 | pd = pd.loc[:,col_count] |
| 152 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TRAIN") | 152 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TRAIN") |
| 153 | pd = pandas.DataFrame(layer[1]) | 153 | pd = pandas.DataFrame(layer[1]) |
| 154 | pd = pd.loc[:,col_count] | 154 | pd = pd.loc[:,col_count] |
| 155 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"DEV") | 155 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"DEV") |
| 156 | pd = pandas.DataFrame(layer[2]) | 156 | pd = pandas.DataFrame(layer[2]) |
| 157 | pd = pd.loc[:,col_count] | 157 | pd = pd.loc[:,col_count] |
| 158 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TEST") | 158 | pd.to_hdf("{}/{}/VAE_{}_{}_df.hdf".format(in_dir,name,nb,"SPE"),"TEST") |
| 159 | del pd | 159 | del pd |
| 160 | 160 | ||
| 161 | mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"], | 161 | mlp_res_list.append(train_mlp(layer[0],infer_model["LABEL"][mod]["TRAIN"], |
| 162 | layer[1],infer_model["LABEL"][mod]["DEV"], | 162 | layer[1],infer_model["LABEL"][mod]["DEV"], |
| 163 | layer[2],infer_model["LABEL"][mod]["TEST"], | 163 | layer[2],infer_model["LABEL"][mod]["TEST"], |
| 164 | mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs, | 164 | mlp_h,loss=mlp_loss,dropouts=mlp_dropouts,sgd=mlp_sgd,epochs=mlp_epochs, |
| 165 | output_activation=mlp_output_activation, | 165 | output_activation=mlp_output_activation, |
| 166 | input_activation=input_activation, | 166 | input_activation=input_activation, |
| 167 | batch_size=mlp_batch_size,fit_verbose=0)) | 167 | batch_size=mlp_batch_size,fit_verbose=0)) |
| 168 | 168 | ||
| 169 | db["VAE"]["SPE"] = mlp_res_list | 169 | db["VAE"]["SPE"] = mlp_res_list |
| 170 | 170 | ||
| 171 | db.sync() | 171 | db.sync() |
| 172 | db.close() | 172 | db.close() |
| 173 | 173 |
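The script reads its hyperparameters from the JSON file passed as sys.argv[3]. A sketch of a configuration with the layout the code above expects, reconstructed from the keys it accesses; every value here is illustrative:

# Writes a conf.json matching the keys read by this script; all values are
# placeholders, not recommended settings.
import json

conf = {
    "name": "vae_exp1",
    "vae": {
        "hidden_size": [80],          # script uses hidden_size[0]
        "input_activation": "tanh",
        "output_activation": "sigmoid",
        "epochs": 100,
        "batch": 8,
        "patience": 60,
        "latent": 12,
        "sgd": {"name": "adam", "lr": 0.0001}
    },
    "mlp": {
        "hidden_size": [512],
        "loss": "categorical_crossentropy",
        "do": [0, 0],                 # dropout per layer
        "epochs": 200,
        "batch": 8,
        "input_activation": "tanh",
        "output_activation": "softmax",
        "sgd": {"name": "adam", "lr": 0.0001}
    }
}
json.dump(conf, open("conf.json", "w"), indent=4)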
LDA/run.sh
| 1 | python 00-prepross.py | File was deleted | |
| 2 | python 02-lda_split.py DECODA_list_wid.shelve output_v1/ 100 12 test2 1 400 | ||
| 3 | python 03-mono_perplex.py DECODA_list_wid.shelve output_v1/test2 output_v1/t2db.json | ||
| 4 | 1 | python 00-prepross.py |
LDA/vae.py
| 1 | '''This script demonstrates how to build a variational autoencoder with Keras. | 1 | '''This script demonstrates how to build a variational autoencoder with Keras. |
| 2 | Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114 | 2 | Reference: "Auto-Encoding Variational Bayes" https://arxiv.org/abs/1312.6114 |
| 3 | ''' | 3 | ''' |
| 4 | 4 | ||
| 5 | import itertools | 5 | import itertools |
| 6 | import sys | 6 | import sys |
| 7 | import json | 7 | import json |
| 8 | 8 | ||
| 9 | import numpy as np | 9 | import numpy as np |
| 10 | import matplotlib.pyplot as plt | 10 | import matplotlib.pyplot as plt |
| 11 | from scipy import sparse | 11 | from scipy import sparse |
| 12 | import scipy.io | 12 | import scipy.io |
| 13 | 13 | ||
| 14 | from keras.layers import Input, Dense, Lambda | 14 | from keras.layers import Input, Dense, Lambda |
| 15 | from keras.models import Model | 15 | from keras.models import Model |
| 16 | from keras import backend as K | 16 | from keras import backend as K |
| 17 | from keras import objectives | 17 | from keras import objectives |
| 18 | from keras.datasets import mnist | 18 | from keras.datasets import mnist |
| 19 | from keras.callbacks import EarlyStopping,Callback | 19 | from keras.callbacks import EarlyStopping,Callback |
| 20 | 20 | ||
| 21 | import pandas | 21 | import pandas |
| 22 | import shelve | 22 | import shelve |
| 23 | import pickle | 23 | import pickle |
| 24 | 24 | ||
| 25 | 25 | ||
| 26 | class ZeroStopping(Callback): | 26 | class ZeroStopping(Callback): |
| 27 | '''Stop training when a monitored quantity has stopped improving. | 27 | '''Stop training when a monitored quantity has stopped improving. |
| 28 | # Arguments | 28 | # Arguments |
| 29 | monitor: quantity to be monitored. | 29 | monitor: quantity to be monitored. |
| 30 | patience: number of epochs with no improvement | 30 | patience: number of epochs with no improvement |
| 31 | after which training will be stopped. | 31 | after which training will be stopped. |
| 32 | verbose: verbosity mode. | 32 | verbose: verbosity mode. |
| 33 | mode: one of {auto, min, max}. In 'min' mode, | 33 | mode: one of {auto, min, max}. In 'min' mode, |
| 34 | training will stop when the quantity | 34 | training will stop when the quantity |
| 35 | monitored has stopped decreasing; in 'max' | 35 | monitored has stopped decreasing; in 'max' |
| 36 | mode it will stop when the quantity | 36 | mode it will stop when the quantity |
| 37 | monitored has stopped increasing. | 37 | monitored has stopped increasing. |
| 38 | ''' | 38 | ''' |
| 39 | def __init__(self, monitor='val_loss', verbose=0, mode='auto', thresh = 0): | 39 | def __init__(self, monitor='val_loss', verbose=0, mode='auto', thresh = 0): |
| 40 | super(ZeroStopping, self).__init__() | 40 | super(ZeroStopping, self).__init__() |
| 41 | 41 | ||
| 42 | self.monitor = monitor | 42 | self.monitor = monitor |
| 43 | self.verbose = verbose | 43 | self.verbose = verbose |
| 44 | self.thresh = thresh # threshold value | 44 | self.thresh = thresh # threshold value |
| 45 | 45 | ||
| 46 | if mode not in ['auto', 'min', 'max']: | 46 | if mode not in ['auto', 'min', 'max']: |
| 47 | warnings.warn('EarlyStopping mode %s is unknown, ' | 47 | warnings.warn('EarlyStopping mode %s is unknown, ' |
| 48 | 'fallback to auto mode.' % (self.mode), | 48 | 'fallback to auto mode.' % (self.mode), |
| 49 | RuntimeWarning) | 49 | RuntimeWarning) |
| 50 | mode = 'auto' | 50 | mode = 'auto' |
| 51 | 51 | ||
| 52 | if mode == 'min': | 52 | if mode == 'min': |
| 53 | self.monitor_op = np.less | 53 | self.monitor_op = np.less |
| 54 | elif mode == 'max': | 54 | elif mode == 'max': |
| 55 | self.monitor_op = np.greater | 55 | self.monitor_op = np.greater |
| 56 | else: | 56 | else: |
| 57 | if 'acc' in self.monitor: | 57 | if 'acc' in self.monitor: |
| 58 | self.monitor_op = np.greater | 58 | self.monitor_op = np.greater |
| 59 | else: | 59 | else: |
| 60 | self.monitor_op = np.less | 60 | self.monitor_op = np.less |
| 61 | 61 | ||
| 62 | def on_epoch_end(self, epoch, logs={}): | 62 | def on_epoch_end(self, epoch, logs={}): |
| 63 | current = logs.get(self.monitor) | 63 | current = logs.get(self.monitor) |
| 64 | if current is None: | 64 | if current is None: |
| 65 | warnings.warn('Zero stopping requires %s available!' % | 65 | warnings.warn('Zero stopping requires %s available!' % |
| 66 | (self.monitor), RuntimeWarning) | 66 | (self.monitor), RuntimeWarning) |
| 67 | 67 | ||
| 68 | if self.monitor_op(current, self.thresh): | 68 | if self.monitor_op(current, self.thresh): |
| 69 | self.best = current | 69 | self.best = current |
| 70 | self.model.stop_training = True | 70 | self.model.stop_training = True |
| 71 | 71 | ||
| 72 | #batch_size = 16 | 72 | #batch_size = 16 |
| 73 | #original_dim = 784 | 73 | #original_dim = 784 |
| 74 | #latent_dim = 2 | 74 | #latent_dim = 2 |
| 75 | #intermediate_dim = 128 | 75 | #intermediate_dim = 128 |
| 76 | #epsilon_std = 0.01 | 76 | #epsilon_std = 0.01 |
| 77 | #nb_epoch = 40 | 77 | #nb_epoch = 40 |
| 78 | 78 | ||
| 79 | 79 | ||
| 80 | 80 | ||
| 81 | 81 | ||
| 82 | def train_vae(x_train,x_dev,x_test,y_train=None,y_dev=None,y_test=None,hidden_size=80,latent_dim=12,batch_size=8,nb_epochs=10,sgd="rmsprop",input_activation = "relu",output_activation = "sigmoid",epsilon_std=0.01): | 82 | def train_vae(x_train,x_dev,x_test,y_train=None,y_dev=None,y_test=None,hidden_size=80,latent_dim=12,batch_size=8,nb_epochs=10,sgd="rmsprop",input_activation = "relu",output_activation = "sigmoid",epsilon_std=0.01): |
| 83 | 83 | ||
| 84 | 84 | ||
| 85 | 85 | ||
| 86 | def sampling(args): | 86 | def sampling(args): |
| 87 | z_mean, z_log_std = args | 87 | z_mean, z_log_std = args |
| 88 | epsilon = K.random_normal(shape=(batch_size, latent_dim), | 88 | epsilon = K.random_normal(shape=(batch_size, latent_dim), |
| 89 | mean=0., std=epsilon_std) | 89 | mean=0., std=epsilon_std) |
| 90 | return z_mean + K.exp(z_log_std) * epsilon | 90 | return z_mean + K.exp(z_log_std) * epsilon |
| 91 | 91 | ||
| 92 | def vae_loss(x, x_decoded_mean): | 92 | def vae_loss(x, x_decoded_mean): |
| 93 | xent_loss = objectives.binary_crossentropy(x, x_decoded_mean) | 93 | xent_loss = objectives.binary_crossentropy(x, x_decoded_mean) |
| 94 | kl_loss = - 0.5 * K.mean(1 + z_log_std - K.square(z_mean) - K.exp(z_log_std), axis=-1) | 94 | kl_loss = - 0.5 * K.mean(1 + z_log_std - K.square(z_mean) - K.exp(z_log_std), axis=-1) |
| 95 | return xent_loss + kl_loss | 95 | return xent_loss + kl_loss |
| 96 | 96 | ||
| 97 | original_dim = x_train.shape[1] | 97 | original_dim = x_train.shape[1] |
| 98 | 98 | ||
| 99 | 99 | ||
| 100 | x = Input(batch_shape=(batch_size, original_dim)) | 100 | x = Input(batch_shape=(batch_size, original_dim)) |
| 101 | h = Dense(hidden_size, activation=input_activation)(x) | 101 | h = Dense(hidden_size, activation=input_activation)(x) |
| 102 | z_mean = Dense(latent_dim)(h) | 102 | z_mean = Dense(latent_dim)(h) |
| 103 | z_log_std = Dense(latent_dim)(h) | 103 | z_log_std = Dense(latent_dim)(h) |
| 104 | 104 | ||
| 105 | 105 | ||
| 106 | # note that "output_shape" isn't necessary with the TensorFlow backend | 106 | # note that "output_shape" isn't necessary with the TensorFlow backend |
| 107 | # so you could write `Lambda(sampling)([z_mean, z_log_std])` | 107 | # so you could write `Lambda(sampling)([z_mean, z_log_std])` |
| 108 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_std]) | 108 | z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_std]) |
| 109 | 109 | ||
| 110 | # we instantiate these layers separately so as to reuse them later | 110 | # we instantiate these layers separately so as to reuse them later |
| 111 | decoder_h = Dense(hidden_size, activation=input_activation) | 111 | decoder_h = Dense(hidden_size, activation=input_activation) |
| 112 | decoder_mean = Dense(original_dim, activation=output_activation) | 112 | decoder_mean = Dense(original_dim, activation=output_activation) |
| 113 | h_decoded = decoder_h(z) | 113 | h_decoded = decoder_h(z) |
| 114 | x_decoded_mean = decoder_mean(h_decoded) | 114 | x_decoded_mean = decoder_mean(h_decoded) |
| 115 | 115 | ||
| 116 | 116 | ||
| 117 | vae = Model(x, x_decoded_mean) | 117 | vae = Model(x, x_decoded_mean) |
| 118 | vae.compile(optimizer=sgd, loss=vae_loss) | 118 | vae.compile(optimizer=sgd, loss=vae_loss) |
| 119 | 119 | ||
| 120 | # train the VAE on MNIST digits | 120 | # train the VAE on MNIST digits |
| 121 | if y_train is None or y_dev is None or y_test is None : | 121 | if y_train is None or y_dev is None or y_test is None : |
| 122 | y_train = x_train | 122 | y_train = x_train |
| 123 | y_dev = x_dev | 123 | y_dev = x_dev |
| 124 | y_test = x_test | 124 | y_test = x_test |
| 125 | 125 | ||
| 126 | vae.fit(x_train, y_train, | 126 | vae.fit(x_train, y_train, |
| 127 | shuffle=True, | 127 | shuffle=True, |
| 128 | nb_epoch=nb_epochs, | 128 | nb_epoch=nb_epochs, |
| 129 | verbose = 1, | 129 | verbose = 1, |
| 130 | batch_size=batch_size, | 130 | batch_size=batch_size, |
| 131 | validation_data=(x_dev, y_dev), | 131 | validation_data=(x_dev, y_dev) |
| 132 | callbacks = [ZeroStopping(monitor='val_loss', thresh=0, verbose=0, mode='min')] | 132 | #callbacks = [ZeroStopping(monitor='val_loss', thresh=0, verbose=0, mode='min')] |
| 133 | ) | 133 | ) |
| 134 | 134 | ||
| 135 | # build a model to project inputs on the latent space | 135 | # build a model to project inputs on the latent space |
| 136 | encoder = Model(x, z_mean) | 136 | encoder = Model(x, z_mean) |
| 137 | pred_train = encoder.predict(x_train, batch_size=batch_size) | 137 | pred_train = encoder.predict(x_train, batch_size=batch_size) |
| 138 | pred_dev = encoder.predict(x_dev, batch_size=batch_size) | 138 | pred_dev = encoder.predict(x_dev, batch_size=batch_size) |
| 139 | pred_test = encoder.predict(x_test,batch_size=batch_size) | 139 | pred_test = encoder.predict(x_test,batch_size=batch_size) |
| 140 | return [ [ pred_train, pred_dev, pred_test ] ] | 140 | return [ [ pred_train, pred_dev, pred_test ] ] |
| 141 | # display a 2D plot of the digit classes in the latent space | 141 | # display a 2D plot of the digit classes in the latent space |
| 142 | #x_test_encoded = encoder.predict(x_test, batch_size=batch_size) | 142 | #x_test_encoded = encoder.predict(x_test, batch_size=batch_size) |
| 143 | # build a digit generator that can sample from the learned distribution | 143 | # build a digit generator that can sample from the learned distribution |
| 144 | #decoder_input = Input(shape=(latent_dim,)) | 144 | #decoder_input = Input(shape=(latent_dim,)) |
| 145 | #_h_decoded = decoder_h(decoder_input) | 145 | #_h_decoded = decoder_h(decoder_input) |
| 146 | #_x_decoded_mean = decoder_mean(_h_decoded) | 146 | #_x_decoded_mean = decoder_mean(_h_decoded) |
| 147 | #generator = Model(decoder_input, _x_decoded_mean) | 147 | #generator = Model(decoder_input, _x_decoded_mean) |
| 148 | #x_decoded = generator.predict(z_sample) | 148 | #x_decoded = generator.predict(z_sample) |
| 149 | 149 | ||
| 150 | 150 |
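train_vae returns a single [train, dev, test] triple of encoder projections. A minimal usage sketch on random data (shapes and the old-Keras environment this module targets are assumptions); sample counts are kept divisible by batch_size because the model is built with a fixed batch_shape:

import numpy as np

# Arbitrary dummy data: 100-dimensional features, counts divisible by batch_size=8.
x_train = np.random.rand(800, 100)
x_dev = np.random.rand(200, 100)
x_test = np.random.rand(200, 100)

layers = train_vae(x_train, x_dev, x_test,
                   hidden_size=80, latent_dim=12,
                   batch_size=8, nb_epochs=10)
pred_train, pred_dev, pred_test = layers[0]
print pred_train.shape  # (800, 12): latent means from the encoder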