TRANS_MINIAE_mlp.py
# coding: utf-8
# Trains an MLP classifier on each representation stored in the input shelve DB
# (sparse bag-of-words features and autoencoder-derived features for the ASR/TRS
# variants), once without dropout and once with dropout on the first layer, and
# stores the results in two output shelve DBs.
#
# Usage: python TRANS_MINIAE_mlp.py <input_db_name> <output_db_name>
#        (".shelve" is appended to both names)

import shelve
import sys

from keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer

import mlp
import utils
# Input shelve DB with one entry per representation (plus "LABEL" and "vocab").
# db = shelve.open("SPELIKE_MLP_DB.shelve", writeback=True)
corps = shelve.open(sys.argv[1] + ".shelve")
# Key sets observed in the input DBs:
# ['vocab',
#  'ASR_AE_OUT_RELU', 'ASR_AE_H2_RELU', 'ASR_AE_H1_RELU',
#  'ASR_H1_TRANSFORMED_W1_RELU', 'ASR_H1_TRANSFORMED_W2_RELU',
#  'ASR_H1_TRANFORMED_OUT_RELU', 'ASR_H1_TRANFORMED_TRSH2_RELU',
#  'ASR_H2_TRANSFORMED_W1_RELU', 'ASR_H2_TRANSFORMED_W2_RELU',
#  'ASR_H2_TRANFORMED_OUT_RELU', 'ASR_H2_TRANFORMED_TRSH2_RELU',
#  'ASR_SPARSE',
#  'TRS_AE_H1_RELU', 'TRS_AE_H2_RELU', 'TRS_AE_OUT_RELU']
#
# ['vocab', 'LABEL', 'TRS_SPARSE', 'ASR_SPARSE']
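#
# Sketch of the assumed layout (illustrative; key names taken from the lists above):
#   corps["ASR_SPARSE"]["TRAIN"]   -> scipy sparse matrix, one row per document
#   corps["ASR_AE_H1_RELU"]["DEV"] -> dense hidden-layer features
#   corps["LABEL"]["TEST"]         -> iterable of line identifiers fed to utils.select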
# Output shelve DBs: results of the plain MLP and of the dropout MLP.
out_db = shelve.open(sys.argv[2] + ".shelve", writeback=True)
out_db_do = shelve.open(sys.argv[2] + "_DOMLP.shelve", writeback=True)
# One-hot encode the labels for each split; utils.select is a local helper
# (assumed to map a line identifier to its class label).
lb = LabelBinarizer()
y_train = lb.fit_transform([utils.select(ligneid) for ligneid in corps["LABEL"]["TRAIN"]])
y_dev = lb.transform([utils.select(ligneid) for ligneid in corps["LABEL"]["DEV"]])
y_test = lb.transform([utils.select(ligneid) for ligneid in corps["LABEL"]["TEST"]])
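# Note: with exactly two classes, LabelBinarizer returns a single 0/1 column
# rather than two one-hot columns, which the downstream mlp.train_mlp helper
# would have to handle (assumption: the task here has more than two classes).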
# Classify every representation except the label entry itself.
keys = list(corps.keys())
if "LABEL" in keys:
    keys.remove("LABEL")

nb_epochs = 200
for key in keys:
    print(key)
    try:
        # Sparse entries (e.g. the *_SPARSE bags of words) are densified first.
        x_train = corps[key]["TRAIN"].todense()
        x_dev = corps[key]["DEV"].todense()
        x_test = corps[key]["TEST"].todense()
    except AttributeError:
        # Already-dense entries are used as-is.
        x_train = corps[key]["TRAIN"]
        x_dev = corps[key]["DEV"]
        x_test = corps[key]["TEST"]
    # Two MLPs per representation: one without dropout, one with 0.5 dropout on
    # the first layer (results stored in the "_DOMLP" DB).
    out_db[key] = mlp.train_mlp(x_train, y_train, x_dev, y_dev, x_test, y_test,
                                [256, 384, 256], dropouts=[0, 0, 0],
                                sgd=Adam(lr=0.0001), epochs=nb_epochs, batch_size=8,
                                save_pred=True, keep_histo=True)
    out_db_do[key] = mlp.train_mlp(x_train, y_train, x_dev, y_dev, x_test, y_test,
                                   [256, 383, 256], dropouts=[0.5, 0, 0],
                                   sgd=Adam(lr=0.0001), epochs=nb_epochs, batch_size=8,
                                   save_pred=True, keep_histo=True)
corps.close()
out_db.close()
out_db_do.close()
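
# ---------------------------------------------------------------------------
# Reference sketch (not called above): mlp.train_mlp comes from the local mlp
# module, which is not part of this file. The function below is a rough
# re-expression in modern tf.keras of what that helper is assumed to do, given
# the arguments passed to it (hidden layer sizes, per-layer dropout rates,
# optimizer, epoch count, batch size); the real helper is written against the
# Keras 0.x API and also returns predictions and training history when
# save_pred / keep_histo are set.
# ---------------------------------------------------------------------------
def _train_mlp_sketch(x_train, y_train, x_dev, y_dev, x_test, y_test,
                      layers, dropouts, optimizer="adam",
                      epochs=200, batch_size=8):
    from tensorflow import keras  # assumption: not the Keras 0.x used above
    model = keras.Sequential()
    for size, rate in zip(layers, dropouts):
        if rate:
            # Each dropout rate is assumed to apply before its hidden layer.
            model.add(keras.layers.Dropout(rate))
        model.add(keras.layers.Dense(size, activation="relu"))
    model.add(keras.layers.Dense(y_train.shape[1], activation="softmax"))
    model.compile(optimizer=optimizer, loss="categorical_crossentropy",
                  metrics=["accuracy"])
    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size,
              validation_data=(x_dev, y_dev), verbose=2)
    # Return test loss and accuracy; the real helper stores richer results.
    return model.evaluate(x_test, y_test, verbose=0)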