UNFIXED_TRANS_MINIAE_mlp.py
# coding: utf-8
#
# Train MLP classifiers on every representation stored in the corpus shelve and
# save the results to two output shelves (without and with dropout).
# Usage: python UNFIXED_TRANS_MINIAE_mlp.py <corpus_prefix> <output_prefix>

import shelve
import sys

from keras.optimizers import Adam
from sklearn.preprocessing import LabelBinarizer

import mlp
import utils

# Input shelves: the corpus of representations and the label/tf-idf database.
# db = shelve.open("SPELIKE_MLP_DB.shelve", writeback=True)
corps = shelve.open(sys.argv[1] + ".shelve")
labelc = shelve.open("Sparse_mat_tfidf.shelve")

# Keys observed in the shelve databases (kept from the original notes):
# ['vocab',
#  'ASR_AE_OUT_RELU',
#  'ASR_AE_H2_RELU',
#  'ASR_H1_TRANSFORMED_W2_RELU',
#  'ASR_AE_H1_RELU',
#  'ASR_H1_TRANFORMED_OUT_RELU',
#  'ASR_H1_TRANFORMED_TRSH2_RELU',
#  'TRS_AE_H2_RELU',
#  'ASR_H2_TRANSFORMED_W1_RELU',
#  'ASR_H2_TRANSFORMED_W2_RELU',
#  'TRS_AE_H1_RELU',
#  'ASR_H2_TRANFORMED_OUT_RELU',
#  'ASR_SPARSE',
#  'ASR_H2_TRANFORMED_TRSH2_RELU',
#  'ASR_H1_TRANSFORMED_W1_RELU',
#  'TRS_AE_OUT_RELU']
#
# ['vocab', 'LABEL', 'TRS_SPARSE', 'ASR_SPARSE']

# Output shelves: one for the plain MLP, one for the dropout ("DOMLP") variant.
out_db = shelve.open(sys.argv[2] + ".shelve", writeback=True)
out_db_do = shelve.open(sys.argv[2] + "_DOMLP.shelve", writeback=True)

# Binarize the labels: fit on TRAIN, reuse the same encoding for DEV and TEST.
lb = LabelBinarizer()
y_train = lb.fit_transform([utils.select(ligneid) for ligneid in labelc["LABEL"]["TRAIN"]])
y_dev = lb.transform([utils.select(ligneid) for ligneid in labelc["LABEL"]["DEV"]])
y_test = lb.transform([utils.select(ligneid) for ligneid in labelc["LABEL"]["TEST"]])

# Train one pair of MLPs per representation; "LABEL" is not an input representation.
keys = [key for key in corps.keys() if key != "LABEL"]

nb_epochs = 250
for key in keys:
    print(key)
    try:
        # Sparse matrices have to be densified before being fed to the MLP.
        x_train = corps[key]["TRAIN"].todense()
        x_dev = corps[key]["DEV"].todense()
        x_test = corps[key]["TEST"].todense()
    except AttributeError:
        # Already dense (no .todense() method): use the arrays as stored.
        x_train = corps[key]["TRAIN"]
        x_dev = corps[key]["DEV"]
        x_test = corps[key]["TEST"]

    # 256-128-256 MLP without dropout.
    out_db[key] = mlp.train_mlp(x_train, y_train, x_dev, y_dev, x_test, y_test,
                                [256, 128, 256], dropouts=[0, 0, 0],
                                sgd=Adam(lr=0.0001), epochs=nb_epochs, batch_size=8,
                                save_pred=True, keep_histo=True)
    # Same architecture, with the first entry of the dropouts list set to 0.5.
    out_db_do[key] = mlp.train_mlp(x_train, y_train, x_dev, y_dev, x_test, y_test,
                                   [256, 128, 256], dropouts=[0.5, 0, 0],
                                   sgd=Adam(lr=0.0001), epochs=nb_epochs, batch_size=8,
                                   save_pred=True, keep_histo=True)

corps.close()
labelc.close()
out_db.close()
out_db_do.close()
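The script is driven entirely by its two command-line arguments: sys.argv[1] names the corpus shelve to read, and sys.argv[2] the prefix of the two result shelves it writes. Below is a minimal usage sketch (not part of the original file); the prefix names are placeholders, and the structure of each stored result is whatever mlp.train_mlp returns, which is not defined here.

import shelve
import subprocess

# Placeholder prefixes: the script reads <in_prefix>.shelve and writes
# <out_prefix>.shelve plus <out_prefix>_DOMLP.shelve.
in_prefix = "TRANS_MINIAE_CORPUS"
out_prefix = "TRANS_MINIAE_MLP_RES"

subprocess.check_call(["python", "UNFIXED_TRANS_MINIAE_mlp.py", in_prefix, out_prefix])

# One entry per input representation (the ASR_*/TRS_* keys); the no-dropout and
# dropout runs land in separate shelves.
results = shelve.open(out_prefix + ".shelve")
print(sorted(results.keys()))
results.close()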