Blame view
LDA/04a-mlp.py
3.28 KB
b6d0165d1 Initial commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# coding: utf-8
# Train an MLP classifier on LDA topic-distribution features inferred from
# ASR and TRS bag-of-words corpora, and record the best dev/test scores.
#
# Usage: 04a-mlp.py <in_dir> <origin_corpus_shelve>
#   <in_dir> must contain: stopwords.dill, lda_asr.model, lda_trs.model
#   Writes:  <in_dir>/mlp_scores.shelve  (per-source scores)
#            <in_dir>/infer.shelve       (RAW bow docs + inferred LDA features)

# Import
import itertools
import pickle
import shelve
import sys
from collections import Counter

import dill
import numpy
import scipy
import scipy.io
import scipy.sparse
from scipy import sparse
from gensim.models import LdaModel

from mlp import *
import mlp
import utils

#30_50_50_150_0.0001
in_dir = sys.argv[1]
origin_corps = shelve.open(sys.argv[2])
## expected origin_corps keys include:
## ['vocab', 'LABEL', 'TRS_SPARSE', 'ASR_SPARSE']
## plus the word-id corpora 'ASR_wid' / 'TRS_wid' with TRAIN/DEV/TEST splits.

out_db = shelve.open("{}/mlp_scores.shelve".format(in_dir), writeback=True)
infer_db = shelve.open("{}/infer.shelve".format(in_dir), writeback=True)

# Labels are used as-is; binarization (if any) happened upstream.
y_train = origin_corps["LABEL"]["TRAIN"]
y_dev = origin_corps["LABEL"]["DEV"]
y_test = origin_corps["LABEL"]["TEST"]

# FIX: open the stopwords file with a context manager instead of leaking the
# handle (original: sw = dill.load(open(...))); "rb" is correct for dill.
with open("{}/stopwords.dill".format(in_dir), "rb") as sw_file:
    sw = dill.load(sw_file)  # stop-word ids to drop from the BoW features

LDAs = {}
LDAs["ASR"] = LdaModel.load("{}/lda_asr.model".format(in_dir))
LDAs["TRS"] = LdaModel.load("{}/lda_trs.model".format(in_dir))

# data["RAW"][src][split]: gensim-style BoW docs [(word_id, count), ...] with
# stop words removed; data["LDA"][src][split]: inferred topic matrices (gamma).
data = {"RAW": {"ASR": {}, "TRS": {}}, "LDA": {"ASR": {}, "TRS": {}}}
# FIX: the six copy-pasted assignments are collapsed into one loop, keeping
# the original fill order (ASR then TRS, TRAIN/DEV/TEST each).
for src in ("ASR", "TRS"):
    for split in ("TRAIN", "DEV", "TEST"):
        data["RAW"][src][split] = [
            [(wid, cnt) for wid, cnt in Counter(doc).items() if wid not in sw]
            for doc in origin_corps["{}_wid".format(src)][split]
        ]

nb_epochs = 500
for key in ["TRS", "ASR"]:
    # Infer LDA topic distributions for every split of this source.
    # inference(...) returns (gamma, sstats); keep only gamma.
    for corp_key in data["RAW"][key]:
        data["LDA"][key][corp_key] = LDAs[key].inference(
            data["RAW"][key][corp_key])[0]
    res = mlp.train_mlp(
        data["LDA"][key]["TRAIN"], y_train,
        data["LDA"][key]["DEV"], y_dev,
        data["LDA"][key]["TEST"], y_test,
        [40, 25, 40],
        dropouts=[0, 0, 0, 0],
        sgd=Adam(lr=0.0001),
        epochs=nb_epochs,
        batch_size=8,
        save_pred=False,
        keep_histo=False,
        loss="categorical_crossentropy",
        fit_verbose=0)
    # res[1]/res[2] are per-epoch dev/test scores: pick the epoch that
    # maximises the dev score and report its matching test score.
    arg_best = numpy.argmax(res[1])
    dev_best = res[1][arg_best]
    test_best = res[2][arg_best]
    out_db[key] = (res, (dev_best, test_best))
    # FIX: format-based print gives the same output as the Python 2
    # `print a, b, c` statement while also being valid Python 3.
    print("{} {} {}".format(in_dir, dev_best, test_best))

# Persist the raw BoW docs and inferred LDA features for later reuse.
for k, v in data.items():
    infer_db[k] = v

for key in out_db.keys():
    print("{} {}".format(key, out_db[key][1]))

out_db.close()
infer_db.close()
origin_corps.close()