KNN_replace.py
# coding: utf-8
# For each ASR document, find its nearest manual-transcription (TRS) training
# document (1-nearest neighbour) and replace the ASR representation with that
# TRS document before training an MLP classifier. Several input
# representations are compared: raw sparse bags-of-words and autoencoder
# ("SPELIKE") output/hidden-layer (H2) representations.

# In[29]:

# Imports
import itertools
import shelve
import pickle
import pandas
import numpy
import nltk
import codecs
import gensim
import scipy
from scipy import sparse
import scipy.sparse
import scipy.io
import sklearn
from sklearn.feature_extraction.text import CountVectorizer
import sklearn.metrics
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import confusion_matrix
from sklearn import preprocessing
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, AutoEncoder
from keras.optimizers import SGD
from keras.layers import containers
from mlp import *
import mlp

# In[4]:

# db = shelve.open("SPELIKE_MLP_DB.shelve", writeback=True)
corps = shelve.open("DECODA_SPELIKE.shelve")
# keys: [ 'TRS_SPELIKE_H2_RELU', 'ASR_SPELIKE_OUT_RELU', 'ASR_SPELIKE_H2_RELU' ]
sparse_corp = shelve.open("DECODA_sparse.shelve")
# keys: [ 'vocab', 'LABEL', 'TRS_SPARSE', 'ASR_SPARSE' ]
scores = shelve.open("scores/KNN_SPE.shelve", writeback=True)

# In[8]:

ASR_sparse = sparse_corp["ASR_SPARSE"]
TRS_sparse = sparse_corp["TRS_SPARSE"]

# In[9]:

def most_common(lst):
    return max(set(lst), key=lst.count)

# In[6]:

# Fit a 1-NN index on the sparse TRS training documents.
print "nbrs"
nbrs = NearestNeighbors(n_neighbors=1, algorithm='auto').fit(TRS_sparse["TRAIN"])

# RAW ASR: query the index with the raw sparse ASR documents and replace each
# DEV/TEST document by its nearest TRS training document.
print "raw asr"
TEST_distances, TEST_indices = nbrs.kneighbors(ASR_sparse["TEST"])
DEV_distances, DEV_indices = nbrs.kneighbors(ASR_sparse["DEV"])
x_test = scipy.sparse.csr.csr_matrix([TRS_sparse["TRAIN"][x].toarray()[0] for x in TEST_indices])
x_dev = scipy.sparse.csr.csr_matrix([TRS_sparse["TRAIN"][x].toarray()[0] for x in DEV_indices])
scores["ASR_RAW"] = mlp.train_mlp(
    TRS_sparse["TRAIN"].todense(), sparse_corp["LABEL"]["TRAIN"],
    x_dev.todense(), sparse_corp["LABEL"]["DEV"],
    x_test.todense(), sparse_corp["LABEL"]["TEST"],
    [1024, 512, 1024], dropouts=[0.5, 0.25, 0], sgd="adam", epochs=150)

# AE OUT: query the same index with the autoencoder output representation of
# the ASR documents.
print "asr ae out"
TEST_distances, TEST_indices = nbrs.kneighbors(corps["ASR_SPELIKE_OUT_RELU"]["TEST"])
DEV_distances, DEV_indices = nbrs.kneighbors(corps["ASR_SPELIKE_OUT_RELU"]["DEV"])
x_test = scipy.sparse.csr.csr_matrix([TRS_sparse["TRAIN"][x].toarray()[0] for x in TEST_indices])
x_dev = scipy.sparse.csr.csr_matrix([TRS_sparse["TRAIN"][x].toarray()[0] for x in DEV_indices])
scores["ASR_AE_OUT"] = mlp.train_mlp(
    TRS_sparse["TRAIN"].todense(), sparse_corp["LABEL"]["TRAIN"],
    x_dev.todense(), sparse_corp["LABEL"]["DEV"],
    x_test.todense(), sparse_corp["LABEL"]["TEST"],
    [1024, 512, 1024], dropouts=[0.5, 0.25, 0], sgd="adam", epochs=150)

# AE H2 MLP OUT: re-fit the index on the TRS hidden-layer (H2) representation,
# query it with the ASR H2 representation, and train the MLP on the replaced
# sparse TRS documents.
print "asr h2 MLP OUT"
nbrs = NearestNeighbors(n_neighbors=1, algorithm='auto').fit(corps["TRS_SPELIKE_H2_RELU"]["TRAIN"])
TEST_distances, TEST_indices = nbrs.kneighbors(corps["ASR_SPELIKE_H2_RELU"]["TEST"])
DEV_distances, DEV_indices = nbrs.kneighbors(corps["ASR_SPELIKE_H2_RELU"]["DEV"])
x_test = numpy.array([TRS_sparse["TRAIN"][x].toarray()[0] for x in TEST_indices])
x_dev = numpy.array([TRS_sparse["TRAIN"][x].toarray()[0] for x in DEV_indices])
scores["ASR_AE_H2_MLP_OUT"] = mlp.train_mlp(
    TRS_sparse["TRAIN"].todense(), sparse_corp["LABEL"]["TRAIN"],
    x_dev, sparse_corp["LABEL"]["DEV"],
    x_test, sparse_corp["LABEL"]["TEST"],
    [1024, 512, 1024], dropouts=[0.5, 0.25, 0], sgd="adam", epochs=150)

# AE H2 MLP H2: reuse the same neighbour indices, but train the MLP on the TRS
# hidden-layer (H2) representation instead of the sparse documents.
print "asr h2 MLP h2"
x_test = numpy.array([corps["TRS_SPELIKE_H2_RELU"]["TRAIN"][x][0] for x in TEST_indices])
x_dev = numpy.array([corps["TRS_SPELIKE_H2_RELU"]["TRAIN"][x][0] for x in DEV_indices])
scores["ASR_AE_H2_MLP_H2"] = mlp.train_mlp(
    corps["TRS_SPELIKE_H2_RELU"]["TRAIN"], sparse_corp["LABEL"]["TRAIN"],
    x_dev, sparse_corp["LABEL"]["DEV"],
    x_test, sparse_corp["LABEL"]["TEST"],
    [1024, 512, 1024], dropouts=[0.5, 0.25, 0], sgd="adam", epochs=150)