Commit 128365a4fbbb1e8ec99a742ab7d462b7467e584e

Authored by Killian
1 parent 2af8e57f4e
Exists in master

ajout pca

Showing 1 changed file with 96 additions and 0 deletions Side-by-side Diff

  1 +
  2 +# coding: utf-8
  3 +
  4 +# In[29]:
  5 +
  6 +# Import
  7 +import itertools
  8 +import shelve
  9 +import pickle
  10 +import numpy
  11 +import scipy
  12 +from scipy import sparse
  13 +import scipy.sparse
  14 +import scipy.io
  15 +from mlp import *
  16 +import mlp
  17 +import sys
  18 +import utils
  19 +import dill
  20 +from collections import Counter
  21 +from gensim.models import LdaModel
  22 +from sklearn.decomposition import PCA
  23 +
  24 +
  25 +
  26 +# In[3]:
  27 +
  28 +#30_50_50_150_0.0001
  29 +
  30 +# In[4]:
  31 +
# Command line: IN_DIR CORPUS_SHELVE [FEATURES_KEY]
#   IN_DIR        directory in which pca_scores.shelve is written
#   CORPUS_SHELVE shelve file holding the features and the "LABEL" entry
#   FEATURES_KEY  top-level key of the feature set to use (default "LDA")
if len(sys.argv) < 3:
    # Fail with a usage line instead of a bare IndexError.
    sys.exit("usage: {} IN_DIR CORPUS_SHELVE [FEATURES_KEY]".format(sys.argv[0]))

in_dir = sys.argv[1]

# The corpus is only read by this script.  Opening it read-only makes a
# mistyped path fail right here instead of silently creating an empty
# database that later raises KeyError on the first lookup.
origin_corps = shelve.open(sys.argv[2], flag="r")

features_key = sys.argv[3] if len(sys.argv) > 3 else "LDA"

# writeback=True keeps stored entries cached in memory until sync()/close().
out_db = shelve.open("{}/pca_scores.shelve".format(in_dir), writeback=True)

# Settings for the MLP trained on each PCA projection.  The meaning of each
# value is inferred from its name -- confirm against mlp.train_mlp.
mlp_h = [250, 250]
mlp_loss = "categorical_crossentropy"
mlp_dropouts = [0.25] * len(mlp_h)  # one dropout rate per entry of mlp_h
# NOTE(review): Adam is not imported by name in this file; presumably it is
# re-exported by `from mlp import *` -- confirm.
mlp_sgd = Adam(lr=0.0001)
mlp_epochs = 3000
mlp_batch_size = 5
mlp_input_activation = "relu"
mlp_output_activation = "softmax"
  49 +
# For every sub-corpus key: fit a whitened PCA on the TRAIN features, project
# DEV and TEST with it, then train one MLP per prefix of the leading
# components and record the scores.  Results are stored in out_db per key
# and printed at the end.

# Number of principal components fitted per key; also bounds the sweep below.
pca_n_components = 200

ress = []
print("")

try:
    # A shelf opened without writeback unpickles the whole stored value on
    # every subscript, so fetch the two top-level entries once instead of
    # three times each per key.
    features_by_key = origin_corps[features_key]
    labels_by_key = origin_corps["LABEL"]

    for key in features_by_key.keys():
        print("#########" + key + "########")
        dev_best = []
        test_best = []
        test_max = []

        key_features = features_by_key[key]
        key_labels = labels_by_key[key]

        # The projection is fitted on TRAIN only and reused for DEV and TEST.
        pca = PCA(n_components=pca_n_components, copy=True, whiten=True)
        x_train_big = pca.fit_transform(key_features["TRAIN"])
        y_train = key_labels["TRAIN"]

        x_dev_big = pca.transform(key_features["DEV"])
        y_dev = key_labels["DEV"]

        x_test_big = pca.transform(key_features["TEST"])
        y_test = key_labels["TEST"]

        # NOTE(review): the sweep covers 1 .. pca_n_components - 1 leading
        # components, so the full projection is never evaluated.  Confirm
        # whether the upper bound should be pca_n_components + 1.
        for i in range(1, pca_n_components):
            x_train = x_train_big[:, :i]
            x_dev = x_dev_big[:, :i]
            x_test = x_test_big[:, :i]
            print("xshape %s" % (x_train.shape,))
            print("xdev %s" % (x_dev.shape,))
            print("xtest %s" % (x_test.shape,))

            res = mlp.train_mlp(x_train, y_train,
                                x_dev, y_dev,
                                x_test, y_test,
                                mlp_h, dropouts=mlp_dropouts, sgd=mlp_sgd,
                                epochs=mlp_epochs,
                                batch_size=mlp_batch_size,
                                save_pred=False, keep_histo=False,
                                loss=mlp_loss, fit_verbose=0)

            # Pick the position where res[1] peaks and report res[2] at that
            # same position, plus the overall maximum of res[2].
            # Presumably res[1] / res[2] are the DEV / TEST score series --
            # confirm against mlp.train_mlp.
            arg_best = numpy.argmax(res[1])
            dev_best.append(res[1][arg_best])
            test_best.append(res[2][arg_best])
            test_max.append(numpy.max(res[2]))
            print("%s %s" % (dev_best[-1], test_best[-1]))

        # `res` here is the result of the last dimensionality only; the three
        # lists hold one value per dimensionality tried.
        out_db[key] = (res, (dev_best, test_best, test_max))
        ress.append((key, dev_best, test_best, test_max))
        # Flush after each key so finished keys are written out before the
        # next one starts.
        out_db.sync()

    for el in ress:
        print(el)
finally:
    # Close both shelves even when training fails part-way through.
    try:
        out_db.close()
    finally:
        origin_corps.close()