Commit 128365a4fbbb1e8ec99a742ab7d462b7467e584e

Authored by Killian
1 parent 2af8e57f4e
Exists in master

ajout pca

Showing 1 changed file with 96 additions and 0 deletions Inline Diff

File was created 1
2 # coding: utf-8
3
4 # In[29]:
5
6 # Import
7 import itertools
8 import shelve
9 import pickle
10 import numpy
11 import scipy
12 from scipy import sparse
13 import scipy.sparse
14 import scipy.io
15 from mlp import *
16 import mlp
17 import sys
18 import utils
19 import dill
20 from collections import Counter
21 from gensim.models import LdaModel
22 from sklearn.decomposition import PCA
23
24
25
26 # In[3]:
27
28 #30_50_50_150_0.0001
29
30 # In[4]:
31
32 #db=shelve.open("SPELIKE_MLP_DB.shelve",writeback=True)
# Feature set read from the corpus shelf when none is given on the command line.
DEFAULT_FEATURES_KEY = "LDA"

# Number of principal components requested from the PCA. The sweep trains one
# MLP per prefix size, from 1 up to the number of columns actually returned.
PCA_COMPONENTS = 200

# MLP hyper-parameters forwarded to mlp.train_mlp.
MLP_HIDDEN = [250, 250]
MLP_LOSS = "categorical_crossentropy"
MLP_DROPOUTS = [0.25] * len(MLP_HIDDEN)
MLP_LEARNING_RATE = 0.0001
MLP_EPOCHS = 3000
MLP_BATCH_SIZE = 5
# NOTE(review): these two activations were declared by the original script but
# never passed to mlp.train_mlp; kept for reference -- confirm whether
# train_mlp should receive them.
MLP_INPUT_ACTIVATION = "relu"
MLP_OUTPUT_ACTIVATION = "softmax"


def parse_args(argv):
    """Split a sys.argv-style list into (in_dir, corpus_path, features_key).

    argv[1] is the directory that receives ``pca_scores.shelve``, argv[2] is
    the path of the input corpus shelf and the optional argv[3] selects the
    feature set (defaults to DEFAULT_FEATURES_KEY).

    Exits with a usage message when the two mandatory arguments are missing,
    instead of failing later with an IndexError.
    """
    if len(argv) < 3:
        prog = argv[0] if argv else "pca_scores"
        sys.exit("usage: {} IN_DIR CORPUS_SHELVE [FEATURES_KEY]".format(prog))
    features_key = argv[3] if len(argv) > 3 else DEFAULT_FEATURES_KEY
    return argv[1], argv[2], features_key


def best_scores(res):
    """Summarise one mlp.train_mlp result.

    Returns a tuple ``(res[1][k], res[2][k], max(res[2]))`` where ``k`` is the
    index of the highest value in ``res[1]``.
    NOTE(review): res[1] / res[2] are presumably the per-epoch dev and test
    scores -- confirm against mlp.train_mlp.
    """
    arg_best = numpy.argmax(res[1])
    return res[1][arg_best], res[2][arg_best], numpy.max(res[2])


def run_sweep(origin_corps, out_db, features_key, optimizer):
    """Train one MLP per PCA prefix size for every key of the feature set.

    For each key, a whitened PCA is fitted on the TRAIN split and applied to
    DEV and TEST; an MLP is then trained on the first 1, 2, ... columns of the
    projection. Per-key results are written to ``out_db`` and synced.

    Returns a list of ``(key, dev_best, test_best, test_max)`` tuples.
    """
    # Each lookup on a shelf unpickles the whole stored value, so read the two
    # top-level entries once instead of on every access.
    features = origin_corps[features_key]
    labels = origin_corps["LABEL"]

    ress = []
    for key in features:
        print("#########" + key + "########")
        splits = features[key]
        key_labels = labels[key]

        pca = PCA(n_components=PCA_COMPONENTS, copy=True, whiten=True)
        x_train_big = pca.fit_transform(splits["TRAIN"])
        x_dev_big = pca.transform(splits["DEV"])
        x_test_big = pca.transform(splits["TEST"])
        y_train = key_labels["TRAIN"]
        y_dev = key_labels["DEV"]
        y_test = key_labels["TEST"]

        dev_best, test_best, test_max = [], [], []
        res = None
        # Upper bound is inclusive and follows the projection's real width:
        # the previous range(1, 200) never evaluated the full set of
        # components that had just been computed.
        for n_kept in range(1, x_train_big.shape[1] + 1):
            x_train = x_train_big[:, :n_kept]
            x_dev = x_dev_big[:, :n_kept]
            x_test = x_test_big[:, :n_kept]
            print("xshape %s" % (x_train.shape,))
            print("xdev %s" % (x_dev.shape,))
            print("xtest %s" % (x_test.shape,))
            res = mlp.train_mlp(x_train, y_train,
                                x_dev, y_dev,
                                x_test, y_test,
                                MLP_HIDDEN, dropouts=MLP_DROPOUTS,
                                sgd=optimizer,
                                epochs=MLP_EPOCHS,
                                batch_size=MLP_BATCH_SIZE,
                                save_pred=False, keep_histo=False,
                                loss=MLP_LOSS, fit_verbose=0)
            dev_score, test_score, test_top = best_scores(res)
            dev_best.append(dev_score)
            test_best.append(test_score)
            test_max.append(test_top)
            print("%s %s" % (dev_best[-1], test_best[-1]))

        # NOTE(review): persisted once per key, after the sweep, together with
        # the result of the last training run -- confirm this matches the
        # intended placement.
        out_db[key] = (res, (dev_best, test_best, test_max))
        ress.append((key, dev_best, test_best, test_max))
        out_db.sync()
    return ress


def main(argv=None):
    """Run the PCA sweep described by the command line and print a summary.

    Both shelves are closed even when training fails part-way, so results
    already synced to ``pca_scores.shelve`` are not left in an open handle.
    """
    argv = sys.argv if argv is None else argv
    in_dir, corpus_path, features_key = parse_args(argv)

    # A single optimizer instance is shared by every training run, as in the
    # original script. NOTE(review): Adam is expected to come from
    # ``from mlp import *`` -- confirm.
    mlp_sgd = Adam(lr=MLP_LEARNING_RATE)

    origin_corps = shelve.open(corpus_path)
    try:
        out_db = shelve.open("{}/pca_scores.shelve".format(in_dir),
                             writeback=True)
        try:
            print("")
            ress = run_sweep(origin_corps, out_db, features_key, mlp_sgd)
            for el in ress:
                print(el)
        finally:
            out_db.close()
    finally:
        origin_corps.close()


if __name__ == "__main__":
    main()
97