Commit 128365a4fbbb1e8ec99a742ab7d462b7467e584e
1 parent
2af8e57f4e
Exists in
master
ajout pca
Showing 1 changed file with 96 additions and 0 deletions Inline Diff
LDA/04f-pca.py
File was created | 1 | ||
2 | # coding: utf-8 | ||
3 | |||
4 | # In[29]: | ||
5 | |||
6 | # Import | ||
7 | import itertools | ||
8 | import shelve | ||
9 | import pickle | ||
10 | import numpy | ||
11 | import scipy | ||
12 | from scipy import sparse | ||
13 | import scipy.sparse | ||
14 | import scipy.io | ||
15 | from mlp import * | ||
16 | import mlp | ||
17 | import sys | ||
18 | import utils | ||
19 | import dill | ||
20 | from collections import Counter | ||
21 | from gensim.models import LdaModel | ||
22 | from sklearn.decomposition import PCA | ||
23 | |||
24 | |||
25 | |||
26 | # In[3]: | ||
27 | |||
28 | #30_50_50_150_0.0001 | ||
29 | |||
30 | # In[4]: | ||
31 | |||
32 | #db=shelve.open("SPELIKE_MLP_DB.shelve",writeback=True) | ||
# Command line:
#   argv[1] -> directory that receives pca_scores.shelve
#   argv[2] -> path of the input corpus shelve
#   argv[3] -> optional top-level key of the feature set (defaults to "LDA")
origin_corps = shelve.open("{}".format(sys.argv[2]))
in_dir = sys.argv[1]
features_key = sys.argv[3] if len(sys.argv) > 3 else "LDA"

# Output shelf; writeback=True keeps accessed entries cached until sync/close.
out_db = shelve.open("{}/pca_scores.shelve".format(in_dir), writeback=True)

# Settings handed to mlp.train_mlp further down.
# NOTE(review): mlp_h presumably lists hidden-layer widths -- confirm in mlp.
mlp_h = [250, 250]
mlp_loss = "categorical_crossentropy"
mlp_dropouts = [0.25] * len(mlp_h)  # one dropout value per entry of mlp_h
mlp_sgd = Adam(lr=0.0001)
mlp_epochs = 3000
mlp_batch_size = 5
mlp_input_activation = "relu"
mlp_output_activation = "softmax"

# Filled by the training loop with (key, dev_best, test_best, test_max) tuples.
ress = []
51 | |||
52 | |||
53 | for key in origin_corps[features_key].keys() : | ||
54 | print "#########" + key + "########" | ||
55 | dev_best =[] | ||
56 | test_best = [] | ||
57 | test_max = [] | ||
58 | pca = PCA(n_components=200, copy=True, whiten=True) | ||
59 | x_train_big = pca.fit_transform(origin_corps[features_key][key]["TRAIN"]) | ||
60 | y_train =origin_corps["LABEL"][key]["TRAIN"] | ||
61 | |||
62 | |||
63 | |||
64 | x_dev_big = pca.transform(origin_corps[features_key][key]["DEV"]) | ||
65 | y_dev = origin_corps["LABEL"][key]["DEV"] | ||
66 | |||
67 | x_test_big = pca.transform(origin_corps[features_key][key]["TEST"]) | ||
68 | y_test = origin_corps["LABEL"][key]["TEST"] | ||
69 | for i in range(1,200): | ||
70 | x_train = x_train_big[:,:i] | ||
71 | x_dev = x_dev_big[:,:i] | ||
72 | x_test = x_test_big[:,:i] | ||
73 | print "xshape",x_train.shape | ||
74 | print "xdev", x_dev.shape | ||
75 | print "xtest",x_test.shape | ||
76 | res=mlp.train_mlp(x_train,y_train, | ||
77 | x_dev,y_dev, | ||
78 | x_test ,y_test, | ||
79 | mlp_h,dropouts=mlp_dropouts,sgd=mlp_sgd, | ||
80 | epochs=mlp_epochs, | ||
81 | batch_size=mlp_batch_size, | ||
82 | save_pred=False,keep_histo=False, | ||
83 | loss="categorical_crossentropy",fit_verbose=0) | ||
84 | arg_best = numpy.argmax(res[1]) | ||
85 | dev_best.append(res[1][arg_best]) | ||
86 | test_best.append(res[2][arg_best]) | ||
87 | test_max.append(numpy.max(res[2])) | ||
88 | print dev_best[-1],test_best[-1] | ||
89 | out_db[key]=(res,(dev_best,test_best,test_max)) | ||
90 | ress.append((key,dev_best,test_best,test_max)) | ||
91 | out_db.sync() | ||
92 | |||
93 | for el in ress : | ||
94 | print el | ||
95 | out_db.close() | ||
96 | origin_corps.close() | ||
97 |