Commit 128365a4fbbb1e8ec99a742ab7d462b7467e584e
1 parent
2af8e57f4e
Exists in
master
ajout pca
Showing 1 changed file with 96 additions and 0 deletions Side-by-side Diff
LDA/04f-pca.py
1 | + | |
2 | +# coding: utf-8 | |
3 | + | |
4 | +# In[29]: | |
5 | + | |
6 | +# Import | |
7 | +import itertools | |
8 | +import shelve | |
9 | +import pickle | |
10 | +import numpy | |
11 | +import scipy | |
12 | +from scipy import sparse | |
13 | +import scipy.sparse | |
14 | +import scipy.io | |
15 | +from mlp import * | |
16 | +import mlp | |
17 | +import sys | |
18 | +import utils | |
19 | +import dill | |
20 | +from collections import Counter | |
21 | +from gensim.models import LdaModel | |
22 | +from sklearn.decomposition import PCA | |
23 | + | |
24 | + | |
25 | + | |
26 | +# In[3]: | |
27 | + | |
28 | +#30_50_50_150_0.0001 | |
29 | + | |
30 | +# In[4]: | |
31 | + | |
32 | +#db=shelve.open("SPELIKE_MLP_DB.shelve",writeback=True) | |
# ---------------------------------------------------------------------------
# PCA sweep.
#
# For every corpus key found under FEATURES_KEY in the origin shelf, fit a
# whitened PCA on the TRAIN split, project DEV and TEST with it, then train
# an MLP on growing prefixes of the principal components and record scores
# in "<IN_DIR>/pca_scores.shelve".
#
# Usage: python 04f-pca.py IN_DIR ORIGIN_SHELVE [FEATURES_KEY]
#   FEATURES_KEY defaults to "LDA".
# ---------------------------------------------------------------------------

# Hyper-parameters forwarded to mlp.train_mlp.
mlp_h = [250, 250]
mlp_loss = "categorical_crossentropy"
mlp_dropouts = [0.25] * len(mlp_h)
mlp_epochs = 3000
mlp_batch_size = 5
# Declared but not forwarded to mlp.train_mlp by this script.
mlp_input_activation = "relu"
mlp_output_activation = "softmax"

# Number of principal components fitted once per corpus key.
pca_components = 200


def select_best_scores(dev_scores, test_scores):
    """Pick the scores to report for one training run.

    Both arguments are the sequences found at index 1 and 2 of the value
    returned by mlp.train_mlp (presumably per-epoch dev and test scores --
    TODO confirm against mlp.train_mlp).

    Returns a 3-tuple:
      * the highest dev score,
      * the test score at the position of that highest dev score,
      * the highest test score overall.
    """
    best = numpy.argmax(dev_scores)
    return dev_scores[best], test_scores[best], numpy.max(test_scores)


def main(argv):
    """Run the PCA sweep described at the top of this section.

    argv follows the sys.argv layout: argv[1] is the output directory,
    argv[2] the path of the origin shelf and the optional argv[3] the
    top-level key holding the features (default "LDA").
    """
    if len(argv) < 3:
        sys.exit("usage: {} IN_DIR ORIGIN_SHELVE [FEATURES_KEY]".format(argv[0]))

    in_dir = argv[1]
    features_key = argv[3] if len(argv) > 3 else "LDA"
    # A single optimizer instance is shared by every training run, as in the
    # original script.
    mlp_sgd = Adam(lr=0.0001)
    results = []

    origin_corps = shelve.open(argv[2])
    try:
        out_db = shelve.open("{}/pca_scores.shelve".format(in_dir),
                             writeback=True)
        try:
            # Every lookup on a shelf unpickles the whole stored entry, so
            # load the two top-level entries once instead of once per split.
            features = origin_corps[features_key]
            labels = origin_corps["LABEL"]

            for key in features:
                print("#########" + key + "########")
                dev_best = []
                test_best = []
                test_max = []

                # The PCA is fitted on TRAIN only and reused for DEV/TEST.
                pca = PCA(n_components=pca_components, copy=True, whiten=True)
                x_train_big = pca.fit_transform(features[key]["TRAIN"])
                y_train = labels[key]["TRAIN"]

                x_dev_big = pca.transform(features[key]["DEV"])
                y_dev = labels[key]["DEV"]

                x_test_big = pca.transform(features[key]["TEST"])
                y_test = labels[key]["TEST"]

                # NOTE(review): the sweep stops at pca_components - 1, so the
                # full projection is never evaluated -- confirm this is
                # intended.
                for n_dims in range(1, pca_components):
                    x_train = x_train_big[:, :n_dims]
                    x_dev = x_dev_big[:, :n_dims]
                    x_test = x_test_big[:, :n_dims]
                    print("xshape {}".format(x_train.shape))
                    print("xdev {}".format(x_dev.shape))
                    print("xtest {}".format(x_test.shape))
                    res = mlp.train_mlp(x_train, y_train,
                                        x_dev, y_dev,
                                        x_test, y_test,
                                        mlp_h, dropouts=mlp_dropouts,
                                        sgd=mlp_sgd,
                                        epochs=mlp_epochs,
                                        batch_size=mlp_batch_size,
                                        save_pred=False, keep_histo=False,
                                        loss=mlp_loss, fit_verbose=0)
                    dev_score, test_score, test_top = select_best_scores(
                        res[1], res[2])
                    dev_best.append(dev_score)
                    test_best.append(test_score)
                    test_max.append(test_top)
                    print("%s %s" % (dev_score, test_score))

                # NOTE(review): nesting reconstructed from a diff view that
                # lost its indentation; these three statements are assumed to
                # run once per key, after the sweep. `res` is the result of
                # the last run of the sweep.
                out_db[key] = (res, (dev_best, test_best, test_max))
                results.append((key, dev_best, test_best, test_max))
                out_db.sync()

            for el in results:
                print(el)
        finally:
            # Close (and flush the writeback cache) even if training failed.
            out_db.close()
    finally:
        origin_corps.close()


if __name__ == "__main__":
    main(sys.argv)