diff --git a/LDA/04f-pca.py b/LDA/04f-pca.py new file mode 100644 index 0000000..458493c --- /dev/null +++ b/LDA/04f-pca.py @@ -0,0 +1,96 @@ + +# coding: utf-8 + +# In[29]: + +# Import +import itertools +import shelve +import pickle +import numpy +import scipy +from scipy import sparse +import scipy.sparse +import scipy.io +from mlp import * +import mlp +import sys +import utils +import dill +from collections import Counter +from gensim.models import LdaModel +from sklearn.decomposition import PCA + + + +# In[3]: + +#30_50_50_150_0.0001 + +# In[4]: + +#db=shelve.open("SPELIKE_MLP_DB.shelve",writeback=True) +origin_corps=shelve.open("{}".format(sys.argv[2])) +in_dir = sys.argv[1] +if len(sys.argv) > 3 : + features_key = sys.argv[3] +else : + features_key = "LDA" + +out_db=shelve.open("{}/pca_scores.shelve".format(in_dir),writeback=True) +mlp_h = [ 250, 250 ] +mlp_loss = "categorical_crossentropy" +mlp_dropouts = [0.25]* len(mlp_h) +mlp_sgd = Adam(lr=0.0001) +mlp_epochs = 3000 +mlp_batch_size = 5 +mlp_input_activation = "relu" +mlp_output_activation="softmax" + +ress = [] +print + +for key in origin_corps[features_key].keys() : + print "#########" + key + "########" + dev_best =[] + test_best = [] + test_max = [] + pca = PCA(n_components=200, copy=True, whiten=True) + x_train_big = pca.fit_transform(origin_corps[features_key][key]["TRAIN"]) + y_train =origin_corps["LABEL"][key]["TRAIN"] + + + + x_dev_big = pca.transform(origin_corps[features_key][key]["DEV"]) + y_dev = origin_corps["LABEL"][key]["DEV"] + + x_test_big = pca.transform(origin_corps[features_key][key]["TEST"]) + y_test = origin_corps["LABEL"][key]["TEST"] + for i in range(1,200): + x_train = x_train_big[:,:i] + x_dev = x_dev_big[:,:i] + x_test = x_test_big[:,:i] + print "xshape",x_train.shape + print "xdev", x_dev.shape + print "xtest",x_test.shape + res=mlp.train_mlp(x_train,y_train, + x_dev,y_dev, + x_test ,y_test, + mlp_h,dropouts=mlp_dropouts,sgd=mlp_sgd, + epochs=mlp_epochs, + batch_size=mlp_batch_size, + save_pred=False,keep_histo=False, + loss="categorical_crossentropy",fit_verbose=0) + arg_best = numpy.argmax(res[1]) + dev_best.append(res[1][arg_best]) + test_best.append(res[2][arg_best]) + test_max.append(numpy.max(res[2])) + print dev_best[-1],test_best[-1] + out_db[key]=(res,(dev_best,test_best,test_max)) + ress.append((key,dev_best,test_best,test_max)) + out_db.sync() + +for el in ress : + print el +out_db.close() +origin_corps.close()