Killian / decodopr

Browse Code »

Commit 128365a4fbbb1e8ec99a742ab7d462b7467e584e

Authored by Killian 2016-09-23 08:58:51 +0200

1 parent 2af8e57f4e

Exists in master

ajout pca

Showing 1 changed file with 96 additions and 0 deletions Side-by-side Diff

LDA/04f-pca.py

LDA/04f-pca.py

Diff comments View file @ 128365a

	1	+
	2	+# coding: utf-8
	3	+
	4	+# In[29]:
	5	+
	6	+# Import
	7	+import itertools
	8	+import shelve
	9	+import pickle
	10	+import numpy
	11	+import scipy
	12	+from scipy import sparse
	13	+import scipy.sparse
	14	+import scipy.io
	15	+from mlp import *
	16	+import mlp
	17	+import sys
	18	+import utils
	19	+import dill
	20	+from collections import Counter
	21	+from gensim.models import LdaModel
	22	+from sklearn.decomposition import PCA
	23	+
	24	+
	25	+
	26	+# In[3]:
	27	+
	28	+#30_50_50_150_0.0001
	29	+
	30	+# In[4]:
	31	+
	32	+#db=shelve.open("SPELIKE_MLP_DB.shelve",writeback=True)
	33	+origin_corps=shelve.open("{}".format(sys.argv[2]))
	34	+in_dir = sys.argv[1]
	35	+if len(sys.argv) > 3 :
	36	+ features_key = sys.argv[3]
	37	+else :
	38	+ features_key = "LDA"
	39	+
	40	+out_db=shelve.open("{}/pca_scores.shelve".format(in_dir),writeback=True)
	41	+mlp_h = [ 250, 250 ]
	42	+mlp_loss = "categorical_crossentropy"
	43	+mlp_dropouts = [0.25]* len(mlp_h)
	44	+mlp_sgd = Adam(lr=0.0001)
	45	+mlp_epochs = 3000
	46	+mlp_batch_size = 5
	47	+mlp_input_activation = "relu"
	48	+mlp_output_activation="softmax"
	49	+
	50	+ress = []
	51	+print
	52	+
	53	+for key in origin_corps[features_key].keys() :
	54	+ print "#########" + key + "########"
	55	+ dev_best =[]
	56	+ test_best = []
	57	+ test_max = []
	58	+ pca = PCA(n_components=200, copy=True, whiten=True)
	59	+ x_train_big = pca.fit_transform(origin_corps[features_key][key]["TRAIN"])
	60	+ y_train =origin_corps["LABEL"][key]["TRAIN"]
	61	+
	62	+
	63	+
	64	+ x_dev_big = pca.transform(origin_corps[features_key][key]["DEV"])
	65	+ y_dev = origin_corps["LABEL"][key]["DEV"]
	66	+
	67	+ x_test_big = pca.transform(origin_corps[features_key][key]["TEST"])
	68	+ y_test = origin_corps["LABEL"][key]["TEST"]
	69	+ for i in range(1,200):
	70	+ x_train = x_train_big[:,:i]
	71	+ x_dev = x_dev_big[:,:i]
	72	+ x_test = x_test_big[:,:i]
	73	+ print "xshape",x_train.shape
	74	+ print "xdev", x_dev.shape
	75	+ print "xtest",x_test.shape
	76	+ res=mlp.train_mlp(x_train,y_train,
	77	+ x_dev,y_dev,
	78	+ x_test ,y_test,
	79	+ mlp_h,dropouts=mlp_dropouts,sgd=mlp_sgd,
	80	+ epochs=mlp_epochs,
	81	+ batch_size=mlp_batch_size,
	82	+ save_pred=False,keep_histo=False,
	83	+ loss="categorical_crossentropy",fit_verbose=0)
	84	+ arg_best = numpy.argmax(res[1])
	85	+ dev_best.append(res[1][arg_best])
	86	+ test_best.append(res[2][arg_best])
	87	+ test_max.append(numpy.max(res[2]))
	88	+ print dev_best[-1],test_best[-1]
	89	+ out_db[key]=(res,(dev_best,test_best,test_max))
	90	+ ress.append((key,dev_best,test_best,test_max))
	91	+ out_db.sync()
	92	+
	93	+for el in ress :
	94	+ print el
	95	+out_db.close()
	96	+origin_corps.close()