Killian / decodopr

Browse Code »

Commit 128365a4fbbb1e8ec99a742ab7d462b7467e584e

Authored by Killian 2016-09-23 08:58:51 +0200

1 parent 2af8e57f4e

Exists in master

ajout pca

Showing 1 changed file with 96 additions and 0 deletions Inline Diff

LDA/04f-pca.py

LDA/04f-pca.py

Diff comments View file @ 128365a

File was created	1
	2	# coding: utf-8
	3
	4	# In[29]:
	5
	6	# Import
	7	import itertools
	8	import shelve
	9	import pickle
	10	import numpy
	11	import scipy
	12	from scipy import sparse
	13	import scipy.sparse
	14	import scipy.io
	15	from mlp import *
	16	import mlp
	17	import sys
	18	import utils
	19	import dill
	20	from collections import Counter
	21	from gensim.models import LdaModel
	22	from sklearn.decomposition import PCA
	23
	24
	25
	26	# In[3]:
	27
	28	#30_50_50_150_0.0001
	29
	30	# In[4]:
	31
	32	#db=shelve.open("SPELIKE_MLP_DB.shelve",writeback=True)
	33	origin_corps=shelve.open("{}".format(sys.argv[2]))
	34	in_dir = sys.argv[1]
	35	if len(sys.argv) > 3 :
	36	features_key = sys.argv[3]
	37	else :
	38	features_key = "LDA"
	39
	40	out_db=shelve.open("{}/pca_scores.shelve".format(in_dir),writeback=True)
	41	mlp_h = [ 250, 250 ]
	42	mlp_loss = "categorical_crossentropy"
	43	mlp_dropouts = [0.25]* len(mlp_h)
	44	mlp_sgd = Adam(lr=0.0001)
	45	mlp_epochs = 3000
	46	mlp_batch_size = 5
	47	mlp_input_activation = "relu"
	48	mlp_output_activation="softmax"
	49
	50	ress = []
	51	print
	52
	53	for key in origin_corps[features_key].keys() :
	54	print "#########" + key + "########"
	55	dev_best =[]
	56	test_best = []
	57	test_max = []
	58	pca = PCA(n_components=200, copy=True, whiten=True)
	59	x_train_big = pca.fit_transform(origin_corps[features_key][key]["TRAIN"])
	60	y_train =origin_corps["LABEL"][key]["TRAIN"]
	61
	62
	63
	64	x_dev_big = pca.transform(origin_corps[features_key][key]["DEV"])
	65	y_dev = origin_corps["LABEL"][key]["DEV"]
	66
	67	x_test_big = pca.transform(origin_corps[features_key][key]["TEST"])
	68	y_test = origin_corps["LABEL"][key]["TEST"]
	69	for i in range(1,200):
	70	x_train = x_train_big[:,:i]
	71	x_dev = x_dev_big[:,:i]
	72	x_test = x_test_big[:,:i]
	73	print "xshape",x_train.shape
	74	print "xdev", x_dev.shape
	75	print "xtest",x_test.shape
	76	res=mlp.train_mlp(x_train,y_train,
	77	x_dev,y_dev,
	78	x_test ,y_test,
	79	mlp_h,dropouts=mlp_dropouts,sgd=mlp_sgd,
	80	epochs=mlp_epochs,
	81	batch_size=mlp_batch_size,
	82	save_pred=False,keep_histo=False,
	83	loss="categorical_crossentropy",fit_verbose=0)
	84	arg_best = numpy.argmax(res[1])
	85	dev_best.append(res[1][arg_best])
	86	test_best.append(res[2][arg_best])
	87	test_max.append(numpy.max(res[2]))
	88	print dev_best[-1],test_best[-1]
	89	out_db[key]=(res,(dev_best,test_best,test_max))
	90	ress.append((key,dev_best,test_best,test_max))
	91	out_db.sync()
	92
	93	for el in ress :
	94	print el
	95	out_db.close()
	96	origin_corps.close()
	97