Killian / decodopr

Blame view

LDA/00-mmf_make_features.py 1.39 KB

7db73861f Killian add vae et mmf	1 2 3 4 5 6 7 8 9 10 11 12 13	import sys import os import pandas import numpy import shelve from sklearn.preprocessing import LabelBinarizer from utils import select_mmf as select input_dir = sys.argv[1] # Dossier de premire niveau contient ASR et TRS level = sys.argv[2] # taille de LDA ( -5) voulu
e5108393c Killian replace du mlp.p...	14	output_dir = sys.argv[3]
7db73861f Killian add vae et mmf	15 16 17	lb=LabelBinarizer() #y_train=lb.fit_transform([utils.select(ligneid) for ligneid in origin_corps["LABEL"]["TRAIN"]])
e5108393c Killian replace du mlp.p...	18 19 20 21	data = shelve.open("{}/mmf_{}.shelve".format(output_dir,level),writeback=True) data["LABEL"]= {} data["LDA"] = {"ASR":{},"TRS":{}} for mod in ["ASR", "TRS" ]:
7db73861f Killian add vae et mmf	22 23 24 25 26 27 28 29 30	train = pandas.read_table("{}/{}/train_{}.ssv".format(input_dir, mod, level), sep=" ", header=None ) dev = pandas.read_table("{}/{}/dev_{}.ssv".format(input_dir, mod, level), sep=" ", header=None ) test = pandas.read_table("{}/{}/test_{}.ssv".format(input_dir, mod, level), sep=" ", header=None ) y_train = train.iloc[:,0].apply(select) y_dev = dev.iloc[:,0].apply(select) y_test = test.iloc[:,0].apply(select) lb.fit(y_train) data["LABEL"][mod]={"TRAIN":lb.transform(y_train),"DEV":lb.transform(y_dev), "TEST": lb.transform(y_test)}
e5108393c Killian replace du mlp.p...	31 32 33 34 35 36	# data["LDA"][mod]={'ASR':[]} print data["LDA"][mod] print train.values data["LDA"][mod]["TRAIN"]=train.iloc[:,1:-1].values data["LDA"][mod]["DEV"]=dev.iloc[:,1:-1].values data["LDA"][mod]["TEST"]=test.iloc[:,1:-1].values
7db73861f Killian add vae et mmf	37 38 39	data.sync() data.close()