Blame view
LDA/00-mmf_make_features.py
1.39 KB
7db73861f add vae et mmf |
1 2 3 4 5 6 7 8 9 10 11 12 13 |
import os
import shelve
import sys

import numpy
import pandas
from sklearn.preprocessing import LabelBinarizer

from utils import select_mmf as select

# Positional command-line arguments:
#   1. top-level input directory that contains the ASR and TRS sub-directories
#   2. desired LDA size; used as a tag in the input and output file names
#      (the original note also reads "( -5)" -- meaning unclear, TODO confirm)
#   3. directory in which the resulting shelve file is created
input_dir = sys.argv[1]
level = sys.argv[2]
output_dir = sys.argv[3]

# Label encoder shared by both modalities; it is (re)fitted on the training
# labels of each modality further down in the script.
lb = LabelBinarizer()
e5108393c replace du mlp.p... |
18 19 20 21 |
# Build a single shelve file that stores, for each modality (ASR and TRS),
# the binarized labels and the feature matrices of the TRAIN/DEV/TEST splits.
#
# Resulting layout:
#   data["LABEL"][mod][SPLIT] -> output of lb.transform() on the split labels
#   data["LDA"][mod][SPLIT]   -> feature values of the split
#
# writeback=True keeps accessed entries cached, so the in-place updates of the
# nested dicts below are persisted when the shelf is synced/closed.
data = shelve.open("{}/mmf_{}.shelve".format(output_dir, level), writeback=True)
try:
    data["LABEL"] = {}
    data["LDA"] = {"ASR": {}, "TRS": {}}
    for mod in ["ASR", "TRS"]:
        # Space-separated files without a header row, one file per split.
        train = pandas.read_table(
            "{}/{}/train_{}.ssv".format(input_dir, mod, level),
            sep=" ", header=None)
        dev = pandas.read_table(
            "{}/{}/dev_{}.ssv".format(input_dir, mod, level),
            sep=" ", header=None)
        test = pandas.read_table(
            "{}/{}/test_{}.ssv".format(input_dir, mod, level),
            sep=" ", header=None)

        # The first column is mapped to a label through `select`
        # (utils.select_mmf; its exact mapping is not visible here).
        y_train = train.iloc[:, 0].apply(select)
        y_dev = dev.iloc[:, 0].apply(select)
        y_test = test.iloc[:, 0].apply(select)

        # The encoder is fitted on the training labels only, then applied to
        # every split of the current modality.
        lb.fit(y_train)
        data["LABEL"][mod] = {
            "TRAIN": lb.transform(y_train),
            "DEV": lb.transform(y_dev),
            "TEST": lb.transform(y_test),
        }

        # Debug output kept from the original script. The single-argument
        # print(...) form prints the same thing under Python 2 and Python 3.
        print(data["LDA"][mod])
        print(train.values)

        # Features are every column except the first (label source) and the
        # last one.
        # NOTE(review): the last column is presumably an empty field caused by
        # a trailing separator in the .ssv files -- confirm against the data.
        data["LDA"][mod]["TRAIN"] = train.iloc[:, 1:-1].values
        data["LDA"][mod]["DEV"] = dev.iloc[:, 1:-1].values
        data["LDA"][mod]["TEST"] = test.iloc[:, 1:-1].values

    data.sync()
finally:
    # Always release the underlying database file, even when reading or
    # transforming one of the inputs fails.
    data.close()