Blame view
LDA/00-mmf_make_features.py
1.26 KB
7db73861f add vae et mmf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
"""Pack per-modality LDA feature files and their labels into one shelve file.

Usage: 00-mmf_make_features.py INPUT_DIR LEVEL

For each modality (ASR, TRS) the script reads
``INPUT_DIR/<modality>/{train,dev,test}_<LEVEL>.ssv`` (space-separated, no
header).  Column 0 is mapped to a label through ``utils.select_mmf`` and the
remaining columns are kept as the feature matrix.  Everything is written to
``INPUT_DIR/mmf_<LEVEL>.shelve`` under the keys ``"LABEL"`` and ``"LDA"``.
"""
import sys
import os
import pandas
import numpy
import shelve
from sklearn.preprocessing import LabelBinarizer

from utils import select_mmf as select

MODALITIES = ("ASR", "TRS")
SPLITS = ("train", "dev", "test")


def _read_split(input_dir, mod, split, level):
    """Load ``<input_dir>/<mod>/<split>_<level>.ssv`` as a headerless table."""
    path = "{}/{}/{}_{}.ssv".format(input_dir, mod, split, level)
    return pandas.read_table(path, sep=" ", header=None)


def main(input_dir, level):
    """Build the label/feature dictionaries and persist them in the shelf.

    input_dir -- top-level directory containing the ASR and TRS sub-directories
    level     -- LDA size tag used in the file names (the original comment
                 says "desired LDA size (-5)"; exact meaning of the "-5" is
                 not visible here -- TODO confirm)
    """
    lb = LabelBinarizer()

    # NOTE(review): the original stored an empty "LDA" entry *inside* "LABEL".
    # It is kept so the stored layout does not lose a key, but it looks like
    # a typo for a top-level data["LDA"] = {} -- confirm and drop if unused.
    labels = {"LDA": {}}
    features = {}

    for mod in MODALITIES:
        frames = dict(
            (split, _read_split(input_dir, mod, split, level))
            for split in SPLITS
        )
        # Column 0 holds the identifier that select_mmf turns into a label.
        targets = dict(
            (split, frames[split].iloc[:, 0].apply(select))
            for split in SPLITS
        )

        # The binarizer is refit on each modality's training labels, as in
        # the original script.
        lb.fit(targets["train"])
        labels[mod] = dict(
            (split.upper(), lb.transform(targets[split])) for split in SPLITS
        )
        features[mod] = dict(
            (split.upper(), frames[split].iloc[:, 1:].values)
            for split in SPLITS
        )

    # A shelf opened without writeback=True does not persist in-place
    # mutations of stored objects, so the complete dictionaries are built in
    # memory first and each top-level key is assigned exactly once.
    data = shelve.open("{}/mmf_{}.shelve".format(input_dir, level))
    try:
        data["LABEL"] = labels
        data["LDA"] = features
        data.sync()
    finally:
        # Close the shelf even if writing fails.
        data.close()


if __name__ == "__main__":
    if len(sys.argv) < 3:
        sys.exit("usage: {} INPUT_DIR LEVEL".format(sys.argv[0]))
    main(sys.argv[1], sys.argv[2])