Blame view
volia/masseffect.py
5.23 KB
62fc82e59
|
1 |
import argparse |
a7a92c6a2
|
2 |
from os import path |
62fc82e59
|
3 4 |
import core.data from utils import SubCommandRunner |
a7a92c6a2
|
5 |
import os |
62fc82e59
|
6 |
|
233b7d451
|
7 |
|
62fc82e59
|
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
def utt2char(features: str, outfile: str):
    """Generate a utt2char file from a masseffect features file.

    Every key returned by ``core.data.read_features`` is treated as a
    comma-separated identifier whose second field is the character. Spaces
    are removed from the key and one ``<id> <character>`` record is written
    per line.

    TODO: Don't forget to manage two cases: one with old ids, and an other
    with new ones.

    Args:
        features (str): features file, loaded with
            ``core.data.read_features``.
        outfile (str): path of the utt2char file to write (overwritten).

    Raises:
        IndexError: if a key has fewer than two comma-separated fields.
    """
    data = core.data.read_features(features)

    with open(outfile, "w") as f:
        for key in data.keys():
            fields = key.replace(" ", "").split(",")
            utt_id = ",".join(fields)
            character = fields[1]
            # One record per line: without the newline every mapping would
            # end up concatenated on a single line.
            f.write(f"{utt_id} {character}\n")
5fe43711c
|
28 |
|
62fc82e59
|
29 30 31 32 |
def char2utt(features: str, outfile: str):
    """Placeholder for the char2utt generation, which is not implemented.

    Args:
        features (str): features file (currently unused).
        outfile (str): output file (currently unused).

    Raises:
        NotImplementedError: always. It is a subclass of ``Exception``, so
            callers catching ``Exception`` keep working.
    """
    raise NotImplementedError("Not implemented yet")
a7a92c6a2
|
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
def wavscp(datadir: str, outfile: str):
    """Generate the masseffect wav scp file from the audio directories.

    The files located directly inside ``audio_en-us`` and ``audio_fr-fr``
    are listed (sub-directories are not visited). Each file name, stripped of
    everything after its first ``.``, is split on ``,``: fields 0, 1 and 3
    are used as language, character and record id (field 2 is ignored). One
    ``<lang>,<character>,<record_id> <path>`` record is written per line,
    English files first, each directory in sorted file-name order.

    Args:
        datadir (str): path of the data directory where "audio_en-us" and
            "audio_fr-fr" are available
        outfile (str): path of the wav scp output file (overwritten)

    Raises:
        Exception: if one of the directory is not available
        IndexError: if a file name has fewer than four comma-separated fields
    """
    en_us_dir = os.path.join(datadir, "audio_en-us")
    fr_fr_dir = os.path.join(datadir, "audio_fr-fr")

    if not (os.path.isdir(en_us_dir) and os.path.isdir(fr_fr_dir)):
        raise Exception("Directory audio_en-us or audio_fr-fr does not exist")

    # Collect (directory, file name) pairs before touching the output file so
    # that a listing failure does not leave a truncated wav scp behind.
    entries = []
    for directory in (en_us_dir, fr_fr_dir):
        # First item yielded by os.walk describes the directory itself.
        _, _, filenames = next(os.walk(directory))
        # os.walk gives no ordering guarantee; sort for reproducible output.
        entries.extend((directory, fn) for fn in sorted(filenames))

    with open(outfile, "w") as f:
        for directory, fn in entries:
            fields = fn.split(".")[0].split(",")
            lang, character, record_id = fields[0], fields[1], fields[3]
            wav_path = os.path.join(directory, fn)
            # One record per line, as expected from a scp file.
            f.write(f"{lang},{character},{record_id} {wav_path}\n")
233b7d451
|
68 69 70 71 72 73 74 75 |
def changelabels(source: str, labels: str, outfile: str):
    """Rewrite the ids of ``source`` with their second field relabelled.

    For every id read from ``source``, the second comma-separated field is
    replaced by the first element of the matching entry of ``labels``, and
    the resulting id is written with its original value through
    ``core.data.write_line``.

    Args:
        source (str): file loaded with ``core.data.read_id_values``.
        labels (str): file loaded with ``core.data.read_labels``; it is
            indexed with the ids of ``source``.
        outfile (str): path of the output file (overwritten).
    """
    values_by_id = core.data.read_id_values(source)
    labels_by_id = core.data.read_labels(labels)
    # Snapshot of the ids taken before the output file is opened.
    ids = tuple(values_by_id.keys())

    with open(outfile, "w") as out_handle:
        for old_id in ids:
            fields = old_id.split(",")
            fields[1] = labels_by_id[old_id][0]
            new_id = ",".join(fields)
            core.data.write_line(new_id, values_by_id[old_id], out=out_handle)
233b7d451
|
78 |
|
f774442a8
|
79 80 81 82 83 84 85 86 87 88 |
def converter(file: str, outtype: str, outfile: str):
    """Create a converter file mapping full ids to shortened ids.

    Each id read from ``file`` has its spaces removed and is then split on
    ``,``. One ``<full id> <field0>,<field1>,<field3>`` record is written per
    line.

    Args:
        file (str): file loaded with ``core.data.read_id_values``.
        outtype (str): currently unused by this function.
        outfile (str): path of the converter file to write (overwritten).

    Raises:
        IndexError: if an id has fewer than four comma-separated fields.
    """
    data = core.data.read_id_values(file)

    with open(outfile, "w") as of:
        for key in data:
            full_id = key.replace(" ", "")
            fields = full_id.split(",")
            short_id = ",".join((fields[0], fields[1], fields[3]))
            # One mapping per line: without the newline all mappings would
            # be concatenated on a single line.
            of.write(f"{full_id} {short_id}\n")
62fc82e59
|
89 90 91 92 93 94 |
if __name__ == '__main__':
    # Main parser
    parser = argparse.ArgumentParser(description="...")
    subparsers = parser.add_subparsers(title="action")

    # utt2char
    parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
    # Both options are mandatory: utt2char cannot run with None paths.
    parser_utt2char.add_argument("--features", type=str, required=True, help="features file")
    parser_utt2char.add_argument("--outfile", type=str, required=True, help="output file")
    parser_utt2char.set_defaults(which="utt2char")

    # char2utt
    parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file")
    parser_char2utt.add_argument("--features", type=str, help="features file")
    parser_char2utt.add_argument("--outfile", type=str, help="output file")
    parser_char2utt.set_defaults(which="char2utt")

    # wavscp
    parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
    parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
    parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
    parser_wavscp.set_defaults(which="wavscp")

    # Change labels
    parser_changelabels = subparsers.add_parser("changelabels", help="...")
    parser_changelabels.add_argument("--source", required=True, type=str, help="source file where we want to change ids.")
    parser_changelabels.add_argument("--labels", required=True, type=str, help="file with labels")
    parser_changelabels.add_argument("--outfile", required=True, type=str, help="Output file")
    parser_changelabels.set_defaults(which="changelabels")

    # Create converter
    parser_converter = subparsers.add_parser("converter", help="Create converter file")
    parser_converter.add_argument("--file", type=str, required=True, help="File with ids from which create converter.")
    parser_converter.add_argument("--outtype", type=str, choices=["complet", "kaldi"])
    parser_converter.add_argument("--outfile", type=str, required=True, help="")
    parser_converter.set_defaults(which="converter")

    # Parse
    args = parser.parse_args()

    # Sub-commands are optional by default in Python 3: without one, "which"
    # is never set, so report a usage error instead of an AttributeError.
    if not hasattr(args, "which"):
        parser.error("an action is required")

    # Run commands: "which" selects the function, the remaining parsed
    # options are handed over to the runner.
    runner = SubCommandRunner({
        "utt2char": utt2char,
        "char2utt": char2utt,
        "wavscp": wavscp,
        "changelabels": changelabels,
        "converter": converter,
    })

    runner.run(args.which, vars(args), remove="which")