From 482306aa33024473a765db484c101119102b5140 Mon Sep 17 00:00:00 2001
From: Mathias Quillot
Date: Thu, 2 Sep 2021 15:43:17 +0200
Subject: [PATCH] new utt2sub and sub2utt command

---
Review notes (text in this area is ignored by git-am):
 - utt2sub / sub2utt: dropped the stray `self` parameter (they are plain
   module-level functions like converter and changelabels), opened the
   output file in "w" mode, and ended every record with a newline.
 - converter: --outtype is now required; without it no branch matched
   and an empty file was written silently.
 - No longer appends a blank line at the end of the file.
 - Line breaks and indentation were rebuilt from a whitespace-collapsed
   copy of this patch. Check the context lines against the tree, or
   regenerate with git format-patch, before applying. The commit id and
   the blob ids on the index line predate these edits.

 volia/masseffect.py | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/volia/masseffect.py b/volia/masseffect.py
index 3cbb813..c1be671 100644
--- a/volia/masseffect.py
+++ b/volia/masseffect.py
@@ -78,6 +78,57 @@ def changelabels(source: str, labels: str, outfile: str):
             core.data.write_line(",".join(splited), data_dict[key], out=f)
 
 
+def converter(file: str, outtype: str, outfile: str):
+    """Write a two-column mapping between masseffect ids and kaldi ids.
+
+    The kaldi id is built from fields 0, 1 and 3 of the comma-separated
+    masseffect id; ``outtype`` selects which id comes first on each line.
+    """
+    data = core.data.read_id_values(file)
+
+    with open(outfile, "w") as of:
+        for key in data:
+            splited = key.replace("\n", "").split(",")
+            masseffect_id = key.replace("\n", "")
+            kaldi_id = ",".join([splited[0], splited[1], splited[3]])
+            if outtype == "masseffect2kaldi":
+                of.write(f"{masseffect_id} {kaldi_id}\n")
+            elif outtype == "kaldi2masseffect":
+                of.write(f"{kaldi_id} {masseffect_id}\n")
+
+
+def utt2sub(file: str, subfile: str, outfile: str):
+    """Write one ``<id> <sub id> <sub id> ...`` line per id of ``file``.
+
+    The sub ids of an id are the ids of ``subfile`` that start with it.
+    """
+    keys = list(core.data.read_id_values(file))
+    keys_sub = list(core.data.read_id_values(subfile))
+
+    # Mode "w" is required: a file opened in the default read mode
+    # rejects every write() call.
+    with open(outfile, "w") as of:
+        for key in keys:
+            subkeys = [subkey for subkey in keys_sub if subkey.startswith(key)]
+            subkeys_str = " ".join(subkeys)
+            of.write(f"{key} {subkeys_str}\n")
+
+
+def sub2utt(file: str, subfile: str, outfile: str):
+    """Write one ``<sub id> <id>`` line per matching pair of ids.
+
+    A pair matches when the id of ``subfile`` starts with the id of ``file``.
+    """
+    keys = list(core.data.read_id_values(file))
+    keys_sub = list(core.data.read_id_values(subfile))
+
+    with open(outfile, "w") as of:
+        for key in keys:
+            for subkey in keys_sub:
+                if subkey.startswith(key):
+                    of.write(f"{subkey} {key}\n")
+
+
 if __name__ == '__main__':
     # Main parser
     parser = argparse.ArgumentParser(description="...")
@@ -108,6 +159,31 @@ if __name__ == '__main__':
     parser_changelabels.add_argument("--outfile", required=True, type=str, help="Output file")
     parser_changelabels.set_defaults(which="changelabels")
 
+    # Create converter
+    parser_converter = subparsers.add_parser("converter", help="Create converter file")
+    parser_converter.add_argument("--file",
+                                  type=str,
+                                  required=True,
+                                  help="File with ids from which create converter.")
+    parser_converter.add_argument("--outtype", type=str, required=True, choices=["kaldi2masseffect", "masseffect2kaldi"])
+    parser_converter.add_argument("--outfile", type=str, required=True, help="")
+    parser_converter.set_defaults(which="converter")
+
+    # Create utt2sub
+    parser_utt2sub = subparsers.add_parser("utt2sub", help="generate utt2sub file")
+    parser_utt2sub.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids")
+    parser_utt2sub.add_argument("--subfile", required=True, type=str, help="features, list or labels file with sub ids")
+    parser_utt2sub.add_argument("--outfile", required=True, type=str, help="output file")
+    parser_utt2sub.set_defaults(which="utt2sub")
+
+    # Create sub2utt
+    parser_sub2utt = subparsers.add_parser("sub2utt", help="generate sub2utt file")
+    parser_sub2utt.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids")
+    parser_sub2utt.add_argument("--subfile", required=True, type=str, help="features, list or labels file sub ids")
+    parser_sub2utt.add_argument("--outfile", required=True, type=str, help="output file")
+    parser_sub2utt.set_defaults(which="sub2utt")
+
+
     # Parse
     args = parser.parse_args()
 
@@ -116,7 +192,10 @@ if __name__ == '__main__':
         "utt2char" : utt2char,
         "char2utt": char2utt,
         "wavscp": wavscp,
-        "changelabels": changelabels
+        "changelabels": changelabels,
+        "converter": converter,
+        "utt2sub": utt2sub,
+        "sub2utt": sub2utt
     })
 
     runner.run(args.which, args.__dict__, remove="which")
-- 
1.8.2.3