diff --git a/volia/masseffect.py b/volia/masseffect.py
index 8828dbd..fbe55b3 100644
--- a/volia/masseffect.py
+++ b/volia/masseffect.py
@@ -92,6 +92,49 @@ def converter(file: str, outtype: str, outfile: str):
             of.write(f"{kaldi_id} {masseffect_id}\n")
 
 
+def utt2sub(file: str, subfile: str, outfile: str):
+    """Write, for every id in ``file``, the sub ids that start with it.
+
+    Each output line has the form ``<id> <sub_id> <sub_id> ...``. A sub id is
+    any id of ``subfile`` for which ``sub_id.startswith(id)`` holds.
+
+    :param file: features, list or labels file with normal ids
+    :param subfile: features, list or labels file with sub ids
+    :param outfile: output file, created or overwritten
+    """
+    keys = list(core.data.read_id_values(file))
+    keys_sub = list(core.data.read_id_values(subfile))
+
+    # Mode "w" is required: open() defaults to read-only and write() would fail.
+    with open(outfile, "w") as of:
+        for key in keys:
+            subkeys = [subkey for subkey in keys_sub if subkey.startswith(key)]
+            subkeys_str = " ".join(subkeys)
+            # Terminate each record so entries do not run together on one line.
+            of.write(f"{key} {subkeys_str}\n")
+
+
+def sub2utt(file: str, subfile: str, outfile: str):
+    """Write, for every sub id, the id from ``file`` that it starts with.
+
+    Each output line has the form ``<sub_id> <id>``. Lines are grouped by id, and
+    a sub id that starts with several ids is written once per matching id.
+
+    :param file: features, list or labels file with normal ids
+    :param subfile: features, list or labels file with sub ids
+    :param outfile: output file, created or overwritten
+    """
+    keys = list(core.data.read_id_values(file))
+    keys_sub = list(core.data.read_id_values(subfile))
+
+    # Mode "w" is required: open() defaults to read-only and write() would fail.
+    with open(outfile, "w") as of:
+        for key in keys:
+            subkeys = [subkey for subkey in keys_sub if subkey.startswith(key)]
+            # writelines() adds no separators, so each record carries its own "\n".
+            of.writelines(f"{subkey} {key}\n" for subkey in subkeys)
+
+
 if __name__ == '__main__':
     # Main parser
     parser = argparse.ArgumentParser(description="...")
@@ -132,6 +175,20 @@ if __name__ == '__main__':
     parser_converter.add_argument("--outfile", type=str, required=True, help="")
     parser_converter.set_defaults(which="converter")
 
+    # Create utt2sub
+    parser_utt2sub = subparsers.add_parser("utt2sub", help="generate utt2sub file")
+    parser_utt2sub.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids")
+    parser_utt2sub.add_argument("--subfile", required=True, type=str, help="features, list or labels file with sub ids")
+    parser_utt2sub.add_argument("--outfile", required=True, type=str, help="output file")
+    parser_utt2sub.set_defaults(which="utt2sub")
+
+    # Create sub2utt
+    parser_sub2utt = subparsers.add_parser("sub2utt", help="generate sub2utt file")
+    parser_sub2utt.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids")
+    parser_sub2utt.add_argument("--subfile", required=True, type=str, help="features, list or labels file with sub ids")
+    parser_sub2utt.add_argument("--outfile", required=True, type=str, help="output file")
+    parser_sub2utt.set_defaults(which="sub2utt")
+
     # Parse
     args = parser.parse_args()
 
@@ -142,7 +199,9 @@ if __name__ == '__main__':
         "char2utt": char2utt,
         "wavscp": wavscp,
         "changelabels": changelabels,
-        "converter": converter
+        "converter": converter,
+        "utt2sub": utt2sub,
+        "sub2utt": sub2utt
     })
 
     runner.run(args.which, args.__dict__, remove="which")