diff --git a/volia/masseffect.py b/volia/masseffect.py
index 8dc1467..0163eea 100644
--- a/volia/masseffect.py
+++ b/volia/masseffect.py
@@ -92,32 +92,34 @@ def converter(file: str, outtype: str, outfile: str):
             of.write(f"{kaldi_id} {masseffect_id}\n")
 
 
-def utt2sub(file: str, subfile: str, outfile: str):
+def utt2sub(file: str, outfile: str):
+    """Write one "<id> <sub id> <sub id> ..." line per normal id to outfile."""
     data = core.data.read_id_values(file)
     keys = [key for key in data]
 
-    data_sub = core.data.read_id_values(subfile)
-    keys_sub = [key for key in data_sub]
-
     with open(outfile, "w") as of:
-        for key in keys:
-            subkeys = [subkey for subkey in keys_sub if subkey.startswith(key)]
-            subkeys_str = " ".join(subkeys)
+        key_2_subkeys = {}
+        for subkey in keys:
+            # Normal id = sub id without its last "_"-separated field; it is
+            # re-joined into a str because a list cannot be used as a dict key.
+            key = "_".join(subkey.replace(" ", "").replace("\n", "").split("_")[:-1])
+            key_2_subkeys.setdefault(key, []).append(subkey)
+
+        for key in key_2_subkeys:
+            subkeys_str = " ".join(key_2_subkeys[key])
             of.write(f"{key} {subkeys_str}\n")
 
 
-def sub2utt(file: str, subfile: str, outfile: str):
+def sub2utt(file: str, outfile: str):
+    """Write one "<sub id> <id>" line per sub id read from file to outfile."""
     data = core.data.read_id_values(file)
     keys = [key for key in data]
 
-    data_sub = core.data.read_id_values(subfile)
-    keys_sub = [key for key in data_sub]
-
     with open(outfile, "w") as of:
-        for key in keys:
-            subkeys = [subkey for subkey in keys_sub if subkey.startswith(key)]
-            for subkey in subkeys:
-                of.write(f"{subkey} {key}\n")
+        for subkey in keys:
+            # Re-join the fields so a plain id is written, not a list repr.
+            key = "_".join(subkey.replace(" ", "").replace("\n", "").split("_")[:-1])
+            of.write(f"{subkey} {key}\n")
 
 
 if __name__ == '__main__':
@@ -162,15 +164,13 @@ if __name__ == '__main__':
 
     # Create utt2sub
     parser_utt2sub = subparsers.add_parser("utt2sub", help="generate utt2sub file")
-    parser_utt2sub.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids")
-    parser_utt2sub.add_argument("--subfile", required=True, type=str, help="features, list or labels file with sub ids")
+    parser_utt2sub.add_argument("--file", required=True, type=str, help="features, list or labels file with sub ids")
     parser_utt2sub.add_argument("--outfile", required=True, type=str, help="output file")
     parser_utt2sub.set_defaults(which="utt2sub")
 
     # Create sub2utt
     parser_sub2utt = subparsers.add_parser("sub2utt", help="generate sub2utt file")
-    parser_sub2utt.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids")
-    parser_sub2utt.add_argument("--subfile", required=True, type=str, help="features, list or labels file sub ids")
+    parser_sub2utt.add_argument("--file", required=True, type=str, help="features, list or labels file with sub ids")
     parser_sub2utt.add_argument("--outfile", required=True, type=str, help="output file")
     parser_sub2utt.set_defaults(which="sub2utt")
 