Commit 34f649f9b05f9bd4da1d18d4324b56367c2b4f24
1 parent
44433f16b4
Exists in
master
Adding utt2sub and sub2utt commands
Showing 1 changed file with 45 additions and 1 deletion Side-by-side Diff
volia/masseffect.py
| ... | ... | @@ -92,6 +92,34 @@ |
| 92 | 92 | of.write(f"{kaldi_id} {masseffect_id}\n") |
| 93 | 93 | |
| 94 | 94 | |
def utt2sub(self, file: str, subfile: str, outfile: str):
    """Write, for every utterance id, the sub ids that start with it.

    Ids are obtained by iterating over the results of
    ``core.data.read_id_values(file)`` and
    ``core.data.read_id_values(subfile)``. A sub id is attached to an
    utterance id when ``subkey.startswith(key)`` holds. One line is written
    per utterance id: the id, a space, then its sub ids separated by spaces
    (nothing after the space when no sub id matches).

    Args:
        self: Not used by this function.
            NOTE(review): this is a module-level function; confirm that the
            command runner really supplies this argument.
        file: Path of the file providing the utterance ids.
        subfile: Path of the file providing the sub ids.
        outfile: Path of the output file; created or overwritten.
    """
    data = core.data.read_id_values(file)
    # Materialized once because it is scanned again for every utterance id.
    keys_sub = list(core.data.read_id_values(subfile))

    # Open explicitly for writing: the default mode is read-only, which makes
    # write() fail (and open() itself fail when the file does not exist yet).
    with open(outfile, "w") as of:
        for key in data:
            subkeys_str = " ".join(
                subkey for subkey in keys_sub if subkey.startswith(key)
            )
            # Terminate each record with a newline so that every utterance
            # id ends up on its own line.
            of.write(f"{key} {subkeys_str}\n")
| 107 | + | |
| 108 | + | |
def sub2utt(self, file: str, subfile: str, outfile: str):
    """Write, for every sub id, the utterance id it starts with.

    Ids are obtained by iterating over the results of
    ``core.data.read_id_values(file)`` and
    ``core.data.read_id_values(subfile)``. For each utterance id, every sub
    id satisfying ``subkey.startswith(key)`` produces one output line of the
    form ``<sub id> <utterance id>``. A sub id that starts with several
    utterance ids is written once per matching utterance id.

    Args:
        self: Not used by this function.
            NOTE(review): this is a module-level function; confirm that the
            command runner really supplies this argument.
        file: Path of the file providing the utterance ids.
        subfile: Path of the file providing the sub ids.
        outfile: Path of the output file; created or overwritten.
    """
    data = core.data.read_id_values(file)
    # Materialized once because it is scanned again for every utterance id.
    keys_sub = list(core.data.read_id_values(subfile))

    # Open explicitly for writing: the default mode is read-only, which makes
    # write() fail (and open() itself fail when the file does not exist yet).
    with open(outfile, "w") as of:
        for key in data:
            for subkey in keys_sub:
                if subkey.startswith(key):
                    # Newline-terminated so each pair is its own record.
                    of.write(f"{subkey} {key}\n")
| 121 | + | |
| 122 | + | |
| 95 | 123 | if __name__ == '__main__': |
| 96 | 124 | # Main parser |
| 97 | 125 | parser = argparse.ArgumentParser(description="...") |
| 98 | 126 | |
| ... | ... | @@ -132,7 +160,21 @@ |
| 132 | 160 | parser_converter.add_argument("--outfile", type=str, required=True, help="") |
| 133 | 161 | parser_converter.set_defaults(which="converter") |
| 134 | 162 | |
| 163 | + # Create utt2sub | |
| 164 | + parser_utt2sub = subparsers.add_parser("utt2sub", help="generate utt2sub file") | |
| 165 | + parser_utt2sub.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids") | |
| 166 | + parser_utt2sub.add_argument("--subfile", required=True, type=str, help="features, list or labels file with sub ids") | |
| 167 | + parser_utt2sub.add_argument("--outfile", required=True, type=str, help="output file") | |
| 168 | + parser_utt2sub.set_defaults(which="utt2sub") | |
| 135 | 169 | |
| 170 | + # Create sub2utt | |
| 171 | + parser_sub2utt = subparsers.add_parser("sub2utt", help="generate sub2utt file") | |
| 172 | + parser_sub2utt.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids") | |
| 173 | + parser_sub2utt.add_argument("--subfile", required=True, type=str, help="features, list or labels file sub ids") | |
| 174 | + parser_sub2utt.add_argument("--outfile", required=True, type=str, help="output file") | |
| 175 | + parser_sub2utt.set_defaults(which="sub2utt") | |
| 176 | + | |
| 177 | + | |
| 136 | 178 | # Parse |
| 137 | 179 | args = parser.parse_args() |
| 138 | 180 | |
| ... | ... | @@ -142,7 +184,9 @@ |
| 142 | 184 | "char2utt": char2utt, |
| 143 | 185 | "wavscp": wavscp, |
| 144 | 186 | "changelabels": changelabels, |
| 145 | - "converter": converter | |
| 187 | + "converter": converter, | |
| 188 | + "utt2sub": utt2sub, | |
| 189 | + "sub2utt": sub2utt | |
| 146 | 190 | }) |
| 147 | 191 | |
| 148 | 192 | runner.run(args.which, args.__dict__, remove="which") |