Commit 34f649f9b05f9bd4da1d18d4324b56367c2b4f24

Authored by quillotm
1 parent 44433f16b4
Exists in master

Adding utt2sub and sub2utt commands

Showing 1 changed file with 45 additions and 1 deletion Side-by-side Diff

... ... @@ -92,6 +92,34 @@
92 92 of.write(f"{kaldi_id} {masseffect_id}\n")
93 93  
94 94  
def utt2sub(self, file: str, subfile: str, outfile: str):
    """Write, for every id of ``file``, the ids of ``subfile`` that start with it.

    One line is written per id of ``file``, in the form
    ``<id> <subid> <subid> ...``, with the sub ids separated by single spaces.

    Args:
        self: Not used by this function; kept so the signature is unchanged.
        file: Path handed to ``core.data.read_id_values``; its ids act as
            prefixes.
        subfile: Path handed to ``core.data.read_id_values``; its ids are the
            candidates matched against each prefix.
        outfile: Path of the output file, created or overwritten.
    """
    # Only the ids are needed, the associated values are ignored.
    # NOTE(review): presumably read_id_values returns a dict keyed by id -
    # confirm against core.data.
    keys = list(core.data.read_id_values(file))
    keys_sub = list(core.data.read_id_values(subfile))

    # Mode "w" is required: open() defaults to read-only, and writing to a
    # read-only handle raises io.UnsupportedOperation.
    with open(outfile, "w") as of:
        for key in keys:
            # NOTE(review): plain prefix matching, so an id such as "a1" also
            # collects the sub ids of "a10" - confirm the id format rules
            # this out.
            subkeys = [subkey for subkey in keys_sub if subkey.startswith(key)]
            subkeys_str = " ".join(subkeys)
            # One record per line; without the newline all records would be
            # glued together on a single line.
            of.write(f"{key} {subkeys_str}\n")
  107 +
  108 +
def sub2utt(self, file: str, subfile: str, outfile: str):
    """Write, for every id of ``subfile``, the id of ``file`` it starts with.

    One line is written per (sub id, id) match, in the form ``<subid> <id>``.
    Lines are grouped by id, following the order of the ids read from
    ``file``.

    Args:
        self: Not used by this function; kept so the signature is unchanged.
        file: Path handed to ``core.data.read_id_values``; its ids act as
            prefixes.
        subfile: Path handed to ``core.data.read_id_values``; its ids are the
            candidates matched against each prefix.
        outfile: Path of the output file, created or overwritten.
    """
    # Only the ids are needed, the associated values are ignored.
    # NOTE(review): presumably read_id_values returns a dict keyed by id -
    # confirm against core.data.
    keys = list(core.data.read_id_values(file))
    keys_sub = list(core.data.read_id_values(subfile))

    # Mode "w" is required: open() defaults to read-only, and writing to a
    # read-only handle raises io.UnsupportedOperation.
    with open(outfile, "w") as of:
        for key in keys:
            # NOTE(review): plain prefix matching, so a sub id may be listed
            # under several ids (e.g. "a1" and "a10") - confirm the id format
            # rules this out.
            for subkey in keys_sub:
                if subkey.startswith(key):
                    # One record per line; without the newline all records
                    # would be glued together on a single line.
                    of.write(f"{subkey} {key}\n")
  121 +
  122 +
95 123 if __name__ == '__main__':
96 124 # Main parser
97 125 parser = argparse.ArgumentParser(description="...")
98 126  
... ... @@ -132,7 +160,21 @@
132 160 parser_converter.add_argument("--outfile", type=str, required=True, help="")
133 161 parser_converter.set_defaults(which="converter")
134 162  
  163 + # Create utt2sub
  164 + parser_utt2sub = subparsers.add_parser("utt2sub", help="generate utt2sub file")
  165 + parser_utt2sub.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids")
  166 + parser_utt2sub.add_argument("--subfile", required=True, type=str, help="features, list or labels file with sub ids")
  167 + parser_utt2sub.add_argument("--outfile", required=True, type=str, help="output file")
  168 + parser_utt2sub.set_defaults(which="utt2sub")
135 169  
  170 + # Create sub2utt
  171 + parser_sub2utt = subparsers.add_parser("sub2utt", help="generate sub2utt file")
  172 + parser_sub2utt.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids")
  173 + parser_sub2utt.add_argument("--subfile", required=True, type=str, help="features, list or labels file sub ids")
  174 + parser_sub2utt.add_argument("--outfile", required=True, type=str, help="output file")
  175 + parser_sub2utt.set_defaults(which="sub2utt")
  176 +
  177 +
136 178 # Parse
137 179 args = parser.parse_args()
138 180  
... ... @@ -142,7 +184,9 @@
142 184 "char2utt": char2utt,
143 185 "wavscp": wavscp,
144 186 "changelabels": changelabels,
145   - "converter": converter
  187 + "converter": converter,
  188 + "utt2sub": utt2sub,
  189 + "sub2utt": sub2utt
146 190 })
147 191  
148 192 runner.run(args.which, args.__dict__, remove="which")