Commit 482306aa33024473a765db484c101119102b5140

Authored by Mathias Quillot
1 parent d727acc21a
Exists in master

new utt2sub and sub2utt command

Showing 1 changed file with 72 additions and 1 deletions Side-by-side Diff

... ... @@ -78,6 +78,48 @@
78 78 core.data.write_line(",".join(splited), data_dict[key], out=f)
79 79  
80 80  
def converter(file: str, outtype: str, outfile: str):
    """Write a two-column mapping file between "masseffect" and "kaldi" ids.

    Every id obtained by iterating the result of
    ``core.data.read_id_values(file)`` is stripped of newline characters and
    split on commas. The masseffect id is the full stripped id; the kaldi id
    is made of comma-separated fields 0, 1 and 3 (field 2 is dropped).

    Args:
        file: Path passed to ``core.data.read_id_values``.
        outtype: ``"masseffect2kaldi"`` writes ``<masseffect_id> <kaldi_id>``
            lines; ``"kaldi2masseffect"`` writes ``<kaldi_id> <masseffect_id>``
            lines.
        outfile: Path of the mapping file to write (opened in ``"w"`` mode, so
            an existing file is overwritten).

    Raises:
        ValueError: If ``outtype`` is not one of the two supported values.
        IndexError: If an id has fewer than four comma-separated fields.
    """
    supported = ("masseffect2kaldi", "kaldi2masseffect")
    if outtype not in supported:
        # Checked before any file is opened: an unsupported (or missing)
        # outtype would otherwise truncate outfile and write nothing, with no
        # error reported to the user.
        raise ValueError(
            f"outtype must be one of {supported}, got {outtype!r}"
        )

    data = core.data.read_id_values(file)

    with open(outfile, "w") as of:
        for key in data:
            masseffect_id = key.replace("\n", "")
            fields = masseffect_id.split(",")
            kaldi_id = ",".join([fields[0], fields[1], fields[3]])
            if outtype == "masseffect2kaldi":
                of.write(f"{masseffect_id} {kaldi_id}\n")
            else:
                of.write(f"{kaldi_id} {masseffect_id}\n")
  93 +
  94 +
def utt2sub(self=None, file: str = None, subfile: str = None, outfile: str = None):
    """Write, for each id of ``file``, the ids of ``subfile`` that start with it.

    One line is written per id of ``file``, in the form
    ``<id> <sub_id> <sub_id> ...`` (sub ids separated by single spaces). An id
    without any matching sub id still gets a line, with nothing after the
    space.

    Args:
        self: Unused. Kept only so that existing positional calls keep
            working; this is a module-level function and the value is never
            read.
        file: Path passed to ``core.data.read_id_values``; its ids are the
            prefixes.
        subfile: Path passed to ``core.data.read_id_values``; its ids are the
            candidates matched with ``str.startswith``.
        outfile: Path of the file to write (overwritten).

    Raises:
        TypeError: If ``file``, ``subfile`` or ``outfile`` is not provided.
    """
    # The three paths only carry defaults so that `self` can be optional;
    # they are still mandatory.
    missing = [
        name
        for name, value in (("file", file), ("subfile", subfile), ("outfile", outfile))
        if value is None
    ]
    if missing:
        raise TypeError(
            f"utt2sub() missing required argument(s): {', '.join(missing)}"
        )

    keys = list(core.data.read_id_values(file))
    keys_sub = list(core.data.read_id_values(subfile))

    # Must be opened for writing: the default mode of open() is read-only and
    # every write() would fail.
    with open(outfile, "w") as of:
        for key in keys:
            # NOTE(review): plain prefix matching, so an id such as "utt1"
            # also matches sub ids of "utt10"; confirm the id format rules
            # this out.
            subkeys = [subkey for subkey in keys_sub if subkey.startswith(key)]
            subkeys_str = " ".join(subkeys)
            # write() does not append a newline; without it all records end
            # up concatenated on a single line.
            of.write(f"{key} {subkeys_str}\n")
  107 +
  108 +
def sub2utt(self=None, file: str = None, subfile: str = None, outfile: str = None):
    """Write, for each sub id, the id of ``file`` it starts with.

    For every id of ``file`` (in iteration order), one line
    ``<sub_id> <id>`` is written per id of ``subfile`` that starts with it.
    Sub ids that match no id of ``file`` are not written.

    Args:
        self: Unused. Kept only so that existing positional calls keep
            working; this is a module-level function and the value is never
            read.
        file: Path passed to ``core.data.read_id_values``; its ids are the
            prefixes.
        subfile: Path passed to ``core.data.read_id_values``; its ids are the
            candidates matched with ``str.startswith``.
        outfile: Path of the file to write (overwritten).

    Raises:
        TypeError: If ``file``, ``subfile`` or ``outfile`` is not provided.
    """
    # The three paths only carry defaults so that `self` can be optional;
    # they are still mandatory.
    missing = [
        name
        for name, value in (("file", file), ("subfile", subfile), ("outfile", outfile))
        if value is None
    ]
    if missing:
        raise TypeError(
            f"sub2utt() missing required argument(s): {', '.join(missing)}"
        )

    keys = list(core.data.read_id_values(file))
    keys_sub = list(core.data.read_id_values(subfile))

    # Must be opened for writing: the default mode of open() is read-only and
    # every write() would fail.
    with open(outfile, "w") as of:
        for key in keys:
            # NOTE(review): plain prefix matching, so an id such as "utt1"
            # also matches sub ids of "utt10"; confirm the id format rules
            # this out.
            for subkey in keys_sub:
                if subkey.startswith(key):
                    # write() does not append a newline; without it all
                    # records end up concatenated on a single line.
                    of.write(f"{subkey} {key}\n")
  121 +
  122 +
81 123 if __name__ == '__main__':
82 124 # Main parser
83 125 parser = argparse.ArgumentParser(description="...")
... ... @@ -108,6 +150,31 @@
108 150 parser_changelabels.add_argument("--outfile", required=True, type=str, help="Output file")
109 151 parser_changelabels.set_defaults(which="changelabels")
110 152  
  153 + # Create converter
  154 + parser_converter = subparsers.add_parser("converter", help="Create converter file")
  155 + parser_converter.add_argument("--file",
  156 + type=str,
  157 + required=True,
  158 + help="File with ids from which create converter.")
  159 + parser_converter.add_argument("--outtype", type=str, choices=["kaldi2masseffect", "masseffect2kaldi"])
  160 + parser_converter.add_argument("--outfile", type=str, required=True, help="")
  161 + parser_converter.set_defaults(which="converter")
  162 +
  163 + # Create utt2sub
  164 + parser_utt2sub = subparsers.add_parser("utt2sub", help="generate utt2sub file")
  165 + parser_utt2sub.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids")
  166 + parser_utt2sub.add_argument("--subfile", required=True, type=str, help="features, list or labels file with sub ids")
  167 + parser_utt2sub.add_argument("--outfile", required=True, type=str, help="output file")
  168 + parser_utt2sub.set_defaults(which="utt2sub")
  169 +
  170 + # Create sub2utt
  171 + parser_sub2utt = subparsers.add_parser("sub2utt", help="generate sub2utt file")
  172 + parser_sub2utt.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids")
  173 + parser_sub2utt.add_argument("--subfile", required=True, type=str, help="features, list or labels file sub ids")
  174 + parser_sub2utt.add_argument("--outfile", required=True, type=str, help="output file")
  175 + parser_sub2utt.set_defaults(which="sub2utt")
  176 +
  177 +
111 178 # Parse
112 179 args = parser.parse_args()
113 180  
... ... @@ -116,7 +183,10 @@
116 183 "utt2char" : utt2char,
117 184 "char2utt": char2utt,
118 185 "wavscp": wavscp,
119   - "changelabels": changelabels
  186 + "changelabels": changelabels,
  187 + "converter": converter,
  188 + "utt2sub": utt2sub,
  189 + "sub2utt": sub2utt
120 190 })
121 191  
122 192 runner.run(args.which, args.__dict__, remove="which")