Commit 482306aa33024473a765db484c101119102b5140
1 parent
d727acc21a
Exists in
master
new utt2sub and sub2utt command
Showing 1 changed file with 72 additions and 1 deletions Side-by-side Diff
volia/masseffect.py
... | ... | @@ -78,6 +78,48 @@ |
78 | 78 | core.data.write_line(",".join(splited), data_dict[key], out=f) |
79 | 79 | |
80 | 80 | |
def converter(file: str, outtype: str, outfile: str):
    """Write a mapping file between masseffect ids and kaldi ids.

    Every key returned by ``core.data.read_id_values(file)`` is used, with
    newline characters removed, as the masseffect id. The kaldi id is built
    from the 1st, 2nd and 4th comma-separated fields of that id.

    Args:
        file: Path handed to ``core.data.read_id_values``.
        outtype: ``"masseffect2kaldi"`` writes ``<masseffect_id> <kaldi_id>``
            on each line; ``"kaldi2masseffect"`` writes the two ids in the
            opposite order. Any other value writes no lines at all.
        outfile: Path of the file to write; it is opened in ``"w"`` mode.

    Raises:
        IndexError: If an id has fewer than four comma-separated fields.
    """
    entries = core.data.read_id_values(file)

    with open(outfile, "w") as out:
        for raw_key in entries:
            full_id = raw_key.replace("\n", "")
            fields = full_id.split(",")
            # The third field (index 2) is deliberately left out of the
            # kaldi id.
            short_id = ",".join((fields[0], fields[1], fields[3]))

            if outtype == "masseffect2kaldi":
                left, right = full_id, short_id
            elif outtype == "kaldi2masseffect":
                left, right = short_id, full_id
            else:
                # Unrecognised output type: nothing is written for this id.
                continue
            out.write(f"{left} {right}\n")
93 | + | |
94 | + | |
def utt2sub(self, file: str, subfile: str, outfile: str):
    """Write, for every id in ``file``, the sub ids that start with it.

    The output holds one line per id read from ``file``: the id, a space,
    then the space-separated ids from ``subfile`` whose text begins with
    that id (possibly none).

    Args:
        self: Unused. Kept only so the existing signature is unchanged.
            NOTE(review): this is defined like a module-level function, so
            the parameter looks unintended -- confirm against the caller.
        file: Path handed to ``core.data.read_id_values`` for the normal ids.
        subfile: Path handed to ``core.data.read_id_values`` for the sub ids.
        outfile: Path of the file to write; it is created or overwritten.
    """
    utt_ids = list(core.data.read_id_values(file))
    sub_ids = list(core.data.read_id_values(subfile))

    # The file must be opened for writing: the default mode is read-only,
    # which makes every write() call fail.
    with open(outfile, "w") as of:
        for utt_id in utt_ids:
            matching = " ".join(
                sub_id for sub_id in sub_ids if sub_id.startswith(utt_id)
            )
            # Terminate each record so that entries end up on separate lines.
            of.write(f"{utt_id} {matching}\n")
107 | + | |
108 | + | |
def sub2utt(self, file: str, subfile: str, outfile: str):
    """Write, for every sub id, the id from ``file`` that it starts with.

    For each id read from ``file``, every id from ``subfile`` whose text
    begins with it produces one output line: ``<sub_id> <id>``.

    Args:
        self: Unused. Kept only so the existing signature is unchanged.
            NOTE(review): this is defined like a module-level function, so
            the parameter looks unintended -- confirm against the caller.
        file: Path handed to ``core.data.read_id_values`` for the normal ids.
        subfile: Path handed to ``core.data.read_id_values`` for the sub ids.
        outfile: Path of the file to write; it is created or overwritten.
    """
    utt_ids = list(core.data.read_id_values(file))
    sub_ids = list(core.data.read_id_values(subfile))

    # The file must be opened for writing: the default mode is read-only,
    # which makes every write() call fail.
    with open(outfile, "w") as of:
        for utt_id in utt_ids:
            for sub_id in sub_ids:
                if sub_id.startswith(utt_id):
                    # Terminate each record so that entries end up on
                    # separate lines.
                    of.write(f"{sub_id} {utt_id}\n")
121 | + | |
122 | + | |
81 | 123 | if __name__ == '__main__': |
82 | 124 | # Main parser |
83 | 125 | parser = argparse.ArgumentParser(description="...") |
... | ... | @@ -108,6 +150,31 @@ |
108 | 150 | parser_changelabels.add_argument("--outfile", required=True, type=str, help="Output file") |
109 | 151 | parser_changelabels.set_defaults(which="changelabels") |
110 | 152 | |
153 | + # Create converter | |
154 | + parser_converter = subparsers.add_parser("converter", help="Create converter file") | |
155 | + parser_converter.add_argument("--file", | |
156 | + type=str, | |
157 | + required=True, | |
158 | + help="File with ids from which create converter.") | |
159 | + parser_converter.add_argument("--outtype", type=str, choices=["kaldi2masseffect", "masseffect2kaldi"]) | |
160 | + parser_converter.add_argument("--outfile", type=str, required=True, help="") | |
161 | + parser_converter.set_defaults(which="converter") | |
162 | + | |
163 | + # Create utt2sub | |
164 | + parser_utt2sub = subparsers.add_parser("utt2sub", help="generate utt2sub file") | |
165 | + parser_utt2sub.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids") | |
166 | + parser_utt2sub.add_argument("--subfile", required=True, type=str, help="features, list or labels file with sub ids") | |
167 | + parser_utt2sub.add_argument("--outfile", required=True, type=str, help="output file") | |
168 | + parser_utt2sub.set_defaults(which="utt2sub") | |
169 | + | |
170 | + # Create sub2utt | |
171 | + parser_sub2utt = subparsers.add_parser("sub2utt", help="generate sub2utt file") | |
172 | + parser_sub2utt.add_argument("--file", required=True, type=str, help="features, list or labels file with normal ids") | |
173 | + parser_sub2utt.add_argument("--subfile", required=True, type=str, help="features, list or labels file sub ids") | |
174 | + parser_sub2utt.add_argument("--outfile", required=True, type=str, help="output file") | |
175 | + parser_sub2utt.set_defaults(which="sub2utt") | |
176 | + | |
177 | + | |
111 | 178 | # Parse |
112 | 179 | args = parser.parse_args() |
113 | 180 | |
... | ... | @@ -116,7 +183,10 @@ |
116 | 183 | "utt2char" : utt2char, |
117 | 184 | "char2utt": char2utt, |
118 | 185 | "wavscp": wavscp, |
119 | - "changelabels": changelabels | |
186 | + "changelabels": changelabels, | |
187 | + "converter": converter, | |
188 | + "utt2sub": utt2sub, | |
189 | + "sub2utt": sub2utt | |
120 | 190 | }) |
121 | 191 | |
122 | 192 | runner.run(args.which, args.__dict__, remove="which") |