Commit 362c9d37a9e82b861d794ffc9dd11cd43d066cce

Authored by quillotm
1 parent 1d8b79f614
Exists in master

Fixed simple issue

Showing 1 changed file with 4 additions and 3 deletions Inline Diff

1 import argparse 1 import argparse
2 from os import path 2 from os import path
3 import core.data 3 import core.data
4 from utils import SubCommandRunner 4 from utils import SubCommandRunner
5 import os 5 import os
6 6
7 7
def utt2char(features: str, outfile: str):
    """Write an utt2char file built from a masseffect features file.

    Each key returned by ``core.data.read_features`` is handled as a
    comma-separated utterance id whose second field is the character name
    (presumably ``lang,character,...`` -- confirm against the features
    format). One line ``<utterance id> <character>`` is written per key.

    TODO: Don't forget to manage two cases: one with old ids, and an other with
    new ones.

    Args:
        features (str): path passed to ``core.data.read_features``.
        outfile (str): path of the file to write; it is opened in "w" mode.
    """
    utterances = core.data.read_features(features)
    utt_ids = list(utterances.keys())

    with open(outfile, "w") as out:
        for raw_id in utt_ids:
            # Ids may carry a stray newline; drop it before using the id.
            utt_id = raw_id.replace("\n", "")
            speaker = utt_id.split(",")[1]
            out.write(f"{utt_id} {speaker}\n")
26 26
27 27
def char2utt(features: str, outfile: str):
    """Placeholder for the char2utt generation (not implemented).

    Args:
        features (str): features file (currently unused).
        outfile (str): output file (currently unused).

    Raises:
        NotImplementedError: always. It is a subclass of ``Exception``, so
            callers that caught the former generic ``Exception`` still work.
    """
    raise NotImplementedError("Not implemented yet")
31 31
32 32
def wavscp(datadir: str, outfile: str):
    """Generate the masseffect wav scp file from the directories.

    Only the files located directly inside "audio_en-us" and "audio_fr-fr"
    are listed. A file name is read as ``lang,character,<ignored>,record_id``
    followed by an extension (everything after the first "." is dropped).
    Each output line is ``lang,character,record_id <path to the file>``.

    Lines are written sorted by utterance id so the result does not depend on
    the order in which the file system lists the files.

    Args:
        datadir (str): path of the data directory where "audio_en-us" and
            "audio_fr-fr" are available
        outfile (str): path of the wav scp output file

    Raises:
        Exception: if one of the directory is not available
        IndexError: if a file name has fewer than four comma-separated fields
    """
    audio_dirs = [
        os.path.join(datadir, "audio_en-us"),
        os.path.join(datadir, "audio_fr-fr"),
    ]

    if not all(os.path.isdir(audio_dir) for audio_dir in audio_dirs):
        raise Exception("Directory audio_en-us or audio_fr-fr does not exist")

    # Collect every entry before touching the output file, so that a badly
    # named audio file cannot leave a half-written wav.scp behind.
    entries = []
    for audio_dir in audio_dirs:
        # First item yielded by os.walk describes the directory itself.
        _, _, filenames = next(os.walk(audio_dir))
        for filename in filenames:
            fields = filename.split(".")[0].split(",")
            utt_id = f"{fields[0]},{fields[1]},{fields[3]}"
            entries.append((utt_id, os.path.join(audio_dir, filename)))

    # os.walk gives no ordering guarantee; sort for a reproducible file.
    entries.sort()

    with open(outfile, "w") as f:
        for utt_id, wav_path in entries:
            f.write(f"{utt_id} {wav_path}\n")
67 67
68 68
def changelabels(source: str, labels: str, outfile: str):
    """Copy ``source`` to ``outfile`` with the second id field relabelled.

    For every id read from ``source``, the second comma-separated field is
    replaced by the first element of the entry stored under the same id in
    the labels file. The values attached to the id are written unchanged
    through ``core.data.write_line``.

    Args:
        source (str): path passed to ``core.data.read_id_values``.
        labels (str): path passed to ``core.data.read_labels``.
        outfile (str): path of the file to write; it is opened in "w" mode.

    Raises:
        KeyError: if an id of ``source`` is missing from the labels
            (assuming ``read_labels`` returns a plain mapping -- confirm).
    """
    values_by_id = core.data.read_id_values(source)
    labels_by_id = core.data.read_labels(labels)
    source_ids = list(values_by_id.keys())

    with open(outfile, "w") as out:
        for source_id in source_ids:
            fields = source_id.split(",")
            new_label = labels_by_id[source_id][0]
            fields[1] = new_label
            relabelled_id = ",".join(fields)
            core.data.write_line(relabelled_id, values_by_id[source_id], out=out)
79 79
80 80
def converter(file: str, outtype: str, outfile: str):
    """Write a two-column conversion table between masseffect and kaldi ids.

    The masseffect id is the id read from ``file`` with newlines removed. The
    kaldi id is made of its 1st, 2nd and 4th comma-separated fields.

    Args:
        file (str): path passed to ``core.data.read_id_values``.
        outtype (str): "masseffect2kaldi" to write ``<masseffect> <kaldi>``
            lines, "kaldi2masseffect" to write ``<kaldi> <masseffect>`` lines.
        outfile (str): path of the file to write; it is opened in "w" mode.

    Raises:
        ValueError: if ``outtype`` is not one of the two supported values.
            Checked before any file is read or created, so a bad value no
            longer yields a silently empty output file.
    """
    supported = ("masseffect2kaldi", "kaldi2masseffect")
    if outtype not in supported:
        raise ValueError(
            f"outtype must be one of {', '.join(supported)}; got {outtype!r}"
        )

    data = core.data.read_id_values(file)
    # The direction never changes while looping, so decide it once.
    kaldi_first = outtype == "kaldi2masseffect"

    with open(outfile, "w") as of:
        for key in data:
            masseffect_id = key.replace("\n", "")
            fields = masseffect_id.split(",")
            kaldi_id = ",".join([fields[0], fields[1], fields[3]])
            if kaldi_first:
                of.write(f"{kaldi_id} {masseffect_id}\n")
            else:
                of.write(f"{masseffect_id} {kaldi_id}\n")
93 93
94 94
def utt2sub(file: str, outfile: str):
    """Write, for each utterance id, the list of its sub ids.

    The utterance id of a sub id is the sub id stripped of spaces and
    newlines, cut at its last "_" (empty when the sub id has no "_").
    One line ``<utterance id> <sub id> <sub id> ...`` is written per
    utterance, in order of first appearance.

    Args:
        file (str): path passed to ``core.data.read_id_values``.
        outfile (str): path of the file to write; it is opened in "w" mode.
    """
    entries = core.data.read_id_values(file)
    sub_ids = list(entries)

    with open(outfile, "w") as out:
        grouped = {}
        for sub_id in sub_ids:
            cleaned = sub_id.replace(" ", "").replace("\n", "")
            # Text before the last "_"; "" when there is no "_" at all.
            parent = cleaned.rpartition("_")[0]
            grouped.setdefault(parent, []).append(sub_id)

        for parent, members in grouped.items():
            joined = " ".join(members)
            out.write(f"{parent} {joined}\n")
109 110
110 111
def sub2utt(file: str, outfile: str):
    """Write, for each sub id, the utterance id it belongs to.

    The utterance id is the sub id stripped of spaces and newlines, cut at
    its last "_" (empty when the sub id has no "_"). One line
    ``<sub id> <utterance id>`` is written per sub id.

    Args:
        file (str): path passed to ``core.data.read_id_values``.
        outfile (str): path of the file to write; it is opened in "w" mode.
    """
    entries = core.data.read_id_values(file)
    sub_ids = list(entries)

    with open(outfile, "w") as out:
        for sub_id in sub_ids:
            cleaned = sub_id.replace(" ", "").replace("\n", "")
            # Text before the last "_"; "" when there is no "_" at all.
            parent = cleaned.rpartition("_")[0]
            out.write(f"{sub_id} {parent}\n")
119 120
120 121
if __name__ == '__main__':
    # Command-line entry point: one sub-command per function defined above.
    # Each sub-parser stores its own name in ``which`` so the matching
    # function can be looked up after parsing.

    # Main parser
    parser = argparse.ArgumentParser(description="...")
    subparsers = parser.add_subparsers(title="action")

    # utt2char
    parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
    # Both paths are mandatory: utt2char() cannot work with None.
    parser_utt2char.add_argument("--features", required=True, type=str, help="features file")
    parser_utt2char.add_argument("--outfile", required=True, type=str, help="output file")
    parser_utt2char.set_defaults(which="utt2char")

    # char2utt
    parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file")
    parser_char2utt.add_argument("--features", required=True, type=str, help="features file")
    parser_char2utt.add_argument("--outfile", required=True, type=str, help="output file")
    parser_char2utt.set_defaults(which="char2utt")

    # wavscp
    parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
    parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
    parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
    parser_wavscp.set_defaults(which="wavscp")

    # Change labels
    parser_changelabels = subparsers.add_parser("changelabels", help="...")
    parser_changelabels.add_argument("--source", required=True, type=str, help="source file where we want to change ids.")
    parser_changelabels.add_argument("--labels", required=True, type=str, help="file with labels")
    parser_changelabels.add_argument("--outfile", required=True, type=str, help="Output file")
    parser_changelabels.set_defaults(which="changelabels")

    # Create converter
    parser_converter = subparsers.add_parser("converter", help="Create converter file")
    parser_converter.add_argument("--file",
                                  type=str,
                                  required=True,
                                  help="File with ids from which create converter.")
    # Mandatory: without a direction converter() has nothing to write.
    parser_converter.add_argument("--outtype",
                                  type=str,
                                  required=True,
                                  choices=["kaldi2masseffect", "masseffect2kaldi"])
    parser_converter.add_argument("--outfile", type=str, required=True, help="")
    parser_converter.set_defaults(which="converter")

    # Create utt2sub
    parser_utt2sub = subparsers.add_parser("utt2sub", help="generate utt2sub file")
    parser_utt2sub.add_argument("--file", required=True, type=str, help="features, list or labels file with sub ids")
    parser_utt2sub.add_argument("--outfile", required=True, type=str, help="output file")
    parser_utt2sub.set_defaults(which="utt2sub")

    # Create sub2utt
    parser_sub2utt = subparsers.add_parser("sub2utt", help="generate sub2utt file")
    parser_sub2utt.add_argument("--file", required=True, type=str, help="features, list or labels file with sub ids")
    parser_sub2utt.add_argument("--outfile", required=True, type=str, help="output file")
    parser_sub2utt.set_defaults(which="sub2utt")

    # Parse
    args = parser.parse_args()

    # Without a sub-command no ``which`` default is set; report it as a usage
    # error instead of failing later with an AttributeError.
    if not hasattr(args, "which"):
        parser.error("an action is required (see --help for the list)")

    # Run commands
    runner = SubCommandRunner({
        "utt2char": utt2char,
        "char2utt": char2utt,
        "wavscp": wavscp,
        "changelabels": changelabels,
        "converter": converter,
        "utt2sub": utt2sub,
        "sub2utt": sub2utt
    })

    # The parsed options are forwarded to the selected function; "which" is
    # only the dispatch key and is asked to be removed by the runner.
    runner.run(args.which, args.__dict__, remove="which")
189 190
190 191