Commit a7a92c6a2094a92cb8299af422531719077fc375

Authored by Quillot Mathias
1 parent 88260de938
Exists in master

add wav.scp generator to masseffect module

Showing 1 changed file with 48 additions and 3 deletions Inline Diff

1 import argparse 1 import argparse
2 from os import path
2 import core.data 3 import core.data
3 from utils import SubCommandRunner 4 from utils import SubCommandRunner
5 import os
4 6
5
def utt2char(features: str, outfile: str):
    """Write an utt2char file built from a masseffect features file.

    Every key returned by ``core.data.read_features`` is handled as a
    comma-separated utterance id; its second field is used as the character.
    One line ``<utterance id> <character>`` is written per key, with any
    newline characters stripped from the id.

    TODO: Don't forget to manage two cases: one with old ids, and an other with
    new ones.

    Args:
        features (str): features file, handed to ``core.data.read_features``
        outfile (str): path of the file that receives the mapping
    """
    # Load everything before the output file is opened (and truncated).
    utt_ids = list(core.data.read_features(features).keys())

    with open(outfile, "w") as out:
        for raw_id in utt_ids:
            utt_id = raw_id.replace("\n", "")
            fields = utt_id.split(",")
            out.write(f"{utt_id} {fields[1]}\n")
24 25
25 26
def char2utt(features: str, outfile: str):
    """Placeholder for the char2utt generator; always raises.

    Args:
        features (str): features file (currently unused)
        outfile (str): output file (currently unused)

    Raises:
        NotImplementedError: always, the command is not implemented yet
    """
    # NotImplementedError is still an Exception, so existing handlers that
    # catch Exception keep working.
    raise NotImplementedError("Not implemented yet")
29 30
30 31
def wavscp(datadir: str, outfile: str):
    """Generate the masseffect wav scp file from the directories.

    Files located directly inside "audio_en-us" and "audio_fr-fr" are listed
    (english directory first, names sorted inside each directory so the
    output is reproducible). The part of a file name before its first "." is
    split on ","; fields 0, 1 and 3 are used as lang, character and record id
    and one line ``<lang>,<character>,<record_id> <path>`` is written.
    Files whose name has fewer than four comma-separated fields are skipped.

    Args:
        datadir (str): path of the data directory where "audio_en-us" and "audio_fr-fr" are available
        outfile (str): path of the wav scp output file

    Raises:
        NotADirectoryError: if one of the directories is not available
    """
    audio_dirs = [
        os.path.join(datadir, "audio_en-us"),
        os.path.join(datadir, "audio_fr-fr"),
    ]

    if not all(os.path.isdir(audio_dir) for audio_dir in audio_dirs):
        raise NotADirectoryError("Directory audio_en-us or audio_fr-fr does not exist")

    # Collect every entry before opening the output file, so that an outfile
    # placed inside one of the audio directories is not listed itself.
    entries = []
    for audio_dir in audio_dirs:
        # First os.walk item = the directory itself; only its files are used.
        _, _, names = next(os.walk(audio_dir))
        for name in sorted(names):
            fields = name.split(".")[0].split(",")
            if len(fields) < 4:
                # Not a "<lang>,<character>,<...>,<record_id>" file name
                # (hidden files, notes, ...): ignore instead of crashing.
                continue
            wav_path = os.path.join(audio_dir, name)
            entries.append(f"{fields[0]},{fields[1]},{fields[3]} {wav_path}\n")

    with open(outfile, "w") as f:
        f.writelines(entries)
67
68
69
31 if __name__ == '__main__': 70 if __name__ == '__main__':
32 # Main parser 71 # Main parser
33 parser = argparse.ArgumentParser(description="...") 72 parser = argparse.ArgumentParser(description="...")
34 subparsers = parser.add_subparsers(title="action") 73 subparsers = parser.add_subparsers(title="action")
35 74
36 # utt2char 75 # utt2char
37 parser_utt2char = subparsers.add_parser("utt2char") 76 parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
38 parser_utt2char.add_argument("--features", type=str, help="features file") 77 parser_utt2char.add_argument("--features", type=str, help="features file")
39 parser_utt2char.add_argument("--outfile", type=str, help="output file") 78 parser_utt2char.add_argument("--outfile", type=str, help="output file")
40 parser_utt2char.set_defaults(which="utt2char") 79 parser_utt2char.set_defaults(which="utt2char")
41 80
42 # char2utt 81 # char2utt
43 parser_char2utt = subparsers.add_parser("char2utt") 82 parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file")
44 parser_char2utt.add_argument("--features", type=str, help="features file") 83 parser_char2utt.add_argument("--features", type=str, help="features file")
45 parser_char2utt.add_argument("--outfile", type=str, help="output file") 84 parser_char2utt.add_argument("--outfile", type=str, help="output file")
46 parser_char2utt.set_defaults(which="char2utt") 85 parser_char2utt.set_defaults(which="char2utt")
47 86
87 # wavscp
88 parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
89 parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
90 parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
91 parser_wavscp.set_defaults(which="wavscp")
48 92
49 # Parse 93 # Parse
50 args = parser.parse_args() 94 args = parser.parse_args()
51 95
52 # Run commands 96 # Run commands
53 runner = SubCommandRunner({ 97 runner = SubCommandRunner({
54 "utt2char" : utt2char, 98 "utt2char" : utt2char,
55 "char2utt": char2utt, 99 "char2utt": char2utt,
100 "wavscp": wavscp
56 }) 101 })
57 102