Commit a7a92c6a2094a92cb8299af422531719077fc375
1 parent
88260de938
Exists in
master
add wav.scp generator to masseffect module
Showing 1 changed file with 48 additions and 3 deletions Side-by-side Diff
volia/masseffect.py
1 | 1 | import argparse |
2 | +from os import path | |
2 | 3 | import core.data |
3 | 4 | from utils import SubCommandRunner |
5 | +import os | |
4 | 6 | |
5 | - | |
6 | 7 | def utt2char(features: str, outfile: str): |
7 | 8 | """Allow the user to generate utt2char file from masseffect features file. |
8 | 9 | |
9 | 10 | |
10 | 11 | |
11 | 12 | |
... | ... | @@ -28,23 +29,66 @@ |
28 | 29 | pass |
29 | 30 | |
30 | 31 | |
32 | +def wavscp(datadir: str, outfile: str): | |
33 | + """Generate the masseffect wav scp file from the directories. | |
34 | + | |
35 | + Args: | |
36 | + datadir (str): path of the data directory where "audio_en-us" and "audio_fr-fr" are available | |
37 | + outfile (str): path of the wav scp output file | |
38 | + | |
39 | + Raises: | |
40 | + Exception: if one of the directories is not available | |
41 | + """ | |
42 | + en_us_dir = os.path.join(datadir, "audio_en-us") | |
43 | + fr_fr_dir = os.path.join(datadir, "audio_fr-fr") | |
44 | + | |
45 | + if (not os.path.isdir(en_us_dir)) or (not os.path.isdir(fr_fr_dir)): | |
46 | + raise Exception("Directory audio_en-us or audio_fr-fr does not exist") | |
47 | + | |
48 | + _,_,filenames_en=next(os.walk(en_us_dir)) | |
49 | + # filenames_en = [ os.path.join(en_us_dir, f) for f in filenames_en ] | |
50 | + dir_en = [ en_us_dir for f in filenames_en ] | |
51 | + _,_,filenames_fr=next(os.walk(fr_fr_dir)) | |
52 | + dir_fr = [ fr_fr_dir for f in filenames_fr ] | |
53 | + # filenames_fr = [ os.path.join(fr_fr_dir, f) for f in filenames_fr ] | |
54 | + | |
55 | + directories = dir_en + dir_fr | |
56 | + filenames = filenames_en + filenames_fr | |
57 | + | |
58 | + | |
59 | + with open(outfile, "w") as f: | |
60 | + for i, fn in enumerate(filenames): | |
61 | + splited = fn.split(".")[0].split(",") | |
62 | + lang = splited[0] | |
63 | + character = splited[1] | |
64 | + record_id = splited[3] | |
65 | + path = os.path.join(directories[i], fn) | |
66 | + f.write(f"{lang},{character},{record_id} {path}\n") | |
67 | + | |
68 | + | |
69 | + | |
31 | 70 | if __name__ == '__main__': |
32 | 71 | # Main parser |
33 | 72 | parser = argparse.ArgumentParser(description="...") |
34 | 73 | subparsers = parser.add_subparsers(title="action") |
35 | 74 | |
36 | 75 | # utt2char |
37 | - parser_utt2char = subparsers.add_parser("utt2char") | |
76 | + parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file") | |
38 | 77 | parser_utt2char.add_argument("--features", type=str, help="features file") |
39 | 78 | parser_utt2char.add_argument("--outfile", type=str, help="output file") |
40 | 79 | parser_utt2char.set_defaults(which="utt2char") |
41 | 80 | |
42 | 81 | # char2utt |
43 | - parser_char2utt = subparsers.add_parser("char2utt") | |
82 | + parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file") | |
44 | 83 | parser_char2utt.add_argument("--features", type=str, help="features file") |
45 | 84 | parser_char2utt.add_argument("--outfile", type=str, help="output file") |
46 | 85 | parser_char2utt.set_defaults(which="char2utt") |
47 | 86 | |
87 | + # wavscp | |
88 | + parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file") | |
89 | + parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect") | |
90 | + parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file") | |
91 | + parser_wavscp.set_defaults(which="wavscp") | |
48 | 92 | |
49 | 93 | # Parse |
50 | 94 | args = parser.parse_args() |
... | ... | @@ -53,6 +97,7 @@ |
53 | 97 | runner = SubCommandRunner({ |
54 | 98 | "utt2char" : utt2char, |
55 | 99 | "char2utt": char2utt, |
100 | + "wavscp": wavscp | |
56 | 101 | }) |
57 | 102 | |
58 | 103 | runner.run(args.which, args.__dict__, remove="which") |