Commit a7a92c6a2094a92cb8299af422531719077fc375
1 parent
88260de938
Exists in
master
add wav.scp generator to masseffect module
Showing 1 changed file with 48 additions and 3 deletions Inline Diff
volia/masseffect.py
1 | import argparse | 1 | import argparse |
2 | from os import path | ||
2 | import core.data | 3 | import core.data |
3 | from utils import SubCommandRunner | 4 | from utils import SubCommandRunner |
5 | import os | ||
4 | 6 | ||
5 | |||
def utt2char(features: str, outfile: str):
    """Write an utt2char mapping built from a masseffect features file.

    Each key returned by ``core.data.read_features`` is treated as a
    comma-separated utterance id whose second field is the character.
    One line ``<utterance id> <character>`` is written per key.

    TODO: handle both id schemes: the old ids and the new ones.

    Args:
        features (str): features file handed to ``core.data.read_features``
        outfile (str): path of the file the mapping is written to
    """
    # Collect the ids before opening (and truncating) the output file.
    utterance_ids = [*core.data.read_features(features).keys()]

    with open(outfile, "w") as out:
        for raw_id in utterance_ids:
            # Keys may carry newline characters; drop them from the id.
            utt_id = raw_id.replace("\n", "")
            speaker = utt_id.split(",")[1]
            out.write(utt_id + " " + speaker + "\n")
24 | 25 | ||
25 | 26 | ||
def char2utt(features: str, outfile: str):
    """Placeholder for the char2utt generator; not implemented yet.

    Args:
        features (str): features file (currently unused)
        outfile (str): output file (currently unused)

    Raises:
        NotImplementedError: always. It is a subclass of ``Exception``, so
            callers that caught the previous generic ``Exception`` still work.
    """
    raise NotImplementedError("Not implemented yet")
29 | 30 | ||
30 | 31 | ||
def wavscp(datadir: str, outfile: str):
    """Generate the masseffect wav scp file from the directories.

    Every file found directly inside ``<datadir>/audio_en-us`` and
    ``<datadir>/audio_fr-fr`` yields one output line::

        <lang>,<character>,<record_id> <path of the file>

    where ``lang``, ``character`` and ``record_id`` are the first, second
    and fourth comma-separated fields of the file name without its
    extension. English files are listed before French ones, and files are
    sorted by name inside each directory so the output is deterministic.

    Args:
        datadir (str): path of the data directory where "audio_en-us" and
            "audio_fr-fr" are available
        outfile (str): path of the wav scp output file

    Raises:
        FileNotFoundError: if one of the two directories is not available
        ValueError: if a file name has fewer than four comma-separated
            fields (nothing is written to ``outfile`` in that case)
    """
    audio_dirs = [
        os.path.join(datadir, "audio_en-us"),
        os.path.join(datadir, "audio_fr-fr"),
    ]
    if not all(os.path.isdir(audio_dir) for audio_dir in audio_dirs):
        raise FileNotFoundError("Directory audio_en-us or audio_fr-fr does not exist")

    # Build every line first so a malformed file name cannot leave a
    # truncated, half-written output file behind.
    lines = []
    for audio_dir in audio_dirs:
        # Only the top level of each directory is listed.
        _, _, filenames = next(os.walk(audio_dir))
        for filename in sorted(filenames):
            # splitext removes only the extension, so a dot inside a field
            # (e.g. a character name) does not cut the name short.
            stem, _ = os.path.splitext(filename)
            fields = stem.split(",")
            wav_path = os.path.join(audio_dir, filename)
            if len(fields) < 4:
                raise ValueError(
                    "Unexpected file name (need at least 4 comma-separated "
                    f"fields): {wav_path}"
                )
            lang, character, record_id = fields[0], fields[1], fields[3]
            lines.append(f"{lang},{character},{record_id} {wav_path}\n")

    with open(outfile, "w") as f:
        f.writelines(lines)
67 | |||
68 | |||
69 | |||
31 | if __name__ == '__main__': | 70 | if __name__ == '__main__': |
32 | # Main parser | 71 | # Main parser |
33 | parser = argparse.ArgumentParser(description="...") | 72 | parser = argparse.ArgumentParser(description="...") |
34 | subparsers = parser.add_subparsers(title="action") | 73 | subparsers = parser.add_subparsers(title="action") |
35 | 74 | ||
36 | # utt2char | 75 | # utt2char |
37 | parser_utt2char = subparsers.add_parser("utt2char") | 76 | parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file") |
38 | parser_utt2char.add_argument("--features", type=str, help="features file") | 77 | parser_utt2char.add_argument("--features", type=str, help="features file") |
39 | parser_utt2char.add_argument("--outfile", type=str, help="output file") | 78 | parser_utt2char.add_argument("--outfile", type=str, help="output file") |
40 | parser_utt2char.set_defaults(which="utt2char") | 79 | parser_utt2char.set_defaults(which="utt2char") |
41 | 80 | ||
42 | # char2utt | 81 | # char2utt |
43 | parser_char2utt = subparsers.add_parser("char2utt") | 82 | parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file") |
44 | parser_char2utt.add_argument("--features", type=str, help="features file") | 83 | parser_char2utt.add_argument("--features", type=str, help="features file") |
45 | parser_char2utt.add_argument("--outfile", type=str, help="output file") | 84 | parser_char2utt.add_argument("--outfile", type=str, help="output file") |
46 | parser_char2utt.set_defaults(which="char2utt") | 85 | parser_char2utt.set_defaults(which="char2utt") |
47 | 86 | ||
87 | # wavscp | ||
88 | parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file") | ||
89 | parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect") | ||
90 | parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file") | ||
91 | parser_wavscp.set_defaults(which="wavscp") | ||
48 | 92 | ||
49 | # Parse | 93 | # Parse |
50 | args = parser.parse_args() | 94 | args = parser.parse_args() |
51 | 95 | ||
52 | # Run commands | 96 | # Run commands |
53 | runner = SubCommandRunner({ | 97 | runner = SubCommandRunner({ |
54 | "utt2char" : utt2char, | 98 | "utt2char" : utt2char, |
55 | "char2utt": char2utt, | 99 | "char2utt": char2utt, |
100 | "wavscp": wavscp | ||
56 | }) | 101 | }) |
57 | 102 |