Commit 233b7d451bb4f8b0632c25899452061b70018c96
1 parent
b6c178927e
Exists in
master
adding changelabels function
Showing 1 changed file with 22 additions and 3 deletions Inline Diff
volia/masseffect.py
1 | import argparse | 1 | import argparse |
2 | from os import path | 2 | from os import path |
3 | import core.data | 3 | import core.data |
4 | from utils import SubCommandRunner | 4 | from utils import SubCommandRunner |
5 | import os | 5 | import os |
6 | 6 | ||
7 | |||
def utt2char(features: str, outfile: str):
    """Write an utterance-to-character mapping derived from a features file.

    Every key returned by ``core.data.read_features`` is treated as a
    comma-separated identifier whose second field is the character name.
    For each key one line ``<key> <character>`` is written, with newline
    characters removed from the key beforehand.

    TODO: Don't forget to manage two cases: one with the old ids, and
    another with the new ones.

    Args:
        features (str): features file, passed to ``core.data.read_features``.
        outfile (str): output file; it is opened in write mode, so any
            previous content is replaced.
    """
    utterance_ids = list(core.data.read_features(features).keys())

    with open(outfile, "w") as out:
        for raw_id in utterance_ids:
            # Splitting on "," and joining again gives back the same string,
            # so the cleaned key is written as-is.
            utt_id = raw_id.replace("\n", "")
            character = utt_id.split(",")[1]
            out.write(f"{utt_id} {character}\n")
25 | 26 | ||
26 | 27 | ||
def char2utt(features: str, outfile: str):
    """Placeholder for the char2utt generation; it is not implemented yet.

    Args:
        features (str): features file (currently unused).
        outfile (str): output file (currently unused).

    Raises:
        NotImplementedError: always. It is a subclass of ``Exception``, so
            callers that catch ``Exception`` keep working.
    """
    raise NotImplementedError("Not implemented yet")
30 | 31 | ||
31 | 32 | ||
def wavscp(datadir: str, outfile: str):
    """Generate the masseffect wav scp file from the audio directories.

    The files located directly inside ``<datadir>/audio_en-us`` and
    ``<datadir>/audio_fr-fr`` are listed (sub-directories are not visited).
    The part of each file name before its first ``.`` is split on ``,``;
    fields 0, 1 and 3 are used as language, character and record id, and
    one line is produced per file::

        <lang>,<character>,<record_id> <path of the file>

    Entries of ``audio_en-us`` come first, then those of ``audio_fr-fr``.
    Inside each directory the entries are sorted by file name so that the
    output does not depend on the order the file system lists them in.

    Args:
        datadir (str): path of the data directory where "audio_en-us" and
            "audio_fr-fr" are available
        outfile (str): path of the wav scp output file

    Raises:
        Exception: if one of the two directories is not available
        IndexError: if a file name has fewer than four comma-separated
            fields; the output file is not touched in that case
    """
    audio_dirs = [
        os.path.join(datadir, "audio_en-us"),
        os.path.join(datadir, "audio_fr-fr"),
    ]

    if not all(os.path.isdir(audio_dir) for audio_dir in audio_dirs):
        raise Exception("Directory audio_en-us or audio_fr-fr does not exist")

    # Build every line before opening the output file, so that a malformed
    # file name cannot leave a half-written wav scp behind.
    lines = []
    for audio_dir in audio_dirs:
        # The first os.walk() entry describes audio_dir itself; only its
        # list of files is needed.
        _, _, filenames = next(os.walk(audio_dir))
        for filename in sorted(filenames):
            fields = filename.split(".")[0].split(",")
            lang, character, record_id = fields[0], fields[1], fields[3]
            wav_path = os.path.join(audio_dir, filename)
            lines.append(f"{lang},{character},{record_id} {wav_path}\n")

    with open(outfile, "w") as f:
        f.writelines(lines)
67 | 67 | ||
68 | 68 | ||
def changelabels(source: str, labels: str, outfile: str):
    """Replace the second field of every id of ``source`` with its label.

    Ids and their values are read with ``core.data.read_id_values`` and the
    labels with ``core.data.read_labels``. Each id is split on ``,``, its
    second field is replaced by the label stored under the original id, and
    the re-joined id is handed to ``core.data.write_line`` together with the
    values of the original id.

    Args:
        source (str): source file where we want to change ids.
        labels (str): file with labels.
        outfile (str): output file; it is opened in write mode, so it is
            created or emptied.

    Raises:
        KeyError: if an id of ``source`` has no entry in the labels.
    """
    values_by_id = core.data.read_id_values(source)
    label_by_id = core.data.read_labels(labels)

    # NOTE(review): the opened file is never passed to core.data.write_line,
    # so nothing in this function writes into `outfile`. Presumably
    # write_line should receive the handle -- confirm against its signature.
    with open(outfile, "w"):
        for utt_id in list(values_by_id.keys()):
            fields = utt_id.split(",")
            fields[1] = label_by_id[utt_id]
            relabeled_id = ",".join(fields)
            core.data.write_line(relabeled_id, values_by_id[utt_id])
79 | |||
80 | |||
70 | if __name__ == '__main__': | 81 | if __name__ == '__main__': |
71 | # Main parser | 82 | # Main parser |
72 | parser = argparse.ArgumentParser(description="...") | 83 | parser = argparse.ArgumentParser(description="...") |
73 | subparsers = parser.add_subparsers(title="action") | 84 | subparsers = parser.add_subparsers(title="action") |
74 | 85 | ||
75 | # utt2char | 86 | # utt2char |
76 | parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file") | 87 | parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file") |
77 | parser_utt2char.add_argument("--features", type=str, help="features file") | 88 | parser_utt2char.add_argument("--features", type=str, help="features file") |
78 | parser_utt2char.add_argument("--outfile", type=str, help="output file") | 89 | parser_utt2char.add_argument("--outfile", type=str, help="output file") |
79 | parser_utt2char.set_defaults(which="utt2char") | 90 | parser_utt2char.set_defaults(which="utt2char") |
80 | 91 | ||
81 | # char2utt | 92 | # char2utt |
82 | parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file") | 93 | parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file") |
83 | parser_char2utt.add_argument("--features", type=str, help="features file") | 94 | parser_char2utt.add_argument("--features", type=str, help="features file") |
84 | parser_char2utt.add_argument("--outfile", type=str, help="output file") | 95 | parser_char2utt.add_argument("--outfile", type=str, help="output file") |
85 | parser_char2utt.set_defaults(which="char2utt") | 96 | parser_char2utt.set_defaults(which="char2utt") |
86 | 97 | ||
87 | # wavscp | 98 | # wavscp |
88 | parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file") | 99 | parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file") |
89 | parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect") | 100 | parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect") |
90 | parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file") | 101 | parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file") |
91 | parser_wavscp.set_defaults(which="wavscp") | 102 | parser_wavscp.set_defaults(which="wavscp") |
92 | 103 | ||
104 | # Change labels | ||
105 | parser_changelabels = subparsers.add_parser("changelabels", help="...") | ||
106 | parser_changelabels.add_argument("--source", required=True, type=str, help="source file where we want to change ids.") | ||
107 | parser_changelabels.add_argument("--labels", required=True, type=str, help="file with labels") | ||
108 | parser_changelabels.add_argument("--outfile", required=True, type=str, help="Output file") | ||
109 | parser_changelabels.set_defaults(which="changelabels") | ||
110 | |||
93 | # Parse | 111 | # Parse |
94 | args = parser.parse_args() | 112 | args = parser.parse_args() |
95 | 113 | ||
96 | # Run commands | 114 | # Run commands |
97 | runner = SubCommandRunner({ | 115 | runner = SubCommandRunner({ |
98 | "utt2char" : utt2char, | 116 | "utt2char" : utt2char, |
99 | "char2utt": char2utt, | 117 | "char2utt": char2utt, |
100 | "wavscp": wavscp | 118 | "wavscp": wavscp, |
119 | "changelabels": changelabels | ||
101 | }) | 120 | }) |
102 | 121 |