Commit 4333e05c50ad35a7e5650ace81e734d23631c154
Exists in
master
Merge branch 'master' of gitlia.univ-avignon.fr:mathias.quillot/volia
Showing 1 changed file Inline Diff
volia/masseffect.py
1 | import argparse | 1 | import argparse |
2 | from os import path | 2 | from os import path |
3 | import core.data | 3 | import core.data |
4 | from utils import SubCommandRunner | 4 | from utils import SubCommandRunner |
5 | import os | 5 | import os |
6 | 6 | ||
7 | 7 | ||
def utt2char(features: str, outfile: str):
    """Allow the user to generate utt2char file from masseffect features file.

    Every id of the features file produces one output line made of the id
    (with any newline character removed), a space, and the second
    comma-separated field of that id, which is used as the character.

    TODO: Don't forget to manage two cases: one with old ids, and an other with
    new ones.

    Args:
        features (str): path of the features file, loaded through
            ``core.data.read_features``.
        outfile (str): path of the utt2char file to write (overwritten if it
            already exists).

    Raises:
        IndexError: if an id does not contain any comma, since the character
            is taken from its second field.
    """
    data = core.data.read_features(features)

    with open(outfile, "w") as f:
        for key in data.keys():
            # Ids may carry a trailing newline; strip it so that each id
            # stays on a single output line.
            utt_id = key.replace("\n", "")
            character = utt_id.split(",")[1]
            f.write(f"{utt_id} {character}\n")
26 | |||
27 | 26 | ||
27 | |||
def char2utt(features: str, outfile: str):
    """Generate a char2utt file (not implemented yet).

    The command is declared so that it can be registered in the command line
    interface, but it has no implementation for now.

    Args:
        features (str): features file (currently unused).
        outfile (str): output file (currently unused).

    Raises:
        NotImplementedError: always. It is a subclass of ``Exception``, so
            code catching ``Exception`` keeps working.
    """
    raise NotImplementedError("Not implemented yet")
31 | 31 | ||
32 | 32 | ||
def wavscp(datadir: str, outfile: str):
    """Generate the masseffect wav scp file from the directories.

    Only the files located directly inside "audio_en-us" and "audio_fr-fr"
    are listed; sub-directories are not visited. The part of each file name
    before its first dot is split on commas, and the line
    ``<field 0>,<field 1>,<field 3> <file path>`` is written, the three
    fields being used as language, character and record id.

    Files are written in sorted name order, "audio_en-us" first, so that the
    output does not depend on the arbitrary listing order of the file system.

    Args:
        datadir (str): path of the data directory where "audio_en-us" and
            "audio_fr-fr" are available
        outfile (str): path of the wav scp output file

    Raises:
        Exception: if one of the directory is not available
        IndexError: if a file name has fewer than four comma-separated fields
    """
    en_us_dir = os.path.join(datadir, "audio_en-us")
    fr_fr_dir = os.path.join(datadir, "audio_fr-fr")

    if not (os.path.isdir(en_us_dir) and os.path.isdir(fr_fr_dir)):
        raise Exception("Directory audio_en-us or audio_fr-fr does not exist")

    # List both directories before opening the output file, so that an
    # output file created inside one of them is never picked up.
    entries = []
    for directory in (en_us_dir, fr_fr_dir):
        # The first item yielded by os.walk describes the directory itself.
        _, _, filenames = next(os.walk(directory))
        entries.extend((directory, fn) for fn in sorted(filenames))

    with open(outfile, "w") as f:
        for directory, fn in entries:
            fields = fn.split(".")[0].split(",")
            lang = fields[0]
            character = fields[1]
            record_id = fields[3]
            wav_path = os.path.join(directory, fn)
            f.write(f"{lang},{character},{record_id} {wav_path}\n")
67 | 67 | ||
68 | 68 | ||
def changelabels(source: str, labels: str, outfile: str):
    """Replace the second field of every id of a file by a label.

    Each id of ``source`` is split on commas, its second field is replaced by
    the first element stored under the same id in ``labels``, and the
    resulting id is written to ``outfile`` with its original values through
    ``core.data.write_line``.

    Args:
        source (str): source file where we want to change ids, loaded
            through ``core.data.read_id_values``.
        labels (str): file with labels, loaded through
            ``core.data.read_labels``.
        outfile (str): output file (overwritten if it already exists).
    """
    data_dict = core.data.read_id_values(source)
    labels_dict = core.data.read_labels(labels)

    with open(outfile, "w") as f:
        for key in data_dict.keys():
            fields = key.split(",")
            # NOTE(review): read_labels presumably returns a sequence of
            # labels per id, of which only the first is kept - confirm.
            fields[1] = labels_dict[key][0]
            # Pass the open file explicitly: without ``out=f`` nothing is
            # written to ``outfile``.
            core.data.write_line(",".join(fields), data_dict[key], out=f)
79 | 79 | ||
80 | 80 | ||
if __name__ == '__main__':
    # Command line entry point: one sub-command per function of this module.
    # Each sub-parser stores its own name in "which" so that the matching
    # function can be selected once the arguments are parsed.

    # Main parser
    parser = argparse.ArgumentParser(description="...")
    subparsers = parser.add_subparsers(title="action")

    # utt2char
    parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
    parser_utt2char.add_argument("--features", required=True, type=str, help="features file")
    parser_utt2char.add_argument("--outfile", required=True, type=str, help="output file")
    parser_utt2char.set_defaults(which="utt2char")

    # char2utt
    parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file")
    parser_char2utt.add_argument("--features", required=True, type=str, help="features file")
    parser_char2utt.add_argument("--outfile", required=True, type=str, help="output file")
    parser_char2utt.set_defaults(which="char2utt")

    # wavscp
    parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
    parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
    parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
    parser_wavscp.set_defaults(which="wavscp")

    # Change labels
    parser_changelabels = subparsers.add_parser("changelabels", help="...")
    parser_changelabels.add_argument("--source", required=True, type=str, help="source file where we want to change ids.")
    parser_changelabels.add_argument("--labels", required=True, type=str, help="file with labels")
    parser_changelabels.add_argument("--outfile", required=True, type=str, help="Output file")
    parser_changelabels.set_defaults(which="changelabels")

    # Parse
    args = parser.parse_args()

    # Sub-commands are optional for argparse by default: without one, "which"
    # is never set. Report a usage error instead of an AttributeError.
    if not hasattr(args, "which"):
        parser.error("an action is required")

    # Run commands
    runner = SubCommandRunner({
        "utt2char": utt2char,
        "char2utt": char2utt,
        "wavscp": wavscp,
        "changelabels": changelabels,
    })

    runner.run(args.which, vars(args), remove="which")