Commit a36206bdd16e5370ebf3daf9f2392855aab135d5
1 parent
9a08c7beb3
Exists in
master
Add needed functionalities to generate utt2spk and utt2char files
Showing 1 changed file with 36 additions and 2 deletions Inline Diff
volia/skyrim.py
1 | 1 | ||
2 | import argparse | 2 | import argparse |
3 | from utils import SubCommandRunner | 3 | from utils import SubCommandRunner |
4 | from core.data import read_lst | 4 | from core.data import read_lst |
5 | import os | 5 | import os |
6 | 6 | ||
7 | 7 | ||
8 | 8 | ||
def lst2wav(lst: str, root: str, outfile: str):
    """Write a wav.scp-like file mapping every list entry to a wav path.

    Each entry returned by ``read_lst`` is treated as a comma-separated id.
    Its first field is a locale code and its fourth field is turned into the
    wav file name: dots become dashes, ".wav" is appended and spaces are
    removed. One line ``<id> <root>/<language>/wave-audio/<wav>`` is written
    per entry.

    Args:
        lst: Path of the list file, passed to ``read_lst``.
        root: Root directory containing the per-language audio directories.
        outfile: Path of the file to write (overwritten if it exists).

    Raises:
        ValueError: If an entry's locale code is neither "en-us" nor "fr-fr".
        IndexError: If an entry has fewer than four comma-separated fields.
    """
    # Locale code (first id field) -> language sub-directory under ``root``.
    language_dirs = {"en-us": "english", "fr-fr": "french"}

    lst_ = read_lst(lst)

    with open(outfile, "w") as f:
        for id_ in lst_:
            # Whether read_lst keeps line endings is not visible from here;
            # strip them once so the id can never break the output line.
            utt_id = id_.replace("\n", "")
            fields = utt_id.split(",")

            # Fail with an explicit message instead of letting
            # os.path.join choke on a None path component.
            language = language_dirs.get(fields[0])
            if language is None:
                raise ValueError(
                    f"unsupported locale {fields[0]!r} in id {utt_id!r}"
                )

            # Create wav filename
            wav = (fields[3].replace(".", "-") + ".wav").replace(" ", "")

            # Create full path
            full_path = os.path.join(root, language, "wave-audio", wav)

            f.write(f"{utt_id} {full_path}\n")
32 | pass | 32 | |
33 | |||
def utt2spk(lst: str, outfile: str):
    """Write a utt2spk-style file with one ``<id> <speaker id>`` line per entry.

    Each entry returned by ``read_lst`` is treated as a comma-separated id;
    the speaker id is its first two fields joined with "-".

    Args:
        lst: Path of the list file, passed to ``read_lst``.
        outfile: Path of the file to write (overwritten if it exists).

    Raises:
        IndexError: If an entry has fewer than two comma-separated fields.
    """
    lst_ = read_lst(lst)

    with open(outfile, "w") as f:
        # Iterate the parsed entries (lst_), not the characters of the
        # path string (lst).
        for id_ in lst_:
            # Whether read_lst keeps line endings is not visible from here;
            # strip them so the id can never break the output line.
            utt_id = id_.replace("\n", "")
            fields = utt_id.split(",")
            spk_id = fields[0] + "-" + fields[1]
            f.write(utt_id + " " + spk_id + "\n")
42 | |||
43 | |||
def utt2char(lst: str, outfile: str):
    """Write a utt2char file with one ``<id> <character id>`` line per entry.

    Each entry returned by ``read_lst`` is treated as a comma-separated id;
    the character id is its second field.

    Args:
        lst: Path of the list file, passed to ``read_lst``.
        outfile: Path of the file to write (overwritten if it exists).

    Raises:
        IndexError: If an entry has fewer than two comma-separated fields.
    """
    lst_ = read_lst(lst)

    with open(outfile, "w") as f:
        # Iterate the parsed entries (lst_), not the characters of the
        # path string (lst).
        for id_ in lst_:
            # Whether read_lst keeps line endings is not visible from here;
            # strip them so the id can never break the output line.
            utt_id = id_.replace("\n", "")
            char_id = utt_id.split(",")[1]
            f.write(utt_id + " " + char_id + "\n")
34 | 56 | ||
35 | 57 | ||
if __name__ == '__main__':
    # Main parser: one sub-command per data-management action. Each
    # sub-parser stores its own name in ``which`` so the action can be
    # dispatched after parsing.
    parser = argparse.ArgumentParser(description="Skyrim data-management commands")
    subparsers = parser.add_subparsers(title="action")

    # lst2wav: every option is mandatory, as for the other sub-commands;
    # a missing one would otherwise reach the function as None.
    parser_lst2wav = subparsers.add_parser("lst2wav", help="Generate wav file from lst")
    parser_lst2wav.add_argument("--lst", required=True, type=str, help="list file .lst")
    parser_lst2wav.add_argument("--root", required=True, type=str, help="root directory with audio files")
    parser_lst2wav.add_argument("--outfile", required=True, type=str, help="output wav.scp-like file")
    parser_lst2wav.set_defaults(which="lst2wav")

    # utt2spk
    parser_utt2spk = subparsers.add_parser("utt2spk", help="generate utt2spk file")
    parser_utt2spk.add_argument("--lst", required=True, type=str, help="list file .lst")
    parser_utt2spk.add_argument("--outfile", required=True, type=str, help="utt2spk output file")
    parser_utt2spk.set_defaults(which="utt2spk")

    # utt2char
    parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
    parser_utt2char.add_argument("--lst", required=True, type=str, help="list file .lst")
    parser_utt2char.add_argument("--outfile", required=True, type=str, help="utt2char output file")
    parser_utt2char.set_defaults(which="utt2char")

    # Parse
    args = parser.parse_args()

    # Without a sub-command no ``which`` default is set; report a usage
    # error instead of failing with AttributeError below.
    if not hasattr(args, "which"):
        parser.error("no action given; choose one of: lst2wav, utt2spk, utt2char")

    # Run commands
    runner = SubCommandRunner({
        "lst2wav": lst2wav,
        "utt2spk": utt2spk,
        "utt2char": utt2char,
    })

    # NOTE(review): presumably SubCommandRunner.run calls the selected
    # function with the parsed options as keyword arguments after dropping
    # the "which" key - confirm against utils.SubCommandRunner.
    runner.run(args.which, args.__dict__, remove="which")
56 | 90 |