Commit a36206bdd16e5370ebf3daf9f2392855aab135d5

Authored by quillotm
1 parent 9a08c7beb3
Exists in master

Add the functionality needed to generate utt2spk and utt2char files

Showing 1 changed file with 36 additions and 2 deletions Inline Diff

1 1
2 import argparse 2 import argparse
3 from utils import SubCommandRunner 3 from utils import SubCommandRunner
4 from core.data import read_lst 4 from core.data import read_lst
5 import os 5 import os
6 6
7 7
8 8
def lst2wav(lst: str, root: str, outfile: str):
    """Write a wav.scp-like file mapping each list entry to its wav path.

    Every entry returned by ``read_lst(lst)`` is split on commas. The first
    field selects the language directory (``en-us`` -> ``english``,
    ``fr-fr`` -> ``french``) and the fourth field is turned into the wav
    file name by replacing its dots with dashes, appending ``.wav`` and
    removing spaces. Each output line is ``<entry> <full_path>``.

    Args:
        lst: Path of the list file, passed to ``read_lst``.
        root: Directory placed in front of ``<language>/wave-audio/<wav>``.
        outfile: Path of the file to write (opened in ``"w"`` mode).

    Raises:
        ValueError: If an entry has fewer than four comma-separated fields
            or its first field is not a supported locale.
    """
    languages = {"en-us": "english", "fr-fr": "french"}

    # Load the list before opening the output so a read failure does not
    # truncate an existing output file.
    lst_ = read_lst(lst)

    with open(outfile, "w") as f:
        for id_ in lst_:
            # Strip any newline once and reuse the cleaned id for both the
            # split and the output line, so a record never spans two lines.
            utt_id = id_.replace("\n", "")
            fields = utt_id.split(",")
            if len(fields) < 4:
                raise ValueError(
                    f"expected at least 4 comma-separated fields, got {utt_id!r}"
                )

            # Fail with a clear message instead of letting os.path.join
            # choke on a None path component.
            language = languages.get(fields[0])
            if language is None:
                raise ValueError(
                    f"unsupported language {fields[0]!r} in entry {utt_id!r}"
                )

            # Create wav filename
            wav = ("-".join(fields[3].split(".")) + ".wav").replace(" ", "")

            # Create full path
            full_path = os.path.join(root, language, "wave-audio", wav)

            f.write(f"{utt_id} {full_path}\n")
33
def utt2spk(lst: str, outfile: str):
    """Write an utt2spk file with one ``<entry> <speaker-id>`` line per entry.

    The speaker id is built from the first two comma-separated fields of
    each entry returned by ``read_lst(lst)``, joined with a dash.

    Args:
        lst: Path of the list file, passed to ``read_lst``.
        outfile: Path of the file to write (opened in ``"w"`` mode).

    Raises:
        ValueError: If an entry has fewer than two comma-separated fields.
    """
    lst_ = read_lst(lst)

    with open(outfile, "w") as f:
        # Iterate over the loaded entries (lst_), not the characters of the
        # list file's path (lst).
        for id_ in lst_:
            # Same newline handling as lst2wav, so each record stays on a
            # single output line.
            utt_id = id_.replace("\n", "")
            fields = utt_id.split(",")
            if len(fields) < 2:
                raise ValueError(
                    f"expected at least 2 comma-separated fields, got {utt_id!r}"
                )
            spk_id = fields[0] + "-" + fields[1]
            f.write(utt_id + " " + spk_id + "\n")
42
43
def utt2char(lst: str, outfile: str):
    """Write an utt2char file with one ``<entry> <character-id>`` line per entry.

    The character id is the second comma-separated field of each entry
    returned by ``read_lst(lst)``.

    Args:
        lst: Path of the list file, passed to ``read_lst``.
        outfile: Path of the file to write (opened in ``"w"`` mode).

    Raises:
        ValueError: If an entry has fewer than two comma-separated fields.
    """
    lst_ = read_lst(lst)

    with open(outfile, "w") as f:
        # Iterate over the loaded entries (lst_), not the characters of the
        # list file's path (lst).
        for id_ in lst_:
            # Strip any newline so each record stays on a single output line.
            utt_id = id_.replace("\n", "")
            fields = utt_id.split(",")
            if len(fields) < 2:
                raise ValueError(
                    f"expected at least 2 comma-separated fields, got {utt_id!r}"
                )
            char_id = fields[1]
            f.write(utt_id + " " + char_id + "\n")
34 56
35 57
if __name__ == '__main__':
    # Main parser
    parser = argparse.ArgumentParser(description="Skyrim data-management commands")
    subparsers = parser.add_subparsers(title="action")

    # Every sub-command stores its own name in `which`, which is used below
    # to pick the function to run.
    parser_lst2wav = subparsers.add_parser("lst2wav", help="Generate wav file from lst")
    parser_lst2wav.add_argument("--lst", required=True, type=str, help="list file .lst")
    parser_lst2wav.add_argument("--root", required=True, type=str, help="root directory with audio files")
    parser_lst2wav.add_argument("--outfile", required=True, type=str, help="output wav.scp-like file")
    parser_lst2wav.set_defaults(which="lst2wav")

    parser_utt2spk = subparsers.add_parser("utt2spk", help="generate utt2spk file")
    parser_utt2spk.add_argument("--lst", required=True, type=str, help="list file .lst")
    parser_utt2spk.add_argument("--outfile", required=True, type=str, help="utt2spk output file")
    parser_utt2spk.set_defaults(which="utt2spk")

    parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
    parser_utt2char.add_argument("--lst", required=True, type=str, help="list file .lst")
    parser_utt2char.add_argument("--outfile", required=True, type=str, help="utt2char output file")
    parser_utt2char.set_defaults(which="utt2char")

    # Parse
    args = parser.parse_args()

    # Without a sub-command no `which` default is set; report a usage error
    # instead of failing with AttributeError further down.
    if not hasattr(args, "which"):
        parser.error("no action given; choose one of: lst2wav, utt2spk, utt2char")

    # Run commands
    runner = SubCommandRunner({
        "lst2wav": lst2wav,
        "utt2spk": utt2spk,
        "utt2char": utt2char,
    })

    runner.run(args.which, vars(args), remove="which")
56 90