masseffect.py
3.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import argparse
from os import path
import core.data
from utils import SubCommandRunner
import os
def utt2char(features: str, outfile: str):
    """Write an utt2char file from a masseffect features file.

    Every key of the mapping returned by ``core.data.read_features`` has its
    newline characters removed and is split on commas; the second field is
    used as the character. One ``<key> <character>`` line is written per key.

    TODO: Don't forget to manage two cases: one with old ids, and another
    with new ones.

    Args:
        features (str): features file, passed to ``core.data.read_features``.
        outfile (str): file to write; opened in "w" mode, so existing content
            is replaced.

    Raises:
        IndexError: if a key contains no comma (no second field).
    """
    # Read every key up front so the output file is only opened once the
    # features have been loaded successfully.
    utterances = list(core.data.read_features(features).keys())

    with open(outfile, "w") as out:
        for utterance in utterances:
            fields = utterance.replace("\n", "").split(",")
            speaker = fields[1]
            out.write(f"{','.join(fields)} {speaker}\n")
def char2utt(features: str, outfile: str):
    """Placeholder for the char2utt generation; not implemented yet.

    Args:
        features (str): features file (currently unused).
        outfile (str): output file (currently unused).

    Raises:
        NotImplementedError: always. It is a subclass of ``Exception``, so
            callers that catch ``Exception`` keep working.
    """
    raise NotImplementedError("Not implemented yet")
def wavscp(datadir: str, outfile: str):
    """Generate the masseffect wav scp file from the audio directories.

    The files located directly inside ``audio_en-us`` and ``audio_fr-fr``
    are listed (sub-directories are not descended into), English first.
    Each file name, minus its extension, is split on commas and one line is
    written per file::

        <field 0>,<field 1>,<field 3> <path of the file>

    The code treats field 0 as the language, field 1 as the character and
    field 3 as the record id; field 2 is not used.

    Args:
        datadir (str): path of the data directory where "audio_en-us" and
            "audio_fr-fr" are available
        outfile (str): path of the wav scp output file

    Raises:
        Exception: if one of the directories is not available
        IndexError: if a file name has fewer than four comma-separated fields
    """
    language_dirs = [
        os.path.join(datadir, name) for name in ("audio_en-us", "audio_fr-fr")
    ]
    if not all(os.path.isdir(d) for d in language_dirs):
        raise Exception("Directory audio_en-us or audio_fr-fr does not exist")

    # Pair every file name with the directory it came from instead of keeping
    # two parallel lists in sync by index.
    entries = []
    for language_dir in language_dirs:
        # First item yielded by os.walk describes the directory itself.
        _, _, filenames = next(os.walk(language_dir))
        entries.extend((language_dir, fn) for fn in filenames)

    with open(outfile, "w") as f:
        for language_dir, fn in entries:
            # splitext drops only the final extension, so a dot inside a
            # field (e.g. "dr. chakwas") no longer truncates the file name.
            stem = os.path.splitext(fn)[0]
            fields = stem.split(",")
            lang, character, record_id = fields[0], fields[1], fields[3]
            wav_path = os.path.join(language_dir, fn)
            f.write(f"{lang},{character},{record_id} {wav_path}\n")
if __name__ == '__main__':
    # Command-line entry point: one sub-command per generator function above.
    # Each sub-parser stores its own name in `which` so the matching handler
    # can be looked up after parsing.

    # Main parser
    parser = argparse.ArgumentParser(description="...")
    subparsers = parser.add_subparsers(title="action")

    # utt2char
    parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
    parser_utt2char.add_argument("--features", type=str, help="features file")
    parser_utt2char.add_argument("--outfile", type=str, help="output file")
    parser_utt2char.set_defaults(which="utt2char")

    # char2utt
    parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file")
    parser_char2utt.add_argument("--features", type=str, help="features file")
    parser_char2utt.add_argument("--outfile", type=str, help="output file")
    parser_char2utt.set_defaults(which="char2utt")

    # wavscp
    parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
    parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
    parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
    parser_wavscp.set_defaults(which="wavscp")

    # Parse
    args = parser.parse_args()

    # Sub-commands are optional by default in Python 3's argparse: when no
    # action is given, `which` is never set. Report a usage error instead of
    # failing with an AttributeError below.
    if not hasattr(args, "which"):
        parser.error("an action is required: utt2char, char2utt or wavscp")

    # Run commands
    # NOTE(review): presumably SubCommandRunner calls the handler registered
    # under `which` with the remaining parsed arguments — confirm in utils.
    runner = SubCommandRunner({
        "utt2char": utt2char,
        "char2utt": char2utt,
        "wavscp": wavscp,
    })
    runner.run(args.which, vars(args), remove="which")