voxceleb.py
2.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import argparse
from utils import SubCommandRunner
def utt2spk(features: str, outfile: str):
    """Generate a utt2spk file from a feature file of voxceleb.

    (it also works with list files instead of features)

    The first whitespace-separated field of every input line is taken as the
    utterance id, and the speaker id is the part of that utterance id that
    precedes the first "-". One ``<utterance-id> <speaker-id>`` line is
    written per input utterance, in input order. Blank lines are ignored.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the utt2spk
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        for line in f:
            # split() without an argument splits on any run of whitespace, so
            # tab-separated files, repeated spaces and a trailing "\r\n" are
            # all handled (split(" ") only understood single spaces).
            fields = line.split()
            if not fields:
                # Blank line: there is no utterance id to map.
                continue
            utt_id = fields[0]
            # Only the prefix before the first "-" is needed.
            spk = utt_id.split("-", 1)[0]
            out.write(utt_id + " " + spk + "\n")
def spk2utt(features: str, outfile: str):
    """Generate a spk2utt file from a feature file of voxceleb.

    (it also works with list files instead of features)

    The first whitespace-separated field of every input line is taken as the
    utterance id, and the speaker id is the part of that utterance id that
    precedes the first "-". Utterances are grouped per speaker and one
    ``<speaker-id> <utt-id> <utt-id> ...`` line is written per speaker.
    Speakers are written in dict iteration order (first appearance in the
    input on Python 3.7+). Blank lines are ignored.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the spk2utt
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        spk2utt_dict = {}
        for line in f:
            # split() without an argument splits on any run of whitespace, so
            # tab-separated files, repeated spaces and a trailing "\r\n" are
            # all handled (split(" ") only understood single spaces).
            fields = line.split()
            if not fields:
                # Blank line: it would otherwise create an empty speaker entry.
                continue
            utt_id = fields[0]
            spk = utt_id.split("-", 1)[0]
            # setdefault creates the per-speaker list on first sight.
            spk2utt_dict.setdefault(spk, []).append(utt_id)

        # The whole input must be read before writing: a speaker's utterances
        # may be scattered anywhere in the file.
        for spk, ids in spk2utt_dict.items():
            out.write(spk + " " + " ".join(ids) + "\n")
def wavscp(datadir: str, outfile: str):
    """Placeholder for the wav.scp generation; not implemented yet.

    Args:
        datadir (str): data directory (unused for now)
        outfile (str): wav.scp output file (unused for now)

    Raises:
        NotImplementedError: always. It is a subclass of ``Exception``, so
            callers that catch ``Exception`` keep working.
    """
    raise NotImplementedError("Under construction")
if __name__ == "__main__":
    # Main parser
    parser = argparse.ArgumentParser(description="Voxceleb data management")
    subparsers = parser.add_subparsers(title="action")

    # utt2spk
    parser_utt2spk = subparsers.add_parser(
        "utt2spk",
        help="Generate utt2spk file from feature file (works with list).",
    )
    parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)")
    parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file")
    # "which" records the selected sub-command so it can be dispatched below.
    parser_utt2spk.set_defaults(which="utt2spk")

    # spk2utt
    parser_spk2utt = subparsers.add_parser(
        "spk2utt",
        help="Generate spk2utt file from feature file (works with list).",
    )
    parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)")
    parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")
    parser_spk2utt.set_defaults(which="spk2utt")

    # wavscp
    # Registered on `subparsers`; the former `subparser` was an undefined name
    # and made the script fail with NameError before parsing anything.
    parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
    parser_wavscp.add_argument("--datadir", required=True, help="data directory of voxceleb")
    parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
    parser_wavscp.set_defaults(which="wavscp")

    # Parse
    args = parser.parse_args()
    if not hasattr(args, "which"):
        # No sub-command given: "which" is only set by the sub-parsers, so
        # report a usage error instead of crashing with AttributeError below.
        parser.error("an action is required: utt2spk, spk2utt or wavscp")

    # Run commands
    # NOTE(review): SubCommandRunner comes from the project's utils module;
    # presumably it calls the function registered under args.which with the
    # parsed arguments minus the "which" key -- confirm against utils.
    runner = SubCommandRunner({
        "utt2spk": utt2spk,
        "spk2utt": spk2utt,
        "wavscp": wavscp,
    })
    runner.run(args.which, vars(args), remove="which")