Commit 7213a2060906636344f308cf3eb3879bb086ed01
1 parent
8afb6416be
Exists in
master
Allow user to manage voxceleb files using features files. Other features incoming.
Showing 1 changed file with 70 additions and 0 deletions Side-by-side Diff
volia/voxceleb.py
1 | +import argparse | |
2 | +from utils import SubCommandRunner | |
3 | + | |
def utt2spk(features: str, outfile: str) -> None:
    """Generate a utt2spk file from a feature file of voxceleb.

    (it also works with list files instead of features)

    Every non-blank input line produces one output line of the form
    ``<utterance-id> <speaker-id>``. The utterance id is the first
    whitespace-separated field of the line and the speaker id is the part
    of that id located before the first ``-``. Input order is preserved.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the utt2spk
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        for line in f:
            # Split on any run of whitespace so tab-separated or
            # multi-space files are parsed the same as single-space ones.
            fields = line.split()
            if not fields:
                # Blank line: there is no utterance id, so emitting a line
                # would only produce a malformed (empty) entry.
                continue
            utt_id = fields[0]
            spk = utt_id.partition("-")[0]
            out.write(utt_id + " " + spk + "\n")
19 | + | |
20 | + | |
def spk2utt(features: str, outfile: str) -> None:
    """Generate a spk2utt file from a feature file of voxceleb.

    (it also works with list files instead of features)

    Every non-blank input line contributes one utterance id (the first
    whitespace-separated field). Utterances are grouped by speaker id, which
    is the part of the utterance id located before the first ``-``. One line
    ``<speaker-id> <utt-id> <utt-id> ...`` is written per speaker; speakers
    and their utterances are kept in order of first appearance in the input.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the spk2utt
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        spk2utt_dict = {}
        for line in f:
            # Split on any run of whitespace so tab-separated or
            # multi-space files are parsed the same as single-space ones.
            fields = line.split()
            if not fields:
                # Blank line: skipping it avoids creating an empty-named
                # speaker holding an empty utterance id.
                continue
            utt_id = fields[0]
            spk = utt_id.partition("-")[0]
            spk2utt_dict.setdefault(spk, []).append(utt_id)

        for spk, ids in spk2utt_dict.items():
            out.write(spk + " " + " ".join(ids) + "\n")
42 | + | |
43 | + | |
if __name__ == "__main__":
    # Main parser
    parser = argparse.ArgumentParser(description="Voxceleb data management")
    subparsers = parser.add_subparsers(title="action")

    # utt2spk
    parser_utt2spk = subparsers.add_parser("utt2spk", help="Generate utt2spk file from feature file (works with list).")
    parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)")
    parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file")
    parser_utt2spk.set_defaults(which="utt2spk")

    # spk2utt
    parser_spk2utt = subparsers.add_parser("spk2utt", help="Generate spk2utt file from feature file (works with list).")
    parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)")
    parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")
    parser_spk2utt.set_defaults(which="spk2utt")

    # Parse
    args = parser.parse_args()

    # Sub-commands are optional in Python 3's argparse, so "which" is only
    # set when an action was actually given on the command line. Fail with a
    # usage message instead of an AttributeError on args.which.
    if not hasattr(args, "which"):
        parser.error("an action is required: utt2spk or spk2utt")

    # Run commands
    runner = SubCommandRunner({
        "utt2spk": utt2spk,
        "spk2utt": spk2utt,
    })

    # The parsed options are forwarded as the arguments of the selected
    # command; "which" is only the dispatch key, hence remove="which".
    runner.run(args.which, vars(args), remove="which")