Commit 7213a2060906636344f308cf3eb3879bb086ed01
1 parent
8afb6416be
Exists in
master
Allow user to manage voxceleb files using features files. Other features incoming.
Showing 1 changed file with 70 additions and 0 deletions Inline Diff
volia/voxceleb.py
File was created | 1 | import argparse | |
2 | from utils import SubCommandRunner | ||
3 | |||
def utt2spk(features: str, outfile: str):
    """Generate a utt2spk file from a feature file of voxceleb.
    (it also works with list files instead of features)

    Every non-blank input line must start with an utterance id; anything
    after the first run of whitespace on the line is ignored. The speaker
    id is the part of the utterance id that precedes the first "-".
    One "<utterance id> <speaker id>" line is written per utterance, in
    input order.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the utt2spk (overwritten)
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        for line in f:
            # Split on any whitespace (space, tab, "\r\n") and keep only the
            # first token; the remainder of the line is never needed.
            fields = line.split(None, 1)
            if not fields:
                # Blank / whitespace-only line: there is no utterance id,
                # so writing anything would produce a corrupt entry.
                continue
            utt_id = fields[0]
            spk = utt_id.split("-", 1)[0]
            out.write(utt_id + " " + spk + "\n")
19 | |||
20 | |||
def spk2utt(features: str, outfile: str):
    """Generate a spk2utt file from a feature file of voxceleb.
    (it also works with list files instead of features)

    Every non-blank input line must start with an utterance id; anything
    after the first run of whitespace on the line is ignored. The speaker
    id is the part of the utterance id that precedes the first "-".
    One "<speaker id> <utterance id> <utterance id> ..." line is written
    per speaker; utterances keep their input order within a speaker.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the spk2utt (overwritten)
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        spk2utt_dict = {}
        for line in f:
            # Split on any whitespace (space, tab, "\r\n") and keep only the
            # first token; the remainder of the line is never needed.
            fields = line.split(None, 1)
            if not fields:
                # Blank / whitespace-only line: no utterance id to record.
                continue
            utt_id = fields[0]
            spk = utt_id.split("-", 1)[0]
            # Group utterances under their speaker, creating the list on
            # first sight of that speaker.
            spk2utt_dict.setdefault(spk, []).append(utt_id)

        for spk, ids in spk2utt_dict.items():
            out.write(spk + " " + " ".join(ids) + "\n")
42 | |||
43 | |||
if __name__ == "__main__":
    # Command-line entry point: one sub-command per file generator.
    # Main parser
    parser = argparse.ArgumentParser(description="Voxceleb data management")
    subparsers = parser.add_subparsers(title="action")

    # utt2spk
    parser_utt2spk = subparsers.add_parser("utt2spk", help="Generate utt2spk file from feature file (works with list).")
    parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)")
    parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file")
    # "which" records the chosen sub-command so it can be dispatched below.
    parser_utt2spk.set_defaults(which="utt2spk")

    # spk2utt
    parser_spk2utt = subparsers.add_parser("spk2utt", help="Generate spk2utt file from feature file (works with list).")
    parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)")
    parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")
    parser_spk2utt.set_defaults(which="spk2utt")

    # Parse
    args = parser.parse_args()

    # argparse treats sub-commands as optional by default, so a bare
    # invocation leaves "which" unset; report a usage error instead of
    # crashing with AttributeError on args.which below.
    if not hasattr(args, "which"):
        parser.error("an action is required: utt2spk or spk2utt")

    # Run commands
    # NOTE(review): SubCommandRunner comes from the project's utils module;
    # presumably it calls the function registered under args.which with the
    # remaining parsed arguments (minus "which") - confirm against utils.
    runner = SubCommandRunner({
        "utt2spk": utt2spk,
        "spk2utt": spk2utt,
    })

    runner.run(args.which, vars(args), remove="which")
71 |