Commit 7213a2060906636344f308cf3eb3879bb086ed01

Authored by Quillot Mathias
1 parent 8afb6416be
Exists in master

Allow user to manage voxceleb files using features files. Other features incoming.

Showing 1 changed file with 70 additions and 0 deletions Inline Diff

File was created 1 import argparse
2 from utils import SubCommandRunner
3
def utt2spk(features: str, outfile: str):
    """Generate a utt2spk file from a feature file of voxceleb.
    (it also works with list files instead of features)

    The first whitespace-separated field of every input line is taken as the
    utterance id, and the part of that id before its first "-" as the
    speaker id. One "<utterance-id> <speaker-id>" line is written per input
    line, in input order. Blank lines are skipped.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the utt2spk
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        for line in f:
            # split() with no argument splits on any whitespace run, so
            # tab-separated files and CRLF line endings do not leak "\t..."
            # or "\r" into the utterance id.
            fields = line.split()
            if not fields:
                # Blank line: nothing to map, and writing " \n" would
                # produce an invalid entry.
                continue
            id_ = fields[0]
            spk = id_.split("-", 1)[0]
            out.write(id_ + " " + spk + "\n")
19
20
def spk2utt(features: str, outfile: str):
    """Generate a spk2utt file from a feature file of voxceleb.
    (it also works with list files instead of features)

    The first whitespace-separated field of every input line is taken as the
    utterance id, and the part of that id before its first "-" as the
    speaker id. Utterances are grouped per speaker and one
    "<speaker-id> <utt-id> <utt-id> ..." line is written per speaker.
    Blank lines are skipped.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the spk2utt
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        # speaker id -> list of its utterance ids, in input order
        spk2utt_dict = {}
        for line in f:
            # split() with no argument splits on any whitespace run, so
            # tab-separated files and CRLF line endings do not pollute ids.
            fields = line.split()
            if not fields:
                # Blank line: would otherwise create a speaker named "".
                continue
            id_ = fields[0]
            spk = id_.split("-", 1)[0]
            spk2utt_dict.setdefault(spk, []).append(id_)

        # Speakers are emitted in dict iteration order.
        for spk, ids in spk2utt_dict.items():
            out.write(spk + " " + " ".join(ids) + "\n")
42
43
if __name__ == "__main__":
    # Command-line entry point: one sub-command per generator function.

    # Main parser
    parser = argparse.ArgumentParser(description="Voxceleb data management")
    subparsers = parser.add_subparsers(title="action")

    # utt2spk
    parser_utt2spk = subparsers.add_parser("utt2spk", help="Generate utt2spk file from feature file (works with list).")
    parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)")
    parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file")
    # "which" records the chosen sub-command so it can be dispatched below.
    parser_utt2spk.set_defaults(which="utt2spk")

    # spk2utt
    parser_spk2utt = subparsers.add_parser("spk2utt", help="Generate spk2utt file from feature file (works with list).")
    parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)")
    parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")
    parser_spk2utt.set_defaults(which="spk2utt")

    # Parse
    args = parser.parse_args()

    # Sub-commands are optional by default in Python 3's argparse: when none
    # is given, "which" is never set and args.which would raise
    # AttributeError. Report a normal usage error instead. (No dest= is set
    # on add_subparsers on purpose: it would add an extra key to the
    # namespace dict forwarded to the handler.)
    if not hasattr(args, "which"):
        parser.error("an action is required: utt2spk or spk2utt")

    # Run commands
    runner = SubCommandRunner({
        "utt2spk": utt2spk,
        "spk2utt": spk2utt,
    })

    # NOTE(review): remove="which" presumably drops the dispatch key before
    # the remaining arguments are handed to the selected function; confirm
    # against utils.SubCommandRunner.
    runner.run(args.which, vars(args), remove="which")
71