Commit acbafc41477a4063d51e5312961c6a045171f0eb

Authored by Quillot Mathias
1 parent a7a92c6a20
Exists in master

add wav scp method to voxceleb module but it is under construction

Showing 1 changed file with 11 additions and 0 deletions Inline Diff

1 import argparse 1 import argparse
2 from utils import SubCommandRunner 2 from utils import SubCommandRunner
3 3
def utt2spk(features: str, outfile: str):
    """Generate a utt2spk file from a feature file of voxceleb.
    (it also works with list files instead of features)

    The first whitespace-separated field of each non-blank input line is
    taken as the utterance id; the speaker is the part of that id located
    before the first "-". One "<utterance-id> <speaker>" line is written per
    utterance, in input order.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the utt2spk
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        for line in f:
            # split() without argument tolerates tabs, repeated spaces and
            # leading whitespace, and returns [] for a blank line.
            fields = line.split()
            if not fields:
                # A blank line carries no utterance: skip it instead of
                # writing an empty " " entry.
                continue
            id_ = fields[0]
            spk = id_.split("-", 1)[0]
            out.write(id_ + " " + spk + "\n")
19 19
20 20
def spk2utt(features: str, outfile: str):
    """Generate a spk2utt file from a feature file of voxceleb.
    (it also works with list files instead of features)

    The first whitespace-separated field of each non-blank input line is
    taken as the utterance id; the speaker is the part of that id located
    before the first "-". One "<speaker> <utt-id> <utt-id> ..." line is
    written per speaker, utterances being listed in input order.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the spk2utt
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        spk2utt_dict = {}
        for line in f:
            # split() without argument tolerates tabs, repeated spaces and
            # leading whitespace, and returns [] for a blank line.
            fields = line.split()
            if not fields:
                # A blank line would otherwise register an empty speaker
                # owning an empty utterance id.
                continue
            id_ = fields[0]
            spk = id_.split("-", 1)[0]
            spk2utt_dict.setdefault(spk, []).append(id_)

        # The whole input must be read before writing, because the
        # utterances of one speaker may be scattered across the file.
        for spk, ids in spk2utt_dict.items():
            out.write(spk + " " + " ".join(ids) + "\n")
42 42
43 43
def wavscp(datadir: str, outfile: str):
    """Generate a wav.scp file (not implemented yet).

    Args:
        datadir (str): data directory
        outfile (str): output file to store the wav.scp

    Raises:
        NotImplementedError: always, this command is still under
            construction.
    """
    # NotImplementedError is a subclass of Exception, so callers catching
    # the former generic Exception keep working.
    raise NotImplementedError("Under construction")
47
if __name__ == "__main__":
    # Main parser
    parser = argparse.ArgumentParser(description="Voxceleb data management")
    subparsers = parser.add_subparsers(title="action")

    # Each sub-parser stores its own name in "which" so that the matching
    # function can be looked up once the command line is parsed.

    # utt2spk
    parser_utt2spk = subparsers.add_parser("utt2spk", help="Generate utt2spk file from feature file (works with list).")
    parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)")
    parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file")
    parser_utt2spk.set_defaults(which="utt2spk")

    # spk2utt
    parser_spk2utt = subparsers.add_parser("spk2utt", help="Generate spk2utt file from feature file (works with list).")
    parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)")
    parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")
    parser_spk2utt.set_defaults(which="spk2utt")

    # wavscp
    # (registered on "subparsers"; the former "subparser" name was undefined)
    parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
    parser_wavscp.add_argument("--datadir", required=True, help="data directory of voxceleb")
    parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
    parser_wavscp.set_defaults(which="wavscp")

    # Parse
    args = parser.parse_args()

    # Without any action on the command line no sub-parser sets "which":
    # report a usage error instead of failing on the attribute access below.
    if not hasattr(args, "which"):
        parser.error("an action is required (utt2spk, spk2utt or wavscp)")

    # Run commands
    runner = SubCommandRunner({
        "utt2spk": utt2spk,
        "spk2utt": spk2utt,
        "wavscp": wavscp,
    })

    # The parsed arguments are handed over as a dict; "which" is only the
    # dispatch key and is flagged for removal through the "remove" argument.
    runner.run(args.which, vars(args), remove="which")
71 82