Commit acbafc41477a4063d51e5312961c6a045171f0eb
1 parent
a7a92c6a20
Exists in
master
add wav scp method to voxceleb module but it is under construction
Showing 1 changed file with 11 additions and 0 deletions Inline Diff
volia/voxceleb.py
1 | import argparse | 1 | import argparse |
2 | from utils import SubCommandRunner | 2 | from utils import SubCommandRunner |
3 | 3 | ||
4 | def utt2spk(features: str, outfile: str): | 4 | def utt2spk(features: str, outfile: str): |
5 | """Generate a utt2spk file from a feature file of voxceleb. | 5 | """Generate a utt2spk file from a feature file of voxceleb. |
6 | (it also works with list files instead of features) | 6 | (it also works with list files instead of features) |
7 | 7 | ||
8 | Args: | 8 | Args: |
9 | features (str): features file (or list) | 9 | features (str): features file (or list) |
10 | outfile (str): output file to store the utt2spk | 10 | outfile (str): output file to store the utt2spk |
11 | """ | 11 | """ |
12 | with open(features, "r") as f, open(outfile, "w") as out: | 12 | with open(features, "r") as f, open(outfile, "w") as out: |
13 | for line in f: | 13 | for line in f: |
14 | splited = line.replace("\n", "").split(" ") | 14 | splited = line.replace("\n", "").split(" ") |
15 | id_ = splited[0] | 15 | id_ = splited[0] |
16 | id_splited = id_.split("-") | 16 | id_splited = id_.split("-") |
17 | spk = id_splited[0] | 17 | spk = id_splited[0] |
18 | out.write(id_ + " " + spk + "\n") | 18 | out.write(id_ + " " + spk + "\n") |
19 | 19 | ||
20 | 20 | ||
21 | def spk2utt(features: str, outfile: str): | 21 | def spk2utt(features: str, outfile: str): |
22 | """Generate a spk2utt file from a feature file of voxceleb. | 22 | """Generate a spk2utt file from a feature file of voxceleb. |
23 | (it also works with list files instead of features) | 23 | (it also works with list files instead of features) |
24 | 24 | ||
25 | Args: | 25 | Args: |
26 | features (str): features file (or list) | 26 | features (str): features file (or list) |
27 | outfile (str): output file to store the spk2utt | 27 | outfile (str): output file to store the spk2utt |
28 | """ | 28 | """ |
29 | with open(features, "r") as f, open(outfile, "w") as out: | 29 | with open(features, "r") as f, open(outfile, "w") as out: |
30 | spk2utt_dict = {} | 30 | spk2utt_dict = {} |
31 | for line in f: | 31 | for line in f: |
32 | splited = line.replace("\n", "").split(" ") | 32 | splited = line.replace("\n", "").split(" ") |
33 | id_ = splited[0] | 33 | id_ = splited[0] |
34 | id_splited = id_.split("-") | 34 | id_splited = id_.split("-") |
35 | spk = id_splited[0] | 35 | spk = id_splited[0] |
36 | if spk not in spk2utt_dict: | 36 | if spk not in spk2utt_dict: |
37 | spk2utt_dict[spk] = [] | 37 | spk2utt_dict[spk] = [] |
38 | spk2utt_dict[spk].append(id_) | 38 | spk2utt_dict[spk].append(id_) |
39 | 39 | ||
40 | for spk, ids in spk2utt_dict.items(): | 40 | for spk, ids in spk2utt_dict.items(): |
41 | out.write(spk + " " + " ".join(ids) + "\n") | 41 | out.write(spk + " " + " ".join(ids) + "\n") |
42 | 42 | ||
43 | 43 | ||
44 | def wavscp(datadir: str, outfile: str): | ||
45 | raise Exception("Under construction") | ||
46 | pass | ||
47 | |||
44 | if __name__ == "__main__": | 48 | if __name__ == "__main__": |
45 | # Main parser | 49 | # Main parser |
46 | parser = argparse.ArgumentParser(description="Voxceleb data management") | 50 | parser = argparse.ArgumentParser(description="Voxceleb data management") |
47 | subparsers = parser.add_subparsers(title="action") | 51 | subparsers = parser.add_subparsers(title="action") |
48 | 52 | ||
49 | # utt2spk | 53 | # utt2spk |
50 | parser_utt2spk = subparsers.add_parser("utt2spk", help="Generate utt2spk file from feature file (works with list).") | 54 | parser_utt2spk = subparsers.add_parser("utt2spk", help="Generate utt2spk file from feature file (works with list).") |
51 | parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)") | 55 | parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)") |
52 | parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file") | 56 | parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file") |
53 | parser_utt2spk.set_defaults(which="utt2spk") | 57 | parser_utt2spk.set_defaults(which="utt2spk") |
54 | 58 | ||
55 | # spk2utt | 59 | # spk2utt |
56 | parser_spk2utt = subparsers.add_parser("spk2utt", help="Generate spk2utt file from feature file (works with list).") | 60 | parser_spk2utt = subparsers.add_parser("spk2utt", help="Generate spk2utt file from feature file (works with list).") |
57 | parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)") | 61 | parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)") |
58 | parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file") | 62 | parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file") |
59 | parser_spk2utt.set_defaults(which="spk2utt") | 63 | parser_spk2utt.set_defaults(which="spk2utt") |
60 | 64 | ||
65 | # wavscp | ||
66 | parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file") | ||
67 | parser_wavscp.add_argument("--datadir", required=True, help="data directory of voxceleb") | ||
68 | parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file") | ||
69 | parser_wavscp.set_defaults(which="wavscp") | ||
70 | |||
61 | # Parse | 71 | # Parse |
62 | args = parser.parse_args() | 72 | args = parser.parse_args() |
63 | 73 | ||
64 | # Run commands | 74 | # Run commands |
65 | runner = SubCommandRunner({ | 75 | runner = SubCommandRunner({ |
66 | "utt2spk" : utt2spk, | 76 | "utt2spk" : utt2spk, |
67 | "spk2utt": spk2utt, | 77 | "spk2utt": spk2utt, |
78 | "wavscp": wavscp | ||
68 | }) | 79 | }) |
69 | 80 | ||
70 | runner.run(args.which, args.__dict__, remove="which") | 81 | runner.run(args.which, args.__dict__, remove="which") |
71 | 82 |