Blame view
volia/voxceleb.py
2.93 KB
7213a2060 Allow user to man... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import argparse

from utils import SubCommandRunner


def _utterance_and_speaker(line: str):
    """Extract the utterance id and speaker id from one input line.

    The utterance id is the first whitespace-separated field of the line and
    the speaker id is the part of that id located before the first "-".

    Args:
        line (str): one line of a features file (or list)

    Returns:
        A ``(utterance_id, speaker_id)`` tuple, or ``None`` for a blank line.
    """
    # split() without argument accepts any run of whitespace as separator and
    # discards the trailing newline, so the id never carries stray characters.
    fields = line.split()
    if not fields:
        return None
    utt_id = fields[0]
    return utt_id, utt_id.split("-")[0]


def utt2spk(features: str, outfile: str):
    """Generate a utt2spk file from a feature file of voxceleb.
    (it also works with list files instead of features)

    One "<utterance id> <speaker id>" line is written per non-blank input
    line, in input order.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the utt2spk
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        for line in f:
            parsed = _utterance_and_speaker(line)
            if parsed is None:
                # Blank line: nothing to map.
                continue
            utt_id, spk = parsed
            out.write(utt_id + " " + spk + "\n")


def spk2utt(features: str, outfile: str):
    """Generate a spk2utt file from a feature file of voxceleb.
    (it also works with list files instead of features)

    One "<speaker id> <utterance id> <utterance id> ..." line is written per
    speaker. Speakers appear in order of first occurrence and their
    utterances in input order.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the spk2utt
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        # Group utterance ids by speaker before writing anything, because a
        # speaker's utterances may be scattered through the input.
        spk2utt_dict = {}
        for line in f:
            parsed = _utterance_and_speaker(line)
            if parsed is None:
                continue
            utt_id, spk = parsed
            spk2utt_dict.setdefault(spk, []).append(utt_id)

        for spk, ids in spk2utt_dict.items():
            out.write(spk + " " + " ".join(ids) + "\n")
acbafc414 add wav scp metho... |
46 47 48 |
def wavscp(datadir: str, outfile: str):
    """Placeholder for the wav.scp generation command (not implemented yet).

    Args:
        datadir (str): data directory
        outfile (str): output file to store the wav.scp

    Raises:
        NotImplementedError: always, the command is still under construction.
    """
    # NotImplementedError derives from Exception, so callers that catch
    # Exception keep working while the failure reason becomes explicit.
    raise NotImplementedError("Under construction")
7213a2060 Allow user to man... |
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
if __name__ == "__main__":
    # Main parser
    parser = argparse.ArgumentParser(description="Voxceleb data management")
    subparsers = parser.add_subparsers(title="action")

    # utt2spk
    parser_utt2spk = subparsers.add_parser(
        "utt2spk",
        help="Generate utt2spk file from feature file (works with list).",
    )
    parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)")
    parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file")
    parser_utt2spk.set_defaults(which="utt2spk")

    # spk2utt
    parser_spk2utt = subparsers.add_parser(
        "spk2utt",
        help="Generate spk2utt file from feature file (works with list).",
    )
    parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)")
    parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")
    parser_spk2utt.set_defaults(which="spk2utt")

    # wavscp
    # Registered on `subparsers`, the object returned by add_subparsers();
    # no other sub-parser registry exists in this script.
    parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
    parser_wavscp.add_argument("--datadir", required=True, help="data directory of voxceleb")
    parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
    parser_wavscp.set_defaults(which="wavscp")

    # Parse
    args = parser.parse_args()

    # Each sub-parser stores its name in `which`; the attribute is absent only
    # when the script is started without any action. Exit with a usage
    # message in that case instead of failing on the attribute access below.
    if not hasattr(args, "which"):
        parser.error("an action is required: utt2spk, spk2utt or wavscp")

    # Run commands
    # The runner receives the selected action name and the parsed arguments as
    # a dict, and is asked to drop the "which" entry from them.
    runner = SubCommandRunner({
        "utt2spk": utt2spk,
        "spk2utt": spk2utt,
        "wavscp": wavscp,
    })

    runner.run(args.which, vars(args), remove="which")