"""Voxceleb data management: generate ``utt2spk`` / ``spk2utt`` files.

Target path: ``volia/voxceleb.py``.

Provenance: reconstructed from git patch
7213a2060906636344f308cf3eb3879bb086ed01 by Quillot Mathias
(Wed, 5 May 2021 15:26:20 +0200), subject "[PATCH] Allow user to manage
voxceleb files using features files. Other features incoming."
"""
import argparse
from collections import defaultdict
from typing import Iterable, Iterator, List, Optional, Tuple


def _iter_utt_spk(lines: Iterable[str]) -> Iterator[Tuple[str, str]]:
    """Yield ``(utterance_id, speaker_id)`` for every non-blank line.

    The utterance id is the first whitespace-separated field of the line; the
    speaker id is the part of that id located before its first ``-``.

    Args:
        lines: lines of a features (or list) file.

    Yields:
        One ``(utterance_id, speaker_id)`` pair per non-blank line, in input
        order.
    """
    for line in lines:
        # Split on any whitespace run so tabs, repeated spaces and "\r\n"
        # line endings never leak into the id.
        fields = line.split(None, 1)
        if not fields:
            # Blank or whitespace-only line: there is no id to record.
            continue
        utt = fields[0]
        yield utt, utt.partition("-")[0]


def utt2spk(features: str, outfile: str) -> None:
    """Generate a utt2spk file from a feature file of voxceleb.

    It also works with list files instead of features: only the first field
    of each line is read.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the utt2spk; one
            ``<utterance_id> <speaker_id>`` line is written per non-blank
            input line, in input order.
    """
    with open(features, "r", encoding="utf-8") as f, \
            open(outfile, "w", encoding="utf-8") as out:
        out.writelines(
            utt + " " + spk + "\n" for utt, spk in _iter_utt_spk(f)
        )


def spk2utt(features: str, outfile: str) -> None:
    """Generate a spk2utt file from a feature file of voxceleb.

    It also works with list files instead of features: only the first field
    of each line is read.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the spk2utt; one
            ``<speaker_id> <utterance_id> [<utterance_id> ...]`` line is
            written per speaker, speakers and utterances both following the
            dict insertion order of their first appearance in the input.
    """
    utts_by_spk = defaultdict(list)
    with open(features, "r", encoding="utf-8") as f:
        for utt, spk in _iter_utt_spk(f):
            utts_by_spk[spk].append(utt)

    # The output is opened only once the input has been read completely, so a
    # failure while reading does not leave a truncated output file behind.
    with open(outfile, "w", encoding="utf-8") as out:
        out.writelines(
            spk + " " + " ".join(utts) + "\n"
            for spk, utts in utts_by_spk.items()
        )


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser with its ``utt2spk``/``spk2utt`` actions.

    Returns:
        A parser whose namespace holds ``which`` (the selected action),
        ``features`` and ``outfile``.
    """
    parser = argparse.ArgumentParser(description="Voxceleb data management")

    # dest="which" stores the selected action under the key the runner reads,
    # and marking the group as required turns a missing action into a regular
    # usage error instead of an AttributeError on ``args.which`` later on.
    subparsers = parser.add_subparsers(title="action", dest="which")
    subparsers.required = True

    # Both actions take the same options; only the name (also used as the
    # default output file name) differs.
    for name in ("utt2spk", "spk2utt"):
        sub = subparsers.add_parser(
            name,
            help="Generate {} file from feature file (works with list).".format(name),
        )
        sub.add_argument("--features", required=True, help="Features file (works with list)")
        sub.add_argument("--outfile", default=name, help="output file")

    return parser


def main(argv: Optional[List[str]] = None) -> None:
    """Parse the command line and run the selected action.

    Args:
        argv: argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.
    """
    # Imported here because only the command-line entry point needs it: the
    # functions above stay importable when ``utils`` is not on ``sys.path``.
    from utils import SubCommandRunner

    args = build_parser().parse_args(argv)

    # NOTE(review): SubCommandRunner presumably drops the "which" key and
    # forwards the remaining entries as keyword arguments — confirm in utils.
    runner = SubCommandRunner({
        "utt2spk": utt2spk,
        "spk2utt": spk2utt,
    })
    runner.run(args.which, vars(args), remove="which")


if __name__ == "__main__":
    main()