Commit 7213a2060906636344f308cf3eb3879bb086ed01

Authored by Quillot Mathias
1 parent 8afb6416be
Exists in master

Allow user to manage voxceleb files using features files. Other features incoming.

Showing 1 changed file with 70 additions and 0 deletions Side-by-side Diff

  1 +import argparse
  2 +from utils import SubCommandRunner
  3 +
def utt2spk(features: str, outfile: str):
    """Generate a utt2spk file from a feature file of voxceleb.

    (it also works with list files instead of features)

    Every non-blank input line produces one output line of the form
    ``<utterance-id> <speaker-id>``. The utterance id is the first
    whitespace-separated field of the line; the speaker id is the part of
    the utterance id that precedes its first ``-``. Blank or
    whitespace-only lines are skipped.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the utt2spk
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        for line in f:
            # split() without a separator copes with tabs, repeated spaces
            # and a stray "\r", and returns [] for blank lines so they can
            # be skipped instead of emitting an empty " " entry.
            fields = line.split()
            if not fields:
                continue
            utt_id = fields[0]
            spk = utt_id.split("-", 1)[0]
            out.write(utt_id + " " + spk + "\n")
  19 +
  20 +
def spk2utt(features: str, outfile: str):
    """Generate a spk2utt file from a feature file of voxceleb.

    (it also works with list files instead of features)

    The utterance id is the first whitespace-separated field of each
    non-blank input line; the speaker id is the part of the utterance id
    that precedes its first ``-``. The output holds one line per speaker:
    ``<speaker-id> <utt-id> <utt-id> ...``, with utterances in the order
    they were read. Speakers are written in the dict's iteration order
    (order of first appearance on Python 3.7+). Blank or whitespace-only
    lines are skipped.

    Args:
        features (str): features file (or list)
        outfile (str): output file to store the spk2utt
    """
    with open(features, "r") as f, open(outfile, "w") as out:
        spk2utt_dict = {}
        for line in f:
            # split() without a separator copes with tabs, repeated spaces
            # and a stray "\r", and returns [] for blank lines so they do
            # not create an empty-string speaker.
            fields = line.split()
            if not fields:
                continue
            utt_id = fields[0]
            spk = utt_id.split("-", 1)[0]
            spk2utt_dict.setdefault(spk, []).append(utt_id)

        for spk, ids in spk2utt_dict.items():
            out.write(spk + " " + " ".join(ids) + "\n")
  42 +
  43 +
if __name__ == "__main__":
    # Command-line entry point: one sub-command per generator function.
    # Each sub-parser stores its own name under "which" so the matching
    # function can be looked up after parsing.

    # Main parser
    parser = argparse.ArgumentParser(description="Voxceleb data management")
    subparsers = parser.add_subparsers(title="action")

    # utt2spk
    parser_utt2spk = subparsers.add_parser("utt2spk", help="Generate utt2spk file from feature file (works with list).")
    parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)")
    parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file")
    parser_utt2spk.set_defaults(which="utt2spk")

    # spk2utt
    parser_spk2utt = subparsers.add_parser("spk2utt", help="Generate spk2utt file from feature file (works with list).")
    parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)")
    parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")
    parser_spk2utt.set_defaults(which="spk2utt")

    # Parse
    args = parser.parse_args()

    # Sub-commands are optional for argparse here, so running the script
    # without an action leaves "which" unset. Report that as a usage error
    # instead of crashing with an AttributeError below.
    if not hasattr(args, "which"):
        parser.error("an action is required (utt2spk or spk2utt)")

    # Run commands
    runner = SubCommandRunner({
        "utt2spk": utt2spk,
        "spk2utt": spk2utt,
    })

    # NOTE(review): presumably run() drops the "which" entry and forwards
    # the remaining parsed options to the selected function -- confirm in
    # utils.SubCommandRunner.
    runner.run(args.which, vars(args), remove="which")