Blame view

volia/voxceleb.py 2.93 KB
7213a2060   Quillot Mathias   Allow user to man...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
  import argparse
  from utils import SubCommandRunner
  
  def utt2spk(features: str, outfile: str):
      """Generate a utt2spk file from a feature file of voxceleb.
      (it also works with list files instead of features)

      Each input line must start with an utterance id, optionally followed by
      other space-separated fields (which are ignored). The speaker id is the
      part of the utterance id located before the first "-". One
      "<utterance-id> <speaker-id>" line is written per input line; lines
      without an utterance id (e.g. blank lines) are skipped.

      Args:
          features (str): features file (or list)
          outfile (str): output file to store the utt2spk
      """
      with open(features, "r") as f, open(outfile, "w") as out:
          for line in f:
              # Only the first space-separated field (the utterance id) matters.
              id_ = line.rstrip("\n").split(" ", 1)[0]
              if not id_:
                  # Blank line: there is no utterance to map, do not emit " \n".
                  continue
              spk = id_.split("-", 1)[0]
              out.write(id_ + " " + spk + "\n")
  
  
  def spk2utt(features: str, outfile: str):
      """Generate a spk2utt file from a feature file of voxceleb.
      (it also works with list files instead of features)

      Each input line must start with an utterance id, optionally followed by
      other space-separated fields (which are ignored). The speaker id is the
      part of the utterance id located before the first "-". One
      "<speaker-id> <utt-id-1> <utt-id-2> ..." line is written per speaker,
      speakers and utterances both appearing in their order of first
      occurrence in the input. Lines without an utterance id (e.g. blank
      lines) are skipped.

      Args:
          features (str): features file (or list)
          outfile (str): output file to store the spk2utt
      """
      with open(features, "r") as f, open(outfile, "w") as out:
          spk2utt_dict = {}
          for line in f:
              # Only the first space-separated field (the utterance id) matters.
              id_ = line.rstrip("\n").split(" ", 1)[0]
              if not id_:
                  # Blank line: would otherwise create an empty speaker entry.
                  continue
              spk = id_.split("-", 1)[0]
              spk2utt_dict.setdefault(spk, []).append(id_)

          # Everything must be read before writing: a speaker's utterances
          # can be scattered anywhere in the input.
          for spk, ids in spk2utt_dict.items():
              out.write(spk + " " + " ".join(ids) + "\n")
acbafc414   Quillot Mathias   add wav scp metho...
46
47
48
  def wavscp(datadir: str, outfile: str):
      """Generate a wav.scp file from a data directory (not implemented yet).

      Args:
          datadir (str): data directory
          outfile (str): output file to store the wav.scp

      Raises:
          NotImplementedError: always, this command is still under construction.
      """
      raise NotImplementedError("Under construction")
7213a2060   Quillot Mathias   Allow user to man...
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
  if __name__ == "__main__":
      # Command-line entry point: one sub-command per function of this module.
      # Each sub-parser records its own name in `which` so the matching
      # function can be looked up after parsing.

      # Main parser
      parser = argparse.ArgumentParser(description="Voxceleb data management")
      subparsers = parser.add_subparsers(title="action")

      # utt2spk
      parser_utt2spk = subparsers.add_parser("utt2spk", help="Generate utt2spk file from feature file (works with list).")
      parser_utt2spk.add_argument("--features", required=True, help="Features file (works with list)")
      parser_utt2spk.add_argument("--outfile", default="utt2spk", help="output file")
      parser_utt2spk.set_defaults(which="utt2spk")

      # spk2utt
      parser_spk2utt = subparsers.add_parser("spk2utt", help="Generate spk2utt file from feature file (works with list).")
      parser_spk2utt.add_argument("--features", required=True, help="Features file (works with list)")
      parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")
      parser_spk2utt.set_defaults(which="spk2utt")

      # wavscp
      # NOTE(review): the --datadir help mentions "masseffect" in a voxceleb
      # script; looks like a copy-paste leftover -- confirm before rewording.
      parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
      parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
      parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
      parser_wavscp.set_defaults(which="wavscp")

      # Parse
      args = parser.parse_args()

      # Without a sub-command none of the set_defaults(which=...) applies, so
      # `args.which` does not exist: report a usage error instead of crashing.
      if not hasattr(args, "which"):
          parser.error("an action is required: utt2spk, spk2utt or wavscp")

      # Run commands
      # presumably SubCommandRunner calls the function registered under
      # `args.which` with the remaining parsed arguments -- see utils.
      runner = SubCommandRunner({
          "utt2spk": utt2spk,
          "spk2utt": spk2utt,
          "wavscp": wavscp,
      })

      runner.run(args.which, args.__dict__, remove="which")