Commit a36206bdd16e5370ebf3daf9f2392855aab135d5

Authored by quillotm
1 parent 9a08c7beb3
Exists in master

Add the functionality needed to generate utt2spk and utt2char files

Showing 1 changed file with 36 additions and 2 deletions Side-by-side Diff

... ... @@ -29,7 +29,29 @@
29 29 full_path = os.path.join(root, language, "wave-audio", wav)
30 30  
31 31 f.write(f"{id_} {full_path}\n")
32   - pass
  32 +
  33 +
def utt2spk(lst: str, outfile: str):
    """Write an utterance-to-speaker mapping file.

    Every identifier obtained from ``read_lst(lst)`` is split on ``","``;
    the first two fields joined by ``"-"`` form the speaker id. One line
    ``"<identifier> <speaker id>"`` is written per identifier.

    Args:
        lst: Path of the ``.lst`` list file, passed to ``read_lst``.
        outfile: Path of the file to write (overwritten if it exists).

    Raises:
        IndexError: If an identifier has fewer than two comma-separated fields.
    """
    # NOTE(review): assumes read_lst returns an iterable of identifier
    # strings with comma-separated fields -- confirm against read_lst.
    utt_ids = read_lst(lst)

    with open(outfile, "w") as f:
        # Iterate over the parsed identifiers, not over the characters of
        # the path string `lst`.
        for id_ in utt_ids:
            fields = id_.split(",")
            spk_id = fields[0] + "-" + fields[1]
            f.write(id_ + " " + spk_id + "\n")
  42 +
  43 +
def utt2char(lst: str, outfile: str):
    """Write an utterance-to-character mapping file.

    Every identifier obtained from ``read_lst(lst)`` is split on ``","``;
    the second field is used as the character id. One line
    ``"<identifier> <character id>"`` is written per identifier.

    Args:
        lst: Path of the ``.lst`` list file, passed to ``read_lst``.
        outfile: Path of the file to write (overwritten if it exists).

    Raises:
        IndexError: If an identifier has fewer than two comma-separated fields.
    """
    # NOTE(review): assumes read_lst returns an iterable of identifier
    # strings with comma-separated fields -- confirm against read_lst.
    utt_ids = read_lst(lst)

    with open(outfile, "w") as f:
        # Iterate over the parsed identifiers, not over the characters of
        # the path string `lst`.
        for id_ in utt_ids:
            fields = id_.split(",")
            char_id = fields[1]
            f.write(id_ + " " + char_id + "\n")
34 56  
35 57  
36 58  
... ... @@ -44,12 +66,24 @@
44 66 parser_lst2wav.add_argument("--outfile", type=str, help="output wav.scp-like file")
45 67 parser_lst2wav.set_defaults(which="lst2wav")
46 68  
  69 + parser_utt2spk = subparsers.add_parser("utt2spk", help="generate utt2spk file")
  70 + parser_utt2spk.add_argument("--lst", required=True, type=str, help="list file .lst")
  71 + parser_utt2spk.add_argument("--outfile", required=True, type=str, help="utt2spk output file")
  72 + parser_utt2spk.set_defaults(which="utt2spk")
  73 +
  74 + parser_utt2char = subparsers.add_parser("utt2char", help="..")
  75 + parser_utt2char.add_argument("--lst", required=True, type=str, help="list file .lst")
  76 + parser_utt2char.add_argument("--outfile", required=True, type=str, help="utt2char output file")
  77 + parser_utt2char.set_defaults(which="utt2char")
  78 +
47 79 # Parse
48 80 args = parser.parse_args()
49 81  
50 82 # Run commands
51 83 runner = SubCommandRunner({
52   - "lst2wav" : lst2wav
  84 + "lst2wav" : lst2wav,
  85 + "utt2spk": utt2spk,
  86 + "utt2char": utt2char
53 87 })
54 88  
55 89 runner.run(args.which, args.__dict__, remove="which")