Commit a7a92c6a2094a92cb8299af422531719077fc375

Authored by Quillot Mathias
1 parent 88260de938
Exists in master

add wav.scp generator to masseffect module

Showing 1 changed file with 48 additions and 3 deletions Side-by-side Diff

1 1 import argparse
  2 +from os import path
2 3 import core.data
3 4 from utils import SubCommandRunner
  5 +import os
4 6  
5   -
6 7 def utt2char(features: str, outfile: str):
7 8 """Allow the user to generate utt2char file from masseffect features file.
8 9  
9 10  
10 11  
11 12  
... ... @@ -28,23 +29,66 @@
28 29 pass
29 30  
30 31  
def _list_files(directory: str) -> list:
    """Return the sorted names of the non-directory entries directly inside `directory`."""
    with os.scandir(directory) as entries:
        return sorted(entry.name for entry in entries if not entry.is_dir())


def wavscp(datadir: str, outfile: str):
    """Generate the masseffect wav scp file from the audio directories.

    Every file found directly inside "audio_en-us" and "audio_fr-fr" produces
    one output line of the form ``<lang>,<character>,<record_id> <wav path>``.
    The key is built from the comma-separated fields of the file name (the part
    before the first dot): field 0 is the language, field 1 the character and
    field 3 the record id; field 2 is not used.

    Entries are written for "audio_en-us" first, then "audio_fr-fr", each
    sorted by file name so that the output is reproducible.

    Args:
        datadir (str): path of the data directory where "audio_en-us" and
            "audio_fr-fr" are available
        outfile (str): path of the wav scp output file

    Raises:
        NotADirectoryError: if one of the directories is not available
            (a subclass of Exception, which was raised previously).
        IndexError: if a file name has fewer than four comma-separated
            fields. Nothing is written to `outfile` in that case.
    """
    en_us_dir = os.path.join(datadir, "audio_en-us")
    fr_fr_dir = os.path.join(datadir, "audio_fr-fr")

    missing = [d for d in (en_us_dir, fr_fr_dir) if not os.path.isdir(d)]
    if missing:
        raise NotADirectoryError(
            "Directory audio_en-us or audio_fr-fr does not exist: "
            + ", ".join(missing)
        )

    # Build every line before touching the output file, so that a malformed
    # file name cannot leave a half-written wav.scp behind.
    lines = []
    for directory in (en_us_dir, fr_fr_dir):
        for filename in _list_files(directory):
            fields = filename.split(".")[0].split(",")
            if len(fields) < 4:
                # IndexError is kept for compatibility: it is what indexing
                # the missing field used to raise, now with a useful message.
                raise IndexError(
                    f"Unexpected file name {filename!r} in {directory}: "
                    "expected at least 4 comma-separated fields"
                )
            lang, character, record_id = fields[0], fields[1], fields[3]
            wav_path = os.path.join(directory, filename)
            lines.append(f"{lang},{character},{record_id} {wav_path}\n")

    with open(outfile, "w", encoding="utf-8") as f:
        f.writelines(lines)
  67 +
  68 +
  69 +
31 70 if __name__ == '__main__':
32 71 # Main parser
33 72 parser = argparse.ArgumentParser(description="...")
34 73 subparsers = parser.add_subparsers(title="action")
35 74  
36 75 # utt2char
37   - parser_utt2char = subparsers.add_parser("utt2char")
  76 + parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
38 77 parser_utt2char.add_argument("--features", type=str, help="features file")
39 78 parser_utt2char.add_argument("--outfile", type=str, help="output file")
40 79 parser_utt2char.set_defaults(which="utt2char")
41 80  
42 81 # char2utt
43   - parser_char2utt = subparsers.add_parser("char2utt")
  82 + parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file")
44 83 parser_char2utt.add_argument("--features", type=str, help="features file")
45 84 parser_char2utt.add_argument("--outfile", type=str, help="output file")
46 85 parser_char2utt.set_defaults(which="char2utt")
47 86  
  87 + # wavscp
  88 + parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
  89 + parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
  90 + parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
  91 + parser_wavscp.set_defaults(which="wavscp")
48 92  
49 93 # Parse
50 94 args = parser.parse_args()
... ... @@ -53,6 +97,7 @@
53 97 runner = SubCommandRunner({
54 98 "utt2char" : utt2char,
55 99 "char2utt": char2utt,
  100 + "wavscp": wavscp
56 101 })
57 102  
58 103 runner.run(args.which, args.__dict__, remove="which")