From a7a92c6a2094a92cb8299af422531719077fc375 Mon Sep 17 00:00:00 2001 From: Quillot Mathias Date: Wed, 19 May 2021 09:27:12 +0200 Subject: [PATCH] add wav.scp generator to masseffect module --- volia/masseffect.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/volia/masseffect.py b/volia/masseffect.py index c6119fc..329b0e7 100644 --- a/volia/masseffect.py +++ b/volia/masseffect.py @@ -1,7 +1,8 @@ import argparse +from os import path import core.data from utils import SubCommandRunner - +import os def utt2char(features: str, outfile: str): """Allow the user to generate utt2char file from masseffect features file. @@ -28,23 +29,66 @@ def char2utt(features: str, outfile: str): pass +def wavscp(datadir: str, outfile: str): + """Generate the masseffect wav scp file from the directories. + + Args: + datadir (str): path of the data directory where "audio_en-us" and "audio_fr-fr" are available + outfile (str): path of the wav scp output file + + Raises: + Exception: if one of the directories is not available + """ + en_us_dir = os.path.join(datadir, "audio_en-us") + fr_fr_dir = os.path.join(datadir, "audio_fr-fr") + + if (not os.path.isdir(en_us_dir)) or (not os.path.isdir(fr_fr_dir)): + raise Exception("Directory audio_en-us or audio_fr-fr does not exist") + + _,_,filenames_en=next(os.walk(en_us_dir)) + # filenames_en = [ os.path.join(en_us_dir, f) for f in filenames_en ] + dir_en = [ en_us_dir for f in filenames_en ] + _,_,filenames_fr=next(os.walk(fr_fr_dir)) + dir_fr = [ fr_fr_dir for f in filenames_fr ] + # filenames_fr = [ os.path.join(fr_fr_dir, f) for f in filenames_fr ] + + directories = dir_en + dir_fr + filenames = filenames_en + filenames_fr + + + with open(outfile, "w") as f: + for i, fn in enumerate(filenames): + splited = fn.split(".")[0].split(",") + lang = splited[0] + character = splited[1] + record_id = splited[3] + path = os.path.join(directories[i], fn) 
f.write(f"{lang},{character},{record_id} {path}\n") + + + if __name__ == '__main__': # Main parser parser = argparse.ArgumentParser(description="...") subparsers = parser.add_subparsers(title="action") # utt2char - parser_utt2char = subparsers.add_parser("utt2char") + parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file") parser_utt2char.add_argument("--features", type=str, help="features file") parser_utt2char.add_argument("--outfile", type=str, help="output file") parser_utt2char.set_defaults(which="utt2char") # char2utt - parser_char2utt = subparsers.add_parser("char2utt") + parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file") parser_char2utt.add_argument("--features", type=str, help="features file") parser_char2utt.add_argument("--outfile", type=str, help="output file") parser_char2utt.set_defaults(which="char2utt") + # wavscp + parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file") + parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect") + parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file") + parser_wavscp.set_defaults(which="wavscp") # Parse args = parser.parse_args() @@ -53,6 +97,7 @@ if __name__ == '__main__': runner = SubCommandRunner({ "utt2char" : utt2char, "char2utt": char2utt, + "wavscp": wavscp }) runner.run(args.which, args.__dict__, remove="which") \ No newline at end of file -- 1.8.2.3