Quillot Mathias / volia

Blame view

volia/masseffect.py 5.23 KB

62fc82e59 Quillot Mathias Allow user to gen...	1	import argparse
a7a92c6a2 Quillot Mathias add wav.scp gener...	2	from os import path
62fc82e59 Quillot Mathias Allow user to gen...	3 4	import core.data from utils import SubCommandRunner
a7a92c6a2 Quillot Mathias add wav.scp gener...	5	import os
62fc82e59 Quillot Mathias Allow user to gen...	6
233b7d451 quillotm adding changelabe...	7
def utt2char(features: str, outfile: str):
    """Generate an utt2char file from a masseffect features file.

    Every key returned by ``core.data.read_features`` is treated as a
    comma-separated utterance id whose second field is the character.
    Spaces are stripped from the id, then one line
    ``<utterance id> <character>`` is written per key.

    TODO: Don't forget to manage two cases: one with old ids, and another
    with new ones.

    Args:
        features (str): path of the features file to read.
        outfile (str): path of the utt2char file to write (overwritten).

    Raises:
        IndexError: if an id has fewer than two comma-separated fields.
    """
    data = core.data.read_features(features)

    with open(outfile, "w") as f:
        for key in data.keys():
            fields = key.replace(" ", "").split(",")
            character = fields[1]
            # Terminate each record with a newline so every utterance gets
            # its own line instead of the whole file being a single line.
            f.write(",".join(fields) + " " + character + "\n")
5fe43711c Mathias Quillot issue repaired. c...	28
def char2utt(features: str, outfile: str):
    """Generate a char2utt file from a masseffect features file.

    Not implemented yet: calling it always raises.

    Args:
        features (str): path of the features file (unused for now).
        outfile (str): path of the output file (unused for now).

    Raises:
        NotImplementedError: always. It is a subclass of ``Exception``, so
            callers catching ``Exception`` keep working.
    """
    raise NotImplementedError("Not implemented yet")
def wavscp(datadir: str, outfile: str):
    """Generate the masseffect wav.scp file from the audio directories.

    Only the files located directly inside ``audio_en-us`` and
    ``audio_fr-fr`` are listed (sub-directories are not visited). The part
    of each file name before the first ``.`` is split on ``,``; fields 0, 1
    and 3 are used as language, character and record id. One line
    ``<lang>,<character>,<record_id> <path>`` is written per file, English
    files first, then French ones, each group sorted by file name so the
    output is reproducible.

    Args:
        datadir (str): path of the data directory where "audio_en-us" and
            "audio_fr-fr" are available.
        outfile (str): path of the wav.scp output file (overwritten).

    Raises:
        Exception: if one of the two audio directories does not exist.
        IndexError: if a file name has fewer than four comma-separated
            fields before its first ``.``.
    """
    audio_dirs = [
        os.path.join(datadir, "audio_en-us"),
        os.path.join(datadir, "audio_fr-fr"),
    ]
    if not all(os.path.isdir(audio_dir) for audio_dir in audio_dirs):
        raise Exception("Directory audio_en-us or audio_fr-fr does not exist")

    # List both directories before opening the output file so that an
    # outfile located inside one of them is never listed itself.
    entries = []
    for audio_dir in audio_dirs:
        _, _, filenames = next(os.walk(audio_dir))
        entries.extend((audio_dir, fn) for fn in sorted(filenames))

    with open(outfile, "w") as f:
        for audio_dir, fn in entries:
            fields = fn.split(".")[0].split(",")
            lang = fields[0]
            character = fields[1]
            record_id = fields[3]
            wav_path = os.path.join(audio_dir, fn)
            # wav.scp is line-oriented: one "<id> <path>" record per line.
            f.write(f"{lang},{character},{record_id} {wav_path}\n")
def changelabels(source: str, labels: str, outfile: str):
    """Rewrite the ids of ``source`` with the labels found in ``labels``.

    For each id read from ``source``, the second comma-separated field is
    replaced by the first element stored for that same id in the labels
    file; the id's values are written back unchanged through
    ``core.data.write_line``.

    Args:
        source (str): file read with ``core.data.read_id_values``.
        labels (str): file read with ``core.data.read_labels``; it must
            hold an entry for every id of ``source``.
        outfile (str): path of the output file (overwritten).
    """
    id_values = core.data.read_id_values(source)
    new_labels = core.data.read_labels(labels)
    utt_ids = [*id_values.keys()]

    with open(outfile, "w") as out:
        for utt_id in utt_ids:
            fields = utt_id.split(",")
            # Field 1 of the id is swapped for the new label.
            fields[1] = new_labels[utt_id][0]
            relabelled_id = ",".join(fields)
            core.data.write_line(relabelled_id, id_values[utt_id], out=out)
233b7d451 quillotm adding changelabe...	78
def converter(file: str, outtype: str, outfile: str):
    """Create a converter file mapping complete ids to shortened ids.

    For every id read from ``file`` (spaces stripped), one line
    ``<complete id> <field0>,<field1>,<field3>`` is written, where the
    fields are the comma-separated parts of the complete id.

    Args:
        file (str): file read with ``core.data.read_id_values``.
        outtype (str): currently unused; kept because the command line
            passes it.
        outfile (str): path of the converter file to write (overwritten).

    Raises:
        IndexError: if an id has fewer than four comma-separated fields.
    """
    data = core.data.read_id_values(file)
    with open(outfile, "w") as of:
        for key in data:
            full_id = key.replace(" ", "")
            fields = full_id.split(",")
            short_id = ",".join([fields[0], fields[1], fields[3]])
            # One mapping per line, not one long space-separated line.
            of.write(full_id + " " + short_id + "\n")
if __name__ == '__main__':
    # Command-line entry point: one sub-command per function of this
    # module. Each sub-parser stores its own name in ``which`` so the
    # matching function can be dispatched after parsing.

    # Main parser
    parser = argparse.ArgumentParser(description="...")
    subparsers = parser.add_subparsers(title="action")

    # utt2char
    parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
    parser_utt2char.add_argument("--features", type=str, required=True, help="features file")
    parser_utt2char.add_argument("--outfile", type=str, required=True, help="output file")
    parser_utt2char.set_defaults(which="utt2char")

    # char2utt
    parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file")
    parser_char2utt.add_argument("--features", type=str, required=True, help="features file")
    parser_char2utt.add_argument("--outfile", type=str, required=True, help="output file")
    parser_char2utt.set_defaults(which="char2utt")

    # wavscp
    parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
    parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
    parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
    parser_wavscp.set_defaults(which="wavscp")

    # Change labels
    parser_changelabels = subparsers.add_parser("changelabels", help="...")
    parser_changelabels.add_argument("--source", required=True, type=str, help="source file where we want to change ids.")
    parser_changelabels.add_argument("--labels", required=True, type=str, help="file with labels")
    parser_changelabels.add_argument("--outfile", required=True, type=str, help="Output file")
    parser_changelabels.set_defaults(which="changelabels")

    # Create converter
    parser_converter = subparsers.add_parser("converter", help="Create converter file")
    parser_converter.add_argument("--file", type=str, required=True, help="File with ids from which create converter.")
    parser_converter.add_argument("--outtype", type=str, choices=["complet", "kaldi"])
    parser_converter.add_argument("--outfile", type=str, required=True, help="")
    parser_converter.set_defaults(which="converter")

    # Parse
    args = parser.parse_args()

    # Without a sub-command no ``which`` default is set; report a usage
    # error instead of failing later with an AttributeError.
    if not hasattr(args, "which"):
        parser.error("an action is required")

    # Run commands
    runner = SubCommandRunner({
        "utt2char": utt2char,
        "char2utt": char2utt,
        "wavscp": wavscp,
        "changelabels": changelabels,
        "converter": converter,
    })

    # "which" only selects the command; it is removed from the arguments
    # handed to the selected function.
    runner.run(args.which, args.__dict__, remove="which")