Commit 233b7d451bb4f8b0632c25899452061b70018c96

Authored by quillotm
1 parent b6c178927e
Exists in master

adding changelabels function

Showing 1 changed file with 22 additions and 3 deletions Inline Diff

1 import argparse 1 import argparse
2 from os import path 2 from os import path
3 import core.data 3 import core.data
4 from utils import SubCommandRunner 4 from utils import SubCommandRunner
5 import os 5 import os
6 6
7
def utt2char(features: str, outfile: str):
    """Write a utt2char file built from a masseffect features file.

    Every key returned by ``core.data.read_features`` is stripped of newline
    characters and split on commas; the second field is taken as the
    character. One line ``<utterance id> <character>`` is written per key.

    TODO: Don't forget to manage two cases: one with old ids, and another with
    new ones.

    Args:
        features (str): path of the features file, handed to
            ``core.data.read_features``
        outfile (str): path of the utt2char file to write (overwritten)

    Raises:
        IndexError: if a key contains no comma, i.e. has no second field
    """
    utterances = core.data.read_features(features)
    utterance_ids = list(utterances.keys())

    with open(outfile, "w") as out:
        for raw_id in utterance_ids:
            # Keys may carry a trailing newline from the features file.
            utt_id = raw_id.replace("\n", "")
            character = utt_id.split(",")[1]
            out.write(f"{utt_id} {character}\n")
25 26
26 27
def char2utt(features: str, outfile: str):
    """Placeholder for generating a char2utt file; not implemented yet.

    Args:
        features (str): features file (currently unused)
        outfile (str): output file (currently unused)

    Raises:
        NotImplementedError: always. It is a subclass of ``Exception``, so
            callers catching ``Exception`` keep working.
    """
    raise NotImplementedError("Not implemented yet")
30 31
31 32
def wavscp(datadir: str, outfile: str):
    """Generate the masseffect wav scp file from the directories.

    Every file located directly inside "audio_en-us" and then "audio_fr-fr"
    is listed (sub-directories are not visited). The part of the file name
    before its first "." is split on commas; fields 1, 2 and 4 are used as
    language, character and record id (the third field is ignored). Each file
    produces one line ``<lang>,<character>,<record_id> <path>``.

    Args:
        datadir (str): path of the data directory where "audio_en-us" and
            "audio_fr-fr" are available
        outfile (str): path of the wav scp output file

    Raises:
        NotADirectoryError: if one of the directories is not available
            (subclass of ``Exception``)
        IndexError: if a file name has fewer than four comma-separated fields
    """
    audio_dirs = [
        os.path.join(datadir, "audio_en-us"),
        os.path.join(datadir, "audio_fr-fr"),
    ]

    if not all(os.path.isdir(audio_dir) for audio_dir in audio_dirs):
        raise NotADirectoryError("Directory audio_en-us or audio_fr-fr does not exist")

    # Build every line before touching the output file, so a malformed file
    # name cannot leave a half-written wav.scp behind.
    lines = []
    for audio_dir in audio_dirs:
        # The first item yielded by os.walk describes audio_dir itself.
        _, _, filenames = next(os.walk(audio_dir))
        for filename in filenames:
            fields = filename.split(".")[0].split(",")
            if len(fields) < 4:
                raise IndexError(
                    f"Unexpected file name {filename!r} in {audio_dir!r}: "
                    "expected at least 4 comma-separated fields"
                )
            lang, character, record_id = fields[0], fields[1], fields[3]
            wav_path = os.path.join(audio_dir, filename)
            lines.append(f"{lang},{character},{record_id} {wav_path}\n")

    with open(outfile, "w") as f:
        f.writelines(lines)
67 67
68 68
def changelabels(source: str, labels: str, outfile: str):
    """Replace the second id field of every entry of ``source`` by its label.

    Entries are loaded with ``core.data.read_id_values`` and labels with
    ``core.data.read_labels``. For each id, the second comma-separated field
    is replaced by the label stored under that same id, and the rewritten id
    is handed to ``core.data.write_line`` together with the entry's values.

    Args:
        source (str): file whose ids must be changed
        labels (str): file with labels, looked up by the original id
        outfile (str): output file; it is opened for writing (and therefore
            truncated)

    Raises:
        KeyError: if an id of ``source`` has no entry in the labels
    """
    entries = core.data.read_id_values(source)
    label_of = core.data.read_labels(labels)
    entry_ids = list(entries.keys())

    # NOTE(review): the handle `f` is never passed to core.data.write_line,
    # so unless write_line targets this file by other means nothing is
    # written to `outfile` -- confirm against write_line's signature.
    with open(outfile, "w") as f:
        for entry_id in entry_ids:
            fields = entry_id.split(",")
            fields[1] = label_of[entry_id]
            new_id = ",".join(fields)
            core.data.write_line(new_id, entries[entry_id])
79
80
70 if __name__ == '__main__': 81 if __name__ == '__main__':
71 # Main parser 82 # Main parser
72 parser = argparse.ArgumentParser(description="...") 83 parser = argparse.ArgumentParser(description="...")
73 subparsers = parser.add_subparsers(title="action") 84 subparsers = parser.add_subparsers(title="action")
74 85
75 # utt2char 86 # utt2char
76 parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file") 87 parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
77 parser_utt2char.add_argument("--features", type=str, help="features file") 88 parser_utt2char.add_argument("--features", type=str, help="features file")
78 parser_utt2char.add_argument("--outfile", type=str, help="output file") 89 parser_utt2char.add_argument("--outfile", type=str, help="output file")
79 parser_utt2char.set_defaults(which="utt2char") 90 parser_utt2char.set_defaults(which="utt2char")
80 91
81 # char2utt 92 # char2utt
82 parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file") 93 parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file")
83 parser_char2utt.add_argument("--features", type=str, help="features file") 94 parser_char2utt.add_argument("--features", type=str, help="features file")
84 parser_char2utt.add_argument("--outfile", type=str, help="output file") 95 parser_char2utt.add_argument("--outfile", type=str, help="output file")
85 parser_char2utt.set_defaults(which="char2utt") 96 parser_char2utt.set_defaults(which="char2utt")
86 97
87 # wavscp 98 # wavscp
88 parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file") 99 parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
89 parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect") 100 parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
90 parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file") 101 parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
91 parser_wavscp.set_defaults(which="wavscp") 102 parser_wavscp.set_defaults(which="wavscp")
92 103
104 # Change labels
105 parser_changelabels = subparsers.add_parser("changelabels", help="...")
106 parser_changelabels.add_argument("--source", required=True, type=str, help="source file where we want to change ids.")
107 parser_changelabels.add_argument("--labels", required=True, type=str, help="file with labels")
108 parser_changelabels.add_argument("--outfile", required=True, type=str, help="Output file")
109 parser_changelabels.set_defaults(which="changelabels")
110
93 # Parse 111 # Parse
94 args = parser.parse_args() 112 args = parser.parse_args()
95 113
96 # Run commands 114 # Run commands
97 runner = SubCommandRunner({ 115 runner = SubCommandRunner({
98 "utt2char" : utt2char, 116 "utt2char" : utt2char,
99 "char2utt": char2utt, 117 "char2utt": char2utt,
100 "wavscp": wavscp 118 "wavscp": wavscp,
119 "changelabels": changelabels
101 }) 120 })
102 121