Blame view

volia/masseffect.py 5.23 KB
62fc82e59   Quillot Mathias   Allow user to gen...
1
  import argparse
a7a92c6a2   Quillot Mathias   add wav.scp gener...
2
  from os import path
62fc82e59   Quillot Mathias   Allow user to gen...
3
4
  import core.data
  from utils import SubCommandRunner
a7a92c6a2   Quillot Mathias   add wav.scp gener...
5
  import os
62fc82e59   Quillot Mathias   Allow user to gen...
6

233b7d451   quillotm   adding changelabe...
7

62fc82e59   Quillot Mathias   Allow user to gen...
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
  def utt2char(features: str, outfile: str):
      """Allow the user to generate utt2char file from masseffect features file.

      Every key returned by ``core.data.read_features`` is handled as a
      comma-separated utterance id whose second field is the character.
      One line ``<utterance id> <character>`` is written per key.

      TODO: Don't forget to manage two cases: one with old ids, and another with
      new ones.

      Args:
          features (str): path of the features file, loaded with
              ``core.data.read_features``.
          outfile (str): path of the utt2char file to write (overwritten).

      Raises:
          IndexError: if a key has no second comma-separated field.
      """
      data = core.data.read_features(features)

      with open(outfile, "w") as f:
          for key in data.keys():
              # Newline characters are removed from the key before it is split
              # and written back, so each id stays on a single output line.
              utt_id = key.replace("\n", "")
              character = utt_id.split(",")[1]
              f.write(utt_id + " " + character + "\n")
5fe43711c   Mathias Quillot   issue repaired. c...
28

62fc82e59   Quillot Mathias   Allow user to gen...
29
30
31
32
  
  def char2utt(features: str, outfile: str):
      """Placeholder for the char2utt generation, which is not implemented yet.

      Args:
          features (str): features file (currently unused).
          outfile (str): output file (currently unused).

      Raises:
          NotImplementedError: always. It is a subclass of ``Exception``, so
              callers catching ``Exception`` keep working.
      """
      raise NotImplementedError("Not implemented yet")
a7a92c6a2   Quillot Mathias   add wav.scp gener...
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
  def wavscp(datadir: str, outfile: str):
      """Generate the masseffect wav scp file from the directories.

      Only the files located directly inside "audio_en-us" and "audio_fr-fr"
      are listed (sub-directories are not visited). For each file, the part of
      its name before the first "." is split on "," and the fields 0, 1 and 3
      are used as language, character and record id. One line
      ``<lang>,<character>,<record_id> <path>`` is written per file, the
      "audio_en-us" files first.

      Args:
          datadir (str): path of the data directory where "audio_en-us" and "audio_fr-fr" are available
          outfile (str): path of the wav scp output file

      Raises:
          Exception: if one of the directory is not available
          IndexError: if a file name has fewer than four comma-separated fields
      """
      en_us_dir = os.path.join(datadir, "audio_en-us")
      fr_fr_dir = os.path.join(datadir, "audio_fr-fr")

      if (not os.path.isdir(en_us_dir)) or (not os.path.isdir(fr_fr_dir)):
          raise Exception("Directory audio_en-us or audio_fr-fr does not exist")

      # Both directories are listed before the output file is opened, so the
      # output file is never picked up by the listing when it is created there.
      entries = []
      for directory in (en_us_dir, fr_fr_dir):
          _, _, filenames = next(os.walk(directory))
          entries.extend((directory, fn) for fn in filenames)

      with open(outfile, "w") as f:
          for directory, fn in entries:
              fields = fn.split(".")[0].split(",")
              lang, character, record_id = fields[0], fields[1], fields[3]
              wav_path = os.path.join(directory, fn)
              f.write(f"{lang},{character},{record_id} {wav_path}\n")
233b7d451   quillotm   adding changelabe...
68
69
70
71
72
73
74
75
  def changelabels(source: str, labels: str, outfile: str):
      """Rewrite the ids of a file, replacing their second field by a label.

      The source file is loaded with ``core.data.read_id_values`` and the labels
      file with ``core.data.read_labels``. For every id of the source, the
      second comma-separated field is replaced by the first element of the
      labels entry stored under the same id, and the values are written back
      with ``core.data.write_line``.

      Args:
          source (str): source file where we want to change ids.
          labels (str): file with labels, indexed by the same ids as ``source``.
          outfile (str): output file (overwritten).
      """
      data_dict = core.data.read_id_values(source)
      labels_dict = core.data.read_labels(labels)

      with open(outfile, "w") as f:
          for key in data_dict.keys():
              splited = key.split(",")
              # The lookup uses the original id; only the written id is changed.
              splited[1] = labels_dict[key][0]
              core.data.write_line(",".join(splited), data_dict[key], out=f)
233b7d451   quillotm   adding changelabe...
78

f774442a8   quillotm   Add converter cre...
79
80
81
82
83
84
85
86
87
88
  def converter(file: str, outtype: str, outfile: str):
      """Create a converter file mapping each full id to a shortened id.

      The ids are the keys returned by ``core.data.read_id_values``. For each
      of them one line ``<full id> <short id>`` is written, where the short id
      joins the comma-separated fields 0, 1 and 3 of the full id.

      Args:
          file (str): file with ids from which create converter.
          outtype (str): accepted for the command line interface but currently
              not used by this function.
          outfile (str): path of the converter file to write (overwritten).

      Raises:
          IndexError: if an id has fewer than four comma-separated fields.
      """
      data = core.data.read_id_values(file)

      with open(outfile, "w") as of:
          for key in data:
              # Newline characters are removed once, then reused for both ids.
              full_id = key.replace("\n", "")
              splited = full_id.split(",")
              short_id = ",".join([splited[0], splited[1], splited[3]])
              of.write(full_id + " " + short_id + "\n")
62fc82e59   Quillot Mathias   Allow user to gen...
89
90
91
92
93
94
  if __name__ == '__main__':
      # Command line entry point: one sub-command per function of this module.
      # Each sub-parser stores its own name in "which", which is then used to
      # select the function to run.

      # Main parser
      parser = argparse.ArgumentParser(description="...")
      subparsers = parser.add_subparsers(title="action")

      # utt2char
      parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
      parser_utt2char.add_argument("--features", type=str, help="features file")
      parser_utt2char.add_argument("--outfile", type=str, help="output file")
      parser_utt2char.set_defaults(which="utt2char")

      # char2utt
      parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file")
      parser_char2utt.add_argument("--features", type=str, help="features file")
      parser_char2utt.add_argument("--outfile", type=str, help="output file")
      parser_char2utt.set_defaults(which="char2utt")

      # wavscp
      parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
      parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
      parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
      parser_wavscp.set_defaults(which="wavscp")

      # Change labels
      parser_changelabels = subparsers.add_parser("changelabels", help="...")
      parser_changelabels.add_argument("--source", required=True, type=str, help="source file where we want to change ids.")
      parser_changelabels.add_argument("--labels", required=True, type=str, help="file with labels")
      parser_changelabels.add_argument("--outfile", required=True, type=str, help="Output file")
      parser_changelabels.set_defaults(which="changelabels")

      # Create converter
      parser_converter = subparsers.add_parser("converter", help="Create converter file")
      parser_converter.add_argument("--file",
                                    type=str,
                                    required=True,
                                    help="File with ids from which create converter.")
      parser_converter.add_argument("--outtype", type=str, choices=["complet", "kaldi"])
      parser_converter.add_argument("--outfile", type=str, required=True, help="")
      parser_converter.set_defaults(which="converter")

      # Parse
      args = parser.parse_args()

      # Sub-commands are optional for argparse by default: when none is given,
      # "which" is never set. Report a usage error instead of failing later
      # with an AttributeError on args.which.
      if not hasattr(args, "which"):
          parser.error("an action is required")

      # Run commands
      runner = SubCommandRunner({
          "utt2char": utt2char,
          "char2utt": char2utt,
          "wavscp": wavscp,
          "changelabels": changelabels,
          "converter": converter
      })
      runner.run(args.which, args.__dict__, remove="which")