Compare View

switch
from
...
to
 
Commits (3)

Changes

Showing 3 changed files Side-by-side Diff

... ... @@ -8,7 +8,6 @@ import core.data
8 8  
9 9  
10 10  
11   -
12 11 def filter_file(file, filter, outfile):
13 12 file_path = file
14 13 filter_path = filter
... ... @@ -24,8 +23,28 @@ def filter_file(file, filter, outfile):
24 23  
25 24  
def convert(file, type_from, type):
    """Placeholder for the ``convert`` sub-command.

    The body is only ``pass``: calling this function does nothing and
    returns ``None``, whatever arguments are given.

    NOTE(review): the ``type`` parameter shadows the builtin of the same
    name; presumably the name has to match the ``--type`` CLI option because
    the parsed arguments are forwarded by keyword -- confirm before renaming.
    """
    pass
28 28  
def utt2dur(wavscp: str, outfile: str):
    """Generate an utt2dur file from a wav scp file.

    Every non-empty line of ``wavscp`` is read as ``<utterance-id> <wav-path>``.
    Each listed WAV file is opened to compute its duration in seconds (number
    of frames divided by the frame rate), and one ``<utterance-id> <duration>``
    line is written to ``outfile``.

    Args:
        wavscp (str): path of the wav scp input file
        outfile (str): path of the utt2dur output file

    Raises:
        ValueError: if a non-empty line does not hold both an id and a path
        wave.Error: if a listed file cannot be parsed by the ``wave`` module
    """
    import wave

    with open(wavscp, "r") as f, open(outfile, "w") as of:
        for line_number, line in enumerate(f, start=1):
            # Split once on any run of whitespace: the id is the first token
            # and everything after it is the path, so tabs, repeated spaces
            # and paths containing spaces are all handled.
            fields = line.strip().split(maxsplit=1)
            if not fields:
                # Blank line (typically a trailing newline): nothing to do.
                continue
            if len(fields) != 2:
                raise ValueError(
                    f"{wavscp}:{line_number}: expected '<id> <wav path>', "
                    f"got {line.strip()!r}"
                )
            id_, wav_ = fields

            # wave.open() returns a context manager, so the file is closed
            # even if reading the header fails.
            with wave.open(wav_, "rb") as wav_f:
                duration = wav_f.getnframes() / float(wav_f.getframerate())

            of.write(f"{id_} {duration}\n")
  47 +
29 48  
30 49 if __name__ == "__main__":
31 50 # Main parser
... ... @@ -46,6 +65,16 @@ if __name__ == "__main__":
46 65 parser_convert.add_argument("--type", choices=["old-masseffect", "new-masseffect"], required=True)
47 66 parser_convert.set_defaults(which="convert")
48 67  
  68 + # utt2dur
  69 + parser_utt2dur = subparsers.add_parser("utt2dur", help="generate utt2dur file")
  70 + parser_utt2dur.add_argument("--wavscp", type=str, help="wav file", required=True)
  71 + parser_utt2dur.add_argument("--outfile", type=str, default="utt2dur", help="output file")
  72 + parser_utt2dur.set_defaults(which="utt2dur")
  73 +
  74 + # TODO: utt2label_to_label2utt
  75 +
  76 + # TODO: label2utt_to_utt2label
  77 +
49 78 # Parse
50 79 args = parser.parse_args()
51 80  
... ... @@ -53,6 +82,7 @@ if __name__ == "__main__":
53 82 runner = SubCommandRunner({
54 83 "convert" : convert,
55 84 "filter": filter_file,
  85 + "utt2dur": utt2dur
56 86 })
57 87  
58 88 runner.run(args.which, args.__dict__, remove="which")
1 1 import argparse
  2 +from os import path
2 3 import core.data
3 4 from utils import SubCommandRunner
4   -
  5 +import os
5 6  
6 7 def utt2char(features: str, outfile: str):
7 8 """Allow the user to generate utt2char file from masseffect features file.
... ... @@ -28,23 +29,66 @@ def char2utt(features: str, outfile: str):
28 29 pass
29 30  
30 31  
def wavscp(datadir: str, outfile: str):
    """Generate the masseffect wav scp file from the directories.

    The files located directly inside ``audio_en-us`` and ``audio_fr-fr``
    (sub-directories are not visited) must be named with at least four
    comma-separated fields before the extension. The first, second and fourth
    fields are used as language, character and record id; the third field is
    ignored. One ``<lang>,<character>,<record_id> <path>`` line is written per
    file, sorted by utterance id so the output does not depend on the
    directory listing order.

    Args:
        datadir (str): path of the data directory where "audio_en-us" and "audio_fr-fr" are available
        outfile (str): path of the wav scp output file

    Raises:
        Exception: if one of the directory is not available
        ValueError: if a file name has fewer than four comma-separated fields
    """
    audio_dirs = [
        os.path.join(datadir, name) for name in ("audio_en-us", "audio_fr-fr")
    ]

    if not all(os.path.isdir(audio_dir) for audio_dir in audio_dirs):
        raise Exception("Directory audio_en-us or audio_fr-fr does not exist")

    # Collect every entry before touching the output file, so a malformed
    # file name cannot leave a half-written wav scp behind.
    entries = []
    for audio_dir in audio_dirs:
        _, _, filenames = next(os.walk(audio_dir))
        for fn in filenames:
            # splitext() removes only the extension, so a dot inside one of
            # the fields does not truncate the name.
            stem, _ = os.path.splitext(fn)
            fields = stem.split(",")
            if len(fields) < 4:
                raise ValueError(
                    f"Unexpected file name {fn!r} in {audio_dir}: expected at "
                    "least 4 comma-separated fields"
                )
            lang, character, record_id = fields[0], fields[1], fields[3]
            wav_path = os.path.join(audio_dir, fn)
            entries.append((f"{lang},{character},{record_id}", wav_path))

    entries.sort()

    with open(outfile, "w") as f:
        for utt_id, wav_path in entries:
            f.write(f"{utt_id} {wav_path}\n")
  67 +
  68 +
  69 +
31 70 if __name__ == '__main__':
32 71 # Main parser
33 72 parser = argparse.ArgumentParser(description="...")
34 73 subparsers = parser.add_subparsers(title="action")
35 74  
36 75 # utt2char
37   - parser_utt2char = subparsers.add_parser("utt2char")
  76 + parser_utt2char = subparsers.add_parser("utt2char", help="generate utt2char file")
38 77 parser_utt2char.add_argument("--features", type=str, help="features file")
39 78 parser_utt2char.add_argument("--outfile", type=str, help="output file")
40 79 parser_utt2char.set_defaults(which="utt2char")
41 80  
42 81 # char2utt
43   - parser_char2utt = subparsers.add_parser("char2utt")
  82 + parser_char2utt = subparsers.add_parser("char2utt", help="generate char2utt file")
44 83 parser_char2utt.add_argument("--features", type=str, help="features file")
45 84 parser_char2utt.add_argument("--outfile", type=str, help="output file")
46 85 parser_char2utt.set_defaults(which="char2utt")
47 86  
  87 + # wavscp
  88 + parser_wavscp = subparsers.add_parser("wavscp", help="generate wav scp file")
  89 + parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
  90 + parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
  91 + parser_wavscp.set_defaults(which="wavscp")
48 92  
49 93 # Parse
50 94 args = parser.parse_args()
... ... @@ -53,6 +97,7 @@ if __name__ == '__main__':
53 97 runner = SubCommandRunner({
54 98 "utt2char" : utt2char,
55 99 "char2utt": char2utt,
  100 + "wavscp": wavscp
56 101 })
57 102  
58 103 runner.run(args.which, args.__dict__, remove="which")
59 104 \ No newline at end of file
... ... @@ -41,6 +41,10 @@ def spk2utt(features: str, outfile: str):
41 41 out.write(spk + " " + " ".join(ids) + "\n")
42 42  
43 43  
def wavscp(datadir: str, outfile: str):
    """Generate the wav scp file (not implemented yet).

    Args:
        datadir (str): path of the data directory
        outfile (str): path of the wav scp output file

    Raises:
        NotImplementedError: always, because the sub-command is still under
            construction. It is a subclass of ``Exception``, so callers
            catching ``Exception`` are unaffected.
    """
    raise NotImplementedError("Under construction")
  47 +
44 48 if __name__ == "__main__":
45 49 # Main parser
46 50 parser = argparse.ArgumentParser(description="Voxceleb data management")
... ... @@ -58,6 +62,12 @@ if __name__ == "__main__":
58 62 parser_spk2utt.add_argument("--outfile", default="spk2utt", help="output file")
59 63 parser_spk2utt.set_defaults(which="spk2utt")
60 64  
  65 + # wavscp
  66 + parser_wavscp = subparser.add_parser("wavscp", help="generate wav scp file")
  67 + parser_wavscp.add_argument("--datadir", required=True, help="data directory of masseffect")
  68 + parser_wavscp.add_argument("--outfile", default="wav.scp", help="wav.scp output file")
  69 + parser_wavscp.set_defaults(which="wavscp")
  70 +
61 71 # Parse
62 72 args = parser.parse_args()
63 73  
... ... @@ -65,6 +75,7 @@ if __name__ == "__main__":
65 75 runner = SubCommandRunner({
66 76 "utt2spk" : utt2spk,
67 77 "spk2utt": spk2utt,
  78 + "wavscp": wavscp
68 79 })
69 80  
70 81 runner.run(args.which, args.__dict__, remove="which")