Commit 88260de938b023b156eb46502688c7531f257dae

Authored by Quillot Mathias
1 parent 3b5a487de8
Exists in master

add utt2dur generation function to the data module

Showing 1 changed file with 31 additions and 1 deletion Inline Diff

1 import argparse 1 import argparse
2 import sys 2 import sys
3 3
4 from utils import SubCommandRunner 4 from utils import SubCommandRunner
5 5
6 import core.data 6 import core.data
7 7
8 8
9 9
10 10
11
def filter_file(file, filter, outfile):
    """Write to *outfile* the entries of *file* selected by *filter*.

    Args:
        file: path handed to ``core.data.read_id_values``.
        filter: path handed to ``core.data.read_lst``; the ids it yields
            decide which entries are written and in which order.
        outfile: path of the file to create (overwritten if it exists).

    NOTE(review): every id coming from *filter* is looked up directly in the
    data read from *file*; presumably that is a dict, so an id missing from
    *file* would raise ``KeyError`` -- confirm against ``core.data``.
    """
    values_by_id = core.data.read_id_values(file)
    wanted_ids = core.data.read_lst(filter)

    with open(outfile, "w") as out:
        # Iterate the filter list (not the data) so the output follows the
        # filter's ordering.
        for wanted in wanted_ids:
            core.data.write_line(wanted, values_by_id[wanted], out)
21 20
22 21
23 22
24 23
25 24
def convert(file, type_from, type):
    """Placeholder for the ``convert`` sub-command (not implemented yet).

    The arguments mirror the ``--file``, ``--type-from`` and ``--type``
    command-line options; they are currently ignored and nothing is done.
    """
    return None
28 28
def utt2dur(wavscp: str, outfile: str) -> None:
    """Generate an utt2dur file from a wav.scp-style listing.

    Each non-empty line of *wavscp* must look like ``<utt-id> <wav-path>``.
    For every entry the WAV header is read and a line
    ``<utt-id> <duration-in-seconds>`` is written to *outfile*.

    Args:
        wavscp: path of the input listing (one ``id path`` pair per line).
        outfile: path of the utt2dur file to create (overwritten if present).

    Raises:
        ValueError: if a non-empty line does not contain both an id and a
            path.
        wave.Error, OSError: if a listed file cannot be read as a WAV file.
    """
    # Kept local, like the original, so the module-level imports are untouched.
    import wave

    with open(wavscp, "r") as scp, open(outfile, "w") as out:
        for lineno, raw_line in enumerate(scp, start=1):
            line = raw_line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue

            # Split on any run of whitespace (spaces or tabs) and only once,
            # so that a path containing spaces is kept in one piece.
            fields = line.split(maxsplit=1)
            if len(fields) != 2:
                raise ValueError(
                    f"{wavscp}:{lineno}: expected '<id> <wav path>', got {line!r}"
                )
            id_, wav_path = fields

            # Wave_read objects are context managers, so the file is closed
            # even if reading the header fails.
            with wave.open(wav_path, "rb") as wav_f:
                duration = wav_f.getnframes() / float(wav_f.getframerate())

            out.write(f"{id_} {duration}\n")
47
48
if __name__ == "__main__":
    # Command-line entry point: one sub-command ("action") per function above.
    # Each sub-parser stores its own name in ``which`` so the matching
    # function can be selected after parsing.

    # Main parser
    parser = argparse.ArgumentParser(description="manage files")
    subparsers = parser.add_subparsers(title="actions")

    # Filter ids
    parser_filter = subparsers.add_parser("filter", help="filter a file")
    parser_filter.add_argument("--file", type=str, help="path of the file to filter", required=True)
    parser_filter.add_argument("--filter", type=str, help="filter file [id_from] [id_to]", required=True)
    parser_filter.add_argument("--outfile", type=str, help="output file", required=True)
    parser_filter.set_defaults(which="filter")

    # Convert
    parser_convert = subparsers.add_parser("convert", help="convert a file")
    parser_convert.add_argument("--file", type=str, help="...", required=True)
    parser_convert.add_argument("--type-from", type=str, choices=["old-masseffect", "new-masseffect"], help="...", required=True)
    parser_convert.add_argument("--type", choices=["old-masseffect", "new-masseffect"], required=True)
    parser_convert.set_defaults(which="convert")

    # utt2dur
    parser_utt2dur = subparsers.add_parser("utt2dur", help="generate utt2dur file")
    parser_utt2dur.add_argument("--wavscp", type=str, help="wav file", required=True)
    parser_utt2dur.add_argument("--outfile", type=str, default="utt2dur", help="output file")
    parser_utt2dur.set_defaults(which="utt2dur")

    # TODO: utt2label_to_label2utt

    # TODO: label2utt_to_utt2label

    # Parse
    args = parser.parse_args()

    # argparse treats sub-commands as optional by default, so calling the
    # script without an action leaves ``which`` unset. Report that as a usage
    # error instead of crashing with AttributeError below.
    if not hasattr(args, "which"):
        parser.error("an action is required; see --help")

    # Run commands
    runner = SubCommandRunner({
        "convert": convert,
        "filter": filter_file,
        "utt2dur": utt2dur,
    })

    # NOTE(review): presumably ``remove="which"`` makes the runner drop that
    # key before forwarding the remaining arguments to the selected function
    # -- confirm against utils.SubCommandRunner.
    runner.run(args.which, args.__dict__, remove="which")