Commit 88260de938b023b156eb46502688c7531f257dae
1 parent
3b5a487de8
Exists in
master
add utt2dur generation function to the data module
Showing 1 changed file with 31 additions and 1 deletion Inline Diff
volia/data.py
1 | import argparse | 1 | import argparse |
2 | import sys | 2 | import sys |
3 | 3 | ||
4 | from utils import SubCommandRunner | 4 | from utils import SubCommandRunner |
5 | 5 | ||
6 | import core.data | 6 | import core.data |
7 | 7 | ||
8 | 8 | ||
9 | 9 | ||
10 | 10 | ||
11 | |||
def filter_file(file, filter, outfile):
    """Write to *outfile* the entries of *file* whose ids are listed in *filter*.

    Args:
        file: path handed to ``core.data.read_id_values`` (the id -> values data).
        filter: path handed to ``core.data.read_lst`` (the ids to keep).
        outfile: path of the file to create; it is opened in write mode.

    Entries are emitted in the order the ids appear in the filter list.
    An id that is absent from the loaded data is not handled here: the
    lookup failure propagates to the caller.
    """
    values_by_id = core.data.read_id_values(file)
    wanted_ids = core.data.read_lst(filter)

    with open(outfile, "w") as sink:
        for wanted in wanted_ids:
            core.data.write_line(wanted, values_by_id[wanted], sink)
21 | 20 | ||
22 | 21 | ||
23 | 22 | ||
24 | 23 | ||
25 | 24 | ||
def convert(file, type_from, type):
    """Placeholder for the ``convert`` sub-command; not implemented yet.

    Args:
        file: value of the ``--file`` option.
        type_from: value of the ``--type-from`` option.
        type: value of the ``--type`` option.

    The arguments are accepted and ignored; nothing is read or written.
    """
    return None
28 | 28 | ||
def utt2dur(wavscp: str, outfile: str):
    """Generate an utt2dur file from a wav.scp-style listing.

    Each non-empty line of *wavscp* must hold an utterance id followed by
    the path of a WAV file, separated by any run of whitespace (spaces or
    tabs). Everything after the id is taken as the path, so paths that
    contain spaces are supported. Blank lines are skipped.

    For every entry, one line ``<id> <duration>`` is written to *outfile*,
    where the duration is the number of frames divided by the frame rate
    (i.e. seconds) as reported by the WAV header.

    Args:
        wavscp: path of the input listing.
        outfile: path of the file to create (overwritten if it exists).

    Raises:
        ValueError: if a non-empty line does not contain both an id and a path.
        wave.Error / OSError: if a WAV file cannot be opened or parsed.
    """
    # Kept local, as in the rest of this function's history, so the module
    # does not pay for the import unless this sub-command is used.
    import wave

    with open(wavscp, "r") as f, open(outfile, "w") as of:
        for line_no, line in enumerate(f, start=1):
            entry = line.strip()
            if not entry:
                # Tolerate empty / trailing blank lines.
                continue

            # Split on arbitrary whitespace, once: id first, the rest is the path.
            fields = entry.split(maxsplit=1)
            if len(fields) != 2:
                raise ValueError(
                    f"{wavscp}:{line_no}: expected '<id> <wav path>', got {entry!r}"
                )
            id_, wav_ = fields

            # wave objects are context managers, so the file is closed on exit.
            with wave.open(wav_, "rb") as wav_f:
                frames = wav_f.getnframes()
                rate = wav_f.getframerate()
            duration = frames / float(rate)

            of.write(f"{id_} {duration}\n")
47 | |||
48 | |||
if __name__ == "__main__":
    # Command-line entry point: one sub-command per data-management action.
    # Each sub-parser stores its own name under "which" so the matching
    # function can be dispatched after parsing.

    # Main parser
    parser = argparse.ArgumentParser(description="manage files")
    subparsers = parser.add_subparsers(title="actions")

    # Filter ids
    parser_filter = subparsers.add_parser("filter", help="filter a file")
    parser_filter.add_argument(
        "--file", type=str, help="path of the file to filter", required=True
    )
    parser_filter.add_argument(
        "--filter", type=str, help="filter file [id_from] [id_to]", required=True
    )
    parser_filter.add_argument(
        "--outfile", type=str, help="output file", required=True
    )
    parser_filter.set_defaults(which="filter")

    # Convert
    parser_convert = subparsers.add_parser("convert", help="convert a file")
    parser_convert.add_argument("--file", type=str, help="...", required=True)
    parser_convert.add_argument(
        "--type-from",
        type=str,
        choices=["old-masseffect", "new-masseffect"],
        help="...",
        required=True,
    )
    parser_convert.add_argument(
        "--type", choices=["old-masseffect", "new-masseffect"], required=True
    )
    parser_convert.set_defaults(which="convert")

    # utt2dur
    parser_utt2dur = subparsers.add_parser("utt2dur", help="generate utt2dur file")
    parser_utt2dur.add_argument("--wavscp", type=str, help="wav file", required=True)
    parser_utt2dur.add_argument(
        "--outfile", type=str, default="utt2dur", help="output file"
    )
    parser_utt2dur.set_defaults(which="utt2dur")

    # TODO: utt2label_to_label2utt

    # TODO: label2utt_to_utt2label

    # Parse
    args = parser.parse_args()

    # Sub-commands are optional for argparse on Python 3, so "which" is only
    # present when an action was actually given. Report a usage error instead
    # of failing later with an AttributeError on args.which.
    if not hasattr(args, "which"):
        parser.error("an action is required (filter, convert or utt2dur)")

    # Run commands
    runner = SubCommandRunner({
        "convert": convert,
        "filter": filter_file,
        "utt2dur": utt2dur,
    })

    # "which" is only a dispatch key, so it is removed before the remaining
    # options are forwarded to the selected function.
    runner.run(args.which, args.__dict__, remove="which")