data.py
2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import argparse
import sys
from utils import SubCommandRunner
import core.data
def filter_file(file, filter, outfile):
    """Write to *outfile* the entries of *file* selected by the ids in *filter*.

    Both inputs are loaded with ``core.data.read_id_values``. The ids
    obtained by iterating the filter data are looked up one by one in the
    file data, and each (id, value) pair is emitted through
    ``core.data.write_line`` in that iteration order.

    Args:
        file: path of the file to filter.
        filter: path of the file providing the ids to keep.
        outfile: path of the file to write; opened in ``"w"`` mode.

    NOTE(review): an id present in *filter* but absent from *file* is looked
    up unconditionally -- presumably a ``KeyError`` if ``read_id_values``
    returns a dict; confirm against ``core.data``.
    """
    # Load both inputs before the output file is opened/truncated.
    source_values = core.data.read_id_values(file)
    wanted_ids = core.data.read_id_values(filter)

    with open(outfile, "w") as sink:
        for key in wanted_ids:
            core.data.write_line(key, source_values[key], sink)
def convert(file, type_from, type):
    """Placeholder for the ``convert`` action; currently does nothing.

    Args:
        file: path of the file to convert (unused).
        type_from: name of the source format (unused).
        type: name of the target format (unused).

    Returns:
        None. No conversion is implemented yet.
    """
    return None
def utt2dur(wavscp: str, outfile: str):
    """Write one ``<id> <duration>`` line per entry of a ``<id> <wav path>`` list.

    Every non-blank line of *wavscp* is split on whitespace; the first field
    is the id and the second is the path of a wav file. The file is opened
    with the standard ``wave`` module and its duration in seconds (number of
    frames divided by the frame rate) is written to *outfile*.

    Blank lines are skipped, and runs of spaces or tabs between the fields
    are accepted, so a trailing empty line or a tab-separated list no longer
    breaks the run.

    Args:
        wavscp: path of the input list, one ``<id> <wav path>`` per line.
        outfile: path of the output file; opened in ``"w"`` mode.

    Raises:
        ValueError: if a non-blank line has an id but no wav path.
        OSError, wave.Error: if a listed wav file cannot be opened or parsed.
    """
    import wave

    with open(wavscp, "r") as f, open(outfile, "w") as of:
        for line_no, line in enumerate(f, start=1):
            # str.split() with no argument collapses any whitespace run and
            # drops the trailing newline.
            fields = line.split()
            if not fields:
                continue  # blank line: nothing to measure
            if len(fields) < 2:
                raise ValueError(
                    f"{wavscp}:{line_no}: expected '<id> <wav path>', got {line!r}"
                )
            id_, wav_ = fields[0], fields[1]

            # The wave reader is its own context manager and closes the file.
            with wave.open(wav_, "rb") as wav_f:
                duration = wav_f.getnframes() / float(wav_f.getframerate())

            of.write(f"{id_} {duration}\n")
def build_parser():
    """Build the command-line parser with its three actions.

    The actions are ``filter``, ``convert`` and ``utt2dur``. Each sub-parser
    stores its own name under ``which`` so the caller can tell which action
    was selected.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    # Main parser
    parser = argparse.ArgumentParser(description="manage files")
    subparsers = parser.add_subparsers(title="actions")

    # Filter ids
    parser_filter = subparsers.add_parser("filter", help="filter a file")
    parser_filter.add_argument("--file", type=str, help="path of the file to filter", required=True)
    parser_filter.add_argument("--filter", type=str, help="filter file [id_from] [id_to]", required=True)
    parser_filter.add_argument("--outfile", type=str, help="output file", required=True)
    parser_filter.set_defaults(which="filter")

    # Convert
    parser_convert = subparsers.add_parser("convert", help="convert a file")
    parser_convert.add_argument("--file", type=str, help="...", required=True)
    parser_convert.add_argument("--type-from", type=str, choices=["old-masseffect", "new-masseffect"], help="...", required=True)
    parser_convert.add_argument("--type", choices=["old-masseffect", "new-masseffect"], required=True)
    parser_convert.set_defaults(which="convert")

    # utt2dur
    parser_utt2dur = subparsers.add_parser("utt2dur", help="generate utt2dur file")
    parser_utt2dur.add_argument("--wavscp", type=str, help="wav file", required=True)
    parser_utt2dur.add_argument("--outfile", type=str, default="utt2dur", help="output file")
    parser_utt2dur.set_defaults(which="utt2dur")

    # TODO: utt2label_to_label2utt
    # TODO: label2utt_to_utt2label
    return parser


def parse_cli_args(argv=None):
    """Parse the command line and insist that an action was chosen.

    Args:
        argv: list of argument strings; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: the parsed options, always carrying ``which``.

    Raises:
        SystemExit: with status 2 (via ``parser.error``) when the arguments
            are invalid or no action was given.
    """
    parser = build_parser()
    args = parser.parse_args(argv)
    # Sub-commands are optional for argparse on Python 3: with no action the
    # namespace has no ``which`` attribute, so report a usage error here
    # instead of failing later with an AttributeError.
    if not hasattr(args, "which"):
        parser.error("an action is required: filter, convert or utt2dur")
    return args


if __name__ == "__main__":
    # Parse
    args = parse_cli_args()

    # Run commands
    # NOTE(review): SubCommandRunner comes from utils; presumably it calls the
    # function registered under args.which with the remaining namespace
    # entries and drops the "which" key -- confirm in utils.
    runner = SubCommandRunner({
        "convert": convert,
        "filter": filter_file,
        "utt2dur": utt2dur,
    })
    runner.run(args.which, vars(args), remove="which")