#!/usr/bin/env python3
# This script is called by local/make_dihard_2018_dev.sh, and it creates the
# necessary files (wav.scp, utt2spk, segments, rttm, reco2num_spk) for the
# DIHARD 2018 development data directory.
import sys, os
def prepare_dihard_2018_dev(src_dir, data_dir):
    """Create the data-directory files for the DIHARD 2018 development set.

    Walks ``src_dir`` looking for ``*.lab`` files. The recording id is the
    file's base name up to its first ``.``. For every recording found, the
    following is appended to files inside ``data_dir``:

    * ``segments``     -- ``<rec>_<NNNN> <rec> <start> <end>``, one line per
      non-blank ``.lab`` line (``NNNN`` is a zero-padded running index);
    * ``utt2spk``      -- ``<rec>_<NNNN> <rec>`` for each of those segments;
    * ``wav.scp``      -- a ``sox`` pipe command that reads
      ``<src_dir>/data/flac/<rec>.flac``;
    * ``rttm``         -- the contents of ``<src_dir>/data/rttm/<rec>.rttm``;
    * ``reco2num_spk`` -- ``<rec> <n>``, where ``n`` is the number of distinct
      values in the 8th whitespace-separated field of that RTTM file.

    Args:
        src_dir: Directory searched recursively for ``.lab`` files; it must
            contain ``data/rttm/<rec>.rttm`` for every ``.lab`` file found.
        data_dir: Existing directory that receives the five output files
            (any previous versions are overwritten).

    Returns:
        Always 0.

    Raises:
        OSError: if an input or output file cannot be opened.
        ValueError: if a non-blank ``.lab`` line does not have exactly three
            whitespace-separated fields.
        IndexError: if a non-blank RTTM line has fewer than eight fields.
    """
    # One ``with`` owns all five outputs so they are flushed and closed even
    # when a malformed or missing input raises part-way through the walk.
    with open(os.path.join(data_dir, "wav.scp"), 'w') as wavscp_fi, \
            open(os.path.join(data_dir, "utt2spk"), 'w') as utt2spk_fi, \
            open(os.path.join(data_dir, "segments"), 'w') as segments_fi, \
            open(os.path.join(data_dir, "rttm"), 'w') as rttm_fi, \
            open(os.path.join(data_dir, "reco2num_spk"), 'w') as reco2num_spk_fi:
        for subdir, dirs, files in os.walk(src_dir):
            # Sorting in place steers os.walk's descent; together with the
            # sorted file list it makes the output order reproducible instead
            # of depending on the filesystem's listing order.
            dirs.sort()
            for name in sorted(files):
                if not name.endswith(".lab"):
                    continue
                utt = name.split(".")[0]

                # Blank lines (e.g. a trailing empty line) are skipped rather
                # than crashing the three-way unpacking below.
                with open(os.path.join(subdir, name), 'r') as lab_fh:
                    lab_rows = [line.split() for line in lab_fh if line.strip()]
                for segment_id, (start, end, _label) in enumerate(lab_rows):
                    segment_id_str = "{}_{}".format(utt, str(segment_id).zfill(4))
                    segments_fi.write(
                        "{} {} {} {}\n".format(segment_id_str, utt, start, end))
                    utt2spk_fi.write("{} {}\n".format(segment_id_str, utt))

                wav_str = "{} sox -t flac {}/data/flac/{}.flac -t wav -r 16k "\
                          "-b 16 - channels 1 |\n".format(utt, src_dir, utt)
                wavscp_fi.write(wav_str)

                # Read the RTTM once and reuse the text for both outputs.
                rttm_path = os.path.join(src_dir, "data", "rttm", utt + ".rttm")
                with open(rttm_path, 'r') as rttm_fh:
                    rttm_str = rttm_fh.read()
                # Without a final newline the next recording's first line
                # would be glued onto this recording's last line.
                if rttm_str and not rttm_str.endswith("\n"):
                    rttm_str += "\n"
                rttm_fi.write(rttm_str)

                rttm_rows = (line.split() for line in rttm_str.split("\n"))
                speakers = {fields[7] for fields in rttm_rows if fields}
                reco2num_spk_fi.write("{} {}\n".format(utt, len(speakers)))
    return 0
def main():
    """Command-line entry point: ``<script> <src_dir> <data_dir>``.

    Reads the source and output directories from ``sys.argv``, creates the
    output directory if it does not exist yet, and runs
    ``prepare_dihard_2018_dev`` on them. Arguments beyond the first two are
    ignored.

    Returns:
        Always 0.

    Raises:
        SystemExit: with a usage message when fewer than two arguments are
            supplied.
    """
    if len(sys.argv) < 3:
        # Exit with a readable message instead of a bare IndexError.
        sys.exit("Usage: {} <src-dir> <data-dir>".format(sys.argv[0]))
    src_dir, data_dir = sys.argv[1:3]
    # exist_ok avoids the race between a separate exists() check and the
    # directory creation.
    os.makedirs(data_dir, exist_ok=True)
    prepare_dihard_2018_dev(src_dir, data_dir)
    return 0
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()