reverberate_wavs.py
4.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
# Copyright 2014 Johns Hopkins University (Authors: Vijayaditya Peddinti). Apache 2.0.
# 2015 Tom Ko
# script to generate multicondition training data / dev data / test data
import argparse, glob, math, os, random, scipy.io.wavfile, sys
class list_cyclic_iterator(object):
    """Endless round-robin iterator over a shuffled list.

    The list is shuffled once at construction time; every ``next()`` call
    then returns the following item and wraps back to the first item after
    the last one, so the iterator never raises ``StopIteration``.

    Args:
        list: sequence of items to cycle through. It is kept by reference
            and shuffled in place, so the caller's list is reordered too.
            (The name shadows the builtin; it is kept so that existing
            keyword callers continue to work.)
        random_seed: value handed to ``random.seed`` right before the
            shuffle, so equal seeds and equal items give the same order.

    Raises:
        IndexError: from ``next()`` when the list is empty.
    """

    def __init__(self, list, random_seed = 0):
        self.list_index = 0
        self.list = list
        # NOTE: this reseeds the module-level random generator as a side
        # effect; kept as-is so the shuffle order stays unchanged.
        random.seed(random_seed)
        random.shuffle(self.list)

    def __iter__(self):
        # Required by the iterator protocol so instances can be used in
        # for-loops, zip(), itertools helpers, etc.
        return self

    def __next__(self):
        if len(self.list) == 0:
            raise IndexError('list_cyclic_iterator: cannot cycle over an empty list')
        item = self.list[self.list_index]
        # Advance and wrap around to the beginning after the last item.
        self.list_index = (self.list_index + 1) % len(self.list)
        return item

    next = __next__  # for Python 2
def return_nonempty_lines(lines):
    """Return the stripped form of every line that is not blank.

    Args:
        lines: iterable of strings, e.g. the result of ``readlines()``.

    Returns:
        A new list with each input line stripped of leading and trailing
        whitespace, in the original order, leaving out lines that are
        empty or contain only whitespace.
    """
    # Strip each line once, then keep only the ones with content left.
    stripped_lines = (line.strip() for line in lines)
    return [line for line in stripped_lines if len(line) > 0]
def _parse_args(argv=None):
    """Parse the command line and return the argparse namespace.

    ``check_output_exists`` is converted from its string form to a bool.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--snrs', type=str, default = '20:10:0', help='snrs to be used for corruption')
    parser.add_argument('--check-output-exists', type = str, default = 'True', help = 'process file only if output file does not exist', choices = ['True', 'true', 'False', 'false'])
    parser.add_argument('--random-seed', type = int, default = 0, help = 'seed to be used in the randomization of impulses')
    parser.add_argument('wav_file_list', type=str, help='wav.scp file to corrupt')
    parser.add_argument('output_wav_file_list', type=str, help='wav.scp file to write corrupted output')
    parser.add_argument('impulses_noises_dir', type=str, help='directory with impulses and noises and info directory (created by local/prep_rirs.sh)')
    parser.add_argument('output_command_file', type=str, help='file to output the corruption commands')
    params = parser.parse_args(argv)
    # The previous comparison used the bound method `str.lower` (never
    # called) against 'True', so the flag was always False.
    # NOTE(review): nothing in this script reads the flag afterwards.
    params.check_output_exists = params.check_output_exists.lower() == 'true'
    return params


def _read_nonempty_lines(path):
    """Return the stripped, non-blank lines of the text file at `path`."""
    with open(path, 'r') as handle:
        return return_nonempty_lines(handle.readlines())


def _require_nonempty(description, value):
    """Raise ValueError when `value` is empty or whitespace-only."""
    if len(value.strip()) == 0:
        raise ValueError('empty {0} while building the corruption commands'.format(description))


def _load_impulse_noise_index(impulses_noises_dir):
    """Read every info/noise_impulse_* file into [impulse_set, noise_cycle] pairs.

    Each file may contain '#' comment lines plus 'noise_files = ...' and
    'impulse_files = ...' lines holding whitespace-separated entries.
    """
    impulse_noise_index = []
    for info_file in glob.glob(impulses_noises_dir+'/info/noise_impulse_*'):
        noises_list = []
        impulses_set = set()
        for line in _read_nonempty_lines(info_file):
            if line[0] == '#':
                continue
            # partition keeps any further '=' characters inside the value
            # instead of silently truncating it at the second '='.
            key, separator, value = line.partition('=')
            key = key.strip()
            if separator and key == 'noise_files':
                noises_list = list_cyclic_iterator(value.split())
            elif separator and key == 'impulse_files':
                impulses_set = set(value.split())
            else:
                raise Exception('Unknown format of ' + info_file)
        impulse_noise_index.append([impulses_set, noises_list])
    return impulse_noise_index


def _build_commands(wav_files, wav_out_files, impulses, impulse_noise_index, snrs, add_noise):
    """Return one wav-reverberate command line per input wav entry."""
    command_list = []
    for wav_line, output_wav_file in zip(wav_files, wav_out_files):
        # Drop the first field of the wav.scp line; the rest is the
        # command/path that produces the audio.
        wav_file = " ".join(wav_line.split()[1:])
        impulse_file = next(impulses)
        _require_nonempty('wav entry', wav_file)
        _require_nonempty('impulse file', impulse_file)
        _require_nonempty('output wav entry', output_wav_file)

        command = None
        if add_noise:
            # Use the first noise list whose impulse set contains the
            # chosen impulse response.
            for impulses_set, noises in impulse_noise_index:
                if impulse_file in impulses_set:
                    noise_file = next(noises)
                    snr = next(snrs)
                    _require_nonempty('noise file', noise_file)
                    _require_nonempty('snr', snr)
                    command = "{4} {0} wav-reverberate --noise-file={2} --snr-db={3} - {1} - |\n".format(wav_file, impulse_file, noise_file, snr, output_wav_file)
                    break
        if command is None:
            # No noise requested, or no noise list covers this impulse:
            # reverberate only.
            command = "{2} {0} wav-reverberate - {1} - |\n".format(wav_file, impulse_file, output_wav_file)
        command_list.append(command)
    return command_list


def main(argv=None):
    """Generate the corruption command file described by the command line.

    Reads the input and output wav lists plus the impulse/noise info files,
    pairs every input wav with a cyclically chosen impulse response (and a
    noise file and SNR when available), and writes the resulting
    wav-reverberate command lines to the output command file.

    Raises:
        ValueError: if the two wav lists differ in length or a required
            field is empty.
    """
    params = _parse_args(argv)

    # A lone "inf" SNR means no additive noise at all.
    snr_string_parts = params.snrs.split(':')
    add_noise = not (len(snr_string_parts) == 1 and snr_string_parts[0] == "inf")
    snrs = list_cyclic_iterator(snr_string_parts)

    wav_files = _read_nonempty_lines(params.wav_file_list)
    wav_out_files = _read_nonempty_lines(params.output_wav_file_list)
    if len(wav_files) != len(wav_out_files):
        raise ValueError('{0} has {1} entries but {2} has {3}'.format(
            params.wav_file_list, len(wav_files),
            params.output_wav_file_list, len(wav_out_files)))

    impulses = list_cyclic_iterator(
        _read_nonempty_lines(params.impulses_noises_dir+'/info/impulse_files'),
        random_seed = params.random_seed)
    impulse_noise_index = _load_impulse_noise_index(params.impulses_noises_dir)

    command_list = _build_commands(wav_files, wav_out_files, impulses,
                                   impulse_noise_index, snrs, add_noise)

    with open(params.output_command_file, 'w') as file_handle:
        file_handle.write("".join(command_list))


if __name__ == "__main__":
    main()