Blame view

egs/aspire/s5/local/multi_condition/reverberate_wavs.py 4.32 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
  #!/usr/bin/env python
  # Copyright 2014  Johns Hopkins University (Authors: Vijayaditya Peddinti).  Apache 2.0.
  #           2015  Tom Ko
  # script to generate multicondition training data / dev data / test data
  import argparse, glob, math, os, random, scipy.io.wavfile, sys
  
  class list_cyclic_iterator(object):
    """Endless iterator over a shuffled copy of a collection of items.

    The items are shuffled once, at construction time, and are then handed
    out in that fixed order forever: after the last item the cursor wraps
    around to the first one again.
    """

    def __init__(self, list, random_seed = 0):
      """Shuffle a private copy of `list` and point the cursor at its start.

      Args:
        list: the items to cycle through (any iterable).  The caller's
          object is not modified; only an internal copy is shuffled.
        random_seed: value passed to random.seed() right before shuffling,
          so the same seed and the same input always give the same order.
          Note that this reseeds the module-level random generator.
      """
      self.list_index = 0
      # The parameter name shadows the builtin, so the copy is made with a
      # comprehension rather than list(list).  Copying keeps the shuffle from
      # reordering the list object owned by the caller.
      self.list = [item for item in list]
      random.seed(random_seed)
      random.shuffle(self.list)

    def __iter__(self):
      """Return self so the object works with iter().

      The iteration never ends, so a plain for-loop over this object needs
      its own exit condition.
      """
      return self

    def __next__(self):
      """Return the item under the cursor, then advance (wrapping at the end).

      Raises:
        IndexError: if the iterator was built from an empty collection.
      """
      item = self.list[self.list_index]
      self.list_index = (self.list_index + 1) % len(self.list)
      return item

    next = __next__  # for Python 2
  
  def return_nonempty_lines(lines):
    """Strip every entry of `lines` and return those that still hold text.

    Leading and trailing whitespace is removed from each entry; entries that
    are empty after stripping are dropped.  The input order is preserved and
    a new list is returned.
    """
    stripped_entries = (entry.strip() for entry in lines)
    return [text for text in stripped_entries if len(text) > 0]
  
  if __name__ == "__main__":
    # Reads a list of input wav entries and a parallel list of output
    # entries, pairs each input with an impulse response (and, unless
    # disabled, a noise file and an SNR), and writes one wav-reverberate
    # command line per input to the output command file.
    parser = argparse.ArgumentParser()
    parser.add_argument('--snrs', type=str, default = '20:10:0', help='snrs to be used for corruption')
    parser.add_argument('--check-output-exists', type = str, default = 'True', help = 'process file only if output file does not exist', choices = ['True', 'true', 'False', 'false'])
    parser.add_argument('--random-seed', type = int, default = 0, help = 'seed to be used in the randomization of impulses')
    parser.add_argument('wav_file_list', type=str, help='wav.scp file to corrupt')
    parser.add_argument('output_wav_file_list', type=str, help='wav.scp file to write corrupted output')
    parser.add_argument('impulses_noises_dir', type=str, help='directory with impulses and noises and info directory (created by local/prep_rirs.sh)')
    parser.add_argument('output_command_file', type=str, help='file to output the corruption commands')
    params = parser.parse_args()

    # A lone "inf" SNR switches noise addition off: only reverberation is
    # applied in that case.
    snr_string_parts = params.snrs.split(':')
    add_noise = not (len(snr_string_parts) == 1 and snr_string_parts[0] == "inf")
    snrs = list_cyclic_iterator(snr_string_parts)

    # Convert the textual flag to a real boolean.  The method has to be
    # *called*: comparing the bound method itself to a string is always False.
    # NOTE(review): nothing further down in this script reads this flag.
    params.check_output_exists = params.check_output_exists.lower() == 'true'

    with open(params.wav_file_list, 'r') as wav_list_handle:
      wav_files = return_nonempty_lines(wav_list_handle.readlines())
    with open(params.output_wav_file_list, 'r') as out_list_handle:
      wav_out_files = return_nonempty_lines(out_list_handle.readlines())
    # The two lists are consumed in lockstep below, so they must line up.
    assert(len(wav_files) == len(wav_out_files))

    with open(params.impulses_noises_dir+'/info/impulse_files') as impulse_list_handle:
      impulses = list_cyclic_iterator(return_nonempty_lines(impulse_list_handle.readlines()), random_seed = params.random_seed)

    # Each info/noise_impulse_* file ties a set of impulse files to the noise
    # files that may be mixed with them.  Lines have the form "key = values",
    # with key being noise_files or impulse_files; lines starting with '#'
    # are skipped.
    impulse_noise_index = []
    for info_path in glob.glob(params.impulses_noises_dir+'/info/noise_impulse_*'):
      noises_list = []
      impulses_set = set()
      with open(info_path) as info_handle:
        info_lines = return_nonempty_lines(info_handle.readlines())
      for line in info_lines:
        line = line.strip()
        if len(line) == 0 or line[0] == '#':
          continue
        parts = line.split('=')
        key = parts[0].strip()
        if key == 'noise_files':
          noises_list = list_cyclic_iterator(parts[1].split())
        elif key == 'impulse_files':
          impulses_set = set(parts[1].split())
        else:
          raise Exception('Unknown format of ' + info_path)
      # Record one entry per info file, after all of its lines are parsed, so
      # the entry carries both the impulse set and the noise iterator
      # regardless of the order in which the two keys appear in the file.
      impulse_noise_index.append([impulses_set, noises_list])

    command_list = []
    for wav_entry, output_wav_file in zip(wav_files, wav_out_files):
      # Drop the first whitespace-separated field of the input entry (the
      # key of the wav.scp line); the remainder is what gets reverberated.
      wav_file = " ".join(wav_entry.split()[1:])
      impulse_file = next(impulses)
      assert(len(wav_file.strip()) > 0)
      assert(len(impulse_file.strip()) > 0)
      assert(len(output_wav_file.strip()) > 0)

      command = None
      if add_noise:
        # Use the first noise group whose impulse set contains this impulse.
        for impulses_set, noises_list in impulse_noise_index:
          if impulse_file in impulses_set:
            noise_file = next(noises_list)
            snr = next(snrs)
            assert(len(noise_file.strip()) > 0)
            assert(len(snr.strip()) > 0)
            command = "{4} {0} wav-reverberate --noise-file={2} --snr-db={3} - {1} - |\n".format(wav_file, impulse_file, noise_file, snr, output_wav_file)
            break
      if command is None:
        # Noise is disabled, or no noise group lists this impulse:
        # reverberate only.
        command = "{2} {0} wav-reverberate - {1} - |\n".format(wav_file, impulse_file, output_wav_file)
      command_list.append(command)

    with open(params.output_command_file, 'w') as file_handle:
      file_handle.write("".join(command_list))