Blame view

egs/wsj/s5/utils/data/internal/perturb_volume.py 3.9 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
  #!/usr/bin/env python
  
  # Copyright 2017  Vimal Manohar
  # Apache 2.0
  
  """
  This script reads a wav.scp file from the input and perturbs the
  volume of the recordings and writes to stdout the contents of
  a new wav.scp file.
  """
  from __future__ import print_function
  
  import argparse
  import re
  import random
  import sys
  
  def get_args():
      """Parse and return the command-line arguments of this script.

      Defines the options --scale-low, --scale-high, --reco2vol and
      --write-reco2vol.  An empty string given for --reco2vol or
      --write-reco2vol is normalized to None, so callers only need to
      test those attributes against None.

      Returns:
          The argparse.Namespace obtained from parsing sys.argv.
      """
      parser = argparse.ArgumentParser(description="""
          This script reads a wav.scp file from the input and perturbs the
          volume of the recordings and writes to stdout the contents of
          a new wav.scp file.
          If --reco2vol is provided, then for each recording, the volume factor
          specified in that file is applied.
          Otherwise, a volume factor is chosen randomly from a uniform
          distribution between --scale-low and --scale-high.
          """)

      parser.add_argument("--scale-low", type=float, default=0.125,
                          help="Minimum volume scale to be applied.")
      parser.add_argument("--scale-high", type=float, default=2,
                          help="Maximum volume scale to be applied.")
      parser.add_argument("--reco2vol", type=str, default=None,
                          help="If supplied, it must be a file of the format "
                          "<reco-id> <volume-scale>, which specifies the "
                          "volume scale to be applied for each recording.")
      parser.add_argument("--write-reco2vol", type=str, default=None,
                          help="If provided, the volume scale used for each "
                          "recording will be written to this file")
      args = parser.parse_args()

      # Wrapper scripts may pass an empty string to mean "option not set";
      # treat that exactly like an absent option.
      if args.reco2vol == "":
          args.reco2vol = None
      if args.write_reco2vol == "":
          args.write_reco2vol = None

      return args
  
  
  def read_reco2vol(volumes_file):
      """Load per-recording volume scales from a text file.

      Each non-blank line of volumes_file must have the form
      <reco-id> <volume-scale>; blank lines are ignored.  If a reco-id
      appears more than once, the last occurrence wins.

      Returns a dictionary { reco-id : volume-scale } with float values.
      Raises RuntimeError when a non-blank line does not consist of
      exactly two whitespace-separated fields.
      """
      reco_to_scale = {}
      with open(volumes_file) as handle:
          for raw_line in handle:
              stripped = raw_line.strip()
              if not stripped:
                  continue

              fields = stripped.split()
              if len(fields) == 2:
                  reco_id, scale = fields
                  reco_to_scale[reco_id] = float(scale)
              else:
                  raise RuntimeError("Unable to parse the line {0} in file {1}."
                                     "".format(stripped, volumes_file))
      return reco_to_scale
  
  
  def run(args):
      """Read wav.scp entries from stdin and print volume-perturbed entries.

      For every non-blank input line, one wav.scp line is printed to stdout
      whose command pipes the audio through 'sox --vol <scale>'.  The scale
      is taken from the args.reco2vol file when one is given; otherwise it
      is drawn with random.uniform(args.scale_low, args.scale_high).  The
      random generator is seeded with 0, so the drawn scales are
      reproducible from run to run.

      Args:
          args: namespace with the attributes scale_low, scale_high,
              reco2vol and write_reco2vol.  If write_reco2vol is not None,
              a "<reco-id> <scale>" line is written to that file for each
              recording.

      Raises:
          RuntimeError: if args.reco2vol is given but contains no scale for
              a recording id read from stdin.
      """
      random.seed(0)

      volumes = None
      if args.reco2vol is not None:
          volumes = read_reco2vol(args.reco2vol)

      volume_writer = None
      if args.write_reco2vol is not None:
          volume_writer = open(args.write_reco2vol, 'w')

      # An rxfilename ending in ':<digits>' addresses an offset inside a file.
      offset_suffix = re.compile(r':[0-9]+$')

      try:
          for line in sys.stdin:
              entry = line.strip()
              if not entry:
                  continue
              parts = entry.split()
              reco_id = parts[0]
              wav = ' '.join(parts[1:])

              if volumes is None:
                  vol = random.uniform(args.scale_low, args.scale_high)
              elif reco_id in volumes:
                  vol = volumes[reco_id]
              else:
                  raise RuntimeError('Could not find volume for id {0} in '
                                     '{1}'.format(reco_id, args.reco2vol))

              # Handle three cases of rxfilenames appropriately;
              # 'input piped command', 'file offset' and 'filename'
              if entry.endswith('|'):
                  # Already a pipe: append sox as one more pipeline stage.
                  print('{0} sox --vol {1} -t wav - -t wav - |'.format(
                      entry, vol))
              elif offset_suffix.search(entry) is not None:
                  # sox cannot read 'file:offset'; let wav-copy extract it.
                  print('{id} wav-copy {wav} - | '
                        'sox --vol {vol} -t wav - -t wav - |'.format(
                            id=reco_id, wav=wav, vol=vol))
              else:
                  print('{id} sox --vol {vol} -t wav {wav} -t wav - |'.format(
                      id=reco_id, wav=wav, vol=vol))

              if volume_writer is not None:
                  volume_writer.write('{id} {vol}\n'.format(id=reco_id,
                                                            vol=vol))
      finally:
          # Always release the reco2vol output file, even on errors.
          if volume_writer is not None:
              volume_writer.close()
  
  
  def main():
      """Script entry point: parse the command line and process stdin."""
      run(get_args())
  
  
  # Run main() only when this file is executed as a script, not on import.
  if __name__ == "__main__":
      main()