Blame view

egs/wsj/s5/utils/data/resample_data_dir.sh 1.23 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
  #! /bin/bash
  
  # Copyright 2016  Vimal Manohar
  #           2018  Xiaohui Zhang
  # Apache 2.0.
  
  # Rewrites <data-dir>/wav.scp so that every entry ends in a sox stage that
  # converts the audio to mono, 16-bit wav at sampling rate <frequency>.
  #
  # Arguments:
  #   $1  target sampling rate, passed verbatim to the sox "rate" effect
  #   $2  data directory containing wav.scp
  # Side effects:
  #   - feats.scp (and cmvn.scp, if present) are moved into <data-dir>/.backup
  #   - utt2dur is generated first if it is missing or empty
  #   - wav.scp is replaced; an entry that already ends in '|' simply gets one
  #     more sox stage appended, so running the script twice stacks two stages
  # Exit status: non-zero on usage error, missing sox, missing wav.scp, or any
  # failing step; wav.scp is left untouched if the rewrite itself fails.
  if [ $# -ne 2 ]; then
    echo "This script adds a sox line in wav.scp to resample the audio at a "
    echo "different sampling-rate"
    echo "Usage: $0 <frequency> <data-dir>"
    echo " e.g.: $0 8000 data/dev"
    exit 1
  fi

  freq=$1
  dir=$2

  # command -v is the shell builtin for locating a program on PATH.
  sox=$(command -v sox) || { echo "Could not find sox in PATH" >&2; exit 1; }

  # Fail before touching anything else: without wav.scp there is nothing to
  # resample, and we must not move feats.scp away for no reason.
  if [ ! -f "$dir/wav.scp" ]; then
    echo "$0: expected file $dir/wav.scp to exist" >&2
    exit 1
  fi

  if [ -f "$dir/feats.scp" ]; then
    mkdir -p "$dir/.backup" || exit 1
    mv "$dir/feats.scp" "$dir/.backup/" || exit 1
    if [ -f "$dir/cmvn.scp" ]; then
      mv "$dir/cmvn.scp" "$dir/.backup/" || exit 1
    fi
    echo "$0: feats.scp already exists. Moving it to $dir/.backup"
  fi

  # After resampling we cannot compute utt2dur from wav.scp any more,
  # so we create utt2dur now, in case it's needed later
  if [ ! -s "$dir/utt2dur" ]; then
    utils/data/get_utt2dur.sh "$dir" 1>&2 || exit 1
  fi

  # Build the new table in a temporary file and only swap it in once awk has
  # succeeded, so a failure can never leave wav.scp truncated or missing.
  # The sox path and the rate are handed to awk as data (-v) instead of being
  # spliced into program text, and input is read from stdin so that an odd
  # directory name is never mistaken for an awk command-line assignment.
  tmp="$dir/wav.scp.tmp"
  awk -v sox="$sox" -v freq="$freq" '
    BEGIN { stage = sox " -t wav - -c 1 -b 16 -t wav - rate " freq " |" }
    {
      # Trim surrounding blanks and any trailing CR; editing $0 re-splits it.
      sub(/^[ \t\r]+/, "")
      sub(/[ \t\r]+$/, "")
    }
    # Blank lines carry no entry; drop them instead of failing on them.
    NF == 0 { next }
    # Entry is already a pipeline: append the resampling stage as-is.
    $NF == "|" { print $0 " " stage; next }
    {
      # Entry is "<utt-id> <path>": turn it into "cat <path> | <sox stage>",
      # joining the remaining fields with single spaces.
      rest = $2
      for (i = 3; i <= NF; i++) rest = rest " " $i
      print $1 " cat " rest " | " stage
    }
  ' < "$dir/wav.scp" > "$tmp" || {
    rm -f "$tmp"
    echo "$0: failed to rewrite $dir/wav.scp; original left untouched" >&2
    exit 1
  }
  mv "$tmp" "$dir/wav.scp" || exit 1