Blame view

egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh 3.1 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
  #!/bin/bash
  
  # Copyright 2016-2018  Johns Hopkins University (author: Daniel Povey)
  #                2018  Hossein Hadian
  
  # Apache 2.0
  
  # This script does the standard 3-way speed perturbing of
  # a data directory (it operates on the wav.scp).
  
  # If you add the option "--always-include-prefix true", it will include the
  # prefix "sp1.0-" for the original un-perturbed data.  This can help resolve
  # problems with sorting.
  # We don't make '--always-include-prefix true' the default behavior because
  # it can break some older scripts that relied on the original utterance-ids
  # being a subset of the perturbed data's utterance-ids.
  
  # Default value of the --always-include-prefix option.  The option itself is
  # expected to be applied by utils/parse_options.sh, which is sourced next
  # (that script is not shown here).
  always_include_prefix=false

  . utils/parse_options.sh

  # Exactly two positional arguments (<srcdir> <destdir>) must remain at this
  # point; otherwise print the usage text on stdout and exit with status 1.
  if [ $# -ne 2 ]; then
    printf '%s\n' \
      "Usage: perturb_data_dir_speed_3way.sh <srcdir> <destdir>" \
      "Applies standard 3-way speed perturbation using factors of 0.9, 1.0 and 1.1." \
      "e.g.:" \
      " $0 [options] data/train data/train_sp" \
      "Note: if <destdir>/feats.scp already exists, this will refuse to run." \
      "Options:" \
      "    --always-include-prefix [true|false]   # default: false.  If set to true," \
      "                                           # it will add the prefix 'sp1.0-' to" \
      "                                           # utterance and speaker-ids for data at" \
      "                                           # the original speed.  Can resolve" \
      "                                           # issues RE data sorting."
    exit 1
  fi
  
  # Positional arguments: the source data directory and the destination
  # directory that will receive the combined, speed-perturbed data.
  srcdir=$1
  destdir=$2

  # The expansions inside the tests are quoted: unquoted, a directory name
  # containing whitespace makes '[' fail with a usage error, the condition
  # evaluates as false, and the guard is silently skipped.

  # The source directory must provide a wav.scp.
  if [ ! -f "$srcdir/wav.scp" ]; then
    echo "$0: expected $srcdir/wav.scp to exist"
    exit 1
  fi

  # Refuse to run when the destination already contains a feats.scp.
  if [ -f "$destdir/feats.scp" ]; then
    echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)"
    exit 1
  fi
  
  echo "$0: making sure the utt2dur and the reco2dur files are present"
  echo "... in ${srcdir}, because obtaining it after speed-perturbing"
  echo "... would be very slow, and you might need them."
  # These two helpers remain best-effort, as in the original (their exit status
  # never aborted the script), but a failure is now reported on stderr instead
  # of being silently ignored.
  utils/data/get_utt2dur.sh "${srcdir}" \
    || echo "$0: warning: utils/data/get_utt2dur.sh failed for ${srcdir}; continuing" >&2
  utils/data/get_reco2dur.sh "${srcdir}" \
    || echo "$0: warning: utils/data/get_reco2dur.sh failed for ${srcdir}; continuing" >&2

  # Create the 0.9x and 1.1x variants in temporary sibling directories of
  # $destdir.  Arguments are quoted so that each directory is passed as a
  # single word.
  utils/data/perturb_data_dir_speed.sh 0.9 "${srcdir}" "${destdir}_speed0.9" || exit 1
  utils/data/perturb_data_dir_speed.sh 1.1 "${srcdir}" "${destdir}_speed1.1" || exit 1
  
  # Combine the original-speed data with the 0.9x and 1.1x variants into
  # $destdir, then remove the temporary per-speed directories.
  #
  # The option is compared as a string rather than executed as a command, so
  # only the literal value "true" selects the prefixed branch.
  if [ "$always_include_prefix" = true ]; then
    # Make an original-speed copy whose utterance and speaker ids carry the
    # "sp1.0-" prefix.  Stop on failure: the steps below write into that copy.
    utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- \
      "${srcdir}" "${destdir}_speed1.0" || exit 1
    # Write utt2uniq for the copy.  Each output line is
    # "sp1.0-<field 1> <field N>": without a source utt2uniq the first field
    # of utt2spk is used for both columns; with one, its second field is kept.
    # The format string must use the "\n" escape; awk rejects a string
    # constant that contains a literal line break.
    if [ ! -f "$srcdir/utt2uniq" ]; then
      awk '{printf("sp1.0-%s %s\n", $1, $1);}' \
        < "$srcdir/utt2spk" > "${destdir}_speed1.0/utt2uniq" || exit 1
    else
      awk '{printf("sp1.0-%s %s\n", $1, $2);}' \
        < "$srcdir/utt2uniq" > "${destdir}_speed1.0/utt2uniq" || exit 1
    fi
    utils/data/combine_data.sh "$destdir" "${destdir}_speed1.0" "${destdir}_speed0.9" "${destdir}_speed1.1" || exit 1

    rm -r -- "${destdir}_speed0.9" "${destdir}_speed1.1" "${destdir}_speed1.0"
  else
    # Without the prefix option the source directory itself provides the
    # original-speed data.
    utils/data/combine_data.sh "$destdir" "${srcdir}" "${destdir}_speed0.9" "${destdir}_speed1.1" || exit 1
    rm -r -- "${destdir}_speed0.9" "${destdir}_speed1.1"
  fi
  
  echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir"
  # Validate the combined directory (invoked with --no-feats --no-text).
  # $destdir is quoted so it reaches the validator as a single argument.
  if ! utils/validate_data_dir.sh --no-feats --no-text "$destdir"; then
    echo "$0: Validation failed.  If it is a sorting issue, try the option '--always-include-prefix true'."
    exit 1
  fi

  exit 0