Blame view
egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh
3.1 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
#!/bin/bash

# Copyright 2016-2018  Johns Hopkins University (author: Daniel Povey)
#                2018  Hossein Hadian

# Apache 2.0

# This script does the standard 3-way speed perturbing of
# a data directory (it operates on the wav.scp).
#
# Usage: perturb_data_dir_speed_3way.sh [options] <srcdir> <destdir>
#
# It builds speed-perturbed copies of <srcdir> at factors 0.9 and 1.1 in
# temporary directories <destdir>_speed0.9 and <destdir>_speed1.1, combines
# them with the original-speed data into <destdir>, removes the temporary
# directories, and finally validates <destdir>.  It exits with status 1 on a
# usage error, a missing <srcdir>/wav.scp, an existing <destdir>/feats.scp,
# or a failure of the perturb/copy/combine/validate steps.

# If you add the option "--always-include-prefix true", it will include the
# prefix "sp1.0-" for the original un-perturbed data.  This can help resolve
# problems with sorting.
# We don't make '--always-include-prefix true' the default behavior because
# it can break some older scripts that relied on the original utterance-ids
# being a subset of the perturbed data's utterance-ids.

always_include_prefix=false

# Parses command-line options; may override always_include_prefix.
. utils/parse_options.sh

if [ $# -ne 2 ]; then
  echo "Usage: perturb_data_dir_speed_3way.sh <srcdir> <destdir>"
  echo "Applies standard 3-way speed perturbation using factors of 0.9, 1.0 and 1.1."
  echo "e.g.:"
  echo " $0 [options] data/train data/train_sp"
  echo "Note: if <destdir>/feats.scp already exists, this will refuse to run."
  echo "Options:"
  echo " --always-include-prefix [true|false] # default: false. If set to true,"
  echo " # it will add the prefix 'sp1.0-' to"
  echo " # utterance and speaker-ids for data at"
  echo " # the original speed. Can resolve"
  echo " # issues RE data sorting."
  exit 1
fi

srcdir=$1
destdir=$2

if [ ! -f "$srcdir/wav.scp" ]; then
  echo "$0: expected $srcdir/wav.scp to exist"
  exit 1
fi

# Refuse to run if features already exist in the destination.
if [ -f "$destdir/feats.scp" ]; then
  echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)"
  exit 1
fi

echo "$0: making sure the utt2dur and the reco2dur files are present"
echo "... in ${srcdir}, because obtaining it after speed-perturbing"
echo "... would be very slow, and you might need them."
# NOTE(review): the exit status of these two helpers is not checked, as in
# the original script; confirm whether a failure here should be fatal.
utils/data/get_utt2dur.sh "${srcdir}"
utils/data/get_reco2dur.sh "${srcdir}"

utils/data/perturb_data_dir_speed.sh 0.9 "${srcdir}" "${destdir}_speed0.9" || exit 1
utils/data/perturb_data_dir_speed.sh 1.1 "${srcdir}" "${destdir}_speed1.1" || exit 1

# Compare as a string rather than executing the option value as a command.
if [ "$always_include_prefix" = true ]; then
  # Original-speed copy whose utterance- and speaker-ids carry the "sp1.0-"
  # prefix.  Fail early if the copy cannot be made, like the other steps.
  utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- \
    "${srcdir}" "${destdir}_speed1.0" || exit 1

  # Write utt2uniq for the prefixed copy, one "<prefixed-utt> <uniq-id>" pair
  # per line (awk's printf does not append a newline by itself).
  if [ ! -f "$srcdir/utt2uniq" ]; then
    # No utt2uniq in the source: map each prefixed id to the original utt-id.
    awk '{printf("sp1.0-%s %s\n", $1, $1);}' "$srcdir/utt2spk" \
      > "${destdir}_speed1.0/utt2uniq"
  else
    # Keep the source's second column as the mapping target.
    awk '{printf("sp1.0-%s %s\n", $1, $2);}' "$srcdir/utt2uniq" \
      > "${destdir}_speed1.0/utt2uniq"
  fi

  utils/data/combine_data.sh "$destdir" "${destdir}_speed1.0" \
    "${destdir}_speed0.9" "${destdir}_speed1.1" || exit 1

  rm -r -- "${destdir}_speed0.9" "${destdir}_speed1.1" "${destdir}_speed1.0"
else
  # Default: the un-prefixed source directory is combined as-is.
  utils/data/combine_data.sh "$destdir" "${srcdir}" \
    "${destdir}_speed0.9" "${destdir}_speed1.1" || exit 1
  rm -r -- "${destdir}_speed0.9" "${destdir}_speed1.1"
fi

echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir"
if ! utils/validate_data_dir.sh --no-feats --no-text "$destdir"; then
  echo "$0: Validation failed. If it is a sorting issue, try the option '--always-include-prefix true'."
  exit 1
fi

exit 0