Blame view
egs/wsj/s5/utils/perturb_data_dir_speed.sh
4.54 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
#!/bin/bash

# Copyright 2013  Johns Hopkins University (author: Daniel Povey)
#           2014  Tom Ko
#           2018  Emotech LTD (author: Pawel Swietojanski)
# Apache 2.0

# This script operates on a directory, such as in data/train/,
# that contains some subset of the following files:
#  wav.scp
#  spk2utt
#  utt2spk
#  text
#  utt2dur
#  reco2dur
#
# It generates the files which are used for perturbing the speed of the
# original data.
#
# Usage: perturb_data_dir_speed.sh <warping-factor> <srcdir> <destdir>
#
# What is written to <destdir>:
#  * utterance, speaker and recording IDs are prefixed with "sp<factor>-";
#  * utt2uniq maps each new utterance ID back to the source utterance
#    (or to the source's own utt2uniq entry when <srcdir> has one);
#  * every wav.scp entry becomes a piped command ending in
#    "sox ... speed <factor> |";
#  * segment boundaries and utt2dur/reco2dur durations are divided by
#    <factor>.
# Must be run from a directory in which the relative "utils/" path resolves.

# Without the option parser the script would silently ignore options, so a
# failure to source it is fatal.
. utils/parse_options.sh || exit 1

if [ $# -ne 3 ]; then
  echo "Usage: perturb_data_dir_speed.sh <warping-factor> <srcdir> <destdir>" >&2
  echo "e.g.:" >&2
  echo " $0 0.9 data/train_si284 data/train_si284p" >&2
  exit 1
fi

# Force the C locale for every tool invoked below.
export LC_ALL=C

factor=$1
srcdir=$2
destdir=$3
label="sp"
spk_prefix="${label}${factor}-"
utt_prefix="${label}${factor}-"

# Check that sox is on the path.
if ! command -v sox >/dev/null 2>&1; then
  echo "sox: command not found" >&2
  exit 1
fi

if [ ! -f "$srcdir/utt2spk" ]; then
  echo "$0: no such file $srcdir/utt2spk" >&2
  exit 1
fi

if [ "$destdir" = "$srcdir" ]; then
  echo "$0: this script requires <srcdir> and <destdir> to be different." >&2
  exit 1
fi

#######################################
# Write $destdir/wav.scp from $srcdir/wav.scp with speed perturbation applied.
# Globals:   srcdir, destdir, factor (read)
# Arguments: $1 - map file handed to "utils/apply_map.pl -f 1" for the first
#                 field of wav.scp (reco_map when segments exist, utt_map
#                 otherwise)
# Outputs:   $destdir/wav.scp
#######################################
perturb_wav_scp() {
  local id_map=$1

  # The sed step normalises a trailing pipe to " |" so that awk sees it as the
  # last field.  awk then handles three kinds of rxfilename:
  #   "input piped command" -> append a sox stage to the existing pipe
  #   "file offset" (name ending in :<digits>) -> extract with wav-copy, then sox
  #   "filename" -> let sox read the file directly
  # Blanking $1 rebuilds $0 with a leading space, hence 'wid $0'.
  utils/apply_map.pl -f 1 "$id_map" <"$srcdir/wav.scp" \
    | sed 's/| *$/ |/' \
    | awk -v factor="$factor" '{
        wid = $1; $1 = "";
        if ($NF == "|") {
          print wid $0 " sox -t wav - -t wav - speed " factor " |"
        } else if (match($0, /:[0-9]+$/)) {
          print wid " wav-copy" $0 " - | sox -t wav - -t wav - speed " factor " |"
        } else {
          print wid " sox -t wav" $0 " -t wav - speed " factor " |"
        }
      }' >"$destdir/wav.scp"
}

# From here on any failing command (including inside a pipeline) is fatal.
set -e
set -o pipefail

mkdir -p "$destdir"

# Temporary "<old-id> <new-id>" maps; removed again at the end of the script.
awk -v p="$utt_prefix" '{printf("%s %s%s\n", $1, p, $1);}' \
  <"$srcdir/utt2spk" >"$destdir/utt_map"
awk -v p="$spk_prefix" '{printf("%s %s%s\n", $1, p, $1);}' \
  <"$srcdir/spk2utt" >"$destdir/spk_map"
awk -v p="$spk_prefix" '{printf("%s %s%s\n", $1, p, $1);}' \
  <"$srcdir/wav.scp" >"$destdir/reco_map"

# utt2uniq: "<new-utt-id> <unique-id>".
if [ ! -f "$srcdir/utt2uniq" ]; then
  awk -v p="$utt_prefix" '{printf("%s%s %s\n", p, $1, $1);}' \
    <"$srcdir/utt2spk" >"$destdir/utt2uniq"
else
  awk -v p="$utt_prefix" '{printf("%s%s %s\n", p, $1, $2);}' \
    <"$srcdir/utt2uniq" >"$destdir/utt2uniq"
fi

utils/apply_map.pl -f 1 "$destdir/utt_map" <"$srcdir/utt2spk" \
  | utils/apply_map.pl -f 2 "$destdir/spk_map" >"$destdir/utt2spk"

utils/utt2spk_to_spk2utt.pl <"$destdir/utt2spk" >"$destdir/spk2utt"

if [ -f "$srcdir/segments" ]; then
  # Scale the start/end times; segments that end up no longer than 0.01
  # after scaling are dropped.
  utils/apply_map.pl -f 1 "$destdir/utt_map" <"$srcdir/segments" \
    | utils/apply_map.pl -f 2 "$destdir/reco_map" \
    | awk -v factor="$factor" '{
        s = $3 / factor; e = $4 / factor;
        if (e > s + 0.01) { printf("%s %s %.2f %.2f\n", $1, $2, s, e); }
      }' >"$destdir/segments"

  perturb_wav_scp "$destdir/reco_map"

  if [ -f "$srcdir/reco2file_and_channel" ]; then
    utils/apply_map.pl -f 1 "$destdir/reco_map" \
      <"$srcdir/reco2file_and_channel" >"$destdir/reco2file_and_channel"
  fi
else # no segments->wav indexed by utterance.
  if [ -f "$srcdir/wav.scp" ]; then
    perturb_wav_scp "$destdir/utt_map"
  fi
fi

if [ -f "$srcdir/text" ]; then
  utils/apply_map.pl -f 1 "$destdir/utt_map" <"$srcdir/text" >"$destdir/text"
fi
if [ -f "$srcdir/spk2gender" ]; then
  utils/apply_map.pl -f 1 "$destdir/spk_map" \
    <"$srcdir/spk2gender" >"$destdir/spk2gender"
fi
if [ -f "$srcdir/utt2lang" ]; then
  utils/apply_map.pl -f 1 "$destdir/utt_map" \
    <"$srcdir/utt2lang" >"$destdir/utt2lang"
fi

# Prepare speed-perturbed utt2dur.
if [ ! -f "$srcdir/utt2dur" ]; then
  # generate utt2dur if it does not exist in srcdir
  utils/data/get_utt2dur.sh "$srcdir"
fi
utils/apply_map.pl -f 1 "$destdir/utt_map" <"$srcdir/utt2dur" \
  | awk -v factor="$factor" '{print $1, $2/factor;}' >"$destdir/utt2dur"

# Prepare speed-perturbed reco2dur.
if [ ! -f "$srcdir/reco2dur" ]; then
  # generate reco2dur if it does not exist in srcdir
  utils/data/get_reco2dur.sh "$srcdir"
fi
utils/apply_map.pl -f 1 "$destdir/reco_map" <"$srcdir/reco2dur" \
  | awk -v factor="$factor" '{print $1, $2/factor;}' >"$destdir/reco2dur"

# -f: a missing temporary map must not abort the script (set -e is active)
# before the result has been validated.
rm -f -- "$destdir/spk_map" "$destdir/utt_map" "$destdir/reco_map"

echo "$0: generated speed-perturbed version of data in $srcdir, in $destdir"

utils/validate_data_dir.sh --no-feats --no-text "$destdir"