Blame view
egs/wsj/s5/steps/make_plp.sh
5.19 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
#!/bin/bash

# Copyright 2012-2016  Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# To be run from .. (one directory up from here)
# see ../run.sh for example
#
# Computes PLP features for a data directory.
#
# Inputs:
#   <data-dir>/wav.scp, optionally <data-dir>/segments, and optionally
#   <data-dir>/spk2warp or <data-dir>/utt2warp (VTLN warp factors).
# Outputs:
#   <plp-dir>/raw_plp_<name>.<job>.{ark,scp}, <data-dir>/feats.scp,
#   <data-dir>/frame_shift, <data-dir>/conf/plp.conf, and (if enabled)
#   <data-dir>/utt2num_frames and <data-dir>/utt2dur.
# Exit status:
#   non-zero on bad usage, missing inputs, failed jobs, or when fewer than
#   95% of the utterances in utt2spk received features.

# Begin configuration section.
# NOTE: these names are presumably what parse_options.sh maps the --options
# onto (e.g. --plp-config -> plp_config), so they must not be renamed.
nj=4
cmd=run.pl
plp_config=conf/plp.conf
compress=true
write_utt2num_frames=true  # If true writes utt2num_frames.
write_utt2dur=true
# End configuration section.

echo "$0 $*"  # Print the command line for logging.

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# -lt 1 ] || [ $# -gt 3 ]; then
  cat >&2 <<EOF
Usage: $0 [options] <data-dir> [<log-dir> [<plp-dir>] ]
 e.g.: $0 data/train
Note: <log-dir> defaults to <data-dir>/log, and
      <plp-dir> defaults to <data-dir>/data
Options:
  --plp-config <config-file>           # config passed to compute-plp-feats.
  --nj <nj>                            # number of parallel jobs.
  --cmd <run.pl|queue.pl <queue opts>> # how to run jobs.
  --compress <true|false>              # passed to copy-feats --compress.
  --write-utt2num-frames <true|false>  # If true, write utt2num_frames file.
  --write-utt2dur <true|false>         # If true, write utt2dur file.
EOF
  exit 1;
fi

data=$1
if [ $# -ge 2 ]; then
  logdir=$2
else
  logdir=$data/log
fi
if [ $# -ge 3 ]; then
  plpdir=$3
else
  plpdir=$data/data
fi

# make $plpdir an absolute pathname.
plpdir=$(perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' "$plpdir" "${PWD}")

# use "name" as part of name of the archive.
name=$(basename "$data")

mkdir -p "$plpdir" || exit 1;
mkdir -p "$logdir" || exit 1;

scp=$data/wav.scp

# Check the inputs *before* touching an existing feats.scp, so that a failed
# precondition does not leave the data directory with feats.scp moved away.
for f in "$scp" "$plp_config"; do
  if [ ! -f "$f" ]; then
    echo "$0: no such file $f"
    exit 1;
  fi
done

# Keep the previous feats.scp (if any) as a backup.  This is done before
# validation on purpose, exactly as in the original ordering.
if [ -f "$data/feats.scp" ]; then
  mkdir -p "$data/.backup"
  echo "$0: moving $data/feats.scp to $data/.backup"
  mv "$data/feats.scp" "$data/.backup"
fi

utils/validate_data_dir.sh --no-text --no-feats "$data" || exit 1;

# Select VTLN options: a per-speaker warp map takes precedence over a
# per-utterance one; with neither present no VTLN options are passed.
if [ -f "$data/spk2warp" ]; then
  echo "$0 [info]: using VTLN warp factors from $data/spk2warp"
  vtln_opts="--vtln-map=ark:$data/spk2warp --utt2spk=ark:$data/utt2spk"
elif [ -f "$data/utt2warp" ]; then
  echo "$0 [info]: using VTLN warp factors from $data/utt2warp"
  vtln_opts="--vtln-map=ark:$data/utt2warp"
else
  vtln_opts=
fi

for n in $(seq "$nj"); do
  # the next command does nothing unless $plpdir/storage/ exists, see
  # utils/create_data_link.pl for more info.
  utils/create_data_link.pl "$plpdir/raw_plp_$name.$n.ark"
done

# Per-job side outputs; "JOB" is left literal here and is presumably replaced
# with the job index by $cmd.
if $write_utt2num_frames; then
  write_num_frames_opt="--write-num-frames=ark,t:$logdir/utt2num_frames.JOB"
else
  write_num_frames_opt=
fi

if $write_utt2dur; then
  write_utt2dur_opt="--write-utt2dur=ark,t:$logdir/utt2dur.JOB"
else
  write_utt2dur_opt=
fi

# Clear error markers left behind by an earlier run.  Both spellings are
# removed: the historical "$logdir/.error" and "$logdir/.error.$name", which
# is the one actually tested after the jobs finish.
rm -f "$logdir/.error" "$logdir/.error.$name" 2>/dev/null

# NOTE: $cmd, $vtln_opts and the $write_*_opt variables are intentionally left
# unquoted below: they hold zero or more words and must be word-split (an
# empty value has to disappear from the command line).
if [ -f "$data/segments" ]; then
  echo "$0 [info]: segments file exists: using that."
  split_segments=()
  for n in $(seq "$nj"); do
    split_segments+=("$logdir/segments.$n")
  done

  utils/split_scp.pl "$data/segments" "${split_segments[@]}" || exit 1;

  $cmd JOB=1:$nj $logdir/make_plp_${name}.JOB.log \
    extract-segments scp,p:$scp $logdir/segments.JOB ark:- \| \
    compute-plp-feats $vtln_opts $write_utt2dur_opt --verbose=2 \
      --config=$plp_config ark:- ark:- \| \
    copy-feats --compress=$compress $write_num_frames_opt ark:- \
      ark,scp:$plpdir/raw_plp_$name.JOB.ark,$plpdir/raw_plp_$name.JOB.scp \
    || exit 1;

else
  echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance."
  split_scps=()
  for n in $(seq "$nj"); do
    split_scps+=("$logdir/wav_${name}.$n.scp")
  done

  utils/split_scp.pl "$scp" "${split_scps[@]}" || exit 1;

  $cmd JOB=1:$nj $logdir/make_plp_${name}.JOB.log \
    compute-plp-feats $vtln_opts $write_utt2dur_opt --verbose=2 \
      --config=$plp_config scp,p:$logdir/wav_${name}.JOB.scp ark:- \| \
    copy-feats --compress=$compress $write_num_frames_opt ark:- \
      ark,scp:$plpdir/raw_plp_$name.JOB.ark,$plpdir/raw_plp_$name.JOB.scp \
    || exit 1;
fi

# Nothing in this script creates the marker; the check is kept for job
# wrappers that may signal failure this way.
if [ -f "$logdir/.error.$name" ]; then
  echo "$0: Error producing PLP features for $name:"
  tail "$logdir/make_plp_${name}.1.log"
  exit 1;
fi

# concatenate the .scp files together.
for n in $(seq "$nj"); do
  cat "$plpdir/raw_plp_$name.$n.scp" || exit 1
done > "$data/feats.scp"

if $write_utt2num_frames; then
  for n in $(seq "$nj"); do
    cat "$logdir/utt2num_frames.$n" || exit 1
  done > "$data/utt2num_frames" || exit 1
fi

if $write_utt2dur; then
  for n in $(seq "$nj"); do
    cat "$logdir/utt2dur.$n" || exit 1
  done > "$data/utt2dur" || exit 1
fi

# Store frame_shift and plp_config along with features.
# The config value is in milliseconds and may be fractional (e.g. 7.5), so the
# fraction is captured too.  The result is printed in seconds with at least
# three decimals (10 -> 0.010, as before) and more only when needed
# (7.5 -> 0.0075).  Falls back to 0.01 when the config has no --frame-shift.
frame_shift=$(perl -ne 'if (/^--frame-shift=(\d+(?:\.\d+)?)/) {
                          $s = sprintf("%.6f", 0.001 * $1);
                          $s =~ s/0{1,3}$//;
                          print $s; exit; }' "$plp_config")
echo "${frame_shift:-0.01}" > "$data/frame_shift"
mkdir -p "$data/conf" && cp "$plp_config" "$data/conf/plp.conf" || exit 1

# Best-effort cleanup of the per-job temporaries; some of these globs match
# nothing depending on the branch taken above, hence the silenced errors.
rm "$logdir"/wav_"${name}".*.scp "$logdir"/segments.* \
   "$logdir"/utt2num_frames.* "$logdir"/utt2dur.* 2>/dev/null

# Sanity check: compare the number of utterances with features against the
# number of utterances listed in utt2spk.
nf=$(wc -l < "$data/feats.scp")
nu=$(wc -l < "$data/utt2spk")
if (( nf != nu )); then
  echo "$0: It seems not all of the feature files were successfully procesed" \
       "($nf != $nu); consider using utils/fix_data_dir.sh $data"
fi

# A small shortfall only warns (above); losing more than 5% is fatal.
if (( nf < nu - nu/20 )); then
  echo "$0: Less than 95% the features were successfully generated."\
       "Probably a serious error."
  exit 1
fi

echo "$0: Succeeded creating PLP features for $name"