Blame view
egs/wsj/s5/steps/online/nnet3/prepare_online_decoding.sh
6.83 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
#!/bin/bash

# Copyright 2014  Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0

# Prepares <output-dir> for online decoding with an nnet3 model.
#
# The script copies the acoustic model ($iter.mdl -> final.mdl), the tree,
# phones.txt and (if present) frame_subsampling_factor from <nnet-dir> into
# <output-dir>; optionally copies the iVector extractor files from
# <ivector-extractor-dir> into <output-dir>/ivector_extractor; and writes the
# configuration files <output-dir>/conf/online.conf (plus the feature,
# iVector-extraction, splice, CMVN and pitch configs it refers to).  All
# pathnames written into the configs are absolute.
#
# Usage:
#   prepare_online_decoding.sh [options] <lang-dir> [<ivector-extractor-dir>] \
#     <nnet-dir> <output-dir>
#
# Exits with status 1 on a usage error, a missing input file, an unknown
# --feature-type, or a failed copy.

# Begin configuration.
stage=0 # This allows restarting after partway, when something went wrong.
feature_type=mfcc
add_pitch=false
mfcc_config=conf/mfcc.conf # you can override any of these you need to override.
plp_config=conf/plp.conf
fbank_config=conf/fbank.conf

# online_pitch_config is the config file for both pitch extraction and
# post-processing; we combine them into one because during training this
# is given to the program compute-and-process-kaldi-pitch-feats.
online_pitch_config=conf/online_pitch.conf

# Below are some options that affect the iVectors, and should probably
# match those used in extract_ivectors_online.sh.
num_gselect=5 # Gaussian-selection using diagonal model: number of Gaussians to select
posterior_scale=0.1 # Scale on the acoustic posteriors, intended to account for
                    # inter-frame correlations.
min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
               # caution: you should use the same value in the online-estimation
               # code.
max_count=100  # This max-count of 100 can make iVectors more consistent for
               # different lengths of utterance, by scaling up the prior term
               # when the data-count exceeds this value.  The data-count is
               # after posterior-scaling, so assuming the posterior-scale is
               # 0.1, --max-count 100 starts having effect after 1000 frames,
               # or 10 seconds of data.
iter=final
# End configuration.

echo "$0 $*"  # Print the command line for logging

[ -f path.sh ] && . ./path.sh;
. parse_options.sh || exit 1;

if [ $# -ne 4 ] && [ $# -ne 3 ]; then
  echo "Usage: $0 [options] <lang-dir> [<ivector-extractor-dir>] <nnet-dir> <output-dir>"
  echo "e.g.: $0 data/lang exp/nnet2_online/extractor exp/nnet2_online/nnet exp/nnet2_online/nnet_online"
  echo "main options (for others, see top of script file)"
  echo " --feature-type <mfcc|plp|fbank> # Type of the base features; "
  echo " # important to generate the correct"
  echo " # configs in <output-dir>/conf/"
  echo " --add-pitch <true|false> # Append pitch features to cmvn"
  echo " # (default: false)"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --config <config-file> # config containing options"
  echo " --iter <model-iteration|final> # iteration of model to take."
  echo " --stage <stage> # stage to do partial re-run from."
  exit 1;
fi

# The iVector extractor directory is optional: with three arguments it is
# left empty and all iVector-related steps below are skipped.
if [ $# -eq 4 ]; then
  lang=$1
  iedir=$2
  srcdir=$3
  dir=$4
else
  [ $# -eq 3 ] || exit 1;
  lang=$1
  iedir=
  srcdir=$2
  dir=$3
fi

# Check the inputs that are always required.
for f in "$lang/phones/silence.csl" "$srcdir/${iter}.mdl" "$srcdir/tree"; do
  if [ ! -f "$f" ]; then
    echo "$0: no such file $f"
    exit 1
  fi
done

if [ -n "$iedir" ]; then
  # Check the inputs required from the iVector extractor directory.
  for f in final.{mat,ie,dubm} splice_opts global_cmvn.stats online_cmvn.conf; do
    if [ ! -f "$iedir/$f" ]; then
      echo "$0: no such file $iedir/$f"
      exit 1
    fi
  done
  if $add_pitch; then
    # Refuse to continue when the first dimension reported for the extractor's
    # final.mat equals the acoustic model's input-dim.
    iedim=$(matrix-dim "$iedir/final.mat" | awk '{print $1}')
    amdim=$(nnet3-am-info "$srcdir/${iter}.mdl" | grep "input-dim:" | awk '{print $2}')
    if [ $(($amdim-$iedim)) -eq 0 ]; then
      echo "$0: remove pitch from the input of ivector extractor"
      exit 1
    fi
  fi
fi

# Convert $dir to an absolute pathname, so that the configuration files we
# write will contain absolute pathnames.  Abort if the helper fails or prints
# nothing: an empty $dir would make every path below resolve under "/".
dir=$(utils/make_absolute.sh "$dir") || dir=
if [ -z "$dir" ]; then
  echo "$0: utils/make_absolute.sh failed to produce an absolute output directory"
  exit 1
fi

mkdir -p "$dir/conf"

utils/lang/check_phones_compatible.sh "$lang/phones.txt" "$srcdir/phones.txt" || exit 1;
cp "$lang/phones.txt" "$dir" || exit 1;

cp "$srcdir/${iter}.mdl" "$dir/final.mdl" || exit 1;
cp "$srcdir/tree" "$dir/" || exit 1;
if [ -f "$srcdir/frame_subsampling_factor" ]; then
  cp "$srcdir/frame_subsampling_factor" "$dir/" || exit 1;
fi

if [ -n "$iedir" ]; then
  mkdir -p "$dir/ivector_extractor/"
  cp "$iedir"/final.{mat,ie,dubm} "$iedir/global_cmvn.stats" "$dir/ivector_extractor/" || exit 1;

  # The following things won't be needed directly by the online decoding, but
  # will allow us to run prepare_online_decoding.sh again with
  # $dir/ivector_extractor/ as the input directory (useful in certain
  # cross-system training scenarios).
  cp "$iedir/splice_opts" "$iedir/online_cmvn.conf" "$dir/ivector_extractor/" || exit 1;
fi

# Best-effort removal of feature configs sitting directly in $dir (the ones
# this script writes live in $dir/conf); errors are deliberately ignored.
rm "$dir"/{plp,mfcc,fbank}.conf 2>/dev/null

echo "$0: preparing configuration files in $dir/conf"

# Keep a backup of any previous online.conf before overwriting it.
if [ -f "$dir/conf/online.conf" ]; then
  echo "$0: moving $dir/conf/online.conf to $dir/conf/online.conf.bak"
  mv "$dir/conf/online.conf" "$dir/conf/online.conf.bak"
fi

conf=$dir/conf/online.conf
: >"$conf"  # start from an empty file

echo "--feature-type=$feature_type" >>"$conf"

case "$feature_type" in
  mfcc)
    echo "--mfcc-config=$dir/conf/mfcc.conf" >>"$conf"
    cp "$mfcc_config" "$dir/conf/mfcc.conf" || exit 1
    ;;
  plp)
    echo "--plp-config=$dir/conf/plp.conf" >>"$conf"
    cp "$plp_config" "$dir/conf/plp.conf" || exit 1
    ;;
  fbank)
    echo "--fbank-config=$dir/conf/fbank.conf" >>"$conf"
    cp "$fbank_config" "$dir/conf/fbank.conf" || exit 1
    ;;
  *)
    # An unsupported type would leave online.conf without any feature
    # config, so stop here instead of producing an unusable directory.
    echo "Unknown feature type $feature_type"
    exit 1
    ;;
esac

if [ -n "$iedir" ]; then
  ieconf=$dir/conf/ivector_extractor.conf
  : >"$ieconf"  # start from an empty file
  echo "--ivector-extraction-config=$ieconf" >>"$conf"
  cp "$iedir/online_cmvn.conf" "$dir/conf/online_cmvn.conf" || exit 1;
  # the next line puts each option from splice_opts on its own line in the config.
  # (The command substitution is intentionally unquoted: word splitting is
  # what separates the options.)
  for x in $(cat "$iedir/splice_opts"); do echo "$x"; done > "$dir/conf/splice.conf"
  {
    echo "--splice-config=$dir/conf/splice.conf"
    echo "--cmvn-config=$dir/conf/online_cmvn.conf"
    echo "--lda-matrix=$dir/ivector_extractor/final.mat"
    echo "--global-cmvn-stats=$dir/ivector_extractor/global_cmvn.stats"
    echo "--diag-ubm=$dir/ivector_extractor/final.dubm"
    echo "--ivector-extractor=$dir/ivector_extractor/final.ie"
    echo "--num-gselect=$num_gselect"
    echo "--min-post=$min_post"
    echo "--posterior-scale=$posterior_scale" # this is currently the default in the scripts.
    echo "--max-remembered-frames=1000" # the default
    echo "--max-count=$max_count"
  } >>"$ieconf"
fi

if $add_pitch; then
  echo "$0: enabling pitch features"
  echo "--add-pitch=true" >>"$conf"
  echo "$0: creating $dir/conf/online_pitch.conf"
  if [ ! -f "$online_pitch_config" ]; then
    echo "$0: expected file '$online_pitch_config' to exist.";
    exit 1;
  fi
  cp "$online_pitch_config" "$dir/conf/online_pitch.conf" || exit 1;
  echo "--online-pitch-config=$dir/conf/online_pitch.conf" >>"$conf"
fi

# The silence phone list from the lang directory is passed to the
# endpointing options.
silphonelist=$(cat "$lang/phones/silence.csl") || exit 1;
echo "--endpoint.silence-phones=$silphonelist" >>"$conf"
echo "$0: created config file $conf"