Blame view
egs/chime5/s5b/local/nnet3/decode.sh
5.98 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
#!/bin/bash # Copyright 2016 Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) # 2019 Vimal Manohar # Apache 2.0. # This script does 2-stage decoding where the first stage is used to get # reliable frames for i-vector extraction. set -e # general opts iter= stage=0 nj=30 affix= # affix for decode directory # ivector opts max_count=75 # parameter for extract_ivectors.sh sub_speaker_frames=6000 ivector_scale=0.75 get_weights_from_ctm=true weights_file= # use weights from this archive (must be compressed using gunzip) silence_weight=0.00001 # apply this weight to silence frames during i-vector extraction ivector_dir=exp/nnet3 # decode opts pass2_decode_opts="--min-active 1000" lattice_beam=8 extra_left_context=0 # change for (B)LSTM extra_right_context=0 # change for BLSTM frames_per_chunk=50 # change for (B)LSTM acwt=0.1 # important to change this when using chain models post_decode_acwt=1.0 # important to change this when using chain models extra_left_context_initial=0 extra_right_context_final=0 score_opts="--min-lmwt 6 --max-lmwt 13" . ./cmd.sh [ -f ./path.sh ] && . ./path.sh . utils/parse_options.sh || exit 1; if [ $# -ne 4 ]; then echo "Usage: $0 [options] <data-dir> <lang-dir> <graph-dir> <model-dir>" echo " Options:" echo " --stage (0|1|2) # start scoring script from part-way through." echo "e.g.:" echo "$0 data/dev data/lang exp/tri5a/graph_pp exp/nnet3/tdnn" exit 1; fi data=$1 # data directory lang=$2 # data/lang graph=$3 #exp/tri5a/graph_pp dir=$4 # exp/nnet3/tdnn model_affix=`basename $dir` ivector_affix=${affix:+_$affix}_chain_${model_affix}${iter:+_iter$iter} affix=${affix:+_${affix}}${iter:+_iter${iter}} if [ $stage -le 1 ]; then if [ ! -s ${data}_hires/feats.scp ]; then utils/copy_data_dir.sh $data ${data}_hires steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj $nj --cmd "$train_cmd" ${data}_hires steps/compute_cmvn_stats.sh ${data}_hires utils/fix_data_dir.sh ${data}_hires fi fi data_set=$(basename $data) if [ $stage -le 2 ]; then echo "Extracting i-vectors, stage 1" steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \ --max-count $max_count \ ${data}_hires $ivector_dir/extractor \ $ivector_dir/ivectors_${data_set}${ivector_affix}_stage1; # float comparisons are hard in bash if [ `bc <<< "$ivector_scale != 1"` -eq 1 ]; then ivector_scale_affix=_scale$ivector_scale else ivector_scale_affix= fi if [ ! -z "$ivector_scale_affix" ]; then echo "$0: Scaling iVectors, stage 1" srcdir=$ivector_dir/ivectors_${data_set}${ivector_affix}_stage1 outdir=$ivector_dir/ivectors_${data_set}${ivector_affix}${ivector_scale_affix}_stage1 mkdir -p $outdir $train_cmd $outdir/log/scale_ivectors.log \ copy-matrix --scale=$ivector_scale scp:$srcdir/ivector_online.scp ark:- \| \ copy-feats --compress=true ark:- ark,scp:$outdir/ivector_online.ark,$outdir/ivector_online.scp; cp $srcdir/ivector_period $outdir/ivector_period fi fi decode_dir=$dir/decode_${data_set}${affix} # generate the lattices if [ $stage -le 3 ]; then echo "Generating lattices, stage 1" steps/nnet3/decode.sh --nj $nj --cmd "$decode_cmd" \ --acwt $acwt --post-decode-acwt $post_decode_acwt \ --extra-left-context $extra_left_context \ --extra-right-context $extra_right_context \ --extra-left-context-initial $extra_left_context_initial \ --extra-right-context-final $extra_right_context_final \ --frames-per-chunk "$frames_per_chunk" \ --online-ivector-dir $ivector_dir/ivectors_${data_set}${ivector_affix}${ivector_scale_affix}_stage1 \ --skip-scoring true ${iter:+--iter $iter} \ $graph ${data}_hires ${decode_dir}_stage1; fi if [ $stage -le 4 ]; then if $get_weights_from_ctm; then if [ ! -z $weights_file ]; then echo "$0: Using provided vad weights file $weights_file" ivector_extractor_weights=$weights_file else echo "$0 : Generating vad weights file" ivector_extractor_weights=${decode_dir}_stage1/weights${affix}.gz local/extract_vad_weights.sh --silence-weight $silence_weight \ --cmd "$decode_cmd" ${iter:+--iter $iter} \ ${data}_hires $lang \ ${decode_dir}_stage1 $ivector_extractor_weights fi else # get weights from best path decoding ivector_extractor_weights=${decode_dir}_stage1 fi fi if [ $stage -le 5 ]; then echo "Extracting i-vectors, stage 2 with weights from $ivector_extractor_weights" # this does offline decoding, except we estimate the iVectors per # speaker, excluding silence (based on alignments from a DNN decoding), with a # different script. This is just to demonstrate that script. # the --sub-speaker-frames is optional; if provided, it will divide each speaker # up into "sub-speakers" of at least that many frames... can be useful if # acoustic conditions drift over time within the speaker's data. steps/online/nnet2/extract_ivectors.sh --cmd "$train_cmd" --nj $nj \ --silence-weight $silence_weight \ --sub-speaker-frames $sub_speaker_frames --max-count $max_count \ ${data}_hires $lang $ivector_dir/extractor \ $ivector_extractor_weights $ivector_dir/ivectors_${data_set}${ivector_affix}; fi if [ $stage -le 6 ]; then echo "Generating lattices, stage 2 with --acwt $acwt" rm -f ${decode_dir}/.error steps/nnet3/decode.sh --nj $nj --cmd "$decode_cmd" $pass2_decode_opts \ --acwt $acwt --post-decode-acwt $post_decode_acwt \ --extra-left-context $extra_left_context \ --extra-right-context $extra_right_context \ --extra-left-context-initial $extra_left_context_initial \ --extra-right-context-final $extra_right_context_final \ --frames-per-chunk "$frames_per_chunk" \ --skip-scoring false ${iter:+--iter $iter} --lattice-beam $lattice_beam \ --online-ivector-dir $ivector_dir/ivectors_${data_set}${ivector_affix} \ $graph ${data}_hires ${decode_dir} || touch ${decode_dir}/.error [ -f ${decode_dir}/.error ] && echo "$0: Error decoding" && exit 1; fi exit 0 |