  #!/bin/bash
  
  # This script demonstrates some commands that you could run after run_dnn.sh,
  # relating to converting the resulting models to the nnet2 format.
  
  
  
  steps/nnet2/convert_nnet1_to_nnet2.sh exp/dnn4b_pretrain-dbn_dnn exp/dnn4b_nnet2
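
  # (Optional check, not in the original recipe): nnet-am-info prints an nnet2
  # model's component structure and parameter counts, which is a quick way to
  # sanity-check the conversion.
  # nnet-am-info exp/dnn4b_nnet2/final.mdl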
   
  steps/nnet2/decode.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
     --config conf/decode.config exp/tri3b/graph data/test exp/dnn4b_nnet2/decode
  steps/nnet2/decode.sh --nj 10 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
     --config conf/decode.config exp/tri3b/graph_ug data/test exp/dnn4b_nnet2/decode_ug
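
  # To summarize the WERs from these decodes, the same pattern used later in
  # this script applies here too:
  # for x in exp/dnn4b_nnet2/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done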
  
  # decoding results are essentially the same (any small difference is probably because
  # decode.config != decode_dnn.config).
  # %WER 1.58 [ 198 / 12533, 22 ins, 45 del, 131 sub ] exp/dnn4b_nnet2/decode/wer_3
  # %WER 1.59 [ 199 / 12533, 23 ins, 45 del, 131 sub ] exp/dnn4b_pretrain-dbn_dnn/decode/wer_3
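  # (For illustration) if you want to see exactly how the two decoding setups
  # differ, a plain diff of the two config files mentioned above will show it:
  # diff conf/decode.config conf/decode_dnn.config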
  
  
  # Convert the model to take raw (un-transformed) features, folding the LDA
  # transform into the network itself.
  steps/nnet2/convert_lda_to_raw.sh exp/dnn4b_nnet2 exp/dnn4b_nnet2_raw
  steps/nnet2/decode.sh --nj 10 --cmd "$decode_cmd" \
      --feat-type raw --config conf/decode.config exp/tri3b/graph data/test exp/dnn4b_nnet2_raw/decode
  
  # This is worse because we're decoding without fMLLR.  That's OK; I just wanted
  # to demonstrate the script, which I plan to use for systems without fMLLR.
  # grep WER exp/dnn4b_nnet2_raw/decode/wer_* | utils/best_wer.sh 
  # %WER 3.84 [ 481 / 12533, 44 ins, 136 del, 301 sub ] exp/dnn4b_nnet2_raw/decode/wer_7
  
  # Sum the per-speaker CMVN stats into a single matrix of global stats, to be
  # used in place of per-speaker CMVN at test time.
  matrix-sum scp:data/train/cmvn.scp global.cmvn
  steps/nnet2/convert_lda_to_raw.sh --global-cmvn-stats global.cmvn exp/dnn4b_nnet2 exp/dnn4b_nnet2_raw_no_cmvn
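  # (Optional, for illustration) the summed stats are an ordinary Kaldi matrix,
  # so you can eyeball them with copy-matrix before deleting the file:
  # copy-matrix --binary=false global.cmvn -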
  rm global.cmvn
  steps/nnet2/decode.sh --nj 10 --cmd "$decode_cmd" \
      --feat-type raw --config conf/decode.config exp/tri3b/graph data/test exp/dnn4b_nnet2_raw_no_cmvn/decode
  # Even worse results, but this is expected due to the mismatch between the
  # global CMVN stats and the per-speaker stats the model was trained with.
  # grep WER exp/dnn4b_nnet2_raw_no_cmvn/decode/wer_* | utils/best_wer.sh 
  # %WER 5.31 [ 666 / 12533, 76 ins, 163 del, 427 sub ] exp/dnn4b_nnet2_raw_no_cmvn/decode/wer_7
  
  
  ( # We demonstrate doing further training on top of a model initially
    # trained by Karel's tools.
    # nnet-am-switch-preconditioning converts the affine components to the
    # preconditioned-update type used in nnet2 training, and nnet-am-copy
    # sets a lower learning rate before retraining.
    nnet-am-switch-preconditioning exp/dnn4b_nnet2/final.mdl - | \
      nnet-am-copy --learning-rate=0.001 - exp/dnn4b_nnet2/final.mdl.mod
  
    mkdir -p exp/dnn4b_nnet2_retrain
  
    # Dump training examples ("egs") for nnet2-style training.
    steps/nnet2/get_egs.sh --samples-per-iter 200000 \
      --num-jobs-nnet 4 --splice-width 5 --cmd "$train_cmd" \
      data/train data/lang exp/tri3b_ali \
      exp/dnn4b_nnet2_retrain
  
    # Options here are for GPU use.
    steps/nnet2/train_more.sh --learning-rate-factor 0.1 --cmd "$train_cmd" \
      --parallel-opts "--gpu 1" --num-threads 1  --minibatch-size 512 \
      exp/dnn4b_nnet2/final.mdl.mod exp/dnn4b_nnet2_retrain/egs exp/dnn4b_nnet2_retrain
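
    # If you don't have a GPU, a CPU configuration along these lines should also
    # work (an untested sketch; the thread count and smaller minibatch follow the
    # usual nnet2 CPU conventions, so adjust them to your machine):
    # steps/nnet2/train_more.sh --learning-rate-factor 0.1 --cmd "$train_cmd" \
    #   --parallel-opts "--num-threads 16" --num-threads 16 --minibatch-size 128 \
    #   exp/dnn4b_nnet2/final.mdl.mod exp/dnn4b_nnet2_retrain/egs exp/dnn4b_nnet2_retrain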
  
    steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
      --config conf/decode.config exp/tri3b/graph data/test exp/dnn4b_nnet2_retrain/decode
    steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
      --config conf/decode.config exp/tri3b/graph_ug data/test exp/dnn4b_nnet2_retrain/decode_ug
    # Results for this experiment:
    # for x in exp/dnn4b_nnet2_retrain/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done
    # %WER 1.58 [ 198 / 12533, 29 ins, 38 del, 131 sub ] exp/dnn4b_nnet2_retrain/decode/wer_3
    # %WER 7.60 [ 953 / 12533, 56 ins, 168 del, 729 sub ] exp/dnn4b_nnet2_retrain/decode_ug/wer_10
  
    # vs. the following baseline (our experiment got a 0.2% absolute improvement,
    # on the unigram test only).
    # for x in exp/dnn4b_nnet2/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done
    # %WER 1.58 [ 198 / 12533, 22 ins, 45 del, 131 sub ] exp/dnn4b_nnet2/decode/wer_3
    # %WER 7.80 [ 977 / 12533, 83 ins, 151 del, 743 sub ] exp/dnn4b_nnet2/decode_ug/wer_6
  
  )
  
  (
    # We demonstrate doing further training on top of a DBN trained
    # generatively by Karel's tools.
    mkdir -p exp/dnn4b_nnet2_dbn_in
    for f in final.mdl final.feature_transform ali_train_pdf.counts; do
      cp exp/dnn4b_pretrain-dbn_dnn/$f exp/dnn4b_nnet2_dbn_in/
    done
    # 6.dbn is the DBN from the last pretraining stage (six stacked RBMs).
    cp exp/dnn4b_pretrain-dbn/6.dbn exp/dnn4b_nnet2_dbn_in/final.dbn
    steps/nnet2/convert_nnet1_to_nnet2.sh exp/dnn4b_nnet2_dbn_in exp/dnn4b_nnet2_dbn
    # Copy over the configuration files and tree that the decoding scripts expect.
    cp exp/tri3b/splice_opts exp/tri3b/cmvn_opts exp/tri3b/final.mat exp/tri3b/tree exp/dnn4b_nnet2_dbn/
  
  
    # As above: switch to preconditioned updates and set the learning rate
    # (here 0.01) before retraining.
    nnet-am-switch-preconditioning exp/dnn4b_nnet2_dbn/final.mdl - | \
      nnet-am-copy --learning-rate=0.01 - exp/dnn4b_nnet2_dbn/final.mdl.mod
  
    steps/nnet2/get_egs.sh --samples-per-iter 200000 \
      --num-jobs-nnet 4 --splice-width 5 --cmd "$train_cmd" \
      data/train data/lang exp/tri3b_ali \
      exp/dnn4b_nnet2_dbn_retrain
  
    steps/nnet2/train_more.sh --learning-rate-factor 0.1 --cmd "$train_cmd" \
      --parallel-opts "--gpu 1" --num-threads 1  --minibatch-size 512 \
      exp/dnn4b_nnet2_dbn/final.mdl.mod exp/dnn4b_nnet2_dbn_retrain/egs exp/dnn4b_nnet2_dbn_retrain
  
  
    steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
      --config conf/decode.config exp/tri3b/graph data/test exp/dnn4b_nnet2_dbn_retrain/decode &
    steps/nnet2/decode.sh --nj 30 --cmd "$decode_cmd" --transform-dir exp/tri3b/decode \
      --config conf/decode.config exp/tri3b/graph_ug data/test exp/dnn4b_nnet2_dbn_retrain/decode_ug &
    wait # let the backgrounded decodes finish before the subshell exits.
  
    # Here are the results (note that we never tuned this at all; it was our first
    # guess at what might be good parameters).
    # for x in exp/dnn4b_nnet2_dbn_retrain/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done
    # %WER 1.68 [ 210 / 12533, 36 ins, 43 del, 131 sub ] exp/dnn4b_nnet2_dbn_retrain/decode/wer_3
    # %WER 7.86 [ 985 / 12533, 72 ins, 172 del, 741 sub ] exp/dnn4b_nnet2_dbn_retrain/decode_ug/wer_8
  
    # Here is the baseline; we're slightly worse than it on both test scenarios.
    # for x in exp/dnn4b_nnet2/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done
    # %WER 1.58 [ 198 / 12533, 22 ins, 45 del, 131 sub ] exp/dnn4b_nnet2/decode/wer_3
    # %WER 7.80 [ 977 / 12533, 83 ins, 151 del, 743 sub ] exp/dnn4b_nnet2/decode_ug/wer_6
  )