  #!/bin/bash
  
  # note: see the newer, better script run_nnet2_wsj_joint.sh
  
  # This script assumes you have previously run the WSJ example script, including
  # the optional part local/online/run_online_decoding_nnet2.sh.  It builds a
  # neural net for online decoding on top of the network we previously trained on
  # WSJ, by keeping everything but the last layer of that network and then
  # training just that last layer on our (RM) data.  From stage 5 onwards we go
  # further and retrain all the layers of the combined network on this data.
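  #
  # Example usage (run from egs/rm/s5; --stage, --train-stage and --use-gpu are
  # parsed from the command line by utils/parse_options.sh):
  #   local/online/run_nnet2_wsj.sh
  #   local/online/run_nnet2_wsj.sh --use-gpu false --stage 3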
  
  stage=0
  set -e
  
  train_stage=-10
  use_gpu=true
  . ./cmd.sh
  . ./path.sh
  . ./utils/parse_options.sh
  
  if $use_gpu; then
    if ! cuda-compiled; then
      cat <<EOF && exit 1
  This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
  If you want to use GPUs (and have them), go to src/, and configure and make on a machine
  where "nvcc" is installed.  Otherwise, call this script with --use-gpu false.
  EOF
    fi
    parallel_opts="--gpu 1"
    num_threads=1
    minibatch_size=512
    dir=exp/nnet2_online_wsj/nnet_a
    trainfeats=exp/nnet2_online_wsj/wsj_activations_train
    # later we'll change the script to download the trained model from kaldi-asr.org.
    srcdir=../../wsj/s5/exp/nnet2_online/nnet_a_gpu_online
  else
    # Use 4 nnet jobs just as in run_4d_gpu.sh, so the results should be
    # almost the same, but this may be a little slow.
    num_threads=16
    minibatch_size=128
    parallel_opts="--num-threads $num_threads"
    dir=exp/nnet2_online_wsj/nnet_a
    trainfeats=exp/nnet2_online_wsj/wsj_activations_train
    srcdir=../../wsj/s5/exp/nnet2_online/nnet_a_online
  fi
  
  
  if [ $stage -le 0 ]; then
    echo "$0: dumping activations from WSJ model"
    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $trainfeats/feats/storage ]; then
      # this shows how you can split the data across multiple file-systems; it's optional.
      date=$(date +'%m_%d_%H_%M')
      utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/rm-$date/s5/$trainfeats/feats/storage \
         $trainfeats/feats/storage
    fi
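    # (create_split_dir.pl creates the storage directory as a set of symlinks
    # pointing into the physical locations listed above, so the data dumped in
    # the next step gets distributed across those filesystems.)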
    steps/online/nnet2/dump_nnet_activations.sh --cmd "$train_cmd" --nj 30 \
       data/train $srcdir $trainfeats
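    # The dumped activations become the features of a new data directory,
    # $trainfeats/data, which is used as the training data in stage 1 below.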
  fi
  
  if [ $stage -le 1 ]; then
    echo "$0: training 0-hidden-layer model on top of WSJ activations"
    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
      utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/rm-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
    fi
  
    steps/nnet2/retrain_fast.sh --stage $train_stage \
      --num-threads "$num_threads" \
      --minibatch-size "$minibatch_size" \
      --parallel-opts "$parallel_opts" \
      --cmd "$decode_cmd" \
      --num-jobs-nnet 4 \
      --mix-up 4000 \
      --initial-learning-rate 0.02 --final-learning-rate 0.004 \
       $trainfeats/data data/lang exp/tri3b_ali $dir
  fi
  
  if [ $stage -le 2 ]; then
    echo "$0: formatting combined model for online decoding."
    steps/online/nnet2/prepare_online_decoding_retrain.sh $srcdir $dir ${dir}_online
  fi
  
  if [ $stage -le 3 ]; then
    # do online decoding with the combined model.
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
      exp/tri3b/graph data/test ${dir}_online/decode &
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
      exp/tri3b/graph_ug data/test ${dir}_online/decode_ug || exit 1;
    wait
  fi
  
  if [ $stage -le 4 ]; then
    # do online per-utterance decoding with the combined model.
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
       --per-utt true \
      exp/tri3b/graph data/test ${dir}_online/decode_utt &
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
       --per-utt true \
      exp/tri3b/graph_ug data/test ${dir}_online/decode_ug_utt || exit 1;
    wait
  fi
  
  ## From this point on we try something else: we try training all the layers of
  ## the model on this dataset.  First we need to create a combined version of the
  ## model.
  if [ $stage -le 5 ]; then
    steps/nnet2/create_appended_model.sh $srcdir $dir ${dir}_combined_init
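    # (Roughly speaking, create_appended_model.sh appends the small network we
    # trained in stage 1 (in $dir) onto the hidden layers of the WSJ network in
    # $srcdir, giving a single model that we can then train as a whole.)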
  
    # Set the learning rate in this initial model to our guess of a suitable value.
    # note: we initially tried 0.005, and this gave us WERs of (1.40, 1.48, 7.24, 7.70) vs.
    # (1.32, 1.38, 7.20, 7.44) with a learning rate of 0.01.
    initial_learning_rate=0.01
    nnet-am-copy --learning-rate=$initial_learning_rate ${dir}_combined_init/final.mdl ${dir}_combined_init/final.mdl
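    # (To sanity-check the learning rate that was set, you could inspect the model
    # with something like: nnet-am-info ${dir}_combined_init/final.mdl)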
  fi
  
  if [ $stage -le 6 ]; then
    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${dir}_combined/egs/storage ]; then
      utils/create_split_dir.pl \
        /export/b0{1,2,3,4}/$USER/kaldi-data/rm-$(date +'%m_%d_%H_%M')/s5/${dir}_combined/egs/storage \
          ${dir}_combined/egs/storage
    fi
  
    # This version of the get_egs.sh script does the feature extraction and iVector
    # extraction itself, in a single binary that reads the config from the
    # online-decoding directory (${dir}_online).
    steps/online/nnet2/get_egs.sh --cmd "$train_cmd" --num-jobs-nnet 4 \
      data/train exp/tri3b_ali ${dir}_online ${dir}_combined
  fi
  
  if [ $stage -le 7 ]; then
    steps/nnet2/train_more.sh --learning-rate-factor 0.1 --cmd "$train_cmd" \
      --num-threads "$num_threads" \
      --minibatch-size "$minibatch_size" \
      --parallel-opts "$parallel_opts" \
       ${dir}_combined_init/final.mdl  ${dir}_combined/egs ${dir}_combined
  fi
  
  if [ $stage -le 8 ]; then
    # Create an online-decoding dir corresponding to what we just trained above.
    # If this setup used PLP features, we'd have to give the option --feature-type plp
    # to the script below.
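    # For example (hypothetically, if this were a PLP setup) the call below would
    # become:
    #   steps/online/nnet2/prepare_online_decoding.sh --feature-type plp data/lang \
    #     $srcdir/ivector_extractor ${dir}_combined ${dir}_combined_online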
    steps/online/nnet2/prepare_online_decoding.sh data/lang $srcdir/ivector_extractor \
      ${dir}_combined ${dir}_combined_online || exit 1;
  fi
  
  if [ $stage -le 9 ]; then
    # do the online decoding on top of the retrained _combined_online model, and
    # also the per-utterance version of the online decoding.
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
      exp/tri3b/graph data/test ${dir}_combined_online/decode &
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
      exp/tri3b/graph_ug data/test ${dir}_combined_online/decode_ug &
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
      --per-utt true exp/tri3b/graph data/test ${dir}_combined_online/decode_per_utt &
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
      --per-utt true exp/tri3b/graph_ug data/test ${dir}_combined_online/decode_ug_per_utt || exit 1;
    wait
  fi
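  # Once the decodes above have finished, the WERs can be summarized with, e.g.:
  #   for x in exp/nnet2_online_wsj/*/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done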
  
  
  
  exit 0;
  
  # Here are the results when we just retrain the last layer:
  # grep WER exp/nnet2_online_wsj/nnet_a_online/decode/wer_* | utils/best_wer.sh
  #%WER 1.60 [ 201 / 12533, 22 ins, 46 del, 133 sub ] exp/nnet2_online_wsj/nnet_a_online/decode/wer_3
  # grep WER exp/nnet2_online_wsj/nnet_a_online/decode_ug/wer_* | utils/best_wer.sh
  #%WER 8.02 [ 1005 / 12533, 74 ins, 155 del, 776 sub ] exp/nnet2_online_wsj/nnet_a_online/decode_ug/wer_6
  
  # and with per-utterance decoding:
  # %WER 8.47 [ 1061 / 12533, 88 ins, 157 del, 816 sub ] exp/nnet2_online_wsj/nnet_a_online/decode_ug_utt/wer_6
  # %WER 1.70 [ 213 / 12533, 24 ins, 46 del, 143 sub ] exp/nnet2_online_wsj/nnet_a_online/decode_utt/wer_3
  
  
  
  # And here are the results when we retrain the whole thing:
  #%WER 1.42 [ 178 / 12533, 16 ins, 44 del, 118 sub ] exp/nnet2_online_wsj/nnet_a_combined_online/decode/wer_4
  #%WER 7.08 [ 887 / 12533, 74 ins, 133 del, 680 sub ] exp/nnet2_online_wsj/nnet_a_combined_online/decode_ug/wer_6
  
  # and the same with per-utterance decoding:
  # %WER 1.56 [ 196 / 12533, 31 ins, 26 del, 139 sub ] exp/nnet2_online_wsj/nnet_a_combined_online/decode_per_utt/wer_2
  # %WER 7.86 [ 985 / 12533, 59 ins, 171 del, 755 sub ] exp/nnet2_online_wsj/nnet_a_combined_online/decode_ug_per_utt/wer_8
  
  # And this is a suitable baseline: a system trained on RM only.
  # grep WER exp/nnet2_online/nnet_a_online/decode/wer_* | utils/best_wer.sh
  #%WER 2.20 [ 276 / 12533, 25 ins, 69 del, 182 sub ] exp/nnet2_online/nnet_a_online/decode/wer_8
  # grep WER exp/nnet2_online/nnet_a_online/decode_ug/wer_* | utils/best_wer.sh
  #%WER 10.14 [ 1271 / 12533, 127 ins, 198 del, 946 sub ] exp/nnet2_online/nnet_a_online/decode_ug/wer_11