Yannick Estève / ONTRAC-Kaldi

Blame view

egs/wsj/s5/local/online/run_nnet2_perturb_speed.sh 5.64 KB
  #!/bin/bash
  # Copyright 2013  Johns Hopkins University (author: Daniel Povey)
  #           2014  Tom Ko
  # Apache 2.0
  
  # This example script demonstrates how speed perturbation of the data helps the nnet training.
  
  # Pull in the site-specific settings: cmd.sh is expected to provide the job
  # launchers ($train_cmd, $decode_cmd) used below, path.sh the tool PATH.
  . ./cmd.sh
  . ./path.sh

  # Defaults.  Each one can be overridden on the command line as
  # --<name> <value> (dashes map to underscores), e.g. --stage 3 --use-gpu false.
  stage=-1          # first stage to run; blocks guarded by a lower number are skipped
  train_stage=-10   # forwarded to the nnet training script as its --stage
  use_gpu=true      # executed as a command (true/false) when choosing GPU vs. CPU settings
  nnet_dir=exp/nnet2_online_perturb   # root for the diag-UBM, iVector extractor, iVectors and nnet

  # Apply command-line overrides.  Without this, the "--use-gpu false" advice
  # printed when CUDA is unavailable could not actually be followed.
  . ./utils/parse_options.sh
  
  # Choose the training configuration for GPU or multi-threaded CPU training.
  # Sets: parallel_opts, num_threads, minibatch_size and dir (nnet output dir).
  if $use_gpu; then
    if ! cuda-compiled; then
      # cuda-compiled failed, i.e. (per the message) Kaldi was built without
      # CUDA.  Report on stderr and stop unconditionally; echo is used instead
      # of a here-document so the exit does not depend on cat succeeding or on
      # the terminator line sitting at column 0.
      {
        echo 'This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA'
        echo 'If you want to use GPUs (and have them), go to src/, and configure and make on a machine'
        echo 'where "nvcc" is installed.  Otherwise, call this script with --use-gpu false'
      } >&2
      exit 1
    fi
    parallel_opts="--gpu 1"
    num_threads=1
    minibatch_size=512
    # the _a is in case I want to change the parameters.
    dir=$nnet_dir/nnet_a_gpu
  else
    # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
    # almost the same, but this may be a little bit slow.
    num_threads=16
    minibatch_size=128
    parallel_opts="--num-threads $num_threads"
    dir=$nnet_dir/nnet_a
  fi
  
  
  # Stage -1: build the speed-perturbed training set data/train_si284p and
  # compute its MFCCs and CMVN statistics.
  if [ "$stage" -le -1 ]; then
    # Make 0.9x, 1.0x and 1.1x copies of train_si284, then merge them.  Each
    # step is checked so that a failure cannot leave a partial train_si284p
    # behind for the feature extraction below to run on.
    utils/perturb_data_dir_speed.sh 0.9 data/train_si284 data/train_si284temp1 || exit 1;
    utils/perturb_data_dir_speed.sh 1.0 data/train_si284 data/train_si284temp2 || exit 1;
    utils/perturb_data_dir_speed.sh 1.1 data/train_si284 data/train_si284temp3 || exit 1;
    utils/combine_data.sh data/train_si284p data/train_si284temp1 data/train_si284temp2 data/train_si284temp3 || exit 1;
    # The per-speed directories are only intermediates of the merge.
    rm -r data/train_si284temp1 data/train_si284temp2 data/train_si284temp3

    mfccdir=mfcc_perturbed
    for x in train_si284p; do
      steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 \
        data/$x exp/make_mfcc/$x $mfccdir || exit 1;
      steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
    done
  fi
  
  # Stage 0: run fMLLR alignment of the perturbed training data against
  # exp/tri4b, writing the alignments to exp/tri4b_ali_si284p.
  if [ "$stage" -le 0 ]; then
    ali_src=data/train_si284p
    ali_out=exp/tri4b_ali_si284p
    if ! steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
           "$ali_src" data/lang exp/tri4b "$ali_out"; then
      exit 1
    fi
  fi
  
  
  # Stage 1: train the diagonal UBM into $nnet_dir/diag_ubm.
  if [ "$stage" -le 1 ]; then
    mkdir -p "$nnet_dir"
    # To train a diagonal UBM we don't need very much data, so use just the si84 data.
    # the tri3b is the input dir; the choice of this is not critical as we just use
    # it for the LDA matrix.  Since the iVectors don't make a great deal of difference,
    # we'll use 256 Gaussians for speed.
    # Stop on failure like every other training step in this script, rather
    # than letting the extractor training start from a missing UBM.
    steps/online/nnet2/train_diag_ubm.sh --cmd "$train_cmd" --nj 30 --num-frames 200000 \
      data/train_si84 256 exp/tri3b "$nnet_dir/diag_ubm" || exit 1;
  fi
  
  # Stage 2: train the iVector extractor on the perturbed data, starting from
  # the diagonal UBM in $nnet_dir/diag_ubm.
  if [ "$stage" -le 2 ]; then
    ubm_dir=$nnet_dir/diag_ubm
    extractor_dir=$nnet_dir/extractor
    # Only 10 jobs are requested, but each job uses multiple processes and threads.
    if ! steps/online/nnet2/train_ivector_extractor.sh --cmd "$train_cmd" --nj 10 \
           data/train_si284p "$ubm_dir" "$extractor_dir"; then
      exit 1
    fi
  fi
  
  # Stage 3: extract online iVectors for the perturbed training data into
  # $nnet_dir/ivectors_train_si284p.
  if [ "$stage" -le 3 ]; then
    train_ivector_dir=$nnet_dir/ivectors_train_si284p
    if ! steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 30 \
           data/train_si284p "$nnet_dir/extractor" "$train_ivector_dir"; then
      exit 1
    fi
  fi
  
  # Stage 4: train the p-norm neural net on the speed-perturbed data, using the
  # stage-0 alignments and the stage-3 training iVectors; the model goes to $dir.
  # num_threads, minibatch_size and parallel_opts come from the GPU/CPU
  # selection earlier in the script; $train_stage lets training be resumed.
  if [ "$stage" -le 4 ]; then
    # The data directory is data/train_si284p (it was misspelled "date/...",
    # which made training fail on a non-existent directory).
    steps/nnet2/train_pnorm_simple2.sh --stage "$train_stage" \
      --online-ivector-dir "$nnet_dir/ivectors_train_si284p" \
      --num-epochs 4 \
      --splice-width 7 --feat-type raw \
      --cmvn-opts "--norm-means=false --norm-vars=false" \
      --num-threads "$num_threads" \
      --minibatch-size "$minibatch_size" \
      --parallel-opts "$parallel_opts" \
      --num-jobs-nnet 6 \
      --num-hidden-layers 4 \
      --mix-up 4000 \
      --initial-learning-rate 0.02 --final-learning-rate 0.004 \
      --cmd "$decode_cmd" \
      --pnorm-input-dim 2400 \
      --pnorm-output-dim 300 \
      data/train_si284p data/lang exp/tri4b_ali_si284p "$dir" || exit 1;
  fi
  
  # Stage 5: extract online iVectors for each test set with the extractor in
  # $nnet_dir/extractor; output goes to $nnet_dir/ivectors_<test set>.
  if [ "$stage" -le 5 ]; then
    for test_set in test_eval92 test_dev93 test_eval93; do
      if ! steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 8 \
             "data/${test_set}" "$nnet_dir/extractor" "$nnet_dir/ivectors_${test_set}"; then
        exit 1
      fi
    done
  fi
  
  # Stage 6: decode every test set with both language models.
  if [ "$stage" -le 6 ]; then
    # This is offline decoding that should give the same results as the real
    # online decoding.
    for lm in tgpr bd_tgpr; do
      # Reuse the graphs already built under exp/tri4b.
      graph=exp/tri4b/graph_${lm}
      for test_year in eval92 eval93 dev93; do
        if ! steps/nnet2/decode.sh --nj 8 --cmd "$decode_cmd" \
               --online-ivector-dir "$nnet_dir/ivectors_test_${test_year}" \
               "$graph" "data/test_${test_year}" "$dir/decode_${lm}_${test_year}"; then
          exit 1
        fi
      done
    done
  fi
  
  
  
  # Here are the results.
  
  # First, this is the baseline.
  # This is obtained from running the offline decoding in run_nnet2.sh which calls steps/nnet2/train_pnorm_simple2.sh
  
  # %WER 7.91 [ 651 / 8234, 79 ins, 102 del, 470 sub ] exp/nnet2_online/nnet_a_gpu/decode_bd_tgpr_dev93/wer_11
  # %WER 4.29 [ 242 / 5643, 38 ins, 9 del, 195 sub ] exp/nnet2_online/nnet_a_gpu/decode_bd_tgpr_eval92/wer_9
  # %WER 6.87 [ 237 / 3448, 21 ins, 45 del, 171 sub ] exp/nnet2_online/nnet_a_gpu/decode_bd_tgpr_eval93/wer_10
  # %WER 10.19 [ 839 / 8234, 177 ins, 96 del, 566 sub ] exp/nnet2_online/nnet_a_gpu/decode_tgpr_dev93/wer_12
  # %WER 6.79 [ 383 / 5643, 101 ins, 13 del, 269 sub ] exp/nnet2_online/nnet_a_gpu/decode_tgpr_eval92/wer_10
  # %WER 8.64 [ 298 / 3448, 38 ins, 41 del, 219 sub ] exp/nnet2_online/nnet_a_gpu/decode_tgpr_eval93/wer_11
  
  # Then this is the result obtained from this script.
  
  # %WER 7.30 [ 601 / 8234, 64 ins, 102 del, 435 sub ] exp/nnet2_online_perturb/nnet_a_gpu/decode_bd_tgpr_dev93/wer_13
  # %WER 4.15 [ 234 / 5643, 39 ins, 11 del, 184 sub ] exp/nnet2_online_perturb/nnet_a_gpu/decode_bd_tgpr_eval92/wer_9
  # %WER 6.41 [ 221 / 3448, 15 ins, 39 del, 167 sub ] exp/nnet2_online_perturb/nnet_a_gpu/decode_bd_tgpr_eval93/wer_11
  # %WER 9.85 [ 811 / 8234, 187 ins, 72 del, 552 sub ] exp/nnet2_online_perturb/nnet_a_gpu/decode_tgpr_dev93/wer_10
  # %WER 6.63 [ 374 / 5643, 88 ins, 16 del, 270 sub ] exp/nnet2_online_perturb/nnet_a_gpu/decode_tgpr_eval92/wer_13
  # %WER 8.06 [ 278 / 3448, 42 ins, 32 del, 204 sub ] exp/nnet2_online_perturb/nnet_a_gpu/decode_tgpr_eval93/wer_10