Blame view
egs/rm/s5/local/online/run_nnet2.sh
5.73 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
#!/bin/bash . ./cmd.sh stage=1 train_stage=-10 use_gpu=true dir=exp/nnet2_online/nnet_a . ./cmd.sh . ./path.sh . ./utils/parse_options.sh if $use_gpu; then if ! cuda-compiled; then cat <<EOF && exit 1 This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA If you want to use GPUs (and have them), go to src/, and configure and make on a machine where "nvcc" is installed. Otherwise, call this script with --use-gpu false EOF fi parallel_opts="--gpu 1" num_threads=1 minibatch_size=512 else # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be # almost the same, but this may be a little bit slow. num_threads=16 minibatch_size=128 parallel_opts="--num-threads $num_threads" fi # stages 1 through 3 run in run_nnet2_common.sh. local/online/run_nnet2_common.sh --stage $stage || exit 1; if [ $stage -le 4 ]; then steps/nnet2/train_pnorm_simple2.sh --stage $train_stage \ --splice-width 7 \ --feat-type raw \ --online-ivector-dir exp/nnet2_online/ivectors \ --cmvn-opts "--norm-means=false --norm-vars=false" \ --num-threads "$num_threads" \ --minibatch-size "$minibatch_size" \ --parallel-opts "$parallel_opts" \ --num-jobs-nnet 4 \ --num-epochs 25 \ --add-layers-period 1 \ --num-hidden-layers 2 \ --mix-up 4000 \ --initial-learning-rate 0.02 --final-learning-rate 0.004 \ --cmd "$decode_cmd" \ --pnorm-input-dim 1000 \ --pnorm-output-dim 200 \ data/train data/lang exp/tri3b_ali $dir || exit 1; fi if [ $stage -le 5 ]; then steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 4 \ data/test exp/nnet2_online/extractor exp/nnet2_online/ivectors_test || exit 1; fi if [ $stage -le 6 ]; then # Note: comparing the results of this with run_online_decoding_nnet2_baseline.sh, # it's a bit worse, meaning the iVectors seem to hurt at this amount of data. # However, experiments by Haihua Xu (not checked in yet) on WSJ, show it helping # nicely. This setup seems to have too little data for it to work, but it suffices # to demonstrate the scripts. We will likely modify it to add noise to the # iVectors in training, which will tend to mitigate the over-training. steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \ --online-ivector-dir exp/nnet2_online/ivectors_test \ exp/tri3b/graph data/test $dir/decode & steps/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \ --online-ivector-dir exp/nnet2_online/ivectors_test \ exp/tri3b/graph_ug data/test $dir/decode_ug || exit 1; wait fi if [ $stage -le 7 ]; then # If this setup used PLP features, we'd have to give the option --feature-type plp # to the script below. steps/online/nnet2/prepare_online_decoding.sh data/lang exp/nnet2_online/extractor \ "$dir" ${dir}_online || exit 1; fi if [ $stage -le 8 ]; then # do the actual online decoding with iVectors. steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \ exp/tri3b/graph data/test ${dir}_online/decode & steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \ exp/tri3b/graph_ug data/test ${dir}_online/decode_ug || exit 1; wait fi if [ $stage -le 9 ]; then # this version of the decoding treats each utterance separately # without carrying forward speaker information. steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \ --per-utt true \ exp/tri3b/graph data/test ${dir}_online/decode_per_utt & steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \ --per-utt true \ exp/tri3b/graph_ug data/test ${dir}_online/decode_ug_per_utt || exit 1; wait fi exit 0; # the experiment (with GPU) #for x in exp/nnet2_online/nnet_a/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done %WER 2.20 [ 276 / 12533, 37 ins, 61 del, 178 sub ] exp/nnet2_online/nnet_a/decode/wer_5 %WER 10.22 [ 1281 / 12533, 143 ins, 193 del, 945 sub ] exp/nnet2_online/nnet_a/decode_ug/wer_10 # This is the baseline with spliced non-CMVN cepstra and no iVector input. # The difference is pretty small on RM; I expect it to be more clear-cut on larger corpora. %WER 2.30 [ 288 / 12533, 35 ins, 57 del, 196 sub ] exp/nnet2_online/nnet_gpu_baseline/decode/wer_5 %WER 10.98 [ 1376 / 12533, 121 ins, 227 del, 1028 sub ] exp/nnet2_online/nnet_gpu_baseline/decode_ug/wer_10 # and this is the same (baseline) using truly-online decoding; it probably only differs because # of slight decoding-parameter differences. %WER 2.31 [ 290 / 12533, 34 ins, 57 del, 199 sub ] exp/nnet2_online/nnet_gpu_baseline_online/decode/wer_5 %WER 10.93 [ 1370 / 12533, 142 ins, 202 del, 1026 sub ] exp/nnet2_online/nnet_gpu_baseline_online/decode_ug/wer_9 # This is the online decoding. # This truly-online per-utterance decoding gives essentially the same WER as the offline decoding, which is # as we expect as the features and decoding parameters are the same. # for x in exp/nnet2_online/nnet_gpu_online/decode*utt; do grep WER $x/wer_* | utils/best_wer.sh; done %WER 2.28 [ 286 / 12533, 66 ins, 39 del, 181 sub ] exp/nnet2_online/nnet_a_online/decode_per_utt/wer_2 %WER 10.45 [ 1310 / 12533, 106 ins, 241 del, 963 sub ] exp/nnet2_online/nnet_a_online/decode_ug_per_utt/wer_12 # The following are online decoding, as above, but using previous utterances of # the same speaker to refine the adaptation state. It doesn't make much difference. # for x in exp/nnet2_online/nnet_gpu_online/decode*; do grep WER $x/wer_* | utils/best_wer.sh; done | grep -v utt %WER 2.27 [ 285 / 12533, 42 ins, 62 del, 181 sub ] exp/nnet2_online/nnet_a_online/decode/wer_5 %WER 10.26 [ 1286 / 12533, 140 ins, 188 del, 958 sub ] exp/nnet2_online/nnet_a_online/decode_ug/wer_10 |