egs/rm/s5/local/online/run_nnet2_wsj.sh
#!/bin/bash

# note: see the newer, better script run_nnet2_wsj_joint.sh

# This script assumes you have previously run the WSJ example script including
# the optional part local/online/run_online_decoding_nnet2.sh.  It builds a
# neural net for online decoding on top of the network we previously trained on
# WSJ, by keeping everything but the last layer of that network and then
# training just the last layer on our data.  We then train the whole thing.

stage=0
set -e
train_stage=-10
use_gpu=true

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if $use_gpu; then
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.  Otherwise, call this script with --use-gpu false.
EOF
  fi
  parallel_opts="--gpu 1"
  num_threads=1
  minibatch_size=512
  dir=exp/nnet2_online_wsj/nnet_a
  trainfeats=exp/nnet2_online_wsj/wsj_activations_train
  # later we'll change the script to download the trained model from kaldi-asr.org.
  srcdir=../../wsj/s5/exp/nnet2_online/nnet_a_gpu_online
else
  # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
  # almost the same, but this may be a little bit slow.
  num_threads=16
  minibatch_size=128
  parallel_opts="--num-threads $num_threads"
  dir=exp/nnet2_online_wsj/nnet_a
  trainfeats=exp/nnet2_online_wsj/wsj_activations_train
  srcdir=../../wsj/s5/exp/nnet2_online/nnet_a_online
fi

if [ $stage -le 0 ]; then
  echo "$0: dumping activations from WSJ model"
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $trainfeats/feats/storage ]; then
    # this shows how you can split the data across multiple file-systems; it's optional.
    date=$(date +'%m_%d_%H_%M')
    utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/rm-$date/s5/$trainfeats/feats/storage \
      $trainfeats/feats/storage
  fi
  steps/online/nnet2/dump_nnet_activations.sh --cmd "$train_cmd" --nj 30 \
    data/train $srcdir $trainfeats
fi

if [ $stage -le 1 ]; then
  echo "$0: training 0-hidden-layer model on top of WSJ activations"
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl \
      /export/b0{1,2,3,4}/$USER/kaldi-data/rm-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
  fi
  steps/nnet2/retrain_fast.sh --stage $train_stage \
    --num-threads "$num_threads" \
    --minibatch-size "$minibatch_size" \
    --parallel-opts "$parallel_opts" \
    --cmd "$decode_cmd" \
    --num-jobs-nnet 4 \
    --mix-up 4000 \
    --initial-learning-rate 0.02 --final-learning-rate 0.004 \
    $trainfeats/data data/lang exp/tri3b_ali $dir
fi

if [ $stage -le 2 ]; then
  echo "$0: formatting combined model for online decoding."
  steps/online/nnet2/prepare_online_decoding_retrain.sh $srcdir $dir ${dir}_online
fi

if [ $stage -le 3 ]; then
  # do online decoding with the combined model.
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
    exp/tri3b/graph data/test ${dir}_online/decode &
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
    exp/tri3b/graph_ug data/test ${dir}_online/decode_ug || exit 1;
  wait
fi

if [ $stage -le 4 ]; then
  # do online per-utterance decoding with the combined model.
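  # Note on --per-utt true (used in the decode commands below): it makes the decoding
  # script treat every utterance as a separate speaker, so no adaptation state (such as
  # the online iVector estimate) is carried over between utterances of the same speaker.
  # As the results at the bottom of this file show, this is typically slightly worse
  # than the default speaker-adapted decoding done above.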
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
    --per-utt true \
    exp/tri3b/graph data/test ${dir}_online/decode_utt &
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
    --per-utt true \
    exp/tri3b/graph_ug data/test ${dir}_online/decode_ug_utt || exit 1;
  wait
fi

## From this point on we try something else: we try training all the layers of
## the model on this dataset.  First we need to create a combined version of the
## model.

if [ $stage -le 5 ]; then
  steps/nnet2/create_appended_model.sh $srcdir $dir ${dir}_combined_init

  # Set the learning rate of this initial model to our guess of a suitable value.
  # note: we initially tried 0.005, and this gave us WERs of (1.40, 1.48, 7.24, 7.70) vs.
  # (1.32, 1.38, 7.20, 7.44) with a learning rate of 0.01.
  initial_learning_rate=0.01
  nnet-am-copy --learning-rate=$initial_learning_rate ${dir}_combined_init/final.mdl ${dir}_combined_init/final.mdl
fi

if [ $stage -le 6 ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${dir}_combined/egs/storage ]; then
    utils/create_split_dir.pl \
      /export/b0{1,2,3,4}/$USER/kaldi-data/rm-$(date +'%m_%d_%H_%M')/s5/${dir}_combined/egs/storage \
      ${dir}_combined/egs/storage
  fi
  # This version of the get_egs.sh script does the feature extraction and iVector
  # extraction in a single binary, reading the config, as part of the script.
  steps/online/nnet2/get_egs.sh --cmd "$train_cmd" --num-jobs-nnet 4 \
    data/train exp/tri3b_ali ${dir}_online ${dir}_combined
fi

if [ $stage -le 7 ]; then
  steps/nnet2/train_more.sh --learning-rate-factor 0.1 --cmd "$train_cmd" \
    --num-threads "$num_threads" \
    --minibatch-size "$minibatch_size" \
    --parallel-opts "$parallel_opts" \
    ${dir}_combined_init/final.mdl ${dir}_combined/egs ${dir}_combined
fi

if [ $stage -le 8 ]; then
  # Create an online-decoding dir corresponding to what we just trained above.
  # If this setup used PLP features, we'd have to give the option --feature-type plp
  # to the script below.
  steps/online/nnet2/prepare_online_decoding.sh data/lang $srcdir/ivector_extractor \
    ${dir}_combined ${dir}_combined_online || exit 1;
fi

if [ $stage -le 9 ]; then
  # do the online decoding on top of the retrained _combined_online model, and
  # also the per-utterance version of the online decoding.
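  # The four decodes below write to ${dir}_combined_online/decode{,_ug,_per_utt,_ug_per_utt};
  # their best WERs are recorded in the comments at the bottom of this file.  Note that
  # the first three jobs run in the background and only the last one carries "|| exit 1",
  # so a failure in a backgrounded job will not by itself stop the script.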
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
    exp/tri3b/graph data/test ${dir}_combined_online/decode &
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
    exp/tri3b/graph_ug data/test ${dir}_combined_online/decode_ug &
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
    --per-utt true exp/tri3b/graph data/test ${dir}_combined_online/decode_per_utt &
  steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
    --per-utt true exp/tri3b/graph_ug data/test ${dir}_combined_online/decode_ug_per_utt || exit 1;
  wait
fi

exit 0;

# Here are the results when we just retrain the last layer:
# grep WER exp/nnet2_online_wsj/nnet_a_online/decode/wer_* | utils/best_wer.sh
# %WER 1.60 [ 201 / 12533, 22 ins, 46 del, 133 sub ] exp/nnet2_online_wsj/nnet_a_online/decode/wer_3
# grep WER exp/nnet2_online_wsj/nnet_a_online/decode_ug/wer_* | utils/best_wer.sh
# %WER 8.02 [ 1005 / 12533, 74 ins, 155 del, 776 sub ] exp/nnet2_online_wsj/nnet_a_online/decode_ug/wer_6

# and with per-utterance decoding:
# %WER 1.70 [ 213 / 12533, 24 ins, 46 del, 143 sub ] exp/nnet2_online_wsj/nnet_a_online/decode_utt/wer_3
# %WER 8.47 [ 1061 / 12533, 88 ins, 157 del, 816 sub ] exp/nnet2_online_wsj/nnet_a_online/decode_ug_utt/wer_6

# and here when we retrain the whole thing:
# %WER 1.42 [ 178 / 12533, 16 ins, 44 del, 118 sub ] exp/nnet2_online_wsj/nnet_a_combined_online/decode/wer_4
# %WER 7.08 [ 887 / 12533, 74 ins, 133 del, 680 sub ] exp/nnet2_online_wsj/nnet_a_combined_online/decode_ug/wer_6

# and the same with per-utterance decoding:
# %WER 1.56 [ 196 / 12533, 31 ins, 26 del, 139 sub ] exp/nnet2_online_wsj/nnet_a_combined_online/decode_per_utt/wer_2
# %WER 7.86 [ 985 / 12533, 59 ins, 171 del, 755 sub ] exp/nnet2_online_wsj/nnet_a_combined_online/decode_ug_per_utt/wer_8

# And this is a suitable baseline: a system trained on RM only.
# grep WER exp/nnet2_online/nnet_a_online/decode/wer_* | utils/best_wer.sh
# %WER 2.20 [ 276 / 12533, 25 ins, 69 del, 182 sub ] exp/nnet2_online/nnet_a_online/decode/wer_8
# grep WER exp/nnet2_online/nnet_a_online/decode_ug/wer_* | utils/best_wer.sh
# %WER 10.14 [ 1271 / 12533, 127 ins, 198 del, 946 sub ] exp/nnet2_online/nnet_a_online/decode_ug/wer_11
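
# Example invocations (for illustration only; the --stage, --train-stage and --use-gpu
# options correspond to the variables parsed above via utils/parse_options.sh):
#   local/online/run_nnet2_wsj.sh                    # full run on a GPU machine
#   local/online/run_nnet2_wsj.sh --use-gpu false    # CPU run, 16 threads per job
#   local/online/run_nnet2_wsj.sh --stage 5          # skip straight to the combined-model stages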