#!/bin/bash # this RESULTS file was obtained by Dan Povey in Feb 2017, after # a rewrite of the run.sh file. # To see results from the scripts local/nnet3/ and local/chain/, # look at the top of those files, we don't put those in the # RESULTS file. for dir in exp/*; do steps/info/gmm_dir_info.pl $dir for x in $dir/decode*dev93* $dir/decode*eval92*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done done exit 0 # Use caution when comparing these results with other published results. # We use the "20k open" test condition, also known as the "60k vocabulary" # test condition, in which test utterances are not excluded even if they # contain words not in the language model. This is the hardest test condition, # and most published results are in the easier 5k and 20k-closed conditions, # in which we only test on utterances that are in either a 5k or 20k subset # of the vocabulary. # monophone, deltas, trained on the 2k shortest utterances from the si84 data. exp/mono0a: nj=10 align prob=-95.82 over 2.36h [retry=0.4%, fail=0.0%] states=132 gauss=973 %WER 34.33 [ 2827 / 8234, 266 ins, 457 del, 2104 sub ] exp/mono0a/decode_nosp_tgpr_dev93/wer_10_0.0 %WER 25.13 [ 1418 / 5643, 138 ins, 192 del, 1088 sub ] exp/mono0a/decode_nosp_tgpr_eval92/wer_10_0.0 # first triphone build. Built on half of SI-84. exp/tri1: nj=10 align prob=-93.75 over 7.38h [retry=0.4%, fail=0.0%] states=1567 gauss=10025 tree-impr=5.06 %WER 19.40 [ 1597 / 8234, 247 ins, 199 del, 1151 sub ] exp/tri1/decode_nosp_tgpr_dev93/wer_14_0.5 %WER 12.76 [ 720 / 5643, 110 ins, 89 del, 521 sub ] exp/tri1/decode_nosp_tgpr_eval92/wer_14_1.0 # the above, rescored with full trigram model [not pruned.] Note: the tg{1,2,3,4,5} are # different rescoring methods. They all give about the same results. Note: 3 and 4 give # the "correct" LM scores. %WER 18.23 [ 1501 / 8234, 245 ins, 181 del, 1075 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg1/wer_15_0.5 %WER 18.23 [ 1501 / 8234, 245 ins, 181 del, 1075 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg2/wer_15_0.5 %WER 18.16 [ 1495 / 8234, 268 ins, 153 del, 1074 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg3/wer_16_0.0 %WER 18.18 [ 1497 / 8234, 268 ins, 154 del, 1075 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg4/wer_16_0.0 %WER 18.16 [ 1495 / 8234, 220 ins, 202 del, 1073 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg5/wer_15_1.0 # tri2b is an LDA+MLLT system trained on SI-84 exp/tri2b: nj=10 align prob=-47.22 over 15.10h [retry=0.7%, fail=0.0%] states=2005 gauss=15036 tree-impr=5.45 lda-sum=26.20 mllt:impr,logdet=1.34,1.97 %WER 16.37 [ 1348 / 8234, 241 ins, 157 del, 950 sub ] exp/tri2b/decode_nosp_tgpr_dev93/wer_17_0.0 %WER 10.53 [ 594 / 5643, 110 ins, 60 del, 424 sub ] exp/tri2b/decode_nosp_tgpr_eval92/wer_17_0.5 # tri3b is an LDA+MLLT+SAT system trained on all of SI-284 exp/tri3b: nj=10 align prob=-44.30 over 81.23h [retry=0.8%, fail=0.1%] states=3362 gauss=40061 fmllr-impr=3.70 over 59.77h tree-impr=7.86 %WER 15.56 [ 1281 / 8234, 220 ins, 140 del, 921 sub ] exp/tri3b/decode_nosp_tgpr_dev93.si/wer_17_0.5 %WER 12.82 [ 1056 / 8234, 135 ins, 147 del, 774 sub ] exp/tri3b/decode_nosp_bd_tgpr_dev93.si/wer_15_0.0 %WER 9.24 [ 761 / 8234, 89 ins, 109 del, 563 sub ] exp/tri3b/decode_nosp_bd_tgpr_dev93/wer_16_0.0 %WER 11.53 [ 949 / 8234, 179 ins, 94 del, 676 sub ] exp/tri3b/decode_nosp_tgpr_dev93/wer_15_0.5 %WER 10.94 [ 901 / 8234, 181 ins, 82 del, 638 sub ] exp/tri3b/decode_nosp_tg_dev93/wer_14_0.5 %WER 8.16 [ 672 / 8234, 94 ins, 94 del, 484 sub ] exp/tri3b/decode_nosp_bd_tgpr_dev93_fg/wer_17_0.0 %WER 10.95 [ 618 / 5643, 148 ins, 36 del, 434 sub ] exp/tri3b/decode_nosp_tgpr_eval92.si/wer_14_0.0 %WER 8.19 [ 462 / 5643, 77 ins, 51 del, 334 sub ] exp/tri3b/decode_nosp_bd_tgpr_eval92.si/wer_16_0.0 %WER 5.55 [ 313 / 5643, 35 ins, 45 del, 233 sub ] exp/tri3b/decode_nosp_bd_tgpr_eval92/wer_17_1.0 %WER 4.89 [ 276 / 5643, 47 ins, 28 del, 201 sub ] exp/tri3b/decode_nosp_bd_tgpr_eval92_fg/wer_15_0.5 %WER 7.53 [ 425 / 5643, 112 ins, 20 del, 293 sub ] exp/tri3b/decode_nosp_tg_eval92/wer_17_0.0 %WER 8.15 [ 460 / 5643, 113 ins, 30 del, 317 sub ] exp/tri3b/decode_nosp_tgpr_eval92/wer_14_1.0 # tri4b is an LDA+MLLT+SAT system after estimating pronunciation probabilities # and word-and-pronunciation-dependent silence probabilities. exp/tri4b: nj=10 align prob=-44.46 over 81.23h [retry=0.6%, fail=0.1%] states=3413 gauss=40059 fmllr-impr=0.17 over 60.20h tree-impr=8.70 %WER 15.16 [ 1248 / 8234, 253 ins, 96 del, 899 sub ] exp/tri4b/decode_tgpr_dev93.si/wer_17_0.0 %WER 12.62 [ 1039 / 8234, 141 ins, 124 del, 774 sub ] exp/tri4b/decode_bd_tgpr_dev93.si/wer_17_0.0 %WER 9.01 [ 742 / 8234, 106 ins, 97 del, 539 sub ] exp/tri4b/decode_bd_tgpr_dev93/wer_16_0.0 %WER 8.25 [ 679 / 8234, 94 ins, 100 del, 485 sub ] exp/tri4b/decode_bd_tgpr_dev93_fg/wer_17_0.5 %WER 10.92 [ 899 / 8234, 186 ins, 92 del, 621 sub ] exp/tri4b/decode_tg_dev93/wer_17_0.5 %WER 11.44 [ 942 / 8234, 203 ins, 87 del, 652 sub ] exp/tri4b/decode_tgpr_dev93/wer_14_0.5 %WER 10.93 [ 617 / 5643, 147 ins, 33 del, 437 sub ] exp/tri4b/decode_tgpr_eval92.si/wer_14_1.0 %WER 8.74 [ 493 / 5643, 104 ins, 34 del, 355 sub ] exp/tri4b/decode_bd_tgpr_eval92.si/wer_15_0.0 %WER 5.69 [ 321 / 5643, 50 ins, 34 del, 237 sub ] exp/tri4b/decode_bd_tgpr_eval92/wer_17_0.5 %WER 4.71 [ 266 / 5643, 40 ins, 27 del, 199 sub ] exp/tri4b/decode_bd_tgpr_eval92_fg/wer_17_1.0 %WER 7.39 [ 417 / 5643, 107 ins, 24 del, 286 sub ] exp/tri4b/decode_tg_eval92/wer_16_1.0 %WER 7.90 [ 446 / 5643, 111 ins, 27 del, 308 sub ] exp/tri4b/decode_tgpr_eval92/wer_15_1.0 ###################################### ## Results below this point were mostly obtained in 2013 by Hainan Xu, ## They are from parts of the script that are now not run by default in the run.sh. ## you can look in the git history to figure out when these results were added. %WER 7.99 [ 658 / 8234, 72 ins, 95 del, 491 sub ] exp/tri4b_fmmi_a/decode_bd_tgpr_dev93_it8/wer_12 %WER 11.15 [ 918 / 8234, 180 ins, 81 del, 657 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it3/wer_15 %WER 11.23 [ 925 / 8234, 201 ins, 77 del, 647 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it4/wer_12 %WER 10.64 [ 876 / 8234, 180 ins, 80 del, 616 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it5/wer_13 %WER 10.43 [ 859 / 8234, 174 ins, 76 del, 609 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it6/wer_12 %WER 10.42 [ 858 / 8234, 178 ins, 70 del, 610 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it7/wer_11 %WER 10.41 [ 857 / 8234, 179 ins, 66 del, 612 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it8/wer_10 %WER 4.09 [ 231 / 5643, 40 ins, 12 del, 179 sub ] exp/tri4b_fmmi_a/decode_tgpr_eval92_it8/wer_11 %WER 10.61 [ 874 / 8234, 188 ins, 75 del, 611 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it3/wer_13 %WER 10.44 [ 860 / 8234, 183 ins, 79 del, 598 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it4/wer_13 %WER 10.27 [ 846 / 8234, 180 ins, 73 del, 593 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it5/wer_12 %WER 10.27 [ 846 / 8234, 174 ins, 72 del, 600 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it6/wer_12 %WER 10.06 [ 828 / 8234, 162 ins, 80 del, 586 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it7/wer_13 %WER 10.08 [ 830 / 8234, 158 ins, 84 del, 588 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it8/wer_13 %WER 10.77 [ 887 / 8234, 194 ins, 72 del, 621 sub ] exp/tri4b_mmi_b0.1/decode_tgpr_dev93/wer_12 %WER 12.27 [ 1010 / 8234, 188 ins, 104 del, 718 sub ] exp/sgmm2_5a/decode_tgpr_dev93/wer_14 %WER 11.87 [ 977 / 8234, 201 ins, 75 del, 701 sub ] exp/sgmm2_5a_mmi_b0.1/decode_tgpr_dev93_it1/wer_11 %WER 11.84 [ 975 / 8234, 195 ins, 81 del, 699 sub ] exp/sgmm2_5a_mmi_b0.1/decode_tgpr_dev93_it2/wer_13 %WER 11.67 [ 961 / 8234, 196 ins, 77 del, 688 sub ] exp/sgmm2_5a_mmi_b0.1/decode_tgpr_dev93_it3/wer_13 %WER 11.78 [ 970 / 8234, 190 ins, 82 del, 698 sub ] exp/sgmm2_5a_mmi_b0.1/decode_tgpr_dev93_it4/wer_14 %WER 11.87 [ 977 / 8234, 201 ins, 75 del, 701 sub ] exp/sgmm2_5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it1/wer_11 %WER 11.85 [ 976 / 8234, 195 ins, 81 del, 700 sub ] exp/sgmm2_5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it2/wer_13 %WER 11.67 [ 961 / 8234, 196 ins, 77 del, 688 sub ] exp/sgmm2_5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it3/wer_13 %WER 11.78 [ 970 / 8234, 190 ins, 82 del, 698 sub ] exp/sgmm2_5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it4/wer_14 %WER 8.23 [ 678 / 8234, 87 ins, 103 del, 488 sub ] exp/sgmm2_5b/decode_bd_tgpr_dev93/wer_12 %WER 4.29 [ 242 / 5643, 37 ins, 18 del, 187 sub ] exp/sgmm2_5b/decode_bd_tgpr_eval92/wer_12 %WER 10.88 [ 896 / 8234, 195 ins, 82 del, 619 sub ] exp/sgmm2_5b/decode_tgpr_dev93/wer_12 %WER 6.86 [ 387 / 5643, 97 ins, 18 del, 272 sub ] exp/sgmm2_5b/decode_tgpr_eval92/wer_13 %WER 3.93 [ 222 / 5643, 36 ins, 14 del, 172 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it1/wer_11 %WER 3.77 [ 213 / 5643, 33 ins, 12 del, 168 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it2/wer_12 %WER 3.62 [ 204 / 5643, 35 ins, 10 del, 159 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3/wer_10 %WER 3.69 [ 208 / 5643, 33 ins, 11 del, 164 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3.mbr/wer_11 %WER 3.51 [ 198 / 5643, 34 ins, 9 del, 155 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it4/wer_10 %WER 7.83 [ 645 / 8234, 83 ins, 95 del, 467 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_dev93_it1/wer_12 %WER 7.63 [ 628 / 8234, 76 ins, 99 del, 453 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_dev93_it2/wer_14 %WER 7.52 [ 619 / 8234, 86 ins, 88 del, 445 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_dev93_it3/wer_11 %WER 7.41 [ 610 / 8234, 76 ins, 93 del, 441 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_dev93_it4/wer_13 %WER 3.92 [ 221 / 5643, 36 ins, 14 del, 171 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_eval92_it1/wer_11 %WER 3.72 [ 210 / 5643, 32 ins, 12 del, 166 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_eval92_it2/wer_13 %WER 3.67 [ 207 / 5643, 33 ins, 10 del, 164 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_eval92_it3/wer_12 %WER 3.60 [ 203 / 5643, 35 ins, 10 del, 158 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_eval92_it4/wer_11 # regular SGMM (only ran the basic one, not discriminatively trained, although the # scripts are there.) # Rescored with quinphone. # not updated # DNN on fMLLR features (Karel's setup, [7.8.2015]). # frame cross-entropy training %WER 6.05 [ 498 / 8234, 59 ins, 67 del, 372 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_bd_tgpr_dev93/wer_11_0.0 %WER 3.69 [ 208 / 5643, 19 ins, 19 del, 170 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_bd_tgpr_eval92/wer_11_1.0 # sMBR training (5 iteration, no lattice-regeneration), %WER 5.60 [ 461 / 8234, 58 ins, 62 del, 341 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_bd_tgpr_dev93_it4/wer_11_0.0 %WER 3.35 [ 189 / 5643, 18 ins, 14 del, 157 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_bd_tgpr_eval92_it5/wer_13_1.0 #DNN results with cpu based setup %WER 7.21 [ 594 / 8234, 64 ins, 98 del, 432 sub ] exp/nnet5c1/decode_bd_tgpr_dev93/wer_14 #==== Below are some DNN results from an older version of the RESULTS file, # Dan's cpu-based neural net recipe. Note: the best number for dev93 is 7.10, an SGMM+MMI system, # and for eval92 is 3.79, the same system. (On this setup, discriminative training helped a lot, # which seems to be the reason we can't beat the SGMM+MMI numbers here.) exp/nnet5c1/decode_bd_tgpr_dev93/wer_14:%WER 7.32 [ 603 / 8234, 61 ins, 101 del, 441 sub ] exp/nnet5c1/decode_bd_tgpr_eval92/wer_14:%WER 4.39 [ 248 / 5643, 32 ins, 17 del, 199 sub ] # Note: my 4.39% result is worse than Karel's 3.56%. # some GPU-based neural network training results... # Below is the recipe with multiple VTLN warps and mel-filterbank inputs.. %WER 7.24 [ 596 / 8234, 62 ins, 98 del, 436 sub ] exp/nnet5b_gpu/decode_bd_tgpr_dev93/wer_15 %WER 3.95 [ 223 / 5643, 30 ins, 18 del, 175 sub ] exp/nnet5b_gpu/decode_bd_tgpr_eval92/wer_16 # 5c is GPU tanh recipe %WER 7.08 [ 583 / 8234, 64 ins, 94 del, 425 sub ] exp/nnet5c/decode_bd_tgpr_dev93/wer_14 %WER 4.02 [ 227 / 5643, 32 ins, 16 del, 179 sub ] exp/nnet5c/decode_bd_tgpr_eval92/wer_13 # 5c_gpu (the same, run on GPU) # note, for 5c and 5c_gpu, we could get better results by using only 4 # GPU jobs and half the learning rate, but of course it would take twice longer. %WER 7.29 [ 600 / 8234, 60 ins, 99 del, 441 sub ] exp/nnet5c_gpu/decode_bd_tgpr_dev93/wer_14 %WER 4.08 [ 230 / 5643, 34 ins, 15 del, 181 sub ] exp/nnet5c_gpu/decode_bd_tgpr_eval92/wer_13 # 5d is the pnorm recipe, with 4 jobs; compare with nnet5c. %WER 6.97 [ 574 / 8234, 72 ins, 84 del, 418 sub ] exp/nnet5d/decode_bd_tgpr_dev93/wer_12 %WER 3.86 [ 218 / 5643, 27 ins, 12 del, 179 sub ] exp/nnet5d/decode_bd_tgpr_eval92/wer_13 (the same without the big dictionary) %WER 9.40 [ 774 / 8234, 164 ins, 71 del, 539 sub ] exp/nnet5d/decode_tgpr_dev93/wer_12 %WER 6.45 [ 364 / 5643, 81 ins, 19 del, 264 sub ] exp/nnet5d/decode_tgpr_eval92/wer_14 # 5d_gpu is the pnorm recipe with GPU; %WER 7.07 [ 582 / 8234, 62 ins, 94 del, 426 sub ] exp/nnet5d_gpu/decode_bd_tgpr_dev93/wer_13 %WER 4.06 [ 229 / 5643, 31 ins, 13 del, 185 sub ] exp/nnet5d_gpu/decode_bd_tgpr_eval92/wer_12 (the same without the big dictionary)_13 %WER 9.35 [ 770 / 8234, 161 ins, 78 del, 531 sub ] exp/nnet5d_gpu/decode_tgpr_dev93/wer_12 %WER 6.59 [ 372 / 5643, 91 ins, 15 del, 266 sub ] exp/nnet5d_gpu/decode_tgpr_eval92/wer_12 %WER 7.13 [ 587 / 8234, 72 ins, 93 del, 422 sub ] exp/nnet5d_gpu/decode_bd_tgpr_dev93/wer_13 %WER 4.06 [ 229 / 5643, 31 ins, 16 del, 182 sub ] exp/nnet5d_gpu/decode_bd_tgpr_eval92/wer_14 # 5e is GPU version of ensemble training of pnorm nnets recipe, with 4 jobs; compare with nnet5d. %WER 7.19 [ 592 / 8234, 72 ins, 89 del, 431 sub ] exp/nnet5e_gpu/decode_bd_tgpr_dev93/wer_10 %WER 3.97 [ 224 / 5643, 25 ins, 16 del, 183 sub ] exp/nnet5e_gpu/decode_bd_tgpr_eval92/wer_14 # decoded with tgpr LM. %WER 9.55 [ 786 / 8234, 163 ins, 83 del, 540 sub ] exp/nnet5d_gpu/decode_tgpr_dev93/wer_13 %WER 6.50 [ 367 / 5643, 95 ins, 16 del, 256 sub ] exp/nnet5d_gpu/decode_tgpr_eval92/wer_14 # for results with VTLN, see for example local/run_vtln2.sh, they are at the end. # Online-nnet2 results: for x in exp/nnet2_online/nnet_ms_a_online/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done %WER 7.02 [ 578 / 8234, 85 ins, 88 del, 405 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93/wer_10 %WER 6.56 [ 540 / 8234, 86 ins, 79 del, 375 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_smbr_epoch1/wer_11 %WER 6.51 [ 536 / 8234, 84 ins, 77 del, 375 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_smbr_epoch2/wer_12 %WER 6.42 [ 529 / 8234, 90 ins, 72 del, 367 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_smbr_epoch3/wer_12 %WER 6.40 [ 527 / 8234, 91 ins, 71 del, 365 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_smbr_epoch4/wer_12 ** %WER 7.23 [ 595 / 8234, 77 ins, 96 del, 422 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_utt/wer_10 %WER 6.95 [ 572 / 8234, 70 ins, 90 del, 412 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_utt_offline/wer_11 %WER 3.93 [ 222 / 5643, 32 ins, 12 del, 178 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92/wer_10 %WER 3.76 [ 212 / 5643, 27 ins, 11 del, 174 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_smbr_epoch1/wer_11 %WER 3.60 [ 203 / 5643, 27 ins, 9 del, 167 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_smbr_epoch2/wer_11 ** %WER 3.62 [ 204 / 5643, 28 ins, 7 del, 169 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_smbr_epoch3/wer_11 %WER 3.72 [ 210 / 5643, 32 ins, 7 del, 171 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_smbr_epoch4/wer_11 %WER 4.02 [ 227 / 5643, 32 ins, 14 del, 181 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_utt/wer_9 %WER 3.99 [ 225 / 5643, 29 ins, 13 del, 183 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_utt_offline/wer_10 %WER 9.45 [ 778 / 8234, 180 ins, 73 del, 525 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93/wer_10 %WER 9.39 [ 773 / 8234, 163 ins, 79 del, 531 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93_utt/wer_11 %WER 9.25 [ 762 / 8234, 174 ins, 69 del, 519 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93_utt_offline/wer_10 %WER 6.57 [ 371 / 5643, 95 ins, 12 del, 264 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92/wer_11 %WER 6.68 [ 377 / 5643, 102 ins, 13 del, 262 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92_utt/wer_10 %WER 6.56 [ 370 / 5643, 100 ins, 12 del, 258 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92_utt_offline/wer_10 # RNNLM n-best rescoring with Mikolov's model: for x in exp/nnet2_online/nnet_ms_a_online/decode_*rnnlm.h300.voc40k; do grep WER $x/wer_* | utils/best_wer.sh ; done %WER 5.60 [ 461 / 8234, 51 ins, 70 del, 340 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_rnnlm.h300.voc40k/wer_15_0.0 %WER 2.64 [ 149 / 5643, 21 ins, 13 del, 115 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_rnnlm.h300.voc40k/wer_11_0.5 %WER 8.16 [ 672 / 8234, 136 ins, 70 del, 466 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93_rnnlm.h300.voc40k/wer_14_0.5 %WER 5.39 [ 304 / 5643, 74 ins, 16 del, 214 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92_rnnlm.h300.voc40k/wer_17_0.5 # RNNLM lattice rescoring with Mikolov's model: for x in exp/nnet2_online/nnet_ms_a_online/decode_*rnnlm.h300.voc40k_lat; do grep WER $x/wer_* | utils/best_wer.sh ; done %WER 5.05 [ 416 / 8234, 47 ins, 72 del, 297 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_rnnlm.h300.voc40k_lat/wer_16_0.0 %WER 2.59 [ 146 / 5643, 19 ins, 14 del, 113 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_rnnlm.h300.voc40k_lat/wer_10_0.5 %WER 7.70 [ 634 / 8234, 133 ins, 67 del, 434 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93_rnnlm.h300.voc40k_lat/wer_13_0.5 %WER 5.25 [ 296 / 5643, 81 ins, 14 del, 201 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92_rnnlm.h300.voc40k_lat/wer_14_0.5 # RNNLM n-best rescoring with Yandex's model: for x in exp/nnet2_online/nnet_ms_a_online/decode_*rnnlm-hs.nce20.h400.voc40k; do grep WER $x/wer_* | utils/best_wer.sh ; done %WER 5.31 [ 437 / 8234, 50 ins, 66 del, 321 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_rnnlm-hs.nce20.h400.voc40k/wer_13_0.0 %WER 2.91 [ 164 / 5643, 24 ins, 9 del, 131 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_rnnlm-hs.nce20.h400.voc40k/wer_10_0.0 %WER 7.83 [ 645 / 8234, 159 ins, 50 del, 436 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93_rnnlm-hs.nce20.h400.voc40k/wer_11_0.0 %WER 5.40 [ 305 / 5643, 77 ins, 16 del, 212 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92_rnnlm-hs.nce20.h400.voc40k/wer_13_1.0 # TDNN results: for x in exp/nnet3/nnet_tdnn_a/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done %WER 7.19 [ 592 / 8234, 51 ins, 109 del, 432 sub ] exp/nnet3/nnet_tdnn_a/decode_bd_tgpr_dev93/wer_13_0.5 %WER 3.93 [ 222 / 5643, 23 ins, 20 del, 179 sub ] exp/nnet3/nnet_tdnn_a/decode_bd_tgpr_eval92/wer_10_1.0 %WER 9.78 [ 805 / 8234, 167 ins, 72 del, 566 sub ] exp/nnet3/nnet_tdnn_a/decode_tgpr_dev93/wer_10_0.0 %WER 6.40 [ 361 / 5643, 87 ins, 16 del, 258 sub ] exp/nnet3/nnet_tdnn_a/decode_tgpr_eval92/wer_10_1.0 # local/nnet3/run_lstm.sh # LSTM results: cell_dim=1024, recurrent_projection_dim=non_recurrent_projection_dim=256,lstm_delay=-1 -2 -3, label_delay=5, num_params=11894059 %WER 7.32 exp/nnet3/lstm_ld5/decode_bd_tgpr_dev93/wer_11_0.0 %WER 4.24 exp/nnet3/lstm_ld5/decode_bd_tgpr_eval92/wer_10_1.0 %WER 9.57 exp/nnet3/lstm_ld5/decode_tgpr_dev93/wer_9_1.0 %WER 6.86 exp/nnet3/lstm_ld5/decode_tgpr_eval92/wer_10_1.0 # bidirectional LSTM # ----------------------- # local/nnet3/run_lstm.sh --affix bidirectional \ # --lstm-delay " [-1,1] [-2,2] [-3,3] " \ # --label-delay 0 \ # --cell-dim 640 \ # --recurrent-projection-dim 128 \ # --non-recurrent-projection-dim 128 \ # --chunk-left-context 40 \ # --chunk-right-context 40 # num_params=11485739 %WER 6.81 exp/nnet3/lstm_bidirectional/decode_bd_tgpr_dev93/wer_11_0.0 %WER 4.27 exp/nnet3/lstm_bidirectional/decode_bd_tgpr_eval92/wer_11_0.0 %WER 9.29 exp/nnet3/lstm_bidirectional/decode_tgpr_dev93/wer_11_0.5 %WER 6.61 exp/nnet3/lstm_bidirectional/decode_tgpr_eval92/wer_11_1.0