#!/bin/bash
for x in exp/*/decode*; do [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh; done
exit 0
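# Usage sketch (illustrative, not part of the recipe proper): run this file
# directly from the recipe's s5 directory, so that utils/best_wer.sh resolves,
# once the decode directories under exp/ exist.  The optional first argument
# restricts the listing to decode directories whose path contains it, e.g.
# (assuming the file is saved under its usual name, RESULTS):
#
#   bash RESULTS            # best WER for every exp/*/decode* directory
#   bash RESULTS tri3b      # only the tri3b decodes
#
# Everything below the "exit 0" is recorded output: one best-WER line per
# decode directory, grouped by system.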
# Monophone, MFCC+delta+accel
%WER 8.74 [ 1095 / 12533, 143 ins, 226 del, 726 sub ] exp/mono/decode/wer_2

# MFCC+delta+accel
%WER 3.26 [ 408 / 12533, 53 ins, 94 del, 261 sub ] exp/tri1/decode/wer_7

# MFCC+delta+accel (on top of better alignments)
%WER 3.44 [ 431 / 12533, 74 ins, 82 del, 275 sub ] exp/tri2a/decode/wer_5

# LDA+MLLT
%WER 2.98 [ 373 / 12533, 56 ins, 66 del, 251 sub ] exp/tri2b/decode/wer_5

# Some MMI/MPE experiments (MMI, boosted MMI, MPE) on top of the LDA+MLLT system.
%WER 2.77 [ 347 / 12533, 54 ins, 54 del, 239 sub ] exp/tri2b_mmi/decode_it3/wer_6
%WER 2.91 [ 365 / 12533, 66 ins, 47 del, 252 sub ] exp/tri2b_mmi/decode_it4/wer_5
%WER 2.74 [ 343 / 12533, 54 ins, 55 del, 234 sub ] exp/tri2b_mmi_b0.05/decode_it3/wer_6
%WER 2.92 [ 366 / 12533, 68 ins, 44 del, 254 sub ] exp/tri2b_mmi_b0.05/decode_it4/wer_5
%WER 2.86 [ 358 / 12533, 54 ins, 66 del, 238 sub ] exp/tri2b_mpe/decode_it3/wer_6
%WER 2.84 [ 356 / 12533, 55 ins, 64 del, 237 sub ] exp/tri2b_mpe/decode_it4/wer_6

# LDA+MLLT+SAT
%WER 2.07 [ 260 / 12533, 39 ins, 48 del, 173 sub ] exp/tri3b/decode/wer_4
%WER 3.38 [ 423 / 12533, 54 ins, 96 del, 273 sub ] exp/tri3b/decode.si/wer_6

# Decoding tri3b with the unigram language model, which has higher WER.
%WER 10.38 [ 1301 / 12533, 131 ins, 200 del, 970 sub ] exp/tri3b/decode_ug/wer_13
%WER 13.69 [ 1716 / 12533, 163 ins, 273 del, 1280 sub ] exp/tri3b/decode_ug.si/wer_13

# LDA+MLLT+SAT+MMI (MMI on top of the SAT system)
%WER 1.94 [ 243 / 12533, 36 ins, 43 del, 164 sub ] exp/tri3b_mmi/decode/wer_4
%WER 3.38 [ 423 / 12533, 54 ins, 96 del, 273 sub ] exp/tri3b_mmi/decode.si/wer_6
%WER 1.77 [ 222 / 12533, 34 ins, 33 del, 155 sub ] exp/tri3b_mmi/decode2/wer_4

# LDA+MLLT+SAT+fMMI (fMMI+MMI on top of this SAT system), in various configurations.
# Note: it doesn't really help here; probably not enough data.
%WER 1.87 [ 234 / 12533, 35 ins, 42 del, 157 sub ] exp/tri3b_fmmi_b/decode_it3/wer_4
%WER 1.85 [ 232 / 12533, 38 ins, 39 del, 155 sub ] exp/tri3b_fmmi_b/decode_it4/wer_4
%WER 1.76 [ 221 / 12533, 38 ins, 32 del, 151 sub ] exp/tri3b_fmmi_b/decode_it5/wer_3
%WER 1.76 [ 221 / 12533, 37 ins, 30 del, 154 sub ] exp/tri3b_fmmi_b/decode_it6/wer_3
%WER 1.77 [ 222 / 12533, 34 ins, 36 del, 152 sub ] exp/tri3b_fmmi_b/decode_it7/wer_5
%WER 1.75 [ 219 / 12533, 34 ins, 34 del, 151 sub ] exp/tri3b_fmmi_b/decode_it8/wer_5
%WER 1.97 [ 247 / 12533, 34 ins, 45 del, 168 sub ] exp/tri3b_fmmi_c/decode_it3/wer_4
%WER 2.03 [ 255 / 12533, 40 ins, 45 del, 170 sub ] exp/tri3b_fmmi_c/decode_it4/wer_4
%WER 1.84 [ 231 / 12533, 40 ins, 31 del, 160 sub ] exp/tri3b_fmmi_c/decode_it5/wer_2
%WER 1.76 [ 220 / 12533, 30 ins, 36 del, 154 sub ] exp/tri3b_fmmi_c/decode_it6/wer_4
%WER 1.72 [ 215 / 12533, 31 ins, 32 del, 152 sub ] exp/tri3b_fmmi_c/decode_it7/wer_4
%WER 1.71 [ 214 / 12533, 30 ins, 34 del, 150 sub ] exp/tri3b_fmmi_c/decode_it8/wer_5
%WER 1.91 [ 239 / 12533, 22 ins, 61 del, 156 sub ] exp/tri3b_fmmi_d/decode_it3/wer_8
%WER 1.91 [ 240 / 12533, 24 ins, 59 del, 157 sub ] exp/tri3b_fmmi_d/decode_it4/wer_8
%WER 1.96 [ 246 / 12533, 40 ins, 41 del, 165 sub ] exp/tri3b_fmmi_d/decode_it5/wer_5
%WER 1.91 [ 239 / 12533, 36 ins, 39 del, 164 sub ] exp/tri3b_fmmi_d/decode_it6/wer_5
%WER 1.92 [ 241 / 12533, 26 ins, 52 del, 163 sub ] exp/tri3b_fmmi_d/decode_it7/wer_7
%WER 1.92 [ 241 / 12533, 32 ins, 43 del, 166 sub ] exp/tri3b_fmmi_d/decode_it8/wer_6

# These are some experiments with "raw-fMLLR": fMLLR on the raw MFCCs, but
# computed with the LDA+MLLT model (it's complicated). Compare with 3b; results
# are pretty similar. The main anticipated use is prior to neural net training.
%WER 2.11 [ 265 / 12533, 21 ins, 74 del, 170 sub ] exp/tri3c/decode/wer_9
%WER 2.07 [ 260 / 12533, 35 ins, 58 del, 167 sub ] exp/tri3c/decode_2fmllr/wer_5
%WER 10.60 [ 1329 / 12533, 152 ins, 198 del, 979 sub ] exp/tri3c/decode_2fmllr_ug/wer_12
%WER 10.68 [ 1338 / 12533, 142 ins, 223 del, 973 sub ] exp/tri3c/decode_ug/wer_13

# Some "SGMM2" experiments. SGMM2 is a newer version of the code that
# has tying of the substates, a bit like "state-clustered tied mixture" systems,
# and which has speaker-dependent mixture weights.
# We no longer show the old SGMM results, although the script is still
# there, commented out.
%WER 1.45 [ 182 / 12533, 19 ins, 39 del, 124 sub ] exp/sgmm2_4a/decode/wer_5
%WER 1.46 [ 183 / 12533, 23 ins, 31 del, 129 sub ] exp/sgmm2_4a/decode_fmllr/wer_4
%WER 1.36 [ 170 / 12533, 19 ins, 30 del, 121 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it1/wer_5
%WER 1.36 [ 170 / 12533, 19 ins, 30 del, 121 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it2/wer_5
%WER 1.38 [ 173 / 12533, 24 ins, 29 del, 120 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it3/wer_4
%WER 1.39 [ 174 / 12533, 27 ins, 28 del, 119 sub ] exp/sgmm2_4a_mmi_b0.2/decode_it4/wer_3

# This tests the option "--zero-if-disjoint true" to MMI; no clear difference here.
%WER 1.36 [ 171 / 12533, 17 ins, 35 del, 119 sub ] exp/sgmm2_4a_mmi_b0.2_x/decode_it1/wer_6
%WER 1.36 [ 170 / 12533, 22 ins, 29 del, 119 sub ] exp/sgmm2_4a_mmi_b0.2_x/decode_it2/wer_4
%WER 1.35 [ 169 / 12533, 22 ins, 29 del, 118 sub ] exp/sgmm2_4a_mmi_b0.2_x/decode_it3/wer_4
%WER 1.36 [ 170 / 12533, 22 ins, 29 del, 119 sub ] exp/sgmm2_4a_mmi_b0.2_x/decode_it4/wer_4

# sgmm2_4c is as 4a but starting from the raw-fMLLR features. No clear difference.
%WER 1.56 [ 195 / 12533, 18 ins, 46 del, 131 sub ] exp/sgmm2_4c/decode/wer_6
%WER 1.56 [ 195 / 12533, 33 ins, 31 del, 131 sub ] exp/sgmm2_4c/decode_fmllr/wer_2
%WER 8.03 [ 1007 / 12533, 95 ins, 167 del, 745 sub ] exp/sgmm2_4c/decode_ug/wer_10

# Deep neural net -- various types of hybrid system.
%WER 2.02 [ 253 / 12533, 27 ins, 64 del, 162 sub ] exp/nnet4a/decode/wer_4
%WER 9.77 [ 1224 / 12533, 95 ins, 251 del, 878 sub ] exp/nnet4a/decode_ug/wer_9
%WER 1.68 [ 211 / 12533, 20 ins, 53 del, 138 sub ] exp/nnet4b/decode/wer_5
%WER 8.96 [ 1123 / 12533, 97 ins, 166 del, 860 sub ] exp/nnet4b/decode_ug/wer_8
%WER 1.91 [ 240 / 12533, 20 ins, 59 del, 161 sub ] exp/nnet4b_gpu/decode/wer_7
%WER 8.41 [ 1054 / 12533, 80 ins, 166 del, 808 sub ] exp/nnet4b_gpu/decode_ug/wer_10
# When I ran this before I got the following (probably just random):
# %WER 1.72 [ 216 / 12533, 25 ins, 38 del, 153 sub ] exp/nnet4b_gpu/decode/wer_4
# %WER 8.34 [ 1045 / 12533, 94 ins, 146 del, 805 sub ] exp/nnet4b_gpu/decode_ug/wer_10

# This is another unadapted setup:
%WER 1.93 [ 242 / 12533, 40 ins, 44 del, 158 sub ] exp/nnet4b2_gpu/decode/wer_3
%WER 9.08 [ 1138 / 12533, 89 ins, 182 del, 867 sub ] exp/nnet4b2_gpu/decode_ug/wer_9
%WER 1.80 [ 226 / 12533, 29 ins, 44 del, 153 sub ] exp/nnet4c/decode/wer_4
%WER 8.49 [ 1064 / 12533, 80 ins, 175 del, 809 sub ] exp/nnet4c/decode_ug/wer_11
%WER 1.70 [ 213 / 12533, 28 ins, 44 del, 141 sub ] exp/nnet4d3/decode/wer_4
%WER 8.51 [ 1066 / 12533, 97 ins, 176 del, 793 sub ] exp/nnet4d3/decode_ug/wer_9
%WER 1.74 [ 218 / 12533, 25 ins, 48 del, 145 sub ] exp/nnet4d_gpu/decode/wer_6
%WER 8.39 [ 1051 / 12533, 106 ins, 149 del, 796 sub ] exp/nnet4d_gpu/decode_ug/wer_10
%WER 1.53 [ 192 / 12533, 22 ins, 42 del, 128 sub ] exp/nnet4d2/decode/wer_3
%WER 8.06 [ 1010 / 12533, 79 ins, 152 del, 779 sub ] exp/nnet4d2/decode_ug/wer_8
%WER 1.51 [ 189 / 12533, 25 ins, 34 del, 130 sub ] exp/nnet4d2_gpu/decode/wer_3
%WER 7.97 [ 999 / 12533, 78 ins, 152 del, 769 sub ] exp/nnet4d2_gpu/decode_ug/wer_8
%WER 1.37 [ 172 / 12533, 14 ins, 36 del, 122 sub ] exp/nnet4e_gpu/decode/wer_3
%WER 8.03 [ 1006 / 12533, 61 ins, 179 del, 766 sub ] exp/nnet4e_gpu/decode_ug/wer_8

# Discriminatively trained system (using SMBR, on CPU)
%WER 1.70 [ 213 / 12533, 21 ins, 52 del, 140 sub ] exp/nnet5c_mpe/decode_epoch1/wer_4
%WER 1.71 [ 214 / 12533, 21 ins, 50 del, 143 sub ] exp/nnet5c_mpe/decode_epoch2/wer_4
%WER 1.66 [ 208 / 12533, 29 ins, 36 del, 143 sub ] exp/nnet5c_mpe/decode_epoch3/wer_3
%WER 1.75 [ 219 / 12533, 32 ins, 46 del, 141 sub ] exp/nnet5c_mpe/decode_epoch4/wer_4
%WER 8.50 [ 1065 / 12533, 82 ins, 181 del, 802 sub ] exp/nnet5c_mpe/decode_ug_epoch1/wer_9
%WER 8.39 [ 1052 / 12533, 71 ins, 189 del, 792 sub ] exp/nnet5c_mpe/decode_ug_epoch2/wer_10
%WER 8.31 [ 1042 / 12533, 73 ins, 183 del, 786 sub ] exp/nnet5c_mpe/decode_ug_epoch3/wer_10
%WER 8.33 [ 1044 / 12533, 75 ins, 178 del, 791 sub ] exp/nnet5c_mpe/decode_ug_epoch4/wer_10

# Discriminatively trained system (using SMBR, on GPU)
%WER 1.73 [ 217 / 12533, 17 ins, 55 del, 145 sub ] exp/nnet5c_mpe_gpu/decode_epoch1/wer_6
%WER 1.76 [ 221 / 12533, 20 ins, 52 del, 149 sub ] exp/nnet5c_mpe_gpu/decode_epoch2/wer_6
%WER 1.72 [ 215 / 12533, 18 ins, 52 del, 145 sub ] exp/nnet5c_mpe_gpu/decode_epoch3/wer_6
%WER 1.67 [ 209 / 12533, 14 ins, 53 del, 142 sub ] exp/nnet5c_mpe_gpu/decode_epoch4/wer_7
%WER 8.58 [ 1075 / 12533, 100 ins, 157 del, 818 sub ] exp/nnet5c_mpe_gpu/decode_ug_epoch1/wer_10
%WER 8.43 [ 1056 / 12533, 97 ins, 153 del, 806 sub ] exp/nnet5c_mpe_gpu/decode_ug_epoch2/wer_10
%WER 8.43 [ 1057 / 12533, 100 ins, 153 del, 804 sub ] exp/nnet5c_mpe_gpu/decode_ug_epoch3/wer_10
%WER 8.36 [ 1048 / 12533, 89 ins, 158 del, 801 sub ] exp/nnet5c_mpe_gpu/decode_ug_epoch4/wer_11

# Discriminatively trained system (using p-norm rather than tanh nonlinearities, using SMBR, on GPU)
%WER 1.74 [ 218 / 12533, 25 ins, 48 del, 145 sub ] exp/nnet5d_mpe_gpu/decode_epoch1/wer_6
%WER 8.40 [ 1053 / 12533, 108 ins, 148 del, 797 sub ] exp/nnet5d_mpe_gpu/decode_ug_epoch1/wer_10

# Discriminatively trained system on top of an ensemble-trained p-norm network (using SMBR, on GPU)
%WER 1.36 [ 170 / 12533, 15 ins, 34 del, 121 sub ] exp/nnet5e_mpe_gpu/decode_epoch2/wer_3
%WER 7.73 [ 969 / 12533, 74 ins, 157 del, 738 sub ] exp/nnet5e_mpe_gpu/decode_ug_epoch4/wer_9

# Some system combination experiments.
%WER 3.18 [ 398 / 12533, 60 ins, 75 del, 263 sub ] exp/combine_1_2a/decode/wer_4
%WER 1.56 [ 196 / 12533, 27 ins, 32 del, 137 sub ] exp/combine_sgmm2_4a_3b/decode/wer_2
%WER 1.53 [ 192 / 12533, 23 ins, 30 del, 139 sub ] exp/combine_sgmm2_4a_3b_fmmic5/decode/wer_4
%WER 1.47 [ 184 / 12533, 23 ins, 27 del, 134 sub ] exp/combine_sgmm2_4a_mmi_3b_fmmic5/decode/wer_4

# Some things relating to nnet2 online decoding.
for x in exp/nnet2_online/nnet*/decode*; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 2.75 [ 345 / 12533, 43 ins, 81 del, 221 sub ] exp/nnet2_online/nnet/decode/wer_7
%WER 10.94 [ 1371 / 12533, 133 ins, 220 del, 1018 sub ] exp/nnet2_online/nnet/decode_ug/wer_11

# The script for this is not checked in; it's p-norm with 800/160 instead of 1000/200.
%WER 2.58 [ 323 / 12533, 38 ins, 81 del, 204 sub ] exp/nnet2_online/nnet2b/decode/wer_6
%WER 10.72 [ 1344 / 12533, 124 ins, 234 del, 986 sub ] exp/nnet2_online/nnet2b/decode_ug/wer_10

# This is the baseline for the nnet+iVector decoding, with no iVector. It is
# better than with the iVector, i.e. the iVector is not working. I assume this
# is due to overtraining. I plan to try this on a larger setup.
%WER 2.30 [ 288 / 12533, 44 ins, 51 del, 193 sub ] exp/nnet2_online/nnet_baseline/decode/wer_4
%WER 10.70 [ 1341 / 12533, 122 ins, 221 del, 998 sub ] exp/nnet2_online/nnet_baseline/decode_ug/wer_10

# Normal recipe:
%WER 2.27 [ 285 / 12533, 42 ins, 62 del, 181 sub ] exp/nnet2_online/nnet_a_online/decode/wer_5
%WER 2.28 [ 286 / 12533, 66 ins, 39 del, 181 sub ] exp/nnet2_online/nnet_a_online/decode_per_utt/wer_2
%WER 10.26 [ 1286 / 12533, 140 ins, 188 del, 958 sub ] exp/nnet2_online/nnet_a_online/decode_ug/wer_10
%WER 10.45 [ 1310 / 12533, 106 ins, 241 del, 963 sub ] exp/nnet2_online/nnet_a_online/decode_ug_per_utt/wer_12

# Multi-splice recipe:
%WER 2.29 [ 287 / 12533, 32 ins, 70 del, 185 sub ] exp/nnet2_online/nnet_ms_a/decode/wer_9_0.0
%WER 9.30 [ 1166 / 12533, 94 ins, 219 del, 853 sub ] exp/nnet2_online/nnet_ms_a/decode_ug/wer_15_0.0
%WER 2.30 [ 288 / 12533, 32 ins, 68 del, 188 sub ] exp/nnet2_online/nnet_ms_a_online/decode/wer_9_0.0
%WER 2.34 [ 293 / 12533, 33 ins, 72 del, 188 sub ] exp/nnet2_online/nnet_ms_a_online/decode_per_utt/wer_9_0.0
%WER 9.17 [ 1149 / 12533, 87 ins, 224 del, 838 sub ] exp/nnet2_online/nnet_ms_a_online/decode_ug/wer_14_0.5
%WER 9.37 [ 1174 / 12533, 121 ins, 192 del, 861 sub ] exp/nnet2_online/nnet_ms_a_online/decode_ug_per_utt/wer_13_0.0

# Baseline for the multi-splice script, provided for reference; modify the splice-indexes in
# local/online/run_nnet2_multisplice.sh to "layer0/-7:-6:-5:-4:-3:-2:-1:0:1:2:3:4:5:6:7"
# to reproduce these results.
%WER 2.31 [ 290 / 12533, 26 ins, 91 del, 173 sub ] exp/nnet2_online/nnet_a/decode/wer_9_0.0
%WER 9.90 [ 1241 / 12533, 103 ins, 208 del, 930 sub ] exp/nnet2_online/nnet_a/decode_ug/wer_11_0.5
%WER 2.27 [ 284 / 12533, 25 ins, 88 del, 171 sub ] exp/nnet2_online/nnet_a_online/decode/wer_9_0.0
%WER 2.30 [ 288 / 12533, 20 ins, 85 del, 183 sub ] exp/nnet2_online/nnet_a_online/decode_per_utt/wer_9_0.0
%WER 9.97 [ 1250 / 12533, 104 ins, 208 del, 938 sub ] exp/nnet2_online/nnet_a_online/decode_ug/wer_10_1.0
%WER 10.18 [ 1276 / 12533, 129 ins, 193 del, 954 sub ] exp/nnet2_online/nnet_a_online/decode_ug_per_utt/wer_11_0.0

# Joint training with WSJ data (we call this recipe "multilingual" because it doesn't use
# a shared phone set).
# Note: I didn't tune the settings of this at all; it was just the first try.
#for x in exp/nnet2_online_wsj/nnet_ms_a_rm_online/decode*; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 1.56 [ 196 / 12533, 30 ins, 36 del, 130 sub ] exp/nnet2_online_wsj/nnet_ms_a_rm_online/decode/wer_5
%WER 7.51 [ 941 / 12533, 90 ins, 162 del, 689 sub ] exp/nnet2_online_wsj/nnet_ms_a_rm_online/decode_ug/wer_12

# Discriminative training on top of the previous system (the joint system, with WSJ)... we don't get
# too much from it on this particular setup.
for x in exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_*; do grep WER $x/wer* | utils/best_wer.sh ; done
%WER 1.56 [ 196 / 12533, 30 ins, 36 del, 130 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_epoch0/wer_5
%WER 1.57 [ 197 / 12533, 29 ins, 35 del, 133 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_epoch1/wer_6
%WER 1.55 [ 194 / 12533, 29 ins, 35 del, 130 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_epoch2/wer_6
%WER 1.53 [ 192 / 12533, 32 ins, 30 del, 130 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_epoch3/wer_5
%WER 1.51 [ 189 / 12533, 33 ins, 28 del, 128 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_epoch4/wer_5
%WER 7.51 [ 941 / 12533, 90 ins, 162 del, 689 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch0/wer_12
%WER 7.49 [ 939 / 12533, 89 ins, 150 del, 700 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch1/wer_12
%WER 7.37 [ 924 / 12533, 80 ins, 156 del, 688 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch2/wer_13
%WER 7.33 [ 919 / 12533, 80 ins, 153 del, 686 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch3/wer_13
%WER 7.36 [ 923 / 12533, 85 ins, 148 del, 690 sub ] exp/nnet2_online_wsj/nnet_ms_a_smbr_0.00005/1/decode_ug_epoch4/wer_13

### chain results ###
# Current best chain result with a TDNN (see local/chain/run_tdnn_5g.sh).
%WER 2.86 [ 358 / 12533, 46 ins, 61 del, 251 sub ] exp/chain/tdnn_5g/decode/wer_5_0.0
%WER 2.71 [ 340 / 12533, 58 ins, 59 del, 223 sub ] exp/chain/tdnn_5n/decode/wer_4_0.0
# The topology of this chain model is taken from mini_librispeech.
# It uses the new config conventions for chain models introduced after Kaldi 5.2.
%WER 1.32 [ 166 / 12533, 19 ins, 31 del, 116 sub ] exp/chain/tdnn_5o/decode/wer_4_0.0

### WSJ->RM transfer learning using a chain model ###
%WER 1.68 [ 210 / 12533, 25 ins, 33 del, 152 sub ] exp/chain/tdnn_wsj_rm_1a/decode/wer_2_0.0

### nnet1 results ###
# dnn4b, MFCC, LDA, fMLLR features, (Karel - 30.7.2015)
# Xent,
%WER 1.75 [ 219 / 12533, 36 ins, 35 del, 148 sub ] exp/dnn4b_pretrain-dbn_dnn/decode/wer_2_0.0
%WER 7.90 [ 990 / 12533, 90 ins, 147 del, 753 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_ug/wer_5_1.0
# sMBR,
%WER 1.77 [ 222 / 12533, 21 ins, 57 del, 144 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it1/wer_4_0.0
%WER 1.68 [ 210 / 12533, 24 ins, 43 del, 143 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it3/wer_4_0.0
%WER 1.58 [ 198 / 12533, 20 ins, 41 del, 137 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_it6/wer_5_0.0

# cnn4c, FBANK+pitch features, (Karel - 30.7.2015)
# Xent, no-RBM,
%WER 2.00 [ 251 / 12533, 34 ins, 54 del, 163 sub ] exp/cnn4c/decode/wer_3_0.5
# Xent, RBM on top of CNN,
%WER 2.04 [ 256 / 12533, 20 ins, 78 del, 158 sub ] exp/cnn4c_pretrain-dbn_dnn/decode/wer_6_0.5
# sMBR,
%WER 2.02 [ 253 / 12533, 35 ins, 54 del, 164 sub ] exp/cnn4c_pretrain-dbn_dnn_smbr/decode_it1/wer_5_0.0
%WER 1.93 [ 242 / 12533, 23 ins, 62 del, 157 sub ] exp/cnn4c_pretrain-dbn_dnn_smbr/decode_it3/wer_6_0.5
%WER 1.90 [ 238 / 12533, 29 ins, 49 del, 160 sub ] exp/cnn4c_pretrain-dbn_dnn_smbr/decode_it6/wer_6_0.0

# dnn4d, FBANK+pitch, (Karel - 30.7.2015)
# Xent,
%WER 1.95 [ 245 / 12533, 22 ins, 63 del, 160 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode/wer_4_1.0
# sMBR,
%WER 1.98 [ 248 / 12533, 35 ins, 50 del, 163 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_it1/wer_3_0.0
%WER 1.91 [ 239 / 12533, 19 ins, 60 del, 160 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_it3/wer_5_0.5
%WER 1.88 [ 236 / 12533, 17 ins, 61 del, 158 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_it6/wer_6_0.5
# Relu, FBANK+pitch, (Karel - 8.9.2016)
# - no pre-training,
# Xent,
%WER 1.95 [ 245 / 12533, 15 ins, 77 del, 153 sub ] exp/dnn4d-6L1024-relu-fbank/decode/wer_7_0.5

# ParametricRelu, FBANK+pitch, (Karel - 8.9.2016)
# Xent,
%WER 1.79 [ 224 / 12533, 27 ins, 47 del, 150 sub ] exp/dnn4d-6L1024-relu-fbank-alpha-beta/decode/wer_4_1.0
# sMBR,
%WER 1.74 [ 218 / 12533, 21 ins, 47 del, 150 sub ] exp/dnn4d-6L1024-relu-fbank-alpha-beta_smbr/decode_it1/wer_5_0.5
%WER 1.74 [ 218 / 12533, 22 ins, 45 del, 151 sub ] exp/dnn4d-6L1024-relu-fbank-alpha-beta_smbr/decode_it3/wer_6_0.0
%WER 1.76 [ 220 / 12533, 23 ins, 43 del, 154 sub ] exp/dnn4d-6L1024-relu-fbank-alpha-beta_smbr/decode_it6/wer_6_0.5
# => Better than 'Sigmoid' with pre-training,

# dnn4e, FBANK+pitch, 2 output layers: rm + wsj, (Karel - 10.7.2015)
%WER 1.52 [ 191 / 12533, 17 ins, 52 del, 122 sub ] exp/dnn4e-fbank_blocksoftmax/decode/wer_4_0.5 <<<[BEST]
%WER 7.86 [ 985 / 12533, 84 ins, 160 del, 741 sub ] exp/dnn4e-fbank_blocksoftmax/decode_ug/wer_8_0.0

# lstm4f, FBANK+pitch, 2 LSTMs, (Karel - 11.8.2015)
%WER 2.90 [ 364 / 12533, 28 ins, 96 del, 240 sub ] exp/lstm4f/decode/wer_4_1.0 # 'multistream-perutt'
%WER 2.51 [ 315 / 12533, 43 ins, 63 del, 209 sub ] exp/lstm4f_truncated_BPTT/decode/wer_4_0.0 # 'multistream (minibatches)'

# cnn4g-2D, FBANK+pitch, 2D-CNN system (from Harish Mallidi, run by Karel - 22.6.2015)
%WER 2.07 [ 260 / 12533, 32 ins, 60 del, 168 sub ] exp/cnn2d4c/decode/wer_4_0.0

# dnn4h, FBANK+pitch, 'dummy iVector', should be the same as 'dnn4d', (Karel - 30.7.2015)
# Xent, no-RBM,
%WER 2.14 [ 268 / 12533, 29 ins, 71 del, 168 sub ] exp/dnn4h-dummy-ivec/decode/wer_4_0.0
# Xent, RBM,
%WER 1.84 [ 230 / 12533, 29 ins, 51 del, 150 sub ] exp/dnn4h-dummy-ivec_pretrain-dbn_dnn/decode/wer_3_1.0
# sMBR,
%WER 1.83 [ 229 / 12533, 29 ins, 50 del, 150 sub ] exp/dnn4h-dummy-ivec_pretrain-dbn_dnn_smbr/decode_it1/wer_3_1.0
%WER 1.81 [ 227 / 12533, 29 ins, 49 del, 149 sub ] exp/dnn4h-dummy-ivec_pretrain-dbn_dnn_smbr/decode_it3/wer_3_1.0
%WER 1.86 [ 233 / 12533, 34 ins, 46 del, 153 sub ] exp/dnn4h-dummy-ivec_pretrain-dbn_dnn_smbr/decode_it6/wer_3_0.5

# blstm4i, FBANK+pitch, (Karel - 9.8.2016)
%WER 2.03 [ 254 / 12533, 21 ins, 63 del, 170 sub ] exp/blstm4i/decode/wer_4_0.5

### ^^^ nnet1 results ^^^ ###
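# For reference, each result line above reads
#   %WER <wer> [ <errors> / <ref-words>, <ins> ins, <del> del, <sub> sub ] <decode-dir>/wer_<suffix>
# where <wer> = 100 * <errors> / <ref-words> and the wer_<suffix> file is the scoring
# run (LM weight, and in the newer setups also the word insertion penalty) that gave
# the best WER.  The one-liner below is an illustrative sketch only, not part of the
# recipe; it assumes this file is saved under its usual name, RESULTS, and simply
# recomputes the percentage from the error counts as a sanity check:
awk '$1=="%WER" { printf("%s  listed %s  recomputed %.2f\n", $14, $2, 100*$4/($6+0)) }' RESULTS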