#!/bin/bash
# this RESULTS file was obtained by Dan Povey in Feb 2017, after
# a rewrite of the run.sh file.
# To see results from the scripts in local/nnet3/ and local/chain/,
# look at the top of those files; we don't put those results in this
# RESULTS file.
for dir in exp/*; do
  steps/info/gmm_dir_info.pl $dir
  for x in $dir/decode*dev93* $dir/decode*eval92*; do
    [ -d $x ] && [[ $x =~ "$1" ]] && grep WER $x/wer_* | utils/best_wer.sh
  done
done
exit 0
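# Usage note: the loop above takes an optional regex in $1 to filter the decode
# directories. Hypothetical invocations (assuming you run this from the recipe
# directory containing exp/):
#   bash RESULTS           # show all results
#   bash RESULTS eval92    # show only the eval92 decodes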
# Use caution when comparing these results with other published results.
# We use the "20k open" test condition, also known as the "60k vocabulary"
# test condition, in which test utterances are not excluded even if they
# contain words not in the language model. This is the hardest test condition;
# most published results are for the easier 5k and 20k-closed conditions,
# which test only on utterances whose words all fall within a 5k or 20k
# subset of the vocabulary.
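# A legend for the decode-directory names below: "tgpr" = pruned trigram LM,
# "tg" = unpruned trigram, "_fg" = rescored with a 4-gram LM, "bd" = the
# big-dictionary (extended-vocabulary) setup, "nosp" = before pronunciation and
# silence probabilities were estimated, and ".si" = the speaker-independent
# first pass of a SAT decode, before fMLLR adaptation.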
# monophone, deltas, trained on the 2k shortest utterances from the si84 data.
exp/mono0a: nj=10 align prob=-95.82 over 2.36h [retry=0.4%, fail=0.0%] states=132 gauss=973
%WER 34.33 [ 2827 / 8234, 266 ins, 457 del, 2104 sub ] exp/mono0a/decode_nosp_tgpr_dev93/wer_10_0.0
%WER 25.13 [ 1418 / 5643, 138 ins, 192 del, 1088 sub ] exp/mono0a/decode_nosp_tgpr_eval92/wer_10_0.0
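# How to read these lines: WER = (insertions + deletions + substitutions)
# divided by the number of reference words, so for the dev93 line above,
# (266 + 457 + 2104) / 8234 = 2827 / 8234 = 34.33%. The wer_10_0.0 suffix
# records the LM weight (10) and word insertion penalty (0.0) that gave this
# best WER. A one-liner to check the arithmetic:
#   awk 'BEGIN{ printf("%.2f%%\n", (266+457+2104)/8234*100) }'   # -> 34.33%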
# first triphone build. Built on half of SI-84.
exp/tri1: nj=10 align prob=-93.75 over 7.38h [retry=0.4%, fail=0.0%] states=1567 gauss=10025 tree-impr=5.06
%WER 19.40 [ 1597 / 8234, 247 ins, 199 del, 1151 sub ] exp/tri1/decode_nosp_tgpr_dev93/wer_14_0.5
%WER 12.76 [ 720 / 5643, 110 ins, 89 del, 521 sub ] exp/tri1/decode_nosp_tgpr_eval92/wer_14_1.0
# the above, rescored with the full trigram model [not pruned]. Note: the
# tg{1,2,3,4,5} suffixes denote different rescoring methods; they all give
# about the same results, and methods 3 and 4 give the "correct" LM scores.
%WER 18.23 [ 1501 / 8234, 245 ins, 181 del, 1075 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg1/wer_15_0.5
%WER 18.23 [ 1501 / 8234, 245 ins, 181 del, 1075 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg2/wer_15_0.5
%WER 18.16 [ 1495 / 8234, 268 ins, 153 del, 1074 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg3/wer_16_0.0
%WER 18.18 [ 1497 / 8234, 268 ins, 154 del, 1075 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg4/wer_16_0.0
%WER 18.16 [ 1495 / 8234, 220 ins, 202 del, 1073 sub ] exp/tri1/decode_nosp_tgpr_dev93_tg5/wer_15_1.0
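# A sketch of how such a rescoring is typically invoked; the lang-directory
# names and --mode value below are assumptions rather than copied from run.sh
# (steps/lmrescore.sh's --mode option selects among the rescoring methods):
#   steps/lmrescore.sh --mode 3 --cmd "$decode_cmd" \
#     data/lang_nosp_test_tgpr data/lang_nosp_test_tg data/test_dev93 \
#     exp/tri1/decode_nosp_tgpr_dev93 exp/tri1/decode_nosp_tgpr_dev93_tg3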
# tri2b is an LDA+MLLT system trained on SI-84
exp/tri2b: nj=10 align prob=-47.22 over 15.10h [retry=0.7%, fail=0.0%] states=2005 gauss=15036 tree-impr=5.45 lda-sum=26.20 mllt:impr,logdet=1.34,1.97
%WER 16.37 [ 1348 / 8234, 241 ins, 157 del, 950 sub ] exp/tri2b/decode_nosp_tgpr_dev93/wer_17_0.0
%WER 10.53 [ 594 / 5643, 110 ins, 60 del, 424 sub ] exp/tri2b/decode_nosp_tgpr_eval92/wer_17_0.5
# tri3b is an LDA+MLLT+SAT system trained on all of SI-284
exp/tri3b: nj=10 align prob=-44.30 over 81.23h [retry=0.8%, fail=0.1%] states=3362 gauss=40061 fmllr-impr=3.70 over 59.77h tree-impr=7.86
%WER 15.56 [ 1281 / 8234, 220 ins, 140 del, 921 sub ] exp/tri3b/decode_nosp_tgpr_dev93.si/wer_17_0.5
%WER 12.82 [ 1056 / 8234, 135 ins, 147 del, 774 sub ] exp/tri3b/decode_nosp_bd_tgpr_dev93.si/wer_15_0.0
%WER 9.24 [ 761 / 8234, 89 ins, 109 del, 563 sub ] exp/tri3b/decode_nosp_bd_tgpr_dev93/wer_16_0.0
%WER 11.53 [ 949 / 8234, 179 ins, 94 del, 676 sub ] exp/tri3b/decode_nosp_tgpr_dev93/wer_15_0.5
%WER 10.94 [ 901 / 8234, 181 ins, 82 del, 638 sub ] exp/tri3b/decode_nosp_tg_dev93/wer_14_0.5
%WER 8.16 [ 672 / 8234, 94 ins, 94 del, 484 sub ] exp/tri3b/decode_nosp_bd_tgpr_dev93_fg/wer_17_0.0
%WER 10.95 [ 618 / 5643, 148 ins, 36 del, 434 sub ] exp/tri3b/decode_nosp_tgpr_eval92.si/wer_14_0.0
%WER 8.19 [ 462 / 5643, 77 ins, 51 del, 334 sub ] exp/tri3b/decode_nosp_bd_tgpr_eval92.si/wer_16_0.0
%WER 5.55 [ 313 / 5643, 35 ins, 45 del, 233 sub ] exp/tri3b/decode_nosp_bd_tgpr_eval92/wer_17_1.0
%WER 4.89 [ 276 / 5643, 47 ins, 28 del, 201 sub ] exp/tri3b/decode_nosp_bd_tgpr_eval92_fg/wer_15_0.5
%WER 7.53 [ 425 / 5643, 112 ins, 20 del, 293 sub ] exp/tri3b/decode_nosp_tg_eval92/wer_17_0.0
%WER 8.15 [ 460 / 5643, 113 ins, 30 del, 317 sub ] exp/tri3b/decode_nosp_tgpr_eval92/wer_14_1.0
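# The SAT decodes above come from steps/decode_fmllr.sh, which writes the
# speaker-independent first pass to the corresponding .si directory; roughly
# (the graph-directory name here is an assumption):
#   steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
#     exp/tri3b/graph_nosp_tgpr data/test_dev93 exp/tri3b/decode_nosp_tgpr_dev93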
# tri4b is an LDA+MLLT+SAT system after estimating pronunciation probabilities
# and word-and-pronunciation-dependent silence probabilities.
exp/tri4b: nj=10 align prob=-44.46 over 81.23h [retry=0.6%, fail=0.1%] states=3413 gauss=40059 fmllr-impr=0.17 over 60.20h tree-impr=8.70
%WER 15.16 [ 1248 / 8234, 253 ins, 96 del, 899 sub ] exp/tri4b/decode_tgpr_dev93.si/wer_17_0.0
%WER 12.62 [ 1039 / 8234, 141 ins, 124 del, 774 sub ] exp/tri4b/decode_bd_tgpr_dev93.si/wer_17_0.0
%WER 9.01 [ 742 / 8234, 106 ins, 97 del, 539 sub ] exp/tri4b/decode_bd_tgpr_dev93/wer_16_0.0
%WER 8.25 [ 679 / 8234, 94 ins, 100 del, 485 sub ] exp/tri4b/decode_bd_tgpr_dev93_fg/wer_17_0.5
%WER 10.92 [ 899 / 8234, 186 ins, 92 del, 621 sub ] exp/tri4b/decode_tg_dev93/wer_17_0.5
%WER 11.44 [ 942 / 8234, 203 ins, 87 del, 652 sub ] exp/tri4b/decode_tgpr_dev93/wer_14_0.5
%WER 10.93 [ 617 / 5643, 147 ins, 33 del, 437 sub ] exp/tri4b/decode_tgpr_eval92.si/wer_14_1.0
%WER 8.74 [ 493 / 5643, 104 ins, 34 del, 355 sub ] exp/tri4b/decode_bd_tgpr_eval92.si/wer_15_0.0
%WER 5.69 [ 321 / 5643, 50 ins, 34 del, 237 sub ] exp/tri4b/decode_bd_tgpr_eval92/wer_17_0.5
%WER 4.71 [ 266 / 5643, 40 ins, 27 del, 199 sub ] exp/tri4b/decode_bd_tgpr_eval92_fg/wer_17_1.0
%WER 7.39 [ 417 / 5643, 107 ins, 24 del, 286 sub ] exp/tri4b/decode_tg_eval92/wer_16_1.0
%WER 7.90 [ 446 / 5643, 111 ins, 27 del, 308 sub ] exp/tri4b/decode_tgpr_eval92/wer_15_1.0
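# The pronunciation and silence probabilities used for tri4b are estimated from
# tri3b's alignments; a sketch of the usual flow (the directory names are
# assumptions, see run.sh for the real invocation):
#   steps/get_prons.sh --cmd "$train_cmd" data/train_si284 data/lang_nosp exp/tri3b
#   utils/dict_dir_add_pronprobs.sh --max-normalize true data/local/dict_nosp \
#     exp/tri3b/pron_counts_nowb.txt exp/tri3b/sil_counts_nowb.txt \
#     exp/tri3b/pron_bigram_counts_nowb.txt data/local/dict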
######################################
## Results below this point were mostly obtained in 2013 by Hainan Xu.
## They are from parts of the script that are no longer run by default in run.sh;
## you can look in the git history to see when these results were added.
%WER 7.99 [ 658 / 8234, 72 ins, 95 del, 491 sub ] exp/tri4b_fmmi_a/decode_bd_tgpr_dev93_it8/wer_12
%WER 11.15 [ 918 / 8234, 180 ins, 81 del, 657 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it3/wer_15
%WER 11.23 [ 925 / 8234, 201 ins, 77 del, 647 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it4/wer_12
%WER 10.64 [ 876 / 8234, 180 ins, 80 del, 616 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it5/wer_13
%WER 10.43 [ 859 / 8234, 174 ins, 76 del, 609 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it6/wer_12
%WER 10.42 [ 858 / 8234, 178 ins, 70 del, 610 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it7/wer_11
%WER 10.41 [ 857 / 8234, 179 ins, 66 del, 612 sub ] exp/tri4b_fmmi_a/decode_tgpr_dev93_it8/wer_10
%WER 4.09 [ 231 / 5643, 40 ins, 12 del, 179 sub ] exp/tri4b_fmmi_a/decode_tgpr_eval92_it8/wer_11
%WER 10.61 [ 874 / 8234, 188 ins, 75 del, 611 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it3/wer_13
%WER 10.44 [ 860 / 8234, 183 ins, 79 del, 598 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it4/wer_13
%WER 10.27 [ 846 / 8234, 180 ins, 73 del, 593 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it5/wer_12
%WER 10.27 [ 846 / 8234, 174 ins, 72 del, 600 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it6/wer_12
%WER 10.06 [ 828 / 8234, 162 ins, 80 del, 586 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it7/wer_13
%WER 10.08 [ 830 / 8234, 158 ins, 84 del, 588 sub ] exp/tri4b_fmmi_indirect/decode_tgpr_dev93_it8/wer_13
%WER 10.77 [ 887 / 8234, 194 ins, 72 del, 621 sub ] exp/tri4b_mmi_b0.1/decode_tgpr_dev93/wer_12
%WER 12.27 [ 1010 / 8234, 188 ins, 104 del, 718 sub ] exp/sgmm2_5a/decode_tgpr_dev93/wer_14
%WER 11.87 [ 977 / 8234, 201 ins, 75 del, 701 sub ] exp/sgmm2_5a_mmi_b0.1/decode_tgpr_dev93_it1/wer_11
%WER 11.84 [ 975 / 8234, 195 ins, 81 del, 699 sub ] exp/sgmm2_5a_mmi_b0.1/decode_tgpr_dev93_it2/wer_13
%WER 11.67 [ 961 / 8234, 196 ins, 77 del, 688 sub ] exp/sgmm2_5a_mmi_b0.1/decode_tgpr_dev93_it3/wer_13
%WER 11.78 [ 970 / 8234, 190 ins, 82 del, 698 sub ] exp/sgmm2_5a_mmi_b0.1/decode_tgpr_dev93_it4/wer_14
%WER 11.87 [ 977 / 8234, 201 ins, 75 del, 701 sub ] exp/sgmm2_5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it1/wer_11
%WER 11.85 [ 976 / 8234, 195 ins, 81 del, 700 sub ] exp/sgmm2_5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it2/wer_13
%WER 11.67 [ 961 / 8234, 196 ins, 77 del, 688 sub ] exp/sgmm2_5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it3/wer_13
%WER 11.78 [ 970 / 8234, 190 ins, 82 del, 698 sub ] exp/sgmm2_5a_mmi_b0.1_m0.9/decode_tgpr_dev93_it4/wer_14
%WER 8.23 [ 678 / 8234, 87 ins, 103 del, 488 sub ] exp/sgmm2_5b/decode_bd_tgpr_dev93/wer_12
%WER 4.29 [ 242 / 5643, 37 ins, 18 del, 187 sub ] exp/sgmm2_5b/decode_bd_tgpr_eval92/wer_12
%WER 10.88 [ 896 / 8234, 195 ins, 82 del, 619 sub ] exp/sgmm2_5b/decode_tgpr_dev93/wer_12
%WER 6.86 [ 387 / 5643, 97 ins, 18 del, 272 sub ] exp/sgmm2_5b/decode_tgpr_eval92/wer_13
%WER 3.93 [ 222 / 5643, 36 ins, 14 del, 172 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it1/wer_11
%WER 3.77 [ 213 / 5643, 33 ins, 12 del, 168 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it2/wer_12
%WER 3.62 [ 204 / 5643, 35 ins, 10 del, 159 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3/wer_10
%WER 3.69 [ 208 / 5643, 33 ins, 11 del, 164 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it3.mbr/wer_11
%WER 3.51 [ 198 / 5643, 34 ins, 9 del, 155 sub ] exp/sgmm2_5b_mmi_b0.1/decode_bd_tgpr_eval92_it4/wer_10
%WER 7.83 [ 645 / 8234, 83 ins, 95 del, 467 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_dev93_it1/wer_12
%WER 7.63 [ 628 / 8234, 76 ins, 99 del, 453 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_dev93_it2/wer_14
%WER 7.52 [ 619 / 8234, 86 ins, 88 del, 445 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_dev93_it3/wer_11
%WER 7.41 [ 610 / 8234, 76 ins, 93 del, 441 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_dev93_it4/wer_13
%WER 3.92 [ 221 / 5643, 36 ins, 14 del, 171 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_eval92_it1/wer_11
%WER 3.72 [ 210 / 5643, 32 ins, 12 del, 166 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_eval92_it2/wer_13
%WER 3.67 [ 207 / 5643, 33 ins, 10 del, 164 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_eval92_it3/wer_12
%WER 3.60 [ 203 / 5643, 35 ins, 10 del, 158 sub ] exp/sgmm2_5b_mmi_b0.1_z/decode_bd_tgpr_eval92_it4/wer_11
# regular SGMM (we only ran the basic one, not discriminatively trained,
# although the scripts are there).
# Rescored with quinphone; these results are not updated.
# DNN on fMLLR features (Karel's setup, [7.8.2015]).
# frame cross-entropy training
%WER 6.05 [ 498 / 8234, 59 ins, 67 del, 372 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_bd_tgpr_dev93/wer_11_0.0
%WER 3.69 [ 208 / 5643, 19 ins, 19 del, 170 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_bd_tgpr_eval92/wer_11_1.0
# sMBR training (5 iterations, no lattice regeneration).
%WER 5.60 [ 461 / 8234, 58 ins, 62 del, 341 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_bd_tgpr_dev93_it4/wer_11_0.0
%WER 3.35 [ 189 / 5643, 18 ins, 14 del, 157 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_bd_tgpr_eval92_it5/wer_13_1.0
# DNN results with the CPU-based setup
%WER 7.21 [ 594 / 8234, 64 ins, 98 del, 432 sub ] exp/nnet5c1/decode_bd_tgpr_dev93/wer_14
#==== Below are some DNN results from an older version of the RESULTS file:
# Dan's CPU-based neural net recipe. Note: the best number for dev93 is 7.10, an SGMM+MMI system,
# and for eval92 is 3.79, the same system. (On this setup, discriminative training helped a lot,
# which seems to be the reason we can't beat the SGMM+MMI numbers here.)
exp/nnet5c1/decode_bd_tgpr_dev93/wer_14:%WER 7.32 [ 603 / 8234, 61 ins, 101 del, 441 sub ]
exp/nnet5c1/decode_bd_tgpr_eval92/wer_14:%WER 4.39 [ 248 / 5643, 32 ins, 17 del, 199 sub ]
# Note: my 4.39% result is worse than Karel's 3.56%.
# some GPU-based neural network training results...
# Below is the recipe with multiple VTLN warps and mel-filterbank inputs.
%WER 7.24 [ 596 / 8234, 62 ins, 98 del, 436 sub ] exp/nnet5b_gpu/decode_bd_tgpr_dev93/wer_15
%WER 3.95 [ 223 / 5643, 30 ins, 18 del, 175 sub ] exp/nnet5b_gpu/decode_bd_tgpr_eval92/wer_16
# 5c is the tanh recipe (run on CPU; see 5c_gpu below for the GPU run)
%WER 7.08 [ 583 / 8234, 64 ins, 94 del, 425 sub ] exp/nnet5c/decode_bd_tgpr_dev93/wer_14
%WER 4.02 [ 227 / 5643, 32 ins, 16 del, 179 sub ] exp/nnet5c/decode_bd_tgpr_eval92/wer_13
# 5c_gpu (the same, run on GPU)
# note: for 5c and 5c_gpu, we could get better results by using only 4
# GPU jobs and half the learning rate, but of course it would take twice as long.
%WER 7.29 [ 600 / 8234, 60 ins, 99 del, 441 sub ] exp/nnet5c_gpu/decode_bd_tgpr_dev93/wer_14
%WER 4.08 [ 230 / 5643, 34 ins, 15 del, 181 sub ] exp/nnet5c_gpu/decode_bd_tgpr_eval92/wer_13
# 5d is the pnorm recipe, with 4 jobs; compare with nnet5c.
%WER 6.97 [ 574 / 8234, 72 ins, 84 del, 418 sub ] exp/nnet5d/decode_bd_tgpr_dev93/wer_12
%WER 3.86 [ 218 / 5643, 27 ins, 12 del, 179 sub ] exp/nnet5d/decode_bd_tgpr_eval92/wer_13
# (the same without the big dictionary)
%WER 9.40 [ 774 / 8234, 164 ins, 71 del, 539 sub ] exp/nnet5d/decode_tgpr_dev93/wer_12
%WER 6.45 [ 364 / 5643, 81 ins, 19 del, 264 sub ] exp/nnet5d/decode_tgpr_eval92/wer_14
# 5d_gpu is the pnorm recipe, run on GPU.
%WER 7.07 [ 582 / 8234, 62 ins, 94 del, 426 sub ] exp/nnet5d_gpu/decode_bd_tgpr_dev93/wer_13
%WER 4.06 [ 229 / 5643, 31 ins, 13 del, 185 sub ] exp/nnet5d_gpu/decode_bd_tgpr_eval92/wer_12
# (the same without the big dictionary)
%WER 9.35 [ 770 / 8234, 161 ins, 78 del, 531 sub ] exp/nnet5d_gpu/decode_tgpr_dev93/wer_12
%WER 6.59 [ 372 / 5643, 91 ins, 15 del, 266 sub ] exp/nnet5d_gpu/decode_tgpr_eval92/wer_12
%WER 7.13 [ 587 / 8234, 72 ins, 93 del, 422 sub ] exp/nnet5d_gpu/decode_bd_tgpr_dev93/wer_13
%WER 4.06 [ 229 / 5643, 31 ins, 16 del, 182 sub ] exp/nnet5d_gpu/decode_bd_tgpr_eval92/wer_14
# 5e is the GPU version of ensemble training of the pnorm recipe, with 4 jobs; compare with nnet5d.
%WER 7.19 [ 592 / 8234, 72 ins, 89 del, 431 sub ] exp/nnet5e_gpu/decode_bd_tgpr_dev93/wer_10
%WER 3.97 [ 224 / 5643, 25 ins, 16 del, 183 sub ] exp/nnet5e_gpu/decode_bd_tgpr_eval92/wer_14
# decoded with tgpr LM.
%WER 9.55 [ 786 / 8234, 163 ins, 83 del, 540 sub ] exp/nnet5d_gpu/decode_tgpr_dev93/wer_13
%WER 6.50 [ 367 / 5643, 95 ins, 16 del, 256 sub ] exp/nnet5d_gpu/decode_tgpr_eval92/wer_14
# For results with VTLN, see e.g. local/run_vtln2.sh; the results are at the end of that script.
# Online-nnet2 results:
for x in exp/nnet2_online/nnet_ms_a_online/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 7.02 [ 578 / 8234, 85 ins, 88 del, 405 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93/wer_10
%WER 6.56 [ 540 / 8234, 86 ins, 79 del, 375 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_smbr_epoch1/wer_11
%WER 6.51 [ 536 / 8234, 84 ins, 77 del, 375 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_smbr_epoch2/wer_12
%WER 6.42 [ 529 / 8234, 90 ins, 72 del, 367 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_smbr_epoch3/wer_12
%WER 6.40 [ 527 / 8234, 91 ins, 71 del, 365 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_smbr_epoch4/wer_12 **
%WER 7.23 [ 595 / 8234, 77 ins, 96 del, 422 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_utt/wer_10
%WER 6.95 [ 572 / 8234, 70 ins, 90 del, 412 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_utt_offline/wer_11
%WER 3.93 [ 222 / 5643, 32 ins, 12 del, 178 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92/wer_10
%WER 3.76 [ 212 / 5643, 27 ins, 11 del, 174 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_smbr_epoch1/wer_11
%WER 3.60 [ 203 / 5643, 27 ins, 9 del, 167 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_smbr_epoch2/wer_11 **
%WER 3.62 [ 204 / 5643, 28 ins, 7 del, 169 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_smbr_epoch3/wer_11
%WER 3.72 [ 210 / 5643, 32 ins, 7 del, 171 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_smbr_epoch4/wer_11
%WER 4.02 [ 227 / 5643, 32 ins, 14 del, 181 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_utt/wer_9
%WER 3.99 [ 225 / 5643, 29 ins, 13 del, 183 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_utt_offline/wer_10
%WER 9.45 [ 778 / 8234, 180 ins, 73 del, 525 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93/wer_10
%WER 9.39 [ 773 / 8234, 163 ins, 79 del, 531 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93_utt/wer_11
%WER 9.25 [ 762 / 8234, 174 ins, 69 del, 519 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93_utt_offline/wer_10
%WER 6.57 [ 371 / 5643, 95 ins, 12 del, 264 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92/wer_11
%WER 6.68 [ 377 / 5643, 102 ins, 13 del, 262 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92_utt/wer_10
%WER 6.56 [ 370 / 5643, 100 ins, 12 del, 258 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92_utt_offline/wer_10
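# (In the listing above, _utt means the adaptation state was reset per
# utterance rather than carried over per speaker, and _utt_offline
# re-estimates the i-vector from the whole utterance rather than online;
# the ** appears to mark the best-performing sMBR epoch on each test set.)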
# RNNLM n-best rescoring with Mikolov's model:
for x in exp/nnet2_online/nnet_ms_a_online/decode_*rnnlm.h300.voc40k; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 5.60 [ 461 / 8234, 51 ins, 70 del, 340 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_rnnlm.h300.voc40k/wer_15_0.0
%WER 2.64 [ 149 / 5643, 21 ins, 13 del, 115 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_rnnlm.h300.voc40k/wer_11_0.5
%WER 8.16 [ 672 / 8234, 136 ins, 70 del, 466 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93_rnnlm.h300.voc40k/wer_14_0.5
%WER 5.39 [ 304 / 5643, 74 ins, 16 del, 214 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92_rnnlm.h300.voc40k/wer_17_0.5
# RNNLM lattice rescoring with Mikolov's model:
for x in exp/nnet2_online/nnet_ms_a_online/decode_*rnnlm.h300.voc40k_lat; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 5.05 [ 416 / 8234, 47 ins, 72 del, 297 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_rnnlm.h300.voc40k_lat/wer_16_0.0
%WER 2.59 [ 146 / 5643, 19 ins, 14 del, 113 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_rnnlm.h300.voc40k_lat/wer_10_0.5
%WER 7.70 [ 634 / 8234, 133 ins, 67 del, 434 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93_rnnlm.h300.voc40k_lat/wer_13_0.5
%WER 5.25 [ 296 / 5643, 81 ins, 14 del, 201 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92_rnnlm.h300.voc40k_lat/wer_14_0.5
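# (Lattice rescoring considers the full lattice rather than just an n-best
# list, which is consistent with the small improvements over the n-best
# numbers above.)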
# RNNLM n-best rescoring with Yandex's model:
for x in exp/nnet2_online/nnet_ms_a_online/decode_*rnnlm-hs.nce20.h400.voc40k; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 5.31 [ 437 / 8234, 50 ins, 66 del, 321 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_dev93_rnnlm-hs.nce20.h400.voc40k/wer_13_0.0
%WER 2.91 [ 164 / 5643, 24 ins, 9 del, 131 sub ] exp/nnet2_online/nnet_ms_a_online/decode_bd_tgpr_eval92_rnnlm-hs.nce20.h400.voc40k/wer_10_0.0
%WER 7.83 [ 645 / 8234, 159 ins, 50 del, 436 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_dev93_rnnlm-hs.nce20.h400.voc40k/wer_11_0.0
%WER 5.40 [ 305 / 5643, 77 ins, 16 del, 212 sub ] exp/nnet2_online/nnet_ms_a_online/decode_tgpr_eval92_rnnlm-hs.nce20.h400.voc40k/wer_13_1.0
# TDNN results:
for x in exp/nnet3/nnet_tdnn_a/decode_*; do grep WER $x/wer_* | utils/best_wer.sh ; done
%WER 7.19 [ 592 / 8234, 51 ins, 109 del, 432 sub ] exp/nnet3/nnet_tdnn_a/decode_bd_tgpr_dev93/wer_13_0.5
%WER 3.93 [ 222 / 5643, 23 ins, 20 del, 179 sub ] exp/nnet3/nnet_tdnn_a/decode_bd_tgpr_eval92/wer_10_1.0
%WER 9.78 [ 805 / 8234, 167 ins, 72 del, 566 sub ] exp/nnet3/nnet_tdnn_a/decode_tgpr_dev93/wer_10_0.0
%WER 6.40 [ 361 / 5643, 87 ins, 16 del, 258 sub ] exp/nnet3/nnet_tdnn_a/decode_tgpr_eval92/wer_10_1.0
# local/nnet3/run_lstm.sh
# LSTM results: cell_dim=1024, recurrent_projection_dim=non_recurrent_projection_dim=256,
# lstm_delay="-1 -2 -3", label_delay=5, num_params=11894059
%WER 7.32 exp/nnet3/lstm_ld5/decode_bd_tgpr_dev93/wer_11_0.0
%WER 4.24 exp/nnet3/lstm_ld5/decode_bd_tgpr_eval92/wer_10_1.0
%WER 9.57 exp/nnet3/lstm_ld5/decode_tgpr_dev93/wer_9_1.0
%WER 6.86 exp/nnet3/lstm_ld5/decode_tgpr_eval92/wer_10_1.0
# bidirectional LSTM
# -----------------------
# local/nnet3/run_lstm.sh --affix bidirectional \
# --lstm-delay " [-1,1] [-2,2] [-3,3] " \
# --label-delay 0 \
# --cell-dim 640 \
# --recurrent-projection-dim 128 \
# --non-recurrent-projection-dim 128 \
# --chunk-left-context 40 \
# --chunk-right-context 40
# num_params=11485739
%WER 6.81 exp/nnet3/lstm_bidirectional/decode_bd_tgpr_dev93/wer_11_0.0
%WER 4.27 exp/nnet3/lstm_bidirectional/decode_bd_tgpr_eval92/wer_11_0.0
%WER 9.29 exp/nnet3/lstm_bidirectional/decode_tgpr_dev93/wer_11_0.5
%WER 6.61 exp/nnet3/lstm_bidirectional/decode_tgpr_eval92/wer_11_1.0