Blame view
egs/tedlium/s5_r2/local/run_unk_model.sh
2.37 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
#!/bin/bash utils/lang/make_unk_lm.sh data/local/dict exp/unk_lang_model utils/prepare_lang.sh --unk-fst exp/unk_lang_model/unk_fst.txt data/local/dict "<unk>" data/local/lang data/lang_unk # note: it's important that the LM we built in data/lang/G.fst was created using # pocolm with the option --limit-unk-history=true (see ted_train_lm.sh). This # keeps the graph compact after adding the unk model (we only have to add one # copy of it). cp data/lang/G.fst data/lang_unk/G.fst utils/mkgraph.sh data/lang_unk exp/tri3 exp/tri3/graph_unk . ./cmd.sh ## Caution: if you use this unk-model stuff, be sure that the scoring script ## does not use lattice-align-words-lexicon, because it's not compatible with ## the unk-model. Instead you should use lattice-align-words (of course, this ## only works if you have position-dependent phones). decode_nj=30 for dset in dev test; do steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \ exp/tri3/graph_unk data/${dset} exp/tri3/decode_${dset}_unk steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \ data/${dset} exp/tri3/decode_${dset}_unk exp/tri3/decode_${dset}_unk_rescore done # # for x in exp/tri3/decode*; do grep Sum $x/*/*ys | utils/best_wer.sh ; done | grep -v old | grep -v si # # dev results. unk-model helps slightly before rescoring. # %WER 19.3 | 507 17783 | 83.7 11.6 4.7 3.0 19.3 91.5 | -0.076 | exp/tri3/decode_dev/score_17_0.0/ctm.filt.filt.sys # %WER 18.2 | 507 17783 | 84.8 10.7 4.5 3.0 18.2 91.3 | -0.111 | exp/tri3/decode_dev_rescore/score_16_0.0/ctm.filt.filt.sys # %WER 19.1 | 507 17783 | 83.7 11.3 5.1 2.8 19.1 91.9 | -0.044 | exp/tri3/decode_dev_unk/score_17_0.0/ctm.filt.filt.sys # %WER 18.2 | 507 17783 | 84.5 10.6 4.9 2.8 18.2 91.5 | -0.047 | exp/tri3/decode_dev_unk_rescore/score_15_0.0/ctm.filt.filt.sys # # dev results. unk-model helps slightly after rescoring. # %WER 17.3 | 1155 27500 | 85.0 11.5 3.5 2.4 17.3 86.9 | -0.035 | exp/tri3/decode_test/score_15_0.0/ctm.filt.filt.sys # %WER 16.6 | 1155 27500 | 85.8 11.0 3.2 2.4 16.6 86.4 | -0.098 | exp/tri3/decode_test_rescore/score_14_0.0/ctm.filt.filt.sys # %WER 17.3 | 1155 27500 | 84.9 11.3 3.8 2.2 17.3 87.4 | -0.015 | exp/tri3/decode_test_unk/score_15_0.0/ctm.filt.filt.sys # %WER 16.5 | 1155 27500 | 85.7 10.7 3.6 2.2 16.5 86.7 | -0.075 | exp/tri3/decode_test_unk_rescore/score_14_0.0/ctm.filt.filt.sys |