04e_train_sgmm.sh
1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/sh
# SGMM2 training, MMI training, decoding and scoring stage of the recipe.
# All paths below are relative, so run this from the recipe directory that
# holds 00_init_paths.sh together with steps/, utils/ and local/.

# Source the path setup with an explicit ./ prefix: POSIX `.` resolves a
# slash-less name through $PATH, and shells such as dash (or bash running as
# sh) do not fall back to the current directory.
. ./00_init_paths.sh
# TODO:
# - Acoustic model (AM): try quinphones, MLP, ...
# - Language model (LM): perplexity on dev, native 4-gram, RNN LM
# SGMM ---> 19.52
# (score noted by the author; presumably the error rate on data/dev -- confirm)

# Train the UBM from the tri3b alignments, then the SGMM2 system on top of it.
steps/train_ubm.sh 600 data/train lang_train exp/tri3b_ali exp/ubm5b2 || exit 1;
steps/train_sgmm2.sh 11000 25000 data/train lang_train exp/tri3b_ali \
  exp/ubm5b2/final.ubm exp/sgmm2_5b2 || exit 1;

# Build the decoding graph and decode data/dev. Both steps now abort on
# failure like the rest of this stage: the later rescoring, MBR and
# combination steps all read exp/sgmm2_5b2/decode_dev.
utils/mkgraph.sh lang exp/sgmm2_5b2 exp/sgmm2_5b2/graph || exit 1;
steps/decode_sgmm2.sh --nj 6 --transform-dir exp/tri3b/decode_dev \
  exp/sgmm2_5b2/graph data/dev exp/sgmm2_5b2/decode_dev || exit 1;

# Align the training data with the SGMM2 model and generate the denominator
# lattices consumed by the MMI training steps.
steps/align_sgmm2.sh --nj 20 --transform-dir exp/tri3b_ali \
  --use-graphs true --use-gselect true \
  data/train lang_train exp/sgmm2_5b2 exp/sgmm2_5b2_ali || exit 1;
steps/make_denlats_sgmm2.sh --nj 20 --sub-split 20 \
  --transform-dir exp/tri3b_ali \
  data/train lang_train exp/sgmm2_5b2_ali exp/sgmm2_5b2_denlats || exit 1;
# MMI training with --boost 0.1, using the SGMM2 alignments and denominator
# lattices; stop the script if training fails.
steps/train_mmi_sgmm2.sh \
  --transform-dir exp/tri3b_ali \
  --boost 0.1 \
  data/train lang_train \
  exp/sgmm2_5b2_ali exp/sgmm2_5b2_denlats \
  exp/sgmm2_5b2_mmi_b0.1 || exit 1

# Rescore the baseline SGMM2 dev decode with MMI iterations 1 through 4,
# writing one decode directory per iteration.
for iter in 1 2 3 4
do
  steps/decode_sgmm2_rescore.sh \
    --iter "${iter}" \
    --transform-dir exp/tri3b/decode_dev \
    lang data/dev \
    exp/sgmm2_5b2/decode_dev \
    "exp/sgmm2_5b2_mmi_b0.1/decode_dev_it${iter}"
done
# Same MMI training as the exp/sgmm2_5b2_mmi_b0.1 system, but with
# --zero-if-disjoint true; output goes to exp/sgmm2_5b2_mmi_b0.1_z.
# Abort on failure so the rescoring loop below never runs against a model
# directory that was not produced.
steps/train_mmi_sgmm2.sh --transform-dir exp/tri3b_ali --boost 0.1 \
  --zero-if-disjoint true \
  data/train lang_train exp/sgmm2_5b2_ali exp/sgmm2_5b2_denlats \
  exp/sgmm2_5b2_mmi_b0.1_z || exit 1

# Rescore the baseline SGMM2 dev decode with MMI iterations 1 through 4.
for iter in 1 2 3 4; do
  steps/decode_sgmm2_rescore.sh --iter "$iter" \
    --transform-dir exp/tri3b/decode_dev \
    lang data/dev exp/sgmm2_5b2/decode_dev \
    "exp/sgmm2_5b2_mmi_b0.1_z/decode_dev_it$iter"
done
# MBR ---> 19.49
# Run local/score_mbr.sh on a copy of the iteration-3 MMI decode directory
# (presumably so the existing scoring output in decode_dev_it3 is preserved).
# Source and destination are written out in full: `dir{,.mbr}` brace
# expansion is not part of POSIX sh and is passed through literally by shells
# such as dash, which makes cp fail.
# NOTE: -T (treat destination as a normal file/dir name) is a GNU cp option.
cp -r -T exp/sgmm2_5b2_mmi_b0.1/decode_dev_it3 \
  exp/sgmm2_5b2_mmi_b0.1/decode_dev_it3.mbr || exit 1
local/score_mbr.sh data/dev lang exp/sgmm2_5b2_mmi_b0.1/decode_dev_it3.mbr
# SGMM+MMI+fMMI ---> 18.00
# Combine the tri3b fMMI decode with an SGMM2+MMI decode via
# local/score_combine.sh (the last argument is presumably the output dir).
# NOTE(review): this reads exp/sgmm2_5b_mmi_b0.1 ("5b"), while the systems
# built in this script live under exp/sgmm2_5b2_* ("5b2"). Confirm whether an
# earlier "5b" experiment is intended here or the paths should say "5b2".
local/score_combine.sh \
  data/dev lang \
  exp/tri3b_fmmi_indirect/decode_dev_it3 \
  exp/sgmm2_5b_mmi_b0.1/decode_dev_it3 \
  exp/combine_tri3b_fmmi_indirect_sgmm2_5b_mmi_b0.1/decode_dev_it8_3