run_mmi.sh
1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/bin/bash
. cmd.sh
set -e
dev_set=
# Thish runs on all the data data/train_nodup; the triphone baseline, tri4 is
# also trained on that set
if [ -e data/train_dev ]; then
dev_set=train_dev
fi
### MMI training
# MMI training starting from the LDA+MLLT+SAT systems with the entire train_nodup (233hr)
steps/make_denlats.sh --nj 50 --cmd "$decode_cmd" --config conf/decode.config \
--transform-dir exp/tri4_ali_nodup \
data/train_nodup data/lang exp/tri4 exp/tri4_denlats_nodup
# 4 iterations of MMI seems to work well overall. The number of iterations is
# used as an explicit argument even though train_mmi.sh will use 4 iterations by
# default.
num_mmi_iters=4
steps/train_mmi.sh --cmd "$decode_cmd" --boost 0.1 --num-iters $num_mmi_iters \
data/train_nodup data/lang exp/tri4_{ali,denlats}_nodup exp/tri4_mmi_b0.1
for eval_num in eval1 eval2 eval3 $dev_set ; do
for iter in 1 2 3 4; do
graph_dir=exp/tri4/graph_csj_tg
decode_dir=exp/tri4_mmi_b0.1/decode_${eval_num}_${iter}.mdl_csj
steps/decode.sh --nj 10 --cmd "$decode_cmd" --config conf/decode.config \
--iter $iter --transform-dir exp/tri4/decode_${eval_num}_csj \
$graph_dir data/$eval_num $decode_dir
done
done
wait
# Now do fMMI+MMI training
steps/train_diag_ubm.sh --silence-weight 0.5 --nj 50 --cmd "$train_cmd" \
700 data/train_nodup data/lang exp/tri4_ali_nodup exp/tri4_dubm
steps/train_mmi_fmmi.sh --learning-rate 0.005 --boost 0.1 --cmd "$train_cmd" \
data/train_nodup data/lang exp/tri4_ali_nodup exp/tri4_dubm \
exp/tri4_denlats_nodup exp/tri4_fmmi_b0.1
for eval_num in eval1 eval2 eval3 $dev_set ; do
for iter in 4 5 6 7 8; do
graph_dir=exp/tri4/graph_csj_tg
decode_dir=exp/tri4_fmmi_b0.1/decode_${eval_num}_it${iter}_csj
steps/decode_fmmi.sh --nj 10 --cmd "$decode_cmd" --iter $iter \
--transform-dir exp/tri4/decode_${eval_num}_csj \
--config conf/decode.config $graph_dir data/${eval_num} $decode_dir
done
done
wait