Blame view
egs/wsj/s5/local/run_bnf_sgmm.sh
6.55 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
#!/bin/bash

# This script builds the SGMM system on top of the kaldi internal bottleneck
# features.
#
# Pipeline (each training stage is skipped when its .done marker is newer
# than the marker of the stage it depends on):
#   exp_bnf/tri5            LDA+MLLT on the bottleneck features
#   exp_bnf/tri6            SAT on top of tri5
#   exp_bnf/ubm7            UBM
#   exp_bnf/sgmm7           SGMM2
#   exp_bnf/sgmm7_ali       alignments with the SGMM2 model
#   exp_bnf/sgmm7_denlats   denominator lattices
#   exp_bnf/sgmm7_mmi_b0.1  boosted MMI training (boost 0.1)
# Graph building and decoding of eval92/dev93 are always re-run.
#
# Requires: ./cmd.sh (expected to provide $train_cmd and $decode_cmd) and the
# exp_bnf directory produced by local/run_bnf.sh.

. ./cmd.sh

set -e
set -o pipefail
set -u

numLeaves=2500
numGauss=15000
numLeavesSGMM=10000
bnf_num_gauss_ubm=600
bnf_num_gauss_sgmm=7000
align_dir=exp/tri4b_ali_si284
bnf_decode_acwt=0.0357
sgmm_group_extra_opts=(--group 3 --cmd "queue.pl --mem 7G")

# Extra options for steps/make_denlats_sgmm2.sh.  Nothing in this script sets
# them, so default to "no extra options" unless ./cmd.sh already defined the
# array; referencing an undefined array would abort the run under 'set -u'
# on older bash versions.
if [ -z "${sgmm_denlats_extra_opts+x}" ]; then
  sgmm_denlats_extra_opts=()
fi

rule=---------------------------------------------------------------------

# Print "<message> on <current date>" framed by two separator lines.
announce() {
  echo "$rule"
  echo "$* on $(date)"
  echo "$rule"
}

if [ ! -d exp_bnf ]; then
  echo "$0: before running this script, please run local/run_bnf.sh"
  exit 1
fi

announce "Starting exp_bnf/tri5"
if [ ! exp_bnf/tri5/.done -nt data_bnf/train/.done ]; then
  steps/train_lda_mllt.sh --splice-opts "--left-context=1 --right-context=1" \
    --dim 60 --cmd "$train_cmd" \
    "$numLeaves" "$numGauss" data_bnf/train data/lang "$align_dir" exp_bnf/tri5
  touch exp_bnf/tri5/.done
fi

announce "Starting exp_bnf/tri6"
if [ ! exp_bnf/tri6/.done -nt exp_bnf/tri5/.done ]; then
  steps/train_sat.sh --cmd "$train_cmd" \
    "$numLeaves" "$numGauss" data_bnf/train data/lang exp_bnf/tri5 exp_bnf/tri6
  touch exp_bnf/tri6/.done
fi

announce "Decoding with SAT models on top of bottleneck features"
decode1=exp_bnf/tri6/decode_bd_tgpr_eval92
decode2=exp_bnf/tri6/decode_bd_tgpr_dev93
utils/mkgraph.sh \
  data/lang_test_bd_tgpr exp_bnf/tri6 exp_bnf/tri6/graph_bd_tgpr \
  | tee exp_bnf/tri6/mkgraph.log

mkdir -p "$decode1" "$decode2"
# By default, we do not care about the lattices for this step -- we just want
# the transforms.  Therefore, we will reduce the beam sizes, to reduce the
# decoding times.
steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4 \
  --acwt "$bnf_decode_acwt" \
  exp_bnf/tri6/graph_bd_tgpr data_bnf/eval92 "$decode1" \
  | tee "$decode1/decode.log"
steps/decode_fmllr_extra.sh --skip-scoring true --beam 10 --lattice-beam 4 \
  --acwt "$bnf_decode_acwt" \
  exp_bnf/tri6/graph_bd_tgpr data_bnf/dev93 "$decode2" \
  | tee "$decode2/decode.log"

announce "Starting exp_bnf/ubm7"
if [ ! exp_bnf/ubm7/.done -nt exp_bnf/tri6/.done ]; then
  steps/train_ubm.sh \
    "$bnf_num_gauss_ubm" data_bnf/train data/lang exp_bnf/tri6 exp_bnf/ubm7
  touch exp_bnf/ubm7/.done
fi

if [ ! exp_bnf/sgmm7/.done -nt exp_bnf/ubm7/.done ]; then
  announce "Starting exp_bnf/sgmm7"
  steps/train_sgmm2_group.sh \
    "${sgmm_group_extra_opts[@]}" \
    "$numLeavesSGMM" "$bnf_num_gauss_sgmm" data_bnf/train data/lang \
    exp_bnf/tri6 exp_bnf/ubm7/final.ubm exp_bnf/sgmm7
  touch exp_bnf/sgmm7/.done
fi

## SGMM2 decoding
decode1=exp_bnf/sgmm7/decode_bd_tgpr_eval92
decode2=exp_bnf/sgmm7/decode_bd_tgpr_dev93
announce "Spawning $decode1 and $decode2"
utils/mkgraph.sh \
  data/lang_test_bd_tgpr exp_bnf/sgmm7 exp_bnf/sgmm7/graph_bd_tgpr \
  | tee exp_bnf/sgmm7/mkgraph.log

mkdir -p "$decode1" "$decode2"
# The fMLLR transforms come from the tri6 decodes made above.
steps/decode_sgmm2.sh --skip-scoring false --use-fmllr true \
  --acwt "$bnf_decode_acwt" --scoring-opts "--min-lmwt 20 --max-lmwt 40" \
  --cmd "$decode_cmd" \
  --transform-dir exp_bnf/tri6/decode_bd_tgpr_eval92 \
  exp_bnf/sgmm7/graph_bd_tgpr data_bnf/eval92 "$decode1" \
  | tee "$decode1/decode.log"
steps/decode_sgmm2.sh --skip-scoring false --use-fmllr true \
  --acwt "$bnf_decode_acwt" --scoring-opts "--min-lmwt 20 --max-lmwt 40" \
  --cmd "$decode_cmd" \
  --transform-dir exp_bnf/tri6/decode_bd_tgpr_dev93 \
  exp_bnf/sgmm7/graph_bd_tgpr data_bnf/dev93 "$decode2" \
  | tee "$decode2/decode.log"

if [ ! exp_bnf/sgmm7_ali/.done -nt exp_bnf/sgmm7/.done ]; then
  announce "Starting exp_bnf/sgmm7_ali"
  steps/align_sgmm2.sh \
    --transform-dir exp_bnf/tri6 --nj 30 --use-graphs true \
    data_bnf/train data/lang exp_bnf/sgmm7 exp_bnf/sgmm7_ali
  touch exp_bnf/sgmm7_ali/.done
fi

if [ ! exp_bnf/sgmm7_denlats/.done -nt exp_bnf/sgmm7/.done ]; then
  announce "Starting exp_bnf/sgmm7_denlats"
  # The ${arr[@]+...} form expands to nothing for an empty array without
  # tripping 'set -u' on bash versions older than 4.4.
  steps/make_denlats_sgmm2.sh \
    ${sgmm_denlats_extra_opts[@]+"${sgmm_denlats_extra_opts[@]}"} \
    --transform-dir exp_bnf/tri6 --nj 30 --beam 14.0 \
    --acwt "$bnf_decode_acwt" --lattice-beam 8 \
    data_bnf/train data/lang exp_bnf/sgmm7_ali exp_bnf/sgmm7_denlats
  touch exp_bnf/sgmm7_denlats/.done
fi

if [ ! exp_bnf/sgmm7_mmi_b0.1/.done -nt exp_bnf/sgmm7_denlats/.done ]; then
  steps/train_mmi_sgmm2.sh \
    --acwt "$bnf_decode_acwt" \
    --transform-dir exp_bnf/tri6 --boost 0.1 --drop-frames true \
    data_bnf/train data/lang exp_bnf/sgmm7_ali exp_bnf/sgmm7_denlats \
    exp_bnf/sgmm7_mmi_b0.1
  touch exp_bnf/sgmm7_mmi_b0.1/.done
fi

## SGMM_MMI rescoring
# Decode SGMM+MMI (via rescoring) of the sgmm7 decodes, for MMI iterations
# 1-4: all eval92 iterations first, then all dev93 iterations.
for test_set in eval92 dev93; do
  for iter in 1 2 3 4; do
    rescore_dir=exp_bnf/sgmm7_mmi_b0.1/decode_bd_tgpr_${test_set}_it$iter
    mkdir -p "$rescore_dir"
    steps/decode_sgmm2_rescore.sh --skip-scoring false --cmd "$decode_cmd" \
      --iter "$iter" \
      --transform-dir "exp_bnf/tri6/decode_bd_tgpr_${test_set}" \
      --scoring-opts "--min-lmwt 20 --max-lmwt 40" \
      data/lang_test_bd_tgpr "data_bnf/${test_set}" \
      "exp_bnf/sgmm7/decode_bd_tgpr_${test_set}" "$rescore_dir" \
      | tee "$rescore_dir/decode.log"
  done
done

announce "Finished successfully"

#exit 1