Blame view

egs/wsj/s5/local/run_bnf_sgmm.sh 6.55 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
  #!/bin/bash
  #
  # Builds the SGMM system on top of the Kaldi-internal bottleneck features:
  # LDA+MLLT (tri5) -> SAT (tri6) -> UBM (ubm7) -> SGMM2 (sgmm7) -> MMI
  # (sgmm7_mmi_b0.1), decoding eval92 and dev93 along the way.
  # Expects to be run from a directory that already contains exp_bnf.

  # $train_cmd and $decode_cmd, used further down, are not assigned in this
  # script; they are expected to be provided by cmd.sh.
  source ./cmd.sh

  # Strict mode is enabled only after cmd.sh has been sourced.
  set -euo pipefail

  # Tree leaves / total Gaussians handed to train_lda_mllt.sh and train_sat.sh.
  numLeaves=2500
  numGauss=15000

  # Leaves handed to train_sgmm2_group.sh.
  numLeavesSGMM=10000

  # Gaussian counts handed to train_ubm.sh and train_sgmm2_group.sh.
  bnf_num_gauss_ubm=600
  bnf_num_gauss_sgmm=7000

  # Alignment directory used as input to the tri5 stage.
  align_dir=exp/tri4b_ali_si284

  # Value passed as --acwt to the decoding, denlats and MMI steps.
  bnf_decode_acwt=0.0357

  # Extra options prepended to the train_sgmm2_group.sh command line.
  sgmm_group_extra_opts=(--group 3 --cmd 'queue.pl --mem 7G')
  
  # Precondition: the exp_bnf directory must already exist. If it does not,
  # tell the user which script to run first and stop with a non-zero status.
  # The diagnostic goes to stderr so it is not mixed into captured stdout logs.
  if [ ! -d exp_bnf ]; then
    echo "$0: before running this script, please run local/run_bnf.sh" >&2
    exit 1
  fi
  
  # Stage tri5: steps/train_lda_mllt.sh on the bottleneck training data,
  # starting from the alignments in $align_dir.
  echo ---------------------------------------------------------------------
  echo "Starting exp_bnf/tri5 on" $(date)
  echo ---------------------------------------------------------------------
  # Run unless tri5's .done stamp is newer than the training data's stamp.
  if ! [ exp_bnf/tri5/.done -nt data_bnf/train/.done ]; then
    steps/train_lda_mllt.sh \
      --splice-opts "--left-context=1 --right-context=1" \
      --dim 60 \
      --cmd "$train_cmd" \
      "$numLeaves" "$numGauss" \
      data_bnf/train data/lang "$align_dir" exp_bnf/tri5
    touch exp_bnf/tri5/.done
  fi
  
  # Stage tri6: steps/train_sat.sh, using exp_bnf/tri5 as the source model dir.
  echo ---------------------------------------------------------------------
  echo "Starting exp_bnf/tri6 on" $(date)
  echo ---------------------------------------------------------------------
  # Run unless tri6's .done stamp is newer than tri5's.
  if ! [ exp_bnf/tri6/.done -nt exp_bnf/tri5/.done ]; then
    steps/train_sat.sh \
      --cmd "$train_cmd" \
      "$numLeaves" "$numGauss" \
      data_bnf/train data/lang exp_bnf/tri5 exp_bnf/tri6
    touch exp_bnf/tri6/.done
  fi
  # Decode eval92 and dev93 with the tri6 models. This stage has no .done
  # guard, so it runs on every invocation of the script.
  echo ---------------------------------------------------------------------
  echo "Decoding with SAT models on top of bottleneck features on" $(date)
  echo ---------------------------------------------------------------------
  decode1=exp_bnf/tri6/decode_bd_tgpr_eval92
  decode2=exp_bnf/tri6/decode_bd_tgpr_dev93
  utils/mkgraph.sh data/lang_test_bd_tgpr exp_bnf/tri6 exp_bnf/tri6/graph_bd_tgpr \
    | tee exp_bnf/tri6/mkgraph.log

  # The log directories must exist before tee can write into them.
  mkdir -p "$decode1" "$decode2"

  # Only the transforms produced by this pass are of interest, not the
  # lattices, so scoring is skipped and narrow beams are used to keep the
  # decoding time down.
  # NOTE(review): no --cmd is passed here, unlike the SGMM2 decoding later in
  # this script -- confirm that is intended.
  steps/decode_fmllr_extra.sh \
    --skip-scoring true --beam 10 --lattice-beam 4 \
    --acwt "$bnf_decode_acwt" \
    exp_bnf/tri6/graph_bd_tgpr data_bnf/eval92 "$decode1" \
    | tee "$decode1/decode.log"
  steps/decode_fmllr_extra.sh \
    --skip-scoring true --beam 10 --lattice-beam 4 \
    --acwt "$bnf_decode_acwt" \
    exp_bnf/tri6/graph_bd_tgpr data_bnf/dev93 "$decode2" \
    | tee "$decode2/decode.log"
  
  # Stage ubm7: steps/train_ubm.sh with $bnf_num_gauss_ubm Gaussians, using
  # exp_bnf/tri6 as the source directory.
  echo ---------------------------------------------------------------------
  echo "Starting exp_bnf/ubm7 on" $(date)
  echo ---------------------------------------------------------------------
  # Run unless ubm7's .done stamp is newer than tri6's.
  if ! [ exp_bnf/ubm7/.done -nt exp_bnf/tri6/.done ]; then
    steps/train_ubm.sh \
      "$bnf_num_gauss_ubm" \
      data_bnf/train data/lang exp_bnf/tri6 exp_bnf/ubm7
    touch exp_bnf/ubm7/.done
  fi
  
  # Stage sgmm7: steps/train_sgmm2_group.sh, initialised from the ubm7 UBM.
  # Run unless sgmm7's .done stamp is newer than ubm7's.
  if ! [ exp_bnf/sgmm7/.done -nt exp_bnf/ubm7/.done ]; then
    echo ---------------------------------------------------------------------
    echo "Starting exp_bnf/sgmm7 on" $(date)
    echo ---------------------------------------------------------------------
    # sgmm_group_extra_opts is an array so that its multi-word --cmd value
    # reaches the training script as a single argument.
    steps/train_sgmm2_group.sh \
      "${sgmm_group_extra_opts[@]}" \
      "$numLeavesSGMM" "$bnf_num_gauss_sgmm" \
      data_bnf/train data/lang \
      exp_bnf/tri6 exp_bnf/ubm7/final.ubm exp_bnf/sgmm7
    touch exp_bnf/sgmm7/.done
  fi
  
  ## SGMM2 decoding
  # Decodes eval92 and dev93 with the sgmm7 models and scores the result.
  # This stage is unconditional: there is no .done guard around it.
  decode1=exp_bnf/sgmm7/decode_bd_tgpr_eval92
  decode2=exp_bnf/sgmm7/decode_bd_tgpr_dev93
  echo ---------------------------------------------------------------------
  echo "Spawning $decode1 and $decode2 on" $(date)
  echo ---------------------------------------------------------------------
  utils/mkgraph.sh data/lang_test_bd_tgpr exp_bnf/sgmm7 exp_bnf/sgmm7/graph_bd_tgpr \
    | tee exp_bnf/sgmm7/mkgraph.log

  # The log directories must exist before tee can write into them.
  mkdir -p "$decode1" "$decode2"

  # Each test set is given --transform-dir pointing at the tri6 decode
  # directory of the same test set.
  steps/decode_sgmm2.sh \
    --skip-scoring false --use-fmllr true \
    --acwt "$bnf_decode_acwt" \
    --scoring-opts "--min-lmwt 20 --max-lmwt 40" \
    --cmd "$decode_cmd" \
    --transform-dir exp_bnf/tri6/decode_bd_tgpr_eval92 \
    exp_bnf/sgmm7/graph_bd_tgpr data_bnf/eval92 "$decode1" \
    | tee "$decode1/decode.log"
  steps/decode_sgmm2.sh \
    --skip-scoring false --use-fmllr true \
    --acwt "$bnf_decode_acwt" \
    --scoring-opts "--min-lmwt 20 --max-lmwt 40" \
    --cmd "$decode_cmd" \
    --transform-dir exp_bnf/tri6/decode_bd_tgpr_dev93 \
    exp_bnf/sgmm7/graph_bd_tgpr data_bnf/dev93 "$decode2" \
    | tee "$decode2/decode.log"
  
  # Stage sgmm7_ali: steps/align_sgmm2.sh aligns the training data with the
  # sgmm7 models. Run unless its .done stamp is newer than sgmm7's.
  if ! [ exp_bnf/sgmm7_ali/.done -nt exp_bnf/sgmm7/.done ]; then
    echo ---------------------------------------------------------------------
    echo "Starting exp_bnf/sgmm7_ali on" $(date)
    echo ---------------------------------------------------------------------
    steps/align_sgmm2.sh \
      --transform-dir exp_bnf/tri6 \
      --nj 30 \
      --use-graphs true \
      data_bnf/train data/lang exp_bnf/sgmm7 exp_bnf/sgmm7_ali
    touch exp_bnf/sgmm7_ali/.done
  fi
  
  # Stage sgmm7_denlats: steps/make_denlats_sgmm2.sh on the training data,
  # reading from exp_bnf/sgmm7_ali. Run unless its .done stamp is newer than
  # sgmm7's.
  if [ ! exp_bnf/sgmm7_denlats/.done -nt exp_bnf/sgmm7/.done ]; then
    echo ---------------------------------------------------------------------
    echo "Starting exp_bnf/sgmm7_denlats on" $(date)
    echo ---------------------------------------------------------------------
    # sgmm_denlats_extra_opts is optional and is not assigned anywhere in this
    # script. A plain "${sgmm_denlats_extra_opts[@]}" aborts under `set -u`
    # with "unbound variable" on bash older than 4.4, so the ${var[@]+...}
    # form is used: it expands to the array's elements when the array is set
    # and to nothing at all when it is not.
    steps/make_denlats_sgmm2.sh \
      ${sgmm_denlats_extra_opts[@]+"${sgmm_denlats_extra_opts[@]}"} \
      --transform-dir exp_bnf/tri6 --nj 30 --beam 14.0 \
      --acwt "$bnf_decode_acwt" --lattice-beam 8 \
      data_bnf/train data/lang exp_bnf/sgmm7_ali exp_bnf/sgmm7_denlats
    touch exp_bnf/sgmm7_denlats/.done
  fi
  
  # Stage sgmm7_mmi_b0.1: steps/train_mmi_sgmm2.sh with --boost 0.1, reading
  # the sgmm7_ali and sgmm7_denlats directories. Run unless its .done stamp is
  # newer than the denlats stamp. No banner is printed for this stage.
  if ! [ exp_bnf/sgmm7_mmi_b0.1/.done -nt exp_bnf/sgmm7_denlats/.done ]; then
    steps/train_mmi_sgmm2.sh \
      --acwt "$bnf_decode_acwt" \
      --transform-dir exp_bnf/tri6 \
      --boost 0.1 \
      --drop-frames true \
      data_bnf/train data/lang \
      exp_bnf/sgmm7_ali exp_bnf/sgmm7_denlats \
      exp_bnf/sgmm7_mmi_b0.1
    touch exp_bnf/sgmm7_mmi_b0.1/.done
  fi
  
  ## SGMM_MMI rescoring
  # Decode SGMM+MMI (via rescoring) for iterations 1-4 of sgmm7_mmi_b0.1.
  # All four iterations of eval92 run first, then all four of dev93. Each run
  # takes the sgmm7 decode directory of the same test set as its input and
  # the tri6 decode directory of the same test set as --transform-dir.
  for test_set in eval92 dev93; do
    for iter in 1 2 3 4; do
      rescore_dir=exp_bnf/sgmm7_mmi_b0.1/decode_bd_tgpr_${test_set}_it$iter
      # The directory must exist before tee can write the log into it.
      mkdir -p "$rescore_dir"
      steps/decode_sgmm2_rescore.sh \
        --skip-scoring false \
        --cmd "$decode_cmd" \
        --iter "$iter" \
        --transform-dir "exp_bnf/tri6/decode_bd_tgpr_${test_set}" \
        --scoring-opts "--min-lmwt 20 --max-lmwt 40" \
        data/lang_test_bd_tgpr "data_bnf/${test_set}" \
        "exp_bnf/sgmm7/decode_bd_tgpr_${test_set}" "$rescore_dir" \
        | tee "$rescore_dir/decode.log"
    done
  done
  
  # Closing banner; reached only if every stage above succeeded (set -e).
  echo ---------------------------------------------------------------------
  echo "Finished successfully on" $(date)
  echo ---------------------------------------------------------------------
  
  #exit 1