Blame view

egs/sprakbanken/s5/local/sprak_run_sgmm2.sh 5.89 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
  #!/bin/bash
  
  # This script is invoked from ../run.sh
  # It contains some SGMM-related scripts that I am breaking out of the main run.sh for clarity.
  
  # "$train_cmd" and "$decode_cmd" are used throughout this script but never
  # assigned in it; they are expected to be provided by cmd.sh.
  . ./cmd.sh

  # Note: you might want to try to give the option --spk-dep-weights=false to train_sgmm2.sh;
  # this takes out the "symmetric SGMM" part which is not always helpful.


  # $1: name of the test set.  It selects data/<name> and is the suffix of every
  # decode directory used below (decode_3g_<name>, decode_4g_<name>).
  test=$1

  # With an empty name every path below collapses to "data/" and "decode_3g_",
  # so stop here instead of training models whose decoding cannot work.
  if [ -z "$test" ]; then
    echo "Usage: $0 <test-set>" >&2
    echo "  <test-set> is the name of a directory under data/" >&2
    exit 1
  fi

  # Run steps/align_fmllr.sh only when exp/tri4b_ali does not exist yet.
  if [ ! -d exp/tri4b_ali ]; then
    steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
      data/train data/lang exp/tri4b exp/tri4b_ali || exit 1;
  fi
    # First SGMM2 system (exp/sgmm2_5a): train the UBM in exp/ubm5a from the
    # tri4b alignments, then train the SGMM2 on top of exp/ubm5a/final.ubm.
    steps/train_ubm.sh --cmd "$train_cmd" \
      400 data/train data/lang exp/tri4b_ali exp/ubm5a || exit 1;

    steps/train_sgmm2.sh --cmd "$train_cmd" \
      7000 9000 data/train data/lang exp/tri4b_ali \
      exp/ubm5a/final.ubm exp/sgmm2_5a || exit 1;

    # Graph building and decoding run in a background subshell so the
    # alignment steps that follow can start right away.
    (
      # Leave the subshell if the graph could not be built; decoding with a
      # missing graph would only fail later with a less obvious error.
      utils/mkgraph.sh data/lang_test_3g exp/sgmm2_5a exp/sgmm2_5a/graph_3g || exit 1;
      steps/decode_sgmm2.sh --nj 7 --cmd "$decode_cmd" \
        --transform-dir "exp/tri4b/decode_3g_${test}" \
        exp/sgmm2_5a/graph_3g "data/${test}" "exp/sgmm2_5a/decode_3g_${test}"
    ) &
  
    # Boosted-MMI training of sgmm2_5a (--boost 0.1): align the training data
    # with the SGMM2 model, create denominator lattices, then train.
    # Each step consumes the output directory of the previous one, so each
    # one aborts the script on failure.
    steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali \
      --use-graphs true --use-gselect true data/train data/lang exp/sgmm2_5a exp/sgmm2_5a_ali || exit 1;
    steps/make_denlats_sgmm2.sh --nj 30 --sub-split 2 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali \
      data/train data/lang exp/sgmm2_5a_ali exp/sgmm2_5a_denlats || exit 1;

    # Barrier for the background decode of sgmm2_5a: the rescoring loop below
    # reads exp/sgmm2_5a/decode_3g_${test}.
    wait

    steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali --boost 0.1 \
      data/train data/lang exp/sgmm2_5a_ali exp/sgmm2_5a_denlats exp/sgmm2_5a_mmi_b0.1 || exit 1;

    # Rescore the sgmm2_5a decode with MMI iterations 1..4, one background
    # job per iteration.
    for iter in 1 2 3 4; do
      steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter "$iter" \
        --transform-dir "exp/tri4b/decode_3g_${test}" data/lang_test_3g "data/${test}" \
        "exp/sgmm2_5a/decode_3g_${test}" \
        "exp/sgmm2_5a_mmi_b0.1/decode_3g_${test}_it${iter}" &
    done

    # Collect the four rescoring jobs before the next training run starts.
    wait
  
    # Second MMI run on the same alignments and denominator lattices, this
    # time passing --cov-min-value=0.9 through --update-opts; the result goes
    # to exp/sgmm2_5a_mmi_b0.1_m0.9.  Abort on failure because the rescoring
    # loop below needs the trained iterations.
    steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali --boost 0.1 \
      --update-opts "--cov-min-value=0.9" \
      data/train data/lang exp/sgmm2_5a_ali exp/sgmm2_5a_denlats exp/sgmm2_5a_mmi_b0.1_m0.9 || exit 1;

    # Rescore with iterations 1..4 in the background.  These jobs are not
    # waited for here: they keep running while the next stage trains and are
    # collected by a later `wait`.
    for iter in 1 2 3 4; do
      steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter "$iter" \
        --transform-dir "exp/tri4b/decode_3g_${test}" data/lang_test_3g "data/${test}" \
        "exp/sgmm2_5a/decode_3g_${test}" \
        "exp/sgmm2_5a_mmi_b0.1_m0.9/decode_3g_${test}_it${iter}" &
    done
  
  
  
  
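  # The next commands repeat the same steps with larger settings (600 instead
  # of 400 for the UBM, 11000/25000 instead of 7000/9000 for the SGMM2).
  # NOTE: this comment used to say "on all the si284 data", but the commands
  # below still train on data/train.

  # Larger SGMM2 system [exp/sgmm2_5b]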
    # UBM and SGMM2 for the sgmm2_5b system, trained from the tri4b alignments.
    steps/train_ubm.sh --cmd "$train_cmd" \
      600 data/train data/lang exp/tri4b_ali exp/ubm5b || exit 1;

    steps/train_sgmm2.sh --cmd "$train_cmd" \
      11000 25000 data/train data/lang exp/tri4b_ali \
      exp/ubm5b/final.ubm exp/sgmm2_5b || exit 1;

    # Background subshell: build the 3-gram graph and decode, then do the same
    # for the 4-gram graph.  Both mkgraph calls leave the subshell on failure
    # so that no decode is attempted against a graph that was not built.
    (
      utils/mkgraph.sh data/lang_test_3g exp/sgmm2_5b exp/sgmm2_5b/graph_3g || exit 1;
      steps/decode_sgmm2.sh --nj 7 --cmd "$decode_cmd" \
        --transform-dir "exp/tri4b/decode_3g_${test}" \
        exp/sgmm2_5b/graph_3g "data/${test}" "exp/sgmm2_5b/decode_3g_${test}"
  #    steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_tgpr_eval92 \
  #      exp/sgmm2_5b/graph_tgpr data/test_eval92 exp/sgmm2_5b/decode_tgpr_eval92

      utils/mkgraph.sh data/lang_test_4g exp/sgmm2_5b exp/sgmm2_5b/graph_4g || exit 1;
      steps/decode_sgmm2.sh --nj 7 --cmd "$decode_cmd" \
        --transform-dir "exp/tri4b/decode_4g_${test}" \
        exp/sgmm2_5b/graph_4g "data/${test}" "exp/sgmm2_5b/decode_4g_${test}"
  #    steps/decode_sgmm2.sh --nj 8 --cmd "$decode_cmd" --transform-dir exp/tri4b/decode_bd_tgpr_eval92 \
  #      exp/sgmm2_5b/graph_bd_tgpr data/test_eval92 exp/sgmm2_5b/decode_bd_tgpr_eval92
    ) &
  
  
   # This shows how you would build and test a quinphone SGMM2 system
   # (--context-width=5 --central-position=2).  It gets no graph of its own:
   # it is decoded from the lattices already produced by the sgmm2_5b decodes.
    (
      steps/train_sgmm2.sh --cmd "$train_cmd" \
        --context-opts "--context-width=5 --central-position=2" \
        11000 25000 data/train data/lang exp/tri4b_ali \
        exp/ubm5b/final.ubm exp/sgmm2_5c || exit 1;
    ) &
    quinphone_train_pid=$!

    # Fetch the training status first (a bare `wait` would discard it), then
    # wait for every other background job.  After this point the sgmm2_5b
    # decodes have finished, so the lattices read below are complete.
    wait "$quinphone_train_pid"
    quinphone_train_status=$?
    wait

    if [ "$quinphone_train_status" -eq 0 ]; then
      # Decode from lattices in exp/sgmm2_5b; the 3g and 4g jobs are
      # independent of each other and run side by side.
      steps/decode_sgmm2_fromlats.sh --cmd "$decode_cmd" \
        --transform-dir "exp/tri4b/decode_3g_${test}" \
        "data/${test}" data/lang_test_3g \
        "exp/sgmm2_5b/decode_3g_${test}" "exp/sgmm2_5c/decode_3g_${test}" &
      steps/decode_sgmm2_fromlats.sh --cmd "$decode_cmd" \
        --transform-dir "exp/tri4b/decode_4g_${test}" \
        "data/${test}" data/lang_test_4g \
        "exp/sgmm2_5b/decode_4g_${test}" "exp/sgmm2_5c/decode_4g_${test}" &
      # Both decodes are direct children of this shell, so this really waits
      # for them before the script moves on.
      wait
    else
      # A failed quinphone system only skips its own decoding; the rest of
      # the script does not depend on exp/sgmm2_5c.
      echo "$0: training of exp/sgmm2_5c failed, skipping its decoding" >&2
    fi
  
    # Boosted-MMI training of sgmm2_5b (--boost 0.1): align, create
    # denominator lattices, train.  Each step reads the directory written by
    # the previous one, so each one aborts the script on failure.
    steps/align_sgmm2.sh --nj 30 --cmd "$train_cmd" --transform-dir exp/tri4b_ali \
      --use-graphs true --use-gselect true data/train data/lang exp/sgmm2_5b exp/sgmm2_5b_ali || exit 1;

    steps/make_denlats_sgmm2.sh --nj 30 --sub-split 2 --cmd "$decode_cmd" --transform-dir exp/tri4b_ali \
      data/train data/lang exp/sgmm2_5b_ali exp/sgmm2_5b_denlats || exit 1;

    steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali --boost 0.1 \
      data/train data/lang exp/sgmm2_5b_ali exp/sgmm2_5b_denlats exp/sgmm2_5b_mmi_b0.1 || exit 1;

    # Rescore the 4-gram decode of sgmm2_5b with MMI iterations 1..4, one
    # background job per iteration.  Only the set named by $test is rescored,
    # so no inner loop over test sets is needed.
    for iter in 1 2 3 4; do
      steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter "$iter" \
        --transform-dir "exp/tri4b/decode_4g_${test}" data/lang_test_4g "data/${test}" \
        "exp/sgmm2_5b/decode_4g_${test}" \
        "exp/sgmm2_5b_mmi_b0.1/decode_4g_${test}_it${iter}" &
    done
  
    # Let the background rescoring jobs launched earlier finish before the
    # next training run starts.
    wait

    # Same MMI setup with --drop-frames true; output goes to
    # exp/sgmm2_5b_mmi_b0.1_z.  Abort on failure because the rescoring loop
    # below needs the trained iterations.
    steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" --transform-dir exp/tri4b_ali --boost 0.1 \
      --drop-frames true data/train data/lang exp/sgmm2_5b_ali exp/sgmm2_5b_denlats exp/sgmm2_5b_mmi_b0.1_z || exit 1;

    # Rescore only the set named by $test.  The list used to contain the
    # literal word "test" as well: this script creates
    # exp/sgmm2_5b/decode_4g_<name> for $test only, and with $test = "test"
    # the same job was started twice at once into a single output directory.
    for iter in 1 2 3 4; do
      steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter "$iter" \
        --transform-dir "exp/tri4b/decode_4g_${test}" data/lang_test_4g "data/${test}" \
        "exp/sgmm2_5b/decode_4g_${test}" \
        "exp/sgmm2_5b_mmi_b0.1_z/decode_4g_${test}_it${iter}" &
    done

  # The combination step that follows reads rescoring output, so collect all
  # background jobs first.
  wait
  
  # Example of system combination: the MMI+fMMI decode of the conventional
  # tri4b system is combined with an SGMM+MMI decode of sgmm2_5b.
  # The argument list is assembled first so that each directory sits on its
  # own line; expansions are left unquoted, exactly as on a plain command line.
  # NOTE(review): the first system is a decode_3g_* directory while the lang
  # directory, the second system and the last argument are all 4g -- confirm
  # that this mix is intended.
  combine_args=(
    data/${test}
    data/lang_test_4g
    exp/tri4b_fmmi_a/decode_3g_${test}_it8
    exp/sgmm2_5b_mmi_b0.1/decode_4g_${test}_it3
    exp/combine_tri4b_fmmi_a_sgmm2_5b_mmi_b0.1/decode_4g_${test}_it8_3
  )
  local/score_combine.sh "${combine_args[@]}"