egs/tedlium/s5/local/nnet/run_dnn_bn.sh

  #!/bin/bash
  #
  # Based mostly on the Switchboard recipe. The training database is TED-LIUM;
  # it consists of TED talks with cleaned automatic transcripts:
  #
  # http://www-lium.univ-lemans.fr/en/content/ted-lium-corpus
  # http://www.openslr.org/resources (Mirror).
  #
  # The data is distributed under the 'Creative Commons BY-NC-ND 3.0' license,
  # which allows free non-commercial use; only a citation is required.
  #
  # Copyright  2014 Nickolay V. Shmyrev 
  #            2015 Brno University of Technology (Author: Karel Vesely)
  #            2015 Alex Glubshev
  # Apache 2.0
  #
  
  . ./cmd.sh
  . ./path.sh
  
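  # number of parallel jobs: nj for alignments/lattices, njdec for decoding,
  # njfea for feature extraction,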
  nj=32
  njdec=11
  njfea=10
  
  # label,
  exp=BN
  
  # source data,
  ali_src=exp/tri3_ali
  graph_src=exp/tri3/graph
  
  # fbank features
  test=data-fbank/test
  train=data-fbank/train
  
  test_original=data/test
  train_original=data/train
  
  # bn features,
  test_bn=data-fbank-${exp}-bn/test
  train_bn=data-fbank-${exp}-bn/train
  
  # fmllr features,
  test_bn_fmllr=data-fbank-${exp}-bn-fmllr/test
  train_bn_fmllr=data-fbank-${exp}-bn-fmllr/train
  
  stage=0
  . utils/parse_options.sh # accept options
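  # the variables above can be overridden from the command line,
  # e.g. to resume from the bottleneck-GMM stage:
  #   local/nnet/run_dnn_bn.sh --stage 3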
  
  # Make the Kaldi FBANK+PITCH features,
  [ ! -e $test ] && if [ $stage -le 0 ]; then
    # Test set
    utils/copy_data_dir.sh $test_original $test || exit 1; rm $test/{cmvn,feats}.scp
    steps/make_fbank_pitch.sh --nj $njfea --cmd "$train_cmd" \
      $test $test/log $test/data || exit 1;
    steps/compute_cmvn_stats.sh $test $test/log $test/data || exit 1;  
    
    # Train set
    utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp
    steps/make_fbank_pitch.sh --nj $njfea --cmd "$train_cmd" \
      $train $train/log $train/data || exit 1;
    steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1;
    
    # Split to training 90%, cv 10%
    utils/subset_data_dir_tr_cv.sh $train ${train}_tr90 ${train}_cv10 || exit 1;
  fi
  
  # Train the bottleneck network,
  lang=data/lang_test
  if [ $stage -le 1 ]; then
    dir=exp/dnn8a_${exp}_bn-feat
    ali=$ali_src
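    # the BN network: FBANK+pitch input spliced +/-5 frames (11 total), TRAPS-DCT
    # front-end (6 DCT bases per band), 2 hidden layers of 1500 units and a
    # 40-dimensional linear bottleneck,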
    $cuda_cmd $dir/log/train_nnet.log \
      steps/nnet/train.sh --hid-layers 2 --hid-dim 1500 --bn-dim 40 \
      --cmvn-opts "--norm-means=true --norm-vars=false" --feat-type traps \
      --splice 5 --traps-dct-basis 6 --learn-rate 0.008 \
      ${train}_tr90 ${train}_cv10 $lang $ali $ali $dir || exit 1
    
    # Decode test,
    steps/nnet/decode.sh --nj $njdec --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.10 \
      $graph_src $test $dir/decode_test || exit 1
  fi
  
  # Store the bottleneck features,
  if [ $stage -le 2 ]; then
    dir=exp/dnn8a_${exp}_bn-feat
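    # make_bn_feats.sh forwards the FBANK+PITCH features through the network and
    # dumps the 40-dimensional bottleneck-layer activations as new features,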
    # test
    steps/nnet/make_bn_feats.sh --nj $njfea --cmd "$train_cmd" $test_bn $test $dir $test_bn/log $test_bn/data || exit 1 
    steps/compute_cmvn_stats.sh $test_bn $test_bn/log $test_bn/data || exit 1;
    # train
    steps/nnet/make_bn_feats.sh --nj $njfea --cmd "$train_cmd" $train_bn $train $dir $train_bn/log $train_bn/data || exit 1
    steps/compute_cmvn_stats.sh $train_bn $train_bn/log $train_bn/data || exit 1;
  fi
  
  # Train GMM on bottleneck features,
  lang_test=data/lang_test
  if [ $stage -le 3 ]; then
    dir=exp/dnn8b_${exp}_bn-gmm
    # Train,
    # gmm on bn features, no cmvn, no lda-mllt,
    steps/train_deltas.sh --power 0.5 --boost-silence 1.5 --cmd "$train_cmd" \
      --delta-opts "--delta-order=0" \
      --cmvn-opts "--norm-means=false --norm-vars=false" \
      --beam 20 --retry-beam 80 \
      5000 80000 $train_bn $lang $ali_src $dir || exit 1
    # Decode,
    utils/mkgraph.sh $lang_test $dir $dir/graph || exit 1
    steps/decode.sh --nj $njdec --cmd "$decode_cmd" \
      --acwt 0.05 --beam 15.0 --lattice-beam 8.0 \
      $dir/graph $test_bn $dir/decode_$(basename $test_bn) || exit 1
    # Align,
    steps/align_fmllr.sh --boost-silence 1.5 --nj $nj --cmd "$train_cmd" \
      --beam 20 --retry-beam 80 \
      $train_bn $lang $dir ${dir}_ali || exit 1;
  fi
  
  # Train SAT-adapted GMM on bottleneck features,
  if [ $stage -le 4 ]; then
    dir=exp/dnn8c_${exp}_fmllr-gmm
    ali=exp/dnn8b_${exp}_bn-gmm_ali
    # Train,
    # fmllr-gmm system on bottleneck features, 
    # - no cmvn, fmllr is applied directly to the features (no lda),
    # - note1 : cmvn is not needed, the diagonal of the fmllr transform has a similar effect,
    # - note2 : lda+mllt caused a small WER hit of <0.5%,
    steps/train_sat.sh --power 0.5 --boost-silence 1.5 --cmd "$train_cmd" \
      --beam 20 --retry-beam 80 \
      5000 80000 $train_bn $lang $ali $dir || exit 1
    # Decode,
    utils/mkgraph.sh $lang_test $dir $dir/graph || exit 1;
    steps/decode_fmllr.sh --nj $njdec --cmd "$decode_cmd" \
      --acwt 0.05 --beam 15.0 --lattice-beam 8.0 \
      $dir/graph $test_bn $dir/decode_$(basename $test_bn) || exit 1
    # Align,
    steps/align_fmllr.sh --boost-silence 1.5 --nj $nj --cmd "$train_cmd" \
      --beam 20 --retry-beam 80 \
      $train_bn $lang $dir ${dir}_ali || exit 1;
  fi
  
  # Store the bottleneck-FMLLR features,
  gmm=exp/dnn8c_${exp}_fmllr-gmm # fmllr-feats, dnn-targets,
  graph=$gmm/graph
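  # the fMLLR transforms come from the SAT-GMM: decoding transforms for the test
  # set, alignment transforms for the train set,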
  if [ $stage -le 5 ]; then
    # Test set
    steps/nnet/make_fmllr_feats.sh --nj $njfea --cmd "$train_cmd" \
       --transform-dir $gmm/decode_$(basename $test_bn) \
       $test_bn_fmllr $test_bn $gmm $test_bn_fmllr/log $test_bn_fmllr/data || exit 1;
    # Training set
    steps/nnet/make_fmllr_feats.sh --nj $njfea --cmd "$train_cmd --max-jobs-run 10" \
       --transform-dir ${gmm}_ali \
       $train_bn_fmllr $train_bn $gmm $train_bn_fmllr/log $train_bn_fmllr/data || exit 1;
    # Split the training set
    utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train_bn_fmllr ${train_bn_fmllr}_tr90 ${train_bn_fmllr}_cv10
  fi
  
  #------------------------------------------------------------------------------------
  # Pre-train stack of RBMs (6 layers, 2048 units),
  if [ $stage -le 6 ]; then
    dir=exp/dnn8d_${exp}_pretrain-dbn; mkdir -p $dir
    # Create input transform, splice 13 frames [ -10 -5..+5 +10 ],
    echo "<Splice> <InputDim> 40 <OutputDim> 520 <BuildVector> -10 -5:1:5 10 </BuildVector>" >$dir/proto.main
    $cuda_cmd $dir/log/pretrain_dbn.log \
      steps/nnet/pretrain_dbn.sh --feature-transform-proto $dir/proto.main \
      $train_bn_fmllr $dir || exit 1
  fi
  
  #------------------------------------------------------------------------------------
  # Train the DNN optimizing cross-entropy,
  if [ $stage -le 7 ]; then
    dir=exp/dnn8e_${exp}_pretrain-dbn_dnn
    ali=${gmm}_ali
    feature_transform=exp/dnn8d_${exp}_pretrain-dbn/final.feature_transform # re-use
    dbn=exp/dnn8d_${exp}_pretrain-dbn/6.dbn # re-use
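    # with --hid-layers 0 no new hidden layers are added, the training just
    # stacks a softmax output layer on top of the pre-trained DBN,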
    # Train  
    $cuda_cmd $dir/log/train_nnet.log \
      steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
      ${train_bn_fmllr}_tr90 ${train_bn_fmllr}_cv10 $lang $ali $ali $dir || exit 1;
    # Decode test
    steps/nnet/decode.sh --nj $njdec --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.10 \
      $graph $test_bn_fmllr $dir/decode_$(basename $test_bn_fmllr) || exit 1
  fi
  
  #------------------------------------------------------------------------------------
  # Finally we optimize the sMBR criterion, doing stochastic GD with per-utterance updates,
  dir=exp/dnn8f_${exp}_pretrain-dbn_dnn_smbr
  srcdir=exp/dnn8e_${exp}_pretrain-dbn_dnn
  acwt=0.1
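  # acoustic scale used for lattice generation and sMBR training, kept the same
  # for the decoding below,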
  #
  if [ $stage -le 8 ]; then
    # Generate lattices and alignments
    steps/nnet/align.sh --nj $nj --cmd "$train_cmd" \
      $train_bn_fmllr $lang $srcdir ${srcdir}_ali || exit 1;
    steps/nnet/make_denlats.sh --nj $nj --cmd "$decode_cmd" --acwt $acwt \
      $train_bn_fmllr $lang $srcdir ${srcdir}_denlats  || exit 1;
  fi
  if [ $stage -le 9 ]; then
    # Do 4 epochs of sMBR (leaving out all silence frames and compensating insertions), 
    steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt \
      --do-smbr true --exclude-silphones true --one-silence-class true \
      $train_bn_fmllr $lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
    # Decode test,
    for ITER in 1 2 3 4; do
      steps/nnet/decode.sh --nj $njdec --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
        --nnet $dir/${ITER}.nnet \
        $graph $test_bn_fmllr $dir/decode_$(basename $test_bn_fmllr)_it${ITER} || exit 1
    done
  fi 
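
  # Getting results: scan the decode dirs for the best WER (the standard Kaldi
  # one-liner; assumes scoring has produced the wer_* files),
  # for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done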
  
  echo "$0 success."
  exit 0