#!/bin/bash
#
# Based mostly on the Switchboard recipe. The training database is TED-LIUM,
# which consists of TED talks with cleaned automatic transcripts:
#
# http://www-lium.univ-lemans.fr/en/content/ted-lium-corpus
# http://www.openslr.org/resources (Mirror).
#
# The data is distributed under the 'Creative Commons BY-NC-ND 3.0' license,
# which allows free non-commercial use, requiring only a citation.
#
# Copyright 2014 Nickolay V. Shmyrev
# 2015 Brno University of Technology (Author: Karel Vesely)
# 2015 Alex Glubshev
# Apache 2.0
#
. ./cmd.sh
. ./path.sh
nj=32
njdec=11
njfea=10
# label,
exp=BN
# source data,
ali_src=exp/tri3_ali
graph_src=exp/tri3/graph
# fbank features
test=data-fbank/test
train=data-fbank/train
test_original=data/test
train_original=data/train
# bn features,
test_bn=data-fbank-${exp}-bn/test
train_bn=data-fbank-${exp}-bn/train
# fmllr features,
test_bn_fmllr=data-fbank-${exp}-bn-fmllr/test
train_bn_fmllr=data-fbank-${exp}-bn-fmllr/train
stage=0
. utils/parse_options.sh # accept options
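# Any of the variables above can be overridden from the command line via
# utils/parse_options.sh, e.g. (hypothetical values):
#   ./run_dnn_bn.sh --stage 3 --nj 16 --exp BN2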
# Make the kaldi FBANK+PITCH features,
[ ! -e $test ] && if [ $stage -le 0 ]; then
  # Test set
  utils/copy_data_dir.sh $test_original $test || exit 1; rm $test/{cmvn,feats}.scp
  steps/make_fbank_pitch.sh --nj $njfea --cmd "$train_cmd" \
    $test $test/log $test/data || exit 1;
  steps/compute_cmvn_stats.sh $test $test/log $test/data || exit 1;
  # Train set
  utils/copy_data_dir.sh $train_original $train || exit 1; rm $train/{cmvn,feats}.scp
  steps/make_fbank_pitch.sh --nj $njfea --cmd "$train_cmd" \
    $train $train/log $train/data || exit 1;
  steps/compute_cmvn_stats.sh $train $train/log $train/data || exit 1;
  # Split to training 90%, cv 10%,
  utils/subset_data_dir_tr_cv.sh $train ${train}_tr90 ${train}_cv10 || exit 1;
fi
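# Note: steps/nnet/train.sh uses the held-out ${train}_cv10 set for per-epoch
# cross-validation, which drives the learning-rate scheduling ('nnet1' convention).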
# Train the bottleneck network,
lang=data/lang_test
if [ $stage -le 1 ]; then
  dir=exp/dnn8a_${exp}_bn-feat
  ali=$ali_src
  $cuda_cmd $dir/log/train_nnet.log \
    steps/nnet/train.sh --hid-layers 2 --hid-dim 1500 --bn-dim 40 \
      --cmvn-opts "--norm-means=true --norm-vars=false" --feat-type traps \
      --splice 5 --traps-dct-basis 6 --learn-rate 0.008 \
      ${train}_tr90 ${train}_cv10 $lang $ali $ali $dir || exit 1
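  # Rough sketch of the assumed 'traps' front-end: each fbank band's
  # (2*5+1)=11-frame trajectory is compressed by a 6-basis DCT, then two
  # 1500-unit hidden layers feed the 40-dim linear bottleneck whose
  # activations are exported as features below.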
  # Decode test,
  steps/nnet/decode.sh --nj $njdec --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.10 \
    $graph_src $test $dir/decode_test || exit 1
fi
# Store the bottleneck features,
if [ $stage -le 2 ]; then
  dir=exp/dnn8a_${exp}_bn-feat
  # Test set
  steps/nnet/make_bn_feats.sh --nj $njfea --cmd "$train_cmd" $test_bn $test $dir $test_bn/log $test_bn/data || exit 1
  steps/compute_cmvn_stats.sh $test_bn $test_bn/log $test_bn/data || exit 1;
  # Train set
  steps/nnet/make_bn_feats.sh --nj $njfea --cmd "$train_cmd" $train_bn $train $dir $train_bn/log $train_bn/data || exit 1
  steps/compute_cmvn_stats.sh $train_bn $train_bn/log $train_bn/data || exit 1;
fi
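# Note: the BN features are the 40-dim bottleneck activations obtained by
# forwarding the fbank+pitch features through the network above; CMVN stats
# are computed here, but the GMM training below disables the normalization.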
# Train GMM on bottleneck features,
lang_test=data/lang_test
if [ $stage -le 3 ]; then
  dir=exp/dnn8b_${exp}_bn-gmm
  # Train,
  # gmm on bn features, no cmvn, no lda-mllt,
  steps/train_deltas.sh --power 0.5 --boost-silence 1.5 --cmd "$train_cmd" \
    --delta-opts "--delta-order=0" \
    --cmvn-opts "--norm-means=false --norm-vars=false" \
    --beam 20 --retry-beam 80 \
    5000 80000 $train_bn $lang $ali_src $dir || exit 1
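  # Here --delta-order=0 disables deltas, so the GMM models the raw 40-dim
  # bottleneck stream; 5000 and 80000 are the usual <num-leaves> <tot-gauss>
  # positional arguments of steps/train_deltas.sh.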
  # Decode,
  utils/mkgraph.sh $lang_test $dir $dir/graph || exit 1
  steps/decode.sh --nj $njdec --cmd "$decode_cmd" \
    --acwt 0.05 --beam 15.0 --lattice-beam 8.0 \
    $dir/graph $test_bn $dir/decode_$(basename $test_bn) || exit 1
  # Align,
  steps/align_fmllr.sh --boost-silence 1.5 --nj $nj --cmd "$train_cmd" \
    --beam 20 --retry-beam 80 \
    $train_bn $lang $dir ${dir}_ali || exit 1;
fi
# Train SAT-adapted GMM on bottleneck features,
if [ $stage -le 4 ]; then
  dir=exp/dnn8c_${exp}_fmllr-gmm
  ali=exp/dnn8b_${exp}_bn-gmm_ali
  # Train,
  # fmllr-gmm system on bottleneck features,
  # - no cmvn, the fmllr is applied directly to the features (no lda),
  # - note1 : we don't need cmvn, a similar effect comes from the diagonal of the fmllr transform,
  # - note2 : lda+mllt was causing a small accuracy hit of <0.5%,
  steps/train_sat.sh --power 0.5 --boost-silence 1.5 --cmd "$train_cmd" \
    --beam 20 --retry-beam 80 \
    5000 80000 $train_bn $lang $ali $dir || exit 1
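  # SAT training estimates one affine fMLLR transform per speaker, so the
  # GMM sees speaker-normalized bottleneck features (hence no CMVN, note1).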
  # Decode,
  utils/mkgraph.sh $lang_test $dir $dir/graph || exit 1;
  steps/decode_fmllr.sh --nj $njdec --cmd "$decode_cmd" \
    --acwt 0.05 --beam 15.0 --lattice-beam 8.0 \
    $dir/graph $test_bn $dir/decode_$(basename $test_bn) || exit 1
  # Align,
  steps/align_fmllr.sh --boost-silence 1.5 --nj $nj --cmd "$train_cmd" \
    --beam 20 --retry-beam 80 \
    $train_bn $lang $dir ${dir}_ali || exit 1;
fi
# Store the bottleneck-FMLLR features,
gmm=exp/dnn8c_${exp}_fmllr-gmm # fmllr-feats, dnn-targets,
graph=$gmm/graph
if [ $stage -le 5 ]; then
  # Test set
  steps/nnet/make_fmllr_feats.sh --nj $njfea --cmd "$train_cmd" \
    --transform-dir $gmm/decode_$(basename $test_bn) \
    $test_bn_fmllr $test_bn $gmm $test_bn_fmllr/log $test_bn_fmllr/data || exit 1;
  # Train set
  steps/nnet/make_fmllr_feats.sh --nj $njfea --cmd "$train_cmd --max-jobs-run 10" \
    --transform-dir ${gmm}_ali \
    $train_bn_fmllr $train_bn $gmm $train_bn_fmllr/log $train_bn_fmllr/data || exit 1;
  # Split the training set,
  utils/subset_data_dir_tr_cv.sh --cv-spk-percent 10 $train_bn_fmllr ${train_bn_fmllr}_tr90 ${train_bn_fmllr}_cv10
fi
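# Note: unlike the stage-0 split, --cv-spk-percent 10 holds out whole
# speakers, so the cross-validation speakers are unseen during training.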
#------------------------------------------------------------------------------------
# Pre-train stack of RBMs (6 layers, 2048 units),
if [ $stage -le 6 ]; then
  dir=exp/dnn8d_${exp}_pretrain-dbn; mkdir -p $dir
  # Create input transform, splice 13 frames [ -10 -5..+5 +10 ],
  echo "<Splice> <InputDim> 40 <OutputDim> 520 <BuildVector> -10 -5:1:5 10 </BuildVector>" >$dir/proto.main
  $cuda_cmd $dir/log/pretrain_dbn.log \
    steps/nnet/pretrain_dbn.sh --feature-transform-proto $dir/proto.main \
      $train_bn_fmllr $dir || exit 1
fi
#------------------------------------------------------------------------------------
# Train the DNN optimizing cross-entropy,
if [ $stage -le 7 ]; then
  dir=exp/dnn8e_${exp}_pretrain-dbn_dnn
  ali=${gmm}_ali
  feature_transform=exp/dnn8d_${exp}_pretrain-dbn/final.feature_transform # re-use
  dbn=exp/dnn8d_${exp}_pretrain-dbn/6.dbn # re-use
  # Train,
  $cuda_cmd $dir/log/train_nnet.log \
    steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
      ${train_bn_fmllr}_tr90 ${train_bn_fmllr}_cv10 $lang $ali $ali $dir || exit 1;
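  # With --hid-layers 0 and --dbn, train.sh is assumed to add only the
  # softmax output layer on top of the 6 pretrained RBM layers and then
  # fine-tune the whole stack with cross-entropy ('nnet1' behavior).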
  # Decode test,
  steps/nnet/decode.sh --nj $njdec --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt 0.10 \
    $graph $test_bn_fmllr $dir/decode_$(basename $test_bn_fmllr) || exit 1
fi
#------------------------------------------------------------------------------------
# Finally we optimize the sMBR criterion, using stochastic-GD with per-utterance updates,
dir=exp/dnn8f_${exp}_pretrain-dbn_dnn_smbr
srcdir=exp/dnn8e_${exp}_pretrain-dbn_dnn
acwt=0.1
#
if [ $stage -le 8 ]; then
  # Generate lattices and alignments,
  steps/nnet/align.sh --nj $nj --cmd "$train_cmd" \
    $train_bn_fmllr $lang $srcdir ${srcdir}_ali || exit 1;
  steps/nnet/make_denlats.sh --nj $nj --cmd "$decode_cmd" --acwt $acwt \
    $train_bn_fmllr $lang $srcdir ${srcdir}_denlats || exit 1;
fi
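# In sMBR training the alignments provide the numerator (reference) statistics
# and the lattices the denominator (competing hypotheses); the same acwt is
# used for lattice generation, training and decoding.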
if [ $stage -le 9 ]; then
  # Do 4 epochs of sMBR (leaving out all silence frames and compensating insertions),
  steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt \
    --do-smbr true --exclude-silphones true --one-silence-class true \
    $train_bn_fmllr $lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
  # Decode test,
  for ITER in 1 2 3 4; do
    steps/nnet/decode.sh --nj $njdec --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
      --nnet $dir/${ITER}.nnet \
      $graph $test_bn_fmllr $dir/decode_$(basename $test_bn_fmllr)_it${ITER} || exit 1
  done
fi
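# Decoding every sMBR iteration lets us pick the best epoch by WER, e.g. with
# the usual Kaldi idiom (run by hand; exact paths depend on the scoring script):
#   for x in exp/dnn8f_*/decode_*; do grep WER $x/wer_* | utils/best_wer.sh; done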
echo "$0 success."
exit 0