run_tdnn_discriminative.sh
8.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
#!/bin/bash
# Safety guard: this recipe is untested, so it prints a notice and stops with
# exit status 1.  Comment out the next line to actually run the script.
echo "This script has not yet been tested, you would have to comment this statement if you want to run it. Please let us know if you see any issues" && exit 1;

# Abort on the first unhandled command failure, and make a pipeline fail if
# any of its stages fails.
set -eo pipefail

# This is run_tdnn_discriminative.sh.
# It does discriminative training on top of a chain nnet3 system.
# Note: this relies on having a cluster with plenty of CPUs as well as GPUs:
# lattice generation runs in about real time, so it takes on the order of
# 1000 hours of CPU time.
#
# ---------------------------------------------------------------------------
# Configuration.
#
# Every variable assigned before utils/parse_options.sh is sourced can be
# overridden on the command line as "--variable-name value" (Kaldi's
# parse_options.sh only accepts options whose variable already exists at that
# point).  Anything assigned after it is derived from those values and cannot
# be overridden, so all user-tunable settings must live in this first part.
# ---------------------------------------------------------------------------
stage=0
train_stage=-10   # can be used to start training in the middle.
get_egs_stage=-10
use_gpu=true      # for training
cleanup=false     # run with --cleanup true --stage 6 to clean up (remove large
                  # things like denlats, alignments and degs).

train_set=train_960_cleaned
gmm=tri6b_cleaned   # this is the source gmm-dir for the data-type of interest; it
                    # should have alignments for the specified training data.
nnet3_affix=_cleaned

degs_dir=   # If provided, will skip the degs directory creation
lats_dir=   # If provided, will skip denlats creation

## Objective options
criterion=smbr
one_silence_class=true

## Egs options
frames_per_eg=150
frames_overlap_per_eg=30
# Extra context added on top of the model's own left/right context when
# dumping discriminative egs.  Declared here so that they have an explicit
# default and can be set with --extra-left-context / --extra-right-context.
extra_left_context=0
extra_right_context=0

## Nnet training options
effective_learning_rate=0.000001
max_param_change=1
num_jobs_nnet=4
num_epochs=3
regularization_opts="--xent-regularize=0.1 --l2-regularize=0.00005"   # Applicable for providing --xent-regularize and --l2-regularize options
minibatch_size=64

## Decode options
decode_start_epoch=1   # can be used to avoid decoding all epochs, e.g. if we decided to run more.

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

# Paths derived from the (possibly overridden) options above.
srcdir=exp/chain${nnet3_affix}/tdnn_sp
graph_dir=$srcdir/graph_tgsmall
train_data_dir=data/${train_set}_sp_hires_comb
train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb
dir=${srcdir}_${criterion}
# Choose the number of training threads, and refuse to continue when GPU
# training was requested but Kaldi was built without CUDA.
if ! $use_gpu; then
  # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
  # almost the same, but this may be a little bit slow.
  num_threads=16
else
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed. Otherwise, call this script with --use-gpu false
EOF
  fi
  num_threads=1
fi
# The source chain model must already exist; stop early if it does not.
[ -f ${srcdir}/final.mdl ] || {
  echo "$0: expected ${srcdir}/final.mdl to exist; first run run_tdnn.sh or run_lstm.sh"
  exit 1;
}
lang=data/lang

# Defaults used when the model directory records no frame subsampling factor.
frame_subsampling_factor=1
frame_subsampling_opt=
if [ -f $srcdir/frame_subsampling_factor ]; then
  # Read the factor stored with the model and forward it as an option.
  frame_subsampling_factor=$(<$srcdir/frame_subsampling_factor)
  frame_subsampling_opt="--frame-subsampling-factor $frame_subsampling_factor"
fi
# Frame-shifted training data.
#
# When the model uses a frame subsampling factor other than 1, training is
# done on a combined data directory holding one copy of the features per
# frame shift (utterance ids prefixed with "fs<shift>-"), together with a
# matching online-ivector directory.  In that case train_data_dir,
# train_ivector_dir and affix are switched to their "_fs" variants.
#
# When the factor is 1 nothing is shifted, so the original data and ivector
# directories are kept as they are.  (Previously the ivector directory was
# switched to "_fs" unconditionally, which pointed at a directory that is
# never created in that case.)
affix=   # Will be set if doing input frame shift
if [ "$frame_subsampling_factor" -ne 1 ]; then
  # Shifts run from -factor/2 to +factor/2, e.g. "-1 0 1" for a factor of 3.
  max_shift=$((frame_subsampling_factor / 2))
  frame_shifts=$(seq -$max_shift $max_shift)

  if [ $stage -le 0 ]; then
    mkdir -p ${train_ivector_dir}_fs
    cp -r $train_ivector_dir/{conf,ivector_period} ${train_ivector_dir}_fs

    data_dirs=
    for x in $frame_shifts; do
      steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \
        $x $train_data_dir exp/shift_hires mfcc_hires
      utils/fix_data_dir.sh ${train_data_dir}_fs$x
      data_dirs="$data_dirs ${train_data_dir}_fs$x"
    done
    utils/combine_data.sh ${train_data_dir}_fs $data_dirs
    # The per-shift directories are only needed as input to combine_data.sh.
    for x in $frame_shifts; do
      rm -r ${train_data_dir}_fs$x
    done
  fi

  # (Re)build the ivector scp for the shifted utterance ids.  This is done
  # regardless of $stage, so it also covers restarts at a later stage; the
  # file is removed first because the loop below appends to it.
  rm ${train_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true
  for x in $frame_shifts; do
    awk -v nfs=$x '{print "fs"nfs"-"$0}' $train_ivector_dir/ivector_online.scp \
      >> ${train_ivector_dir}_fs/ivector_online.scp
  done

  train_data_dir=${train_data_dir}_fs
  train_ivector_dir=${train_ivector_dir}_fs
  affix=_fs
fi
# Stage 1: align the training data with the source model.
if [ $stage -le 1 ]; then
  # hardcode no-GPU for alignment, although you could use GPU [you wouldn't
  # get excellent GPU utilization though.]
  nj=350   # have a high number of jobs because this could take a while, and we
           # might have some stragglers.
  align_opts=(
    --cmd "$decode_cmd"
    --use-gpu false
    --online-ivector-dir $train_ivector_dir
    --scale-opts "--transition-scale=1.0 --acoustic-scale=1.0 --self-loop-scale=1.0"
    --nj $nj
  )
  steps/nnet3/align.sh "${align_opts[@]}" \
    $train_data_dir $lang $srcdir ${srcdir}_ali${affix} ;
fi
# Stage 2: generate denominator lattices, unless an existing lattice
# directory was supplied in lats_dir.
if [ -z "$lats_dir" ]; then
  lats_dir=${srcdir}_denlats${affix}
  if [ $stage -le 2 ]; then
    # nj doesn't really affect anything strongly, except the num-jobs for one
    # of the phases of get_egs_discriminative.sh below.
    nj=50
    num_threads_denlats=6
    # subsplit is the number of jobs that run per job (but 2 run at a time,
    # so total jobs is 80, giving total slots = 80 * 6 = 480).
    subsplit=40
    denlats_opts=(
      --cmd "$decode_cmd"
      --self-loop-scale 1.0 --acwt 1.0 --determinize true
      --online-ivector-dir $train_ivector_dir
      --nj $nj --sub-split $subsplit --num-threads "$num_threads_denlats"
      --config conf/decode.config
    )
    steps/nnet3/make_denlats.sh "${denlats_opts[@]}" \
      $train_data_dir $lang $srcdir ${lats_dir} ;
  fi
fi
# Work out how much acoustic context the egs need: the context reported by
# nnet3-am-info for the source model plus any extra context requested.
# (Inside shell arithmetic an unset extra_left_context/extra_right_context
# evaluates to 0.)
model_info=$(nnet3-am-info $srcdir/final.mdl)
model_left_context=$(grep "left-context:" <<<"$model_info" | awk '{print $2}')
model_right_context=$(grep "right-context:" <<<"$model_info" | awk '{print $2}')
left_context=$((model_left_context + extra_left_context))
right_context=$((model_right_context + extra_right_context))

# Feature normalisation options stored with the source model.
cmvn_opts=$(cat $srcdir/cmvn_opts)
# Stage 3: dump discriminative training examples (degs), unless an existing
# degs directory was supplied in degs_dir.
if [ -z "$degs_dir" ]; then
  degs_dir=${srcdir}_degs${affix}

  if [ $stage -le 3 ]; then
    # On the CLSP grid, set up a split storage directory for the egs.  It has
    # to live inside $degs_dir (which includes $affix), because that is the
    # directory handed to get_egs_discriminative.sh below; using the
    # affix-less ${srcdir}_degs would set it up in a directory the egs are
    # never written to.
    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $degs_dir/storage ]; then
      utils/create_split_dir.pl \
        /export/b{01,02,12,13}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5/$degs_dir/storage $degs_dir/storage
    fi
    # have a higher maximum num-jobs if a storage/ directory is present
    if [ -d $degs_dir/storage ]; then max_jobs=10; else max_jobs=5; fi

    steps/nnet3/get_egs_discriminative.sh \
      --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \
      --adjust-priors false --acwt 1.0 \
      --online-ivector-dir $train_ivector_dir \
      --left-context $left_context --right-context $right_context \
      $frame_subsampling_opt \
      --frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \
      $train_data_dir $lang ${srcdir}_ali${affix} $lats_dir $srcdir/final.mdl $degs_dir ;
  fi
fi
# Stage 4: discriminative training on the dumped egs; the model goes to $dir.
if [ $stage -le 4 ]; then
  train_opts=(
    --cmd "$decode_cmd"
    --stage $train_stage
    --effective-lrate $effective_learning_rate --max-param-change $max_param_change
    --criterion $criterion --drop-frames true --acoustic-scale 1.0
    --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size
    --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads
    --regularization-opts "$regularization_opts" --use-frame-shift false
  )
  steps/nnet3/train_discriminative.sh "${train_opts[@]}" \
    ${degs_dir} $dir ;
fi
# Stage 5: decode every test/dev set with the model of each epoch, then
# rescore with the tgmed, tglarge and fglarge language models.  All decodes
# run as background subshells; a failing one leaves $dir/.error behind,
# which is checked once everything has finished.
if [ $stage -le 5 ]; then
  rm $dir/.error 2>/dev/null || true

  for x in $(seq $decode_start_epoch $num_epochs); do
    iter=epoch$((x * frame_subsampling_factor))
    for decode_set in test_clean test_other dev_clean dev_other; do
      (
        data=data/${decode_set}_hires
        decode_prefix=$dir/decode_${decode_set}
        # One decoding job per speaker.
        num_jobs=$(cut -d' ' -f2 $data/utt2spk | sort -u | wc -l)

        steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \
          --acwt 1.0 --post-decode-acwt 10.0 \
          --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \
          $graph_dir $data ${decode_prefix}_tgsmall_$iter || exit 1

        steps/lmrescore.sh --cmd "$decode_cmd" \
          data/lang_test_tgsmall data/lang_test_tgmed \
          $data ${decode_prefix}_tgsmall_$iter ${decode_prefix}_tgmed_$iter || exit 1

        steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
          data/lang_test_tgsmall data/lang_test_tglarge \
          $data ${decode_prefix}_tgsmall_$iter ${decode_prefix}_tglarge_$iter || exit 1

        steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
          data/lang_test_tgsmall data/lang_test_fglarge \
          $data ${decode_prefix}_tgsmall_$iter ${decode_prefix}_fglarge_$iter || exit 1
      ) || touch $dir/.error &
    done
  done
  wait

  if [ -f $dir/.error ]; then
    echo "$0: there was a problem while decoding"
    exit 1
  fi
fi
# Stage 6 (only with --cleanup true): remove the large intermediate files.
# The alignment and degs directories are created with the $affix suffix
# (${srcdir}_ali${affix}, ${srcdir}_degs${affix}), so the same suffix has to
# be used here; without it the "_fs" directories would never be cleaned up.
# Each removal is best-effort: a missing file must not abort the script.
if [ $stage -le 6 ] && $cleanup; then
  # if you run with "--cleanup true --stage 6" you can clean up.
  rm ${lats_dir}/lat.*.gz || true
  rm ${srcdir}_ali${affix}/ali.*.gz || true
  steps/nnet2/remove_egs.sh ${srcdir}_degs${affix} || true
fi

exit 0;