#!/bin/bash
# Copyright 2012/2013 Karel Vesely (Brno University of Technology)
# Apache 2.0
# Begin configuration.
config= # config, which is also sent to all other scripts
# NETWORK INITIALIZATION
mlp_init= # select initialized MLP (override initialization)
feature_transform= # select feature transform (e.g. splice, rescaling, ...) (don't build a new one)
#
model_size=8000000 # nr. of parameters in the MLP
hid_layers=4 # nr. of hidden layers (prior to softmax or bottleneck)
bn_dim= # set a value to get a bottleneck network
hid_dim= # select hidden dimension directly (override $model_size)
dbn= # select DBN to prepend to the MLP initialization
#
init_opts= # options, passed to the initialization script
# FEATURE PROCESSING
copy_feats=true # resave the train features in the re-shuffled order to tmpdir
# feature config (applies always)
apply_cmvn=false # apply normalization to input features?
norm_vars=false # use variance normalization?
delta_order=
# feature_transform:
splice=5 # temporal splicing
splice_step=1 # stepsize of the splicing (1 == no gap between frames)
feat_type=plain
# feature config (applies to feat_type traps)
traps_dct_basis=11 # nr. of DCT bases (applies to `traps` feat_type, assumes splice=10)
# feature config (applies to feat_type transf) (i.e. LDA+MLLT, no fMLLR)
transf=
splice_after_transf=5
# feature config (applies to feat_type lda)
lda_dim=300 # LDA dimension (applies to `lda` feat_type)
# LABELS
labels= # use these labels to train (override default pdf alignments)
num_tgt= # force to use number of outputs in the MLP (default is autodetect)
# TRAINING SCHEDULER
learn_rate=0.008 # initial learning rate
train_opts= # options, passed to the training script
train_tool= # optionally change the training tool
# OTHER
use_gpu_id= # manually select GPU id to run on (-1 disables GPU)
analyze_alignments=true # run the alignment analysis script
seed=777 # seed value used for training data shuffling and initialization
# End configuration.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh;
. parse_options.sh || exit 1;
if [ $# != 6 ]; then
echo "Usage: $0 <data-train> <data-dev> <lang-dir> <ali-train> <ali-dev> <exp-dir>"
echo " e.g.: $0 data/train data/cv data/lang exp/mono_ali exp/mono_ali_cv exp/mono_nnet"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
exit 1;
fi
data=$1
data_cv=$2
lang=$3
alidir=$4
alidir_cv=$5
dir=$6
silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
for f in $alidir/final.mdl $alidir/ali.1.gz $alidir_cv/ali.1.gz $data/feats.scp $data_cv/feats.scp; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
echo
echo "# INFO"
echo "$0 : Training Neural Network"
printf "\t dir : $dir \n"
printf "\t Train-set : $data $alidir \n"
printf "\t CV-set : $data_cv $alidir_cv \n"
mkdir -p $dir/{log,nnet}
#skip when already trained
[ -e $dir/final.nnet ] && printf "\nSKIPPING TRAINING... ($0)\nnnet already trained : $dir/final.nnet ($(readlink $dir/final.nnet))\n\n" && exit 0
###### PREPARE ALIGNMENTS ######
echo
echo "# PREPARING ALIGNMENTS"
if [ ! -z "$labels" ]; then
echo "Using targets '$labels' (by force)"
else
echo "Using PDF targets from dirs '$alidir' '$alidir_cv'"
#define pdf-alignment rspecifiers
labels_tr="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- |"
if [[ "$alidir" == "$alidir_cv" ]]; then
labels="$labels_tr"
else
labels="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz $alidir_cv/ali.*.gz |\" ark:- |"
fi
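# For illustration: when the scheduler reads "$labels", Kaldi's table code
# expands the rspecifier into a pipe roughly equivalent to
#   ali-to-pdf $alidir/final.mdl "ark:gunzip -c $alidir/ali.*.gz |" ark:- | ...
# i.e. transition-id alignments are converted on the fly into pdf-id targets,
# one integer vector per utterance, keyed by utterance-id.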
#get the priors, get pdf-counts from alignments
analyze-counts --binary=false "$labels_tr" $dir/ali_train_pdf.counts || exit 1
#copy the old transition model, will be needed by decoder
copy-transition-model --binary=false $alidir/final.mdl $dir/final.mdl || exit 1
#copy the tree
cp $alidir/tree $dir/tree || exit 1
#analyze the train/cv alignments
if [ "$analyze_alignments" == "true" ]; then
utils/nnet/analyze_alignments.sh "TRAINING SET" "ark:gunzip -c $alidir/ali.*.gz |" $dir/final.mdl $lang > $dir/__ali_stats_train
utils/nnet/analyze_alignments.sh "VALIDATION SET" "ark:gunzip -c $alidir_cv/ali.*.gz |" $dir/final.mdl $lang > $dir/__ali_stats_cv
fi
fi
###### PREPARE FEATURES ######
echo
echo "# PREPARING FEATURES"
# shuffle the list
echo "Preparing train/cv lists :"
cat $data/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} > $dir/train.scp
cp $data_cv/feats.scp $dir/cv.scp
# print the list sizes
wc -l $dir/train.scp $dir/cv.scp
#re-save the shuffled features, so they are stored sequentially on the disk in /tmp/
if [ "$copy_feats" == "true" ]; then
tmpdir=$(mktemp -d); mv $dir/train.scp $dir/train.scp_non_local
utils/nnet/copy_feats.sh $dir/train.scp_non_local $tmpdir $dir/train.scp
#remove data on exit...
trap "echo \"Removing features tmpdir $tmpdir @ $(hostname)\"; rm -r $tmpdir" EXIT
fi
#create a 10k utt subset for global cmvn estimates
head -n 10000 $dir/train.scp > $dir/train.scp.10k
###### PREPARE FEATURE PIPELINE ######
#read the features
feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |"
feats_cv="ark:copy-feats scp:$dir/cv.scp ark:- |"
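# Sanity-check sketch (not executed by this script): "$feats_tr" is an ark
# rspecifier wrapping a command, so the same pipeline can be inspected by
# hand, e.g.
#   copy-feats scp:$dir/train.scp ark,t:- | head
# which prints the first feature matrices in text form.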
#optionally add per-speaker CMVN
if [ "$apply_cmvn" == "true" ]; then
echo "Will use CMVN statistics : $data/cmvn.scp, $data_cv/cmvn.scp"
[ ! -r $data/cmvn.scp ] && echo "Cannot find cmvn stats $data/cmvn.scp" && exit 1;
[ ! -r $data_cv/cmvn.scp ] && echo "Cannot find cmvn stats $data_cv/cmvn.scp" && exit 1;
feats_tr="$feats_tr apply-cmvn --print-args=false --norm-vars=$norm_vars --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp ark:- ark:- |"
feats_cv="$feats_cv apply-cmvn --print-args=false --norm-vars=$norm_vars --utt2spk=ark:$data_cv/utt2spk scp:$data_cv/cmvn.scp ark:- ark:- |"
# keep track of norm_vars option
echo "$norm_vars" >$dir/norm_vars
else
echo "apply_cmvn is disabled (per speaker norm. on input features)"
fi
#optionally add deltas
if [ "$delta_order" != "" ]; then
feats_tr="$feats_tr add-deltas --delta-order=$delta_order ark:- ark:- |"
feats_cv="$feats_cv add-deltas --delta-order=$delta_order ark:- ark:- |"
echo "$delta_order" > $dir/delta_order
echo "add-deltas (delta_order $delta_order)"
fi
#get feature dim
echo "Getting feature dim : "
feat_dim=$(feat-to-dim --print-args=false "$feats_tr" -)
echo "Feature dim is : $feat_dim"
# Now we start building the composite feature_transform, which will be
# applied on the GPU to gain speed.
#
# We use a single GPU for both the feature_transform and the MLP training,
# inside one binary tool. This is against the Kaldi spirit, but it is
# necessary: on some sites a GPU cannot be shared across two or more
# processes (compute-exclusive mode), and we want a single GPU per training
# instance, so that the grid resources are used efficiently...
if [ ! -z "$feature_transform" ]; then
echo "Using pre-computed feature-transform : '$feature_transform'"
tmp=$dir/$(basename $feature_transform)
cp $feature_transform $tmp; feature_transform=$tmp
else
# Generate the splice transform
echo "Using splice +/- $splice , step $splice_step"
feature_transform=$dir/tr_splice$splice-$splice_step.nnet
utils/nnet/gen_splice.py --fea-dim=$feat_dim --splice=$splice --splice-step=$splice_step > $feature_transform
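# The generated file is a single nnet1 splicing component; with the defaults
# (splice=5, splice_step=1) it stacks 2*5+1 = 11 consecutive frames, so e.g.
# a 39-dim input becomes a 39*11 = 429-dim spliced vector (illustrative
# numbers).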
# Choose further processing of spliced features
echo "Feature type : $feat_type"
case $feat_type in
plain)
;;
traps)
#generate hamming+dct transform
feature_transform_old=$feature_transform
feature_transform=${feature_transform%.nnet}_hamm_dct${traps_dct_basis}.nnet
echo "Preparing Hamming DCT transform into : $feature_transform"
#prepare matrices with time-transposed hamming and dct
utils/nnet/gen_hamm_mat.py --fea-dim=$feat_dim --splice=$splice > $dir/hamm.mat
utils/nnet/gen_dct_mat.py --fea-dim=$feat_dim --splice=$splice --dct-basis=$traps_dct_basis > $dir/dct.mat
#put everything together
compose-transforms --binary=false $dir/dct.mat $dir/hamm.mat - | \
transf-to-nnet - - | \
nnet-concat --binary=false $feature_transform_old - $feature_transform || exit 1
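# Sketch of the composed transform: the Hamming matrix weights each per-band
# temporal trajectory of the spliced window, and the DCT matrix compresses
# every (2*splice+1)-point trajectory down to $traps_dct_basis coefficients,
# giving an output dim of feat_dim * traps_dct_basis (e.g. 23 FBANK bands
# * 11 bases = 253 dims; illustrative numbers).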
;;
transf)
feature_transform_old=$feature_transform
feature_transform=${feature_transform%.nnet}_transf_splice${splice_after_transf}.nnet
[ -z "$transf" ] && transf=$alidir/final.mat
[ ! -f "$transf" ] && echo "Missing transf $transf" && exit 1
feat_dim=$(feat-to-dim "$feats_tr nnet-forward 'nnet-concat $feature_transform_old \"transf-to-nnet $transf - |\" - |' ark:- ark:- |" -)
nnet-concat --binary=false $feature_transform_old \
"transf-to-nnet $transf - |" \
"utils/nnet/gen_splice.py --fea-dim=$feat_dim --splice=$splice_after_transf |" \
$feature_transform || exit 1
;;
lda)
transf=$dir/lda$lda_dim.mat
#get the LDA statistics
if [ ! -r "$dir/lda.acc" ]; then
echo "LDA: Converting alignments to posteriors $dir/lda_post.scp"
ali-to-post "ark:gunzip -c $alidir/ali.*.gz|" ark:- | \
weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark,scp:$dir/lda_post.ark,$dir/lda_post.scp 2>$dir/log/ali-to-post-lda.log || exit 1;
echo "Accumulating LDA statistics $dir/lda.acc on top of spliced feats"
acc-lda --rand-prune=4.0 $alidir/final.mdl "$feats_tr nnet-forward $feature_transform ark:- ark:- |" scp:$dir/lda_post.scp $dir/lda.acc 2>$dir/log/acc-lda.log || exit 1;
else
echo "LDA: Using pre-computed stats $dir/lda.acc"
fi
#estimate the transform
echo "Estimating LDA transform $dir/lda.mat from the statistics $dir/lda.acc"
est-lda --write-full-matrix=$dir/lda.full.mat --dim=$lda_dim $transf $dir/lda.acc 2>$dir/log/lda.log || exit 1;
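# Dimensions sketch (illustrative numbers): the statistics were accumulated
# on the spliced features, so with a 429-dim spliced input and --dim=300 the
# estimated $transf is roughly a 300 x 429 linear projection.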
#append the LDA matrix to feature_transform
feature_transform_old=$feature_transform
feature_transform=${feature_transform%.nnet}_lda${lda_dim}.nnet
transf-to-nnet $transf - | \
nnet-concat --binary=false $feature_transform_old - $feature_transform || exit 1
#remove the temporary file
rm $dir/lda_post.{ark,scp}
;;
*)
echo "Unknown feature type $feat_type"
exit 1;
;;
esac
# keep track of feat_type
echo $feat_type > $dir/feat_type
# Renormalize the MLP input to zero mean and unit variance
feature_transform_old=$feature_transform
feature_transform=${feature_transform%.nnet}_cmvn-g.nnet
echo "Renormalizing MLP input features into $feature_transform"
nnet-forward ${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \
$feature_transform_old "$(echo $feats_tr | sed 's|train.scp|train.scp.10k|')" \
ark:- 2>$dir/log/nnet-forward-cmvn.log |\
compute-cmvn-stats ark:- - | cmvn-to-nnet - - |\
nnet-concat --binary=false $feature_transform_old - $feature_transform || exit 1
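# What this stage does: the ~10k-utterance subset is forwarded through the
# current feature_transform, global per-dimension mean/variance statistics
# are accumulated, and cmvn-to-nnet converts them into shift+scale components
# appended to the transform, so the MLP sees zero-mean, unit-variance inputs.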
fi
###### MAKE LINK TO THE FINAL feature_transform, so the other scripts will find it ######
(cd $dir; [ ! -f final.feature_transform ] && ln -s $(basename $feature_transform) final.feature_transform )
###### INITIALIZE THE NNET ######
echo
echo "# NN-INITIALIZATION"
if [ ! -z "$mlp_init" ]; then
echo "Using pre-initalized network $mlp_init";
else
echo "Getting input/output dims :"
#initializing the MLP, get the i/o dims...
#input-dim
num_fea=$(feat-to-dim "$feats_tr nnet-forward $feature_transform ark:- ark:- |" - )
{ #optionally take the output dim of the DBN
[ ! -z $dbn ] && num_fea=$(nnet-forward "nnet-concat $feature_transform $dbn -|" "$feats_tr" ark:- | feat-to-dim ark:- -)
[ -z "$num_fea" ] && echo "Getting nnet input dimension failed!!" && exit 1
}
#output-dim
[ -z $num_tgt ] && num_tgt=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }')
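# Illustrative output of the autodetection: `hmm-info final.mdl` prints a
# line like "number of pdfs 3432"; the grep/awk above picks the last field,
# so num_tgt would be 3432 in that case (hypothetical number).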
#run the MLP initializing script
mlp_init=$dir/nnet.init
utils/nnet/init_nnet.sh --model_size $model_size --hid_layers $hid_layers \
${bn_dim:+ --bn-dim $bn_dim} \
${hid_dim:+ --hid-dim $hid_dim} \
--seed $seed ${init_opts} \
${config:+ --config $config} \
$num_fea $num_tgt $mlp_init || exit 1
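# Sizing heuristic (a sketch; the exact formula lives in the init script):
# unless --hid-dim is given, the hidden dimension h is derived from
# $model_size by solving approximately
#   num_fea*h + (hid_layers-1)*h^2 + h*num_tgt ~= model_size
# so deeper networks get proportionally narrower hidden layers.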
#optionally prepend dbn to the initialization
if [ ! -z $dbn ]; then
mlp_init_old=$mlp_init; mlp_init=$dir/nnet_$(basename $dbn)_dnn.init
nnet-concat $dbn $mlp_init_old $mlp_init
fi
fi
###### TRAIN ######
echo
echo "# RUNNING THE NN-TRAINING SCHEDULER"
steps/train_nnet_scheduler.sh \
--feature-transform $feature_transform \
--learn-rate $learn_rate \
--seed $seed \
${train_opts} \
${train_tool:+ --train-tool "$train_tool"} \
${config:+ --config $config} \
${use_gpu_id:+ --use-gpu-id $use_gpu_id} \
$mlp_init "$feats_tr" "$feats_cv" "$labels" $dir || exit 1
echo "$0 successfuly finished.. $dir"
sleep 3
exit 0