train_nnet_scheduler.sh
#!/bin/bash
# Copyright 2012 Karel Vesely (Brno University of Technology)
# Apache 2.0
# Train neural network
# Begin configuration.
# training options
learn_rate=0.008
momentum=0
l1_penalty=0
l2_penalty=0
# data processing
bunch_size=256
cache_size=16384
seed=777
feature_transform=
# learn rate scheduling
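# (the learning rate is halved once the per-iteration gain in cross-validation
#  frame accuracy drops below start_halving_inc, and training stops once the
#  gain drops below end_halving_inc; see the stopping criterion below)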
max_iters=20
min_iters=
start_halving_inc=0.5
end_halving_inc=0.1
halving_factor=0.5
# misc.
verbose=1
# gpu
use_gpu_id=
# tool
train_tool="nnet-train-xent-hardlab-frmshuff"
# End configuration.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh;
. parse_options.sh || exit 1;
if [ $# != 5 ]; then
echo "Usage: $0 <mlp-init> <feats-tr> <feats-cv> <labels> <exp-dir>"
echo " e.g.: $0 0.nnet scp:train.scp scp:cv.scp ark:labels.ark exp/dnn1"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
exit 1;
fi
mlp_init=$1
feats_tr=$2
feats_cv=$3
labels=$4
dir=$5
[ ! -d $dir ] && mkdir $dir
[ ! -d $dir/log ] && mkdir $dir/log
[ ! -d $dir/nnet ] && mkdir $dir/nnet
# Skip training
[ -e $dir/final.nnet ] && echo "'$dir/final.nnet' exists, skipping training" && exit 0
##############################
#start training
#choose mlp to start with
mlp_best=$mlp_init
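# base name of the initial nnet (directory and extension stripped), used to name the per-iteration nnets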
mlp_base=${mlp_init##*/}; mlp_base=${mlp_base%.*}
#optionally resume training from the best epoch
[ -e $dir/.mlp_best ] && mlp_best=$(cat $dir/.mlp_best)
[ -e $dir/.learn_rate ] && learn_rate=$(cat $dir/.learn_rate)
#prerun cross-validation
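# (evaluate the initial network on the held-out set; this gives the baseline
#  frame accuracy that the first iteration has to beat)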
$train_tool --cross-validate=true \
--bunchsize=$bunch_size --cachesize=$cache_size --verbose=$verbose \
${feature_transform:+ --feature-transform=$feature_transform} \
${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \
$mlp_best "$feats_cv" "$labels" \
2> $dir/log/prerun.log || exit 1;
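# parse the frame accuracy (%) and the average per-frame cross-entropy from the log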
acc=$(cat $dir/log/prerun.log | awk '/FRAME_ACCURACY/{ acc=$3; sub(/%/,"",acc); } END{print acc}')
xent=$(cat $dir/log/prerun.log | awk 'BEGIN{FS=":"} /err\/frm:/{ xent = $NF; } END{print xent}')
echo "CROSSVAL PRERUN ACCURACY $(printf "%.2f" $acc) (avg.xent$(printf "%.4f" $xent)), "
#resume lr-halving
halving=0
[ -e $dir/.halving ] && halving=$(cat $dir/.halving)
#training loop: train one epoch, cross-validate, accept/reject the new nnet, then adjust the learning rate
for iter in $(seq -w $max_iters); do
echo -n "ITERATION $iter: "
mlp_next=$dir/nnet/${mlp_base}_iter${iter}
#skip iteration if already done
[ -e $dir/.done_iter$iter ] && echo -n "skipping... " && ls $mlp_next* && continue
#training
$train_tool \
--learn-rate=$learn_rate --momentum=$momentum --l1-penalty=$l1_penalty --l2-penalty=$l2_penalty \
--bunchsize=$bunch_size --cachesize=$cache_size --randomize=true --verbose=$verbose \
${feature_transform:+ --feature-transform=$feature_transform} \
${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \
${seed:+ --seed=$seed} \
$mlp_best "$feats_tr" "$labels" $mlp_next \
2> $dir/log/iter$iter.log || exit 1;
tr_acc=$(cat $dir/log/iter$iter.log | awk '/FRAME_ACCURACY/{ acc=$3; sub(/%/,"",acc); } END{print acc}')
tr_xent=$(cat $dir/log/iter$iter.log | awk 'BEGIN{FS=":"} /err\/frm:/{ xent = $NF; } END{print xent}')
echo -n "TRAIN ACCURACY $(printf "%.2f" $tr_acc) (avg.xent$(printf "%.4f" $tr_xent),lrate$(printf "%.6g" $learn_rate)), "
#cross-validation
$train_tool --cross-validate=true \
--bunchsize=$bunch_size --cachesize=$cache_size --verbose=$verbose \
${feature_transform:+ --feature-transform=$feature_transform} \
${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \
$mlp_next "$feats_cv" "$labels" \
2>>$dir/log/iter$iter.log || exit 1;
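# the cross-validation pass appends to the same log, so the last FRAME_ACCURACY
# and err/frm lines (picked by the END blocks below) now hold the CV numbers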
acc_new=$(cat $dir/log/iter$iter.log | awk '/FRAME_ACCURACY/{ acc=$3; sub(/%/,"",acc); } END{print acc}')
xent_new=$(cat $dir/log/iter$iter.log | awk 'BEGIN{FS=":"} /err\/frm:/{ xent = $NF; } END{print xent}')
echo -n "CROSSVAL ACCURACY $(printf "%.2f" $acc_new) (avg.xent$(printf "%.4f" $xent_new)), "
#accept or reject the new parameters (based on the per-frame cross-validation accuracy)
acc_prev=$acc
if [ "1" == "$(awk "BEGIN{print($acc_new>$acc);}")" ]; then
acc=$acc_new
mlp_best=$dir/nnet/${mlp_base}_iter${iter}_learnrate${learn_rate}_tr$(printf "%.2f" $tr_acc)_cv$(printf "%.2f" $acc_new)
mv $mlp_next $mlp_best
echo "nnet accepted ($(basename $mlp_best))"
echo $mlp_best > $dir/.mlp_best
else
mlp_reject=$dir/nnet/${mlp_base}_iter${iter}_learnrate${learn_rate}_tr$(printf "%.2f" $tr_acc)_cv$(printf "%.2f" $acc_new)_rejected
mv $mlp_next $mlp_reject
echo "nnet rejected ($(basename $mlp_reject))"
fi
#create a .done file to mark that this iteration has finished (used to skip it when resuming)
touch $dir/.done_iter$iter
#stopping criterion
if [[ "1" == "$halving" && "1" == "$(awk "BEGIN{print($acc < $acc_prev+$end_halving_inc)}")" ]]; then
if [[ "$min_iters" != "" ]]; then
if [ $min_iters -gt $iter ]; then
echo "we were supposed to finish, but we continue: min_iters=$min_iters not reached yet"
continue
fi
fi
echo "finished, too small improvement: $(awk "BEGIN{print($acc-$acc_prev)}")"
break
fi
#start annealing when improvement is low
if [ "1" == "$(awk "BEGIN{print($acc < $acc_prev+$start_halving_inc)}")" ]; then
halving=1
echo $halving >$dir/.halving
fi
#do annealing
if [ "1" == "$halving" ]; then
learn_rate=$(awk "BEGIN{print($learn_rate*$halving_factor)}")
echo $learn_rate >$dir/.learn_rate
fi
done
#select the best network
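# link the nnet with the best cross-validation accuracy as final.nnet;
# fail if no iteration ever improved over the initial network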
if [ $mlp_best != $mlp_init ]; then
mlp_final=${mlp_best}_final_
( cd $dir/nnet; ln -s $(basename $mlp_best) $(basename $mlp_final); )
( cd $dir; ln -s nnet/$(basename $mlp_final) final.nnet; )
echo "Succeeded training the Neural Network : $dir/final.nnet"
else
"Error training neural network..."
exit 1
fi