#!/bin/bash

# Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.

# MMI (or boosted MMI) training (A.K.A. sequence training) of a neural net based
# system as trained by train_nnet_cpu.sh
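# Example invocation (illustrative paths only; this assumes an ML-trained
# system in exp/tri4_nnet, alignments in exp/tri4_nnet_ali and denominator
# lattices in exp/tri4_denlats -- substitute your own directories):
#
#   steps/train_nnet_cpu_mmi.sh --boost 0.1 --initial-learning-rate 0.001 \
#     data/train data/lang exp/tri4_nnet exp/tri4_nnet_ali exp/tri4_denlats \
#     exp/tri4_nnet_mmi_b0.1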
# Begin configuration section.
cmd=run.pl
epochs_per_ebw_iter=1 # Number of times we iterate over the whole
                      # data each time we do an "EBW" iteration.
num_ebw_iters=4 # Number of "EBW" iterations.
initial_learning_rate=0.001 # learning rate we start with.
learning_rate_factor=1.0 # factor by which we change the learning
                         # rate each iteration (should be <= 1.0)
E=2.0  # this is slightly analogous to the constant E used in
       # Extended Baum-Welch updates of GMMs.  It slows down (and
       # somewhat regularizes) the update.
minibatch_size=256 # since the learning rate is always quite low compared with
                   # what we have at the start of ML training, we can probably
                   # afford a somewhat higher minibatch size than there, as
                   # there is less risk of instability.
samples_per_iter=400000 # each phase of training, see this many samples
                        # per job.  Note: this is a kind of suggestion; we
                        # will actually find a number that will make the
                        # #iters per epoch a whole number.
num_jobs_nnet=8 # Number of neural net training jobs to run in parallel.
                # Not the same as the num-jobs (nj), which will be the same as the
                # alignment and denlat directories.
stage=0
sub_stage=-3 # this can be used to start from a particular sub-iteration of an
             # iteration.
acwt=0.1
boost=0.0 # boosting for BMMI (you can try 0.1); this is applied per frame.
transform_dir= # Note: by default any transforms in $alidir will be used.
parallel_opts="-pe smp 16" # by default we use 16 threads; this lets the queue know.
io_opts="-tc 10" # max 10 jobs running at one time (a lot of I/O.)
num_threads=16 # number of threads for neural net trainer.
mkl_num_threads=1
random_copy=false
# End configuration section.

echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 6 ]; then
  echo "Usage: steps/train_nnet_cpu_mmi.sh [opts] <data> <lang> <src-dir> <ali-dir> <denlat-dir> <exp-dir>"
  echo ""
  echo "Main options (for others, see top of script file)"
  echo "Note, the terminology is: for each EBW iteration we do multiple epochs; for each epoch"
  echo " we have multiple iterations of training (not the same as the EBW iters)."
  echo "  --config <config-file>                           # config file containing options"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo "  --num-ebw-iters <#iters|4>                       # number of pseudo-Extended-Baum-Welch iterations (default: 4)"
  echo "  --epochs-per-ebw-iter <#epochs|1>                # number of times to see all the data per EBW iter."
  echo "  --initial-learning-rate <initial-lrate|0.001>    # learning rate to use on the first iteration"
  echo "  --learning-rate-factor <lrate-factor|1.0>        # Factor by which to change the learning rate on each"
  echo "                                                   # EBW iteration (should be <= 1.0)"
  echo "  --num-jobs-nnet <num-jobs|8>                     # Number of parallel jobs to use for main neural net"
  echo "                                                   # training (will affect results as well as speed; try 8, 16)."
  echo "                                                   # Note: if you increase this, you may want to also increase"
  echo "                                                   # the learning rate."
  echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
  echo "                                                   # as well as speed; may interact with batch size; if you increase"
  echo "                                                   # this, you may want to decrease the batch size)."
  echo "  --parallel-opts <opts|\"-pe smp 16\">              # extra options to pass to e.g. queue.pl for processes that"
  echo "                                                   # use multiple threads."
  echo "  --io-opts <opts|\"-tc 10\">                        # Options given to e.g. queue.pl for any especially I/O intensive jobs"
  echo "  --minibatch-size <minibatch-size|256>            # Size of minibatch to process (note: product with --num-threads"
  echo "                                                   # should not get too large, e.g. >2k)."
  echo "  --samples-per-iter <#samples|400000>             # Number of samples of data to process per iteration, for each"
  echo "                                                   # process.  Note: this will get modified to a number that will"
  echo "                                                   # divide the data into a whole number of pieces."
  echo "  --transform-dir <dir>                            # Directory to find fMLLR transforms; if not specified,"
  echo "                                                   # $alidir will be used if it has transforms"
  echo "  --stage <stage|0>                                # Used to run a partially-completed training process from somewhere in"
  echo "                                                   # the middle."
  echo "  --sub-stage <sub-stage|-3>                       # In conjunction with --stage, can be used to start a partially-completed"
  echo "                                                   # training process (refers to the sub-iteration number)"
  exit 1;
fi

data=$1
lang=$2
srcdir=$3
alidir=$4 # Also used for transforms by default, if transform-dir not specified.
denlatdir=$5
dir=$6 # experimental directory

# Check that some files exist, mostly to verify correct directory arguments.
for f in $data/feats.scp $lang/L.fst $srcdir/final.mdl $alidir/ali.1.gz $denlatdir/lat.1.gz; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

mkdir -p $dir/log
cp $srcdir/tree $dir

learning_rate=$initial_learning_rate

if [ $stage -ge -1 ]; then
  $cmd $dir/log/copy_initial.log \
    nnet-am-copy --learning-rate=$learning_rate $srcdir/final.mdl $dir/0.1.mdl
fi

nnet_context_opts="--left-context=`nnet-am-info $dir/0.1.mdl 2>/dev/null | grep -w left-context | awk '{print $2}'` --right-context=`nnet-am-info $dir/0.1.mdl 2>/dev/null | grep -w right-context | awk '{print $2}'`" || exit 1;

silphonelist=`cat $lang/phones/silence.csl` || exit 1;

nj=`cat $alidir/num_jobs` || exit 1;  # number of jobs in alignment dir...
nj2=`cat $denlatdir/num_jobs` || exit 1;  # number of jobs in denlat dir
[ "$nj" != "$nj2" ] && echo "Mismatch in #jobs $nj vs $nj2" && exit 1;

sdata=$data/split$nj
# Make sure the per-job data split exists (no-op if already split into $nj
# pieces and up to date).
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;

splice_opts=`cat $alidir/splice_opts 2>/dev/null`
cp $alidir/splice_opts $dir 2>/dev/null
cp $alidir/final.mat $dir 2>/dev/null # any LDA matrix...
cp $alidir/tree $dir

## Set up features.  Note: these are different from the normal features
## because we have one rspecifier that has the features for the entire
## training set, not separate ones for each batch.
if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"

case $feat_type in
  delta) all_feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |"
    feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |"
    ;;
  lda) all_feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:$data/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
    feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
    ;;
  *) echo "$0: invalid feature type $feat_type" && exit 1;
esac

if [ -z "$transform_dir" ] && [ -f "$alidir/trans.1" ]; then
  # --transform-dir option not set and $alidir has transforms in it.
  transform_dir=$alidir
fi

if [ ! -z "$transform_dir" ] && [ -f $transform_dir/trans.1 ]; then
  echo "$0: using transforms from $transform_dir"
  all_feats="$all_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $transform_dir/trans.*|' ark:- ark:- |"
  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
else
  echo "$0: not using fMLLR transforms (assuming unadapted system)"
fi
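# For reference, with feat_type=lda and fMLLR transforms present, the per-job
# rspecifier "$feats" built above expands (for a given JOB) to a pipeline of
# the form:
#   ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk \
#       scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | \
#     splice-feats $splice_opts ark:- ark:- | \
#     transform-feats $dir/final.mat ark:- ark:- | \
#     transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |
# i.e. per-speaker CMVN, frame splicing, the global final.mat transform, and
# then the speaker-specific fMLLR transforms.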
echo "$0: working out number of frames of training data"
num_frames=`feat-to-len scp:$data/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1;

# round to closest int
iters_per_epoch=`perl -e "print int($num_frames/($samples_per_iter * $num_jobs_nnet) + 0.5);"` || exit 1;
[ $iters_per_epoch -eq 0 ] && iters_per_epoch=1
samples_per_iter_real=$[$num_frames/($num_jobs_nnet*$iters_per_epoch)]

echo "Every EBW iteration, splitting the data up into $iters_per_epoch iterations,"
echo "giving samples-per-iteration of $samples_per_iter_real (you requested $samples_per_iter)."

mkdir -p $dir/post $dir/egs

num_epochs=$[$num_ebw_iters*$epochs_per_ebw_iter]
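# Worked example with illustrative numbers: for about 100 hours of data
# (roughly 36,000,000 frames at 100 frames per second), with num_jobs_nnet=8
# and samples_per_iter=400000 we would get
#   iters_per_epoch       = int(36000000 / (400000 * 8) + 0.5) = 11
#   samples_per_iter_real = 36000000 / (8 * 11)                = 409090
# and with the default num_ebw_iters=4 and epochs_per_ebw_iter=1 we run
# num_epochs=4 epochs, each split into 11 sub-iterations per training job.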
x=0
while [ $x -lt $num_epochs ]; do
  z=$[$x / $epochs_per_ebw_iter]; # z is the (generally) smaller iteration number that identifies the EBW pass.
  if [ $x -eq $[$z * $epochs_per_ebw_iter] ]; then
    first_iter_of_epoch=true  # true when $x is the first epoch of EBW pass $z.
    echo "Starting pass $z of EBW"
  else
    first_iter_of_epoch=false
  fi
  echo "Epoch $x of $num_epochs"

  if [ $stage -le $x ] && $first_iter_of_epoch; then
    if [ $stage -lt $x ] || [ $sub_stage -le -3 ]; then
      # First get the per-frame posteriors, by rescoring the lattices; this
      # process also gives us at the same time the posteriors of each state for
      # each frame (by default, pruned to 0.01 with a randomized algorithm).
      # The matrix-logprob stage produces a diagnostic and passes the pseudo-log-like
      # matrix through unchanged.  (Note: nnet-logprob2-parallel can use up to
      # $num_threads threads, but in practice it may be limited by the speed of
      # the other elements of the pipe.)
      $cmd $parallel_opts JOB=1:$nj $dir/log/post.$z.JOB.log \
        nnet-logprob2-parallel --num-threads=$num_threads $dir/$x.1.mdl "$feats" \
          "ark:|prob-to-post ark:- ark:- | gzip -c >$dir/post/smooth_post.$z.JOB.gz" ark:- \| \
        matrix-logprob ark:- "ark:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $dir/$x.1.mdl ark:- ark:-|" ark:- \| \
        lattice-rescore-mapped $dir/$x.1.mdl "ark:gunzip -c $denlatdir/lat.JOB.gz|" ark:- ark:- \| \
        lattice-boost-ali --b=$boost --silence-phones=$silphonelist $dir/$x.1.mdl ark:- "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
        lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
        post-to-pdf-post $dir/$x.1.mdl ark:- "ark:|gzip -c >$dir/post/den_post.$z.JOB.gz" || exit 1;
    fi

    if [ $stage -lt $x ] || [ $sub_stage -le -2 ]; then
      # Run nnet-get-egs for all files, to get the training examples for each frame--
      # this combines the feature and label/posterior information.  The posterior
      # information consists of three things: the numerator posteriors from the
      # alignments, the denominator posteriors from the lattices (times -1), and
      # the smoothing posteriors from the neural net log-probs (times E).
      # We copy the examples for each job round-robin to multiple archives, one for each
      # of 1...$num_jobs_nnet.
      egs_out=""
      for n in `seq 1 $num_jobs_nnet`; do
        # indexes are egs_orig.$z.$num_jobs_nnet.$nj
        egs_out="$egs_out ark:$dir/egs/egs_orig.$z.$n.JOB.ark"
      done
      $cmd JOB=1:$nj $dir/log/get_egs.$z.JOB.log \
        ali-to-pdf $dir/$x.1.mdl "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
        ali-to-post ark:- ark:- \| \
        sum-post --scale2=$E ark:- "ark:gunzip -c $dir/post/smooth_post.$z.JOB.gz|" ark:- \| \
        sum-post --scale2=-1.0 ark:- "ark:gunzip -c $dir/post/den_post.$z.JOB.gz|" ark:- \| \
        nnet-get-egs $nnet_context_opts "$feats" ark:- ark:- \| \
        nnet-copy-egs ark:- $egs_out || exit 1;
      rm $dir/post/smooth_post.$z.*.gz $dir/post/den_post.$z.*.gz
    fi
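    # To make the combination above explicit: for each frame and each pdf j,
    # the posterior weight written into the training examples is
    #   post(j) = num_post(j) + E * smooth_post(j) - den_post(j)
    # where num_post comes from the numerator alignment, den_post from the
    # (boosted) denominator lattice, and smooth_post from the net's own output;
    # the E term plays the smoothing/slowing-down role described for E in the
    # configuration section above.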
    if $first_iter_of_epoch; then
      # Diagnostics-- work out an extra term in the objf that we have to add to
      # what we get from the nnet training.
      tail -n 50 $dir/log/post.$z.*.log | \
        perl -e '$acwt = shift @ARGV; $acwt > 0.0 || die "bad acwt";
          while(<STDIN>) {
            if (m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames. Average acoustic like/frame is (\S+)|) {
              $tot_den_lat_like += $1*$2;
              $tot_frames += $2;
            }
            if (m|matrix-logprob.+Average log-prob per frame is (\S+) over (\S+) frames|) {
              $tot_num_like += $1*$2;
              $tot_num_frames += $2;
            }
          }
          if (abs($tot_frames - $tot_num_frames) > 0.01*($tot_frames + $tot_num_frames)) {
            print STDERR "#frames differ $tot_frames vs $tot_num_frames ";
          }
          $tot_den_lat_like /= $tot_frames;
          $tot_num_like /= $tot_num_frames;
          $objf = $acwt * $tot_num_like - $tot_den_lat_like;
          print $objf." "; ' $acwt > $dir/log/objf.$z.log
      echo "Objf on EBW iter $z is `cat $dir/log/objf.$z.log`"
    fi

    if [ $stage -lt $x ] || [ $sub_stage -le -1 ]; then
      echo "Merging training examples across original #jobs ($nj), and"
      echo "splitting across number of nnet jobs $num_jobs_nnet"
      egs_out2=""
      for n in `seq 1 $iters_per_epoch`; do
        # indexes of egs_merged are: egs_merged.$z.$iters_per_epoch.$num_jobs_nnet
        egs_out2="$egs_out2 ark:$dir/egs/egs_merged.$z.$n.JOB.ark"
      done
      # Note: in the following command, JOB goes from 1 to $num_jobs_nnet, so one
      # job per parallel training job (different from the previous command).
      # We sum up over the index JOB in the previous $cmd, and write to multiple
      # archives, this time one for each "sub-iter".
      # indexes of egs_orig are: egs_orig.$z.$num_jobs_nnet.$nj
      $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/merge_and_split.$x.JOB.log \
        cat $dir/egs/egs_orig.$z.JOB.*.ark \| \
        nnet-copy-egs --random=$random_copy "--srand=\$[JOB+($x*$num_jobs_nnet)]" \
          ark:- $egs_out2 '&&' rm $dir/egs/egs_orig.$z.JOB.*.ark || exit 1;
    fi

    if [ $stage -lt $x ] || [ $sub_stage -le 0 ]; then
      echo "Randomizing order of examples in each job"
      for n in `seq 1 $iters_per_epoch`; do
        s=$[$num_jobs_nnet*($n+($iters_per_epoch*$z))] # for srand
        $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$z.$n.JOB.log \
          nnet-shuffle-egs "--srand=\$[JOB+$s]" \
            ark:$dir/egs/egs_merged.$z.$n.JOB.ark ark:$dir/egs/egs.$z.$n.JOB.ark '&&' \
            rm $dir/egs/egs_merged.$z.$n.JOB.ark || exit 1;
      done
    fi
  fi

  if [ $stage -le $x ]; then
    # This block does the $iters_per_epoch iters of training.
    y=1; # y is the "sub-iteration" number.
    while [ $y -le $iters_per_epoch ]; do
      echo "Iteration $x, sub-iteration $y"
      if [ $stage -lt $x ] || [ $sub_stage -le $y ]; then
        $cmd $parallel_opts JOB=1:$num_jobs_nnet $dir/log/train.$x.$y.JOB.log \
          nnet-train-parallel --num-threads=$num_threads --minibatch-size=$minibatch_size \
            $dir/$x.$y.mdl ark:$dir/egs/egs.$z.$y.JOB.ark $dir/$x.$y.JOB.mdl \
          || exit 1;
        nnets_list=
        for n in `seq 1 $num_jobs_nnet`; do
          nnets_list="$nnets_list $dir/$x.$y.$n.mdl"
        done
        if [ $y -eq $iters_per_epoch ]; then
          next_mdl=$dir/$[$x+1].1.mdl
        else
          next_mdl=$dir/$x.$[$y+1].mdl;
        fi
        # Average the parameters of all the parallel jobs.
        $cmd $dir/log/average.$x.$y.log \
          nnet-am-average $nnets_list $next_mdl || exit 1;
        rm $nnets_list
      fi
      y=$[$y+1]
    done
  fi

  if [ $learning_rate_factor != 1.0 ]; then
    learning_rate=`perl -e "print $learning_rate * $learning_rate_factor;"`;
    ! nnet-am-copy --print-args=false --learning-rate=$learning_rate \
        $dir/$[$x+1].1.mdl $dir/$[$x+1].1.mdl && \
      echo "Error changing learning rate of neural net" && exit 1;
  fi

  x=$[$x+1]
done

rm $dir/final.mdl 2>/dev/null
ln -s $x.1.mdl $dir/final.mdl

echo Done
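# Note: $dir/final.mdl is a symlink to the model from the last EBW iteration
# ($num_epochs.1.mdl).  The averaged models from earlier EBW iterations
# ($x.1.mdl) are left in place, so you can also decode with each of them
# (using the same decoding setup as for the original train_nnet_cpu.sh system)
# and pick the best EBW iteration on a development set.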