#!/bin/bash # Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0. # MMI (or boosted MMI) training (A.K.A. sequence training) of a neural net based # system as trained by train_nnet_cpu.sh # Begin configuration section. cmd=run.pl epochs_per_ebw_iter=1 # Number of times we iterate over the whole # data each time we do an "EBW" iteration. num_ebw_iters=4 # Number of "EBW" iterations. initial_learning_rate=0.001 # learning rate we start with. learning_rate_factor=1.0 # factor by which we change the learning # rate each iteration (should be <= 1.0) E=2.0 # this is slightly analogous to the constant E used in # Extended Baum-Welch updates of GMMs. It slows down (and # somewhat regularizes) the update. minibatch_size=256 # since the learning rate is always quite low compared with # what we have at the start of ML training, we can probably # afford a somewhat higher minibatch size than there, as # there is less risk of instability. samples_per_iter=400000 # each phase of training, see this many samples # per job. Note: this is a kind of suggestion; we # will actually find a number that will make the # #iters per epoch a whole number. num_jobs_nnet=8 # Number of neural net training jobs to run in parallel. # not the same as the num-jobs (nj) which will be the same as the # alignment and denlat directories. stage=0 sub_stage=-3 # this can be used to start from a particular sub-iteration of an # iteration acwt=0.1 boost=0.0 # boosting for BMMI (you can try 0.1).. this is applied per frame. transform_dir= # Note: by default any transforms in $alidir will be used. parallel_opts="-pe smp 16" # by default we use 16 threads; this lets the queue know. io_opts="-tc 10" # max 5 jobs running at one time (a lot of I/O.) num_threads=16 # number of threads for neural net trainer.. mkl_num_threads=1 random_copy=false # End configuration section. echo "$0 $@" # Print the command line for logging if [ -f path.sh ]; then . ./path.sh; fi . parse_options.sh || exit 1; if [ $# != 6 ]; then echo "Usage: steps/train_nnet_cpu_mmi.sh [opts] " echo "" echo "Main options (for others, see top of script file)" echo "Note, the terminology is: each iteration of EBW we do multiple epochs; each epoch" echo " we have multiple iterations of training (note the same as the EBW iters)." echo " --config # config file containing options" echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs." echo " --num-ebw-iters <#iters|4> # number of pseudo-Extended-Baum-Welch iterations (default: 4)" echo " --epochs-per-ebw-iter <#epochs|1> # number of times to see all the data per EBW iter." echo " --initial-learning-rate # learning rate to use on the first iteration" echo " --learning-rate-factor # Factor by which to change the learning rate on each" echo " # EBW iteration (should be <= 1.0)" echo " --num-jobs-nnet # Number of parallel jobs to use for main neural net" echo " # training (will affect results as well as speed; try 8, 16)." echo " # Note: if you increase this, you may want to also increase" echo " # the learning rate." echo " --num-threads # Number of parallel threads per job (will affect results" echo " # as well as speed; may interact with batch size; if you increase" echo " # this, you may want to decrease the batch size." echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" echo " # use multiple threads." echo " --io-opts # Options given to e.g. queue.pl for any especially I/O intensive jobs" echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads" echo " # should not get too large, e.g. >2k)." echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, for each" echo " # process. Note: this will get modified to a number that will" echo " # divide the data into a whole number of pieces." echo " --transform-dir # Directory to find fMLLR transforms; if not specified, " echo " # $alidir will be used if it has transforms" echo " --stage # Used to run a partially-completed training process from somewhere in" echo " # the middle." echo " --sub-stage # In conjunction with --stage, can be used to start a partially-completed" echo " # training process (refers to the phase number)" exit 1; fi data=$1 lang=$2 srcdir=$3 alidir=$4 # Also used for transforms by default, if transform-dir not specified. denlatdir=$5 dir=$6 # experimental directory # Check that some files exist, mostly to verify correct directory arguments. for f in $data/feats.scp $lang/L.fst $srcdir/final.mdl $alidir/ali.1.gz $denlatdir/lat.1.gz; do [ ! -f $f ] && echo "$0: no such file $f" && exit 1; done mkdir -p $dir/log cp $srcdir/tree $dir learning_rate=$initial_learning_rate if [ $stage -ge -1 ]; then $cmd $dir/log/copy_initial.log \ nnet-am-copy --learning-rate=$learning_rate $srcdir/final.mdl $dir/0.1.mdl fi nnet_context_opts="--left-context=`nnet-am-info $dir/0.1.mdl 2>/dev/null | grep -w left-context | awk '{print $2}'` --right-context=`nnet-am-info $dir/0.1.mdl 2>/dev/null | grep -w right-context | awk '{print $2}'`" || exit 1; silphonelist=`cat $lang/phones/silence.csl` || exit 1; nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir... nj2=`cat $denlatdir/num_jobs` || exit 1; # number of jobs in denlat dir [ "$nj" != "$nj2" ] && echo "Mismatch in #jobs $nj vs $nj2" && exit 1; sdata=$data/split$nj splice_opts=`cat $alidir/splice_opts 2>/dev/null` cp $alidir/splice_opts $dir 2>/dev/null cp $alidir/final.mat $dir 2>/dev/null # any LDA matrix... cp $alidir/tree $dir ## Set up features. Note: these are different from the normal features ## because we have one rspecifier that has the features for the entire ## training set, not separate ones for each batch. if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi echo "$0: feature type is $feat_type" case $feat_type in delta) all_feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:$data/feats.scp ark:- | add-deltas ark:- ark:- |" feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |" ;; lda) all_feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:$data/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" ;; *) echo "$0: invalid feature type $feat_type" && exit 1; esac if [ -z "$transform_dir" ] && [ -f "$alidir/trans.1" ]; then # --transform-dir option not set and $alidir has transforms in it. transform_dir=$alidir fi if [ -f $alidir/trans.1 ]; then echo "$0: using transforms from $alidir" all_feats="$all_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/trans.*|' ark:- ark:- |" feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/trans.JOB ark:- ark:- |" else echo "$0: not using fMLLR transforms (assuming unadapted system)" fi echo "$0: working out number of frames of training data" num_frames=`feat-to-len scp:$data/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1; # round to closest int iters_per_epoch=`perl -e "print int($num_frames/($samples_per_iter * $num_jobs_nnet) + 0.5);"` || exit 1; [ $iters_per_epoch -eq 0 ] && iters_per_epoch=1 samples_per_iter_real=$[$num_frames/($num_jobs_nnet*$iters_per_epoch)] echo "Every EBW iteration, splitting the data up into $iters_per_epoch iterations," echo "giving samples-per-iteration of $samples_per_iter_real (you requested $samples_per_iter)." mkdir -p $dir/post $dir/egs num_epochs=$[$num_ebw_iters*$epochs_per_ebw_iter] x=0 while [ $x -lt $num_epochs ]; do z=$[$x / $epochs_per_ebw_iter]; # z is the (generally) smaller iteration number that identifies the EBW pass. if [ $x -eq $[$z * $epochs_per_ebw_iter] ]; then first_iter_of_epoch=true echo "Starting pass $z of EBW" else first_iter_of_epoch=false fi echo "Epoch $x of $num_epochs" if [ $stage -le $x ] && $first_iter_of_epoch; then if [ $stage -lt $x ] || [ $sub_stage -le -3 ]; then # First get the per-frame posteriors, by rescoring the lattices; this # process also gives us at the same time the posteriors of each state for # each frame (by default, pruned to 0.01 with a randomized algorithm). # The matrix-logprob stage produces a diagnostic and passes the pseudo-log-like # matrix through unchanged. (Note: nnet-logprob2-parallel can use up to # $num_threads threads, but in practice it may be limited by the speed of # the other elements of the pipe. $cmd $parallel_opts JOB=1:$nj $dir/log/post.$z.JOB.log \ nnet-logprob2-parallel --num-threads=$num_threads $dir/$x.1.mdl "$feats" \ "ark:|prob-to-post ark:- ark:- | gzip -c >$dir/post/smooth_post.$z.JOB.gz" ark:- \| \ matrix-logprob ark:- "ark:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $dir/$x.1.mdl ark:- ark:-|" ark:- \| \ lattice-rescore-mapped $dir/$x.1.mdl "ark:gunzip -c $denlatdir/lat.JOB.gz|" ark:- ark:- \| \ lattice-boost-ali --b=$boost --silence-phones=$silphonelist $dir/$x.1.mdl ark:- "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \ lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \ post-to-pdf-post $dir/$x.1.mdl ark:- "ark:|gzip -c >$dir/post/den_post.$z.JOB.gz" || exit 1; fi if [ $stage -lt $x ] || [ $sub_stage -le -2 ]; then # run nnet-get-egs for all files, to get the training examples for each frame-- # combines the feature and label/posterior information. The posterior information # consists of 2 things: the numerator posteriors from the alignments, the denominator # posteriors from the lattices (times -1), and the smoothing posteriors from the # neural net log-probs (times E). # We copy the examples for each job round-robin to multiple archives, one for each # of 1...$num_jobs_nnet. egs_out="" for n in `seq 1 $num_jobs_nnet`; do # indexes are egs_orig.$z.$num_jobs_nnet.$nj egs_out="$egs_out ark:$dir/egs/egs_orig.$z.$n.JOB.ark" done $cmd JOB=1:$nj $dir/log/get_egs.$z.JOB.log \ ali-to-pdf $dir/$x.1.mdl "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \ ali-to-post ark:- ark:- \| \ sum-post --scale2=$E ark:- "ark:gunzip -c $dir/post/smooth_post.$z.JOB.gz|" ark:- \| \ sum-post --scale2=-1.0 ark:- "ark:gunzip -c $dir/post/den_post.$z.JOB.gz|" ark:- \| \ nnet-get-egs $nnet_context_opts "$feats" ark:- ark:- \| \ nnet-copy-egs ark:- $egs_out || exit 1; rm $dir/post/smooth_post.$z.*.gz $dir/post/den_post.$z.*.gz fi if $first_iter_of_epoch; then # Diagnostics-- work out an extra term in the objf that we have to add to # what we get from the nnet training. tail -n 50 $dir/log/post.$z.*.log | perl -e '$acwt=shift @ARGV; $acwt>0.0 || die "bad acwt"; while() { if (m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames. Average acoustic like/frame is (\S+)|) { $tot_den_lat_like += $1*$2; $tot_frames += $2; } if (m|matrix-logprob.+Average log-prob per frame is (\S+) over (\S+) frames|) { $tot_num_like += $1*$2; $tot_num_frames += $2; } } if (abs($tot_frames - $tot_num_frames) > 0.01*($tot_frames + $tot_num_frames)) { print STDERR "#frames differ $tot_frames vs $tot_num_frames\n"; } $tot_den_lat_like /= $tot_frames; $tot_num_like /= $tot_num_frames; $objf = $acwt * $tot_num_like - $tot_den_lat_like; print $objf."\n"; ' $acwt > $dir/log/objf.$z.log echo "Objf on EBW iter $z is `cat $dir/log/objf.$z.log`" fi if [ $stage -lt $x ] || [ $sub_stage -le -1 ]; then echo "Merging training examples across original #jobs ($nj), and " echo "splitting across number of nnet jobs $num_jobs_nnet" egs_out2="" for n in `seq 1 $iters_per_epoch`; do # indexes of egs_merged are: egs_merged.$z.$iters_per_epoch.$num_jobs_nnet egs_out2="$egs_out2 ark:$dir/egs/egs_merged.$z.$n.JOB.ark" done # Note: in the following command, JOB goes from 1 to $num_jobs_nnet, so one # job per parallel training job (different from the previous command). # We sum up over the index JOB in the previous $cmd, and write to multiple # archives, this time one for each "sub-iter". # indexes of egs_orig are: egs_orig.$z.$num_jobs_nnet.$nj $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/merge_and_split.$x.JOB.log \ cat $dir/egs/egs_orig.$z.JOB.*.ark \| \ nnet-copy-egs --random=$random_copy "--srand=\$[JOB+($x*$num_jobs_nnet)]" \ ark:- $egs_out2 '&&' rm $dir/egs/egs_orig.$z.JOB.*.ark || exit 1; fi if [ $stage -lt $x ] || [ $sub_stage -le 0 ]; then echo "Randomizing order of examples in each job" for n in `seq 1 $iters_per_epoch`; do s=$[$num_jobs_nnet*($n+($iters_per_epoch*$z))] # for srand $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$z.$n.JOB.log \ nnet-shuffle-egs "--srand=\$[JOB+$s]" \ ark:$dir/egs/egs_merged.$z.$n.JOB.ark ark:$dir/egs/egs.$z.$n.JOB.ark '&&' \ rm $dir/egs/egs_merged.$z.$n.JOB.ark || exit 1; done fi fi if [ $stage -le $x ]; then # This block does the $iters_per_epoch iters of training. y=1; # y is the "sub-iteration" number. while [ $y -le $iters_per_epoch ]; do echo "Iteration $x, sub-iteration $y" if [ $stage -lt $x ] || [ $sub_stage -le $y ]; then $cmd $parallel_opts JOB=1:$num_jobs_nnet $dir/log/train.$x.$y.JOB.log \ nnet-train-parallel --num-threads=$num_threads --minibatch-size=$minibatch_size \ $dir/$x.$y.mdl ark:$dir/egs/egs.$z.$y.JOB.ark $dir/$x.$y.JOB.mdl \ || exit 1; nnets_list= for n in `seq 1 $num_jobs_nnet`; do nnets_list="$nnets_list $dir/$x.$y.$n.mdl" done if [ $y -eq $iters_per_epoch ]; then next_mdl=$dir/$[$x+1].1.mdl else next_mdl=$dir/$x.$[$y+1].mdl; fi # Average the parameters of all the parallel jobs. $cmd $dir/log/average.$x.$y.log \ nnet-am-average $nnets_list $next_mdl || exit 1; rm $nnets_list fi y=$[$y+1] done fi if [ $learning_rate_factor != 1.0 ]; then learning_rate=`perl -e "print $learning_rate * $learning_rate_factor;"`; ! nnet-am-copy --print-args=false --learning-rate=$learning_rate $dir/$[$x+1].1.mdl $dir/$[$x+1].1.mdl && \ echo Error changing learning rate of neural net && exit 1; fi x=$[$x+1] done rm $dir/final.mdl 2>/dev/null ln -s $x.1.mdl $dir/final.mdl echo Done