Blame view
Scripts/steps/train_nnet_mpe.sh
5.26 KB
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
#!/bin/bash # Copyright 2013 Brno University of Technology (Author: Karel Vesely) # Apache 2.0. # Sequence-discriminative MPE/sMBR training of DNN. # 4 iterations (by default) of Stochastic Gradient Descent with per-utterance updates. # We select between MPE/sMBR optimization by '--do-smbr <bool>' option. # For the numerator we have a fixed alignment rather than a lattice-- # this actually follows from the way lattices are defined in Kaldi, which # is to have a single path for each word (output-symbol) sequence. # Begin configuration section. cmd=run.pl num_iters=4 acwt=0.1 lmwt=1.0 learn_rate=0.00001 halving_factor=1.0 #ie. disable halving do_smbr=true use_silphones=false #setting this to something will enable giving siphones to nnet-mpe verbose=1 use_gpu_id= seed=777 # seed value used for training data shuffling # End configuration section echo "$0 $@" # Print the command line for logging [ -f ./path.sh ] && . ./path.sh; # source the path. . parse_options.sh || exit 1; if [ $# -ne 6 ]; then echo "Usage: steps/$0 <data> <lang> <srcdir> <ali> <denlats> <exp>" echo " e.g.: steps/$0 data/train_all data/lang exp/tri3b_dnn exp/tri3b_dnn_ali exp/tri3b_dnn_denlats exp/tri3b_dnn_smbr" echo "Main options (for others, see top of script file)" echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." echo " --config <config-file> # config containing options" echo " --num-iters <N> # number of iterations to run" echo " --acwt <float> # acoustic score scaling" echo " --lmwt <float> # linguistic score scaling" echo " --learn-rate <float> # learning rate for NN training" echo " --do-smbr <bool> # do sMBR training, otherwise MPE" exit 1; fi data=$1 lang=$2 srcdir=$3 alidir=$4 denlatdir=$5 dir=$6 mkdir -p $dir/log for f in $data/feats.scp $alidir/{tree,final.mdl,ali.1.gz} $denlatdir/lat.scp $srcdir/{final.nnet,final.feature_transform}; do [ ! -f $f ] && echo "$0: no such file $f" && exit 1; done mkdir -p $dir/log cp $alidir/{final.mdl,tree} $dir silphonelist=`cat $lang/phones/silence.csl` || exit 1; #Get the files we will need nnet=$srcdir/$(readlink $srcdir/final.nnet || echo final.nnet); [ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1; cp $nnet $dir/0.nnet; nnet=$dir/0.nnet class_frame_counts=$srcdir/ali_train_pdf.counts [ -z "$class_frame_counts" ] && echo "Error class_frame_counts '$class_frame_counts' does not exist!" && exit 1; cp $srcdir/ali_train_pdf.counts $dir feature_transform=$srcdir/final.feature_transform if [ ! -f $feature_transform ]; then echo "Missing feature_transform '$feature_transform'" exit 1 fi cp $feature_transform $dir/final.feature_transform model=$dir/final.mdl [ -z "$model" ] && echo "Error transition model '$model' does not exist!" && exit 1; #enable/disable silphones from MPE training mpe_silphones_arg= #empty [ "$use_silphones" == "true" ] && mpe_silphones_arg="--silence-phones=$silphonelist" # Shuffle the feature list to make the GD stochastic! # By shuffling features, we have to use lattices with random access (indexed by .scp file). cat $data/feats.scp | utils/shuffle_list.pl --srand $seed > $dir/train.scp ### ### Prepare feature pipeline ### # Create the feature stream: feats="ark,s,cs:copy-feats scp:$dir/train.scp ark:- |" # Optionally add cmvn if [ -f $srcdir/norm_vars ]; then norm_vars=$(cat $srcdir/norm_vars 2>/dev/null) [ ! -f $data/cmvn.scp ] && echo "$0: cannot find cmvn stats $data/cmvn.scp" && exit 1 feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp ark:- ark:- |" cp $srcdir/norm_vars $dir fi # Optionally add deltas if [ -f $srcdir/delta_order ]; then delta_order=$(cat $srcdir/delta_order) feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |" cp $srcdir/delta_order $dir fi ### ### ### ### ### Prepare the alignments ### # Assuming all alignments will fit into memory ali="ark:gunzip -c $alidir/ali.*.gz |" ### ### Prepare the lattices ### # The lattices are indexed by SCP (they are not gziped because of the random access in SGD) lats="scp:$denlatdir/lat.scp" # Run several iterations of the MPE/sMBR training cur_mdl=$nnet x=1 while [ $x -le $num_iters ]; do echo "Pass $x (learnrate $learn_rate)" if [ -f $dir/$x.nnet ]; then echo "Skipped, file $dir/$x.nnet exists" else #train $cmd $dir/log/mpe.$x.log \ nnet-train-mpe-sequential \ --feature-transform=$feature_transform \ --class-frame-counts=$class_frame_counts \ --acoustic-scale=$acwt \ --lm-scale=$lmwt \ --learn-rate=$learn_rate \ --do-smbr=$do_smbr \ --verbose=$verbose \ $mpe_silphones_arg \ ${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \ $cur_mdl $alidir/final.mdl "$feats" "$lats" "$ali" $dir/$x.nnet || exit 1 fi cur_mdl=$dir/$x.nnet #report the progress grep -B 2 "Overall average frame-accuracy" $dir/log/mpe.$x.log | sed -e 's|.*)||' x=$((x+1)) learn_rate=$(awk "BEGIN{print($learn_rate*$halving_factor)}") done (cd $dir; [ -e final.nnet ] && unlink final.nnet; ln -s $((x-1)).nnet final.nnet) echo "MPE/sMBR training finished" exit 0 |