Blame view
Scripts/steps/train_nnet.sh
12.6 KB
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 |
#!/bin/bash # Copyright 2012/2013 Karel Vesely (Brno University of Technology) # Apache 2.0 # Begin configuration. config= # config, which is also sent to all other scripts # NETWORK INITIALIZATION mlp_init= # select initialized MLP (override initialization) feature_transform= # select feature transform (=splice,rescaling,...) (don't build new one) # model_size=8000000 # nr. of parameteres in MLP hid_layers=4 # nr. of hidden layers (prior to sotfmax or bottleneck) bn_dim= # set a value to get a bottleneck network hid_dim= # select hidden dimension directly (override $model_size) dbn= # select DBN to prepend to the MLP initialization # init_opts= # options, passed to the initialization script # FEATURE PROCESSING copy_feats=true # resave the train features in the re-shuffled order to tmpdir # feature config (applies always) apply_cmvn=false # apply normalization to input features? norm_vars=false # use variance normalization? delta_order= # feature_transform: splice=5 # temporal splicing splice_step=1 # stepsize of the splicing (1 == no gap between frames) feat_type=plain # feature config (applies to feat_type traps) traps_dct_basis=11 # nr. od DCT basis (applies to `traps` feat_type, splice10 ) # feature config (applies to feat_type transf) (ie. LDA+MLLT, no fMLLR) transf= splice_after_transf=5 # feature config (applies to feat_type lda) lda_dim=300 # LDA dimension (applies to `lda` feat_type) # LABELS labels= # use these labels to train (override deafault pdf alignments) num_tgt= # force to use number of outputs in the MLP (default is autodetect) # TRAINING SCHEDULER learn_rate=0.008 # initial learning rate train_opts= # options, passed to the training script train_tool= # optionally change the training tool # OTHER use_gpu_id= # manually select GPU id to run on, (-1 disables GPU) analyze_alignments=true # run the alignment analysis script seed=777 # seed value used for training data shuffling and initialization # End configuration. echo "$0 $@" # Print the command line for logging [ -f path.sh ] && . ./path.sh; . parse_options.sh || exit 1; if [ $# != 6 ]; then echo "Usage: $0 <data-train> <data-dev> <lang-dir> <ali-train> <ali-dev> <exp-dir>" echo " e.g.: $0 data/train data/cv data/lang exp/mono_ali exp/mono_ali_cv exp/mono_nnet" echo "main options (for others, see top of script file)" echo " --config <config-file> # config containing options" exit 1; fi data=$1 data_cv=$2 lang=$3 alidir=$4 alidir_cv=$5 dir=$6 silphonelist=`cat $lang/phones/silence.csl` || exit 1; for f in $alidir/final.mdl $alidir/ali.1.gz $alidir_cv/ali.1.gz $data/feats.scp $data_cv/feats.scp; do [ ! -f $f ] && echo "$0: no such file $f" && exit 1; done echo echo "# INFO" echo "$0 : Training Neural Network" printf "\t dir : $dir " printf "\t Train-set : $data $alidir " printf "\t CV-set : $data_cv $alidir_cv " mkdir -p $dir/{log,nnet} #skip when already trained [ -e $dir/final.nnet ] && printf " SKIPPING TRAINING... ($0) nnet already trained : $dir/final.nnet ($(readlink $dir/final.nnet)) " && exit 0 ###### PREPARE ALIGNMENTS ###### echo echo "# PREPARING ALIGNMENTS" if [ ! -z $labels ]; then echo "Using targets '$labels' (by force)" else echo "Using PDF targets from dirs '$alidir' '$alidir_cv'" #define pdf-alignment rspecifiers labels_tr="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- |" if [[ "$alidir" == "$alidir_cv" ]]; then labels="$labels_tr" else labels="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz $alidir_cv/ali.*.gz |\" ark:- |" fi #get the priors, get pdf-counts from alignments analyze-counts --binary=false "$labels_tr" $dir/ali_train_pdf.counts || exit 1 #copy the old transition model, will be needed by decoder copy-transition-model --binary=false $alidir/final.mdl $dir/final.mdl || exit 1 #copy the tree cp $alidir/tree $dir/tree || exit 1 #analyze the train/cv alignments if [ "$analyze_alignments" == "true" ]; then utils/nnet/analyze_alignments.sh "TRAINING SET" "ark:gunzip -c $alidir/ali.*.gz |" $dir/final.mdl $lang > $dir/__ali_stats_train utils/nnet/analyze_alignments.sh "VALIDATION SET" "ark:gunzip -c $alidir_cv/ali.*.gz |" $dir/final.mdl $lang > $dir/__ali_stats_cv fi fi ###### PREPARE FEATURES ###### echo echo "# PREPARING FEATURES" # shuffle the list echo "Preparing train/cv lists :" cat $data/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} > $dir/train.scp cp $data_cv/feats.scp $dir/cv.scp # print the list sizes wc -l $dir/train.scp $dir/cv.scp #re-save the shuffled features, so they are stored sequentially on the disk in /tmp/ if [ "$copy_feats" == "true" ]; then tmpdir=$(mktemp -d); mv $dir/train.scp $dir/train.scp_non_local utils/nnet/copy_feats.sh $dir/train.scp_non_local $tmpdir $dir/train.scp #remove data on exit... trap "echo \"Removing features tmpdir $tmpdir @ $(hostname)\"; rm -r $tmpdir" EXIT fi #create a 10k utt subset for global cmvn estimates head -n 10000 $dir/train.scp > $dir/train.scp.10k ###### PREPARE FEATURE PIPELINE ###### #read the features feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |" feats_cv="ark:copy-feats scp:$dir/cv.scp ark:- |" #optionally add per-speaker CMVN if [ $apply_cmvn == "true" ]; then echo "Will use CMVN statistics : $data/cmvn.scp, $data_cv/cmvn.scp" [ ! -r $data/cmvn.scp ] && echo "Cannot find cmvn stats $data/cmvn.scp" && exit 1; [ ! -r $data_cv/cmvn.scp ] && echo "Cannot find cmvn stats $data_cv/cmvn.scp" && exit 1; feats_tr="$feats_tr apply-cmvn --print-args=false --norm-vars=$norm_vars --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp ark:- ark:- |" feats_cv="$feats_cv apply-cmvn --print-args=false --norm-vars=$norm_vars --utt2spk=ark:$data_cv/utt2spk scp:$data_cv/cmvn.scp ark:- ark:- |" # keep track of norm_vars option echo "$norm_vars" >$dir/norm_vars else echo "apply_cmvn is disabled (per speaker norm. on input features)" fi #optionally add deltas if [ "$delta_order" != "" ]; then feats_tr="$feats_tr add-deltas --delta-order=$delta_order ark:- ark:- |" feats_cv="$feats_cv add-deltas --delta-order=$delta_order ark:- ark:- |" echo "$delta_order" > $dir/delta_order echo "add-deltas (delta_order $delta_order)" fi #get feature dim echo "Getting feature dim : " feat_dim=$(feat-to-dim --print-args=false "$feats_tr" -) echo "Feature dim is : $feat_dim" # Now we will start building complex feature_transform which will # be forwarded in CUDA to gain more speed. # # We will use 1GPU for both feature_transform and MLP training in one binary tool. # This is against the kaldi spirit, but it is necessary, because on some sites a GPU # cannot be shared accross by two or more processes (compute exclusive mode), # and we would like to use single GPU per training instance, # so that the grid resources can be used efficiently... if [ ! -z "$feature_transform" ]; then echo "Using pre-computed feature-transform : '$feature_transform'" tmp=$dir/$(basename $feature_transform) cp $feature_transform $tmp; feature_transform=$tmp else # Generate the splice transform echo "Using splice +/- $splice , step $splice_step" feature_transform=$dir/tr_splice$splice-$splice_step.nnet utils/nnet/gen_splice.py --fea-dim=$feat_dim --splice=$splice --splice-step=$splice_step > $feature_transform # Choose further processing of spliced features echo "Feature type : $feat_type" case $feat_type in plain) ;; traps) #generate hamming+dct transform feature_transform_old=$feature_transform feature_transform=${feature_transform%.nnet}_hamm_dct${traps_dct_basis}.nnet echo "Preparing Hamming DCT transform into : $feature_transform" #prepare matrices with time-transposed hamming and dct utils/nnet/gen_hamm_mat.py --fea-dim=$feat_dim --splice=$splice > $dir/hamm.mat utils/nnet/gen_dct_mat.py --fea-dim=$feat_dim --splice=$splice --dct-basis=$traps_dct_basis > $dir/dct.mat #put everything together compose-transforms --binary=false $dir/dct.mat $dir/hamm.mat - | \ transf-to-nnet - - | \ nnet-concat --binary=false $feature_transform_old - $feature_transform || exit 1 ;; transf) feature_transform_old=$feature_transform feature_transform=${feature_transform%.nnet}_transf_splice${splice_after_transf}.nnet [ -z $transf ] && $alidir/final.mat [ ! -f $transf ] && echo "Missing transf $transf" && exit 1 feat_dim=$(feat-to-dim "$feats_tr nnet-forward 'nnet-concat $feature_transform_old \"transf-to-nnet $transf - |\" - |' ark:- ark:- |" -) nnet-concat --binary=false $feature_transform_old \ "transf-to-nnet $transf - |" \ "utils/nnet/gen_splice.py --fea-dim=$feat_dim --splice=$splice_after_transf |" \ $feature_transform || exit 1 ;; lda) transf=$dir/lda$lda_dim.mat #get the LDA statistics if [ ! -r "$dir/lda.acc" ]; then echo "LDA: Converting alignments to posteriors $dir/lda_post.scp" ali-to-post "ark:gunzip -c $alidir/ali.*.gz|" ark:- | \ weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark,scp:$dir/lda_post.ark,$dir/lda_post.scp 2>$dir/log/ali-to-post-lda.log || exit 1; echo "Accumulating LDA statistics $dir/lda.acc on top of spliced feats" acc-lda --rand-prune=4.0 $alidir/final.mdl "$feats_tr nnet-forward $feature_transform ark:- ark:- |" scp:$dir/lda_post.scp $dir/lda.acc 2>$dir/log/acc-lda.log || exit 1; else echo "LDA: Using pre-computed stats $dir/lda.acc" fi #estimate the transform echo "Estimating LDA transform $dir/lda.mat from the statistics $dir/lda.acc" est-lda --write-full-matrix=$dir/lda.full.mat --dim=$lda_dim $transf $dir/lda.acc 2>$dir/log/lda.log || exit 1; #append the LDA matrix to feature_transform feature_transform_old=$feature_transform feature_transform=${feature_transform%.nnet}_lda${lda_dim}.nnet transf-to-nnet $transf - | \ nnet-concat --binary=false $feature_transform_old - $feature_transform || exit 1 #remove the temporary file rm $dir/lda_post.{ark,scp} ;; *) echo "Unknown feature type $feat_type" exit 1; ;; esac # keep track of feat_type echo $feat_type > $dir/feat_type # Renormalize the MLP input to zero mean and unit variance feature_transform_old=$feature_transform feature_transform=${feature_transform%.nnet}_cmvn-g.nnet echo "Renormalizing MLP input features into $feature_transform" nnet-forward ${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \ $feature_transform_old "$(echo $feats_tr | sed 's|train.scp|train.scp.10k|')" \ ark:- 2>$dir/log/nnet-forward-cmvn.log |\ compute-cmvn-stats ark:- - | cmvn-to-nnet - - |\ nnet-concat --binary=false $feature_transform_old - $feature_transform fi ###### MAKE LINK TO THE FINAL feature_transform, so the other scripts will find it ###### (cd $dir; [ ! -f final.feature_transform ] && ln -s $(basename $feature_transform) final.feature_transform ) ###### INITIALIZE THE NNET ###### echo echo "# NN-INITIALIZATION" if [ ! -z "$mlp_init" ]; then echo "Using pre-initalized network $mlp_init"; else echo "Getting input/output dims :" #initializing the MLP, get the i/o dims... #input-dim num_fea=$(feat-to-dim "$feats_tr nnet-forward $feature_transform ark:- ark:- |" - ) { #optioanlly take output dim of DBN [ ! -z $dbn ] && num_fea=$(nnet-forward "nnet-concat $feature_transform $dbn -|" "$feats_tr" ark:- | feat-to-dim ark:- -) [ -z "$num_fea" ] && echo "Getting nnet input dimension failed!!" && exit 1 } #output-dim [ -z $num_tgt ] && num_tgt=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }') #run the MLP initializing script mlp_init=$dir/nnet.init utils/nnet/init_nnet.sh --model_size $model_size --hid_layers $hid_layers \ ${bn_dim:+ --bn-dim $bn_dim} \ ${hid_dim:+ --hid-dim $hid_dim} \ --seed $seed ${init_opts} \ ${config:+ --config $config} \ $num_fea $num_tgt $mlp_init || exit 1 #optionally prepend dbn to the initialization if [ ! -z $dbn ]; then mlp_init_old=$mlp_init; mlp_init=$dir/nnet_$(basename $dbn)_dnn.init nnet-concat $dbn $mlp_init_old $mlp_init fi fi ###### TRAIN ###### echo echo "# RUNNING THE NN-TRAINING SCHEDULER" steps/train_nnet_scheduler.sh \ --feature-transform $feature_transform \ --learn-rate $learn_rate \ --seed $seed \ ${train_opts} \ ${train_tool:+ --train-tool "$train_tool"} \ ${config:+ --config $config} \ ${use_gpu_id:+ --use-gpu-id $use_gpu_id} \ $mlp_init "$feats_tr" "$feats_cv" "$labels" $dir || exit 1 echo "$0 successfuly finished.. $dir" sleep 3 exit 0 |