  #!/bin/bash
  
  # Copyright 2012/2013  Karel Vesely (Brno University of Technology)
  # Apache 2.0
  
  # Begin configuration.
  config=            # config, which is also sent to all other scripts
  
  # NETWORK INITIALIZATION
  mlp_init=          # select initialized MLP (override initialization)
  feature_transform= # select feature transform (=splice,rescaling,...) (don't build new one)
  #
  model_size=8000000 # nr. of parameters in MLP
  hid_layers=4       # nr. of hidden layers (prior to softmax or bottleneck)
  bn_dim=            # set a value to get a bottleneck network
  hid_dim=           # select hidden dimension directly (override $model_size)
  dbn=               # select DBN to prepend to the MLP initialization
  #
  init_opts=         # options, passed to the initialization script
  
  # FEATURE PROCESSING
  copy_feats=true  # resave the train features in the re-shuffled order to tmpdir
  # feature config (applies always)
  apply_cmvn=false # apply normalization to input features?
  norm_vars=false  # use variance normalization?
  delta_order=
  # feature_transform:
  splice=5         # temporal splicing
  splice_step=1    # stepsize of the splicing (1 == no gap between frames)
  feat_type=plain
  # feature config (applies to feat_type traps)
  traps_dct_basis=11 # nr. of DCT bases (applies to `traps` feat_type, splice10)
  # feature config (applies to feat_type transf) (i.e. LDA+MLLT, no fMLLR)
  transf=
  splice_after_transf=5
  # feature config (applies to feat_type lda)
  lda_dim=300        # LDA dimension (applies to `lda` feat_type)
  
  # LABELS
  labels=            # use these labels to train (override default pdf alignments)
  num_tgt=           # force to use number of outputs in the MLP (default is autodetect)
  
  # TRAINING SCHEDULER
  learn_rate=0.008   # initial learning rate
  train_opts=        # options, passed to the training script
  train_tool=        # optionally change the training tool
  
  # OTHER
  use_gpu_id= # manually select GPU id to run on (-1 disables GPU)
  analyze_alignments=true # run the alignment analysis script
  seed=777    # seed value used for training data shuffling and initialization
  # End configuration.
  
  echo "$0 $@"  # Print the command line for logging
  
  [ -f path.sh ] && . ./path.sh; 
  
  
  . parse_options.sh || exit 1;
  
  
  if [ $# != 6 ]; then
     echo "Usage: $0 <data-train> <data-dev> <lang-dir> <ali-train> <ali-dev> <exp-dir>"
     echo " e.g.: $0 data/train data/cv data/lang exp/mono_ali exp/mono_ali_cv exp/mono_nnet"
     echo "main options (for others, see top of script file)"
     echo "  --config <config-file>  # config containing options"
     exit 1;
  fi
  
  data=$1
  data_cv=$2
  lang=$3
  alidir=$4
  alidir_cv=$5
  dir=$6
  
  silphonelist=`cat $lang/phones/silence.csl` || exit 1;
  
  
  for f in $alidir/final.mdl $alidir/ali.1.gz $alidir_cv/ali.1.gz $data/feats.scp $data_cv/feats.scp; do
    [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
  done
  
  echo
  echo "# INFO"
  echo "$0 : Training Neural Network"
  printf "\t dir       : $dir 
  "
  printf "\t Train-set : $data $alidir 
  "
  printf "\t CV-set    : $data_cv $alidir_cv 
  "
  
  mkdir -p $dir/{log,nnet}
  
  #skip when already trained
  [ -e $dir/final.nnet ] && printf "\nSKIPPING TRAINING... ($0)\nnnet already trained : $dir/final.nnet ($(readlink $dir/final.nnet))\n\n" && exit 0
  
  ###### PREPARE ALIGNMENTS ######
  echo
  echo "# PREPARING ALIGNMENTS"
  if [ ! -z "$labels" ]; then
    echo "Using targets '$labels' (by force)"
  else
    echo "Using PDF targets from dirs '$alidir' '$alidir_cv'"
    #define pdf-alignment rspecifiers
    labels_tr="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- |"
    if [[ "$alidir" == "$alidir_cv" ]]; then
      labels="$labels_tr"
    else
      labels="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz $alidir_cv/ali.*.gz |\" ark:- |"
    fi
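    # NOTE: with the example dirs from the usage message above, the train rspecifier
    # expands to a pipe such as:
    #   ark:ali-to-pdf exp/mono_ali/final.mdl "ark:gunzip -c exp/mono_ali/ali.*.gz |" ark:- |
    # i.e. pdf-level targets are generated on the fly from the stored alignments.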
  
    #get the priors, get pdf-counts from alignments
    analyze-counts --binary=false "$labels_tr" $dir/ali_train_pdf.counts || exit 1
    #copy the old transition model, will be needed by decoder
    copy-transition-model --binary=false $alidir/final.mdl $dir/final.mdl || exit 1
    #copy the tree
    cp $alidir/tree $dir/tree || exit 1
  
    #analyze the train/cv alignments
    if [ "$analyze_alignments" == "true" ]; then
      utils/nnet/analyze_alignments.sh "TRAINING SET" "ark:gunzip -c $alidir/ali.*.gz |" $dir/final.mdl $lang > $dir/__ali_stats_train
      utils/nnet/analyze_alignments.sh "VALIDATION SET" "ark:gunzip -c $alidir_cv/ali.*.gz |" $dir/final.mdl $lang > $dir/__ali_stats_cv
    fi
  fi
  
  ###### PREPARE FEATURES ######
  echo
  echo "# PREPARING FEATURES"
  # shuffle the list
  echo "Preparing train/cv lists :"
  cat $data/feats.scp | utils/shuffle_list.pl --srand ${seed:-777} > $dir/train.scp
  cp $data_cv/feats.scp $dir/cv.scp
  # print the list sizes
  wc -l $dir/train.scp $dir/cv.scp
  
  #re-save the shuffled features, so they are stored sequentially on the disk in /tmp/
  if [ "$copy_feats" == "true" ]; then
    tmpdir=$(mktemp -d); mv $dir/train.scp $dir/train.scp_non_local
    utils/nnet/copy_feats.sh $dir/train.scp_non_local $tmpdir $dir/train.scp
    #remove data on exit...
    trap "echo \"Removing features tmpdir $tmpdir @ $(hostname)\"; rm -r $tmpdir" EXIT
  fi
  
  #create a 10k utt subset for global cmvn estimates
  head -n 10000 $dir/train.scp > $dir/train.scp.10k
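  # (global mean/variance statistics converge quickly, so a ~10k-utterance
  #  subset is a cheap stand-in for statistics over the full training set)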
  
  
  
  ###### PREPARE FEATURE PIPELINE ######
  
  #read the features
  feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |"
  feats_cv="ark:copy-feats scp:$dir/cv.scp ark:- |"
  
  #optionally add per-speaker CMVN
  if [ "$apply_cmvn" == "true" ]; then
    echo "Will use CMVN statistics : $data/cmvn.scp, $data_cv/cmvn.scp"
    [ ! -r $data/cmvn.scp ] && echo "Cannot find cmvn stats $data/cmvn.scp" && exit 1;
    [ ! -r $data_cv/cmvn.scp ] && echo "Cannot find cmvn stats $data_cv/cmvn.scp" && exit 1;
    feats_tr="$feats_tr apply-cmvn --print-args=false --norm-vars=$norm_vars --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp ark:- ark:- |"
    feats_cv="$feats_cv apply-cmvn --print-args=false --norm-vars=$norm_vars --utt2spk=ark:$data_cv/utt2spk scp:$data_cv/cmvn.scp ark:- ark:- |"
    # keep track of norm_vars option
    echo "$norm_vars" >$dir/norm_vars 
  else
    echo "apply_cmvn is disabled (per speaker norm. on input features)"
  fi
  
  #optionally add deltas
  if [ "$delta_order" != "" ]; then
    feats_tr="$feats_tr add-deltas --delta-order=$delta_order ark:- ark:- |"
    feats_cv="$feats_cv add-deltas --delta-order=$delta_order ark:- ark:- |"
    echo "$delta_order" > $dir/delta_order
    echo "add-deltas (delta_order $delta_order)"
  fi
  
  #get feature dim
  echo "Getting feature dim : "
  feat_dim=$(feat-to-dim --print-args=false "$feats_tr" -)
  echo "Feature dim is : $feat_dim"
  
  # Now we will start building a complex feature_transform which will
  # be forwarded in CUDA to gain more speed.
  #
  # We will use one GPU for both the feature_transform and MLP training in a single binary tool.
  # This is against the kaldi spirit, but it is necessary, because on some sites a GPU
  # cannot be shared across two or more processes (compute-exclusive mode),
  # and we would like to use a single GPU per training instance,
  # so that the grid resources can be used efficiently...
  
  if [ ! -z "$feature_transform" ]; then
    echo "Using pre-computed feature-transform : '$feature_transform'"
    tmp=$dir/$(basename $feature_transform) 
    cp $feature_transform $tmp; feature_transform=$tmp
  else
    # Generate the splice transform
    echo "Using splice +/- $splice , step $splice_step"
    feature_transform=$dir/tr_splice$splice-$splice_step.nnet
    utils/nnet/gen_splice.py --fea-dim=$feat_dim --splice=$splice --splice-step=$splice_step > $feature_transform
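    # the splice component stacks 2*splice+1 frames (splice_step sets the gap
    # between them), so the spliced output dim is (2*splice+1)*feat_dim;
    # e.g. a hypothetical feat_dim=39 with the default splice=5 gives (2*5+1)*39 = 429.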
  
    # Choose further processing of spliced features
    echo "Feature type : $feat_type"
    case $feat_type in
      plain)
      ;;
      traps)
        #generate hamming+dct transform
        feature_transform_old=$feature_transform
        feature_transform=${feature_transform%.nnet}_hamm_dct${traps_dct_basis}.nnet
        echo "Preparing Hamming DCT transform into : $feature_transform"
        #prepare matrices with time-transposed hamming and dct
        utils/nnet/gen_hamm_mat.py --fea-dim=$feat_dim --splice=$splice > $dir/hamm.mat
        utils/nnet/gen_dct_mat.py --fea-dim=$feat_dim --splice=$splice --dct-basis=$traps_dct_basis > $dir/dct.mat
        #put everything together
        compose-transforms --binary=false $dir/dct.mat $dir/hamm.mat - | \
          transf-to-nnet - - | \
          nnet-concat --binary=false $feature_transform_old - $feature_transform || exit 1
      ;;
      transf)
        feature_transform_old=$feature_transform
        feature_transform=${feature_transform%.nnet}_transf_splice${splice_after_transf}.nnet
      [ -z $transf ] && transf=$alidir/final.mat
        [ ! -f $transf ] && echo "Missing transf $transf" && exit 1
        feat_dim=$(feat-to-dim "$feats_tr nnet-forward 'nnet-concat $feature_transform_old \"transf-to-nnet $transf - |\" - |' ark:- ark:- |" -)
        nnet-concat --binary=false $feature_transform_old \
          "transf-to-nnet $transf - |" \
          "utils/nnet/gen_splice.py --fea-dim=$feat_dim --splice=$splice_after_transf |" \
          $feature_transform || exit 1
      ;;
      lda)
        transf=$dir/lda$lda_dim.mat
        #get the LDA statistics
        if [ ! -r "$dir/lda.acc" ]; then
          echo "LDA: Converting alignments to posteriors $dir/lda_post.scp"
          ali-to-post "ark:gunzip -c $alidir/ali.*.gz|" ark:- | \
            weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark,scp:$dir/lda_post.ark,$dir/lda_post.scp 2>$dir/log/ali-to-post-lda.log || exit 1;
          echo "Accumulating LDA statistics $dir/lda.acc on top of spliced feats"
          acc-lda --rand-prune=4.0 $alidir/final.mdl "$feats_tr nnet-forward $feature_transform ark:- ark:- |" scp:$dir/lda_post.scp $dir/lda.acc 2>$dir/log/acc-lda.log || exit 1;
        else
          echo "LDA: Using pre-computed stats $dir/lda.acc"
        fi
        #estimate the transform  
        echo "Estimating LDA transform $dir/lda.mat from the statistics $dir/lda.acc"
        est-lda --write-full-matrix=$dir/lda.full.mat --dim=$lda_dim $transf $dir/lda.acc 2>$dir/log/lda.log || exit 1;
        #append the LDA matrix to feature_transform
        feature_transform_old=$feature_transform
        feature_transform=${feature_transform%.nnet}_lda${lda_dim}.nnet
        transf-to-nnet $transf - | \
          nnet-concat --binary=false $feature_transform_old - $feature_transform || exit 1
        #remove the temporary file
        rm $dir/lda_post.{ark,scp}
      ;;
      *)
        echo "Unknown feature type $feat_type"
        exit 1;
      ;;
    esac
    # keep track of feat_type
    echo $feat_type > $dir/feat_type
  
    # Renormalize the MLP input to zero mean and unit variance
    feature_transform_old=$feature_transform
    feature_transform=${feature_transform%.nnet}_cmvn-g.nnet
    echo "Renormalizing MLP input features into $feature_transform"
    nnet-forward ${use_gpu_id:+ --use-gpu-id=$use_gpu_id} \
      $feature_transform_old "$(echo $feats_tr | sed 's|train.scp|train.scp.10k|')" \
      ark:- 2>$dir/log/nnet-forward-cmvn.log |\
    compute-cmvn-stats ark:- - | cmvn-to-nnet - - |\
  nnet-concat --binary=false $feature_transform_old - $feature_transform || exit 1
  fi
  
  
  ###### MAKE LINK TO THE FINAL feature_transform, so the other scripts will find it ######
  (cd $dir; [ ! -f final.feature_transform ] && ln -s $(basename $feature_transform) final.feature_transform )
  
  
  ###### INITIALIZE THE NNET ######
  echo 
  echo "# NN-INITIALIZATION"
  if [ ! -z "$mlp_init" ]; then
    echo "Using pre-initalized network $mlp_init";
  else
    echo "Getting input/output dims :"
    #initializing the MLP, get the i/o dims...
    #input-dim
    num_fea=$(feat-to-dim "$feats_tr nnet-forward $feature_transform ark:- ark:- |" - )
    { #optionally take output dim of DBN
      [ ! -z $dbn ] && num_fea=$(nnet-forward "nnet-concat $feature_transform $dbn -|" "$feats_tr" ark:- | feat-to-dim ark:- -)
      [ -z "$num_fea" ] && echo "Getting nnet input dimension failed!!" && exit 1
    }
  
    #output-dim
    [ -z $num_tgt ] && num_tgt=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }')
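    # hmm-info prints a line like "number of pdfs 1979" (the value is model-dependent),
    # so the awk above takes its last field as the NN output dimension.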
  
    #run the MLP initializing script
    mlp_init=$dir/nnet.init
    utils/nnet/init_nnet.sh --model_size $model_size --hid_layers $hid_layers \
      ${bn_dim:+ --bn-dim $bn_dim} \
      ${hid_dim:+ --hid-dim $hid_dim} \
      --seed $seed ${init_opts} \
      ${config:+ --config $config} \
      $num_fea $num_tgt $mlp_init || exit 1
  
    #optionally prepend dbn to the initialization
    if [ ! -z $dbn ]; then
      mlp_init_old=$mlp_init; mlp_init=$dir/nnet_$(basename $dbn)_dnn.init
      nnet-concat $dbn $mlp_init_old $mlp_init || exit 1
    fi
  fi
  
  
  ###### TRAIN ######
  echo
  echo "# RUNNING THE NN-TRAINING SCHEDULER"
  steps/train_nnet_scheduler.sh \
    --feature-transform $feature_transform \
    --learn-rate $learn_rate \
    --seed $seed \
    ${train_opts} \
    ${train_tool:+ --train-tool "$train_tool"} \
    ${config:+ --config $config} \
    ${use_gpu_id:+ --use-gpu-id $use_gpu_id} \
    $mlp_init "$feats_tr" "$feats_cv" "$labels" $dir || exit 1
  
  
  echo "$0 successfuly finished.. $dir"
  
  sleep 3
  exit 0