Yannick Estève / ONTRAC-Kaldi

Blame view

egs/wsj/s5/steps/nnet/pretrain_dbn.sh 14.3 KB
  #!/bin/bash
  # Copyright 2013-2015 Brno University of Technology (author: Karel Vesely)
  
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #  http://www.apache.org/licenses/LICENSE-2.0
  #
  # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  # MERCHANTABLITY OR NON-INFRINGEMENT.
  # See the Apache 2 License for the specific language governing permissions and
  # limitations under the License.
  
  # To be run from ../../
  #
  # Restricted Boltzmann Machine (RBM) pre-training by Contrastive Divergence
  # algorithm (CD-1). A stack of RBMs forms a Deep Belief Network (DBN).
  #
  # This script by default pre-trains on plain features (ie. saved fMLLR features),
  # building a 'feature_transform' containing +/-5 frame splice and global CMVN.
  #
  # There is also a support for adding speaker-based CMVN, deltas, i-vectors,
  # or passing custom 'feature_transform' or its prototype.
  #
  
  # Begin configuration.
  # These are the default values of the options; the usage message below lists
  # the corresponding '--option' names (presumably applied by 'parse_options.sh').

  ## Network topology and parameter initialization,
  nn_depth=6               # hidden layers in the stack (one RBM is trained per layer),
  hid_dim=2048             # hidden units in each layer,
  param_stddev_first=0.1   # <ParamStddev> in the prototype of the 1st RBM,
  param_stddev=0.1         # <ParamStddev> in the prototypes of the following RBMs,
  input_vis_type=gauss     # <VisibleType> of the 1st RBM ('gauss' and 'bern' are treated specially),

  ## Amount of training,
  rbm_iter=1               # pre-training epochs per RBM (doubled for the 1st RBM with 'gauss' input),

  ## Options of the RBM training tool,
  rbm_lrate=0.4            # learning rate of the RBMs on top of the 1st one,
  rbm_lrate_low=0.01       # learning rate of the 1st RBM (replaced by 'rbm_lrate' for 'bern' input),
  rbm_l2penalty=0.0002     # L2 penalty (increases RBM-mixing rate),
  rbm_extra_opts=          # extra options, passed verbatim to 'rbm-train-cd1-frmshuff',

  ## Handling of the input data,
  copy_feats=true                      # 'true' : re-save the features into a temporary directory,
  copy_feats_tmproot=/tmp/kaldi.XXXX   # template given to 'mktemp -d' for that directory,
  copy_feats_compress=true             # value of '--compress' for 'copy-feats' when re-saving,

  ## Input feature pipeline,
  splice=5                 # default transform splices +/-N frames along the time axis,
  cmvn_opts=               # (optional) non-empty value adds 'apply-cmvn' with these options,
  delta_opts=              # (optional) non-empty value adds 'add-deltas' with these options,
  ivector=                 # (optional) rx-filename with the vectors appended to the features,
  ivector_append_tool=append-vector-to-feats   # (optional) tool which appends the vectors,

  feature_transform_proto= # (optional) prototype to build the 'feature_transform' from,
  feature_transform=       # (optional) ready-made 'feature_transform' to be imported,

  ## Miscellaneous,
  verbose=1                # value of '--verbose' for the RBM training (enable per-cache reports),
  skip_cuda_check=false    # 'true' : do not run 'cuda-gpu-available' before the training,

  # End configuration.
  
  # Log the invocation: the script name followed by all its arguments,
  echo "$0 $*"

  # 'path.sh' is optional, it is sourced only when present in the working directory,
  if [ -f path.sh ]; then . ./path.sh; fi
  # Option parsing is mandatory, stop when it cannot be sourced or reports a failure,
  . parse_options.sh || exit 1

  # From now on: stop on a failing command, on an unset variable
  # and on a failure anywhere inside a pipeline,
  set -euo pipefail
  
  # Exactly two positional arguments must remain at this point,
  # otherwise the help is printed (one line per string) and the script stops with status 1,
  if [ "$#" -ne 2 ]; then
    printf '%s\n' \
      "Usage: $0 <data> <exp-dir>" \
      " e.g.: $0 data/train exp/rbm_pretrain" \
      "main options (for others, see top of script file)" \
      "  --config <config-file>           # config containing options" \
      "" \
      "  --nn-depth <N>                   # number of RBM layers" \
      "  --hid-dim <N>                    # number of hidden units per layer" \
      "  --rbm-iter <N>                   # number of CD-1 iterations per layer" \
      "                                   # can be used to subsample large datasets" \
      "  --rbm-lrate <float>              # learning-rate for Bernoulli-Bernoulli RBMs" \
      "  --rbm-lrate-low <float>          # learning-rate for Gaussian-Bernoulli RBM" \
      "" \
      "  --cmvn-opts  <string>            # add 'apply-cmvn' to input feature pipeline" \
      "  --delta-opts <string>            # add 'add-deltas' to input feature pipeline" \
      "  --splice <N>                     # splice +/-N frames of input features" \
      "  --copy-feats <bool>              # copy features to /tmp, lowers storage stress" \
      "" \
      "  --feature_transform_proto <file> # use this prototype for 'feature_transform'" \
      "  --feature-transform <file>       # directly use this 'feature_transform'"
    exit 1
  fi
  
  # Positional arguments,
  data=$1   # data directory, 'feats.scp' is read from it,
  dir=$2    # experiment directory, everything is written there,

  # Every required input file has to exist, stop at the first missing one,
  for required in $data/feats.scp; do
    if [ ! -f $required ]; then
      echo "$0: no such file $required"
      exit 1
    fi
  done
  
  # Banner with the output directory and the training set (incl. its number of 'feats.scp' lines).
  # The paths are passed to 'printf' as arguments, never as a part of the format string,
  # so a '%' or a backslash inside a path is printed as it is.
  echo "# INFO"
  echo "$0 : Pre-training Deep Belief Network as a stack of RBMs"
  printf '\t dir       : %s \n' "$dir"
  printf "\t Train-set : %s '%s'\n" "$data" "$(wc -l <"$data/feats.scp")"
  echo
  
  # Nothing to do when the DBN of the full depth is already there,
  if [ -e $dir/${nn_depth}.dbn ]; then
    echo "$0 Skipping, already have $dir/${nn_depth}.dbn"
    exit 0
  fi

  # Unless the check is disabled, 'cuda-gpu-available' has to succeed
  # (i.e. CUDA is compiled in and a GPU is available),
  $skip_cuda_check || cuda-gpu-available || exit 1

  # The log directory (this creates '$dir' as well),
  mkdir -p $dir/log
  
  ###### PREPARE FEATURES ######
  # Produces in '$dir':
  #   train_sorted.scp     : the feature list in the original order,
  #   train.scp            : the same list, shuffled with a fixed seed,
  #   train.scp.10k        : the first 10000 lines of the shuffled list,
  #   train.scp_non_local  : the shuffled list of the original '$data/feats.scp',
  echo
  echo "# PREPARING FEATURES"
  if [ "$copy_feats" == "true" ]; then
    # re-save the features into a fresh temporary directory (under /tmp/ by default),
    tmpdir=$(mktemp -d "$copy_feats_tmproot")
    # The clean-up is bound to EXIT only. The signals just call 'exit', which
    # fires the EXIT trap: the script really stops after a signal (it does not
    # go on with the features already deleted) and the clean-up runs exactly once.
    # Double quotes: '$tmpdir' and the hostname are expanded now, while the trap is set.
    trap "echo \"# Removing features tmpdir $tmpdir @ $(hostname)\"; ls \"$tmpdir\"; rm -r \"$tmpdir\"" EXIT
    trap 'exit 1' INT QUIT TERM
    copy-feats --compress=$copy_feats_compress "scp:$data/feats.scp" "ark,scp:$tmpdir/train.ark,$dir/train_sorted.scp" || exit 1
  else
    # or copy the list,
    cp "$data/feats.scp" "$dir/train_sorted.scp"
  fi
  # shuffle the list,
  utils/shuffle_list.pl --srand 777 <"$dir/train_sorted.scp" >"$dir/train.scp"

  # create a 10k utt subset for global cmvn estimates,
  head -n 10000 "$dir/train.scp" >"$dir/train.scp.10k"

  # for debugging, add list with non-local features,
  utils/shuffle_list.pl --srand 777 <"$data/feats.scp" >"$dir/train.scp_non_local"
  
  ###### OPTIONALLY IMPORT FEATURE SETTINGS ######
  # When a ready-made 'feature_transform' is given, the settings stored as files
  # next to it (cmvn_opts, delta_opts, ivector_dim, ivector_append_tool) replace
  # the values of the variables of the same name. A missing file leaves the value as it is.
  ivector_dim= # no ivectors,
  if [ -n "$feature_transform" ]; then
    D=$(dirname "$feature_transform")
    echo "# importing feature settings from dir '$D'"
    if [ -e "$D/cmvn_opts" ]; then cmvn_opts=$(cat "$D/cmvn_opts"); fi
    if [ -e "$D/delta_opts" ]; then delta_opts=$(cat "$D/delta_opts"); fi
    if [ -e "$D/ivector_dim" ]; then ivector_dim=$(cat "$D/ivector_dim"); fi
    if [ -e "$D/ivector_append_tool" ]; then ivector_append_tool=$(cat "$D/ivector_append_tool"); fi
    echo "# cmvn_opts='$cmvn_opts' delta_opts='$delta_opts' ivector_dim='$ivector_dim'"
  fi
  
  ###### PREPARE FEATURE PIPELINE ######
  # 'feats_tr' is a piped command in a single string, the optional stages
  # are appended to its end. It begins with reading the shuffled list,
  feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |"

  # per-speaker CMVN, only with non-empty 'cmvn_opts',
  # it needs readable '$data/cmvn.scp' and '$data/utt2spk',
  if [ -z "$cmvn_opts" ]; then
    echo "# 'apply-cmvn' not used,"
  else
    echo "+ 'apply-cmvn' with '$cmvn_opts' using statistics : $data/cmvn.scp"
    if [ ! -r $data/cmvn.scp ]; then echo "Missing $data/cmvn.scp"; exit 1; fi
    if [ ! -r $data/utt2spk ]; then echo "Missing $data/utt2spk"; exit 1; fi
    feats_tr+=" apply-cmvn $cmvn_opts --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp ark:- ark:- |"
  fi

  # deltas, only with non-empty 'delta_opts',
  if [ -n "$delta_opts" ]; then
    feats_tr+=" add-deltas $delta_opts ark:- ark:- |"
    echo "# + 'add-deltas' with '$delta_opts'"
  fi

  # store the non-empty options into '$dir',
  if [ -n "$cmvn_opts" ]; then echo "$cmvn_opts" >$dir/cmvn_opts; fi
  if [ -n "$delta_opts" ]; then echo "$delta_opts" >$dir/delta_opts; fi

  # dimension of the features at the end of the pipeline,
  feat_dim=$(feat-to-dim "$feats_tr" -)
  echo "# feature dim : $feat_dim (input of 'feature_transform')"
  
  # Now we start building 'feature_transform' which goes right in front of a NN.
  # The forwarding is computed on a GPU before the frame shuffling is applied.
  #
  # Same GPU is used both for 'feature_transform' and the NN training.
  # So it has to be done by a single process (we are using exclusive mode).
  # This also reduces the CPU-GPU uploads/downloads to minimum.
  #
  # Either the given 'feature_transform' is copied into '$dir', or a new one is
  # initialized from a prototype (custom, or the default splice) and extended by
  # a global mean/variance normalization estimated on the 10k-utterance list.

  if [ -n "$feature_transform" ]; then
    echo "# importing 'feature_transform' from '$feature_transform'"
    tmp=$dir/imported_$(basename "$feature_transform")
    cp "$feature_transform" "$tmp"; feature_transform=$tmp
  else
    # Make default proto with splice,
    if [ -n "$feature_transform_proto" ]; then
      echo "# importing custom 'feature_transform_proto' from : $feature_transform_proto"
    else
      echo "+ default 'feature_transform_proto' with splice +/-$splice frames"
      feature_transform_proto=$dir/splice${splice}.proto
      echo "<Splice> <InputDim> $feat_dim <OutputDim> $(((2*splice+1)*feat_dim)) <BuildVector> -$splice:$splice </BuildVector>" >"$feature_transform_proto"
    fi

    # Initialize 'feature-transform' from a prototype,
    feature_transform=$dir/tr_$(basename "$feature_transform_proto" .proto).nnet
    nnet-initialize --binary=false "$feature_transform_proto" "$feature_transform"

    # Renormalize the MLP input to zero mean and unit variance,
    feature_transform_old=$feature_transform
    feature_transform=${feature_transform%.nnet}_cmvn-g.nnet
    echo "# compute normalization stats from 10k sentences"
    # The same pipeline, reading the 10k list. The list name is replaced as a
    # literal string (both sides quoted), exactly where it was put at the
    # beginning of 'feats_tr'; no other 'train?scp'-like part of a path can match.
    feats_tr_10k=${feats_tr/"scp:$dir/train.scp "/"scp:$dir/train.scp.10k "}
    nnet-forward --print-args=true --use-gpu=yes "$feature_transform_old" \
      "$feats_tr_10k" ark:- |\
      compute-cmvn-stats ark:- "$dir/cmvn-g.stats"
    echo "# + normalization of NN-input at '$feature_transform'"
    nnet-concat --print-args=false --binary=false "$feature_transform_old" \
      "cmvn-to-nnet $dir/cmvn-g.stats -|" "$feature_transform"
  fi
  
  # Optional i-vector input. 'ivector' is an rx-filename, so it may contain
  # spaces (e.g. a piped command); all the tests on it are therefore quoted.
  if [ -n "$ivector" ]; then
    echo
    echo "# ADDING IVECTOR FEATURES"
    # The iVectors are concatenated 'as they are' directly to the input of the neural network,
    # To do this, we paste the features, and use <ParallelComponent> where the 1st component
    # contains the transform and 2nd network contains <Copy> component.

    # The i-vector dim is the difference of the dims with and without the appended vectors,
    echo "# getting dims,"
    dim_raw=$(feat-to-dim "$feats_tr" -)
    dim_raw_and_ivec=$(feat-to-dim "$feats_tr $ivector_append_tool ark:- '$ivector' ark:- |" -)
    dim_ivec=$((dim_raw_and_ivec - dim_raw))
    echo "# dims, feats-raw $dim_raw, ivectors $dim_ivec,"

    # Should we do something with 'feature_transform'?
    if [ -n "$ivector_dim" ]; then
      # No, the 'ivector_dim' comes from dir with 'feature_transform' with iVec forwarding,
      # we only verify that the dimension is the same,
      echo "# assuming we got '$feature_transform' with ivector forwarding,"
      if [ "$ivector_dim" != "$dim_ivec" ]; then
        echo "Error, i-vector dimensionality mismatch! (expected $ivector_dim, got $dim_ivec in $ivector)"
        exit 1
      fi
    else
      # Yes, adjust the transform to do ``iVec forwarding'',
      feature_transform_old=$feature_transform
      feature_transform=${feature_transform%.nnet}_ivec_copy.nnet
      echo "# setting up ivector forwarding into '$feature_transform',"
      dim_transformed=$(feat-to-dim "$feats_tr nnet-forward $feature_transform_old ark:- ark:- |" -)
      # 2nd nested network : copies the i-vector part unchanged,
      nnet-initialize --print-args=false <(echo "<Copy> <InputDim> $dim_ivec <OutputDim> $dim_ivec <BuildVector> 1:$dim_ivec </BuildVector>") "$dir/tr_ivec_copy.nnet"
      # the old transform and the copy network side by side,
      nnet-initialize --print-args=false <(echo "<ParallelComponent> <InputDim> $((dim_raw+dim_ivec)) <OutputDim> $((dim_transformed+dim_ivec)) <NestedNnetFilename> $feature_transform_old $dir/tr_ivec_copy.nnet </NestedNnetFilename>") "$feature_transform"
    fi
    echo "$dim_ivec" >"$dir/ivector_dim" # mark down the iVec dim!
    echo "$ivector_append_tool" >"$dir/ivector_append_tool"

    # pasting the iVecs to the features,
    echo "# + ivector input '$ivector'"
    feats_tr="$feats_tr $ivector_append_tool ark:- '$ivector' ark:- |"
  fi
  
  ###### Show the final 'feature_transform' in the log,
  echo
  echo "### Showing the final 'feature_transform':"
  nnet-info "$feature_transform"
  echo "###"

  ###### MAKE LINK TO THE FINAL feature_transform, so the other scripts will find it ######
  # An old link is removed first. The '-L' test finds also a dangling link
  # (its target was deleted), which '-f' does not see and which would make
  # the 'ln -s' below fail with 'File exists'.
  if [ -L "$dir/final.feature_transform" ] || [ -f "$dir/final.feature_transform" ]; then
    rm -f "$dir/final.feature_transform"
  fi
  # The link is relative (target given by its basename), created inside '$dir',
  (cd "$dir" && ln -s "$(basename "$feature_transform")" final.feature_transform)
  feature_transform=$dir/final.feature_transform


  ###### GET THE DIMENSIONS ######
  # 'num_fea' : dim of the features after the final 'feature_transform' (computed without GPU),
  # 'num_hid' : number of hidden units, same for all the layers,
  num_fea=$(feat-to-dim --print-args=false "$feats_tr nnet-forward --use-gpu=no $feature_transform ark:- ark:- |" - 2>/dev/null)
  num_hid=$hid_dim
  
  
  ###### PERFORM THE PRE-TRAINING ######
  # One RBM is trained per hidden layer. After each layer the RBMs trained so far
  # are stored as the network '$dir/<depth>.dbn', which (concatenated after the
  # 'feature_transform') produces the input for training the next RBM.
  #
  # Re-running is supported: a layer is skipped when both its RBM and its DBN
  # exist. When only the RBM exists (a run stopped between the training and the
  # conversion), the training is skipped, but the DBN is still created.

  # The feature pipeline reading the 10k list, used for the cmvn stats below.
  # The list name is replaced as a literal string (both sides quoted), exactly
  # where it was put at the beginning of 'feats_tr',
  feats_tr_10k=${feats_tr/"scp:$dir/train.scp "/"scp:$dir/train.scp.10k "}

  for depth in $(seq 1 $nn_depth); do
    echo
    echo "# PRE-TRAINING RBM LAYER $depth"
    RBM=$dir/$depth.rbm

    if [ -f "$RBM" ]; then
      echo "RBM '$RBM' already trained, skipping."
      if [ -f "$dir/$depth.dbn" ]; then continue; fi
      echo "# '$dir/$depth.dbn' is missing, creating it from the existing '$RBM'"
    elif [ "$depth" == "1" ]; then
      # The first RBM needs special treatment, because of Gaussian input nodes,
      # This is usually Gaussian-Bernoulli RBM (not if CNN layers are part of input transform)
      # initialize,
      echo "# initializing '$RBM.init'"
      echo "<Rbm> <InputDim> $num_fea <OutputDim> $num_hid <VisibleType> $input_vis_type <HiddenType> bern <ParamStddev> $param_stddev_first" > "$RBM.proto"
      nnet-initialize "$RBM.proto" "$RBM.init" 2>"$dir/log/nnet-initialize.$depth.log" || exit 1
      # pre-train,
      num_iter=$rbm_iter
      if [ "$input_vis_type" == "gauss" ]; then num_iter=$((2*rbm_iter)); fi # 2x more epochs for Gaussian input
      if [ "$input_vis_type" == "bern" ]; then rbm_lrate_low=$rbm_lrate; fi # original lrate for Bernoulli input
      echo "# pretraining '$RBM' (input $input_vis_type, lrate $rbm_lrate_low, iters $num_iter)"
      # '$rbm_extra_opts' stays unquoted on purpose, it can hold several options,
      rbm-train-cd1-frmshuff --learn-rate=$rbm_lrate_low --l2-penalty=$rbm_l2penalty \
        --num-iters=$num_iter --verbose=$verbose \
        --feature-transform="$feature_transform" \
        $rbm_extra_opts \
        "$RBM.init" "$feats_tr" "$RBM" 2>"$dir/log/rbm.$depth.log" || exit 1
    else
      # This is Bernoulli-Bernoulli RBM,
      # cmvn stats for init, computed on the output of the DBN trained so far,
      echo "# computing cmvn stats '$dir/$depth.cmvn' for RBM initialization"
      if [ ! -f "$dir/$depth.cmvn" ]; then
        nnet-forward --print-args=false --use-gpu=yes \
          "nnet-concat $feature_transform $dir/$((depth-1)).dbn - |" \
          "$feats_tr_10k" ark:- | \
        compute-cmvn-stats --print-args=false ark:- - | \
        cmvn-to-nnet --print-args=false - "$dir/$depth.cmvn" || exit 1
      else
        echo "# compute-cmvn-stats already done, skipping."
      fi
      # initialize,
      echo "initializing '$RBM.init'"
      echo "<Rbm> <InputDim> $num_hid <OutputDim> $num_hid <VisibleType> bern <HiddenType> bern <ParamStddev> $param_stddev <VisibleBiasCmvnFilename> $dir/$depth.cmvn" > "$RBM.proto"
      nnet-initialize "$RBM.proto" "$RBM.init" 2>"$dir/log/nnet-initialize.$depth.log" || exit 1
      # pre-train,
      echo "pretraining '$RBM' (lrate $rbm_lrate, iters $rbm_iter)"
      # '$rbm_extra_opts' stays unquoted on purpose, it can hold several options,
      rbm-train-cd1-frmshuff --learn-rate=$rbm_lrate --l2-penalty=$rbm_l2penalty \
        --num-iters=$rbm_iter --verbose=$verbose \
        --feature-transform="nnet-concat $feature_transform $dir/$((depth-1)).dbn - |" \
        $rbm_extra_opts \
        "$RBM.init" "$feats_tr" "$RBM" 2>"$dir/log/rbm.$depth.log" || exit 1
    fi

    # Create DBN stack,
    if [ "$depth" == "1" ]; then
      echo "# converting RBM to $dir/$depth.dbn"
      rbm-convert-to-nnet "$RBM" "$dir/$depth.dbn"
    else
      echo "# appending RBM to $dir/$depth.dbn"
      nnet-concat "$dir/$((depth-1)).dbn" "rbm-convert-to-nnet $RBM - |" "$dir/$depth.dbn"
    fi

  done
  
  # Final report: the 'progress' lines of the per-layer training logs.
  echo
  echo "# REPORT"
  echo "# RBM pre-training progress (line per-layer)"
  # The report is only informational. Without the fallback, a 'grep' which
  # matches nothing (or finds no log files) would stop the script under
  # 'set -e' just before the final message, with a non-zero exit status.
  grep progress "$dir"/log/rbm.*.log || echo "# (no 'progress' lines found in $dir/log/rbm.*.log)"
  echo

  echo "Pre-training finished."

  # NOTE(review): the reason for the pause is not evident from this file, kept as it was,
  sleep 3
  exit 0