Yannick Estève / ONTRAC-Kaldi

Blame view

egs/wsj/s5/steps/online/nnet3/prepare_online_decoding.sh 6.83 KB
  #!/bin/bash
  
  # Copyright 2014  Johns Hopkins University (Author: Daniel Povey)
  # Apache 2.0
  
  # Begin configuration.
  # Each variable below is a default.  The usage message further down documents
  # matching --<option> flags for some of them (e.g. --feature-type, --iter);
  # presumably parse_options.sh applies such overrides -- TODO confirm.
  stage=0 # This allows restarting partway through, when something went wrong.
          # NOTE(review): $stage is not read anywhere else in the visible script.
  # Type of the base features; the script handles mfcc, plp and fbank.
  feature_type=mfcc
  # If true, pitch options are appended to the generated online.conf and
  # $online_pitch_config is copied into <output-dir>/conf.
  add_pitch=false
  mfcc_config=conf/mfcc.conf # you can override any of these you need to override.
  plp_config=conf/plp.conf
  fbank_config=conf/fbank.conf
  # online_pitch_config is the config file for both pitch extraction and
  # post-processing; we combine them into one because during training this
  # is given to the program compute-and-process-kaldi-pitch-feats.
  online_pitch_config=conf/online_pitch.conf
  
  # Below are some options that affect the iVectors, and should probably
  # match those used in extract_ivectors_online.sh.  They are only written out
  # (to conf/ivector_extractor.conf) when an iVector extractor directory is given.
  num_gselect=5 # Gaussian-selection using diagonal model: number of Gaussians to select
  posterior_scale=0.1 # Scale on the acoustic posteriors, intended to account for
                      # inter-frame correlations.
  min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
                 # caution: you should use the same value in the online-estimation
                 # code.
  max_count=100   # This max-count of 100 can make iVectors more consistent for
                  # different lengths of utterance, by scaling up the prior term
                  # when the data-count exceeds this value.  The data-count is
                  # after posterior-scaling, so assuming the posterior-scale is
                  # 0.1, --max-count 100 starts having effect after 1000 frames,
                  # or 10 seconds of data.
  # Model iteration to take: $srcdir/${iter}.mdl is copied to <output-dir>/final.mdl.
  iter=final
  # End configuration.
  
  # Record the exact invocation so that it appears in the log.
  echo "$0 $*"

  # Source path.sh when one exists in the current directory, then source
  # parse_options.sh (presumably this is what applies the --option overrides
  # to the configuration variables -- verify against that script).  Abort if
  # it cannot be sourced or reports failure.
  if [ -f path.sh ]; then
    . ./path.sh
  fi
  . parse_options.sh || exit 1
  
  # The script takes three positional arguments (no iVector extractor) or four
  # (with an iVector extractor directory).  Any other count prints the usage
  # message and exits with status 1.
  # NOTE(review): the usage text advertises --cmd, but the configuration
  # section of this script defines no `cmd` variable -- confirm that the
  # option is really accepted before relying on it.
  if [ $# -ne 4 ] && [ $# -ne 3 ]; then
     echo "Usage: $0 [options] <lang-dir> [<ivector-extractor-dir>] <nnet-dir> <output-dir>"
     echo "e.g.: $0 data/lang exp/nnet2_online/extractor exp/nnet2_online/nnet exp/nnet2_online/nnet_online"
     echo "main options (for others, see top of script file)"
     echo "  --feature-type <mfcc|plp|fbank>                  # Type of the base features; "
     echo "                                                   # important to generate the correct"
     echo "                                                   # configs in <output-dir>/conf/"
     echo "  --add-pitch <true|false>                         # Append pitch features to cmvn"
     echo "                                                   # (default: false)"
     echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
     echo "  --config <config-file>                           # config containing options"
     echo "  --iter <model-iteration|final>                   # iteration of model to take."
     echo "  --stage <stage>                                  # stage to do partial re-run from."
     exit 1;
  fi
  
  
  # Give the positional arguments names.  The iVector extractor directory is
  # optional: in the three-argument form $iedir is left empty, which the rest
  # of the script uses as the "no iVectors" switch.
  case $# in
    4)
      lang=$1
      iedir=$2
      srcdir=$3
      dir=$4
      ;;
    3)
      lang=$1
      iedir=
      srcdir=$2
      dir=$3
      ;;
    *)
      exit 1
      ;;
  esac
  
  # Verify that every input file we are going to read exists before anything
  # is written to the output directory.  All paths are quoted so that
  # directories containing whitespace or glob characters are handled.
  for f in "$lang/phones/silence.csl" "$srcdir/${iter}.mdl" "$srcdir/tree"; do
    if [ ! -f "$f" ]; then
      echo "$0: no such file $f"
      exit 1
    fi
  done
  if [ -n "$iedir" ]; then
    for f in final.{mat,ie,dubm} splice_opts global_cmvn.stats online_cmvn.conf; do
      if [ ! -f "$iedir/$f" ]; then
        echo "$0: no such file $iedir/$f"
        exit 1
      fi
    done
    # Compare against the literal string instead of executing the option value
    # as a command.
    if [ "$add_pitch" = true ]; then
      # First number printed by matrix-dim for the extractor's final.mat, and
      # the value on the acoustic model's "input-dim:" line.
      iedim=$(matrix-dim "$iedir/final.mat" | awk '{print $1}')
      amdim=$(nnet3-am-info "$srcdir/${iter}.mdl" | awk '/input-dim:/ {print $2}')
      if [ -z "$iedim" ] || [ -z "$amdim" ]; then
        # Previously this case fell into an arithmetic syntax error and the
        # check was skipped silently; keep skipping, but say so.
        echo "$0: warning: could not read the dimensions of $iedir/final.mat and $srcdir/${iter}.mdl; skipping the pitch check"
      elif [ "$amdim" -eq "$iedim" ]; then
        # Equal dimensions are rejected when pitch is being added.
        echo "$0: remove pitch from the input of ivector extractor"
        exit 1
      fi
    fi
  fi
  
  
  # Convert $dir to an absolute pathname, so that the configuration files we
  # write will contain absolute pathnames.  Stop if the conversion fails or
  # yields an empty string: with an empty $dir every path below would be
  # created directly under the filesystem root.
  if ! dir=$(utils/make_absolute.sh "$dir") || [ -z "$dir" ]; then
    echo "$0: could not convert the output directory to an absolute path"
    exit 1
  fi
  mkdir -p "$dir/conf" || exit 1

  # The phone set of the lang directory must be compatible with the one the
  # model was trained with; keep a copy of phones.txt next to the model.
  utils/lang/check_phones_compatible.sh "$lang/phones.txt" "$srcdir/phones.txt" || exit 1
  cp "$lang/phones.txt" "$dir" || exit 1

  # The selected iteration is always stored as final.mdl in the output directory.
  cp "$srcdir/${iter}.mdl" "$dir/final.mdl" || exit 1
  cp "$srcdir/tree" "$dir/" || exit 1
  # frame_subsampling_factor is optional; copy it only when the model has one,
  # and treat a failed copy like any other failed copy above.
  if [ -f "$srcdir/frame_subsampling_factor" ]; then
    cp "$srcdir/frame_subsampling_factor" "$dir/" || exit 1
  fi
  
  # When an iVector extractor directory was given, copy the files the
  # generated configuration will point at into $dir/ivector_extractor/.
  if [ -n "$iedir" ]; then
    mkdir -p "$dir/ivector_extractor/" || exit 1
    cp "$iedir"/final.{mat,ie,dubm} "$iedir/global_cmvn.stats" "$dir/ivector_extractor/" || exit 1

    # The following things won't be needed directly by the online decoding, but
    # will allow us to run prepare_online_decoding.sh again with
    # $dir/ivector_extractor/ as the input directory (useful in certain
    # cross-system training scenarios).
    cp "$iedir/splice_opts" "$iedir/online_cmvn.conf" "$dir/ivector_extractor/" || exit 1
  fi
  
  
  mkdir -p "$dir/conf" || exit 1
  # Best-effort removal of feature configs lying directly in $dir; any error
  # (typically "no such file") is deliberately discarded.
  # NOTE(review): the feature configs generated further down are written to
  # $dir/conf/, which this command does not touch -- confirm whether stale
  # files there were meant to be removed as well.
  rm "$dir"/{plp,mfcc,fbank}.conf 2>/dev/null
  echo "$0: preparing configuration files in $dir/conf"

  # $conf is the top-level config; the rest of the script appends to it.
  conf=$dir/conf/online.conf
  # Keep one backup of a previous online.conf.  If the backup cannot be made,
  # stop instead of overwriting the only copy.
  if [ -f "$conf" ]; then
    echo "$0: moving $dir/conf/online.conf to $dir/conf/online.conf.bak"
    mv "$conf" "$conf.bak" || exit 1
  fi
  # Start from an empty file.
  : >"$conf" || exit 1
  
  # Record the base feature type and copy the matching feature config into
  # $dir/conf, pointing online.conf at the copy.
  echo "--feature-type=$feature_type" >>"$conf"

  case "$feature_type" in
    mfcc)
      echo "--mfcc-config=$dir/conf/mfcc.conf" >>"$conf"
      cp "$mfcc_config" "$dir/conf/mfcc.conf" || exit 1
      ;;
    plp)
      echo "--plp-config=$dir/conf/plp.conf" >>"$conf"
      cp "$plp_config" "$dir/conf/plp.conf" || exit 1
      ;;
    fbank)
      echo "--fbank-config=$dir/conf/fbank.conf" >>"$conf"
      cp "$fbank_config" "$dir/conf/fbank.conf" || exit 1
      ;;
    *)
      # An unsupported type used to be reported and then ignored, so the script
      # went on to finish "successfully" with an online.conf that names no
      # feature config.  Fail here instead.
      echo "Unknown feature type $feature_type"
      exit 1
      ;;
  esac
  
  
  
  # When an iVector extractor directory was given, write
  # conf/ivector_extractor.conf (plus the splice and CMVN configs it refers to)
  # and reference it from online.conf.
  if [ -n "$iedir" ]; then
    ieconf=$dir/conf/ivector_extractor.conf
    : >"$ieconf"
    echo "--ivector-extraction-config=$ieconf" >>"$conf"
    cp "$iedir/online_cmvn.conf" "$dir/conf/online_cmvn.conf" || exit 1
    # Put each whitespace-separated option from splice_opts on its own line in
    # the config.  awk does the splitting, so the options are never subject to
    # shell globbing or to echo's option parsing.
    awk '{ for (i = 1; i <= NF; i++) print $i }' "$iedir/splice_opts" >"$dir/conf/splice.conf"
    {
      echo "--splice-config=$dir/conf/splice.conf"
      echo "--cmvn-config=$dir/conf/online_cmvn.conf"
      echo "--lda-matrix=$dir/ivector_extractor/final.mat"
      echo "--global-cmvn-stats=$dir/ivector_extractor/global_cmvn.stats"
      echo "--diag-ubm=$dir/ivector_extractor/final.dubm"
      echo "--ivector-extractor=$dir/ivector_extractor/final.ie"
      echo "--num-gselect=$num_gselect"
      echo "--min-post=$min_post"
      echo "--posterior-scale=$posterior_scale" # this is currently the default in the scripts.
      echo "--max-remembered-frames=1000" # the default
      echo "--max-count=$max_count"
    } >>"$ieconf"
  fi
  
  # Optionally enable pitch: flag it in online.conf and ship a copy of the
  # pitch config in $dir/conf.  The option is compared against the literal
  # string "true" instead of being executed as a command.
  if [ "$add_pitch" = true ]; then
    echo "$0: enabling pitch features"
    echo "--add-pitch=true" >>"$conf"
    echo "$0: creating $dir/conf/online_pitch.conf"
    if [ ! -f "$online_pitch_config" ]; then
      echo "$0: expected file '$online_pitch_config' to exist."
      exit 1
    fi
    cp "$online_pitch_config" "$dir/conf/online_pitch.conf" || exit 1
    echo "--online-pitch-config=$dir/conf/online_pitch.conf" >>"$conf"
  fi
  
  # Pass the contents of the lang directory's silence.csl to the endpointing
  # options, then report where the top-level config was written.
  silphonelist=$(cat "$lang/phones/silence.csl") || exit 1
  echo "--endpoint.silence-phones=$silphonelist" >>"$conf"
  echo "$0: created config file $conf"