prepare_online_decoding.sh 10.3 KB
#!/bin/bash

# Copyright 2014  Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0

# Begin configuration.
# Defaults for the options of this script; the usage message further down
# lists the matching --option names.

# Job control.
cmd=run.pl
stage=0       # Raise this to resume part-way through after something went wrong.
cleanup=true  # If true, remove intermediate accumulator files after use.

# Front-end feature setup.
feature_type=mfcc
add_pitch=false
pitch_config=conf/pitch.conf
pitch_process_config=conf/pitch_process.conf

# Online CMVN.
online_cmvn_config=conf/online_cmvn.conf
# If true, apply online CMVN normalization per utterance rather than per
# speaker.
per_utt_cmvn=false

# Basis-fMLLR training.
# If true, then treat each utterance as a separate speaker for purposes of
# basis training... this is recommended if the number of actual speakers in
# your training set is less than (feature-dim) * (feature-dim+1).
per_utt_basis=true
silence_weight=0.01
# End configuration.

# Record the exact invocation in whatever log captures our output.
echo "$0 $*"

# Source path.sh when the current directory has one, then parse_options.sh
# (looked up via PATH); give up if the latter fails.
if [ -f path.sh ]; then
  . ./path.sh
fi
. parse_options.sh || exit 1

# Exactly four or five positional arguments are accepted; anything else prints
# the usage text and exits with status 1.
if [ $# -ne 4 ] && [ $# -ne 5 ]; then
  printf '%s\n' \
    "Usage: $0 [options] <data-dir> <lang-dir> <sat-model-dir> [<MMI-model>] <output-dir>" \
    "e.g.: $0 data/train data/lang exp/tri3b exp/tri3b_mmi/final.mdl exp/tri3b_online" \
    "main options (for others, see top of script file)" \
    "  --feature-type <mfcc|plp>                        # Type of the base features; " \
    "                                                   # important to generate the correct" \
    "                                                   # configs in <output-dir>/conf/" \
    "  --online-cmvn-config <config>                    # config for online cmvn," \
    "                                                   # default conf/online_cmvn.conf" \
    "  --add-pitch <true|false>                         # Append pitch features to cmvn" \
    "                                                   # (default: false)" \
    "  --per-utt-cmvn <true|false>                      # Apply online CMVN per utt, not" \
    "                                                   # per speaker (default: false)" \
    "  --per-utt-basis <true|false>                     # Do basis computation per utterance" \
    "                                                   # (default: true)" \
    "  --silence-weight <weight>                        # Weight on silence for basis fMLLR;" \
    "                                                   # default 0.01." \
    "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." \
    "  --config <config-file>                           # config containing options" \
    "  --stage <stage>                                  # stage to do partial re-run from."
  exit 1
fi


# The first three positional arguments mean the same thing in both call forms.
data=$1
lang=$2
srcdir=$3

# With five arguments the fourth one names the model that is later copied to
# final.rescore_mdl; with four, the source system's own final.mdl is used.
case $# in
  5)
    mmi_model=$4
    dir=$5
    ;;
  *)
    mmi_model=$srcdir/final.mdl
    dir=$4
    ;;
esac


# Fail early, with the name of the offending file, if any required input is
# missing.  $data/cmvn.scp is included because the global CMVN statistics are
# always summed from it further down, and that step hides the tool's own
# error output.
for f in $srcdir/final.mdl $srcdir/ali.1.gz $data/feats.scp $data/cmvn.scp \
    $lang/phones.txt $mmi_model $online_cmvn_config; do
  if [ ! -f "$f" ]; then
    echo "$0: no such file $f"
    exit 1
  fi
done

# Use the same number of jobs as the source system (read from its num_jobs
# file); the per-job files ali.JOB.gz are indexed by it below.
nj=$(cat $srcdir/num_jobs) || exit 1
sdata=$data/split$nj

# Re-split the data unless the split directory exists and feats.scp is older
# than it.
if ! [[ -d $sdata && $data/feats.scp -ot $sdata ]]; then
  split_data.sh $data $nj || exit 1
fi

mkdir -p $dir/log
echo $nj >$dir/num_jobs || exit 1

# Stop unless check_phones_compatible.sh accepts the phone tables of the lang
# directory and of the source system; then keep a copy in the output dir.
utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1
cp $lang/phones.txt $dir || exit 1

# Options recorded by the source system.  splice_opts and cmvn_opts may be
# absent, in which case the variables are simply left empty; the silence phone
# list is mandatory.
splice_opts=$(cat $srcdir/splice_opts 2>/dev/null)
cmvn_opts=$(cat $srcdir/cmvn_opts 2>/dev/null)
silphonelist=$(cat $lang/phones/silence.csl) || exit 1

# Optional files: copy each one only when the source system has it, so that a
# genuine copy failure is not hidden behind a blanket 2>/dev/null.
for f in splice_opts cmvn_opts final.mat; do
  if [ -f $srcdir/$f ]; then
    if ! cp $srcdir/$f $dir/; then
      echo "$0: error copying $srcdir/$f to $dir/"
      exit 1
    fi
  fi
done

# Required models: every later stage reads $dir/final.mdl, and the config
# stage compares it against $dir/final.rescore_mdl, so a failed copy is fatal.
if ! cp $srcdir/final.mdl $dir/; then
  echo "$0: error copying $srcdir/final.mdl to $dir/"
  exit 1
fi
if ! cp $mmi_model $dir/final.rescore_mdl; then
  echo "$0: error copying $mmi_model to $dir/final.rescore_mdl"
  exit 1
fi

# Set up the unadapted features "$sifeats".
# An LDA matrix in the output dir (copied from the source system) selects the
# splice+LDA front end; otherwise deltas are used.
if [ -f $dir/final.mat ]; then feat_type=lda; else feat_type=delta; fi

# --per-utt-cmvn=true means "normalize each utterance on its own", so the
# speaker map is passed to apply-cmvn-online only in the per-speaker case.
if $per_utt_cmvn; then
  online_cmvn_spk2utt_opt=
else
  online_cmvn_spk2utt_opt="--spk2utt=ark:$sdata/JOB/spk2utt"
fi


# create global_cmvn.stats
if ! matrix-sum --binary=false scp:$data/cmvn.scp - >$dir/global_cmvn.stats 2>/dev/null; then
  echo "$0: Error summing cmvn stats"
  exit 1
fi

# Start from an empty value so a skip_opt inherited from the environment
# cannot leak into the command line when pitch is disabled.
skip_opt=
if $add_pitch; then
  skip_opt="--skip-dims=13:14:15" # should make this more general.
fi

echo "$0: feature type is $feat_type";

# The CMVN front of each pipeline is the same for both feature types, so it is
# built once.  All --options of apply-cmvn-online are placed before the first
# positional argument (the stats file): Kaldi's option parser treats anything
# after the first positional argument as positional.
si_cmvn_cmd="apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"
online_cmvn_cmd="apply-cmvn-online $skip_opt --config=$online_cmvn_config $online_cmvn_spk2utt_opt $dir/global_cmvn.stats scp:$sdata/JOB/feats.scp ark:- |"
case $feat_type in
  delta) post_cmvn_cmd="add-deltas ark:- ark:- |" ;;
  lda) post_cmvn_cmd="splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |" ;;
  *) echo "Invalid feature type $feat_type"; exit 1 ;;
esac
sifeats="ark,s,cs:$si_cmvn_cmd $post_cmvn_cmd"
online_sifeats="ark,s,cs:$online_cmvn_cmd $post_cmvn_cmd"

# Set up the adapted features "$feats" for training set.
# They are the unadapted pipeline, followed by the source system's per-job
# transforms (trans.JOB) whenever it has any.
feats="$sifeats"
if [ -f $srcdir/trans.1 ]; then
  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$srcdir/trans.JOB ark:- ark:- |"
fi


# Decide whether the basis statistics are accumulated per utterance or per
# speaker, and set $spk2utt_opt for gmm-basis-fmllr-accs-gpost accordingly.
if $per_utt_basis; then
  spk2utt_opt=  # treat each utterance as separate speaker when computing basis.
  echo "Doing per-utterance adaptation for purposes of computing the basis."
else
  echo "Doing per-speaker adaptation for purposes of computing the basis."
  # Count speakers in the unsplit data directory: the split directory only has
  # per-job subdirectories (the JOB/spk2utt files used below), not a top-level
  # spk2utt.  41*40 is (feature-dim+1)*(feature-dim) for 40-dimensional
  # features, the threshold described next to per_utt_basis above.
  num_spk=$(cat $data/spk2utt | wc -l)
  if [ $num_spk -lt $((41*40)) ]; then
    echo "Warning: number of speakers is small, might be better to use --per-utt-basis=true."
  fi
  spk2utt_opt="--spk2utt=ark:$sdata/JOB/spk2utt"
fi

# Stage 0: accumulate the per-job statistics ($dir/basis.acc.JOB) from which
# the fMLLR basis is trained in stage 1.
if [ $stage -le 0 ]; then
  echo "$0: Accumulating statistics for basis-fMLLR computation"
  # One job per data split (JOB=1..$nj), each logging to
  # $dir/log/basis_acc.JOB.log.  The pipe symbols are escaped (\|) so that they
  # are handed to $cmd as part of the command instead of being interpreted by
  # this shell.
  # Note: we get Gaussian level alignments with the "final.mdl" and the
  # speaker adapted features ("$feats"); the accumulation itself is given the
  # unadapted features ("$sifeats").
  $cmd JOB=1:$nj $dir/log/basis_acc.JOB.log \
    ali-to-post "ark:gunzip -c $srcdir/ali.JOB.gz|" ark:- \| \
    weight-silence-post $silence_weight $silphonelist $dir/final.mdl ark:- ark:- \| \
    gmm-post-to-gpost $dir/final.mdl "$feats" ark:- ark:- \| \
    gmm-basis-fmllr-accs-gpost $spk2utt_opt \
    $dir/final.mdl "$sifeats" ark,s,cs:- $dir/basis.acc.JOB || exit 1;
fi

# Stage 1: turn the accumulators written by stage 0 into $dir/fmllr.basis.
if [ $stage -le 1 ]; then
  echo "$0: computing the basis matrices."
  if ! $cmd $dir/log/basis_training.log \
      gmm-basis-fmllr-training $dir/final.mdl $dir/fmllr.basis $dir/basis.acc.*; then
    exit 1
  fi
  # The accumulators have been consumed; drop them unless asked to keep them.
  $cleanup && rm $dir/basis.acc.* 2>/dev/null
fi

# Stage 2: estimate the "online alignment model" $dir/final.oalimdl.
if [ $stage -le 2 ]; then
  echo "$0: accumulating stats for online alignment model."

  # The online alignment model is estimated on the speaker-independent
  # features with online CMVN ("$online_sifeats"), while the statistics are
  # gathered with the adapted features ("$feats"), so that it corresponds
  # Gaussian-for-Gaussian to the final speaker-adapted model.
  $cmd JOB=1:$nj $dir/log/acc_alimdl.JOB.log \
    ali-to-post "ark:gunzip -c $srcdir/ali.JOB.gz|" ark:-  \| \
    gmm-acc-stats-twofeats $dir/final.mdl "$feats" "$online_sifeats" \
    ark,s,cs:- $dir/final.JOB.acc || exit 1

  # Every job must have left exactly one accumulator behind.
  num_accs=$(ls $dir/final.*.acc | wc -w)
  if [ $num_accs -ne "$nj" ]; then
    echo "$0: Wrong #accs"
    exit 1
  fi

  # Sum the accumulators and re-estimate, keeping all Gaussians so the
  # correspondence with final.mdl is not broken.
  $cmd $dir/log/est_online_alimdl.log \
    gmm-est --remove-low-count-gaussians=false $dir/final.mdl \
    "gmm-sum-accs - $dir/final.*.acc|" $dir/final.oalimdl || exit 1

  if $cleanup; then rm $dir/final.*.acc; fi
fi

# Stage 3: write $dir/conf/online_decoding.conf together with the config files
# it refers to (feature extraction, online CMVN, splicing, pitch).
if [ $stage -le 3 ]; then
  mkdir -p $dir/conf
  rm $dir/{plp,mfcc}.conf 2>/dev/null
  echo "$0: preparing configuration files in $dir/conf"
  # Keep one backup of a config left over from a previous run.
  if [ -f $dir/conf/online_decoding.conf ]; then
    echo "$0: moving $dir/conf/online_decoding.conf to $dir/conf/online_decoding.conf.bak"
    mv $dir/conf/online_decoding.conf $dir/conf/online_decoding.conf.bak
  fi
  conf=$dir/conf/online_decoding.conf
  echo -n >$conf  # start from an empty file; everything below appends.

  # Base feature type.  A failed copy or an unsupported type is fatal: going
  # on would produce a config that points at a missing file or silently omits
  # the feature settings.
  case "$feature_type" in
    mfcc)
      echo "$0: creating $dir/conf/mfcc.conf"
      echo "--mfcc-config=$dir/conf/mfcc.conf" >>$conf
      if ! cp conf/mfcc.conf $dir/conf/; then
        echo "$0: error copying mfcc config to $dir/conf/"
        exit 1;
      fi ;;
    plp)
      echo "$0: enabling plp features"
      echo "--feature-type=plp" >>$conf
      echo "$0: creating $dir/conf/plp.conf"
      echo "--plp-config=$dir/conf/plp.conf" >>$conf
      if ! cp conf/plp.conf $dir/conf/; then
        echo "$0: error copying plp config to $dir/conf/"
        exit 1;
      fi ;;
    *)
      echo "Unknown feature type $feature_type"
      exit 1 ;;
  esac

  if ! cp $online_cmvn_config $dir/conf/online_cmvn.conf; then
    echo "$0: error copying online cmvn config to $dir/conf/"
    exit 1;
  fi
  echo "--cmvn-config=$dir/conf/online_cmvn.conf" >>$conf

  # Same choice as for the training-time features: splice+LDA if the system
  # has an LDA matrix, deltas otherwise.
  if [ -f $dir/final.mat ]; then
    echo "$0: enabling feature splicing"
    echo "--splice-feats" >>$conf
    echo "$0: creating $dir/conf/splice.conf"
    # One option per line, as a config file expects.
    for x in $(cat $dir/splice_opts); do echo $x; done > $dir/conf/splice.conf
    echo "--splice-config=$dir/conf/splice.conf" >>$conf
    echo "$0: enabling LDA"
    echo "--lda-matrix=$dir/final.mat" >>$conf
  else
    echo "$0: enabling deltas"
    echo "--add-deltas" >>$conf
  fi

  if $add_pitch; then
    echo "$0: enabling pitch features"
    echo "--add-pitch" >>$conf
    echo "$0: creating $dir/conf/pitch.conf"
    echo "--pitch-config=$dir/conf/pitch.conf" >>$conf
    if ! cp $pitch_config $dir/conf/pitch.conf; then
      echo "$0: error copying pitch config to $dir/conf/"
      exit 1;
    fi;
    echo "$0: creating $dir/conf/pitch_process.conf"
    echo "--pitch-process-config=$dir/conf/pitch_process.conf" >>$conf
    if ! cp $pitch_process_config $dir/conf/pitch_process.conf; then
      echo "$0: error copying pitch process config to $dir/conf/"
      exit 1;
    fi;
    # Sanity check on the width of the global CMVN stats: count the fields on
    # the second line of the text-format file.  $nfields is quoted so that an
    # empty result fails the check instead of breaking the test expression.
    nfields=$(sed -n '2,2p' $dir/global_cmvn.stats | \
      perl -e '$_ = <>; s/^\s+|\s+$//g; print scalar(split);');
    if [ "$nfields" != 17 ]; then
      echo "$0: $dir/global_cmvn.stats has $nfields entries per row (expected 17)."
      echo "$0: Did you append pitch features?"
      exit 1;
    fi
    #offset=$(sed -n '2,2p' $dir/global_cmvn.stats | \
    #  perl -e '$_ = <>; s/^\s+|\s+$//g; ($t, $c) = (split)[13, 16]; print -$t/$c;');
    #echo "--pov-offset=$offset" >>$dir/conf/pitch_process.conf
  fi

  echo "--fmllr-basis=$dir/fmllr.basis" >>$conf
  echo "--online-alignment-model=$dir/final.oalimdl" >>$conf
  echo "--model=$dir/final.mdl" >>$conf
  # A rescoring model is only configured when it differs from the main model.
  if ! cmp --quiet $dir/final.mdl $dir/final.rescore_mdl; then
    echo "--rescore-model=$dir/final.rescore_mdl" >>$conf
  fi
  echo "--silence-phones=$silphonelist" >>$conf
  echo "--endpoint.silence-phones=$silphonelist" >>$conf
  echo "--global-cmvn-stats=$dir/global_cmvn.stats" >>$conf
  echo "$0: created config file $conf"
fi