Yannick Estève / ONTRAC-Kaldi

Blame view

egs/wsj/s5/steps/nnet2/convert_lda_to_raw.sh 6.36 KB
  #!/bin/bash
  
  # Copyright 2014    Johns Hopkins University (Author: Daniel Povey).
  # Apache 2.0.
  
  # This script converts nnet2 models which expect splice+LDA as the input, into
  # models which expect raw features (e.g. MFCC) as the input.  If you include
  # the option --global-cmvn-stats <matrix>, it will also remove CMVN from the model
  # by including it as part of the neural net.
  
  
  # Begin configuration section
  # Defaults for the script's options.  They are set before parse_options.sh
  # is sourced below; presumably that helper lets "--name value" arguments
  # override them (the usage text advertises --global-cmvn-stats) -- confirm
  # against parse_options.sh.
  cmd=run.pl            # launcher placed in front of each logged command
  cleanup=true          # run as a command at the end: "true" removes the
                        # intermediate final.mat and lda.nnet
  global_cmvn_stats=    # empty means: leave CMVN outside the network
  # The two values below are written into the config of the inserted affine
  # layer; they only matter if the converted model is trained further, which
  # is unlikely.
  max_change=5.0
  learning_rate=0.00001 # deliberately tiny; anyone who does train the model
                        # should tune or set it
  # End configuration section.

  # Record the exact invocation for logging.
  echo "$0 $*"

  # Pull in the environment setup when the working directory provides one.
  if [ -f ./path.sh ]; then
    . ./path.sh
  fi
  . parse_options.sh || exit 1
  
  
  # Exactly two positional arguments must remain at this point; otherwise
  # print the usage text and stop.
  if [ $# -ne 2 ]; then
    echo "Usage: $0 [options] <src-nnet-dir> <dest-nnet-dir>"
    echo "e.g.: $0 --global-cmvn-stats global_cmvn.mat exp/dnn4b_nnet2 exp/dnn4b_nnet2_raw"
    echo "Options include"
    echo "   --global-cmvn-stats <stats-file>         # Filename of globally summed CMVN stats, if"
    echo "                                            # you want to push the CMVN inside the nnet"
    echo "                                            # (it won't any longer be speaker specific)"
    exit 1;
  fi

  src=$1   # source model directory (read only)
  dir=$2   # destination directory (created if needed)

  # Path expansions are quoted throughout this section so that directory
  # names containing whitespace or glob characters are passed through intact.
  mkdir -p "$dir/log" || exit 1;

  # Every file listed here is read further down; fail early and name the
  # missing one.
  for f in "$src/final.mdl" "$src/final.mat" "$src/splice_opts" "$src/cmvn_opts"; do
    if [ ! -f "$f" ]; then
      echo "$0: expected file $f to exist"
      exit 1
    fi
  done

  # phones.txt is optional: copy it when present; a failure is deliberately
  # silenced and ignored.
  cp "$src/phones.txt" "$dir" 2>/dev/null
  
  # nnet.config will be a config for a few trivial neural-network layers
  # that come before the main network, and which do things like (optionally)
  # global CMVN, frame splicing and the affine transform read from final.mat.
  # Start from an empty file; later sections append to it.
  : >"$dir/nnet.config" || exit 1;

  if [ -n "$global_cmvn_stats" ]; then
    if [ ! -f "$src/cmvn_opts" ]; then
      echo "$0: expected $src/cmvn_opts to exist"
      exit 1;
    fi
    norm_vars=false
    # grep -q: only the exit status matters here, the matched line itself
    # should not end up in the script output.
    if grep -q 'norm-means=false' "$src/cmvn_opts"; then
      echo "$0: if --norm-means=false, don't supply the --global-cmvn-stats option to this script"
      exit 1;
    elif grep -q 'norm-vars=true' "$src/cmvn_opts"; then
      echo "$0: warning: this script has not been tested with --norm-vars=true in CMVN options"
      norm_vars=true
    fi


    # First add to the config, layers that will do the same transform as cepstral
    # mean and variance normalization using these global stats.  We do this as
    # first an added offset (FixedBiasComponent), then, only if norm-vars=true
    # in the CMVN options, a scaling (FixedScaleComponent).

    # Convert the stats to text form so that they can be parsed below.
    $cmd "$dir/log/copy_feats.log" \
      copy-feats --binary=false "$global_cmvn_stats" "$dir/global_cmvn_stats.txt" || exit 1;

    # The Perl program reads three lines from stdin: an opening-bracket line,
    # a line of per-dimension sums followed by the total count, and a line of
    # per-dimension sums of squares followed by "0 ]".  It writes a vector of
    # negated means to its first argument and a vector of 1/stddev values to
    # its second argument.  (No apostrophes inside the program: it lives in a
    # single-quoted shell string.)
    perl -e '
      ($bias_out, $scale_out) = @ARGV;
      $line0 = <STDIN>;
      # This has to be a string/pattern test.  A numeric == between two
      # non-numeric strings compares 0 with 0 and therefore never fails.
      # Whitespace around the bracket is tolerated.
      (defined($line0) && $line0 =~ m/^\s*\[\s*$/)
        || die "expected first line to be [, got $line0";
      $line1 = <STDIN>; $line2 = <STDIN>;
      (defined($line1) && defined($line2)) || die "CMVN stats file has fewer than three lines";
      @L1 = split(" ", $line1); @L2 = split(" ", $line2);
      open(B, ">$bias_out") || die "opening bias-out file $bias_out";
      open(S, ">$scale_out") || die "opening scale-out file $scale_out";
      pop @L2; pop @L2; # remove the " 0 ]"
      $count = pop @L1;  # last element of line 1 is total count.
      ($count > 0.0) || die "Bad count $count";
      $dim = @L1;
      ($dim == scalar(@L2)) || die "Bad dimension of second line of CMVN stats @L2";
      print B "[ ";  print S "[ ";
      for ($x = 0; $x < $dim; $x++) {
        $mean = $L1[$x] / $count;  $var = ($L2[$x] / $count) - ($mean * $mean);
        $bias = -$mean;  print B "$bias ";
        # Check the variance before sqrt and the division, so that bad stats
        # give a readable message instead of a Perl runtime error.
        ($var > 0.0) || die "Bad variance $var for dimension $x";
        $scale = 1.0 / sqrt($var); $scale > 0 || die "Bad scale $scale";  print S "$scale ";
      }
      print B "]\n";  print S "]\n";
      close(B) || die "closing bias-out file $bias_out";
      close(S) || die "closing scale-out file $scale_out";
    ' "$dir/bias.txt" "$dir/scales.txt" <"$dir/global_cmvn_stats.txt" || exit 1;

    echo "FixedBiasComponent bias=$dir/bias.txt" >>"$dir/nnet.config"
    if $norm_vars; then
      echo "FixedScaleComponent scales=$dir/scales.txt" >>"$dir/nnet.config"
    fi
    # The normalization now lives inside the network, so the CMVN options
    # stored with the converted model switch it off.
    echo "--norm-means=false --norm-vars=false" >"$dir/cmvn_opts" || exit 1;
  else
    # CMVN stays outside the network: keep the source options unchanged.
    cp "$src/cmvn_opts" "$dir/" || exit 1;
  fi
  
  # We need the dimension of the raw features.  We work it out from the LDA matrix dimension.
  # Count the words on the second line of the text form of the LDA matrix
  # (presumably the first data row, line one holding only the opening
  # bracket -- confirm against the text matrix format).  This will be either
  # the spliced dim or the spliced dim plus one.
  spliced_dim=$(copy-matrix --binary=false "$src/final.mat" - | head -n 2 | tail -n 1 | wc -w)
  # The status of the pipeline above is that of wc, so a failing copy-matrix
  # only shows up as a zero (or unusable) count; test the value itself.
  if ! [ "$spliced_dim" -gt 0 ] 2>/dev/null; then
    echo "$0: could not work out the row dimension of $src/final.mat"
    exit 1;
  fi


  splice_opts=$(cat "$src/splice_opts") || exit 1;
  # Work out how many frames are spliced together by splicing a matrix with one element
  # and testing the resulting number of columns.
  # $splice_opts is intentionally unquoted: it may hold several options.
  num_splice=$(echo "foo [ 1.0 ]" | splice-feats $splice_opts ark:- ark:- | feat-to-dim ark:- -) || {
    echo "$0: failed to work out num-splice with splice-feats and feat-to-dim"
    exit 1;
  }

  # We'll separately need the left-context and right-context.
  # defaults in the splice-feats code are 4 and 4.
  left_context=4
  right_context=4
  # Word-split the options already read above and pick out the two context
  # values; the text after the "=" is the value.
  for opt in $splice_opts; do
    case "$opt" in
      *left-context*) left_context=${opt#*=} ;;
    esac
    case "$opt" in
      *right-context*) right_context=${opt#*=} ;;
    esac
  done
  # Cross-check our reading of the options against what the binaries produced.
  if ! [ "$num_splice" -eq $(($left_context + 1 + $right_context)) ]; then
    echo "$0: num-splice worked out from the binaries differs from our interpreation of the options:"
    echo "$num_splice != $left_context + 1 + $right_context"
    exit 1;
  fi

  # A remainder of 1 means the matrix row carries an extra offset column.
  # NOTE(review): with num_splice equal to 1 the remainder is always 0, so an
  # offset column cannot be detected in that case.
  modulo=$(($spliced_dim % $num_splice))
  if [ $modulo -eq 1 ]; then
    # matrix includes offset term.
    spliced_dim=$(($spliced_dim - 1))
    cp "$src/final.mat" "$dir/" || exit 1;
  elif [ $modulo -eq 0 ]; then
    # We need to add a zero bias term to the matrix, because the AffineComponent
    # expects that.  The awk program appends a "0" to every multi-field line,
    # placing it before the closing "]" on the line that ends the matrix, and
    # passes single-field lines through untouched.
    copy-matrix --binary=false "$src/final.mat" - | \
      awk '{if ($NF == "]") { $NF = "0"; print $0, "]"; } else { if (NF > 1) { print $0, "0"; } else {print;}}}' >"$dir/final.mat" || exit 1;
  else
    echo "$0: Cannot make sense of spliced dimension $spliced_dim and num-splice=$num_splice"
    exit 1;
  fi
  feat_dim=$(($spliced_dim / $num_splice))
  echo "SpliceComponent input-dim=$feat_dim left-context=$left_context right-context=$right_context" >>"$dir/nnet.config"
  
  # use AffineComponentPreconditioned as it's easier to configure than AffineComponentPreconditionedOnline.
  # This is the last layer appended to nnet.config; its matrix is the
  # final.mat placed in $dir earlier in the script.
  echo "AffineComponentPreconditioned alpha=4.0 learning-rate=$learning_rate max-change=$max_change matrix=$dir/final.mat" >>"$dir/nnet.config" || exit 1;


  # Path arguments are quoted so that directories containing whitespace or
  # glob characters survive.  $cmd itself stays unquoted so that a value
  # carrying its own arguments still splits into separate words.

  # Build a small network from the accumulated config ...
  $cmd "$dir/log/nnet_init.log" \
    nnet-init "$dir/nnet.config" "$dir/lda.nnet" || exit 1;

  # ... and insert it at position 0 of the source model, writing the
  # converted model to $dir/final.mdl.
  $cmd "$dir/log/nnet_insert.log" \
    nnet-insert --insert-at=0 --randomize-next-component=false \
     "$src/final.mdl" "$dir/lda.nnet" "$dir/final.mdl" || exit 1;

  # $cleanup is executed as a command ("true"/"false"); when it succeeds the
  # intermediate files are removed.
  if $cleanup; then
    rm -- "$dir/final.mat" "$dir/lda.nnet"
  fi