Yannick Estève / ONTRAC-Kaldi

Blame view

egs/aspire/s5/local/extract_vad_weights.sh 2.91 KB
  #!/bin/bash
  
  # Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016.  Apache 2.0.
  # This script converts lattices available from a first pass decode into a per-frame weights file.
  # The ctms generated from the lattices are filtered. Silence frames are assigned a low weight (e.g. 0.00001)
  # and voiced frames have a weight of 1.
  
  # Stop at the first command that fails.
  set -e

  # Default settings.  The variable names are kept exactly as they are because
  # utils/parse_options.sh is sourced further down and presumably maps
  # --<name> command-line options onto them -- verify against that script.
  stage=1
  cmd=run.pl
  iter=final
  silence_weight=0.00001
  #end configuration section.

  . ./cmd.sh

  # path.sh is optional; source it only when it is present.
  if [ -f ./path.sh ]; then
    . ./path.sh
  fi
  . utils/parse_options.sh || exit 1

  # Exactly four positional arguments must remain after option parsing.
  if [[ $# -ne 4 ]]; then
    printf '%s\n' \
      "Usage: $0 [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <input-decode-dir> <output-wts-file-gzipped>" \
      " Options:" \
      "    --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes."
    exit 1
  fi

  data_dir=$1
  lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied.
  input_decode_dir=$3
  output_wts_file_gz=$4
  
  # Stage 1: run local/multi_condition/get_ctm_conf.sh on the first-pass
  # lattices in $input_decode_dir, passing --use-segments false and the model
  # iteration $iter.  Stage 2 of this script reads
  # $input_decode_dir/score_10/<name>.ctm, which is presumably written by this
  # step -- TODO confirm against get_ctm_conf.sh.
  # All expansions are quoted so that directories containing whitespace or
  # glob characters are passed through as single, unmodified arguments.
  if [ "$stage" -le 1 ]; then
    echo "$0: generating CTM from input lattices"
    local/multi_condition/get_ctm_conf.sh --cmd "$cmd" \
      --use-segments false \
      --iter "$iter" \
      "$data_dir" \
      "$lang" \
      "$input_decode_dir"
  fi
  
  # Convert a CTM file into per-frame weights and write them to stdout, one
  # line per utterance, in the form "<utt> [ w_0 w_1 ... w_{N-1} ]".
  #   $1  CTM file; every line must have 6 whitespace-separated fields, of
  #       which this function uses 1 (utterance), 3 (begin), 4 (duration),
  #       5 (word, via the grep filters) and 6 (presumably the confidence).
  #   $2  lengths file with "<utt> <num-frames>" on each line.
  #   $3  number of frames added on both sides of every kept word (>= 0).
  #   $4  weight given to every frame that is not covered by a kept word.
  # Only CTM lines whose 6th field equals 1.0 and whose 4th field is below 1.0
  # are kept; lines containing the words "mm"/"mhm" or the strings "[noise]",
  # "[laughter]", "<unk>" are dropped.  Frames covered by a kept word get
  # weight 1.  Times are turned into frame indices by multiplying by 100, i.e.
  # this assumes times in seconds and 100 frames per second.
  # Returns the exit status of the perl stage (non-zero on a malformed CTM line
  # or an utterance that is missing from the lengths file).
  ctm_to_frame_weights() {
    awk '$6 == 1.0 && $4 < 1.0' "$1" | \
      grep -v -w -e mm -e mhm | \
      grep -v -F -e '[noise]' -e '[laughter]' -e '<unk>' | \
      perl -e '
        use strict;
        my ($lengths, $pad_frames, $silence_weight) = @ARGV;
        $pad_frames >= 0 || die "bad pad-frames value $pad_frames";
        open(my $len_fh, "<", $lengths) || die "opening lengths file $lengths: $!";

        # Start every utterance as all-silence; remember the input order so
        # that the output is written in the same order.
        my @all_utts;
        my %utt2weights;
        while (<$len_fh>) {
          my ($utt, $len) = split(" ", $_);
          push @all_utts, $utt;
          $utt2weights{$utt} = [ ($silence_weight) x $len ];
        }
        close($len_fh);

        while (<STDIN>) {
          my @fields = split(" ", $_);
          @fields == 6 || die "bad ctm line $_";
          my ($utt, $beg, $dur) = @fields[0, 2, 3];
          my $weights = $utt2weights{$utt};
          defined $weights || die "No length info for utterance $utt";
          # Round to the nearest frame: plain int() truncates products such as
          # 0.29 * 100 = 28.999999999999996 down to 28.
          my $first = int($beg * 100 + 0.5) - $pad_frames;
          my $last = $first + int($dur * 100 + 0.5) + 2 * $pad_frames - 1;
          # Clip the span to the frames that exist for this utterance.
          $first = 0 if $first < 0;
          $last = $#{$weights} if $last > $#{$weights};
          for my $t ($first .. $last) { $weights->[$t] = 1; }
        }

        foreach my $utt (@all_utts) {
          print $utt, " [ ", join(" ", @{$utt2weights{$utt}}), " ]\n";
        }
      ' "$2" "$3" "$4"
  }

  # Stage 2: derive the gzipped per-frame weights file from the stage-1 CTM.
  if [ "$stage" -le 2 ]; then
    name=$(basename "$data_dir")
    # we just take the ctm from LMWT 10, it doesn't seem to affect the results a lot
    ctm=$input_decode_dir/score_10/$name.ctm
    echo "$0: generating weights file from ctm $ctm"
    # Fail before any output is written when the CTM is missing.
    if [ ! -f "$ctm" ]; then  echo "$0: expected ctm to exist: $ctm" >&2; exit 1; fi

    pad_frames=0  # this did not seem to be helpful but leaving it as an option.
    feat-to-len "scp:$data_dir/feats.scp" ark,t:- >"$input_decode_dir/utt.lengths"

    ctm_to_frame_weights "$ctm" "$input_decode_dir/utt.lengths" \
      "$pad_frames" "$silence_weight" | gzip -c >"$output_wts_file_gz"
    # The pipeline status is that of gzip, so check the conversion explicitly;
    # otherwise a perl failure would leave a truncated file and exit status 0.
    pipe_status=("${PIPESTATUS[@]}")
    if [ "${pipe_status[0]}" -ne 0 ]; then
      echo "$0: error converting ctm $ctm into frame weights" >&2
      exit 1
    fi
  fi