Yannick Estève / ONTRAC-Kaldi

Blame view

egs/sre08/v1/sid/music_id.sh 4.7 KB
  #!/bin/bash
  
  # Copyright    2015  David Snyder
  # Apache 2.0.
  
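  # For each utterance in <data>, this script estimates the probability that it
  # is music rather than speech: it averages the frame log-likelihoods under a
  # music UBM and a speech UBM and writes 1/(1+exp(-(music - speech))) to
  # <exp-dir>/ratio.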
  
  # Begin configuration section.
  # Defaults for the settings this script uses; the usage message below lists
  # the corresponding --<name> command-line options.
  # Number of parallel jobs; also the number of pieces <data> is split into.
  nj=10
  # Command prefix used to launch every job (expanded unquoted, so it may
  # carry its own options).
  cmd="run.pl"
  # Steps guarded by "$stage -le N" are skipped once stage exceeds N; the
  # default of -4 runs every step.
  stage=-4
  num_gselect=20 # Gaussian-selection using diagonal and full covariance models
  # Passed to apply-cmvn-sliding as --norm-vars and --center respectively.
  norm_vars=false
  center=true
  # When true, the *_gselect.*.gz archives in <exp-dir> are removed at the end.
  cleanup=true
  # End configuration section.

  echo "$0 $@"  # Print the command line for logging

  # Source ./path.sh when present (presumably sets up PATH for the tools used
  # below -- confirm against the recipe's path.sh).
  if [ -f path.sh ]; then . ./path.sh; fi
  # parse_options.sh is given without a slash, so the shell looks it up via
  # PATH.  NOTE(review): presumably it overrides the variables above from
  # --name value arguments and removes them from "$@" -- confirm against
  # utils/parse_options.sh.
  . parse_options.sh || exit 1;
  
  
  # Exactly four positional arguments are required; otherwise print the usage
  # text and stop.  Only options backed by a variable in the configuration
  # section are listed: the text used to advertise --num-processes and refer
  # to num-threads, neither of which this script defines.
  if [ $# -ne 4 ]; then
    echo "Usage: $0 <music-ubm-dir> <speech-ubm-dir> <data> <exp-dir>"
    echo " e.g.: $0  exp/full_ubm_music exp/full_ubm_speech data/test exp/test_results"
    echo "main options (for others, see top of script file)"
    echo "  --config <config-file>                           # config containing options"
    echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
    echo "  --nj <n|10>                                      # Number of parallel jobs"
    echo "  --cleanup <true,false|true>                      # If true, clean up temporary files"
    echo "  --stage <stage|-4>                               # To control partial reruns"
    echo "  --num-gselect <n|20>                             # Number of Gaussians to select using"
    echo "                                                   # diagonal model."
    exit 1;
  fi
  
  # Positional arguments.
  music_ubmdir=$1; speech_ubmdir=$2
  data=$3; dir=$4

  # Extra add-deltas options stored next to the speech UBM; stays empty when
  # that file is absent (the read error is deliberately discarded).
  delta_opts=$(cat $speech_ubmdir/delta_opts 2>/dev/null)

  # Refuse to start unless both models and the feature/VAD tables exist.
  for required in $music_ubmdir/final.ubm $speech_ubmdir/final.ubm \
                  $data/feats.scp $data/vad.scp; do
    if [ ! -f $required ]; then
      echo "No such file $required"
      exit 1;
    fi
  done

  # Output/log directory, and the data directory split into $nj pieces.
  sdata=$data/split$nj
  mkdir -p $dir/log || exit 1;
  utils/split_data.sh $data $nj || exit 1;

  ## Feature pipeline, assembled from its three stages: add-deltas, then
  ## apply-cmvn-sliding, then select-voiced-frames using the VAD table.
  ## JOB is left literal here (presumably replaced per job by $cmd -- confirm).
  deltas_stage="add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:-"
  cmvn_stage="apply-cmvn-sliding --norm-vars=$norm_vars --center=$center --cmn-window=300 ark:- ark:-"
  voiced_stage="select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:-"
  feats="ark,s,cs:$deltas_stage | $cmvn_stage | $voiced_stage |"
  
  # Stage -2: run fgmm-global-to-gmm on each final.ubm, writing the converted
  # models to $dir/{music,speech}_final.dubm for the Gaussian-selection step.
  if [ $stage -le -2 ]; then
    if ! $cmd $dir/log/music_convert.log \
        fgmm-global-to-gmm $music_ubmdir/final.ubm $dir/music_final.dubm; then
      exit 1;
    fi
    if ! $cmd $dir/log/speech_convert.log \
        fgmm-global-to-gmm $speech_ubmdir/final.ubm $dir/speech_final.dubm; then
      exit 1;
    fi
  fi
  
  # Do Gaussian selection using the diagonal forms of the models.

  # Launches the Gaussian-selection jobs for one model: gmm-gselect on the
  # converted model $dir/<name>_final.dubm, piped into fgmm-gselect on the
  # model's own final.ubm, with the result gzipped per job.
  # Globals:   cmd, nj, dir, num_gselect, feats (all read)
  # Arguments: $1 - label used in file names ("music" or "speech")
  #            $2 - directory containing that model's final.ubm
  # Outputs:   $dir/<name>_gselect.JOB.gz, logs in $dir/log/<name>_gselect.JOB.log
  # Returns:   the exit status of $cmd
  run_gselect() {
    local name=$1 ubmdir=$2
    # The escaped \| reaches $cmd as a literal "|" argument, so the pipe is
    # part of the job command rather than of this script.
    $cmd JOB=1:$nj $dir/log/${name}_gselect.JOB.log \
      gmm-gselect --n=$num_gselect $dir/${name}_final.dubm "$feats" ark:- \| \
      fgmm-gselect --gselect=ark,s,cs:- --n=$num_gselect $ubmdir/final.ubm \
        "$feats" "ark:|gzip -c >$dir/${name}_gselect.JOB.gz"
  }

  if [ $stage -le -1 ]; then
    # Record the job count once; it is compared against $nj before the
    # archives are read back.
    echo $nj > $dir/num_jobs

    echo "$0: doing Gaussian selection for music UBM"
    run_gselect music $music_ubmdir || exit 1;

    echo "$0: doing Gaussian selection for speech UBM"
    run_gselect speech $speech_ubmdir || exit 1;
  fi
  
  # $nj must equal the job count recorded in $dir/num_jobs by the Gaussian
  # selection step.  Check for the file first so a missing file is reported
  # as such instead of as a cat error followed by a "mismatch".
  if [ ! -f $dir/num_jobs ]; then
    echo "$0: no such file $dir/num_jobs; run with --stage -1 or lower first"
    exit 1;
  fi
  # Keep the "! [ ... -eq ... ]" form: it also exits when the file content is
  # empty or not a number, which "[ ... -ne ... ]" would let through.
  if ! [ $nj -eq $(cat $dir/num_jobs) ]; then
    echo "Number of jobs mismatch"
    exit 1;
  fi
  
  # Average frame-level log-likelihood of every utterance under each UBM,
  # written as text archives $dir/{music,speech}_logprob.JOB.  Each run reads
  # back the gzipped Gaussian-selection archive of the matching model.
  music_gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/music_gselect.JOB.gz|"
  speech_gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/speech_gselect.JOB.gz|"

  # Music model: runs unless --stage is above 0.
  if [ $stage -le 0 ]; then
    if ! $cmd JOB=1:$nj $dir/log/get_music_logprob.JOB.log \
        fgmm-global-get-frame-likes --average=true "$music_gselect_opt" \
        $music_ubmdir/final.ubm "$feats" ark,t:$dir/music_logprob.JOB; then
      exit 1;
    fi
  fi

  # Speech model: runs unless --stage is above 1.
  if [ $stage -le 1 ]; then
    if ! $cmd JOB=1:$nj $dir/log/get_speech_logprob.JOB.log \
        fgmm-global-get-frame-likes --average=true "$speech_gselect_opt" \
        $speech_ubmdir/final.ubm "$feats" ark,t:$dir/speech_logprob.JOB; then
      exit 1;
    fi
  fi
  
  # Pairs the two log-likelihood tables line by line.
  # Arguments: $1 - music table, $2 - speech table; each line is read as two
  #            whitespace-separated fields, a key followed by a value.
  # Outputs:   "<key> <music-value> <speech-value>" lines on stdout.
  # Returns:   non-zero, with "Sorting mismatch" on stderr, as soon as the keys
  #            on a pair of lines differ.
  join_logprobs() {
    # The redirection target must be the quoted string "/dev/stderr" placed
    # after the message; written as  print >/dev/stderr "..."  awk does not
    # send the message to stderr.
    paste "$1" "$2" | \
      awk '{ if ($1 != $3) { print "Sorting mismatch" > "/dev/stderr"; exit(1); }
             print $1, $2, $4; }'
  }

  # Turns "<key> <music-value> <speech-value>" lines from file $1 into
  # "<key> <p>" on stdout, where p = 1/(1+exp(-(music - speech))).
  logprob_to_ratio() {
    awk '{ lratio = $2 - $3; print $1, 1/(1+exp(-lratio)); }' "$1"
  }

  if [ $stage -le 2 ]; then

    # Concatenate the per-job outputs in job order.
    for j in $(seq $nj); do cat $dir/music_logprob.$j; done > $dir/music_logprob
    for j in $(seq $nj); do cat $dir/speech_logprob.$j; done > $dir/speech_logprob

    n1=$(wc -l < $dir/music_logprob)
    n2=$(wc -l < $dir/speech_logprob)

    if [ $n1 -ne $n2 ]; then
      echo "Number of lines mismatch, music versus speech UBM probs: $n1 vs $n2"
      exit 1;
    fi

    join_logprobs $dir/music_logprob $dir/speech_logprob >$dir/logprob || exit 1;

    logprob_to_ratio $dir/logprob >$dir/ratio || exit 1;
  fi
  
  # Optionally delete the intermediate Gaussian-selection archives.  -f keeps
  # rm from printing errors when there is nothing to delete, e.g. on a rerun
  # with --stage 2 after an earlier run already cleaned up.
  if $cleanup; then
    rm -f $dir/speech_gselect.*.gz
    rm -f $dir/music_gselect.*.gz
  fi

  exit 0;