Blame view

egs/sre08/v1/sid/music_id.sh 4.7 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
  #!/bin/bash
  
  # Copyright    2015  David Snyder
  # Apache 2.0.
  
  # This script calculates the relative probability of music versus
  # speech.
  
  # Begin configuration section.
  stage=-4          # steps whose stage threshold is below this value are skipped
  cmd=run.pl        # launcher command prefixed to every job in this script
  nj=10             # number of data splits, and of parallel jobs per step
  num_gselect=20    # passed as --n to gmm-gselect and fgmm-gselect
                    # (Gaussian selection using diagonal and full covariance models)
  center=true       # passed as --center to apply-cmvn-sliding
  norm_vars=false   # passed as --norm-vars to apply-cmvn-sliding
  cleanup=true      # if true, the gselect archives are deleted at the end
  # End configuration section.
  
  # Print the command line (script name plus all arguments) for logging.
  echo "$0 $*"

  # Source ./path.sh when the current directory provides one.
  if [ -f path.sh ]; then
    . ./path.sh
  fi

  # Source parse_options.sh (looked up on PATH, as the name has no slash);
  # presumably it consumes the --option arguments.  Abort if it fails.
  if ! . parse_options.sh; then
    exit 1
  fi
  
  
  # Exactly four positional arguments must remain at this point; otherwise
  # print the usage text and abort with status 1.
  # Only options backed by a variable in the configuration section are listed:
  # this script defines no num_processes or num_threads variable, so those
  # options are not advertised here.
  if [ $# != 4 ]; then
    echo "Usage: $0 <music-ubm-dir> <speech-ubm-dir> <data> <exp-dir>"
    echo " e.g.: $0  exp/full_ubm_music exp/full_ubm_speech data/test exp/test_results"
    echo "main options (for others, see top of script file)"
    echo "  --config <config-file>                           # config containing options"
    echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
    echo "  --nj <n|10>                                      # Number of parallel jobs"
    echo "  --cleanup <true,false|true>                      # If true, clean up temporary files"
    echo "  --stage <stage|-4>                               # To control partial reruns"
    echo "  --num-gselect <n|20>                             # Number of Gaussians to select using"
    echo "                                                   # the diagonal and full-covariance models."
    exit 1;
  fi
  
  # Positional arguments, in the order given in the usage message.
  music_ubmdir=$1
  speech_ubmdir=$2
  data=$3
  dir=$4

  # Extra options for add-deltas, read from the speech UBM directory.  A
  # missing file is not an error: the options are then simply empty.
  delta_opts=$(cat $speech_ubmdir/delta_opts 2>/dev/null)

  # Stop right away if any required input file is absent.
  for required in $music_ubmdir/final.ubm $speech_ubmdir/final.ubm $data/feats.scp $data/vad.scp; do
    if [ ! -f $required ]; then
      echo "No such file $required"
      exit 1
    fi
  done
  
  # Per-job data directory, log directory, and the data split itself.
  sdata=$data/split$nj
  if ! mkdir -p $dir/log; then
    exit 1
  fi
  if ! utils/split_data.sh $data $nj; then
    exit 1
  fi

  ## Feature pipeline, assembled from its three stages.  JOB is left as a
  ## literal placeholder in the string (the job commands below are launched
  ## with JOB=1:$nj).
  add_deltas="add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:-"
  sliding_cmvn="apply-cmvn-sliding --norm-vars=$norm_vars --center=$center --cmn-window=300 ark:- ark:-"
  voiced_only="select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:-"
  feats="ark,s,cs:$add_deltas | $sliding_cmvn | $voiced_only |"
  
  # Run fgmm-global-to-gmm on the music UBM and then the speech UBM, writing
  # $dir/<model>_final.dubm (the model later handed to gmm-gselect).
  if [ $stage -le -2 ]; then
    for model in music speech; do
      # Name of the variable that holds this model's UBM directory.
      ubmdir_var=${model}_ubmdir
      $cmd $dir/log/${model}_convert.log \
        fgmm-global-to-gmm ${!ubmdir_var}/final.ubm $dir/${model}_final.dubm || exit 1;
    done
  fi
  
  # run_gselect <model-name> <ubm-dir>
  # Records the job count in $dir/num_jobs, then runs Gaussian selection for
  # one UBM: gmm-gselect on the diagonal form of the model, piped into
  # fgmm-gselect on <ubm-dir>/final.ubm.  The result is gzipped to
  # $dir/<model-name>_gselect.JOB.gz.  Exits the script on failure.
  run_gselect() {
    local model="$1" ubmdir="$2"
    echo $nj > $dir/num_jobs
    echo "$0: doing Gaussian selection for $model UBM"
    $cmd JOB=1:$nj $dir/log/${model}_gselect.JOB.log \
      gmm-gselect --n=$num_gselect $dir/${model}_final.dubm "$feats" ark:- \| \
      fgmm-gselect --gselect=ark,s,cs:- --n=$num_gselect $ubmdir/final.ubm \
        "$feats" "ark:|gzip -c >$dir/${model}_gselect.JOB.gz" || exit 1;
  }

  if [ $stage -le -1 ]; then
    run_gselect music "$music_ubmdir"

    run_gselect speech "$speech_ubmdir"
  fi
  
  # The job count stored in $dir/num_jobs must equal the current $nj; the
  # per-job files used below are indexed by job number.
  [ $nj -eq $(cat $dir/num_jobs) ] || {
    echo "Number of jobs mismatch"
    exit 1
  }
  
  # avg_frame_likes <model-name> <ubm-dir>
  # Calculates the average frame-level log-likelihoods for the utterances
  # under <ubm-dir>/final.ubm, reading the Gaussian selection from
  # $dir/<model-name>_gselect.JOB.gz and writing a text archive to
  # $dir/<model-name>_logprob.JOB.  Exits the script on failure.
  avg_frame_likes() {
    local model="$1" ubmdir="$2"
    $cmd JOB=1:$nj $dir/log/get_${model}_logprob.JOB.log \
      fgmm-global-get-frame-likes --average=true \
       "--gselect=ark,s,cs:gunzip -c $dir/${model}_gselect.JOB.gz|" $ubmdir/final.ubm \
        "$feats" ark,t:$dir/${model}_logprob.JOB || exit 1;
  }

  # The music and speech passes have separate stage thresholds (0 and 1).
  if [ $stage -le 0 ]; then
    avg_frame_likes music "$music_ubmdir"
  fi
  if [ $stage -le 1 ]; then
    avg_frame_likes speech "$speech_ubmdir"
  fi
  
  # merge_logprobs <music-logprob-file> <speech-logprob-file>
  # Pairs the two files line by line and writes "<utt> <music> <speech>" to
  # stdout.  As soon as the utterance ids on a pair of lines differ, prints
  # "Sorting mismatch" on stderr and returns non-zero.
  merge_logprobs() {
    # The redirection target has to be the quoted string "/dev/stderr".
    # Written unquoted, awk reads /dev/ as a regular expression and the
    # message becomes part of an output file name instead of being printed.
    paste "$1" "$2" | \
      awk '{if ($1 != $3) { print "Sorting mismatch" > "/dev/stderr"; exit(1); } print $1, $2, $4;}'
  }

  # logprob_to_ratio
  # Reads "<utt> <music> <speech>" lines on stdin and writes
  # "<utt> 1/(1+exp(-(music-speech)))" to stdout, i.e. the logistic function
  # of the difference between the two log-likelihoods.
  logprob_to_ratio() {
    awk '{lratio = $2-$3; print $1, 1/(1+exp(-lratio));}'
  }

  if [ $stage -le 2 ]; then

    # Concatenate the per-job outputs in job order.  A missing or unreadable
    # piece is fatal rather than silently producing a truncated file.
    for j in $(seq $nj); do cat $dir/music_logprob.$j || exit 1; done > $dir/music_logprob
    for j in $(seq $nj); do cat $dir/speech_logprob.$j || exit 1; done > $dir/speech_logprob

    n1=$(wc -l < $dir/music_logprob)
    n2=$(wc -l < $dir/speech_logprob)

    if [ $n1 -ne $n2 ]; then
      echo "Number of lines mismatch, music versus speech UBM probs: $n1 vs $n2"
      exit 1;
    fi

    merge_logprobs $dir/music_logprob $dir/speech_logprob >$dir/logprob || exit 1;

    logprob_to_ratio <$dir/logprob >$dir/ratio || exit 1;
  fi
  
  # When cleanup is enabled, delete the per-job Gaussian-selection archives
  # (speech first, then music).
  if $cleanup; then
    for model in speech music; do
      rm $dir/${model}_gselect.*.gz
    done
  fi

  exit 0;