music_id.sh
4.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/bin/bash
# Copyright 2015 David Snyder
# Apache 2.0.
# This script calculates the relative probability of music versus
# speech.
# Begin configuration section.
# Defaults for the command-line options. The usage text below exposes them as
# --<name> flags (e.g. --num-gselect for num_gselect).
cmd="run.pl"       # job launcher; every heavy step is run as "$cmd ... <program>"
nj=10              # number of parallel jobs the data directory is split into
stage=-4           # steps whose threshold is below this value are skipped
num_gselect=20     # Gaussian-selection using diagonal and full covariance models
center=true        # passed to apply-cmvn-sliding as --center
norm_vars=false    # passed to apply-cmvn-sliding as --norm-vars
cleanup=true       # when true, the gselect archives are deleted at the end
# End configuration section.

# Print the command line for logging.
echo "$0 $@"

# Source the environment setup if one is present in the working directory.
if [ -f path.sh ]; then
  . ./path.sh
fi

# Presumably overrides the defaults above from --option arguments; abort if it
# cannot be sourced or reports an error.
. parse_options.sh || exit 1
# Exactly four positional arguments are required; otherwise print the usage
# text on stdout and abort with status 1.
# Only options backed by a variable in the configuration section are listed:
# the script defines and reads no num_processes (or num_threads) setting, so
# those are not advertised here.
if [ $# -ne 4 ]; then
  cat <<EOF
Usage: $0 <music-ubm-dir> <speech-ubm-dir> <data> <exp-dir>
 e.g.: $0 exp/full_ubm_music exp/full_ubm_speech data/test exp/test_results
main options (for others, see top of script file)
 --config <config-file> # config containing options
 --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs.
 --nj <n|10> # Number of jobs
 --cleanup <true,false|true> # If true, clean up temporary files
 --stage <stage|-4> # To control partial reruns
 --num-gselect <n|20> # Number of Gaussians to select using
 # diagonal model.
EOF
  exit 1
fi
# Positional arguments (the usage check guarantees there are four).
music_ubmdir=$1    # directory holding the music final.ubm
speech_ubmdir=$2   # directory holding the speech final.ubm (and delta_opts)
data=$3            # data directory providing feats.scp and vad.scp
dir=$4             # output / working directory

# Extra add-deltas flags stored next to the speech UBM. The file is optional:
# stderr is silenced on purpose and delta_opts is simply left empty if it is
# missing.
delta_opts=$(cat "$speech_ubmdir/delta_opts" 2>/dev/null)

# Fail early when a required input is absent. The paths are quoted so that a
# name containing whitespace is still tested as one file instead of making
# `[` error out and silently skipping the check.
for f in "$music_ubmdir/final.ubm" "$speech_ubmdir/final.ubm" \
         "$data/feats.scp" "$data/vad.scp"; do
  if [ ! -f "$f" ]; then
    echo "No such file $f"
    exit 1
  fi
done

# Set various variables.
mkdir -p "$dir/log" || exit 1
sdata=$data/split$nj   # per-job subdirectories are addressed as $sdata/JOB
utils/split_data.sh "$data" "$nj" || exit 1
## Set up features.
# The feature specifier is a three-stage command pipeline, assembled here from
# its individual stages. The literal token JOB is left in place; the later
# "$cmd JOB=1:$nj ..." invocations presumably substitute the job index for it.
delta_stage="add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:-"
cmvn_stage="apply-cmvn-sliding --norm-vars=$norm_vars --center=$center --cmn-window=300 ark:- ark:-"
vad_stage="select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:-"
feats="ark,s,cs:$delta_stage | $cmvn_stage | $vad_stage |"
# Stage -2: run fgmm-global-to-gmm on each UBM (music first, then speech),
# writing $dir/<kind>_final.dubm and logging to $dir/log/<kind>_convert.log.
# These .dubm files are the models used first in the Gaussian-selection step.
for kind in music speech; do
  # Name of the variable that holds this model's directory
  # (music_ubmdir / speech_ubmdir), dereferenced with ${!...} below.
  ubmdir_ref=${kind}_ubmdir
  if [ $stage -le -2 ]; then
    $cmd $dir/log/${kind}_convert.log \
      fgmm-global-to-gmm ${!ubmdir_ref}/final.ubm $dir/${kind}_final.dubm || exit 1
  fi
done
# Do Gaussian selection using the diagonal forms of the models.
# For each model the job count is recorded in $dir/num_jobs, then gmm-gselect
# runs on the .dubm model and its output is piped into fgmm-gselect on the
# corresponding final.ubm; the result is gzip-compressed into
# $dir/<kind>_gselect.JOB.gz. Music is processed before speech.
if [ $stage -le -1 ]; then
  for kind in music speech; do
    # Name of the variable holding this model's directory, dereferenced below.
    ubmdir_ref=${kind}_ubmdir
    echo $nj > $dir/num_jobs
    echo "$0: doing Gaussian selection for $kind UBM"
    $cmd JOB=1:$nj $dir/log/${kind}_gselect.JOB.log \
      gmm-gselect --n=$num_gselect $dir/${kind}_final.dubm "$feats" ark:- \| \
      fgmm-gselect --gselect=ark,s,cs:- --n=$num_gselect ${!ubmdir_ref}/final.ubm \
      "$feats" "ark:|gzip -c >$dir/${kind}_gselect.JOB.gz" || exit 1
  done
fi
# The job count recorded in $dir/num_jobs (written by the Gaussian-selection
# step) must equal the current $nj; otherwise the per-job files would not line
# up, so stop here.
recorded_nj=$(cat $dir/num_jobs)
[ $nj -eq $recorded_nj ] || {
  echo "Number of jobs mismatch"
  exit 1
}
# Calculate the average frame-level log-likelihoods for the utterances under
# the music and speech UBMs.
# The two models go through the same command; only the stage threshold
# differs: music runs when stage <= 0, speech when stage <= 1. Results are
# written in text form to $dir/<kind>_logprob.JOB.
min_stage=0
for kind in music speech; do
  # Name of the variable holding this model's directory, dereferenced below.
  ubmdir_ref=${kind}_ubmdir
  if [ $stage -le $min_stage ]; then
    $cmd JOB=1:$nj $dir/log/get_${kind}_logprob.JOB.log \
      fgmm-global-get-frame-likes --average=true \
      "--gselect=ark,s,cs:gunzip -c $dir/${kind}_gselect.JOB.gz|" ${!ubmdir_ref}/final.ubm \
      "$feats" ark,t:$dir/${kind}_logprob.JOB || exit 1
  fi
  min_stage=$((min_stage + 1))
done
# Stage 2: merge the per-job results and turn them into one score per
# utterance.
#   $dir/music_logprob, $dir/speech_logprob  per-job files concatenated in job order
#   $dir/logprob   "<utt> <music-logprob> <speech-logprob>"
#   $dir/ratio     "<utt> <score>", score = 1/(1+exp(-(music - speech))),
#                  i.e. the logistic function of the log-likelihood difference
# Exits with status 1 if the two lists differ in length or in utterance order.
if [ $stage -le 2 ]; then
  for j in $(seq "$nj"); do cat "$dir/music_logprob.$j"; done > "$dir/music_logprob"
  for j in $(seq "$nj"); do cat "$dir/speech_logprob.$j"; done > "$dir/speech_logprob"

  # Both lists must cover the same number of utterances before pairing lines.
  n1=$(wc -l < "$dir/music_logprob")
  n2=$(wc -l < "$dir/speech_logprob")
  if [ $n1 -ne $n2 ]; then
    echo "Number of lines mismatch, music versus speech UBM probs: $n1 vs $n2"
    exit 1
  fi

  # Pair the lines and require identical utterance ids (fields 1 and 3).
  # The message comes first and the quoted "/dev/stderr" is the redirection
  # target, so the diagnostic really reaches stderr before awk exits non-zero.
  paste "$dir/music_logprob" "$dir/speech_logprob" | \
    awk '{
      if ($1 != $3) {
        print "Sorting mismatch" > "/dev/stderr";
        exit(1);
      }
      print $1, $2, $4;
    }' > "$dir/logprob" || exit 1

  # $2 is the music log-likelihood and $3 the speech one.
  awk '{ lratio = $2 - $3; print $1, 1/(1+exp(-lratio)); }' \
    "$dir/logprob" > "$dir/ratio" || exit 1
fi
# Optionally delete the compressed Gaussian-selection archives.
# $cleanup holds the word "true" or "false" and is executed as a command.
# -f keeps the step quiet when the archives are already gone, e.g. on a rerun
# from a later --stage after a previous run cleaned up.
if $cleanup; then
  rm -f "$dir"/speech_gselect.*.gz "$dir"/music_gselect.*.gz
fi
exit 0