music_id.sh 4.7 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134


#!/bin/bash

# Copyright    2015  David Snyder
# Apache 2.0.

# This script calculates the relative probability of music versus
# speech.

# Begin configuration section.
# Default values for the script's options.  They are assigned before
# parse_options.sh is sourced further down; presumably that script lets
# "--name value" command-line arguments override them (TODO confirm against
# parse_options.sh, which is not part of this file).
#
# nj: number of parallel jobs; the data directory is split into this many
# parts and every "$cmd JOB=1:$nj ..." step runs one job per part.
nj=10
# cmd: job-launcher command that is prefixed to every processing step.
cmd="run.pl"
# stage: each step below runs only while $stage is <= that step's threshold,
# so a larger value skips the earlier steps on a rerun.
stage=-4
# num_gselect: passed as --n to both gmm-gselect and fgmm-gselect.
num_gselect=20 # Gaussian-selection using diagonal and full covariance models
# norm_vars / center: passed to apply-cmvn-sliding as --norm-vars / --center.
norm_vars=false
center=true
# cleanup: when true, the per-job gselect archives are deleted at the end.
cleanup=true
# End configuration section.

echo "$0 $@"  # Print the command line for logging

# Source the local environment setup if there is one in the working directory.
if [ -f path.sh ]; then . ./path.sh; fi
# parse_options.sh is looked up via PATH (no directory given); abort if it
# cannot be sourced or reports an error.
. parse_options.sh || exit 1;


# Exactly four positional arguments are required; anything else prints the
# usage text and exits with status 1.  Only options that are actually declared
# in the configuration section at the top of the script are advertised here.
if [ $# -ne 4 ]; then
  echo "Usage: $0 <music-ubm-dir> <speech-ubm-dir> <data> <exp-dir>"
  echo " e.g.: $0  exp/full_ubm_music exp/full_ubm_speech data/test exp/test_results"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>                           # config containing options"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo "  --nj <n|10>                                      # Number of jobs"
  echo "  --cleanup <true,false|true>                      # If true, clean up temporary files"
  echo "  --stage <stage|-4>                               # To control partial reruns"
  echo "  --num-gselect <n|20>                             # Number of Gaussians to select using"
  echo "                                                   # diagonal model."
  echo "  --norm-vars <true,false|false>                   # Value of apply-cmvn-sliding --norm-vars"
  echo "  --center <true,false|true>                       # Value of apply-cmvn-sliding --center"
  exit 1;
fi

# Positional arguments.
music_ubmdir=$1   # directory that must contain the music model, final.ubm
speech_ubmdir=$2  # directory that must contain the speech model, final.ubm
data=$3           # data directory; must contain feats.scp and vad.scp
dir=$4            # output directory for logs, intermediate files and results

# Extra options for add-deltas, read from the speech model directory.  The
# file is optional: read errors are discarded on purpose, leaving the value
# empty when it does not exist.
delta_opts=$(cat "$speech_ubmdir/delta_opts" 2>/dev/null)

# Fail early if any required input file is missing.
for f in "$music_ubmdir/final.ubm" "$speech_ubmdir/final.ubm" \
         "$data/feats.scp" "$data/vad.scp"; do
  if [ ! -f "$f" ]; then
    echo "No such file $f"
    exit 1
  fi
done

# Set various variables.
mkdir -p "$dir/log" || exit 1
# Per-job pieces of the data directory live under $data/split$nj/<job>/.
sdata=$data/split$nj
utils/split_data.sh "$data" "$nj" || exit 1

## Feature pipeline, assembled one stage at a time.  JOB is left as a literal
## placeholder in the string; it appears inside the "$cmd JOB=1:$nj" commands
## that consume $feats.
# 1) append delta features to the job's raw features
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- |"
# 2) sliding-window CMVN over a 300-frame window
feats+=" apply-cmvn-sliding --norm-vars=$norm_vars --center=$center --cmn-window=300 ark:- ark:- |"
# 3) keep only the frames selected by the job's vad.scp
feats+=" select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |"

# Stage -2: run fgmm-global-to-gmm on each model's final.ubm, writing
# $dir/music_final.dubm and then $dir/speech_final.dubm.  Any failure aborts.
if [ $stage -le -2 ]; then
  for model in music speech; do
    # Name of the variable holding this model's directory
    # (music_ubmdir / speech_ubmdir), dereferenced with ${!...} below.
    ubm_var=${model}_ubmdir
    $cmd $dir/log/${model}_convert.log \
      fgmm-global-to-gmm ${!ubm_var}/final.ubm $dir/${model}_final.dubm || exit 1
  done
fi

# Stage -1: Gaussian selection, first for the music model and then for the
# speech model.  For each one, gmm-gselect runs on the .dubm (diagonal) model
# and its output is piped into fgmm-gselect on the model's final.ubm; the
# result is written gzip-compressed to $dir/<model>_gselect.JOB.gz.
if [ $stage -le -1 ]; then
  for model in music speech; do
    # Variable name of this model's directory (music_ubmdir / speech_ubmdir).
    ubm_var=${model}_ubmdir

    # Record the job count; it is compared against $nj after this stage.
    echo $nj > $dir/num_jobs
    echo "$0: doing Gaussian selection for $model UBM"
    $cmd JOB=1:$nj $dir/log/${model}_gselect.JOB.log \
      gmm-gselect --n=$num_gselect $dir/${model}_final.dubm "$feats" ark:- \| \
      fgmm-gselect --gselect=ark,s,cs:- --n=$num_gselect ${!ubm_var}/final.ubm \
        "$feats" "ark:|gzip -c >$dir/${model}_gselect.JOB.gz" || exit 1
  done
fi

# The gselect archives are written one per job, so the remaining stages must
# use the same job count that is stored in $dir/num_jobs.  A missing or
# unreadable file also ends up in the failure branch.
recorded_nj=$(cat $dir/num_jobs)
[ $nj -eq $recorded_nj ] || {
  echo "Number of jobs mismatch"
  exit 1
}

# Score every utterance under both models: fgmm-global-get-frame-likes with
# --average=true writes the average frame-level log-likelihood per utterance
# as a text archive, one file per job.  Note the two steps have different
# stage thresholds: music runs for stage <= 0, speech for stage <= 1.
if [ $stage -le 0 ]; then
  # Read this job's music Gaussian-selection archive through gunzip.
  music_gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/music_gselect.JOB.gz|"
  $cmd JOB=1:$nj $dir/log/get_music_logprob.JOB.log \
    fgmm-global-get-frame-likes --average=true "$music_gselect_opt" \
    $music_ubmdir/final.ubm "$feats" ark,t:$dir/music_logprob.JOB || exit 1
fi

if [ $stage -le 1 ]; then
  # Same for the speech model and its own selection archive.
  speech_gselect_opt="--gselect=ark,s,cs:gunzip -c $dir/speech_gselect.JOB.gz|"
  $cmd JOB=1:$nj $dir/log/get_speech_logprob.JOB.log \
    fgmm-global-get-frame-likes --average=true "$speech_gselect_opt" \
    $speech_ubmdir/final.ubm "$feats" ark,t:$dir/speech_logprob.JOB || exit 1
fi

# Pairs up two score tables line by line.
#   $1, $2 - text files whose lines are assumed to be "<utt-id> <value>"
#            (the code only relies on the first two whitespace-separated
#            fields of each line), listing the same utterances in the same
#            order.
# Outputs:  "<utt-id> <value from $1> <value from $2>" lines on stdout.
# Returns:  non-zero, after printing "Sorting mismatch" on stderr, as soon as
#           the utterance ids of a pair of lines differ.
# The awk redirect target has to be the quoted string "/dev/stderr": written
# unquoted, awk parses /dev/ as a regex match concatenated with a variable
# named stderr, and the output lands in a stray file instead of on stderr.
join_logprobs() {
  paste "$1" "$2" | awk '{
    if ($1 != $3) { print "Sorting mismatch" > "/dev/stderr"; exit(1); }
    print $1, $2, $4;
  }'
}

# Reads "<utt-id> <a> <b>" lines on stdin and prints "<utt-id> <p>" with
# p = 1/(1+exp(-(a-b))), i.e. exp(a)/(exp(a)+exp(b)).
logprob_to_ratio() {
  awk '{lratio = $2-$3; print $1, 1/(1+exp(-lratio));}'
}

# Stage 2: merge the per-job score files and turn them into one music-vs-speech
# score per utterance ($dir/ratio; values near 1 favour the music model).
if [ "$stage" -le 2 ]; then

  # Concatenate the per-job outputs in job order.  A missing piece is an error
  # rather than something to skip silently: if the same piece were missing for
  # both models the line counts would still agree and utterances would vanish
  # from the result without notice.
  for j in $(seq "$nj"); do cat "$dir/music_logprob.$j" || exit 1; done > "$dir/music_logprob"
  for j in $(seq "$nj"); do cat "$dir/speech_logprob.$j" || exit 1; done > "$dir/speech_logprob"

  n1=$(wc -l < "$dir/music_logprob")
  n2=$(wc -l < "$dir/speech_logprob")

  if [ "$n1" -ne "$n2" ]; then
    echo "Number of lines mismatch, music versus speech UBM probs: $n1 vs $n2"
    exit 1;
  fi

  # $dir/logprob: "<utt-id> <music score> <speech score>"
  join_logprobs "$dir/music_logprob" "$dir/speech_logprob" \
    > "$dir/logprob" || exit 1;

  # $dir/ratio: "<utt-id> <score in (0,1)>"
  logprob_to_ratio < "$dir/logprob" > "$dir/ratio" || exit 1;
fi

# Optionally delete the per-job Gaussian-selection archives.  The flag is
# compared as a string instead of being executed as a command, and -f keeps a
# rerun from printing errors when the archives have already been removed.
if [ "$cleanup" = true ]; then
  rm -f -- "$dir"/speech_gselect.*.gz "$dir"/music_gselect.*.gz
fi

exit 0;