Blame view
egs/sre08/v1/sid/compute_vad_decision_gmm.sh
5.09 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
#!/bin/bash

# Copyright 2015  David Snyder
# Apache 2.0
#
# Compute GMM-based VAD output and optionally combine with
# the energy-based VAD decisions.
#
# Positional arguments:
#   <data-dir>                  data directory; must contain feats.scp (and
#                               vad.scp when --use-energy-vad is true)
#   <gmm-dir-1> ... <gmm-dir-N> directories that each hold a final.ubm
#   <log-dir>                   logs are written to <log-dir>/log
#   <vad-dir>                   archives and intermediate files go here
#
# On success ${data}/vad.scp is (re)written; a pre-existing vad.scp is kept
# as ${data}/vad.scp.bak.
#
# Stages (a stage runs when --stage is <= the number shown):
#   -2  convert each final.ubm to a diagonal GMM (<name>_final.dubm)
#   -1  Gaussian selection for each GMM
#    0  frame likelihoods, VAD decisions, optional merge, write vad.scp

# Begin configuration section (each variable can be overridden on the
# command line through parse_options.sh).
nj=10
cmd=run.pl
map_config=
merge_map_config=
priors=
use_energy_vad=false
num_gselect=20
norm_vars=false
center=true
stage=-4
# Remove the gselect/logprob intermediates at the end.  This used to be
# undefined: the empty expansion in "if $cleanup" is an empty command, which
# succeeds, so cleanup always ran.  Defaulting to true keeps that behaviour
# while allowing "--cleanup false".
cleanup=true
# End configuration section.

echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# -lt 5 ]; then
  echo "Usage: $0 [options] <data-dir> <gmm-dir-1> ... <gmm-dir-N> <log-dir> <vad-dir>";
  echo "e.g.: $0 data/train exp/music_gmm exp/speech_gmm exp/noise_gmm exp/gmm_vad exp/gmm_vad"
  echo " Options:"
  echo " --map-config <config-file> # config passed to compute-vad-from-frame-likes"
  echo " --priors <comma-separated-floats> # list passed to compute-vad-from-frame-likes"
  echo " --merge-map-config <config-file> # config passed to merge-vads"
  echo " --use-energy-vad <true,false> # If true, look for a vad.scp file and combine it with this VAD"
  echo " --cleanup <true,false> # If true (default), remove intermediate gselect/logprob files"
  echo " --nj <nj> # number of parallel jobs"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  exit 1;
fi

# Layout of the positional arguments:
#   $1 = data dir, $2..$(N+1) = GMM dirs, then log dir, then VAD dir.
args=("$@")
num_gmms=$(($# - 3))
gmm_dirs=("${@:2:$num_gmms}") # The GMM directories
data=${args[0]}
log_dir=${args[$num_gmms+1]}
vad_dir=${args[$num_gmms+2]}

# make $vad_dir an absolute pathname.
vad_dir=$(perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' "${vad_dir}" "${PWD}")

# use "name" as part of name of the archive.
name=$(basename "$data")

mkdir -p "$vad_dir" || exit 1;
mkdir -p "$log_dir" || exit 1;

# Merging needs both the existing energy-based VAD and the merge map.
if $use_energy_vad; then
  for f in "$data/vad.scp" "$merge_map_config"; do
    if [ ! -f "$f" ]; then
      echo "compute_vad_decision_gmm.sh: no such file $f"
      exit 1;
    fi
  done
fi

if [ ! -f "$data/feats.scp" ]; then
  # Report the file that was actually tested (previously printed a stale $f).
  echo "compute_vad_decision_gmm.sh: no such file $data/feats.scp"
  exit 1;
fi

utils/split_data.sh "$data" "$nj" || exit 1;
sdata=$data/split$nj;

# We assume that the same delta-opts is used for each
# GMM dir.
delta_opts=$(cat "${gmm_dirs[0]}/delta_opts" 2>/dev/null)
if [ -f "${gmm_dirs[0]}/delta_opts" ]; then
  # Keep a copy of the options next to the VAD output.  The original copied
  # to an undefined "$dir" (i.e. to "/"); $vad_dir is the output directory
  # of this script.  Best effort, so errors stay suppressed.
  cp "${gmm_dirs[0]}/delta_opts" "$vad_dir/" 2>/dev/null
fi

## Set up features.
# JOB is a literal placeholder; it is substituted per job by $cmd.
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=$norm_vars --center=$center --cmn-window=300 ark:- ark:- |"

# NOTE: $cmd is intentionally left unquoted below because it may carry its
# own options (e.g. "queue.pl <queue opts>").

if [ $stage -le -2 ]; then
  for gmm_dir in "${gmm_dirs[@]}"; do
    gmm_name=$(basename "$gmm_dir")
    $cmd ${log_dir}/log/${gmm_name}_convert.log \
      fgmm-global-to-gmm ${gmm_dir}/final.ubm ${vad_dir}/${gmm_name}_final.dubm || exit 1;
  done
fi

if [ $stage -le -1 ]; then
  echo "$0: doing Gaussian selection"
  for gmm_dir in "${gmm_dirs[@]}"; do
    gmm_name=$(basename "$gmm_dir")
    # The escaped pipe (\|) is handed to $cmd so that the pipeline runs
    # inside each job rather than in this shell.
    $cmd JOB=1:$nj ${log_dir}/log/${gmm_name}_gselect.JOB.log \
      gmm-gselect --n=$num_gselect ${vad_dir}/${gmm_name}_final.dubm "$feats" ark:- \| \
      fgmm-gselect --gselect=ark,s,cs:- --n=${num_gselect} ${gmm_dir}/final.ubm \
        "$feats" "ark:|gzip -c >${vad_dir}/${gmm_name}_gselect.JOB.gz" || exit 1;
  done
fi

# Space-separated list of per-GMM likelihood archives; expanded unquoted on
# purpose so that each archive becomes its own argument.
frame_likes=""
if [ $stage -le 0 ]; then
  echo "$0: computing frame likelihoods"
  for gmm_dir in "${gmm_dirs[@]}"; do
    gmm_name=$(basename "$gmm_dir")
    frame_likes="${frame_likes} ark:${vad_dir}/${gmm_name}_logprob.JOB.ark"
    $cmd JOB=1:$nj ${log_dir}/log/get_${gmm_name}_logprob.JOB.log \
      fgmm-global-get-frame-likes --average=false \
      "--gselect=ark,s,cs:gunzip -c ${vad_dir}/${gmm_name}_gselect.JOB.gz|" ${gmm_dir}/final.ubm \
      "$feats" ark:${vad_dir}/${gmm_name}_logprob.JOB.ark || exit 1;
  done

  echo "$0: computing VAD decisions from frame likelihoods"
  $cmd JOB=1:$nj ${log_dir}/log/make_vad_gmm_${name}.JOB.log \
    compute-vad-from-frame-likes --map=${map_config} --priors=$priors $frame_likes \
    ark,scp:${vad_dir}/vad_gmm_${name}.JOB.ark,${vad_dir}/vad_gmm_${name}.JOB.scp \
    || exit 1;

  if $use_energy_vad ; then
    echo "$0: merging with energy-based VAD decisions"
    $cmd JOB=1:$nj ${log_dir}/log/merge_vads_${name}.JOB.log \
      merge-vads --map=${merge_map_config} scp:$sdata/JOB/vad.scp \
      scp:${vad_dir}/vad_gmm_${name}.JOB.scp \
      ark,scp:${vad_dir}/vad_merged_${name}.JOB.ark,${vad_dir}/vad_merged_${name}.JOB.scp \
      || exit 1;
  fi

  # Back up a previous vad.scp only if there is one; without energy VAD the
  # file usually does not exist and an unconditional mv just printed an error.
  if [ -f "${data}/vad.scp" ]; then
    echo "$0: moving old vad.scp to ${data}/vad.scp.bak"
    mv "${data}/vad.scp" "${data}/vad.scp.bak"
  fi

  # The per-job scp files to publish: merged output when energy VAD was
  # combined in, plain GMM output otherwise.
  if $use_energy_vad ; then
    vad_prefix=vad_merged
  else
    vad_prefix=vad_gmm
  fi
  for ((n=1; n<=nj; n++)); do
    cat "${vad_dir}/${vad_prefix}_${name}.$n.scp" || exit 1;
  done > "${data}/vad.scp"
fi

# Sanity check: one VAD entry is expected per feats.scp entry.
nc=$(cat "$data/vad.scp" | wc -l)
nu=$(cat "$data/feats.scp" | wc -l)
if [ $nc -ne $nu ]; then
  echo "**Warning it seems not all of the speakers got VAD output ($nc != $nu);"
  echo "**validate_data_dir.sh will fail; you might want to use fix_data_dir.sh"
  [ $nc -eq 0 ] && exit 1;
fi

echo "$0 created GMM-based VAD output for $name"

if $cleanup ; then
  for gmm_dir in "${gmm_dirs[@]}"; do
    gmm_name=$(basename "$gmm_dir")
    # -f: the intermediates may be absent (e.g. when run with --stage > 0).
    rm -f "${vad_dir}/${gmm_name}"_gselect.*.gz
    rm -f "${vad_dir}/${gmm_name}"_logprob.*.ark
  done
fi

exit 0;