Blame view
egs/aspire/s5/local/extract_vad_weights.sh
2.91 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
#!/bin/bash # Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0. # This script converts lattices available from a first pass decode into a per-frame weights file # The ctms generated from the lattices are filtered. Silence frames are assigned a low weight (e.g.0.00001) # and voiced frames have a weight of 1. set -e stage=1 cmd=run.pl iter=final silence_weight=0.00001 #end configuration section. . ./cmd.sh [ -f ./path.sh ] && . ./path.sh . utils/parse_options.sh || exit 1; if [ $# -ne 4 ]; then echo "Usage: $0 [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <input-decode-dir> <output-wts-file-gzipped>" echo " Options:" echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes." exit 1; fi data_dir=$1 lang=$2 # Note: may be graph directory not lang directory, but has the necessary stuff copied. input_decode_dir=$3 output_wts_file_gz=$4 if [ $stage -le 1 ]; then echo "$0: generating CTM from input lattices" local/multi_condition/get_ctm_conf.sh --cmd "$cmd" \ --use-segments false \ --iter $iter \ $data_dir \ $lang \ $input_decode_dir fi if [ $stage -le 2 ]; then name=`basename $data_dir` # we just take the ctm from LMWT 10, it doesn't seem to affect the results a lot ctm=$input_decode_dir/score_10/$name.ctm echo "$0: generating weights file from ctm $ctm" pad_frames=0 # this did not seem to be helpful but leaving it as an option. feat-to-len scp:$data_dir/feats.scp ark,t:- >$input_decode_dir/utt.lengths if [ ! -f $ctm ]; then echo "$0: expected ctm to exist: $ctm"; exit 1; fi cat $ctm | awk '$6 == 1.0 && $4 < 1.0' | \ grep -v -w mm | grep -v -w mhm | grep -v -F '[noise]' | \ grep -v -F '[laughter]' | grep -v -F '<unk>' | \ perl -e ' $lengths=shift @ARGV; $pad_frames=shift @ARGV; $silence_weight=shift @ARGV; $pad_frames >= 0 || die "bad pad-frames value $pad_frames"; open(L, "<$lengths") || die "opening lengths file"; @all_utts = (); $utt2ref = { }; while (<L>) { ($utt, $len) = split(" ", $_); push @all_utts, $utt; $array_ref = [ ]; for ($n = 0; $n < $len; $n++) { ${$array_ref}[$n] = $silence_weight; } $utt2ref{$utt} = $array_ref; } while (<STDIN>) { @A = split(" ", $_); @A == 6 || die "bad ctm line $_"; $utt = $A[0]; $beg = $A[2]; $len = $A[3]; $beg_int = int($beg * 100) - $pad_frames; $len_int = int($len * 100) + 2*$pad_frames; $array_ref = $utt2ref{$utt}; !defined $array_ref && die "No length info for utterance $utt"; for ($t = $beg_int; $t < $beg_int + $len_int; $t++) { if ($t >= 0 && $t < @$array_ref) { ${$array_ref}[$t] = 1; } } } foreach $utt (@all_utts) { $array_ref = $utt2ref{$utt}; print $utt, " [ ", join(" ", @$array_ref), " ] "; } ' $input_decode_dir/utt.lengths $pad_frames $silence_weight | \ gzip -c > $output_wts_file_gz fi |