Blame view
egs/reverb/s5/local/score.sh
6.09 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
#!/bin/bash
# Copyright 2012-2014  Johns Hopkins University (Author: Daniel Povey, Yenda Trmal)
# Apache 2.0
#
# Per-task WER scoring of decoded lattices.
#
# Usage: local/score.sh [options] <data-dir> <lang-dir|graph-dir> <decode-dir>
#
# Stage 0: for every word insertion penalty in --word-ins-penalty and every
#          LM weight in [--min-lmwt, --max-lmwt], turn <decode-dir>/lat.*.gz
#          into a word-level hypothesis file (best path, or MBR decoding when
#          --decode-mbr is true) and run compute-wer once per task, writing
#          <decode-dir>/wer_<lmwt>_<wip>_<task>.
# Stage 1: for every task, pick the best (lmwt, wip) pair with
#          utils/best_wer.sh, write <decode-dir>/scoring_kaldi/best_wer_<task>
#          and, when --stats is true, per-utterance / per-speaker / per-op
#          details plus bootstrap confidence intervals.
#
# A "task" is a substring used to select utterances with grep from
# <data-dir>/text and from the hypothesis files; the task list is chosen
# from the name of <data-dir> (see below).
#
# See the script steps/scoring/score_kaldi_cer.sh in case you need to evaluate CER

[ -f ./path.sh ] && . ./path.sh

# begin configuration section.
# (every variable here can be overridden on the command line via
#  parse_options.sh, sourced below)
cmd=run.pl
stage=0
decode_mbr=false
stats=true
beam=6
word_ins_penalty=0.0,0.5,1.0
min_lmwt=7
max_lmwt=17
iter=final
#end configuration section.

echo "$0 $@"  # Print the command line for logging
# path.sh is sourced a second time on purpose: this keeps the original
# ordering relative to the configuration defaults above.
[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  echo "Usage: $0 [--cmd (run.pl|queue.pl...)] <data-dir> <lang-dir|graph-dir> <decode-dir>"
  echo " Options:"
  echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
  echo " --stage (0|1|2) # start scoring script from part-way through."
  echo " --decode_mbr (true/false) # maximum bayes risk decoding (confusion network)."
  echo " --min_lmwt <int> # minumum LM-weight for lattice rescoring "
  echo " --max_lmwt <int> # maximum LM-weight for lattice rescoring "
  exit 1;
fi

data=$1
lang_or_graph=$2
dir=$3

symtab=$lang_or_graph/words.txt

# All three inputs must exist before any work is started.
for f in "$symtab" "$dir/lat.1.gz" "$data/text"; do
  [ ! -f "$f" ] && echo "score.sh: no such file $f" && exit 1;
done

# Optional text normalisation filters. A side-specific filter
# (wer_ref_filter / wer_hyp_filter) takes precedence over the shared
# wer_output_filter; with neither present the text is passed through cat.
ref_filtering_cmd="cat"
[ -x local/wer_output_filter ] && ref_filtering_cmd="local/wer_output_filter"
[ -x local/wer_ref_filter ] && ref_filtering_cmd="local/wer_ref_filter"
hyp_filtering_cmd="cat"
[ -x local/wer_output_filter ] && hyp_filtering_cmd="local/wer_output_filter"
[ -x local/wer_hyp_filter ] && hyp_filtering_cmd="local/wer_hyp_filter"

if $decode_mbr ; then
  echo "$0: scoring with MBR, word insertion penalty=$word_ins_penalty"
else
  echo "$0: scoring with word insertion penalty=$word_ins_penalty"
fi

mkdir -p "$dir/scoring_kaldi"

# Choose the task list from the data directory name: a name containing
# "real" is checked first, then "cln", otherwise the full near/far list.
case "$data" in
  *real*)
    tasks="near_room1 far_room1"
    ;;
  *cln*)
    tasks="cln_room1 cln_room2 cln_room3"
    ;;
  *)
    tasks="near_room1 far_room1 near_room2 far_room2 near_room3 far_room3"
    ;;
esac

# Word insertion penalties as a space separated list (intentionally expanded
# unquoted below so that it splits into one word per penalty).
wip_list=${word_ins_penalty//,/ }

# One filtered reference transcript per task (done regardless of --stage).
for task in ${tasks}; do
  grep "$task" "$data/text" | $ref_filtering_cmd \
    > "$dir/scoring_kaldi/test_filt_${task}.txt" || exit 1;
done

# In the $cmd invocations below the pipe and redirection characters are
# escaped/quoted so that they reach $cmd as literal arguments instead of being
# interpreted by this shell; LMWT is the job-range variable handed to $cmd.
# Those argument lists are deliberately left unquoted, exactly as $cmd
# expects them.

if [ "$stage" -le 0 ]; then

  for wip in $wip_list; do
    mkdir -p "$dir/scoring_kaldi/penalty_$wip/log"

    # Lattices -> filtered word-level hypotheses, one file per LM weight.
    if $decode_mbr ; then
      $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \
        lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
        lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \
        lattice-prune --beam=$beam ark:- ark:- \| \
        lattice-mbr-decode --word-symbol-table=$symtab \
        ark:- ark,t:- \| \
        utils/int2sym.pl -f 2- $symtab \| \
        $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1;
    else
      $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/best_path.LMWT.log \
        lattice-scale --inv-acoustic-scale=LMWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
        lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \
        lattice-best-path --word-symbol-table=$symtab ark:- ark,t:- \| \
        utils/int2sym.pl -f 2- $symtab \| \
        $hyp_filtering_cmd '>' $dir/scoring_kaldi/penalty_$wip/LMWT.txt || exit 1;
    fi

    # Score each task separately against its own filtered reference.
    # NOTE(review): score.LMWT.log does not carry the task name, so every
    # task overwrites the log of the previous one.
    for task in ${tasks}; do
      $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring_kaldi/penalty_$wip/log/score.LMWT.log \
        grep $task $dir/scoring_kaldi/penalty_$wip/LMWT.txt \| \
        compute-wer --text --mode=present \
        ark:$dir/scoring_kaldi/test_filt_${task}.txt ark,p:- ">&" $dir/wer_LMWT_${wip}_${task} || exit 1;
    done

  done
fi

if [ "$stage" -le 1 ]; then

  for task in ${tasks}; do
    # Collect the WER line of every (wip, lmwt) combination for this task and
    # let utils/best_wer.sh select one of them.
    for wip in $wip_list; do
      for lmwt in $(seq "$min_lmwt" "$max_lmwt"); do
        # adding /dev/null to the command list below forces grep to output the filename
        grep WER "$dir/wer_${lmwt}_${wip}_${task}" /dev/null
      done
    done | utils/best_wer.sh > "$dir/scoring_kaldi/best_wer_${task}" 2>&1 || exit 1

    # The last field of the best_wer line is the path of the winning file,
    # <dir>/wer_<lmwt>_<wip>_<task>. Every task name contains exactly one
    # '_', so counted from the end of the '_'-separated path the penalty is
    # field NF-2 and the LM weight is field NF-3.
    best_wer_file=$(awk '{print $NF}' "$dir/scoring_kaldi/best_wer_${task}")
    best_wip=$(echo "$best_wer_file" | awk -F_ '{N=NF-2; print $N}')
    best_lmwt=$(echo "$best_wer_file" | awk -F_ '{N=NF-3; print $N}')

    if [ -z "$best_lmwt" ]; then
      echo "$0: we could not get the details of the best WER from the file $dir/wer_*. Probably something went wrong."
      exit 1;
    fi

    if $stats; then
      # NOTE(review): wer_details/ and the stats1/stats2/wer_bootci logs do
      # not carry the task name, so each pass of this loop overwrites the
      # output of the previous task and only the last task's details remain.
      # The paths are left unchanged so that existing consumers keep working.
      mkdir -p "$dir/scoring_kaldi/wer_details"
      echo "$best_lmwt" > "$dir/scoring_kaldi/wer_details/lmwt" # record best language model weight
      echo "$best_wip" > "$dir/scoring_kaldi/wer_details/wip" # record best word insertion penalty

      # Align hypothesis with reference; derive per-utterance and
      # per-speaker statistics from the alignment.
      $cmd $dir/scoring_kaldi/log/stats1.log \
        cat $dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \| \
        align-text --special-symbol="'***'" ark:$dir/scoring_kaldi/test_filt_${task}.txt ark:- ark,t:- \| \
        utils/scoring/wer_per_utt_details.pl --special-symbol "'***'" \| tee $dir/scoring_kaldi/wer_details/per_utt \| \
        utils/scoring/wer_per_spk_details.pl $data/utt2spk \> $dir/scoring_kaldi/wer_details/per_spk || exit 1;

      # Per-operation statistics computed from the per-utterance details.
      $cmd $dir/scoring_kaldi/log/stats2.log \
        cat $dir/scoring_kaldi/wer_details/per_utt \| \
        utils/scoring/wer_ops_details.pl --special-symbol "'***'" \| \
        sort -b -i -k 1,1 -k 4,4rn -k 2,2 -k 3,3 \> $dir/scoring_kaldi/wer_details/ops || exit 1;

      # Bootstrap confidence interval for the best configuration.
      $cmd $dir/scoring_kaldi/log/wer_bootci.log \
        compute-wer-bootci --mode=present \
        ark:$dir/scoring_kaldi/test_filt_${task}.txt ark:$dir/scoring_kaldi/penalty_$best_wip/$best_lmwt.txt \
        '>' $dir/scoring_kaldi/wer_details/wer_bootci || exit 1;

    fi
  done
fi

# If we got here, the scoring was successful.
# As a small aid to prevent confusion, we remove all wer_{?,??} files;
# these originate from the previous version of the scoring files.
# Both statements are kept here because dropping one could lead to confusion
# about the capabilities of the script (we don't do CER in this script).
# Errors are silenced on purpose: usually there is nothing to remove.
rm "$dir"/wer_{?,??} 2>/dev/null
rm "$dir"/cer_{?,??} 2>/dev/null

exit 0;