Blame view
egs/aspire/s5/local/score_stm.sh
3.9 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
#!/bin/bash
# Copyright 2013 Johns Hopkins University (authors: Yenda Trmal)

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# This is a scoring script for the CTMS in <decode-dir>/score_<LMWT>/${name}.ctm
# it tries to mimic the NIST scoring setup as much as possible (and usually does a good job)
#
# Usage: score_stm.sh [options] <dataDir> <langDir|graphDir> <decodeDir>
#   <dataDir>    must contain the reference file "stm"
#   <decodeDir>  must contain score_<LMWT>/<name>.ctm for every LMWT in
#                min_lmwt..max_lmwt; results are written next to them and
#                logs go to <decodeDir>/scoring/log.

# begin configuration section.
cmd=run.pl             # job launcher, invoked with an LMWT=min:max range
cer=0                  # only referenced by the commented-out CER stage below
min_lmwt=7             # first LMWT value that is scored
max_lmwt=17            # last LMWT value that is scored
model=                 # not referenced in this script
stage=0                # stages with a number below this value are skipped
ctm_name=              # basename of the ctm file; defaults to basename of <dataDir>
case_insensitive=true  # not referenced in this script
use_icu=true           # not referenced in this script
icu_transform='Any-Lower'  # transform handed to "uconv -x" for stm/ctm text
#end configuration section.

echo "$0" "$@"

[ -f ./path.sh ] && . ./path.sh
[ -f ./cmd.sh ] && . ./cmd.sh
. parse_options.sh || exit 1;

if [ $# -ne 3 ]; then
  # Print the complete help text and report failure; the previous
  # 'echo ... && exit' stopped after the first line with exit status 0.
  echo "Usage: $0 [options] <dataDir> <langDir|graphDir> <decodeDir>"
  echo " Options:"
  echo " --cmd (run.pl|queue.pl...) # specify how to run the sub-processes."
  echo " --cer (0|1) # compute CER in addition to WER"
  exit 1;
fi

data=$1
lang=$2 # This parameter is not used -- kept only for backwards compatibility
dir=$3

# Strict mode is enabled only after option parsing, as in the original.
set -e
set -o pipefail
set -u

# Locate sclite: prefer the one on PATH, otherwise fall back to the sctk copy
# under $KALDI_ROOT.
ScoringProgram=$(which sclite) || ScoringProgram=$KALDI_ROOT/tools/sctk/bin/sclite
if [ ! -x "$ScoringProgram" ]; then
  echo "Cannot find scoring program at $ScoringProgram"
  exit 1
fi

# Locate hubscr.pl the same way. This check used to test $ScoringProgram a
# second time, so a missing hubscr.pl was never detected here.
SortingProgram=$(which hubscr.pl) || SortingProgram=$KALDI_ROOT/tools/sctk/bin/hubscr.pl
if [ ! -x "$SortingProgram" ]; then
  echo "Cannot find sorting program at $SortingProgram"
  exit 1
fi

# Required input files.
for f in "$data/stm" ; do
  if [ ! -f "$f" ]; then
    echo "$0: expecting file $f to exist"
    exit 1
  fi
done

if [ -z "$ctm_name" ] ; then
  name=$(basename "$data") # e.g. eval2000
else
  name=$ctm_name
fi

mkdir -p "$dir/scoring/log"

# Stage 0: for every LMWT, sort the stm and ctm with hubscr.pl, run the text
# columns through uconv (stm: field 6 onward, ctm: field 5 onward; the leading
# fields are copied unchanged), call utils/fix_ctm.sh and then score with sclite.
# The escaped characters (\< \> \( \) \| and the quoted '&&' / ';') are passed
# through literally to $cmd so that they are interpreted by the job's shell,
# not by this one.
if [ "$stage" -le 0 ] ; then
  $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
    set -e';' set -o pipefail';' \
    cp -f $data/stm $dir/score_LMWT/stm.unsorted '&&' \
    cp -f $dir/score_LMWT/${name}.ctm $dir/score_LMWT/${name}.ctm.unsorted '&&' \
    $SortingProgram sortSTM \<$dir/score_LMWT/stm.unsorted \>$dir/score_LMWT/stm.sorted '&&' \
    $SortingProgram sortCTM \<$dir/score_LMWT/${name}.ctm.unsorted \>$dir/score_LMWT/${name}.ctm.sorted '&&' \
    paste -d ' ' \<\(cut -f 1-5 -d ' ' $dir/score_LMWT/stm.sorted \) \
                 \<\(cut -f 6- -d ' ' $dir/score_LMWT/stm.sorted \| uconv -f utf8 -t utf8 -x "$icu_transform" \) \
        \> $dir/score_LMWT/stm '&&' \
    paste -d ' ' \<\(cut -f 1-4 -d ' ' $dir/score_LMWT/${name}.ctm.sorted \) \
                 \<\(cut -f 5- -d ' ' $dir/score_LMWT/${name}.ctm.sorted \| uconv -f utf8 -t utf8 -x "$icu_transform" \) \
        \> $dir/score_LMWT/${name}.ctm '&&' \
    utils/fix_ctm.sh $dir/score_LMWT/stm $dir/score_LMWT/${name}.ctm '&&' \
    $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm \
      -n "$name.ctm" -f 0 -D -F -o sum rsum prf dtl sgml -e utf-8 || exit 1
fi

# Score the set...
# NOTE(review): hubscr and hubdir are not assigned anywhere in this script, so
# under 'set -u' this stage aborts unless they are provided by path.sh, cmd.sh
# or the environment. The stage also reads $dir/score_LMWT/ctm.filt, which is
# not produced above, copies the original stm over the one written by stage 0,
# and reuses the stage 0 log file name. Confirm whether this stage is still
# wanted (e.g. wired to $SortingProgram) or should be removed.
if [ "$stage" -le 1 ]; then
  $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.log \
    cp $data/stm $dir/score_LMWT/ '&&' \
    $hubscr -p $hubdir -V -l english -h hub5 -g $data/glm -r $dir/score_LMWT/stm $dir/score_LMWT/ctm.filt || exit 1;
fi

#
#if [ $stage -le 1 ]; then
#  if [ $cer -eq 1 ]; then
#    $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/score.LMWT.char.log \
#      $ScoringProgram -s -r $dir/score_LMWT/stm stm -h $dir/score_LMWT/${name}.ctm ctm \
#        -n "$name.char.ctm" -o sum rsum prf dtl sgml -f 0 -D -F -c NOASCII DH -e utf-8 || exit 1
#  fi
#fi
#

# $(date) is left unquoted on purpose so the message is word-split exactly as
# it was with the original backtick form.
echo "Finished scoring on" $(date)
exit 0