score.sh
5.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/bin/bash
# Copyright 2012-2018 Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
# Apache 2.0.
# Begin configuration section.
# Defaults for the tunable settings of this script. They are assigned before
# parse_options.sh is sourced -- presumably so that it can override them from
# the command line (the usage text advertises --extraid); confirm against
# parse_options.sh.
# case_insensitive=true

# Job launcher: prefixed to every scoring/normalization command of the script.
cmd=run.pl
# Steps guarded by "[ $stage -le N ]" are skipped when stage is larger than N.
stage=0
# Inclusive range of LMWT values to process (one <output>_<LMWT> dir each).
min_lmwt=8
max_lmwt=12
# When non-empty, the KWS data is read from <data-dir>/kwset_<extraid>
# instead of <data-dir>/kws.
extraid=""
# When non-empty, the ntrue scale is copied from
# ${ntrue_from}_<LMWT>/details/ instead of being tuned by a sweep.
ntrue_from=""
# End configuration section.
# Usage text printed when the positional-argument count is wrong.
# The script consumes three positional arguments:
#   $1  lang dir (stored in langdir)
#   $2  kaldi data dir; the KWS data is taken from $2/kws or $2/kwset_<extraid>
#   $3  results prefix; per-LMWT directories are named <prefix>_<LMWT>
# The usage line must list all three, otherwise following it verbatim always
# triggers the "incorrect number of variables" error.
help_message="$0: score the kwslist using the F4DE scorer from NIST
Example:
$0 [additional-parameters] <lang-dir> <kaldi-data-dir> <kws-results-dir>
where the most important additional parameters can be:
--extraid <extra-id> #for using, when a non-default kws tasks are setup
(using the kws_setup.sh --extraid) for a kaldi-single data-dir"
# Log the invocation. The expansions are quoted so that arguments containing
# whitespace or glob characters are echoed as given instead of being re-split
# or expanded against the current directory.
echo "$0" "$@"

[ -f ./path.sh ] && . ./path.sh  # source the path.
# parse_options.sh is presumably what consumes the leading --option arguments
# and leaves only the positional ones in "$@" -- confirm against that script.
. parse_options.sh || exit 1

# Exactly three positional arguments ($1, $2, $3) are read further down.
if [ $# -ne 3 ]; then
  printf 'FATAL: incorrect number of variables given to the script\n\n'
  # The help text embeds $0, so print it as data: used as a printf format, a
  # '%' or backslash in the script path would corrupt the message.
  printf '%s\n' "$help_message"
  exit 1
fi
# From here on, abort on the first failing command or pipeline stage.
set -e -o pipefail

# $1: lang dir. Not referenced again in the visible part of this script.
langdir=$1

# $2: data dir. The KWS task lives in $2/kws, or in $2/kwset_<extraid> when
# --extraid was given. The test is quoted so that an extraid containing
# whitespace cannot turn it into a malformed "[" expression.
if [ -z "$extraid" ]; then
  kwsdatadir=$2/kws
else
  kwsdatadir=$2/kwset_${extraid}
fi

# $3: output prefix; per-LMWT directories are named ${kwsoutputdir}_<LMWT>.
kwsoutputdir="$3"

# Content of the trials file; later passed as --trials / --duration and to
# compute-atwv. With "set -e" a missing file aborts the script here.
trials=$(cat "$kwsdatadir/trials")
mkdir -p "$kwsoutputdir/log/"
if [ $stage -le 0 ] ; then
  # Stage 0: make sure ${kwsoutputdir}_<LMWT>/details/ntrue exists for every
  # LMWT, either by copying it from a previous run (--ntrue-from) or by
  # tuning it with a sweep.
  for LMWT in $(seq $min_lmwt $max_lmwt) ; do
    lmwt_dir=${kwsoutputdir}_$LMWT
    mkdir -p $lmwt_dir/details/
    mkdir -p $lmwt_dir/scoring/

    if [ -n "$ntrue_from" ]; then
      # Reuse the scale chosen elsewhere and record where it came from.
      ntrue_src=${ntrue_from}_${LMWT}/details
      cp $ntrue_src/ntrue $lmwt_dir/details/ntrue
      # ntrue_raw is optional in the source directory.
      if [ -f $ntrue_src/ntrue_raw ]; then
        cp $ntrue_src/ntrue_raw $lmwt_dir/details/ntrue_raw
      fi
      echo "$ntrue_from" > $lmwt_dir/details/ntrue_from
      continue
    fi

    # We need to sweep through different ntrue-scales, and we do it in one
    # parallel command per LMWT: that is more effective than sweeping in a
    # loop and running all LMWTs in parallel, as usually there are just a
    # couple of different LMWTs while the ntrue-scale has a larger dynamic
    # range. Index NTRUE=1..21 maps to the scale 1+(NTRUE-1)/5.0, i.e. 1.0 to
    # 5.0 in steps of 0.2. The escaped $(...), |, > and '&&' are passed on
    # literally, presumably to be interpreted by the shell that $cmd starts.
    $cmd NTRUE=1:21 $kwsoutputdir/log/score.${LMWT}.NTRUE.log \
      ntrue=\$\(perl -e 'print 1+(NTRUE-1)/5.0' \) '&&' \
      cat $lmwt_dir/results \| \
      local/kws/normalize_results_kst.pl --trials $trials --ntrue-scale \$ntrue \| \
      local/kws/filter_kws_results.pl --probs --nbest 200 \| \
      compute-atwv $trials ark,t:$kwsdatadir/hitlist ark:- \
      \> $lmwt_dir/scoring/score.NTRUE.txt

    # Take the ATWV line with the numerically largest value after the '='
    # and recover the sweep index from the name of the file it came from.
    ntrue_raw=$(grep ATWV $lmwt_dir/scoring/score.*.txt \
      | sort -k2,2nr -t '=' \
      | head -n 1 \
      | sed 's/.*score\.\([0-9][0-9]*\)\.txt.*/\1/g')
    echo "$ntrue_raw" > $lmwt_dir/details/ntrue_raw

    # The calculation of ntrue must be the same as in the $cmd command above.
    ntrue=$(perl -e "print 1+($ntrue_raw-1)/5.0")
    echo "$ntrue" > $lmwt_dir/details/ntrue
  done
fi
if [ $stage -le 1 ] ; then
  # Stage 1: normalize the results of every LMWT with its selected ntrue
  # scale and score them. "LMWT" is kept literal in the paths below; $cmd is
  # expected to substitute each value of the job range for it.
  lmwt_jobs=LMWT=$min_lmwt:$max_lmwt
  lmwt_dir=${kwsoutputdir}_LMWT

  # Normalization; the escaped $(cat ...) is not expanded here, so the scale
  # is read from the per-LMWT ntrue file when the job itself runs.
  $cmd $lmwt_jobs $kwsoutputdir/log/normalize.LMWT.log \
    cat $lmwt_dir/results \| \
    local/kws/normalize_results_kst.pl --trials $trials \
    --ntrue-scale \$\(cat $lmwt_dir/details/ntrue\) \
    \> $lmwt_dir/details/results

  # Final scoring; score.txt is additionally copied one level up, next to
  # the details/ directory.
  $cmd $lmwt_jobs $kwsoutputdir/log/score.final.LMWT.log \
    cat $lmwt_dir/details/results \| \
    compute-atwv $trials ark,t:$kwsdatadir/hitlist ark:- \
    $lmwt_dir/details/alignment.csv \> $lmwt_dir/details/score.txt '&&' \
    cp $lmwt_dir/details/score.txt $lmwt_dir/score.txt

  # Per-category statistics are optional: they need a categories file.
  if [ -f $kwsdatadir/categories ]; then
    $cmd $lmwt_jobs $kwsoutputdir/log/per-category-stats.LMWT.log \
      cat $lmwt_dir/details/alignment.csv \| \
      perl local/search/per_category_stats.pl --sweep-step 0.005 $trials \
      $kwsdatadir/categories \> $lmwt_dir/details/per-category-score.txt
  else
    echo "$0: Categories file not found, not generating per-category scores"
  fi
fi
# Stage 2: F4DE scoring with KWSEval. It runs only when the KWS data dir
# provides an f4de_attribs file.
if [ $stage -le 2 ] && [ -f $kwsdatadir/f4de_attribs ] ; then
  # Defaults for the values f4de_attribs is sourced to provide.
  kwlist_name=""
  flen=0.01
  language=""
  . $kwsdatadir/f4de_attribs #override the previous variables

  # Inputs of the scorer, all taken from the KWS data dir.
  kwlist=$kwsdatadir/kwlist.xml
  rttm=$kwsdatadir/rttm
  ecf=$kwsdatadir/ecf.xml

  # "LMWT" stays literal here; $cmd is expected to substitute each value of
  # the job range for it.
  lmwt_jobs=LMWT=$min_lmwt:$max_lmwt
  lmwt_dir=${kwsoutputdir}_LMWT
  f4de_dir=$lmwt_dir/f4de

  # Annotate the keyword list with the categories file.
  $cmd $lmwt_jobs $kwsoutputdir/log/f4de_prepare.LMWT.log \
    mkdir -p $f4de_dir/ '&&' cat $kwlist \| \
    local/search/annotate_kwlist.pl $kwsdatadir/categories \> $f4de_dir/kwlist.xml

  # Convert the normalized results into a kwslist.xml for the scorer.
  $cmd $lmwt_jobs $kwsoutputdir/log/f4de_write_kwslist.LMWT.log \
    cat $lmwt_dir/details/results \| \
    utils/int2sym.pl -f 2 $kwsdatadir/utt.map \| \
    local/search/utt_to_files.pl --flen $flen $kwsdatadir/../segments \| \
    local/search/write_kwslist.pl --flen $flen --language $language \
    --kwlist-id $kwlist_name \> $f4de_dir/kwslist.xml

  # Run the scorer; its output goes into the per-LMWT f4de/ directory.
  $cmd $lmwt_jobs $kwsoutputdir/log/f4de_score.LMWT.log \
    KWSEval -e $ecf -r $rttm -t $f4de_dir/kwlist.xml -a \
    --zGlobalMeasures Optimum --zGlobalMeasures Supremum \
    -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \
    -O -B -q 'OOV:regex=.*' -q 'BaseOOV:regex=.*' \
    -s $f4de_dir/kwslist.xml -c -o -b -d -f $f4de_dir/

  # Derive metrics.txt from the alignment written into the f4de/ directory.
  $cmd $lmwt_jobs $kwsoutputdir/log/f4de_report.LMWT.log \
    local/kws_oracle_threshold.pl --duration $trials \
    $f4de_dir/alignment.csv \> $f4de_dir/metrics.txt
fi

echo "$0: Done"
exit 0