Blame view

egs/mini_librispeech/s5/local/kws/score.sh 5.47 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
  #!/bin/bash
  
  # Copyright 2012-2018  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
  # Apache 2.0.
  
  # Begin configuration section.
  # The variables below are the script's tunable options. They are defined
  # before parse_options.sh is sourced; presumably each can then be overridden
  # on the command line as --<name> <value> (the help text shows --extraid) --
  # confirm against parse_options.sh.
  # case_insensitive=true
  # Suffix of an alternative KWS setup: when non-empty, <kaldi-data-dir>/kwset_<extraid>
  # is used instead of <kaldi-data-dir>/kws.
  extraid=
  # First and last LMWT value processed (expanded with seq, so both inclusive).
  min_lmwt=8
  max_lmwt=12
  # Job launcher that is prefixed to every parallel command.
  cmd=run.pl
  # Stages with a number lower than this value are skipped.
  stage=0
  # When non-empty, stage 0 copies the ntrue files from ${ntrue_from}_<LMWT>/details/
  # instead of tuning the ntrue scale itself.
  ntrue_from=
  # End configuration section.
  
  # The script consumes three positional arguments ($1, $2, $3), so the usage
  # line lists all three.
  help_message="$0: score the kwslist using the F4DE scorer from NIST
    Example:
      $0 [additional-parameters] <lang-dir> <kaldi-data-dir> <kws-results-dir>
      where the most important additional parameters can be:
      --extraid  <extra-id> #for using, when a non-default kws tasks are setup
                (using the kws_setup.sh --extraid) for a kaldi-single data-dir"
  
  # Log the invocation. Quoted so that arguments are echoed as given instead of
  # being re-split and glob-expanded by the shell.
  echo "$0" "$@"
  # Source the path setup if the current directory provides one.
  if [ -f ./path.sh ]; then
    . ./path.sh
  fi
  . parse_options.sh || exit 1;
  
  
  # Exactly three positional arguments are required; anything else is a usage
  # error that prints the help text and exits with status 1.
  if [ $# -ne 3 ]; then
      printf 'FATAL: incorrect number of variables given to the script\n\n'
      # The help text embeds $0, so it is printed as data through a fixed
      # format: a '%' or backslash in the script path must not be interpreted
      # by printf.
      printf '%s\n' "$help_message"
      exit 1;
  fi
  
  # From here on abort on the first failing command, and treat a pipeline as
  # failed when any of its stages fails.
  set -e -o pipefail
  
  # First positional argument; it is stored but not used further down.
  langdir=$1
  # Pick the KWS data directory under the second positional argument: the
  # default "kws" setup, or the "kwset_<extraid>" one when --extraid was given.
  # The test operand is quoted so that a value containing whitespace does not
  # break the [ ... ] expression.
  if [ -z "$extraid" ] ; then
    kwsdatadir=$2/kws
  else
    kwsdatadir=$2/kwset_${extraid}
  fi
  # Prefix of the result directories; stages below use ${kwsoutputdir}_<LMWT>.
  kwsoutputdir="$3"
  
  # Contents of the trials file; the value is passed on the command line of the
  # normalization and scoring tools in the stages below.
  trials=$(cat "$kwsdatadir/trials")
  # An empty value would silently drop an argument from those command lines
  # (e.g. "--trials $trials"), so stop here with a clear message instead.
  if [ -z "$trials" ]; then
    echo "$0: $kwsdatadir/trials is empty" >&2
    exit 1
  fi
  # Directory receiving the log files of all $cmd jobs.
  mkdir -p "$kwsoutputdir/log/"
  
  # Stage 0: for every LMWT, settle on the ntrue scale that stage 1 will use and
  # store it in ${kwsoutputdir}_<LMWT>/details/ntrue.
  if [ $stage -le 0 ] ; then
    for LMWT in $(seq $min_lmwt $max_lmwt) ; do
      lmwt_dir=${kwsoutputdir}_${LMWT}
      mkdir -p $lmwt_dir/details/
      mkdir -p $lmwt_dir/scoring/
  
      if [ -n "$ntrue_from" ]; then
        # Reuse a scale tuned elsewhere: copy ntrue (and ntrue_raw when it is
        # available) from the donor directory and record where it came from.
        donor_dir=${ntrue_from}_${LMWT}
        cp $donor_dir/details/ntrue  $lmwt_dir/details/ntrue
        if [ -f  $donor_dir/details/ntrue_raw ]; then
          cp $donor_dir/details/ntrue_raw  $lmwt_dir/details/ntrue_raw
        fi
        echo "$ntrue_from" > $lmwt_dir/details/ntrue_from
      else
        # Tune the scale. All candidate ntrue scales for this LMWT are scored
        # in a single parallel command: there are usually only a few LMWT
        # values, while the ntrue scale covers a larger range, so this is more
        # effective than looping over the scales and parallelizing over LMWT.
        # Job index NTRUE (1..21) corresponds to the scale 1+(NTRUE-1)/5.0.
        $cmd NTRUE=1:21 $kwsoutputdir/log/score.${LMWT}.NTRUE.log \
          ntrue=\$\(perl -e 'print 1+(NTRUE-1)/5.0' \) '&&' \
          cat $lmwt_dir/results \| \
            local/kws/normalize_results_kst.pl --trials $trials --ntrue-scale \$ntrue \| \
            local/kws/filter_kws_results.pl --probs --nbest 200 \| \
            compute-atwv $trials ark,t:$kwsdatadir/hitlist ark:- \
            \> $lmwt_dir/scoring/score.NTRUE.txt
  
        # Take the job index whose score file reports the highest ATWV: sort
        # the ATWV lines numerically (descending) on the text after '=' and
        # extract the index from the file name of the top line.
        ntrue=$(grep ATWV $lmwt_dir/scoring/score.*.txt \
                  | sort -k2,2nr -t '=' \
                  | head -n 1 \
                  | sed 's/.*score\.\([0-9][0-9]*\)\.txt.*/\1/g')
        echo "$ntrue" > $lmwt_dir/details/ntrue_raw
        # Convert the index back to a scale; this formula has to stay identical
        # to the one inside the $cmd invocation above.
        ntrue=$(perl -e "print 1+($ntrue-1)/5.0")
        echo "$ntrue" > $lmwt_dir/details/ntrue
      fi
    done
  fi
  
  # Stage 1: apply the ntrue scale stored by stage 0 and compute the final
  # scores for every LMWT. Each command is launched through $cmd with the job
  # range LMWT=<min_lmwt>:<max_lmwt>; the bare token LMWT in log names and paths
  # is presumably replaced with the job index by $cmd -- confirm against the
  # launcher in use. Shell metacharacters (|, >, &&, $(...)) are escaped or
  # quoted so that they reach $cmd as arguments instead of being interpreted by
  # this script.
  if [ $stage -le 1 ] ; then
    # Normalize the raw results using the scale read from details/ntrue and
    # write them to details/results.
    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/normalize.LMWT.log \
      cat ${kwsoutputdir}_LMWT/results \|\
        local/kws/normalize_results_kst.pl --trials $trials --ntrue-scale \$\(cat ${kwsoutputdir}_LMWT/details/ntrue\)\
        \> ${kwsoutputdir}_LMWT/details/results
  
    # Score the normalized results against the hitlist. compute-atwv is given
    # details/alignment.csv as an extra argument (presumably the alignment
    # output file, since it is read back below); its stdout becomes
    # details/score.txt, which is then copied to the top of the result dir.
    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/score.final.LMWT.log \
      cat ${kwsoutputdir}_LMWT/details/results \|\
        compute-atwv $trials ark,t:$kwsdatadir/hitlist ark:- \
        ${kwsoutputdir}_LMWT/details/alignment.csv \> ${kwsoutputdir}_LMWT/details/score.txt  '&&' \
      cp ${kwsoutputdir}_LMWT/details/score.txt ${kwsoutputdir}_LMWT/score.txt
  
    # Per-category statistics are optional: they are produced only when the KWS
    # data directory contains a categories file.
    # NOTE(review): this helper is referenced under local/search/ while the
    # other helpers in this stage live under local/kws/ -- confirm the path
    # exists in this recipe.
    if [ -f $kwsdatadir/categories ]; then
      $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/per-category-stats.LMWT.log \
        cat ${kwsoutputdir}_LMWT/details/alignment.csv \|\
          perl local/search/per_category_stats.pl --sweep-step 0.005  $trials \
          $kwsdatadir/categories \> ${kwsoutputdir}_LMWT/details/per-category-score.txt
    else
      echo "$0: Categories file not found, not generating per-category scores"
    fi
  fi
  
  # Stage 2: optional scoring with KWSEval; the whole stage is skipped unless
  # the KWS data directory contains an f4de_attribs file. As in stage 1, every
  # command runs through $cmd once per LMWT in <min_lmwt>:<max_lmwt>.
  if [ $stage -le 2 ]; then
  if [ -f $kwsdatadir/f4de_attribs ] ; then
    # Defaults for the settings that f4de_attribs may define.
    # NOTE(review): language and kwlist_name are passed unquoted below, so if
    # f4de_attribs leaves them empty the --language / --kwlist-id options get
    # no value -- confirm the attribs file always sets them.
    language=""
    flen=0.01
    kwlist_name=""
    . $kwsdatadir/f4de_attribs #override the previous variables
  
    # Input files taken from the KWS data directory.
    ecf=$kwsdatadir/ecf.xml
    rttm=$kwsdatadir/rttm
    kwlist=$kwsdatadir/kwlist.xml
  
    # Create the per-LMWT f4de/ directory and write an annotated copy of the
    # keyword list into it.
    # NOTE(review): the categories file is used unconditionally here although
    # stage 1 treats it as optional -- confirm it always exists when
    # f4de_attribs does.
    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_prepare.LMWT.log \
      mkdir -p ${kwsoutputdir}_LMWT/f4de/ '&&' cat $kwlist \| \
      local/search/annotate_kwlist.pl $kwsdatadir/categories \> ${kwsoutputdir}_LMWT/f4de/kwlist.xml
  
    # Convert the normalized results of stage 1 into f4de/kwslist.xml: map the
    # second field through utt.map, then pipe through utt_to_files.pl (with the
    # segments file one level above the KWS data dir) and write_kwslist.pl.
    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_write_kwslist.LMWT.log \
      cat ${kwsoutputdir}_LMWT/details/results \| \
        utils/int2sym.pl -f 2 $kwsdatadir/utt.map \| \
        local/search/utt_to_files.pl --flen $flen $kwsdatadir/../segments \|\
        local/search/write_kwslist.pl --flen $flen --language $language \
        --kwlist-id $kwlist_name \> ${kwsoutputdir}_LMWT/f4de/kwslist.xml
  
    # Run KWSEval on the ecf, rttm, annotated keyword list and the kwslist
    # written above; the f4de/ directory is passed as the value of -f.
    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_score.LMWT.log \
      KWSEval -e $ecf -r $rttm -t ${kwsoutputdir}_LMWT/f4de/kwlist.xml -a  \
        --zGlobalMeasures Optimum --zGlobalMeasures Supremum \
        -O -B -q 'Characters:regex=.*' -q 'NGramOrder:regex=.*' \
        -O -B -q 'OOV:regex=.*' -q 'BaseOOV:regex=.*' \
        -s ${kwsoutputdir}_LMWT/f4de/kwslist.xml -c -o -b -d -f  ${kwsoutputdir}_LMWT/f4de/
  
    # Summarize f4de/alignment.csv into f4de/metrics.txt.
    # NOTE(review): this helper is referenced directly under local/, not under
    # local/kws/ or local/search/ like the others -- confirm the path.
    $cmd LMWT=$min_lmwt:$max_lmwt $kwsoutputdir/log/f4de_report.LMWT.log \
      local/kws_oracle_threshold.pl --duration $trials \
        ${kwsoutputdir}_LMWT/f4de/alignment.csv \> ${kwsoutputdir}_LMWT/f4de/metrics.txt
  fi
  fi
  
  # All requested stages completed.
  echo "$0: Done"
  exit 0;