Blame view

egs/mini_librispeech/s5/local/kws/create_hitlist.sh 2.33 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
  #!/bin/bash
  # Copyright 2012-2018  Johns Hopkins University (Author: Guoguo Chen, Yenda Trmal)
  # Apache 2.0.
  
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  #  http://www.apache.org/licenses/LICENSE-2.0
  #
  # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  # MERCHANTABILITY OR NON-INFRINGEMENT.
  # See the Apache 2 License for the specific language governing permissions and
  # limitations under the License.
  
  
  # Default settings. The usage text below advertises them as command-line
  # options ("for others, see top of script file"), so they are presumably
  # overridden as --<name> <value> by parse_options.sh, which is sourced at the
  # end of this section and therefore has to come after these assignments.
  # NOTE(review): only $cmd is referenced in the rest of the script as visible
  # here; the other settings look unused but are kept so that their options
  # stay accepted -- confirm before removing any of them.
  cmd=run.pl
  boost_silence=1.0
  beam=10
  retry_beam=40
  scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
  
  # Source path.sh when one exists in the current working directory.
  [ ! -f path.sh ] || . ./path.sh
  . parse_options.sh || exit 1;
  
  # Exactly five positional arguments are required. Otherwise print the usage
  # text (to stdout, as before) and exit with status 1.
  if [ $# -ne 5 ]; then
    echo "This script takes an alignment directory and creates the corresponding hitlist file"
    echo ""
    echo "Usage: create_hitlist.sh <data-dir> <lang-dir> <lang-tmp-dir> <exp-dir> <kws-data-dir>"
    echo " e.g.: create_hitlist.sh data/heldout data/lang data/local/lang_tmp exp/heldout_ali data/heldout/kws"
    echo "main options (for others, see top of script file)"
    echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) "
  
    exit 1;
  fi
  
  # Strict mode from here on: abort on any failing command (-e), on use of an
  # unset variable (-u), and when any stage of a pipeline fails (pipefail).
  set -euo pipefail
  
  # Positional arguments, in the order given by the usage message.
  data=$1       # <data-dir>
  lang=$2       # <lang-dir>
  lang_tmp=$3   # <lang-tmp-dir>
  dir=$4        # <exp-dir>
  kws=$5        # <kws-data-dir>
  
  # Contents of oov.txt; passed further down as the --map-oov argument of
  # utils/sym2int.pl. Paths are quoted so they are not word-split or globbed.
  oov=$(cat "$lang/oov.txt")
  mkdir -p "$dir/log"
  
  echo "$0: writing alignments."
  # Look up the integer ids listed for the "#1" and "#2" symbols in phones.txt;
  # they are handed to phones-to-prons further down. The first column is
  # compared exactly, so "#10", "#11", ... can never be selected by accident,
  # and awk reads the file directly, so there is no upstream process that could
  # be killed by SIGPIPE and trip pipefail.
  wbegin=$(awk '$1 == "#1" {print $2; exit}' "$lang/phones.txt")
  wend=$(awk '$1 == "#2" {print $2; exit}' "$lang/phones.txt")
  if [ -z "$wbegin" ] || [ -z "$wend" ]; then
    # Fail with an explanation instead of passing empty ids downstream.
    echo "$0: could not find symbols #1 and/or #2 in $lang/phones.txt" >&2
    exit 1
  fi
  
  # Generate L_align.fst only when it is not already present in the lang dir.
  # The generator's combined stdout/stderr is shown and also kept in the log.
  if [ ! -f "$lang/L_align.fst" ]; then
    echo "$0: generating $lang/L_align.fst"
    local/kws/make_L_align.sh "$lang_tmp" "$lang" "$lang" 2>&1 | tee "$dir/log/L_align.log"
  fi
  
  # Turn the alignments in $dir into the keyword hit list $kws/hitlist.
  #
  # The whole pipeline is passed to $cmd as arguments: the backslash-escaped
  # \; \| and \> reach $cmd as literal characters instead of being interpreted
  # by this shell, so presumably $cmd (run.pl or a queue wrapper) is what
  # actually executes the pipeline, with $dir/log/ali_to_hitlist.log as its log.
  # $cmd is left unquoted; the usage text shows it may carry its own options
  # (e.g. "utils/queue.pl <queue opts>"), which relies on word splitting.
  #
  # Stages, in order:
  #   1. ali-to-phones: given $dir/final.mdl and the gunzipped $dir/ali.*.gz,
  #      writes a text archive to stdout.
  #   2. phones-to-prons: given $lang/L_align.fst, the ids $wbegin and $wend,
  #      the output of stage 1, and $data/text converted to integers with
  #      $lang/words.txt (out-of-vocabulary words mapped to $oov).
  #   3. prons-to-wordali: given the output of stage 2 and a second
  #      ali-to-phones pass over the same alignments with --write-lengths=true.
  #   4. local/kws/generate_hitlist.pl: given $kws/keywords.int.
  #   5. utils/sym2int.pl -f 2 with $kws/utt.map (presumably replacing the
  #      utterance name in field 2 by its id -- confirm); the result is
  #      redirected to $kws/hitlist.
  $cmd $dir/log/ali_to_hitlist.log \
    set -e -o pipefail\; \
    ali-to-phones $dir/final.mdl "ark:gunzip -c $dir/ali.*.gz|" ark,t:- \| \
    phones-to-prons $lang/L_align.fst $wbegin $wend ark:- "ark,s:utils/sym2int.pl -f 2- --map-oov '$oov' $lang/words.txt <$data/text|" ark,t:- \| \
    prons-to-wordali ark:- "ark:ali-to-phones --write-lengths=true $dir/final.mdl 'ark:gunzip -c $dir/ali.*.gz|' ark,t:- |" ark,t:- \| \
    local/kws/generate_hitlist.pl $kws/keywords.int \|\
    utils/sym2int.pl -f 2 $kws/utt.map  \> $kws/hitlist
  
  echo "$0: done generating hitlist"
  
  
  exit 0;