Blame view

egs/wsj/s5/utils/mkgraph.sh 7.36 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
  #!/bin/bash
  # Copyright 2010-2012 Microsoft Corporation
  #           2012-2013 Johns Hopkins University (Author: Daniel Povey)
  # Apache 2.0
  
  # This script creates a fully expanded decoding graph (HCLG) that represents
  # all the language-model, pronunciation dictionary (lexicon), context-dependency,
  # and HMM structure in our model.  The output is a Finite State Transducer
  # that has word-ids on the output, and pdf-ids on the input (these are indexes
  # that resolve to Gaussian Mixture Models).
  # See
  #  http://kaldi-asr.org/doc/graph_recipe_test.html
  # (this is compiled from this repository using Doxygen,
  # the source for this part is in src/doc/graph_recipe_test.dox)
  
  # Make a pipeline report failure when any of its stages fails, not just the
  # last one; the "|| exit 1" checks on the build pipelines below rely on it.
  set -o pipefail

  # Defaults for the command-line options.
  tscale=1.0          # --transition-scale
  loopscale=0.1       # --self-loop-scale
  remove_oov=false    # --remove-oov

  # Consume leading options.  Four passes are made over the argument list; the
  # checks inside a pass run one after another (they are not alternatives), so
  # a single pass can consume several options that appear in this order.
  for x in 1 2 3 4; do
    case "$1" in
      --mono|--left-biphone|--quinphone)
        shift
        echo "WARNING: the --mono, --left-biphone and --quinphone options are now deprecated and ignored."
        ;;
    esac
    if [ "$1" == "--remove-oov" ]; then
      remove_oov=true
      shift
    fi
    if [ "$1" == "--transition-scale" ]; then
      tscale=$2
      shift 2
    fi
    if [ "$1" == "--self-loop-scale" ]; then
      loopscale=$2
      shift 2
    fi
  done
  
  # Exactly three positional arguments must be left once the options have been
  # consumed; anything else prints the usage text and stops with status 1.
  if [ $# -ne 3 ]; then
     printf '%s\n' \
       "Usage: utils/mkgraph.sh [options] <lang-dir> <model-dir> <graphdir>" \
       "e.g.: utils/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph" \
       " Options:" \
       " --remove-oov       #  If true, any paths containing the OOV symbol (obtained from oov.int" \
       "                    #  in the lang directory) are removed from the G.fst during compilation." \
       " --transition-scale #  Scaling factor on transition probabilities." \
       " --self-loop-scale  #  Please see: http://kaldi-asr.org/doc/hmm.html#hmm_scale." \
       "Note: the --mono, --left-biphone and --quinphone options are now deprecated" \
       "and will be ignored."
     exit 1
  fi
  
  # Pick up the environment from ./path.sh when the current directory has one.
  if [ -f path.sh ]; then
    . ./path.sh
  fi

  # Positional arguments: <lang-dir> <model-dir> <graphdir>.
  lang=$1
  dir=$3
  # The tree and the final model are both read from the model directory.
  tree=$2/tree
  model=$2/final.mdl

  # The output directory is created up front.
  mkdir -p $dir
  
  # Input files that must exist before any work is done.  The list is also
  # reused below to decide whether an existing HCLG.fst is still up to date.
  # The first missing file aborts the script.
  required="$lang/L.fst $lang/G.fst $lang/phones.txt $lang/words.txt $lang/phones/silence.csl $lang/phones/disambig.int $model $tree"
  for f in $required; do
    if [ ! -f $f ]; then
      echo "mkgraph.sh: expected $f to exist"
      exit 1
    fi
  done
  
  # When HCLG.fst already exists and none of the required inputs is newer than
  # it, report that and stop successfully instead of rebuilding.
  if [ -f $dir/HCLG.fst ]; then
    must_rebuild=false
    for f in $required; do
      if [ $f -nt $dir/HCLG.fst ]; then
        must_rebuild=true
      fi
    done
    if [ $must_rebuild = false ]; then
      echo "$0: $dir/HCLG.fst is up to date."
      exit 0
    fi
  fi
  
  
  # Read the tree's context width (N) and central position (P) from the output
  # of tree-info: the second space-separated field of the matching line.
  # pipefail is enabled near the top of the script, so a failing tree-info, or
  # a grep that matches nothing, makes the assignment fail and the error branch
  # on the right of "||" runs.
  N=$(tree-info $tree | grep "context-width" | cut -d' ' -f2) || { echo "Error when getting context-width"; exit 1; }
  P=$(tree-info $tree | grep "central-position" | cut -d' ' -f2) || { echo "Error when getting central-position"; exit 1; }
  
  # If the model directory contains a frame_subsampling_factor file while the
  # self-loop scale is still the string "0.1", only a warning is printed; the
  # build continues.
  [[ -f $2/frame_subsampling_factor && "$loopscale" == "0.1" ]] && \
    echo "$0: WARNING: chain models need '--self-loop-scale 1.0'";
  
  # Grammar decoding is switched on by the presence of
  # phones/nonterm_phones_offset.int in the lang directory.  Start from the
  # plain settings (no extra option, pass-through "cat") and override them
  # when that file is found.
  prepare_grammar_command="cat"
  nonterm_opt=
  if [ -f $lang/phones/nonterm_phones_offset.int ]; then
    # Only N == 2 with P == 1 is accepted in this mode.
    if ! [[ $N == 2 && $P == 1 ]]; then
      echo "$0: when doing grammar decoding, you can only build graphs for left-biphone trees."
      exit 1
    fi
    nonterm_phones_offset=$(cat $lang/phones/nonterm_phones_offset.int)
    nonterm_opt="--nonterm-phones-offset=$nonterm_phones_offset"
    prepare_grammar_command="make-grammar-fst --nonterm-phones-offset=$nonterm_phones_offset - -"
  fi
  
  # LG stage: the result is cached in $lang/tmp so later runs can reuse it.
  mkdir -p $lang/tmp
  # Remove the PID-suffixed temporary LG file when the script exits or receives
  # one of the listed signals, so that a partial file is not left behind.
  trap "rm -f $lang/tmp/LG.fst.$$" EXIT HUP INT PIPE TERM
  # Note: [[ ]] is like [ ] but enables certain extra constructs, e.g. || in
  # place of -o
  # Rebuild LG.fst when it is missing or empty (-s), or older (-ot) than G.fst
  # or L_disambig.fst.
  if [[ ! -s $lang/tmp/LG.fst || $lang/tmp/LG.fst -ot $lang/G.fst || \
        $lang/tmp/LG.fst -ot $lang/L_disambig.fst ]]; then
    # The pipeline writes to the temporary name; the file is renamed to LG.fst
    # only after the whole pipeline has succeeded (pipefail + "|| exit 1").
    fsttablecompose $lang/L_disambig.fst $lang/G.fst | fstdeterminizestar --use-log=true | \
      fstminimizeencoded | fstpushspecial > $lang/tmp/LG.fst.$$ || exit 1;
    mv $lang/tmp/LG.fst.$$ $lang/tmp/LG.fst
    # Informational only: a failing fstisstochastic does not stop the build.
    fstisstochastic $lang/tmp/LG.fst || echo "[info]: LG not stochastic."
  fi
  
  # CLG stage: expand LG with phonetic context for the tree's context width N
  # and central position P.  The CLG FST and its ilabels file are cached in
  # $lang/tmp under names that include N and P.
  clg=$lang/tmp/CLG_${N}_${P}.fst
  clg_tmp=$clg.$$
  ilabels=$lang/tmp/ilabels_${N}_${P}
  ilabels_tmp=$ilabels.$$
  # Remove the PID-suffixed temporaries on exit or on the listed signals.
  trap "rm -f $clg_tmp $ilabels_tmp" EXIT HUP INT PIPE TERM
  # Rebuild when either output is missing/empty or older than LG.fst.
  if [[ ! -s $clg || $clg -ot $lang/tmp/LG.fst \
      || ! -s $ilabels || $ilabels -ot $lang/tmp/LG.fst ]]; then
    # Stop here if the pipeline fails (pipefail is set at the top of the
    # script).  Without this check a failed or interrupted run would still move
    # a partial CLG into place and carry on, possibly combining it with an
    # ilabels file left over from an earlier run.  Every other build stage in
    # this script guards its pipeline the same way.
    fstcomposecontext $nonterm_opt --context-size=$N --central-position=$P \
     --read-disambig-syms=$lang/phones/disambig.int \
     --write-disambig-syms=$lang/tmp/disambig_ilabels_${N}_${P}.int \
      $ilabels_tmp $lang/tmp/LG.fst |\
      fstarcsort --sort_type=ilabel > $clg_tmp || exit 1;
    mv $clg_tmp $clg
    mv $ilabels_tmp $ilabels
    # Informational only: a failing fstisstochastic does not stop the build.
    fstisstochastic $clg || echo "[info]: CLG not stochastic."
  fi
  
  # Ha stage: build the H transducer from the ilabels file, the tree and the
  # model.  Output goes to a PID-suffixed temporary that is renamed to Ha.fst
  # once make-h-transducer has succeeded; the trap removes a leftover temporary.
  ha_tmp=$dir/Ha.fst.$$
  trap "rm -f $ha_tmp" EXIT HUP INT PIPE TERM
  # Rebuild when Ha.fst is missing/empty or older than the model or the
  # ilabels file.
  if [[ ! -s $dir/Ha.fst || $dir/Ha.fst -ot $model || $dir/Ha.fst -ot $ilabels ]]; then
    make-h-transducer $nonterm_opt \
      --disambig-syms-out=$dir/disambig_tid.int \
      --transition-scale=$tscale \
      $ilabels $tree $model > $ha_tmp || exit 1
    mv $ha_tmp $dir/Ha.fst
  fi
  
  # HCLGa stage: compose Ha.fst with the CLG and write the result to
  # $dir/HCLGa.fst.
  # Remove the PID-suffixed temporary on exit or on the listed signals (this
  # replaces the handler installed by the previous stage).
  trap "rm -f $dir/HCLGa.fst.$$" EXIT HUP INT PIPE TERM
  # Rebuild when HCLGa.fst is missing/empty or older than Ha.fst or the CLG file.
  if [[ ! -s $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || \
        $dir/HCLGa.fst -ot $clg ]]; then
    if $remove_oov; then
      # --remove-oov needs oov.int from the lang directory.
      [ ! -f $lang/oov.int ] && \
        echo "$0: --remove-oov option: no file $lang/oov.int" && exit 1;
      # From here on $clg is no longer a filename but a command string ending
      # in "|" that runs fstrmsymbols on the CLG file.
      # NOTE(review): presumably fsttablecompose reads the output of such a
      # command (Kaldi's piped-input filename convention) - confirm.
      clg="fstrmsymbols --remove-arcs=true --apply-to-output=true $lang/oov.int $clg|"
    fi
    # "$clg" is quoted so that the command string, spaces included, reaches
    # fsttablecompose as a single argument.  The result is written to a
    # temporary and renamed only after the whole pipeline has succeeded.
    fsttablecompose $dir/Ha.fst "$clg" | fstdeterminizestar --use-log=true \
      | fstrmsymbols $dir/disambig_tid.int | fstrmepslocal | \
       fstminimizeencoded > $dir/HCLGa.fst.$$ || exit 1;
    mv $dir/HCLGa.fst.$$ $dir/HCLGa.fst
    # Informational only: a failing fstisstochastic does not stop the build.
    fstisstochastic $dir/HCLGa.fst || echo "HCLGa is not stochastic"
  fi
  
  # HCLG stage: add self-loops to HCLGa, pass the result through the grammar
  # preparation command chosen earlier ("cat" when grammar decoding is off) and
  # convert it to a const FST.  Output goes to a PID-suffixed temporary that is
  # renamed once the pipeline has succeeded; the trap removes a leftover one.
  hclg_tmp=$dir/HCLG.fst.$$
  trap "rm -f $hclg_tmp" EXIT HUP INT PIPE TERM
  # Rebuild when HCLG.fst is missing/empty or older than HCLGa.fst.
  if [[ ! -s $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then
    add-self-loops --self-loop-scale=$loopscale --reorder=true $model $dir/HCLGa.fst \
      | $prepare_grammar_command \
      | fstconvert --fst_type=const > $hclg_tmp || exit 1
    mv $hclg_tmp $dir/HCLG.fst
    # The stochasticity check is only run when both scales are 1.0; with any
    # other transition scale it is bound to fail, so there is no point.
    if [ $tscale == 1.0 ] && [ $loopscale == 1.0 ]; then
      if ! fstisstochastic $dir/HCLG.fst; then
        echo "[info]: final HCLG is not stochastic."
      fi
    fi
  fi
  
  # The empty FST has 66 bytes, so a result with fewer than 67 bytes is either
  # an empty file or the empty FST.  Only the first 67 bytes are counted.
  hclg_head_bytes=$(head -c 67 $dir/HCLG.fst | wc -c)
  [ $hclg_head_bytes -eq 67 ] || {
    echo "$0: it looks like the result in $dir/HCLG.fst is empty"
    exit 1
  }

  # Save space by dropping the intermediate graphs; it is not an error if they
  # cannot be removed.
  rm $dir/HCLGa.fst $dir/Ha.fst 2>/dev/null || true
  
  # Keep the word list and some phone information next to HCLG, so that
  # decoding can be done without reference to the $lang directory.
  cp $lang/words.txt $dir/ || exit 1;
  mkdir -p $dir/phones

  # Optional files: word_boundary.* and align_lexicon.* might be needed for ctm
  # scoring, optional_silence.* for analyzing alignments.  Errors are hidden on
  # purpose because any of them may be absent.
  for stem in word_boundary align_lexicon optional_silence; do
    cp $lang/phones/$stem.* $dir/phones/ 2>/dev/null
  done

  cp $lang/phones/disambig.txt $lang/phones/disambig.int $dir/phones/ 2> /dev/null
  cp $lang/phones/silence.csl $dir/phones/ || exit 1;
  cp $lang/phones.txt $dir/ 2> /dev/null # ignore the error if it's not there.

  # Store the last field of the "pdfs" line(s) printed by am-info in num_pdfs.
  # This stays the final command, so its status is the script's exit status.
  am-info --print-args=false $model | grep pdfs | awk '{print $NF}' > $dir/num_pdfs