Blame view

Scripts/utils/mkgraph.sh 4.46 KB
ec85f8892   bigot benjamin   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
  #!/bin/bash
  # Copyright 2010-2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
  # Apache 2.0
  
  # This script creates a fully expanded decoding graph (HCLG) that represents
  # all the language-model, pronunciation dictionary (lexicon), context-dependency,
  # and HMM structure in our model.  The output is a Finite State Transducer
  # that has word-ids on the output, and pdf-ids on the input (these are indexes
  # that resolve to Gaussian Mixture Models).  
  # See
  #  http://kaldi.sourceforge.net/graph_recipe_test.html
  # (this is compiled from this repository using Doxygen,
  # the source for this part is in src/doc/graph_recipe_test.dox)
  
  
  N=3
  P=1
  reverse=false
  
  for x in `seq 2`; do 
    [ "$1" == "--mono" ] && N=1 && P=0 && shift;
    [ "$1" == "--quinphone" ] && N=5 && P=2 && shift;
    [ "$1" == "--reverse" ] && reverse=true && shift;
  done
  
  if [ $# != 3 ]; then
     echo "Usage: utils/mkgraph.sh [options] <lang-dir> <model-dir> <graphdir>"
     echo "e.g.: utils/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph"
     echo " Options:"
     echo " --mono          #  For monophone models."
     echo " --quinphone     #  For models with 5-phone context (3 is default)"
     exit 1;
  fi
  
  if [ -f path.sh ]; then . ./path.sh; fi
  
  lang=$1
  tree=$2/tree
  model=$2/final.mdl
  dir=$3
  
  mkdir -p $dir
  
  tscale=1.0
  loopscale=0.1
  
  # If $lang/tmp/LG.fst does not exist or is older than its sources, make it...
  # (note: the [[ ]] brackets make the || type operators work (inside [ ], we
  # would have to use -o instead),  -f means file exists, and -ot means older than).
  
  required="$lang/L.fst $lang/G.fst $lang/phones.txt $lang/words.txt $lang/phones/silence.csl $lang/phones/disambig.int $model $tree"
  for f in $required; do
    [ ! -f $f ] && echo "mkgraph.sh: expected $f to exist" && exit 1;
  done
  
  mkdir -p $lang/tmp
  # Note: [[ ]] is like [ ] but enables certain extra constructs, e.g. || in 
  # place of -o
  if [[ ! -s $lang/tmp/LG.fst || $lang/tmp/LG.fst -ot $lang/G.fst || \
        $lang/tmp/LG.fst -ot $lang/L_disambig.fst ]]; then
    fsttablecompose $lang/L_disambig.fst $lang/G.fst | fstdeterminizestar --use-log=true | \
      fstminimizeencoded  > $lang/tmp/LG.fst || exit 1;
    fstisstochastic $lang/tmp/LG.fst || echo "[info]: LG not stochastic."
  fi
  
  
  clg=$lang/tmp/CLG_${N}_${P}.fst
  
  if [[ ! -s $clg || $clg -ot $lang/tmp/LG.fst ]]; then
    fstcomposecontext --context-size=$N --central-position=$P \
     --read-disambig-syms=$lang/phones/disambig.int \
     --write-disambig-syms=$lang/tmp/disambig_ilabels_${N}_${P}.int \
      $lang/tmp/ilabels_${N}_${P} < $lang/tmp/LG.fst >$clg
    fstisstochastic $clg  || echo "[info]: CLG not stochastic."
  fi
  
  if [[ ! -s $dir/Ha.fst || $dir/Ha.fst -ot $model  \
      || $dir/Ha.fst -ot $lang/tmp/ilabels_${N}_${P} ]]; then
    if $reverse; then
      make-h-transducer --reverse=true --push_weights=true \
        --disambig-syms-out=$dir/disambig_tid.int \
        --transition-scale=$tscale $lang/tmp/ilabels_${N}_${P} $tree $model \
        > $dir/Ha.fst  || exit 1;
    else
      make-h-transducer --disambig-syms-out=$dir/disambig_tid.int \
        --transition-scale=$tscale $lang/tmp/ilabels_${N}_${P} $tree $model \
         > $dir/Ha.fst  || exit 1;
    fi
  fi
  
  if [[ ! -s $dir/HCLGa.fst || $dir/HCLGa.fst -ot $dir/Ha.fst || \
        $dir/HCLGa.fst -ot $clg ]]; then
    fsttablecompose $dir/Ha.fst $clg | fstdeterminizestar --use-log=true \
      | fstrmsymbols $dir/disambig_tid.int | fstrmepslocal | \
       fstminimizeencoded > $dir/HCLGa.fst || exit 1;
    fstisstochastic $dir/HCLGa.fst || echo "HCLGa is not stochastic"
  fi
  
  if [[ ! -s $dir/HCLG.fst || $dir/HCLG.fst -ot $dir/HCLGa.fst ]]; then
    add-self-loops --self-loop-scale=$loopscale --reorder=true \
      $model < $dir/HCLGa.fst > $dir/HCLG.fst || exit 1;
  
    if [ $tscale == 1.0 -a $loopscale == 1.0 ]; then
      # No point doing this test if transition-scale not 1, as it is bound to fail. 
      fstisstochastic $dir/HCLG.fst || echo "[info]: final HCLG is not stochastic."
    fi
  fi
  
  # keep a copy of the lexicon and a list of silence phones with HCLG...
  # this means we can decode without reference to the $lang directory.
  
  
  cp $lang/words.txt $dir/ || exit 1;
  mkdir -p $dir/phones
  cp $lang/phones/word_boundary.* $dir/phones/ 2>/dev/null # might be needed for ctm scoring,
    # but ignore the error if it's not there.
  
  cp $lang/phones/disambig.{txt,int} $dir/phones/ 2> /dev/null
  cp $lang/phones/silence.csl $dir/phones/ || exit 1;
  cp $lang/phones.txt $dir/ 2> /dev/null # ignore the error if it's not there.
  
  # to make const fst:
  # fstconvert --fst_type=const $dir/HCLG.fst $dir/HCLG_c.fst