Blame view
egs/wsj/s5/utils/mkgraph.sh
7.36 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
#!/bin/bash
# Copyright 2010-2012 Microsoft Corporation
#           2012-2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0

# This script creates a fully expanded decoding graph (HCLG) that represents
# all the language-model, pronunciation dictionary (lexicon), context-dependency,
# and HMM structure in our model.  The output is a Finite State Transducer
# that has word-ids on the output, and pdf-ids on the input (these are indexes
# that resolve to Gaussian Mixture Models).
# See
#  http://kaldi-asr.org/doc/graph_recipe_test.html
# (this is compiled from this repository using Doxygen,
# the source for this part is in src/doc/graph_recipe_test.dox)
#
# Usage: utils/mkgraph.sh [options] <lang-dir> <model-dir> <graphdir>
#
# Stages, each skipped when its cached output is non-empty and newer than
# its inputs:
#   1. LG    -> <lang-dir>/tmp/LG.fst
#   2. CLG   -> <lang-dir>/tmp/CLG_<N>_<P>.fst (+ ilabels_<N>_<P>)
#   3. Ha    -> <graphdir>/Ha.fst
#   4. HCLGa -> <graphdir>/HCLGa.fst
#   5. HCLG  -> <graphdir>/HCLG.fst
# Every stage writes to a "<name>.$$" temporary file first and only renames
# it into place when the pipeline succeeded, so a failed run never leaves a
# partial file under the final name.

# Without 'set -e' each critical step is checked explicitly; pipefail makes a
# failure in any stage of a pipeline visible to those checks.
set -o pipefail

tscale=1.0
loopscale=0.1
remove_oov=false

# Consume leading options (in any order) until the first non-option argument.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --mono|--left-biphone|--quinphone)
      shift
      echo "WARNING: the --mono, --left-biphone and --quinphone options are now deprecated and ignored."
      ;;
    --remove-oov)
      remove_oov=true
      shift
      ;;
    --transition-scale)
      tscale=$2
      # 'shift 2' fails (and shifts nothing) when the value is missing; stop
      # parsing so that the argument-count check below prints the usage.
      shift 2 || break
      ;;
    --self-loop-scale)
      loopscale=$2
      shift 2 || break
      ;;
    *)
      break
      ;;
  esac
done

if [ $# != 3 ]; then
  echo "Usage: utils/mkgraph.sh [options] <lang-dir> <model-dir> <graphdir>"
  echo "e.g.: utils/mkgraph.sh data/lang_test exp/tri1/ exp/tri1/graph"
  echo " Options:"
  echo " --remove-oov # If true, any paths containing the OOV symbol (obtained from oov.int"
  echo " # in the lang directory) are removed from the G.fst during compilation."
  echo " --transition-scale # Scaling factor on transition probabilities."
  echo " --self-loop-scale # Please see: http://kaldi-asr.org/doc/hmm.html#hmm_scale."
  echo "Note: the --mono, --left-biphone and --quinphone options are now deprecated"
  echo "and will be ignored."
  exit 1
fi

if [ -f path.sh ]; then . ./path.sh; fi

lang=$1
tree=$2/tree
model=$2/final.mdl
dir=$3

mkdir -p "$dir" || exit 1

# Inputs that must exist before anything is built.
required=(
  "$lang/L.fst"
  "$lang/G.fst"
  "$lang/phones.txt"
  "$lang/words.txt"
  "$lang/phones/silence.csl"
  "$lang/phones/disambig.int"
  "$model"
  "$tree"
)
for f in "${required[@]}"; do
  if [[ ! -f "$f" ]]; then
    echo "mkgraph.sh: expected $f to exist"
    exit 1
  fi
done

if [[ -f "$dir/HCLG.fst" ]]; then
  # detect when the result already exists, and avoid overwriting it
  # (-nt means "newer than").
  must_rebuild=false
  for f in "${required[@]}"; do
    [[ "$f" -nt "$dir/HCLG.fst" ]] && must_rebuild=true
  done
  if ! $must_rebuild; then
    echo "$0: $dir/HCLG.fst is up to date."
    exit 0
  fi
fi

# Context width (N) and central position (P) as reported by tree-info.
N=$(tree-info "$tree" | grep "context-width" | cut -d' ' -f2) \
  || { echo "Error when getting context-width"; exit 1; }
P=$(tree-info "$tree" | grep "central-position" | cut -d' ' -f2) \
  || { echo "Error when getting central-position"; exit 1; }

if [[ -f "$2/frame_subsampling_factor" && "$loopscale" == "0.1" ]]; then
  echo "$0: WARNING: chain models need '--self-loop-scale 1.0'"
fi

if [[ -f "$lang/phones/nonterm_phones_offset.int" ]]; then
  if [[ $N != 2 || $P != 1 ]]; then
    echo "$0: when doing grammar decoding, you can only build graphs for left-biphone trees."
    exit 1
  fi
  nonterm_phones_offset=$(cat "$lang/phones/nonterm_phones_offset.int")
  nonterm_opt="--nonterm-phones-offset=$nonterm_phones_offset"
  prepare_grammar_command="make-grammar-fst --nonterm-phones-offset=$nonterm_phones_offset - -"
else
  prepare_grammar_command="cat"
  nonterm_opt=
fi

mkdir -p "$lang/tmp" || exit 1

# --- Stage 1: LG -------------------------------------------------------------
# Rebuild if $lang/tmp/LG.fst is missing/empty or older than its sources
# (-s means "exists and is non-empty", -ot means "older than").
trap 'rm -f "$lang/tmp/LG.fst.$$"' EXIT HUP INT PIPE TERM
if [[ ! -s "$lang/tmp/LG.fst" \
      || "$lang/tmp/LG.fst" -ot "$lang/G.fst" \
      || "$lang/tmp/LG.fst" -ot "$lang/L_disambig.fst" ]]; then
  fsttablecompose "$lang/L_disambig.fst" "$lang/G.fst" \
    | fstdeterminizestar --use-log=true \
    | fstminimizeencoded \
    | fstpushspecial > "$lang/tmp/LG.fst.$$" || exit 1
  mv "$lang/tmp/LG.fst.$$" "$lang/tmp/LG.fst" || exit 1
  fstisstochastic "$lang/tmp/LG.fst" || echo "[info]: LG not stochastic."
fi

# --- Stage 2: CLG ------------------------------------------------------------
clg=$lang/tmp/CLG_${N}_${P}.fst
clg_tmp=$clg.$$
ilabels=$lang/tmp/ilabels_${N}_${P}
ilabels_tmp=$ilabels.$$
trap 'rm -f "$clg_tmp" "$ilabels_tmp"' EXIT HUP INT PIPE TERM
if [[ ! -s "$clg" || "$clg" -ot "$lang/tmp/LG.fst" \
      || ! -s "$ilabels" || "$ilabels" -ot "$lang/tmp/LG.fst" ]]; then
  # $nonterm_opt is deliberately unquoted: when empty it must expand to
  # nothing rather than to an empty argument.
  # The '|| exit 1' matters: without it a failed pipeline would still be
  # renamed into the cache below and be reused by later runs.
  # shellcheck disable=SC2086
  fstcomposecontext $nonterm_opt --context-size="$N" --central-position="$P" \
    --read-disambig-syms="$lang/phones/disambig.int" \
    --write-disambig-syms="$lang/tmp/disambig_ilabels_${N}_${P}.int" \
    "$ilabels_tmp" "$lang/tmp/LG.fst" \
    | fstarcsort --sort_type=ilabel > "$clg_tmp" || exit 1
  mv "$clg_tmp" "$clg" || exit 1
  mv "$ilabels_tmp" "$ilabels" || exit 1
  fstisstochastic "$clg" || echo "[info]: CLG not stochastic."
fi

# --- Stage 3: Ha -------------------------------------------------------------
trap 'rm -f "$dir/Ha.fst.$$"' EXIT HUP INT PIPE TERM
if [[ ! -s "$dir/Ha.fst" || "$dir/Ha.fst" -ot "$model" \
      || "$dir/Ha.fst" -ot "$lang/tmp/ilabels_${N}_${P}" ]]; then
  # shellcheck disable=SC2086  # $nonterm_opt: see stage 2.
  make-h-transducer $nonterm_opt --disambig-syms-out="$dir/disambig_tid.int" \
    --transition-scale="$tscale" "$lang/tmp/ilabels_${N}_${P}" "$tree" "$model" \
    > "$dir/Ha.fst.$$" || exit 1
  mv "$dir/Ha.fst.$$" "$dir/Ha.fst" || exit 1
fi

# --- Stage 4: HCLGa ----------------------------------------------------------
trap 'rm -f "$dir/HCLGa.fst.$$"' EXIT HUP INT PIPE TERM
if [[ ! -s "$dir/HCLGa.fst" || "$dir/HCLGa.fst" -ot "$dir/Ha.fst" \
      || "$dir/HCLGa.fst" -ot "$clg" ]]; then
  if $remove_oov; then
    if [[ ! -f "$lang/oov.int" ]]; then
      echo "$0: --remove-oov option: no file $lang/oov.int"
      exit 1
    fi
    # From here on $clg is a command string ending in '|' rather than a file
    # name; it is handed as-is to fsttablecompose below (presumably
    # interpreted there as a piped input -- confirm against the Kaldi I/O
    # documentation).
    clg="fstrmsymbols --remove-arcs=true --apply-to-output=true $lang/oov.int $clg|"
  fi
  fsttablecompose "$dir/Ha.fst" "$clg" \
    | fstdeterminizestar --use-log=true \
    | fstrmsymbols "$dir/disambig_tid.int" \
    | fstrmepslocal \
    | fstminimizeencoded > "$dir/HCLGa.fst.$$" || exit 1
  mv "$dir/HCLGa.fst.$$" "$dir/HCLGa.fst" || exit 1
  fstisstochastic "$dir/HCLGa.fst" || echo "HCLGa is not stochastic"
fi

# --- Stage 5: HCLG -----------------------------------------------------------
trap 'rm -f "$dir/HCLG.fst.$$"' EXIT HUP INT PIPE TERM
if [[ ! -s "$dir/HCLG.fst" || "$dir/HCLG.fst" -ot "$dir/HCLGa.fst" ]]; then
  # $prepare_grammar_command is deliberately unquoted: it holds a command
  # plus its arguments and relies on word splitting.
  # shellcheck disable=SC2086
  add-self-loops --self-loop-scale="$loopscale" --reorder=true "$model" "$dir/HCLGa.fst" \
    | $prepare_grammar_command \
    | fstconvert --fst_type=const > "$dir/HCLG.fst.$$" || exit 1
  mv "$dir/HCLG.fst.$$" "$dir/HCLG.fst" || exit 1
  if [[ "$tscale" == "1.0" && "$loopscale" == "1.0" ]]; then
    # No point doing this test if transition-scale not 1, as it is bound to fail.
    fstisstochastic "$dir/HCLG.fst" || echo "[info]: final HCLG is not stochastic."
  fi
fi

# note: the empty FST has 66 bytes.  this check is for whether the final FST
# is the empty file or is the empty FST.
# (The command substitution is left unquoted so that any padding emitted by
# 'wc -c' is dropped before the numeric comparison.)
if ! [ $(head -c 67 "$dir/HCLG.fst" | wc -c) -eq 67 ]; then
  echo "$0: it looks like the result in $dir/HCLG.fst is empty"
  exit 1
fi

# save space.
rm "$dir/HCLGa.fst" "$dir/Ha.fst" 2>/dev/null || true

# keep a copy of the lexicon and a list of silence phones with HCLG...
# this means we can decode without reference to the $lang directory.
cp "$lang/words.txt" "$dir/" || exit 1
mkdir -p "$dir/phones"
# The following files are optional; errors are intentionally ignored when
# they are not there.
cp "$lang"/phones/word_boundary.* "$dir/phones/" 2>/dev/null    # might be needed for ctm scoring,
cp "$lang"/phones/align_lexicon.* "$dir/phones/" 2>/dev/null    # might be needed for ctm scoring,
cp "$lang"/phones/optional_silence.* "$dir/phones/" 2>/dev/null # might be needed for analyzing alignments.
cp "$lang"/phones/disambig.{txt,int} "$dir/phones/" 2>/dev/null
cp "$lang/phones/silence.csl" "$dir/phones/" || exit 1
cp "$lang/phones.txt" "$dir/" 2>/dev/null # ignore the error if it's not there.

# Record the last field of am-info's "pdfs" line next to the graph.
am-info --print-args=false "$model" | grep pdfs | awk '{print $NF}' > "$dir/num_pdfs"