make_phone_graph.sh
4.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/bin/bash
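# Example invocation (NOTE: this passes four directories, whereas the usage
# check below accepts exactly three: <lang-dir> <alignment-dir> <model-dir>):
#   steps/make_phone_graph.sh data/train_100k_nodup/ data/lang exp/tri2_ali_100k_nodup/ exp/tri2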
# Copyright 2013 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script makes a phone-based LM, without smoothing to unigram, that
# is to be used for segmentation, and uses that together with a model to
# make a decoding graph.
# Uses SRILM.
# See also utils/lang/make_phone_bigram_lm.sh.
# Begin configuration section.
# Defaults for the tunable settings used further down in this script.
# NOTE(review): parse_options.sh (sourced below, not visible here) presumably
# lets callers override each of these with a matching --name value flag —
# confirm against that script.
# First stage to run: every stage k below is guarded by [ $stage -le k ], so
# stages numbered lower than $stage are skipped.
stage=0
# Not referenced anywhere else in the visible part of this script.
cmd=run.pl
# N and P are passed to fstcomposecontext as --context-size / --central-position
# and are also embedded in the ilabels_${N}_${P} file names.
N=3 # change N and P for non-trigram systems.
P=1
# Passed to make-h-transducer as --transition-scale.
tscale=1.0 # transition scale.
# Passed to add-self-loops as --self-loop-scale.
loopscale=0.1 # scale for self-loops.
# End configuration section.
echo "$0 $@" # Print the command line for logging
# path.sh is optional: it is sourced only when present in the current directory.
[ -f ./path.sh ] && . ./path.sh; # source the path.
# parse_options.sh has no slash in its name, so bash looks it up on PATH; the
# script aborts if it cannot be sourced successfully.
. parse_options.sh || exit 1;
# Exactly three positional arguments are required; anything else prints the
# usage text and exits with status 1.
# The usage text names the arguments with literal placeholders: at this point
# no positional argument has been assigned to a variable yet, so expanding
# $dir (or the never-defined $ali_dir) here would print empty strings.
if [ $# -ne 3 ]; then
  echo "Usage: $0 [options] <lang-dir> <alignment-dir> <model-dir>"
  echo " e.g.: $0 data/lang exp/tri3b_ali exp/tri4b_seg"
  echo "Makes the graph in <model-dir>/phone_graph, corresponding to the model in <model-dir>"
  echo "The alignments from <alignment-dir> are used to train the phone LM."
  exit 1;
fi

lang=$1    # language directory (phones.txt, phones/disambig.int, L.fst)
alidir=$2  # alignment directory (ali.*.gz, final.mdl, phones.txt)
dir=$3     # model directory (final.mdl, tree); output goes to $dir/phone_graph
# Fail early, with a clear message, if any input file is missing.
# Besides the files checked historically (L.fst, ali.1.gz and the two
# final.mdl), this also covers inputs the later stages read unconditionally:
#   $lang/phones.txt           - stages 0, 3 and 7
#   $lang/phones/disambig.int  - stage 4 (fstcomposecontext)
#   $dir/tree                  - stage 5 (make-h-transducer)
# so that a missing file is reported before any expensive work is done.
for f in "$lang/L.fst" "$lang/phones.txt" "$lang/phones/disambig.int" \
         "$alidir/ali.1.gz" "$alidir/final.mdl" \
         "$dir/final.mdl" "$dir/tree"; do
  if [ ! -f "$f" ]; then
    echo "$0: expected $f to exist"
    exit 1;
  fi
done
# Make sure SRILM's ngram-count is runnable.  If it is not already on PATH,
# fall back to the copy under $KALDI_ROOT/tools/srilm/bin/<arch> and append
# that directory to PATH; exit with status 1 if neither is available.
#
# `command -v` is used instead of `which`: it is a shell builtin that prints
# nothing when the tool is missing, whereas some `which` implementations print
# a "no ngram-count in ..." message that would make the emptiness test
# misbehave.
loc=$(command -v ngram-count)
if [ -z "$loc" ]; then
  # Decide between the 64-bit and 32-bit SRILM build directories from the
  # machine hardware name only (`uname -a` would also match a "64" that
  # happens to appear in the host name or kernel version).
  if uname -m | grep -q 64; then # some kind of 64 bit...
    sdir=$KALDI_ROOT/tools/srilm/bin/i686-m64
  else
    sdir=$KALDI_ROOT/tools/srilm/bin/i686
  fi
  if [ -f "$sdir/ngram-count" ]; then
    echo "Using SRILM tools from $sdir"
    export PATH=$PATH:$sdir
  else
    echo "You appear to not have SRILM tools installed, either on your path,"
    echo "or installed in ${sdir}. See tools/install_srilm.sh for installation"
    echo "instructions."
    exit 1
  fi
fi
# From here on, abort on the first failing command.  pipefail is enabled as
# well because most stages below are long pipelines: without it only the exit
# status of the LAST command in a pipeline is seen, so e.g. a failing gunzip or
# ali-to-phones would go unnoticed and later stages would run on empty output.
set -e -o pipefail

# All outputs of this script are written under $dir/phone_graph.
mkdir -p "$dir/phone_graph"

# Compare the phone table of the lang directory with the one stored alongside
# the alignments (the checker script is not shown here; with `set -e` a
# non-zero exit from it stops this script).
utils/lang/check_phones_compatible.sh "$lang/phones.txt" "$alidir/phones.txt"
# Stage 0: turn the alignments into LM training text, one utterance per line.
# Data flow: every $alidir/ali.*gz archive is decompressed and fed to
# ali-to-phones (text output); awk drops the first field of each line (the
# utterance key) and re-emits the remaining fields, each followed by a single
# space; utils/int2sym.pl then maps the fields using $lang/phones.txt.
if [ "$stage" -le 0 ]; then
  echo "$0: creating phone LM-training data"
  gunzip -c "$alidir"/ali.*gz \
    | ali-to-phones "$alidir/final.mdl" ark:- ark,t:- \
    | awk '{ for (i = 2; i <= NF; i++) printf "%s ", $i; print "" }' \
    | utils/int2sym.pl "$lang/phones.txt" \
    > "$dir/phone_graph/train_phones.txt"
fi
# Stage 1: estimate an order-3 phone LM from the stage-0 text with SRILM's
# ngram-count and write it as a gzipped ARPA file.
if [ "$stage" -le 1 ]; then
  echo "$0: building ARPA LM"
  # Smoothing setup (see the SRILM ngram-count manual): additive smoothing
  # with delta 1 for 1-grams, Kneser-Ney discounting for 2- and 3-grams,
  # interpolated with lower-order estimates.
  ngram_opts=(-order 3 -addsmooth1 1 -kndiscount2 -kndiscount3 -interpolate)
  ngram-count -text "$dir/phone_graph/train_phones.txt" \
    "${ngram_opts[@]}" \
    -lm "$dir/phone_graph/arpa.gz"
fi
# Stage 2: overwrite the unigram log-probs and unigram back-off weights with
# -99.  The corresponding arcs are removed from the FST in a later stage; the
# purpose is to avoid CLG blowup and to increase speed.
if [ "$stage" -le 2 ]; then
  echo "$0: removing unigrams from ARPA LM"
  # The awk program tracks which ARPA section it is in (1 after the \1-grams
  # header, 2 after the \2-grams: header).  Inside the 1-gram section only
  # lines with exactly three fields (log-prob, token, back-off) are rewritten;
  # every other line is copied through unchanged.
  gunzip -c "$dir/phone_graph/arpa.gz" \
    | awk '
        /\\1-grams/  { section = 1 }
        /\\2-grams:/ { section = 2 }
        section == 1 && NF == 3 { printf("-99\t%s\t-99\n", $2); next }
        { print }
      ' \
    | gzip -c > "$dir/phone_graph/arpa_noug.gz"
fi
# Stage 3: compile the unigram-free ARPA LM into G_phones.fst.
# Data flow: arpa2fst reads the ARPA text on stdin using $lang/phones.txt as
# symbol table; the result is printed as text so awk can drop every line that
# has five or more fields and a fifth field >= 100 (presumably the arcs that
# carry the -99 log-probs written in the previous stage); the remainder is
# compiled again and passed through fstconnect.
if [ "$stage" -le 3 ]; then
  echo "$0: creating G_phones.fst from ARPA"
  gunzip -c "$dir/phone_graph/arpa_noug.gz" \
    | arpa2fst --disambig-symbol=#0 --read-symbol-table="$lang/phones.txt" - - \
    | fstprint \
    | awk 'NF < 5 || $5 < 100.0' \
    | fstcompile \
    | fstconnect > "$dir/phone_graph/G_phones.fst"
  # Informational only: a failed stochasticity check does not stop the script.
  if ! fstisstochastic "$dir/phone_graph/G_phones.fst"; then
    echo "[info]: G_phones not stochastic."
  fi
fi
# Stage 4: build CLG.fst from G_phones.fst.
# fstcomposecontext reads G_phones.fst on stdin with context size $N and
# central position $P, takes the disambiguation symbols from
# $lang/phones/disambig.int, and is given two output paths (the
# disambig_ilabels_${N}_${P}.int list and the ilabels_${N}_${P} file); its
# stdout is piped through fstdeterminize into CLG.fst.
if [ "$stage" -le 4 ]; then
  echo "$0: creating CLG."
  < "$dir/phone_graph/G_phones.fst" \
    fstcomposecontext \
      --context-size="$N" \
      --central-position="$P" \
      --read-disambig-syms="$lang/phones/disambig.int" \
      --write-disambig-syms="$dir/phone_graph/disambig_ilabels_${N}_${P}.int" \
      "$dir/phone_graph/ilabels_${N}_${P}" \
    | fstdeterminize > "$dir/phone_graph/CLG.fst"
  # Informational only: a failed stochasticity check does not stop the script.
  if ! fstisstochastic "$dir/phone_graph/CLG.fst"; then
    echo "[info]: CLG not stochastic."
  fi
fi
# Stage 5: run make-h-transducer to produce Ha.fst.
# Inputs: the ilabels_${N}_${P} file written in stage 4, plus the tree and
# final.mdl of the model directory.  $tscale is forwarded as the transition
# scale, and a disambig_tid.int list is written for use by the next stage.
if [ "$stage" -le 5 ]; then
  echo "$0: creating Ha.fst"
  h_opts=(
    --disambig-syms-out="$dir/phone_graph/disambig_tid.int"
    --transition-scale="$tscale"
  )
  make-h-transducer "${h_opts[@]}" \
    "$dir/phone_graph/ilabels_${N}_${P}" "$dir/tree" "$dir/final.mdl" \
    > "$dir/phone_graph/Ha.fst"
fi
# Stage 6: compose Ha.fst with CLG.fst and post-process the result into
# HCLGa.fst.  The composed FST is piped through fstdeterminizestar (log
# semiring option enabled), fstrmsymbols (using the disambig_tid.int list
# from stage 5), fstrmepslocal and fstminimizeencoded.
if [ "$stage" -le 6 ]; then
  echo "$0: creating HCLGa.fst"
  if ! fsttablecompose "$dir/phone_graph/Ha.fst" "$dir/phone_graph/CLG.fst" \
       | fstdeterminizestar --use-log=true \
       | fstrmsymbols "$dir/phone_graph/disambig_tid.int" \
       | fstrmepslocal \
       | fstminimizeencoded > "$dir/phone_graph/HCLGa.fst"; then
    exit 1
  fi
  # Informational only: a failed stochasticity check does not stop the script.
  if ! fstisstochastic "$dir/phone_graph/HCLGa.fst"; then
    echo "HCLGa is not stochastic"
  fi
fi
# Stage 7: add self-loops to HCLGa.fst to obtain the final HCLG.fst, then copy
# the phone symbol information next to the graph.
if [ "$stage" -le 7 ]; then
  if ! add-self-loops --self-loop-scale="$loopscale" --reorder=true \
       "$dir/final.mdl" \
       < "$dir/phone_graph/HCLGa.fst" > "$dir/phone_graph/HCLG.fst"; then
    exit 1
  fi

  # The stochasticity check is only attempted when both scales are exactly
  # the string "1.0"; with any other transition scale it is bound to fail.
  if [ "$tscale" = 1.0 ] && [ "$loopscale" = 1.0 ]; then
    if ! fstisstochastic "$dir/phone_graph/HCLG.fst"; then
      echo "[info]: final HCLG is not stochastic."
    fi
  fi

  # The graph's output symbols are phones, so $lang/phones.txt is the matching
  # symbol table; decoding scripts expect to find it under the name words.txt.
  cp "$lang/phones.txt" "$dir/phone_graph/words.txt"
  cp -r "$lang/phones" "$dir/phone_graph/"
fi