prepare_online_decoding.sh
6.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/bin/bash
# Copyright 2014 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Begin configuration.
# Default values for the script's options.  parse_options.sh is sourced right
# after this section; presumably it overrides these variables from
# "--name value" command-line arguments -- confirm against parse_options.sh.
stage=0 # This allows restarting partway through, when something went wrong.
# NOTE(review): $stage is assigned here but never read by this script.
feature_type=mfcc # Base feature type; the script handles mfcc, plp and fbank.
add_pitch=false # If true, pitch options are appended to the generated online.conf.
mfcc_config=conf/mfcc.conf # you can override any of these you need to override.
plp_config=conf/plp.conf
fbank_config=conf/fbank.conf
# Only the config file matching $feature_type is copied into <output-dir>/conf.
# online_pitch_config is the config file for both pitch extraction and
# post-processing; we combine them into one because during training this
# is given to the program compute-and-process-kaldi-pitch-feats.
online_pitch_config=conf/online_pitch.conf
# Below are some options that affect the iVectors, and should probably
# match those used in extract_ivectors_online.sh.  They are written verbatim
# into <output-dir>/conf/ivector_extractor.conf when an iVector extractor
# directory is given.
num_gselect=5 # Gaussian-selection using diagonal model: number of Gaussians to select
posterior_scale=0.1 # Scale on the acoustic posteriors, intended to account for
# inter-frame correlations.
min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
# caution: you should use the same value in the online-estimation
# code.
max_count=100 # This max-count of 100 can make iVectors more consistent for
# different lengths of utterance, by scaling up the prior term
# when the data-count exceeds this value. The data-count is
# after posterior-scaling, so assuming the posterior-scale is
# 0.1, --max-count 100 starts having effect after 1000 frames,
# or 10 seconds of data.
iter=final # Model iteration: <nnet-dir>/${iter}.mdl is copied to <output-dir>/final.mdl.
# End configuration.
# Record the full invocation in the output, for logging purposes.
echo "$0 $*"

# Pick up the environment setup from path.sh when the current directory
# provides one.
if [ -f path.sh ]; then
  . ./path.sh
fi

# parse_options.sh has no slash in its name, so the shell looks it up on PATH.
# Stop immediately if sourcing it fails.
. parse_options.sh || exit 1
# After option parsing exactly three or four positional arguments must remain
# (the iVector extractor directory is optional).  Anything else prints the
# usage message and exits with status 1.
#
# NOTE(review): --cmd and --stage are advertised below, but this script never
# defines a cmd variable and never reads $stage -- confirm they are still
# meaningful before relying on them.
if [ $# -lt 3 ] || [ $# -gt 4 ]; then
  echo "Usage: $0 [options] <lang-dir> [<ivector-extractor-dir>] <nnet-dir> <output-dir>"
  echo "e.g.: $0 data/lang exp/nnet2_online/extractor exp/nnet2_online/nnet exp/nnet2_online/nnet_online"
  echo "main options (for others, see top of script file)"
  # fbank is listed as well, because the script generates a config for it too.
  echo " --feature-type <mfcc|plp|fbank> # Type of the base features; "
  echo " # important to generate the correct"
  echo " # configs in <output-dir>/conf/"
  echo " --add-pitch <true|false> # Append pitch features to cmvn"
  echo " # (default: false)"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --config <config-file> # config containing options"
  echo " --iter <model-iteration|final> # iteration of model to take."
  echo " --stage <stage> # stage to do partial re-run from."
  exit 1
fi
# Assign the positional arguments.  With four arguments the second one is the
# iVector extractor directory; with three there is no extractor and $iedir is
# left empty.  Any other count ends the script with status 1.
case $# in
  4)
    lang=$1
    iedir=$2
    srcdir=$3
    dir=$4
    ;;
  3)
    lang=$1
    iedir=
    srcdir=$2
    dir=$3
    ;;
  *)
    exit 1
    ;;
esac
# Check that every input file read or copied further down actually exists.
for f in "$lang/phones/silence.csl" "$srcdir/${iter}.mdl" "$srcdir/tree"; do
  if [ ! -f "$f" ]; then
    echo "$0: no such file $f"
    exit 1
  fi
done

# The remaining checks only apply when an iVector extractor directory was given.
if [ -n "$iedir" ]; then
  for f in final.{mat,ie,dubm} splice_opts global_cmvn.stats online_cmvn.conf; do
    if [ ! -f "$iedir/$f" ]; then
      echo "$0: no such file $iedir/$f"
      exit 1
    fi
  done

  if $add_pitch; then
    # iedim: first field printed by matrix-dim for the extractor's final.mat.
    # amdim: value on the "input-dim:" line printed by nnet3-am-info.
    iedim=$(matrix-dim "$iedir/final.mat" | awk '{print $1}')
    amdim=$(nnet3-am-info "$srcdir/${iter}.mdl" | grep "input-dim:" | awk '{print $2}')

    # If either tool produced nothing the comparison below is meaningless;
    # fail loudly instead of silently skipping the check.
    if [ -z "$iedim" ] || [ -z "$amdim" ]; then
      echo "$0: could not determine the dimensions of $iedir/final.mat and $srcdir/${iter}.mdl"
      exit 1
    fi

    # Equal dimensions are rejected when pitch is requested.
    if [ "$amdim" -eq "$iedim" ]; then
      echo "$0: remove pitch from the input of ivector extractor"
      exit 1
    fi
  fi
fi
# Convert $dir to an absolute pathname, so that the configuration files we
# write will contain absolute pathnames.  Stop if the conversion fails or
# yields nothing: an empty $dir would make every later path start at "/".
dir=$(utils/make_absolute.sh "$dir") || exit 1
if [ -z "$dir" ]; then
  echo "$0: could not convert the output directory to an absolute pathname"
  exit 1
fi
mkdir -p "$dir/conf" || exit 1

# The phone table of the language directory must be compatible with the one
# stored next to the model.
utils/lang/check_phones_compatible.sh "$lang/phones.txt" "$srcdir/phones.txt" || exit 1

# Copy the phone table, the chosen model iteration (stored as final.mdl) and
# the tree into the output directory.
cp "$lang/phones.txt" "$dir" || exit 1
cp "$srcdir/${iter}.mdl" "$dir/final.mdl" || exit 1
cp "$srcdir/tree" "$dir/" || exit 1

# frame_subsampling_factor is optional; copy it only when the model directory
# has one.
if [ -f "$srcdir/frame_subsampling_factor" ]; then
  cp "$srcdir/frame_subsampling_factor" "$dir/" || exit 1
fi
# When an iVector extractor directory was given, copy its files into
# $dir/ivector_extractor/.
if [ -n "$iedir" ]; then
  mkdir -p "$dir/ivector_extractor" || exit 1
  cp "$iedir"/final.{mat,ie,dubm} "$iedir/global_cmvn.stats" "$dir/ivector_extractor/" || exit 1
  # The following things won't be needed directly by the online decoding, but
  # will allow us to run prepare_online_decoding.sh again with
  # $dir/ivector_extractor/ as the input directory (useful in certain
  # cross-system training scenarios).
  cp "$iedir/splice_opts" "$iedir/online_cmvn.conf" "$dir/ivector_extractor/" || exit 1
fi
# Prepare the conf directory and start a fresh, empty online.conf; an existing
# online.conf is kept as online.conf.bak.
mkdir -p "$dir/conf" || exit 1

# Best-effort removal; errors (e.g. the files not existing) are ignored on
# purpose.
# NOTE(review): this removes the files from $dir itself, not from $dir/conf
# where this script writes them -- confirm that is the intended location.
rm "$dir"/{plp,mfcc,fbank}.conf 2>/dev/null

echo "$0: preparing configuration files in $dir/conf"
conf=$dir/conf/online.conf
if [ -f "$conf" ]; then
  echo "$0: moving $dir/conf/online.conf to $dir/conf/online.conf.bak"
  mv "$conf" "$conf.bak"
fi

# Create/truncate the file; everything below appends to it.  Stop if it cannot
# be created, since no later write could succeed either.
: >"$conf" || exit 1
# Select the source config for the requested base feature type.  An
# unsupported type is a fatal error: the generated online.conf would otherwise
# name a feature type for which no config file was installed.
case "$feature_type" in
  mfcc) feat_config=$mfcc_config ;;
  plp) feat_config=$plp_config ;;
  fbank) feat_config=$fbank_config ;;
  *)
    echo "Unknown feature type $feature_type"
    exit 1
    ;;
esac

# Record the feature type and the location of its config in online.conf, then
# install the config file itself under $dir/conf.
{
  echo "--feature-type=$feature_type"
  echo "--${feature_type}-config=$dir/conf/${feature_type}.conf"
} >>"$conf"
cp "$feat_config" "$dir/conf/${feature_type}.conf" || exit 1
# When an iVector extractor is used, write conf/ivector_extractor.conf (plus
# the splice and CMVN configs it points to) and reference it from online.conf.
if [ -n "$iedir" ]; then
  ieconf=$dir/conf/ivector_extractor.conf
  echo "--ivector-extraction-config=$ieconf" >>"$conf"
  cp "$iedir/online_cmvn.conf" "$dir/conf/online_cmvn.conf" || exit 1

  # Put each whitespace-separated option from splice_opts on its own line in
  # the config.  awk does the splitting so that the words are neither
  # glob-expanded by the shell nor interpreted as options by echo.
  awk '{ for (i = 1; i <= NF; i++) print $i }' "$iedir/splice_opts" \
    >"$dir/conf/splice.conf" || exit 1

  # Written in one go; this replaces any previous ivector_extractor.conf.
  {
    echo "--splice-config=$dir/conf/splice.conf"
    echo "--cmvn-config=$dir/conf/online_cmvn.conf"
    echo "--lda-matrix=$dir/ivector_extractor/final.mat"
    echo "--global-cmvn-stats=$dir/ivector_extractor/global_cmvn.stats"
    echo "--diag-ubm=$dir/ivector_extractor/final.dubm"
    echo "--ivector-extractor=$dir/ivector_extractor/final.ie"
    echo "--num-gselect=$num_gselect"
    echo "--min-post=$min_post"
    echo "--posterior-scale=$posterior_scale" # this is currently the default in the scripts.
    echo "--max-remembered-frames=1000" # the default
    echo "--max-count=$max_count"
  } >"$ieconf" || exit 1
fi
# With --add-pitch true, install the online pitch config and enable pitch in
# online.conf.  The pitch options are appended only after the config file has
# been found and copied, so a failure does not leave --add-pitch=true behind
# without its config.
if $add_pitch; then
  echo "$0: enabling pitch features"
  echo "$0: creating $dir/conf/online_pitch.conf"
  if [ ! -f "$online_pitch_config" ]; then
    echo "$0: expected file '$online_pitch_config' to exist."
    exit 1
  fi
  cp "$online_pitch_config" "$dir/conf/online_pitch.conf" || exit 1
  {
    echo "--add-pitch=true"
    echo "--online-pitch-config=$dir/conf/online_pitch.conf"
  } >>"$conf"
fi
# Pass the contents of the language directory's silence.csl to the endpointing
# option; stop if the file cannot be read.
silphonelist=$(cat "$lang/phones/silence.csl") || exit 1
echo "--endpoint.silence-phones=$silphonelist" >>"$conf"
echo "$0: created config file $conf"