prepare_online_decoding.sh
10.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
#!/bin/bash
# Copyright 2014 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Begin configuration.
# Every variable in this section is a default that can be replaced from the
# command line (see the usage message further down).

# First stage to run; stages below this number are skipped, so a run can be
# restarted partway through after something went wrong.
stage="0"

# Job launcher, invoked as: $cmd [JOB=1:$nj] <log-file> <command...>
cmd="run.pl"

# When true, the intermediate accumulator files are deleted once consumed.
cleanup="true"

# Base feature type (mfcc or plp); selects which feature config is written
# to <output-dir>/conf/.
feature_type="mfcc"

# Config file handed to apply-cmvn-online and copied into <output-dir>/conf/.
online_cmvn_config="conf/online_cmvn.conf"

# Pitch features, and the two configs copied when they are enabled.
add_pitch="false"
pitch_config="conf/pitch.conf"
pitch_process_config="conf/pitch_process.conf"

# If true, then treat each utterance as a separate speaker for purposes of
# basis training... this is recommended if the number of actual speakers in
# your training set is less than (feature-dim) * (feature-dim+1).
per_utt_basis="true"

# If true, apply online CMVN normalization per utterance rather than per
# speaker.
per_utt_cmvn="false"

# Weight given to silence phones when accumulating basis-fMLLR statistics.
silence_weight="0.01"
# End configuration.
# Record the exact invocation on stdout so it ends up in the log.
echo "$0 $@"

# Source the recipe's environment file when one is present in the current
# directory.
if [ -f path.sh ]; then
  . ./path.sh
fi

# Option handling is delegated to parse_options.sh (found via PATH);
# presumably it consumes the leading --option arguments and leaves only the
# positional ones -- confirm against that script.
. parse_options.sh || exit 1

# Exactly four or five positional arguments are accepted; anything else
# prints the usage text and aborts.
if [ $# -lt 4 ] || [ $# -gt 5 ]; then
  printf '%s\n' \
    "Usage: $0 [options] <data-dir> <lang-dir> <sat-model-dir> [<MMI-model>] <output-dir>" \
    "e.g.: $0 data/train data/lang exp/tri3b exp/tri3b_mmi/final.mdl exp/tri3b_online" \
    "main options (for others, see top of script file)" \
    " --feature-type <mfcc|plp> # Type of the base features; " \
    " # important to generate the correct" \
    " # configs in <output-dir>/conf/" \
    " --online-cmvn-config <config> # config for online cmvn," \
    " # default conf/online_cmvn.conf" \
    " --add-pitch <true|false> # Append pitch features to cmvn" \
    " # (default: false)" \
    " --per-utt-cmvn <true|false> # Apply online CMVN per utt, not" \
    " # per speaker (default: false)" \
    " --per-utt-basis <true|false> # Do basis computation per utterance" \
    " # (default: true)" \
    " --silence-weight <weight> # Weight on silence for basis fMLLR;" \
    " # default 0.01." \
    " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs." \
    " --config <config-file> # config containing options" \
    " --stage <stage> # stage to do partial re-run from."
  exit 1
fi
# Map the positional arguments onto named variables. The first three are the
# same in both calling conventions; the optional fourth of five is a separate
# (e.g. MMI-trained) model used for rescoring. Without it, the model in
# <sat-model-dir> doubles as the rescoring model.
data=$1
lang=$2
srcdir=$3
case $# in
  5)
    mmi_model=$4
    dir=$5
    ;;
  *)
    mmi_model=$srcdir/final.mdl
    dir=$4
    ;;
esac
# Abort early unless every input this script reads exists as a regular file:
# the source model and its first alignment archive, the feature list, the
# phone table, the rescoring model and the online-CMVN config.
for f in $srcdir/final.mdl $srcdir/ali.1.gz $data/feats.scp \
         $lang/phones.txt $mmi_model $online_cmvn_config; do
  if [ ! -f $f ]; then
    echo "$0: no such file $f"
    exit 1
  fi
done
# Number of jobs recorded in the source directory; the per-job inputs read
# later (ali.JOB.gz, trans.JOB) are indexed 1..$nj.
nj=$(cat $srcdir/num_jobs) || exit 1
sdata=$data/split$nj
# Re-split the data directory unless a split directory exists that is newer
# than feats.scp.
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1

mkdir -p $dir/log
echo $nj >$dir/num_jobs || exit 1

# The model must have been trained with a phone set compatible with $lang.
utils/lang/check_phones_compatible.sh $lang/phones.txt $srcdir/phones.txt || exit 1
cp $lang/phones.txt $dir || exit 1

# splice_opts and cmvn_opts are optional in the source directory; when they
# are absent the variables are simply left empty.
splice_opts=$(cat $srcdir/splice_opts 2>/dev/null)
cmvn_opts=$(cat $srcdir/cmvn_opts 2>/dev/null)
silphonelist=$(cat $lang/phones/silence.csl) || exit 1

# Optional files: copy the ones that exist, stay quiet about the ones that
# do not (final.mat only exists for LDA-type systems).
for f in splice_opts cmvn_opts final.mat; do
  if [ -f $srcdir/$f ]; then
    if ! cp $srcdir/$f $dir/; then
      echo "$0: error copying $srcdir/$f to $dir/"
      exit 1
    fi
  fi
done

# Required files: every later stage reads $dir/final.mdl, and stage 3
# compares it with $dir/final.rescore_mdl, so a failed copy must not be
# silently ignored.
if ! cp $srcdir/final.mdl $dir/; then
  echo "$0: error copying $srcdir/final.mdl to $dir/"
  exit 1
fi
if ! cp $mmi_model $dir/final.rescore_mdl; then
  echo "$0: error copying $mmi_model to $dir/final.rescore_mdl"
  exit 1
fi
# Set up the unadapted features "$sifeats" (offline CMVN) and
# "$online_sifeats" (online CMVN). Both are pipeline specifiers containing
# the literal token JOB, which stages 0 and 2 pass to $cmd together with
# JOB=1:$nj.
if [ -f $dir/final.mat ]; then feat_type=lda; else feat_type=delta; fi

# --per-utt-cmvn=true means every utterance is normalised on its own, so no
# speaker map is passed. Otherwise apply-cmvn-online gets the per-job spk2utt
# so it can work per speaker. (The test used to be inverted with respect to
# the documented meaning of the option.)
if $per_utt_cmvn; then
  online_cmvn_spk2utt_opt=
else
  online_cmvn_spk2utt_opt="--spk2utt=ark:$sdata/JOB/spk2utt"
fi

# create global_cmvn.stats
if ! matrix-sum --binary=false scp:$data/cmvn.scp - >$dir/global_cmvn.stats 2>/dev/null; then
  echo "$0: Error summing cmvn stats"
  exit 1
fi

# Start from an empty value so a skip_opt inherited from the environment
# (e.g. via path.sh) cannot leak into the command line.
skip_opt=
if $add_pitch; then
  skip_opt="--skip-dims=13:14:15" # should make this more general.
fi

echo "$0: feature type is $feat_type";
# In both branches every --option of apply-cmvn-online is placed before the
# first positional argument ($dir/global_cmvn.stats). Kaldi's option parser
# is understood to stop recognising options at the first positional one --
# see its ParseOptions documentation.
case $feat_type in
  delta)
    sifeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |"
    online_sifeats="ark,s,cs:apply-cmvn-online $skip_opt --config=$online_cmvn_config $online_cmvn_spk2utt_opt $dir/global_cmvn.stats scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |"
    ;;
  lda)
    sifeats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
    online_sifeats="ark,s,cs:apply-cmvn-online $skip_opt --config=$online_cmvn_config $online_cmvn_spk2utt_opt $dir/global_cmvn.stats scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
    ;;
  *)
    echo "Invalid feature type $feat_type" && exit 1
    ;;
esac
# Set up the adapted features "$feats" for training set.
# When the source directory contains per-job transforms (trans.JOB) they are
# applied on top of the unadapted features; otherwise "$feats" is just
# "$sifeats".
if [ -f $srcdir/trans.1 ]; then
  feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$srcdir/trans.JOB ark:- ark:- |"
else
  feats="$sifeats"
fi

# Choose whether the basis statistics are grouped per utterance (no speaker
# map) or per speaker (per-job spk2utt).
if $per_utt_basis; then
  spk2utt_opt=  # treat each utterance as separate speaker when computing basis.
  echo "Doing per-utterance adaptation for purposes of computing the basis."
else
  echo "Doing per-speaker adaptation for purposes of computing the basis."
  # Count speakers over the whole data directory. The split directory is only
  # referenced through its per-job subdirectories ($sdata/JOB/...) elsewhere
  # in this script, so $sdata/spk2utt is not expected to exist.
  num_spk=$(wc -l <$data/spk2utt 2>/dev/null)
  # 41*40 is (feature-dim+1)*(feature-dim) for 40-dimensional features,
  # matching the rule of thumb given for per_utt_basis in the configuration.
  if [ "${num_spk:-0}" -lt $((41 * 40)) ]; then
    # The option is called --per-utt-basis; the warning used to suggest
    # --per-utt, which is not an option of this script.
    echo "Warning: number of speakers is small, might be better to use --per-utt-basis=true."
  fi
  spk2utt_opt="--spk2utt=ark:$sdata/JOB/spk2utt"
fi
# Stage 0: accumulate the statistics from which the fMLLR basis is computed,
# one accumulator file ($dir/basis.acc.JOB) per job.
if [ $stage -le 0 ]; then
echo "$0: Accumulating statistics for basis-fMLLR computation"
# Note: we get Gaussian level alignments with the "final.mdl" and the
# speaker adapted features.
# The whole pipeline below is handed to $cmd as arguments: the pipes are
# written as \| so that this shell does not interpret them, and the token JOB
# in the log name, rspecifiers and output name is left for $cmd to fill in
# (presumably with each index in 1..$nj -- see the run.pl/queue.pl docs).
# Pipeline steps, in order:
#   1. read the alignments from $srcdir/ali.JOB.gz and turn them into posts;
#   2. re-weight the phones listed in $silphonelist by $silence_weight;
#   3. convert to Gaussian-level posts using the adapted features "$feats";
#   4. accumulate basis-fMLLR stats on the unadapted features "$sifeats",
#      grouped according to $spk2utt_opt (empty means no speaker map).
$cmd JOB=1:$nj $dir/log/basis_acc.JOB.log \
ali-to-post "ark:gunzip -c $srcdir/ali.JOB.gz|" ark:- \| \
weight-silence-post $silence_weight $silphonelist $dir/final.mdl ark:- ark:- \| \
gmm-post-to-gpost $dir/final.mdl "$feats" ark:- ark:- \| \
gmm-basis-fmllr-accs-gpost $spk2utt_opt \
$dir/final.mdl "$sifeats" ark,s,cs:- $dir/basis.acc.JOB || exit 1;
fi
# Stage 1: combine all basis accumulators found in $dir into the basis file
# $dir/fmllr.basis, then optionally discard the accumulators.
if [ $stage -le 1 ]; then
  echo "$0: computing the basis matrices."
  # The accumulator glob is expanded by this shell before $cmd is started.
  if ! $cmd $dir/log/basis_training.log \
      gmm-basis-fmllr-training $dir/final.mdl $dir/fmllr.basis $dir/basis.acc.*; then
    exit 1
  fi
  # Best-effort removal: complaints about files that are already gone are
  # discarded.
  if $cleanup; then
    rm $dir/basis.acc.* 2>/dev/null
  fi
fi
# Stage 2: build $dir/final.oalimdl, the "online alignment model".
if [ $stage -le 2 ]; then
  echo "$0: accumulating stats for online alignment model."
  # Accumulate stats for "online alignment model"-- this model is computed with
  # the speaker-independent features and online CMVN, but matches
  # Gaussian-for-Gaussian with the final speaker-adapted model.

  # Drop accumulators left over from an earlier run (for instance one made
  # with --cleanup false and a different number of jobs). They would be
  # picked up by the final.*.acc globs below and either trip the count check
  # or be summed into the model.
  rm -f $dir/final.*.acc

  # The pipe is escaped so that it reaches $cmd untouched; JOB is left for
  # $cmd to substitute.
  $cmd JOB=1:$nj $dir/log/acc_alimdl.JOB.log \
    ali-to-post "ark:gunzip -c $srcdir/ali.JOB.gz|" ark:- \| \
    gmm-acc-stats-twofeats $dir/final.mdl "$feats" "$online_sifeats" \
    ark,s,cs:- $dir/final.JOB.acc || exit 1

  # Count the accumulators with a glob rather than by parsing `ls` output;
  # the -f test makes an unmatched pattern count as zero files.
  num_accs=0
  for acc in $dir/final.*.acc; do
    [ -f "$acc" ] && num_accs=$((num_accs + 1))
  done
  if [ "$num_accs" -ne "$nj" ]; then
    echo "$0: Wrong #accs"
    exit 1
  fi

  # Update model.
  # The glob inside the quoted string is not expanded here; it is part of a
  # command string read by gmm-est as a piped input.
  $cmd $dir/log/est_online_alimdl.log \
    gmm-est --remove-low-count-gaussians=false $dir/final.mdl \
    "gmm-sum-accs - $dir/final.*.acc|" $dir/final.oalimdl || exit 1

  if $cleanup; then
    rm $dir/final.*.acc
  fi
fi
# Stage 3: write $dir/conf/online_decoding.conf, one "--option[=value]" per
# line, and copy the feature/CMVN/pitch configs it refers to into $dir/conf.
if [ $stage -le 3 ]; then
  mkdir -p $dir/conf
  rm $dir/{plp,mfcc}.conf 2>/dev/null
  echo "$0: preparing configuration files in $dir/conf"
  # Keep one backup of a previously generated config.
  if [ -f $dir/conf/online_decoding.conf ]; then
    echo "$0: moving $dir/conf/online_decoding.conf to $dir/conf/online_decoding.conf.bak"
    mv $dir/conf/online_decoding.conf $dir/conf/online_decoding.conf.bak
  fi
  conf=$dir/conf/online_decoding.conf
  : >$conf # start from an empty file; everything below appends to it

  # Base features. An unsupported type, or a missing feature config, is fatal:
  # continuing would produce a decoding config with no usable feature section.
  case "$feature_type" in
    mfcc)
      echo "$0: creating $dir/conf/mfcc.conf"
      echo "--mfcc-config=$dir/conf/mfcc.conf" >>$conf
      if ! cp conf/mfcc.conf $dir/conf/; then
        echo "$0: error copying conf/mfcc.conf to $dir/conf/"
        exit 1
      fi
      ;;
    plp)
      echo "$0: enabling plp features"
      echo "--feature-type=plp" >>$conf
      echo "$0: creating $dir/conf/plp.conf"
      echo "--plp-config=$dir/conf/plp.conf" >>$conf
      if ! cp conf/plp.conf $dir/conf/; then
        echo "$0: error copying conf/plp.conf to $dir/conf/"
        exit 1
      fi
      ;;
    *)
      echo "Unknown feature type $feature_type"
      exit 1
      ;;
  esac

  # Online CMVN.
  if ! cp $online_cmvn_config $dir/conf/online_cmvn.conf; then
    echo "$0: error copying online cmvn config to $dir/conf/"
    exit 1
  fi
  echo "--cmvn-config=$dir/conf/online_cmvn.conf" >>$conf

  # Splicing + LDA when a final.mat was copied from the source directory,
  # delta features otherwise (same rule that picks the training-time
  # feature pipeline).
  if [ -f $dir/final.mat ]; then
    echo "$0: enabling feature splicing"
    echo "--splice-feats" >>$conf
    echo "$0: creating $dir/conf/splice.conf"
    # splice_opts holds space-separated options on one line; the config file
    # gets one option per line.
    for x in $(cat $dir/splice_opts); do echo $x; done >$dir/conf/splice.conf
    echo "--splice-config=$dir/conf/splice.conf" >>$conf
    echo "$0: enabling LDA"
    echo "--lda-matrix=$dir/final.mat" >>$conf
  else
    echo "$0: enabling deltas"
    echo "--add-deltas" >>$conf
  fi

  # Pitch features.
  if $add_pitch; then
    echo "$0: enabling pitch features"
    echo "--add-pitch" >>$conf
    echo "$0: creating $dir/conf/pitch.conf"
    echo "--pitch-config=$dir/conf/pitch.conf" >>$conf
    if ! cp $pitch_config $dir/conf/pitch.conf; then
      echo "$0: error copying pitch config to $dir/conf/"
      exit 1
    fi
    echo "$0: creating $dir/conf/pitch_process.conf"
    echo "--pitch-process-config=$dir/conf/pitch_process.conf" >>$conf
    if ! cp $pitch_process_config $dir/conf/pitch_process.conf; then
      echo "$0: error copying pitch process config to $dir/conf/"
      exit 1
    fi
    # Sanity check: count the whitespace-separated fields on the second line
    # of the text-format global CMVN stats and require 17 of them.
    # NOTE(review): 17 presumably corresponds to 13 base + 3 pitch dims + 1
    # count column, in line with the hard-coded --skip-dims=13:14:15 -- confirm.
    nfields=$(sed -n '2,2p' $dir/global_cmvn.stats | \
      perl -e '$_ = <>; s/^\s+|\s+$//g; print scalar(split);')
    # Quoted so that an empty result is reported as a mismatch instead of
    # turning the test into a syntax error.
    if [ "$nfields" != 17 ]; then
      echo "$0: $dir/global_cmvn.stats has $nfields entries per row (expected 17)."
      echo "$0: Did you append pitch features?"
      exit 1
    fi
    #offset=$(sed -n '2,2p' $dir/global_cmvn.stats | \
    #  perl -e '$_ = <>; s/^\s+|\s+$//g; ($t, $c) = (split)[13, 16]; print -$t/$c;');
    #echo "--pov-offset=$offset" >>$dir/conf/pitch_process.conf
  fi

  # Models and adaptation data produced by the earlier stages.
  echo "--fmllr-basis=$dir/fmllr.basis" >>$conf
  echo "--online-alignment-model=$dir/final.oalimdl" >>$conf
  echo "--model=$dir/final.mdl" >>$conf
  # A rescoring model is only configured when it differs from the main model.
  if ! cmp --quiet $dir/final.mdl $dir/final.rescore_mdl; then
    echo "--rescore-model=$dir/final.rescore_mdl" >>$conf
  fi
  echo "--silence-phones=$silphonelist" >>$conf
  echo "--endpoint.silence-phones=$silphonelist" >>$conf
  echo "--global-cmvn-stats=$dir/global_cmvn.stats" >>$conf
  echo "$0: created config file $conf"
fi