decode.sh
6.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#!/bin/bash
# Copyright Johns Hopkins University (Author: Daniel Povey, Vijayaditya Peddinti) 2016. Apache 2.0.
# This script generates the ctm files for dev_aspire, test_aspire and eval_aspire
# for scoring with ASpIRE scoring server.
# It also provides the WER for dev_aspire data.
#
# Usage: decode.sh [options] <data-dir> <lang-dir> <graph-dir> <model-dir>
# Every variable below can be overridden from the command line through
# utils/parse_options.sh, e.g.:  --stage 3 --acwt 1.0 --affix mytag
set -e
# general opts
iter=   # model iteration to decode with (empty => the default final model)
stage=0 # restart point, to resume a partially completed run
decode_num_jobs=30 # parallel jobs for both lattice-generation passes
num_jobs=30 # NOTE(review): not referenced anywhere in this script — confirm it is still needed
affix=  # optional tag woven into decode-dir and output ctm names
# segmentation opts
window=10 # uniform segmentation window length (seconds)
overlap=5 # overlap between adjacent windows (seconds)
# ivector opts
max_count=75 # parameter for extract_ivectors.sh
sub_speaker_frames=6000 # min frames per "sub-speaker" split in stage-2 i-vector estimation
ivector_scale=0.75 # scale applied to stage-1 online i-vectors (1 disables the scaling step)
filter_ctm=true # if true, down-weight silence frames (vad weights) for stage-2 i-vectors
weights_file=   # precomputed vad weights; if empty they are generated from the pass-1 decode
silence_weight=0.00001 # weight given to silence frames during stage-2 i-vector estimation
# decode opts
pass2_decode_opts="--min-active 1000" # extra options passed only to the second decode pass
lattice_beam=8
extra_left_context=0 # change for (B)LSTM
extra_right_context=0 # change for BLSTM
frames_per_chunk=50 # change for (B)LSTM
acwt=0.1 # important to change this when using chain models
post_decode_acwt=1.0 # important to change this when using chain models
extra_left_context_initial=0
extra_right_context_final=0
score_opts="--min-lmwt 6 --max-lmwt 13" # lm-weight range searched while scoring
. ./cmd.sh
[ -f ./path.sh ] && . ./path.sh
. utils/parse_options.sh || exit 1;
# Validate the four required positional arguments and derive the names
# used by all later stages.
if [ $# -ne 4 ]; then
  echo "Usage: $0 [options] <data-dir> <lang-dir> <graph-dir> <model-dir>"
  echo " Options:"
  echo " --stage (0|1|2) # start scoring script from part-way through."
  echo "e.g.:"
  echo "$0 dev_aspire data/lang exp/tri5a/graph_pp exp/nnet3/tdnn"
  exit 1;
fi
data_set=$1 #select from {dev_aspire, test_aspire, eval_aspire}
lang=$2 # data/lang
graph=$3 #exp/tri5a/graph_pp
dir=$4 # exp/nnet3/tdnn
# Fix: use $(...) instead of deprecated backticks and quote the argument so
# a model dir containing whitespace cannot break the basename call.
model_affix=$(basename "$dir")
ivector_dir=exp/nnet3
# NB: ivector_affix must be computed from the *user-supplied* $affix, so it
# is derived before $affix is rewritten on the next line.
ivector_affix=${affix:+_$affix}_chain_${model_affix}${iter:+_iter$iter}
affix=_${affix}${iter:+_iter${iter}}
segmented_data_set=${data_set}_uniformsegmented

# Cut the recordings into uniform, overlapping windows; later stages decode
# this segmented copy of the data.
if [ $stage -le 1 ]; then
  local/generate_uniformly_segmented_data_dir.sh \
    --overlap $overlap --window $window $data_set $segmented_data_set
fi

# Choose the output ctm name and the canonical data-set name from the
# (possibly affixed) data-set argument.  Substring matching, same as the
# original quoted-regex tests.
case "$data_set" in
  *test_aspire*)
    out_file=single_dev_test${affix}_$model_affix.ctm
    act_data_set=test_aspire
    ;;
  *eval_aspire*)
    out_file=single_eval${affix}_$model_affix.ctm
    act_data_set=eval_aspire
    ;;
  *dev_aspire*)
    # we will just decode the directory without oracle segments file
    # as we would like to operate in the actual evaluation condition
    out_file=single_dev${affix}_${model_affix}.ctm
    act_data_set=dev_aspire
    ;;
  *)
    echo "$0: Unknown data-set $data_set"
    exit 1
    ;;
esac
# Fix: compute the i-vector scaling affix *unconditionally*.  Previously it
# was set only inside the stage-2 block, so resuming the script with
# --stage 3 (or later) left it empty and stage 3 silently pointed at the
# unscaled i-vector directory whenever --ivector-scale != 1.
# float comparisons are hard in bash, so delegate the test to bc.
if [ "$(bc <<< "$ivector_scale != 1")" -eq 1 ]; then
  ivector_scale_affix=_scale$ivector_scale
else
  ivector_scale_affix=
fi

if [ $stage -le 2 ]; then
  echo "Extracting i-vectors, stage 1"
  # First i-vector pass: online extraction over the uniformly segmented data.
  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj 20 \
    --max-count $max_count \
    data/${segmented_data_set}_hires $ivector_dir/extractor \
    $ivector_dir/ivectors_${segmented_data_set}${ivector_affix}_stage1;

  if [ ! -z "$ivector_scale_affix" ]; then
    echo "$0: Scaling iVectors, stage 1"
    srcdir=$ivector_dir/ivectors_${segmented_data_set}${ivector_affix}_stage1
    outdir=$ivector_dir/ivectors_${segmented_data_set}${ivector_affix}${ivector_scale_affix}_stage1
    mkdir -p $outdir
    # Scale every online i-vector and re-write a compressed copy with a
    # fresh scp index in the scaled directory.
    copy-matrix --scale=$ivector_scale scp:$srcdir/ivector_online.scp ark:- | \
      copy-feats --compress=true ark:- ark,scp:$outdir/ivector_online.ark,$outdir/ivector_online.scp;
    cp $srcdir/ivector_period $outdir/ivector_period
  fi
fi
decode_dir=$dir/decode_${segmented_data_set}${affix}_pp
# First decode pass: generate lattices that the later stages use to derive
# vad weights and per-speaker i-vectors.
if [ $stage -le 3 ]; then
  echo "Generating lattices, stage 1"
  # Collect the options in an array; the conditional --iter append mirrors
  # the original ${iter:+--iter $iter} expansion, producing identical argv.
  pass1_opts=(--nj $decode_num_jobs --cmd "$decode_cmd" --config conf/decode.config
    --acwt $acwt --post-decode-acwt $post_decode_acwt
    --extra-left-context $extra_left_context
    --extra-right-context $extra_right_context
    --extra-left-context-initial $extra_left_context_initial
    --extra-right-context-final $extra_right_context_final
    --frames-per-chunk "$frames_per_chunk"
    --online-ivector-dir $ivector_dir/ivectors_${segmented_data_set}${ivector_affix}${ivector_scale_affix}_stage1
    --skip-scoring true)
  if [ ! -z "$iter" ]; then
    pass1_opts+=(--iter $iter)
  fi
  steps/nnet3/decode.sh "${pass1_opts[@]}" \
    $graph data/${segmented_data_set}_hires ${decode_dir}_stage1;
fi
# Decide what to feed the second i-vector extraction pass: frame-level vad
# weights (so silence is down-weighted) or the raw pass-1 decode directory
# (all frames weighted equally).
if [ $stage -le 4 ]; then
  if $filter_ctm; then
    # Fix: quote $weights_file.  The unquoted form relied on '[' single-
    # argument quirks when the variable is empty and broke outright when
    # the path contained whitespace.
    if [ ! -z "$weights_file" ]; then
      echo "$0: Using provided vad weights file $weights_file"
      ivector_extractor_input=$weights_file
    else
      echo "$0 : Generating vad weights file"
      ivector_extractor_input=${decode_dir}_stage1/weights${affix}.gz
      # Derive per-frame weights from the pass-1 lattices.
      local/extract_vad_weights.sh --cmd "$decode_cmd" ${iter:+--iter $iter} \
        data/${segmented_data_set}_hires $lang \
        ${decode_dir}_stage1 $ivector_extractor_input
    fi
  else
    # just use all the frames
    ivector_extractor_input=${decode_dir}_stage1
  fi
fi
if [ $stage -le 5 ]; then
  echo "Extracting i-vectors, stage 2 with input $ivector_extractor_input"
  # Second i-vector pass: offline, per-speaker estimation that excludes
  # silence based on $ivector_extractor_input (alignments or vad weights
  # from the previous stage).  --sub-speaker-frames optionally splits each
  # speaker into "sub-speakers" of at least that many frames, which helps
  # when acoustic conditions drift over time within a speaker's data.
  hires_data=data/${segmented_data_set}_hires
  ivector_out=$ivector_dir/ivectors_${segmented_data_set}${ivector_affix}
  steps/online/nnet2/extract_ivectors.sh --cmd "$train_cmd" --nj 20 \
    --silence-weight $silence_weight \
    --sub-speaker-frames $sub_speaker_frames --max-count $max_count \
    $hires_data $lang $ivector_dir/extractor \
    $ivector_extractor_input $ivector_out;
fi
# Second decode pass, now using the refined per-speaker i-vectors.  A
# failure inside the parallel decode is recorded through a marker file so
# the script stops with a clear message instead of rescoring partial output.
if [ $stage -le 6 ]; then
  echo "Generating lattices, stage 2 with --acwt $acwt"
  # Fix: quote the marker-file path expansions; $dir comes straight from
  # the command line and the unquoted forms would break on whitespace.
  rm -f "${decode_dir}_tg/.error"
  # $pass2_decode_opts is intentionally unquoted: it holds several options.
  steps/nnet3/decode.sh --nj $decode_num_jobs --cmd "$decode_cmd" --config conf/decode.config $pass2_decode_opts \
    --acwt $acwt --post-decode-acwt $post_decode_acwt \
    --extra-left-context $extra_left_context \
    --extra-right-context $extra_right_context \
    --extra-left-context-initial $extra_left_context_initial \
    --extra-right-context-final $extra_right_context_final \
    --frames-per-chunk "$frames_per_chunk" \
    --skip-scoring true ${iter:+--iter $iter} --lattice-beam $lattice_beam \
    --online-ivector-dir $ivector_dir/ivectors_${segmented_data_set}${ivector_affix} \
    $graph data/${segmented_data_set}_hires ${decode_dir}_tg || touch "${decode_dir}_tg/.error"
  [ -f "${decode_dir}_tg/.error" ] && echo "$0: Error decoding" && exit 1;
fi
if [ $stage -le 7 ]; then
  echo "Rescoring lattices"
  # Rescore the trigram lattices with the const-arpa 4-gram LM.  The brace
  # expansions of the original are spelled out explicitly here; the words
  # passed to the script are identical.
  steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
    --skip-scoring true \
    ${lang}_pp_test ${lang}_pp_test_fg data/${segmented_data_set}_hires \
    ${decode_dir}_tg ${decode_dir}_fg;
fi

# From here on, work with the rescored (4-gram) lattices.
decode_dir=${decode_dir}_fg

if [ $stage -le 8 ]; then
  # Produce the ctm via MBR decoding (and the WER for dev_aspire), tuning
  # lm-weight and word-insertion penalty over the configured grids.
  # $score_opts is intentionally unquoted: it carries multiple options.
  local/score_aspire.sh --cmd "$decode_cmd" \
    $score_opts \
    --word-ins-penalties "0.0,0.25,0.5,0.75,1.0" \
    --ctm-beam 6 \
    ${iter:+--iter $iter} \
    --decode-mbr true \
    --tune-hyper true \
    $lang $decode_dir $act_data_set $segmented_data_set $out_file
fi