#!/bin/bash
# Copyright 2012  Johns Hopkins University (author: Daniel Povey)
#           2015  Guoguo Chen
#           2017  Hainan Xu
#           2017  Szu-Jui Chen

# This script trains LMs on the Chime4 data.
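#
# Usage: local/rnnlm/run_lstm_1a.sh <enhan>
# where <enhan> is the enhancement-method affix that appears in the decode
# directory names (it is read into $enhan below). The exact affix depends on
# how your decode directories were created, e.g. something like
# beamformit_5mics.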
# rnnlm/train_rnnlm.sh: best iteration (out of 120) was 91, linking it to final iteration.
# rnnlm/train_rnnlm.sh: train/dev perplexity was 23.2 / 25.6.
# Train objf: -5.63 -4.52 -4.20 -4.05 -3.96 -3.89 -3.83 -3.79 -3.76 -3.73 -3.70 -3.67 -3.65
# -3.63 -3.61 -3.59 -3.58 -3.56 -3.54 -3.53 -3.52 -3.50 -3.49 -3.48 -3.47 -3.46 -3.45 -3.44
# -3.43 -3.42 -3.43 -3.41 -3.39 -3.38 -3.38 -3.37 -3.35 -3.34 -3.34 -3.33 -3.32 -3.31 -3.31
# -3.30 -3.29 -3.28 -3.28 -3.27 -3.26 -3.25 -3.25 -3.25 -3.23 -3.22 -3.23 -3.22 -3.21 -3.20
# -3.20 -3.19 -3.19 -3.18 -3.18 -3.17 -3.16 -3.15 -3.16 -3.15 -3.14 -3.13 -3.13 -3.13 -3.12
# -3.11 -3.12 -3.11 -3.10 -3.09 -3.09 -3.09 -3.08 -3.07 -3.07 -3.07 -3.06 -3.05 -3.05 -3.05
# -3.04 -3.04 -3.04 -3.03 -3.00 -3.02 -3.00 -2.99 -3.00 -2.99 -2.99 -2.98 -2.96 -2.97 -2.96
# -2.95 -2.96 -2.95 -2.95 -2.94 -2.93 -2.93 -2.92 -2.91 -2.92 -2.91 -2.91 -2.91 -2.89 -2.90 -2.89 -2.88
# Dev objf: -11.73 -5.17 -4.46 -4.21 -4.06 -3.96 -3.88 -3.82 -3.79 -3.73 -3.69 -3.68 -3.63
# -3.61 -3.59 -3.58 -3.54 -3.54 -3.53 -3.51 -3.50 -3.47 -3.47 -3.46 -3.44 -3.44 -3.42 -3.42
# -3.42 -3.42 -3.40 -3.36 -3.35 -3.35 -3.34 -3.34 -3.34 -3.33 -3.32 -3.32 -3.31 -3.31 -3.31
# -3.30 -3.29 -3.29 -3.29 -3.28 -3.28 -3.28 -3.27 -3.27 -3.26 -3.27 -3.27 -3.26 -3.25 -3.26
# -3.26 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.26 -3.25 -3.24 -3.25 -3.25 -3.24 -3.24
# -3.25 -3.25 -3.24 -3.24 -3.25 -3.26 -3.25 -3.25 -3.24 -3.25 -3.25 -3.24 -3.25 -3.25 -3.25
# -3.24 -3.26 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.25 -3.26 -3.26 -3.26 -3.26 -3.26
# -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.27 -3.28 -3.28 -3.28 -3.28 -3.29 -3.29 -3.29
# Begin configuration section.
affix=1a
dir=exp/rnnlm_lstm_${affix}
enhan=$1
embedding_dim=2048
lstm_rpd=512
lstm_nrpd=512
stage=-10
train_stage=-10
# variables for lattice rescoring
run_lat_rescore=true
run_nbest_rescore=true
use_backward_model=true
ac_model_dir=exp/chain/tdnn1a_sp
decode_dir_suffix=rnnlm_lstm_${affix}
ngram_order=4 # approximate lattice-rescoring by limiting the max n-gram order;
              # if set, histories in the lattice that share the same n-gram
              # history are merged, which prevents the lattice from growing
              # exponentially
# End configuration section.

. ./cmd.sh
. utils/parse_options.sh
srcdir=data/local/local_lm
lexicon=data/local/dict/lexiconp.txt
text_dir=data/rnnlm/text_nosp_${affix}
mkdir -p $dir/config
set -e
for f in $lexicon; do
  [ ! -f $f ] && \
    echo "$0: expected file $f to exist; search for local/wsj_extend_dict.sh in run.sh" && exit 1
done

# Prepare training and dev data.
if [ $stage -le 0 ]; then
  mkdir -p $text_dir
  cp $srcdir/train.rnn $text_dir/chime4.txt.tmp
  sed -e "s/<RNN_UNK>/<UNK>/g" $text_dir/chime4.txt.tmp > $text_dir/chime4.txt
  cp $srcdir/valid.rnn $text_dir/dev.txt
fi
if [ $stage -le 1 ]; then
  cp data/lang_chain/words.txt $dir/config/words.txt
  n=$(wc -l <$dir/config/words.txt)
  echo "<brk> $n" >>$dir/config/words.txt

  # Words that appear in the training or dev data but are not in words.txt
  # will be mapped to <UNK> (the OOV word written below) during training.
echo "<UNK>" >$dir/config/oov.txt
  cat >$dir/config/data_weights.txt <<EOF
chime4 3 1.0
EOF
  rnnlm/get_unigram_probs.py --vocab-file=$dir/config/words.txt \
                             --unk-word="<UNK>" \
                             --data-weights-file=$dir/config/data_weights.txt \
                             $text_dir | awk 'NF==2' >$dir/config/unigram_probs.txt
  # Choose word features (used to build the word embeddings), based on the
  # unigram probabilities.
  rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \
                           --use-constant-feature=true \
                           --special-words='<s>,</s>,<UNK>,<brk>' \
                           $dir/config/words.txt > $dir/config/features.txt
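
  # The xconfig below defines the network: relu-renorm (TDNN-style) layers
  # that splice the current step with an earlier one via Append(0,
  # IfDefined(-1/-3)), alternating with two fast-lstmp layers (cell dim
  # $embedding_dim, projections $lstm_rpd/$lstm_nrpd).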
  cat >$dir/config/xconfig <<EOF
input dim=$embedding_dim name=input
relu-renorm-layer name=tdnn1 dim=$embedding_dim input=Append(0, IfDefined(-1))
fast-lstmp-layer name=lstm1 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd
relu-renorm-layer name=tdnn2 dim=$embedding_dim input=Append(0, IfDefined(-3))
fast-lstmp-layer name=lstm2 cell-dim=$embedding_dim recurrent-projection-dim=$lstm_rpd non-recurrent-projection-dim=$lstm_nrpd
relu-renorm-layer name=tdnn3 dim=$embedding_dim input=Append(0, IfDefined(-3))
output-layer name=output include-log-softmax=false dim=$embedding_dim
EOF
  rnnlm/validate_config_dir.sh $text_dir $dir/config
fi
if [ $stage -le 2 ]; then
  rnnlm/prepare_rnnlm_dir.sh $text_dir $dir/config $dir
fi
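
# prepare_rnnlm_dir.sh above is the standard Kaldi step that turns $text_dir
# and the config into a ready-to-train RNNLM directory under $dir.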

# Train a model on the forward data (the forward model).
if [ $stage -le 3 ]; then
  rnnlm/train_rnnlm.sh --num-jobs-initial 1 --num-jobs-final 3 \
    --stage $train_stage --num-epochs 10 --cmd "$train_cmd" $dir
fi
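
# train_rnnlm.sh links the best iteration to the final model inside $dir and
# prints the train/dev perplexities quoted in the header of this file.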

# Train another model on time-reversed data (the backward model); its scores
# are combined with the forward model's during n-best rescoring below.
if [ $stage -le 4 ] && $use_backward_model; then
  local/rnnlm/run_lstm_back.sh --embedding-dim $embedding_dim \
    --lstm-rpd $lstm_rpd --lstm-nrpd $lstm_nrpd \
    --affix $affix
fi

# Lattice rescoring is faster than n-best rescoring but performs worse here,
# so we use it only to gauge how good the forward model is.
LM=5gkn_5k # use the 5-gram LM from run_lmrescore_tdnn.sh
tgtdir=${ac_model_dir}_smbr_lmrescore
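# $tgtdir holds the decode directories produced by the earlier sMBR +
# LM-rescoring step (see run_lmrescore_tdnn.sh); the rescoring stages below
# read from them and write their outputs alongside.
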
if [ $stage -le 5 ] && $run_lat_rescore; then
echo "$0: Perform lattice-rescoring on $ac_model_dir"
for decode_set in dt05_real dt05_simu et05_real et05_simu; do
decode_dir=$tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${LM}
# Lattice rescoring
rnnlm/lmrescore_pruned.sh \
--cmd "$train_cmd --mem 2G" \
--weight 0.8 --max-ngram-order $ngram_order \
data/lang_test_$LM $dir \
data/${decode_set}_${enhan}_chunked ${decode_dir} \
$tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix} &
done
wait
  # Compute WERs for the lattice-rescoring results.
  local/chime4_calc_wers.sh $tgtdir ${enhan}_${decode_dir_suffix} \
    $tgtdir/graph_tgpr_5k \
    > $tgtdir/best_wer_${enhan}_${decode_dir_suffix}.result
  head -n 15 $tgtdir/best_wer_${enhan}_${decode_dir_suffix}.result
fi
nbest=100
rnnweight=0.8
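
# In rnnlm/lmrescore_nbest.sh, $rnnweight is the interpolation weight given
# to the RNNLM score when it is combined with the original LM score on each
# of the $nbest hypotheses extracted from the lattices.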
if [ $stage -le 6 ] && $run_nbest_rescore; then
echo "$0: Perform nbest-rescoring on $ac_model_dir"
for decode_set in dt05_real dt05_simu et05_real et05_simu; do
decode_dir=$tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${LM}
(
# Lattice rescoring
rnnlm/lmrescore_nbest.sh \
--cmd "$train_cmd --mem 2G" --N $nbest \
$rnnweight data/lang_test_$LM $dir \
data/${decode_set}_${enhan}_chunked ${decode_dir} \
$tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}
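      # Rescore the forward-rescored n-best lists again with the backward
      # (right-to-left) model; results go to the *_bi directory.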
      if $use_backward_model; then
        rnnlm/lmrescore_nbest_back.sh \
          --cmd "$train_cmd --mem 2G" --N $nbest \
          $rnnweight data/lang_test_$LM ${dir}_back \
          data/${decode_set}_${enhan}_chunked \
          $tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest} \
          $tgtdir/decode_tgpr_5k_${decode_set}_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi
      fi
    ) &
  done
  wait
  # Compute WERs for the n-best rescoring results.
  if $use_backward_model; then
    local/chime4_calc_wers.sh $tgtdir ${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi \
      $tgtdir/graph_tgpr_5k \
      > $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi.result
    head -n 15 $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}_bi.result
  else
    local/chime4_calc_wers.sh $tgtdir ${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest} \
      $tgtdir/graph_tgpr_5k \
      > $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}.result
    head -n 15 $tgtdir/best_wer_${enhan}_${decode_dir_suffix}_w${rnnweight}_n${nbest}.result
  fi
fi
exit 0