#!/bin/bash
# Copyright 2015 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
# Mitsubishi Electric Research Labs (Shinji Watanabe)
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
# Copyright 2015, Mitsubishi Electric Research Laboratories, MERL (Author: Takaaki Hori)
# --- Tunable options (overridable on the command line, e.g. "--order 4",
# --- thanks to utils/parse_options.sh parsing the variables declared below) ---
nj=12           # number of parallel jobs
stage=1         # resume the script from this stage
order=5         # n-gram order of the high-order LM used for lattice rescoring
hidden=300      # number of hidden units of the RNNLM
rnnweight=0.5   # interpolation weight given to the RNNLM when rescoring
nbest=100       # size of the n-best list rescored by the RNNLM
train=noisy     # training-condition tag used in experiment directory names
eval_flag=true  # make it true when the evaluation data are released

. utils/parse_options.sh || exit 1;
. ./path.sh
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.

# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.

# Exactly two positional arguments are required.
if [ $# -ne 2 ]; then
  printf "\nUSAGE: %s <Chime4 root directory> <enhancement method>\n\n" "$(basename "$0")"
  echo "First argument specifies a root directory of Chime4 data"
  echo "Second argument specifies a unique name for different enhancement method"
  exit 1;
fi
# Directory-name suffixes of the two language models, derived from the options.
lm_suffix=${order}gkn_5k          # high-order Kneser-Ney n-gram LM, 5k vocab
rnnlm_suffix=rnnlm_5k_h${hidden}  # RNNLM, 5k vocab, ${hidden} hidden units

# Positional arguments.
chime4_data=$1  # root directory of the CHiME-4 data
enhan=$2        # tag naming the enhancement method being evaluated

# Sanity check: the data root must exist.
if [ ! -d "$chime4_data" ]; then
  echo "$chime4_data does not exist. Please specify chime4 data root correctly" && exit 1
fi

# Sanity check: local/run_dnn.sh must already have produced the sMBR-trained
# DNN lattices that this script rescores.
srcdir=exp/tri4a_dnn_tr05_multi_${train}_smbr_i1lats
if [ ! -d "$srcdir" ]; then
  echo "error, execute local/run_dnn.sh, first"
  exit 1;
fi
# Stage 1: train the high-order (order-${order}) n-gram language model.
if [ $stage -le 1 ]; then
  local/chime4_train_lms.sh "$chime4_data" || exit 1;
fi

# Stage 2: train the RNN language model.
if [ $stage -le 2 ]; then
  local/chime4_train_rnnlms.sh "$chime4_data" || exit 1;
fi

# Output directory holding all rescoring results produced below.
dir=exp/tri4a_dnn_tr05_multi_${train}_smbr_lmrescore
mkdir -p "$dir"

# Make a symbolic link to the decoding-graph info produced by run_dnn.sh;
# it is passed to local/chime4_calc_wers.sh later in this script.
if [ ! -e "$dir/graph_tgpr_5k" ]; then
  if [ ! -e "exp/tri4a_dnn_tr05_multi_${train}/graph_tgpr_5k" ]; then
    echo "graph is missing, execute local/run_dnn.sh, correctly"
    exit 1;
  fi
  # Both directories live under exp/, so this relative target resolves
  # correctly from inside $dir (no pushd/cd needed).
  ln -s "../tri4a_dnn_tr05_multi_${train}/graph_tgpr_5k" "$dir"
fi
# Stage 3: rescore the DNN-sMBR lattices with the high-order n-gram LM.
if [ $stage -le 3 ]; then
  # Recover the best sMBR iteration chosen by local/run_dnn.sh; presumably the
  # first "_"-separated field of the first token in the best-WER log is the
  # iteration number — verify against local/chime4_calc_wers.sh output format.
  if [ ! -f "$srcdir/log/best_wer_$enhan" ]; then
    echo "error, execute local/run_dnn.sh, first"
    exit 1;
  fi
  it=$(cut -f 1 -d" " "$srcdir/log/best_wer_$enhan" | awk -F'[_]' '{print $1}')

  # Development sets always; evaluation sets only once they are released.
  if $eval_flag; then
    tasks="dt05_simu dt05_real et05_simu et05_real"
  else
    tasks="dt05_simu dt05_real"
  fi

  # Rescore the lattices of each task (see steps/lmrescore.sh for the
  # semantics of --mode 3).
  for t in $tasks; do
    steps/lmrescore.sh --mode 3 \
      data/lang_test_tgpr_5k \
      data/lang_test_${lm_suffix} \
      data-fmllr-tri3b/${t}_$enhan \
      $srcdir/decode_tgpr_5k_${t}_${enhan}_it$it \
      $dir/decode_tgpr_5k_${t}_${enhan}_${lm_suffix}
  done

  # Collect and display WERs of the n-gram-rescored results.
  mkdir -p "$dir/log"
  local/chime4_calc_wers.sh "$dir" "${enhan}_${lm_suffix}" "$dir/graph_tgpr_5k" \
    > "$dir/best_wer_${enhan}_${lm_suffix}.result"
  head -n 15 "$dir/best_wer_${enhan}_${lm_suffix}.result"
fi
# Stage 4: n-best rescoring with the RNNLM on top of the n-gram rescoring.
if [ $stage -le 4 ]; then
  # Recover the best LM weight found in stage 3 (presumably the last
  # "_"-separated field of the first token of the best-WER log — verify
  # against local/chime4_calc_wers.sh); it is reused as the inverse
  # acoustic weight when generating the n-best lists.
  if [ ! -f "$dir/log/best_wer_${enhan}_${lm_suffix}" ]; then
    echo "error, rescoring with a high-order n-gram seems to be failed"
    exit 1;
  fi
  lmw=$(cut -f 1 -d" " "$dir/log/best_wer_${enhan}_${lm_suffix}" | awk -F'[_]' '{print $NF}')

  # Development sets always; evaluation sets only once they are released.
  if $eval_flag; then
    tasks="dt05_simu dt05_real et05_simu et05_real"
  else
    tasks="dt05_simu dt05_real"
  fi

  # Rescore the ${nbest}-best hypotheses of each task with the RNNLM,
  # interpolated with the n-gram LM using weight ${rnnweight}.
  for t in $tasks; do
    steps/rnnlmrescore.sh --inv-acwt $lmw --N $nbest --use-phi true \
      $rnnweight \
      data/lang_test_${lm_suffix} \
      data/lang_test_${rnnlm_suffix} \
      data-fmllr-tri3b/${t}_$enhan \
      $dir/decode_tgpr_5k_${t}_${enhan}_${lm_suffix} \
      $dir/decode_tgpr_5k_${t}_${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}
  done

  # Collect and display WERs for the RNNLM-rescored results.
  local/chime4_calc_wers.sh "$dir" "${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}" "$dir/graph_tgpr_5k" \
    > "$dir/best_wer_${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}.result"
  head -n 15 "$dir/best_wer_${enhan}_${rnnlm_suffix}_w${rnnweight}_n${nbest}.result"
fi