run_gmm.sh
9.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
#!/bin/bash
# Copyright 2016 University of Sheffield (Jon Barker, Ricard Marxer)
# Inria (Emmanuel Vincent)
# Mitsubishi Electric Research Labs (Shinji Watanabe)
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
# This script is made from the kaldi recipe of the 2nd CHiME Challenge Track 2
# made by Chao Weng
# Load the recipe environment from the current directory. $train_cmd and
# $decode_cmd used further down are not defined in this file, so they
# presumably come from cmd.sh -- confirm against your local copy.
. ./path.sh
. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
## This relates to the queue.
# Config: default values, assigned before utils/parse_options.sh is sourced.
# The --stage=N hint below indicates they can be overridden on the command line
# (NOTE(review): confirm the accepted syntax in utils/parse_options.sh).
nj=30 # number of parallel jobs for training/alignment; lowered to the speaker count if that is smaller
stage=0 # resume training with --stage=N
train=noisy # noisy data multi-condition training; also the suffix of the data/*_${train} and exp/*_${train} directories
eval_flag=true # make it true when the evaluation data are released; enables processing of the et05_* sets
add_enhanced_data=true # make it true when you want to add enhanced data into training set
decode_only=false # if true, it wouldn't train a model again and will only do decoding
. utils/parse_options.sh || exit 1;
# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.
# Exactly three positional arguments are required; anything else prints the
# usage text and stops with status 1.
if [ $# -ne 3 ]; then
  # "$0" is quoted so that a script path containing spaces is still reduced to
  # its base name correctly.
  printf "\nUSAGE: %s <enhancement method> <enhanced speech directory> <chime4 root directory>\n\n" "$(basename "$0")"
  echo "First argument specifies a unique name for different enhancement method"
  echo "Second argument specifies the directory of enhanced wav files"
  echo "Third argument specifies the CHiME4 root directory"
  exit 1
fi

# set enhanced data
enhan=$1       # label of the enhancement method; used as suffix of data/ and decode directories
enhan_data=$2  # directory holding the enhanced wav files
# set chime4 data
chime4_data=$3 # CHiME4 root directory
# Strict mode for everything that follows:
#   -e           abort as soon as a command fails
#   -u           expanding an unset variable is an error
#   -o pipefail  a pipeline fails when any of its stages fails
# Command tracing (-x) is NOT switched on here.
set -euo pipefail
# Guard: data/lang must already exist; the message points at
# local/run_init.sh as the step that is expected to create it.
[ -d data/lang ] || {
  echo "error, execute local/run_init.sh, first"
  exit 1
}
# Decode-only mode: skip data preparation and training and reuse an existing
# tri3b model. Checks that the prepared data and the model are present,
# prepares the enhanced dev/eval data when it is missing, and then
# fast-forwards to stage 6.
if $decode_only; then
  # check data/local/data
  # mdir is the directory in which prepared data and the trained model are
  # looked up. It is the current directory here, so the two "copy" branches
  # below can only trigger if mdir is edited to point somewhere else.
  # Quoted throughout because the working directory may contain spaces.
  mdir=$(pwd)
  if [ ! -d "$mdir/data/local/data" ]; then
    echo "error, set $mdir correctly"
    exit 1
  elif [ ! -d data/local/data ]; then
    echo "copy $mdir/data/local/data"
    mkdir -p data/local
    cp -r "$mdir/data/local/data" data/local/
  fi
  # check gmm model
  if [ ! -d "$mdir/exp/tri3b_tr05_multi_${train}" ]; then
    echo "error, set $mdir correctly"
    exit 1
  elif [ ! -d "exp/tri3b_tr05_multi_${train}" ]; then
    echo "copy $mdir/exp/tri3b_tr05_multi_${train}"
    mkdir -p exp
    cp -r "$mdir/exp/tri3b_tr05_multi_${train}" exp/
  fi
  # process for enhanced data
  if [ ! -d "data/dt05_real_$enhan" ] || [ ! -d "data/et05_real_$enhan" ]; then
    local/real_enhan_chime4_data_prep.sh "$enhan" "$enhan_data"
    local/simu_enhan_chime4_data_prep.sh "$enhan" "$enhan_data"
  fi
  # Skip every training stage, but respect a later stage requested with
  # --stage (e.g. scoring only) instead of forcing it back to 6.
  if [ "$stage" -lt 6 ]; then
    stage=6
  fi
fi
#######################
#### training #########
# Stage 1: data preparation for the noisy corpus and for the enhanced
# dev/eval data. The user-supplied directories are quoted so that paths
# containing spaces reach the preparation scripts as single arguments.
if [ "$stage" -le 1 ]; then
  # process for distant talking speech for real and simulation data
  local/real_noisy_chime4_data_prep.sh "$chime4_data"
  local/simu_noisy_chime4_data_prep.sh "$chime4_data"

  # process for enhanced data (skipped when both the dt05 and et05 real
  # directories for this enhancement method already exist)
  if [ ! -d "data/dt05_real_$enhan" ] || [ ! -d "data/et05_real_$enhan" ]; then
    local/real_enhan_chime4_data_prep.sh "$enhan" "$enhan_data"
    local/simu_enhan_chime4_data_prep.sh "$enhan" "$enhan_data"
  fi
fi
# Copy enhanced data for 1ch and 2ch experiments
# Stage 2. Skipped entirely when the working directory path contains
# "s5_6ch". Otherwise the enhanced training sets are taken from
# ../s5_6ch/data, and the script stops if no source can be found.
if [ "$stage" -le 2 ] && [[ "$PWD" != *s5_6ch* ]]; then
  beamformed=0  # set to 1 once a usable source of enhanced training data is found
  # First remove empty files generated from previous stage
  for d in "tr05_real_$enhan" "tr05_simu_$enhan"; do
    if [ -d "data/$d" ]; then
      rm -rf "data/$d"
      echo "remove empty directory $d"
    fi
  done
  if [[ "$enhan" == *beamformit_2mics* ]] && [ -d ../s5_6ch/data/tr05_real_beamformit_5mics ]; then
    # The 5-microphone beamformed training data stands in for the 2mics setup.
    # NOTE(review): the destination name is fixed to "beamformit_2mics" even
    # when $enhan only contains that string -- confirm this is intended.
    echo "copy tr05_{real,simu}_beamformit_5mics from ../s5_6ch/data/"
    cp -r ../s5_6ch/data/tr05_real_beamformit_5mics data/tr05_real_beamformit_2mics
    cp -r ../s5_6ch/data/tr05_simu_beamformit_5mics data/tr05_simu_beamformit_2mics
    beamformed=1
  elif [ -d "../s5_6ch/data/tr05_real_$enhan" ]; then
    # Name both sets explicitly rather than reusing the leftover loop variable,
    # which only held the simu directory name.
    echo "copy enhanced training data tr05_{real,simu}_$enhan from ../s5_6ch/data/"
    cp -r "../s5_6ch/data/tr05_real_$enhan" data/
    cp -r "../s5_6ch/data/tr05_simu_$enhan" data/
    beamformed=1
  elif [[ "$enhan" == *isolated_1ch_track* ]]; then
    # Nothing is copied for the isolated 1ch track; it is only marked as OK.
    beamformed=1
  fi
  if [ "$beamformed" = 0 ]; then
    echo "no such directory tr05_{real,simu}_{beamformit_5mics,blstm_gev,single_BLSTMmask}"
    echo "They are generated by run_beamform_6ch_track.sh in ../s5_6ch/run.sh, please execute it first"
    exit 1
  fi
fi
# Stage 3: MFCC features and CMVN statistics for the ${train} data sets and,
# optionally, for the enhanced training sets.
# mfccdir is where the feature files are written; pick a location with
# plenty of disk space.
mfccdir=mfcc
if [ $stage -le 3 ]; then
  # Training and development sets are always processed.
  feat_sets="tr05_real_${train} dt05_real_${train} tr05_simu_${train} dt05_simu_${train}"
  # Evaluation sets only once they are available.
  if $eval_flag; then
    feat_sets="$feat_sets et05_real_${train} et05_simu_${train}"
  fi
  # Enhanced training sets go last, matching the original processing order.
  if $add_enhanced_data; then
    feat_sets="$feat_sets tr05_real_$enhan tr05_simu_$enhan"
  fi
  # Deliberately unquoted: the list is split into one set name per word.
  for dset in $feat_sets; do
    steps/make_mfcc.sh --nj 8 --cmd "$train_cmd" \
      data/$dset exp/make_mfcc/$dset $mfccdir
    steps/compute_cmvn_stats.sh data/$dset exp/make_mfcc/$dset $mfccdir
  done
fi
# Stage 4: build the "multi" sets (multi = simu + real).
# The training set may additionally include the enhanced training data.
if [ $stage -le 4 ]; then
  # Source directories of the multi-condition training set.
  train_parts="data/tr05_simu_${train} data/tr05_real_${train}"
  if $add_enhanced_data; then
    train_parts="$train_parts data/tr05_simu_$enhan data/tr05_real_$enhan"
  fi
  # Deliberately unquoted: one argument per source directory.
  utils/combine_data.sh data/tr05_multi_${train} $train_parts

  # Development set, plus the evaluation set when it is enabled.
  utils/combine_data.sh data/dt05_multi_${train} data/dt05_simu_${train} data/dt05_real_${train}
  if $eval_flag; then
    utils/combine_data.sh data/et05_multi_${train} data/et05_simu_${train} data/et05_real_${train}
  fi
fi
# Stage 5: train the GMM systems on the multi-condition training set
# (mono -> tri1 deltas -> tri2b LDA+MLLT -> tri3b SAT) and build the
# decoding graph for the final model.
if [ $stage -le 5 ]; then
  tr_set=data/tr05_multi_${train}   # training data directory
  tag=tr05_multi_${train}           # common suffix of all exp/ directories

  # Never use more jobs than there are lines in spk2utt.
  nspk=$(wc -l $tr_set/spk2utt | awk '{print $1}')
  nj2=$nj
  if [ $nj -gt $nspk ]; then
    nj2=$nspk
  fi

  # training monophone model
  steps/train_mono.sh --boost-silence 1.25 --nj $nj2 --cmd "$train_cmd" \
    $tr_set data/lang exp/mono0a_$tag
  steps/align_si.sh --boost-silence 1.25 --nj $nj2 --cmd "$train_cmd" \
    $tr_set data/lang exp/mono0a_$tag exp/mono0a_ali_$tag

  # triphone model on delta features
  steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
    2000 10000 $tr_set data/lang exp/mono0a_ali_$tag exp/tri1_$tag
  steps/align_si.sh --nj $nj2 --cmd "$train_cmd" \
    $tr_set data/lang exp/tri1_$tag exp/tri1_ali_$tag

  # triphone model with lda mllt features
  steps/train_lda_mllt.sh --cmd "$train_cmd" \
    --splice-opts "--left-context=3 --right-context=3" \
    2500 15000 $tr_set data/lang exp/tri1_ali_$tag exp/tri2b_$tag
  steps/align_si.sh --nj $nj2 --cmd "$train_cmd" \
    --use-graphs true $tr_set data/lang exp/tri2b_$tag exp/tri2b_ali_$tag

  # SAT model trained on the tri2b alignments, and its decoding graph
  steps/train_sat.sh --cmd "$train_cmd" \
    2500 15000 $tr_set data/lang exp/tri2b_ali_$tag exp/tri3b_$tag
  utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri3b_$tag exp/tri3b_$tag/graph_tgpr_5k
fi
#### training done ####
#######################
#####################
#### testing ########
# Stage 6: MFCC features and CMVN statistics for the enhanced dev (and eval)
# sets. mfccdir is where the feature files are written; pick a location with
# plenty of disk space.
mfccdir=mfcc/$enhan
if [ $stage -le 6 ]; then
  tasks="dt05_real_$enhan dt05_simu_$enhan"
  if $eval_flag; then
    tasks="$tasks et05_real_$enhan et05_simu_$enhan"
  fi
  for x in $tasks; do
    # Features already extracted for this set: nothing to do.
    [ -e data/$x/feats.scp ] && continue
    steps/make_mfcc.sh --nj 8 --cmd "$train_cmd" \
      data/$x exp/make_mfcc/$x $mfccdir
    steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir
  done
fi
# Stage 7: build the mixed enhanced dev/eval sets (multi = simu + real).
# A set is (re)built whenever the dev multi directory is missing; in addition
# the eval set is built when only its own directory is missing, so enabling
# eval_flag after an earlier dev-only run still produces et05_multi.
if [ "$stage" -le 7 ]; then
  dt_multi="data/dt05_multi_$enhan"
  et_multi="data/et05_multi_$enhan"

  # Record the state before anything is created below.
  dt_missing=false
  [ -d "$dt_multi" ] || dt_missing=true

  if $dt_missing; then
    utils/combine_data.sh "$dt_multi" "data/dt05_simu_$enhan" "data/dt05_real_$enhan"
  fi
  if $eval_flag && { $dt_missing || [ ! -d "$et_multi" ]; }; then
    utils/combine_data.sh "$et_multi" "data/et05_simu_$enhan" "data/et05_real_$enhan"
  fi
fi
# Stage 8: decode the enhanced dev (and eval) sets with the tri3b model.
# The decoding jobs run in parallel in the background. Each job is waited for
# individually so that a failed decode stops the script; a bare `wait` would
# discard the jobs' exit statuses and let scoring run on incomplete output.
if [ "$stage" -le 8 ]; then
  am_dir="exp/tri3b_tr05_multi_${train}"
  decode_sets=(dt05_real dt05_simu)
  if $eval_flag; then
    decode_sets+=(et05_real et05_simu)
  fi

  decode_pids=()
  for dset in "${decode_sets[@]}"; do
    steps/decode_fmllr.sh --nj 4 --num-threads 3 --cmd "$decode_cmd" \
      "$am_dir/graph_tgpr_5k" "data/${dset}_$enhan" \
      "$am_dir/decode_tgpr_5k_${dset}_$enhan" &
    decode_pids+=("$!")
  done

  # Let every job finish before reporting, so no decoder is left orphaned.
  decode_failed=0
  for pid in "${decode_pids[@]}"; do
    wait "$pid" || decode_failed=1
  done
  if [ "$decode_failed" -ne 0 ]; then
    echo "$0: at least one decoding job failed" >&2
    exit 1
  fi
fi
# Stage 9: scoring. The WER summary for this enhancement method is written
# next to the tri3b model and its first 15 lines are shown on the terminal.
if [ $stage -le 9 ]; then
  am_dir=exp/tri3b_tr05_multi_${train}
  wer_file=$am_dir/best_wer_$enhan.result
  local/chime4_calc_wers.sh $am_dir $enhan $am_dir/graph_tgpr_5k \
    > $wer_file
  head -n 15 $wer_file
fi
#### testing done ####
#####################
echo "$(basename $0) Done."