  #!/bin/bash

  # CHiME-2 (WSJ0) s5 recipe: prepares clean/reverberated/noisy WSJ0 data,
  # trains GMM systems on multi-condition data, then a DBN-pretrained DNN
  # on the noisy set, finishing with sMBR sequence-discriminative training.

  . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
             ## This relates to the queue.

  # This is a shell script, but it's recommended that you run the commands one by
  # one by copying and pasting into the shell.

  # No-op as written: 0 never matches the pattern 1, so nothing runs.
  # The "#goto here" / "#here:" markers suggest this case/esac skeleton is
  # moved around to skip already-completed stages -- presumably a manual
  # "goto"; harmless when left as-is.
  case 0 in    #goto here
      1)
  ;;           #here:
  esac
  
  #exit 1;
  # --- Data preparation -------------------------------------------------
  # Three parallel versions of the WSJ0 sets are prepared: clean,
  # reverberated, and noisy (CHiME-2 isolated).  Edit the corpus paths
  # below for your site; the commented-out paths are an alternate location.
  # NOTE(review): the three data-prep calls have no "|| exit 1", unlike
  # most steps below -- a failure there will not stop the script.
  #need wsj0 for the clean version and LMs
  #wsj0=/mnt/spdb/wall_street_journal
  wsj0=/export/corpora5/LDC/LDC93S6B
  local/clean_wsj0_data_prep.sh $wsj0

  #reverb=/mnt/spdb/CHiME/chime2-wsj0/reverberated 
  reverb=/export/corpora5/ChiME/chime2-wsj0/reverberated
  local/reverb_wsj0_data_prep.sh $reverb 

  #noisy=/mnt/spdb/CHiME/chime2-wsj0/isolated
  noisy=/export/corpora5/ChiME/chime2-wsj0/isolated
  local/noisy_wsj0_data_prep.sh $noisy 

  local/wsj_prepare_dict.sh || exit 1;

  # Build data/lang from the dictionary; <SPOKEN_NOISE> is the OOV token.
  utils/prepare_lang.sh data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp data/lang || exit 1;

  # Create the test language-model dirs (e.g. data/lang_test_tgpr_5k).
  local/chime_format_data.sh || exit 1;
  
  # Now make MFCC features and per-speaker CMVN stats.
  # mfccdir should be some place with a largish disk where you
  # want to store MFCC features.
  # The clean, noisy and reverberated sets get identical treatment, so a
  # single loop covers all of them (same sets, same order as before).
  # Note: the --boost-silence option (used in training below) should
  # probably be omitted by default for normal setups.  It doesn't always
  # help. [it's to discourage non-silence models from modeling silence.]
  mfccdir=mfcc
  for x in \
      test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean train_si84_clean \
      test_eval92_5k_noisy dev_dt_05_noisy train_si84_noisy \
      dev_dt_05_reverb train_si84_reverb; do
    steps/make_mfcc.sh --nj 10 --cmd "$train_cmd" \
      data/$x exp/make_mfcc/$x $mfccdir || exit 1;
    steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
  done
  
  # Make filterbank features (used later by the DNN stages).  Each data
  # dir is first copied under data-fbank/ so the MFCC version in data/
  # stays intact.  One loop covers the clean, noisy and reverberated
  # sets (same sets, same order as before).
  mkdir -p data-fbank
  fbankdir=fbank
  for x in \
      test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean train_si84_clean \
      test_eval92_5k_noisy dev_dt_05_noisy train_si84_noisy \
      dev_dt_05_reverb train_si84_reverb; do
    cp -r data/$x data-fbank/$x
    steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \
      data-fbank/$x exp/make_fbank/$x $fbankdir || exit 1;
  done
  
  #begin train gmm systems using multi condition data
  #train_si84 = clean+reverb+noisy,
  # Build the combined data dir: merge text/wav.scp from the three
  # conditions, then derive utt2spk/spk2utt.
  for s in train_si84 ; do
    mkdir -p data/$s
    cp data/${s}_clean/spk2gender data/$s/
    for x in text wav.scp; do
      cat data/${s}_clean/$x data/${s}_reverb/$x data/${s}_noisy/$x | sort -k1 > data/$s/$x
    done
    # Speaker id = first 3 characters of the utterance id.  This replaces
    # a fragile cat|awk|perl pipeline whose quoted perl string was split
    # across a line break; one awk call produces the same "utt spk" lines.
    awk '{print $1, substr($1,1,3)}' data/$s/wav.scp > data/$s/utt2spk
    utils/utt2spk_to_spk2utt.pl < data/$s/utt2spk > data/$s/spk2utt
  done
  
  # MFCC + CMVN and fbank features for the combined train_si84 set.
  # (Single data set, so the loops are unrolled into direct commands.)
  mfccdir=mfcc
  steps/make_mfcc.sh --nj 10 --cmd "$train_cmd" \
    data/train_si84 exp/make_mfcc/train_si84 $mfccdir || exit 1;
  steps/compute_cmvn_stats.sh data/train_si84 exp/make_mfcc/train_si84 $mfccdir || exit 1;

  fbankdir=fbank
  cp -r data/train_si84 data-fbank/train_si84
  steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \
    data-fbank/train_si84 exp/make_fbank/train_si84 $fbankdir || exit 1;
  
  
  # Monophone system on the multi-condition si84 data.
  steps/train_mono.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
    data/train_si84 data/lang exp/mono0a || exit 1;

  # Build the decoding graph and decode the noisy 5k eval set.
  # NOTE(review): no "|| exit 1" on mkgraph here, and decoding uses
  # $train_cmd rather than $decode_cmd -- presumably deliberate for this
  # site's queue setup; confirm against cmd.sh.
  utils/mkgraph.sh data/lang_test_tgpr_5k exp/mono0a exp/mono0a/graph_tgpr_5k
  #steps/decode.sh --nj 8  \
  #  exp/mono0a/graph_tgpr_5k data/test_eval92_5k_clean exp/mono0a/decode_tgpr_eval92_5k_clean
  steps/decode.sh --nj 8  --cmd "$train_cmd" \
    exp/mono0a/graph_tgpr_5k data/test_eval92_5k_noisy exp/mono0a/decode_tgpr_eval92_5k_noisy


  # Align si84 with the monophone model, then train the first triphone
  # system (tri1, delta features) on those alignments.
  steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
     data/train_si84 data/lang exp/mono0a exp/mono0a_ali || exit 1;

  steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
      2000 10000 data/train_si84 data/lang exp/mono0a_ali exp/tri1 || exit 1;
  utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri1 exp/tri1/graph_tgpr_5k || exit 1;

  #steps/decode.sh --nj 8 \
  #  exp/tri1/graph_tgpr data/test_eval92_5k_clean exp/tri1/decode_tgpr_eval92_5k_clean || exit 1;
  steps/decode.sh --nj 8 --cmd "$train_cmd" \
    exp/tri1/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri1/decode_tgpr_eval92_5k_noisy || exit 1;
  
  
  # test various modes of LM rescoring (4 is the default one).
  # This is just confirming they're equivalent.
  #for mode in 1 2 3 4; do
  #steps/lmrescore.sh --mode $mode --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \
  #  data/test_dev93 exp/tri1/decode_tgpr_dev93 exp/tri1/decode_tgpr_dev93_tg$mode  || exit 1;
  #done
  
  # demonstrate how to get lattices that are "word-aligned" (arcs coincide with
  # words, with boundaries in the right place).
  #sil_label=`grep '!SIL' data/lang_test_tgpr/words.txt | awk '{print $2}'`
  #steps/word_align_lattices.sh --cmd "$train_cmd" --silence-label $sil_label \
  #  data/lang_test_tgpr exp/tri1/decode_tgpr_dev93 exp/tri1/decode_tgpr_dev93_aligned || exit 1;
  
  steps/align_si.sh --nj 10 --cmd "$train_cmd" \
    data/train_si84 data/lang exp/tri1 exp/tri1_ali_si84 || exit 1;
  
  # Train tri2a, which is deltas + delta-deltas, on si84 data.
  steps/train_deltas.sh --cmd "$train_cmd" \
    2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2a || exit 1;
  
  utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri2a exp/tri2a/graph_tgpr_5k || exit 1;
  
  #steps/decode.sh --nj 8  \
  #  exp/tri2a/graph_tgpr_5k data/test_eval92_5k_clean exp/tri2a/decode_tgpr_eval92_5k_clean || exit 1;
  steps/decode.sh --nj 8 --cmd "$train_cmd" \
    exp/tri2a/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri2a/decode_tgpr_eval92_5k_noisy|| exit 1;
  
  #utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a exp/tri2a/graph_bg5k
  #steps/decode.sh --nj 8 \
  #  exp/tri2a/graph_bg5k data/test_eval92_5k_clean exp/tri2a/decode_bg_eval92_5k_clean || exit 1;

  # Train tri2b: LDA+MLLT on spliced features (+/-3 frames of context),
  # from the same tri1 alignments as tri2a.
  steps/train_lda_mllt.sh --cmd "$train_cmd" \
     --splice-opts "--left-context=3 --right-context=3" \
     2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2b || exit 1;

  utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri2b exp/tri2b/graph_tgpr_5k || exit 1;
  steps/decode.sh --nj 8 --cmd "$train_cmd" \
    exp/tri2b/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri2b/decode_tgpr_eval92_5k_noisy || exit 1;
  #steps/decode.sh --nj 8 \
  #  exp/tri2b/graph_tgpr data/test_eval92_clean exp/tri2b/decode_tgpr_eval92_clean || exit 1;


  # Align tri2b system with si84 data.
  steps/align_si.sh  --nj 10 --cmd "$train_cmd" \
    --use-graphs true data/train_si84 data/lang exp/tri2b exp/tri2b_ali_si84  || exit 1;


  # From 2b system, train 3b which is LDA + MLLT + SAT.
  steps/train_sat.sh --cmd "$train_cmd" \
    2500 15000 data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri3b || exit 1;
  utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri3b exp/tri3b/graph_tgpr_5k || exit 1;
  # decode_fmllr.sh is used (rather than decode.sh) because tri3b is
  # speaker-adaptively trained and needs fMLLR transforms at test time.
  steps/decode_fmllr.sh --nj 8 --cmd "$train_cmd" \
    exp/tri3b/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri3b/decode_tgpr_eval92_5k_noisy || exit 1;
  
  
  # From 3b multi-condition system, align noisy si84 data.
  steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
    data/train_si84_noisy data/lang exp/tri3b exp/tri3b_ali_si84_noisy || exit 1;

  # ... and the noisy dev set, which serves as cross-validation data for
  # the DNN training below.
  steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
    data/dev_dt_05_noisy data/lang exp/tri3b exp/tri3b_ali_dev_dt_05 || exit 1;

  #begin training DNN-HMM system
  #only on noisy si84 

  . ./path.sh
  #RBM pretraining
  # Unsupervised DBN pretraining: 7 stacked RBMs, 3 iterations each, on
  # the fbank features.  Output goes to exp/tri4a_dnn_pretrain.
  dir=exp/tri4a_dnn_pretrain
  $cuda_cmd $dir/_pretrain_dbn.log \
    steps/nnet/pretrain_dbn.sh --nn-depth 7 --rbm-iter 3 data-fbank/train_si84_noisy $dir
  #BP 
  # Supervised fine-tuning (backprop) on top of the pretrained DBN.
  # --hid-layers 0 presumably means no extra randomly-initialized hidden
  # layers are added beyond the DBN stack -- confirm in steps/nnet/train.sh.
  dir=exp/tri4a_dnn
  ali=exp/tri3b_ali_si84_noisy
  ali_dev=exp/tri3b_ali_dev_dt_05
  feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
  dbn=exp/tri4a_dnn_pretrain/7.dbn
  $cuda_cmd $dir/_train_nnet.log \
    steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
    data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1;
  
  # Decode the tri4a DNN ($dir is still exp/tri4a_dnn at this point).
  utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri4a_dnn exp/tri4a_dnn/graph_tgpr_5k || exit 1;
  steps/nnet/decode.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
    exp/tri4a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1;

  #Retrain system using new ali,
  #this is essential 
  #repeat this process for 3 times 
  # Realignment pass 1: re-align train and dev with the tri4a DNN ...
  srcdir=exp/tri4a_dnn
  steps/nnet/align.sh --nj 10 \
    data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali_si84_noisy || exit 1;
  steps/nnet/align.sh --nj 10 \
    data-fbank/dev_dt_05_noisy data/lang $srcdir ${srcdir}_ali_dt_05_noisy || exit 1;

  #no need to do pretraining again
  # ... then retrain from the same DBN with the fresh alignments (tri5a).
  dir=exp/tri5a_dnn
  ali=exp/tri4a_dnn_ali_si84_noisy
  ali_dev=exp/tri4a_dnn_ali_dt_05_noisy
  feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
  dbn=exp/tri4a_dnn_pretrain/7.dbn
  $cuda_cmd $dir/_train_nnet.log \
    steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
    data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1;

  utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri5a_dnn exp/tri5a_dnn/graph_tgpr_5k || exit 1;
  steps/nnet/decode.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
    exp/tri5a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1;
  
  
  # Realignment pass 2: align with tri5a, retrain as tri6a.
  srcdir=exp/tri5a_dnn
  steps/nnet/align.sh --nj 10 \
    data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali_si84_noisy || exit 1;
  steps/nnet/align.sh --nj 10 \
    data-fbank/dev_dt_05_noisy data/lang $srcdir ${srcdir}_ali_dt_05_noisy || exit 1;

  . ./path.sh
  dir=exp/tri6a_dnn
  ali=exp/tri5a_dnn_ali_si84_noisy
  ali_dev=exp/tri5a_dnn_ali_dt_05_noisy
  feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
  dbn=exp/tri4a_dnn_pretrain/7.dbn
  $cuda_cmd $dir/_train_nnet.log \
    steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
    data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1;

  utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri6a_dnn exp/tri6a_dnn/graph_tgpr_5k || exit 1;
  steps/nnet/decode.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
    exp/tri6a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1;

  # Realignment pass 3: align with tri6a, retrain as tri7a (the final
  # cross-entropy DNN, used below for sequence training).
  srcdir=exp/tri6a_dnn
  steps/nnet/align.sh --nj 10 \
    data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali_si84_noisy || exit 1;
  steps/nnet/align.sh --nj 10 \
    data-fbank/dev_dt_05_noisy data/lang $srcdir ${srcdir}_ali_dt_05_noisy || exit 1;

  . ./path.sh
  dir=exp/tri7a_dnn
  ali=exp/tri6a_dnn_ali_si84_noisy
  ali_dev=exp/tri6a_dnn_ali_dt_05_noisy
  feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
  dbn=exp/tri4a_dnn_pretrain/7.dbn
  $cuda_cmd $dir/_train_nnet.log \
    steps/nnet/train.sh --feature-transform $feature_transform --dbn $dbn --hid-layers 0 --learn-rate 0.008 \
    data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang $ali $ali_dev $dir || exit 1;

  utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri7a_dnn exp/tri7a_dnn/graph_tgpr_5k || exit 1;
  steps/nnet/decode.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
    exp/tri7a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy || exit 1;
  
  # Sequence training using sMBR criterion, we do Stochastic-GD 
  # with per-utterance updates. We use usually good acwt 0.1
  # Lattices are re-generated after 1st epoch, to get faster convergence.
  dir=exp/tri7a_dnn_smbr
  srcdir=exp/tri7a_dnn
  acwt=0.1

  # First we generate lattices and alignments:
  # awk -v FS="/" '{ NF_nosuffix=$NF; gsub(".gz","",NF_nosuffix); print NF_nosuffix gunzip -c "$0" |"; }' in 
  # steps/nnet/make_denlats.sh
  steps/nnet/align.sh --nj 10 --cmd "$train_cmd" \
      data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali || exit 1;
  steps/nnet/make_denlats.sh --nj 10 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
      data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_denlats || exit 1;

  # Re-train the DNN by 1 iteration of sMBR 
  steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 1 --acwt $acwt --do-smbr true \
      data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1
  # Decode (reuse HCLG graph)
  for ITER in 1; do
      steps/nnet/decode.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \
      --nnet $dir/${ITER}.nnet --acwt $acwt \
      exp/tri7a_dnn/graph_tgpr_5k data-fbank/dev_dt_05_noisy $dir/decode_tgpr_5k_dt_05_noisy_it${ITER} || exit 1;
      steps/nnet/decode.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \
      --nnet $dir/${ITER}.nnet --acwt $acwt \
      exp/tri7a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy_it${ITER} || exit 1;
  done 
  
  # Re-generate lattices, run 4 more sMBR iterations
  dir=exp/tri7a_dnn_smbr_i1lats
  srcdir=exp/tri7a_dnn_smbr
  acwt=0.1

  # Generate lattices and alignments:
  steps/nnet/align.sh --nj 10 --cmd "$train_cmd" \
      data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali || exit 1;
  steps/nnet/make_denlats.sh --nj 10 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt $acwt \
      data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_denlats || exit 1;

  # Re-train the DNN by 4 iterations of sMBR (--num-iters 4; an earlier
  # comment here said 1 iteration, which contradicted the flag).
  steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt $acwt --do-smbr true \
      data-fbank/train_si84_noisy data/lang $srcdir ${srcdir}_ali ${srcdir}_denlats $dir || exit 1

      # Decode (reuse HCLG graph)
  for ITER in 1 2 3 4; do
      steps/nnet/decode.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \
      --nnet $dir/${ITER}.nnet --acwt $acwt \
      exp/tri7a_dnn/graph_tgpr_5k data-fbank/dev_dt_05_noisy $dir/decode_tgpr_5k_dt_05_noisy_it${ITER} || exit 1;
      steps/nnet/decode.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \
      --nnet $dir/${ITER}.nnet --acwt $acwt \
      exp/tri7a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy $dir/decode_tgpr_5k_eval92_5k_noisy_it${ITER} || exit 1;
  done