Blame view
egs/chime2/s5/run.sh
14 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 |
#!/bin/bash
#
# Top-level recipe script for the CHiME-2 (WSJ0) setup.
#
# Stages, in the order they appear below:
#   1. Data preparation for the clean, reverberated and noisy (isolated) sets,
#      dictionary / lang directory creation and data formatting.
#   2. MFCC (+CMVN) and filterbank feature extraction for every data set.
#   3. Multi-condition GMM training on train_si84 = clean + reverb + noisy:
#      mono0a -> tri1 -> tri2a (deltas) / tri2b (LDA+MLLT) -> tri3b (SAT).
#   4. DNN-HMM training on the noisy si84 data only: RBM pre-training
#      (tri4a_dnn_pretrain), then four fine-tuning passes (tri4a..tri7a),
#      each trained on alignments produced by the previous DNN.
#   5. sMBR sequence training on top of tri7a_dnn (1 iteration, then 4 more
#      iterations after regenerating the lattices).
#
# Required environment: cmd.sh must define train_cmd, decode_cmd and cuda_cmd;
# path.sh is sourced before the DNN stages.
#
# Every stage is guarded with "|| exit 1" so that a failure stops the run
# instead of letting later stages work on missing or partial output.

. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.

# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.

# "goto" helper: as written this case statement is a no-op (0 never matches 1).
# NOTE(review): presumably the ";; esac" part is meant to be moved further down
# by hand so that everything in between is skipped -- confirm before relying on it.
case 0 in    #goto here
    1)
;;           #here:
esac

#exit 1;

# need wsj0 for the clean version and LMs
#wsj0=/mnt/spdb/wall_street_journal
wsj0=/export/corpora5/LDC/LDC93S6B
local/clean_wsj0_data_prep.sh "$wsj0" || exit 1;

#reverb=/mnt/spdb/CHiME/chime2-wsj0/reverberated
reverb=/export/corpora5/ChiME/chime2-wsj0/reverberated
local/reverb_wsj0_data_prep.sh "$reverb" || exit 1;

#noisy=/mnt/spdb/CHiME/chime2-wsj0/isolated
noisy=/export/corpora5/ChiME/chime2-wsj0/isolated
local/noisy_wsj0_data_prep.sh "$noisy" || exit 1;

local/wsj_prepare_dict.sh || exit 1;

utils/prepare_lang.sh data/local/dict "<SPOKEN_NOISE>" data/local/lang_tmp data/lang || exit 1;

local/chime_format_data.sh || exit 1;

# Now make MFCC features.
# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
mfccdir=mfcc
for x in test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean train_si84_clean; do
  steps/make_mfcc.sh --nj 10 --cmd "$train_cmd" \
    "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1;
  steps/compute_cmvn_stats.sh "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1;
done

mfccdir=mfcc
for x in test_eval92_5k_noisy dev_dt_05_noisy train_si84_noisy; do
  steps/make_mfcc.sh --nj 10 --cmd "$train_cmd" \
    "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1;
  steps/compute_cmvn_stats.sh "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1;
done

mfccdir=mfcc
for x in dev_dt_05_reverb train_si84_reverb; do
  steps/make_mfcc.sh --nj 10 --cmd "$train_cmd" \
    "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1;
  steps/compute_cmvn_stats.sh "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1;
done

# make fbank features
# Each data dir is copied to data-fbank/ first so the fbank features live in
# their own directory tree, separate from the MFCC data dirs under data/.
mkdir -p data-fbank
fbankdir=fbank
for x in test_eval92_clean test_eval92_5k_clean dev_dt_05_clean dev_dt_20_clean train_si84_clean; do
  cp -r "data/$x" "data-fbank/$x" || exit 1;
  steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \
    "data-fbank/$x" "exp/make_fbank/$x" "$fbankdir" || exit 1;
done

fbankdir=fbank
for x in test_eval92_5k_noisy dev_dt_05_noisy train_si84_noisy; do
  cp -r "data/$x" "data-fbank/$x" || exit 1;
  steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \
    "data-fbank/$x" "exp/make_fbank/$x" "$fbankdir" || exit 1;
done

fbankdir=fbank
for x in dev_dt_05_reverb train_si84_reverb; do
  cp -r "data/$x" "data-fbank/$x" || exit 1;
  steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \
    "data-fbank/$x" "exp/make_fbank/$x" "$fbankdir" || exit 1;
done

# begin train gmm systems using multi condition data
# train_si84 = clean+reverb+noisy,
for s in train_si84; do
  mkdir -p "data/$s"
  cp "data/${s}_clean/spk2gender" "data/$s/" || exit 1;
  for x in text wav.scp; do
    # Sort byte-wise so the result does not depend on the caller's locale
    # (Kaldi data directories are expected to be sorted under LC_ALL=C).
    cat "data/${s}_clean/$x" "data/${s}_reverb/$x" "data/${s}_noisy/$x" \
      | LC_ALL=C sort -k1 > "data/$s/$x"
  done
  # utt2spk: one "<utt-id> <spk-id>" pair per line, where the speaker id is
  # the first three characters of the utterance id.
  awk '{print $1}' "data/$s/wav.scp" \
    | perl -ane 'chop; m:^...:; print "$_ $&\n";' > "data/$s/utt2spk"
  utils/utt2spk_to_spk2utt.pl < "data/$s/utt2spk" > "data/$s/spk2utt"
done

mfccdir=mfcc
for x in train_si84; do
  steps/make_mfcc.sh --nj 10 --cmd "$train_cmd" \
    "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1;
  steps/compute_cmvn_stats.sh "data/$x" "exp/make_mfcc/$x" "$mfccdir" || exit 1;
done

fbankdir=fbank
for x in train_si84; do
  cp -r "data/$x" "data-fbank/$x" || exit 1;
  steps/make_fbank.sh --nj 10 --cmd "$train_cmd" \
    "data-fbank/$x" "exp/make_fbank/$x" "$fbankdir" || exit 1;
done

# Note: the --boost-silence option should probably be omitted by default
# for normal setups.  It doesn't always help. [it's to discourage non-silence
# models from modeling silence.]
steps/train_mono.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
  data/train_si84 data/lang exp/mono0a || exit 1;

utils/mkgraph.sh data/lang_test_tgpr_5k exp/mono0a exp/mono0a/graph_tgpr_5k || exit 1;
#steps/decode.sh --nj 8 \
#  exp/mono0a/graph_tgpr_5k data/test_eval92_5k_clean exp/mono0a/decode_tgpr_eval92_5k_clean
steps/decode.sh --nj 8 --cmd "$train_cmd" \
  exp/mono0a/graph_tgpr_5k data/test_eval92_5k_noisy exp/mono0a/decode_tgpr_eval92_5k_noisy || exit 1;

steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
  data/train_si84 data/lang exp/mono0a exp/mono0a_ali || exit 1;

steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
  2000 10000 data/train_si84 data/lang exp/mono0a_ali exp/tri1 || exit 1;

utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri1 exp/tri1/graph_tgpr_5k || exit 1;

#steps/decode.sh --nj 8 \
#  exp/tri1/graph_tgpr data/test_eval92_5k_clean exp/tri1/decode_tgpr_eval92_5k_clean || exit 1;
steps/decode.sh --nj 8 --cmd "$train_cmd" \
  exp/tri1/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri1/decode_tgpr_eval92_5k_noisy || exit 1;

# test various modes of LM rescoring (4 is the default one).
# This is just confirming they're equivalent.
#for mode in 1 2 3 4; do
#steps/lmrescore.sh --mode $mode --cmd "$decode_cmd" data/lang_test_{tgpr,tg} \
#  data/test_dev93 exp/tri1/decode_tgpr_dev93 exp/tri1/decode_tgpr_dev93_tg$mode || exit 1;
#done

# demonstrate how to get lattices that are "word-aligned" (arcs coincide with
# words, with boundaries in the right place).
#sil_label=`grep '!SIL' data/lang_test_tgpr/words.txt | awk '{print $2}'`
#steps/word_align_lattices.sh --cmd "$train_cmd" --silence-label $sil_label \
#  data/lang_test_tgpr exp/tri1/decode_tgpr_dev93 exp/tri1/decode_tgpr_dev93_aligned || exit 1;

steps/align_si.sh --nj 10 --cmd "$train_cmd" \
  data/train_si84 data/lang exp/tri1 exp/tri1_ali_si84 || exit 1;

# Train tri2a, which is deltas + delta-deltas, on si84 data.
steps/train_deltas.sh --cmd "$train_cmd" \
  2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2a || exit 1;

utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri2a exp/tri2a/graph_tgpr_5k || exit 1;

#steps/decode.sh --nj 8 \
#  exp/tri2a/graph_tgpr_5k data/test_eval92_5k_clean exp/tri2a/decode_tgpr_eval92_5k_clean || exit 1;
steps/decode.sh --nj 8 --cmd "$train_cmd" \
  exp/tri2a/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri2a/decode_tgpr_eval92_5k_noisy || exit 1;

#utils/mkgraph.sh data/lang_test_bg_5k exp/tri2a exp/tri2a/graph_bg5k
#steps/decode.sh --nj 8 \
#  exp/tri2a/graph_bg5k data/test_eval92_5k_clean exp/tri2a/decode_bg_eval92_5k_clean || exit 1;

# Train tri2b (LDA + MLLT) from the same tri1 alignments as tri2a.
steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=3 --right-context=3" \
  2500 15000 data/train_si84 data/lang exp/tri1_ali_si84 exp/tri2b || exit 1;

utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri2b exp/tri2b/graph_tgpr_5k || exit 1;
steps/decode.sh --nj 8 --cmd "$train_cmd" \
  exp/tri2b/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri2b/decode_tgpr_eval92_5k_noisy || exit 1;
#steps/decode.sh --nj 8 \
#  exp/tri2b/graph_tgpr data/test_eval92_clean exp/tri2b/decode_tgpr_eval92_clean || exit 1;

# Align tri2b system with si84 data.
steps/align_si.sh --nj 10 --cmd "$train_cmd" \
  --use-graphs true data/train_si84 data/lang exp/tri2b exp/tri2b_ali_si84 || exit 1;

# From 2b system, train 3b which is LDA + MLLT + SAT.
steps/train_sat.sh --cmd "$train_cmd" \
  2500 15000 data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri3b || exit 1;
utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri3b exp/tri3b/graph_tgpr_5k || exit 1;
steps/decode_fmllr.sh --nj 8 --cmd "$train_cmd" \
  exp/tri3b/graph_tgpr_5k data/test_eval92_5k_noisy exp/tri3b/decode_tgpr_eval92_5k_noisy || exit 1;

# From 3b multi-condition system, align noisy si84 data.
steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
  data/train_si84_noisy data/lang exp/tri3b exp/tri3b_ali_si84_noisy || exit 1;

steps/align_fmllr.sh --nj 10 --cmd "$train_cmd" \
  data/dev_dt_05_noisy data/lang exp/tri3b exp/tri3b_ali_dev_dt_05 || exit 1;

# begin training DNN-HMM system
# only on noisy si84

. ./path.sh

# RBM pretraining
# NOTE: $cuda_cmd is intentionally left unquoted where it is used as the
# command itself: it has to word-split into the launcher and its options.
dir=exp/tri4a_dnn_pretrain
$cuda_cmd "$dir/_pretrain_dbn.log" \
  steps/nnet/pretrain_dbn.sh --nn-depth 7 --rbm-iter 3 data-fbank/train_si84_noisy "$dir" || exit 1;

# BP (fine-tuning), initialised from the pre-trained 7-layer DBN and trained
# on the tri3b fMLLR alignments.
dir=exp/tri4a_dnn
ali=exp/tri3b_ali_si84_noisy
ali_dev=exp/tri3b_ali_dev_dt_05
feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
dbn=exp/tri4a_dnn_pretrain/7.dbn
$cuda_cmd "$dir/_train_nnet.log" \
  steps/nnet/train.sh --feature-transform "$feature_transform" --dbn "$dbn" --hid-layers 0 --learn-rate 0.008 \
  data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang "$ali" "$ali_dev" "$dir" || exit 1;

utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri4a_dnn exp/tri4a_dnn/graph_tgpr_5k || exit 1;
steps/nnet/decode.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
  exp/tri4a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy "$dir/decode_tgpr_5k_eval92_5k_noisy" || exit 1;

# Retrain system using new ali,
# this is essential
# repeat this process for 3 times
srcdir=exp/tri4a_dnn
steps/nnet/align.sh --nj 10 \
  data-fbank/train_si84_noisy data/lang "$srcdir" "${srcdir}_ali_si84_noisy" || exit 1;
steps/nnet/align.sh --nj 10 \
  data-fbank/dev_dt_05_noisy data/lang "$srcdir" "${srcdir}_ali_dt_05_noisy" || exit 1;

# no need to do pretraining again
dir=exp/tri5a_dnn
ali=exp/tri4a_dnn_ali_si84_noisy
ali_dev=exp/tri4a_dnn_ali_dt_05_noisy
feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
dbn=exp/tri4a_dnn_pretrain/7.dbn
$cuda_cmd "$dir/_train_nnet.log" \
  steps/nnet/train.sh --feature-transform "$feature_transform" --dbn "$dbn" --hid-layers 0 --learn-rate 0.008 \
  data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang "$ali" "$ali_dev" "$dir" || exit 1;

utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri5a_dnn exp/tri5a_dnn/graph_tgpr_5k || exit 1;
steps/nnet/decode.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
  exp/tri5a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy "$dir/decode_tgpr_5k_eval92_5k_noisy" || exit 1;

srcdir=exp/tri5a_dnn
steps/nnet/align.sh --nj 10 \
  data-fbank/train_si84_noisy data/lang "$srcdir" "${srcdir}_ali_si84_noisy" || exit 1;
steps/nnet/align.sh --nj 10 \
  data-fbank/dev_dt_05_noisy data/lang "$srcdir" "${srcdir}_ali_dt_05_noisy" || exit 1;

# path.sh is re-sourced before each remaining DNN pass so that each pass can
# be copy-pasted into a fresh shell on its own.
. ./path.sh
dir=exp/tri6a_dnn
ali=exp/tri5a_dnn_ali_si84_noisy
ali_dev=exp/tri5a_dnn_ali_dt_05_noisy
feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
dbn=exp/tri4a_dnn_pretrain/7.dbn
$cuda_cmd "$dir/_train_nnet.log" \
  steps/nnet/train.sh --feature-transform "$feature_transform" --dbn "$dbn" --hid-layers 0 --learn-rate 0.008 \
  data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang "$ali" "$ali_dev" "$dir" || exit 1;

utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri6a_dnn exp/tri6a_dnn/graph_tgpr_5k || exit 1;
steps/nnet/decode.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
  exp/tri6a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy "$dir/decode_tgpr_5k_eval92_5k_noisy" || exit 1;

srcdir=exp/tri6a_dnn
steps/nnet/align.sh --nj 10 \
  data-fbank/train_si84_noisy data/lang "$srcdir" "${srcdir}_ali_si84_noisy" || exit 1;
steps/nnet/align.sh --nj 10 \
  data-fbank/dev_dt_05_noisy data/lang "$srcdir" "${srcdir}_ali_dt_05_noisy" || exit 1;

. ./path.sh
dir=exp/tri7a_dnn
ali=exp/tri6a_dnn_ali_si84_noisy
ali_dev=exp/tri6a_dnn_ali_dt_05_noisy
feature_transform=exp/tri4a_dnn_pretrain/final.feature_transform
dbn=exp/tri4a_dnn_pretrain/7.dbn
$cuda_cmd "$dir/_train_nnet.log" \
  steps/nnet/train.sh --feature-transform "$feature_transform" --dbn "$dbn" --hid-layers 0 --learn-rate 0.008 \
  data-fbank/train_si84_noisy data-fbank/dev_dt_05_noisy data/lang "$ali" "$ali_dev" "$dir" || exit 1;

utils/mkgraph.sh data/lang_test_tgpr_5k exp/tri7a_dnn exp/tri7a_dnn/graph_tgpr_5k || exit 1;
steps/nnet/decode.sh --nj 8 --acwt 0.10 --config conf/decode_dnn.config \
  exp/tri7a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy "$dir/decode_tgpr_5k_eval92_5k_noisy" || exit 1;

# Sequence training using sMBR criterion, we do Stochastic-GD
# with per-utterance updates. We use usually good acwt 0.1
# Lattices are re-generated after 1st epoch, to get faster convergence.
dir=exp/tri7a_dnn_smbr
srcdir=exp/tri7a_dnn
acwt=0.1

# First we generate lattices and alignments:
# awk -v FS="/" '{ NF_nosuffix=$NF; gsub(".gz","",NF_nosuffix); print NF_nosuffix gunzip -c "$0" |"; }' in
# steps/nnet/make_denlats.sh
steps/nnet/align.sh --nj 10 --cmd "$train_cmd" \
  data-fbank/train_si84_noisy data/lang "$srcdir" "${srcdir}_ali" || exit 1;
steps/nnet/make_denlats.sh --nj 10 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt "$acwt" \
  data-fbank/train_si84_noisy data/lang "$srcdir" "${srcdir}_denlats" || exit 1;

# Re-train the DNN by 1 iteration of sMBR
steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 1 --acwt "$acwt" --do-smbr true \
  data-fbank/train_si84_noisy data/lang "$srcdir" "${srcdir}_ali" "${srcdir}_denlats" "$dir" || exit 1

# Decode (reuse HCLG graph)
for ITER in 1; do
  steps/nnet/decode.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \
    --nnet "$dir/${ITER}.nnet" --acwt "$acwt" \
    exp/tri7a_dnn/graph_tgpr_5k data-fbank/dev_dt_05_noisy "$dir/decode_tgpr_5k_dt_05_noisy_it${ITER}" || exit 1;
  steps/nnet/decode.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \
    --nnet "$dir/${ITER}.nnet" --acwt "$acwt" \
    exp/tri7a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy "$dir/decode_tgpr_5k_eval92_5k_noisy_it${ITER}" || exit 1;
done

# Re-generate lattices, run 4 more sMBR iterations
dir=exp/tri7a_dnn_smbr_i1lats
srcdir=exp/tri7a_dnn_smbr
acwt=0.1

# Generate lattices and alignments:
steps/nnet/align.sh --nj 10 --cmd "$train_cmd" \
  data-fbank/train_si84_noisy data/lang "$srcdir" "${srcdir}_ali" || exit 1;
steps/nnet/make_denlats.sh --nj 10 --cmd "$decode_cmd" --config conf/decode_dnn.config --acwt "$acwt" \
  data-fbank/train_si84_noisy data/lang "$srcdir" "${srcdir}_denlats" || exit 1;

# Re-train the DNN by 4 iterations of sMBR
steps/nnet/train_mpe.sh --cmd "$cuda_cmd" --num-iters 4 --acwt "$acwt" --do-smbr true \
  data-fbank/train_si84_noisy data/lang "$srcdir" "${srcdir}_ali" "${srcdir}_denlats" "$dir" || exit 1

# Decode (reuse HCLG graph)
for ITER in 1 2 3 4; do
  steps/nnet/decode.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \
    --nnet "$dir/${ITER}.nnet" --acwt "$acwt" \
    exp/tri7a_dnn/graph_tgpr_5k data-fbank/dev_dt_05_noisy "$dir/decode_tgpr_5k_dt_05_noisy_it${ITER}" || exit 1;
  steps/nnet/decode.sh --nj 8 --cmd "$decode_cmd" --config conf/decode_dnn.config \
    --nnet "$dir/${ITER}.nnet" --acwt "$acwt" \
    exp/tri7a_dnn/graph_tgpr_5k data-fbank/test_eval92_5k_noisy "$dir/decode_tgpr_5k_eval92_5k_noisy_it${ITER}" || exit 1;
done