Blame view
egs/timit/s5/run.sh
9.84 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 |
#!/bin/bash
#
# Copyright 2013 Bagher BabaAli,
#           2014-2017 Brno University of Technology (Author: Karel Vesely)
#
# TIMIT, description of the database:
# http://perso.limsi.fr/lamel/TIMIT_NISTIR4930.pdf
#
# Hon and Lee paper on TIMIT, 1988, introduces mapping to 48 training phonemes,
# then re-mapping to 39 phonemes for scoring:
# http://repository.cmu.edu/cgi/viewcontent.cgi?article=2768&context=compsci
#
# Top-level recipe driver: each stage is announced by an `echo` banner and
# delegates the actual work to helper scripts under local/, utils/ and steps/.

# cmd.sh is expected to define the job-submission commands used by later
# stages ($train_cmd, $decode_cmd); path.sh is optional.
. ./cmd.sh
[ -f path.sh ] && . ./path.sh
# Abort on the first failing stage (enabled only after the files above are sourced).
set -e

# Acoustic model parameters
numLeavesTri1=2500
numGaussTri1=15000
numLeavesMLLT=2500
numGaussMLLT=15000
numLeavesSAT=2500
numGaussSAT=15000
numGaussUBM=400
numLeavesSGMM=7000
numGaussSGMM=9000

# Number of parallel jobs for feature extraction, training and decoding.
feats_nj=10
train_nj=30
decode_nj=5

echo ============================================================================
echo " Data & Lexicon & Language Preparation "
echo ============================================================================

# Location of the TIMIT corpus; site-specific, adjust to the local installation.
#timit=/export/corpora5/LDC/LDC93S1/timit/TIMIT # @JHU
timit=/mnt/matylda2/data/TIMIT/timit # @BUT

# Quoted so that a corpus path containing whitespace is passed as one argument.
local/timit_data_prep.sh "$timit" || exit 1

local/timit_prepare_dict.sh

# Caution below: we remove optional silence by setting "--sil-prob 0.0",
# in TIMIT the silence appears also as a word in the dictionary and is scored.
utils/prepare_lang.sh --sil-prob 0.0 --position-dependent-phones false --num-sil-states 3 \
  data/local/dict "sil" data/local/lang_tmp data/lang

local/timit_format_data.sh

echo ============================================================================
echo " MFCC Feature Extration & CMVN for Training and Test set "
echo ============================================================================

# Now make MFCC features.
# Directory handed to the feature scripts as their last argument.
mfccdir=mfcc

# Run MFCC extraction, then CMVN statistics, for every data split.
for x in train dev test; do
  steps/make_mfcc.sh --cmd "$train_cmd" --nj "$feats_nj" \
    "data/$x" "exp/make_mfcc/$x" "$mfccdir"
  steps/compute_cmvn_stats.sh "data/$x" "exp/make_mfcc/$x" "$mfccdir"
done

echo ============================================================================
echo " MonoPhone Training & Decoding "
echo ============================================================================

steps/train_mono.sh --nj "$train_nj" --cmd "$train_cmd" data/train data/lang exp/mono

# Build the decoding graph once, then decode both evaluation splits with it.
utils/mkgraph.sh data/lang_test_bg exp/mono exp/mono/graph

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/mono/graph data/dev exp/mono/decode_dev

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/mono/graph data/test exp/mono/decode_test

echo ============================================================================
echo " tri1 : Deltas + Delta-Deltas Training & Decoding "
echo ============================================================================

# Align the training data with the monophone system; output goes to exp/mono_ali.
steps/align_si.sh --boost-silence 1.25 --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/mono exp/mono_ali

# Train tri1, which is deltas + delta-deltas, on train data.
steps/train_deltas.sh --cmd "$train_cmd" \
  "$numLeavesTri1" "$numGaussTri1" data/train data/lang exp/mono_ali exp/tri1

# Build the tri1 decoding graph and decode the dev and test splits.
utils/mkgraph.sh data/lang_test_bg exp/tri1 exp/tri1/graph

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri1/graph data/dev exp/tri1/decode_dev

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri1/graph data/test exp/tri1/decode_test

echo ============================================================================
echo " tri2 : LDA + MLLT Training & Decoding "
echo ============================================================================

# Align the training data with tri1; output goes to exp/tri1_ali.
steps/align_si.sh --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/tri1 exp/tri1_ali

steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=3 --right-context=3" \
  "$numLeavesMLLT" "$numGaussMLLT" data/train data/lang exp/tri1_ali exp/tri2

# Build the tri2 decoding graph and decode the dev and test splits.
utils/mkgraph.sh data/lang_test_bg exp/tri2 exp/tri2/graph

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri2/graph data/dev exp/tri2/decode_dev

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri2/graph data/test exp/tri2/decode_test

echo ============================================================================
echo " tri3 : LDA + MLLT + SAT Training & Decoding "
echo ============================================================================

# Align tri2 system with train data.
steps/align_si.sh --nj "$train_nj" --cmd "$train_cmd" \
  --use-graphs true data/train data/lang exp/tri2 exp/tri2_ali

# From tri2 system, train tri3 which is LDA + MLLT + SAT.
steps/train_sat.sh --cmd "$train_cmd" \
  "$numLeavesSAT" "$numGaussSAT" data/train data/lang exp/tri2_ali exp/tri3

# Build the tri3 decoding graph and decode the dev and test splits.
utils/mkgraph.sh data/lang_test_bg exp/tri3 exp/tri3/graph

steps/decode_fmllr.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri3/graph data/dev exp/tri3/decode_dev

steps/decode_fmllr.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri3/graph data/test exp/tri3/decode_test

echo ============================================================================
echo " SGMM2 Training & Decoding "
echo ============================================================================

# Align the training data with tri3; exp/tri3_ali is consumed by the stages below.
steps/align_fmllr.sh --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/tri3 exp/tri3_ali

# The script stops here unconditionally: everything below this line is
# unreachable until this `exit 0` is removed.
exit 0 # From this point you can run Karel's DNN : local/nnet/run_dnn.sh

steps/train_ubm.sh --cmd "$train_cmd" \
  "$numGaussUBM" data/train data/lang exp/tri3_ali exp/ubm4

steps/train_sgmm2.sh --cmd "$train_cmd" "$numLeavesSGMM" "$numGaussSGMM" \
  data/train data/lang exp/tri3_ali exp/ubm4/final.ubm exp/sgmm2_4

utils/mkgraph.sh data/lang_test_bg exp/sgmm2_4 exp/sgmm2_4/graph

# A space is kept before each trailing backslash so the continuation can never
# fuse "$decode_cmd" with the option that follows it.
steps/decode_sgmm2.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  --transform-dir exp/tri3/decode_dev exp/sgmm2_4/graph data/dev \
  exp/sgmm2_4/decode_dev

steps/decode_sgmm2.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  --transform-dir exp/tri3/decode_test exp/sgmm2_4/graph data/test \
  exp/sgmm2_4/decode_test

echo ============================================================================
echo " MMI + SGMM2 Training & Decoding "
echo ============================================================================

steps/align_sgmm2.sh --nj "$train_nj" --cmd "$train_cmd" \
  --transform-dir exp/tri3_ali --use-graphs true --use-gselect true \
  data/train data/lang exp/sgmm2_4 exp/sgmm2_4_ali

steps/make_denlats_sgmm2.sh --nj "$train_nj" --sub-split "$train_nj" \
  --acwt 0.2 --lattice-beam 10.0 --beam 18.0 \
  --cmd "$decode_cmd" --transform-dir exp/tri3_ali \
  data/train data/lang exp/sgmm2_4_ali exp/sgmm2_4_denlats

steps/train_mmi_sgmm2.sh --acwt 0.2 --cmd "$decode_cmd" \
  --transform-dir exp/tri3_ali --boost 0.1 --drop-frames true \
  data/train data/lang exp/sgmm2_4_ali exp/sgmm2_4_denlats exp/sgmm2_4_mmi_b0.1

# Rescore the SGMM2 decodes of both splits with each of the four MMI iterations.
for iter in 1 2 3 4; do
  steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter "$iter" \
    --transform-dir exp/tri3/decode_dev data/lang_test_bg data/dev \
    exp/sgmm2_4/decode_dev "exp/sgmm2_4_mmi_b0.1/decode_dev_it$iter"

  steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter "$iter" \
    --transform-dir exp/tri3/decode_test data/lang_test_bg data/test \
    exp/sgmm2_4/decode_test "exp/sgmm2_4_mmi_b0.1/decode_test_it$iter"
done

echo ============================================================================
echo " DNN Hybrid Training & Decoding "
echo ============================================================================

# DNN hybrid system training parameters
# NOTE(review): dnn_mem_reqs is not referenced anywhere below; kept as-is.
dnn_mem_reqs="--mem 1G"
# Previously these options lived in a string named `dnn_extra_opts` while the
# training command expanded the (never defined) array `dnn_train_extra_opts`,
# so they were silently dropped. They are now an array under the name the
# command actually expands, which means they ARE passed to train_tanh.sh.
# NOTE(review): option spellings (e.g. `--num_epochs`) are kept verbatim;
# confirm they are accepted by steps/nnet2/train_tanh.sh.
dnn_train_extra_opts=(--num_epochs 20 --num-epochs-extra 10 --add-layers-period 1 --shrink-interval 3)

steps/nnet2/train_tanh.sh --mix-up 5000 --initial-learning-rate 0.015 \
  --final-learning-rate 0.002 --num-hidden-layers 2 \
  --num-jobs-nnet "$train_nj" --cmd "$train_cmd" "${dnn_train_extra_opts[@]}" \
  data/train data/lang exp/tri3_ali exp/tri4_nnet

# The decodes below are piped into `tee`; without pipefail the pipeline's
# status is tee's, so `set -e` would not notice a failed decode.
set -o pipefail

decode_extra_opts=(--num-threads 6)

# `mkdir -p` succeeds when the directory already exists, so no prior test is
# needed; the directory must exist before tee opens the log file inside it.
mkdir -p exp/tri4_nnet/decode_dev
steps/nnet2/decode.sh --cmd "$decode_cmd" --nj "$decode_nj" "${decode_extra_opts[@]}" \
  --transform-dir exp/tri3/decode_dev exp/tri3/graph data/dev \
  exp/tri4_nnet/decode_dev | tee exp/tri4_nnet/decode_dev/decode.log

mkdir -p exp/tri4_nnet/decode_test
steps/nnet2/decode.sh --cmd "$decode_cmd" --nj "$decode_nj" "${decode_extra_opts[@]}" \
  --transform-dir exp/tri3/decode_test exp/tri3/graph data/test \
  exp/tri4_nnet/decode_test | tee exp/tri4_nnet/decode_test/decode.log

echo ============================================================================
echo " System Combination (DNN+SGMM) "
echo ============================================================================

# Combine the DNN decode with each MMI-rescored SGMM2 decode, per split.
for iter in 1 2 3 4; do
  local/score_combine.sh --cmd "$decode_cmd" \
    data/dev data/lang_test_bg exp/tri4_nnet/decode_dev \
    "exp/sgmm2_4_mmi_b0.1/decode_dev_it$iter" "exp/combine_2/decode_dev_it$iter"

  local/score_combine.sh --cmd "$decode_cmd" \
    data/test data/lang_test_bg exp/tri4_nnet/decode_test \
    "exp/sgmm2_4_mmi_b0.1/decode_test_it$iter" "exp/combine_2/decode_test_it$iter"
done

echo ============================================================================
echo " DNN Hybrid Training & Decoding (Karel's recipe) "
echo ============================================================================

local/nnet/run_dnn.sh
#local/nnet/run_autoencoder.sh : an example, not used to build any system,

echo ============================================================================
echo " Getting Results [see RESULTS file] "
echo ============================================================================

# RESULTS is run as a bash script, once per evaluation split.
bash RESULTS dev
bash RESULTS test

echo ============================================================================
echo "Finished successfully on" "$(date)"
echo ============================================================================

exit 0