Blame view
egs/mgb5/s5/run.sh
5.73 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 |
#!/bin/bash

# Copyright 2019 QCRI (Author:Ahmed Ali)
# Apache 2.0

# Top-level recipe driver. Runs, in order:
#   stage 1  data / dictionary / lang / LM preparation
#   stage 2  MFCC + CMVN feature extraction for train and dev
#   stage 3  monophone training
#   stage 4  triphone (delta) training
#   stage 5  larger triphone (delta) training
#   stage 6  LDA+MLLT training
#   stage 7  SAT+FMLLR training
#   stage 8  SGMM training
# followed (unconditionally) by local/chain/run_tdnn.sh and WER collection
# into the file RESULTS.
#
# After each training stage the dev set is decoded in a background subshell
# so the next training stage can start immediately. Every background job is
# tracked by PID and its exit status is checked; a failed decode is reported
# on stderr and makes the script exit non-zero at the very end, but does not
# prevent the remaining stages from running.
#
# A stage N block runs when $stage <= N, so setting stage to N resumes from
# stage N onwards.
# NOTE(review): stage is presumably overridable via utils/parse_options.sh
# (e.g. --stage 4) -- confirm against that script.

# Prints an error message to stderr and aborts the script.
die() {
  echo "$0: $*" >&2
  exit 1
}

stage=0

# initialization PATH
. ./path.sh || die "path.sh expected"
# initialization commands
. ./cmd.sh
. ./utils/parse_options.sh

set -e -o pipefail

nj=16
dev_nj=16

# Bookkeeping for background decode jobs: parallel arrays of PIDs and
# human-readable names, plus a flag that is set once any of them fails.
decode_pids=()
decode_names=()
decode_failed=0
# PID of the tri3b (SAT+FMLLR) decode job. Kept separately because the SGMM
# decode in stage 8 reads its output directory and must wait for it first.
tri3b_decode_pid=

# Waits for one background decode job and records a failure.
#   $1 - name used in the warning message
#   $2 - PID of the background job
wait_decode() {
  if ! wait "$2"; then
    echo "$0: WARNING: background decoding job '$1' (pid $2) failed" >&2
    decode_failed=1
  fi
}

if [ "$stage" -le 1 ]; then
  echo "Preparing data and training language models"
  local/prepare_data.sh
  local/prepare_dict.sh
  utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang
  local/prepare_lm.sh
fi

if [ "$stage" -le 2 ]; then
  # Feature extraction
  for x in train dev; do
    steps/make_mfcc.sh --nj "$nj" --cmd "$train_cmd" \
      "data/$x" "exp/make_mfcc/$x" mfcc
    steps/compute_cmvn_stats.sh "data/$x" "exp/make_mfcc/$x" mfcc
  done
fi

if [ "$stage" -le 3 ]; then
  ### Monophone
  echo "Starting monophone training."
  # Monophone system is trained on a 1000-utterance subset only.
  utils/subset_data_dir.sh data/train 1000 data/train.1k
  steps/train_mono.sh --nj "$nj" --cmd "$train_cmd" \
    data/train.1k data/lang exp/mono
  echo "Mono training done."

  (
    echo "Decoding the dev set using monophone models."
    utils/mkgraph.sh data/lang_test exp/mono exp/mono/graph
    steps/decode.sh --config conf/decode.config --nj "$dev_nj" \
      --cmd "$decode_cmd" \
      exp/mono/graph data/dev exp/mono/decode_dev
    echo "Monophone decoding done."
  ) &
  decode_pids+=("$!")
  decode_names+=("mono")
fi

if [ "$stage" -le 4 ]; then
  ### Triphone
  echo "Starting triphone training."
  steps/align_si.sh --nj "$nj" --cmd "$train_cmd" \
    data/train data/lang exp/mono exp/mono_ali
  steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" \
    3200 30000 data/train data/lang exp/mono_ali exp/tri1
  echo "Triphone training done."

  (
    echo "Decoding the dev set using triphone models."
    utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph
    steps/decode.sh --nj "$dev_nj" --cmd "$decode_cmd" \
      exp/tri1/graph data/dev exp/tri1/decode_dev
    steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
      data/lang_test/ data/lang_big/ data/dev \
      exp/tri1/decode_dev exp/tri1/decode_dev.rescored
    echo "Triphone decoding done."
  ) &
  decode_pids+=("$!")
  decode_names+=("tri1")
fi

if [ "$stage" -le 5 ]; then
  ## Triphones + delta delta
  # Training
  echo "Starting (larger) triphone training."
  steps/align_si.sh --nj "$nj" --cmd "$train_cmd" --use-graphs true \
    data/train data/lang exp/tri1 exp/tri1_ali
  steps/train_deltas.sh --cmd "$train_cmd" \
    4200 40000 data/train data/lang exp/tri1_ali exp/tri2a
  echo "Triphone (large) training done."

  (
    echo "Decoding the dev set using triphone(large) models."
    utils/mkgraph.sh data/lang_test exp/tri2a exp/tri2a/graph
    steps/decode.sh --nj "$dev_nj" --cmd "$decode_cmd" \
      exp/tri2a/graph data/dev exp/tri2a/decode_dev
    steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
      data/lang_test/ data/lang_big/ data/dev \
      exp/tri2a/decode_dev exp/tri2a/decode_dev.rescored
    echo "Triphone(large) decoding done."
  ) &
  decode_pids+=("$!")
  decode_names+=("tri2a")
fi

if [ "$stage" -le 6 ]; then
  ### Triphone + LDA and MLLT
  # Training
  echo "Starting LDA+MLLT training."
  steps/align_si.sh --nj "$nj" --cmd "$train_cmd" \
    data/train data/lang exp/tri2a exp/tri2a_ali
  steps/train_lda_mllt.sh --cmd "$train_cmd" \
    --splice-opts "--left-context=3 --right-context=3" \
    4200 40000 data/train data/lang exp/tri2a_ali exp/tri2b
  echo "LDA+MLLT training done."

  (
    echo "Decoding the dev set using LDA+MLLT models."
    utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph
    steps/decode.sh --nj "$dev_nj" --cmd "$decode_cmd" \
      exp/tri2b/graph data/dev exp/tri2b/decode_dev
    steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
      data/lang_test/ data/lang_big/ data/dev \
      exp/tri2b/decode_dev exp/tri2b/decode_dev.rescored
    echo "LDA+MLLT decoding done."
  ) &
  decode_pids+=("$!")
  decode_names+=("tri2b")
fi

if [ "$stage" -le 7 ]; then
  ### Triphone + LDA and MLLT + SAT and FMLLR
  # Training
  echo "Starting SAT+FMLLR training."
  steps/align_si.sh --nj "$nj" --cmd "$train_cmd" \
    --use-graphs true data/train data/lang exp/tri2b exp/tri2b_ali
  steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \
    data/train data/lang exp/tri2b_ali exp/tri3b
  echo "SAT+FMLLR training done."

  (
    echo "Decoding the dev set using SAT+FMLLR models."
    utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph
    steps/decode_fmllr.sh --nj "$dev_nj" --cmd "$decode_cmd" \
      exp/tri3b/graph data/dev exp/tri3b/decode_dev
    steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
      data/lang_test/ data/lang_big/ data/dev \
      exp/tri3b/decode_dev exp/tri3b/decode_dev.rescored
    echo "SAT+FMLLR decoding done."
  ) &
  # Not added to decode_pids: stage 8 (which always runs when stage 7 did)
  # waits for this job itself, and a PID must not be waited on twice.
  tri3b_decode_pid=$!
fi

if [ "$stage" -le 8 ]; then
  echo "Starting SGMM training."
  steps/align_fmllr.sh --nj "$nj" --cmd "$train_cmd" \
    data/train data/lang exp/tri3b exp/tri3b_ali

  steps/train_ubm.sh --cmd "$train_cmd" \
    600 data/train data/lang exp/tri3b_ali exp/ubm5b2

  steps/train_sgmm2.sh --cmd "$train_cmd" \
    5200 12000 data/train data/lang exp/tri3b_ali \
    exp/ubm5b2/final.ubm exp/sgmm2_5b2
  echo "SGMM training done."

  # The SGMM decode below is given --transform-dir exp/tri3b/decode_dev,
  # which is the output directory of the stage-7 background decode. Make
  # sure that job has finished before reading from it. When resuming with
  # stage=8 no such job exists in this run and the directory is expected to
  # be there from an earlier run.
  if [ -n "$tri3b_decode_pid" ]; then
    wait_decode "tri3b" "$tri3b_decode_pid"
    tri3b_decode_pid=
  fi

  (
    echo "Decoding the dev set using SGMM models"
    # Graph compilation
    utils/mkgraph.sh data/lang_test exp/sgmm2_5b2 exp/sgmm2_5b2/graph
    utils/mkgraph.sh data/lang_big/ exp/sgmm2_5b2 exp/sgmm2_5b2/graph_big

    steps/decode_sgmm2.sh --nj "$dev_nj" --cmd "$decode_cmd" \
      --transform-dir exp/tri3b/decode_dev \
      exp/sgmm2_5b2/graph data/dev exp/sgmm2_5b2/decode_dev
    steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
      data/lang_test/ data/lang_big/ data/dev \
      exp/sgmm2_5b2/decode_dev exp/sgmm2_5b2/decode_dev.rescored

    steps/decode_sgmm2.sh --nj "$dev_nj" --cmd "$decode_cmd" \
      --transform-dir exp/tri3b/decode_dev \
      exp/sgmm2_5b2/graph_big data/dev exp/sgmm2_5b2/decode_dev.big
    echo "SGMM decoding done."
  ) &
  decode_pids+=("$!")
  decode_names+=("sgmm2_5b2")
fi

# Barrier: collect every remaining background decode job. A bare `wait`
# would discard their exit statuses, so each PID is waited on individually.
for i in "${!decode_pids[@]}"; do
  wait_decode "${decode_names[$i]}" "${decode_pids[$i]}"
done

# Chain TDNN recipe; trace output (bash -x) and stderr go to the log file.
time bash -x ./local/chain/run_tdnn.sh &> chain_run_tdnn.log

#score
for x in exp/chain/*/decode* exp/*/decode*; do
  # Unmatched globs stay as literal patterns and some matches may be plain
  # files; neither is a decode directory.
  [ -d "$x" ] || continue
  # A decode directory without any WER line (e.g. a failed or unfinished
  # decode) is skipped instead of aborting the whole summary via pipefail.
  if ! grep -q WER "$x"/wer_* 2>/dev/null; then
    echo "$0: no WER found in $x, skipping" >&2
    continue
  fi
  grep WER "$x"/wer_* | utils/best_wer.sh
done | sort -k2 -n > RESULTS

if [ "$decode_failed" -ne 0 ]; then
  echo "$0: one or more background decoding jobs failed;" \
    "RESULTS may be incomplete" >&2
  exit 1
fi