Blame view
egs/farsdat/s5/run.sh
7.69 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
#!/bin/bash
#
# Copyright 2014  University of Tehran (Author: Bagher BabaAli)
#           2014  Brno University of Technology (Karel Vesely)
#           2014  Johns Hopkins University (Daniel Povey)
#
# farsdat, description of the database:
# http://www.assta.org/sst/SST-94-Vol-ll/cache/SST-94-VOL2-Chapter15-p20.pdf
#
# Top-level recipe: data prep -> MFCC features -> monophone -> tri1 (deltas)
# -> tri2 (LDA+MLLT) -> tri3 (LDA+MLLT+SAT) -> SGMM2 -> MMI+SGMM2 -> DNN.

. ./cmd.sh
[ -f path.sh ] && . ./path.sh
set -e

# Acoustic model parameters
numLeavesTri1=2500
numGaussTri1=15000
numLeavesMLLT=2500
numGaussMLLT=15000
numLeavesSAT=2500
numGaussSAT=15000
numGaussUBM=400
numLeavesSGMM=7000
numGaussSGMM=9000

# Parallel-job counts for feature extraction, training and decoding.
feats_nj=30
train_nj=30
decode_nj=30

echo ============================================================================
echo "                Data & Lexicon & Language Preparation                     "
echo ============================================================================

# Path to the FARSDAT corpus; site-specific (@JHU).
farsdat=/export/a09/bagher/developments/egs/farsdat/temp/database/ # @JHU

local/farsdat_data_prep.sh "$farsdat" || exit 1

local/farsdat_prepare_dict.sh

# Caution below: we insert optional-silence with probability 0.5, which is the
# default, but this is probably not appropriate for this setup, since silence
# appears also as a word in the dictionary and is scored. We could stop this
# by using the option --sil-prob 0.0, but apparently this makes results worse.
# (-> In sclite scoring the deletions of 'sil' are not scored as errors)
utils/prepare_lang.sh --position-dependent-phones false --num-sil-states 3 \
  data/local/dict "sil" data/local/lang_tmp data/lang

local/farsdat_prepare_lm.sh

echo ============================================================================
echo "         MFCC Feature Extraction & CMVN for Training and Test set         "
echo ============================================================================

# Now make MFCC features.
mfccdir=mfcc

for x in train dev test; do
  steps/make_mfcc.sh --cmd "$train_cmd" --nj "$feats_nj" \
    data/$x exp/make_mfcc/$x $mfccdir
  steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir
done

echo ============================================================================
echo "                     MonoPhone Training & Decoding                        "
echo ============================================================================

steps/train_mono.sh --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/mono

utils/mkgraph.sh data/lang_test_bg exp/mono exp/mono/graph

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/mono/graph data/dev exp/mono/decode_dev

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/mono/graph data/test exp/mono/decode_test

echo ============================================================================
echo "           tri1 : Deltas + Delta-Deltas Training & Decoding               "
echo ============================================================================

steps/align_si.sh --boost-silence 1.25 --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/mono exp/mono_ali

# Train tri1, which is deltas + delta-deltas, on train data.
steps/train_deltas.sh --cmd "$train_cmd" \
  $numLeavesTri1 $numGaussTri1 data/train data/lang exp/mono_ali exp/tri1

utils/mkgraph.sh data/lang_test_bg exp/tri1 exp/tri1/graph

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri1/graph data/dev exp/tri1/decode_dev

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri1/graph data/test exp/tri1/decode_test

echo ============================================================================
echo "                 tri2 : LDA + MLLT Training & Decoding                    "
echo ============================================================================

steps/align_si.sh --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/tri1 exp/tri1_ali

steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=3 --right-context=3" \
  $numLeavesMLLT $numGaussMLLT data/train data/lang exp/tri1_ali exp/tri2

utils/mkgraph.sh data/lang_test_bg exp/tri2 exp/tri2/graph

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri2/graph data/dev exp/tri2/decode_dev

steps/decode.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri2/graph data/test exp/tri2/decode_test

echo ============================================================================
echo "              tri3 : LDA + MLLT + SAT Training & Decoding                 "
echo ============================================================================

# Align tri2 system with train data.
steps/align_si.sh --nj "$train_nj" --cmd "$train_cmd" \
  --use-graphs true data/train data/lang exp/tri2 exp/tri2_ali

# From tri2 system, train tri3 which is LDA + MLLT + SAT.
steps/train_sat.sh --cmd "$train_cmd" \
  $numLeavesSAT $numGaussSAT data/train data/lang exp/tri2_ali exp/tri3

utils/mkgraph.sh data/lang_test_bg exp/tri3 exp/tri3/graph

steps/decode_fmllr.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri3/graph data/dev exp/tri3/decode_dev

steps/decode_fmllr.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  exp/tri3/graph data/test exp/tri3/decode_test

echo ============================================================================
echo "                        SGMM2 Training & Decoding                         "
echo ============================================================================

steps/align_fmllr.sh --nj "$train_nj" --cmd "$train_cmd" \
  data/train data/lang exp/tri3 exp/tri3_ali

#exit 0 # From this point you can run DNN : local/nnet/run_dnn.sh

steps/train_ubm.sh --cmd "$train_cmd" \
  $numGaussUBM data/train data/lang exp/tri3_ali exp/ubm4

steps/train_sgmm2.sh --cmd "$train_cmd" $numLeavesSGMM $numGaussSGMM \
  data/train data/lang exp/tri3_ali exp/ubm4/final.ubm exp/sgmm2_4

utils/mkgraph.sh data/lang_test_bg exp/sgmm2_4 exp/sgmm2_4/graph

steps/decode_sgmm2.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  --transform-dir exp/tri3/decode_dev exp/sgmm2_4/graph data/dev \
  exp/sgmm2_4/decode_dev

steps/decode_sgmm2.sh --nj "$decode_nj" --cmd "$decode_cmd" \
  --transform-dir exp/tri3/decode_test exp/sgmm2_4/graph data/test \
  exp/sgmm2_4/decode_test

echo ============================================================================
echo "                    MMI + SGMM2 Training & Decoding                       "
echo ============================================================================

steps/align_sgmm2.sh --nj "$train_nj" --cmd "$train_cmd" \
  --transform-dir exp/tri3_ali --use-graphs true --use-gselect true \
  data/train data/lang exp/sgmm2_4 exp/sgmm2_4_ali

steps/make_denlats_sgmm2.sh --nj "$train_nj" --sub-split "$train_nj" \
  --acwt 0.2 --lattice-beam 10.0 --beam 18.0 \
  --cmd "$decode_cmd" --transform-dir exp/tri3_ali \
  data/train data/lang exp/sgmm2_4_ali exp/sgmm2_4_denlats

steps/train_mmi_sgmm2.sh --acwt 0.2 --cmd "$decode_cmd" \
  --transform-dir exp/tri3_ali --boost 0.1 --drop-frames true \
  data/train data/lang exp/sgmm2_4_ali exp/sgmm2_4_denlats exp/sgmm2_4_mmi_b0.1

# Rescore the SGMM2 lattices with each MMI iteration's model.
for iter in 1 2 3 4; do
  steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
    --transform-dir exp/tri3/decode_dev data/lang_test_bg data/dev \
    exp/sgmm2_4/decode_dev exp/sgmm2_4_mmi_b0.1/decode_dev_it$iter
  steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
    --transform-dir exp/tri3/decode_test data/lang_test_bg data/test \
    exp/sgmm2_4/decode_test exp/sgmm2_4_mmi_b0.1/decode_test_it$iter
done

echo ============================================================================
echo "               DNN Hybrid Training & Decoding (Karel's recipe)            "
echo ============================================================================

local/nnet/run_dnn.sh

echo ============================================================================
echo "Finished successfully on" "$(date)"
echo ============================================================================

exit 0