egs/callhome_egyptian/s5/run.sh
#!/bin/bash
#
# Johns Hopkins University (Author : Gaurav Kumar, Daniel Povey)
# Recipe for CallHome Egyptian Arabic
# Made to integrate KALDI with JOSHUA for end-to-end ASR and SMT

. ./cmd.sh
. ./path.sh
mfccdir=`pwd`/mfcc

set -e

# Specify the location of the speech files, the transcripts and the lexicon.
# These are passed off to other scripts, including the one for data and lexicon prep.
eca_speech=/export/corpora/LDC/LDC97S45
eca_transcripts=/export/corpora/LDC/LDC97T19
eca_lexicon=/export/corpora/LDC/LDC99L22
sup_speech=/export/corpora/LDC/LDC2002S37
sup_transcripts=/export/corpora/LDC/LDC2002T38
h5_speech=/export/corpora/LDC/LDC2002S22
h5_transcripts=/export/corpora/LDC/LDC2002T39

split=local/splits

local/callhome_data_prep.sh $eca_speech $eca_transcripts $sup_speech $sup_transcripts $h5_speech $h5_transcripts

local/callhome_prepare_dict.sh $eca_lexicon

# Added c, j, v to the non-silence phones manually
utils/prepare_lang.sh data/local/dict "<unk>" data/local/lang data/lang

# Make sure that you do not use your test and your dev sets to train the LM.
# Some form of cross-validation is possible, where you decode your dev set based on an
# LM that is trained on everything but that conversation.
local/callhome_train_lms.sh $split
local/callhome_create_test_lang.sh

utils/fix_data_dir.sh data/local/data/train_all

steps/make_mfcc.sh --nj 20 --cmd "$train_cmd" data/local/data/train_all exp/make_mfcc/train_all $mfccdir || exit 1;

utils/fix_data_dir.sh data/local/data/train_all
utils/validate_data_dir.sh data/local/data/train_all

cp -r data/local/data/train_all data/train_all

# Creating data partitions for the pipeline
local/create_splits $split

# Now compute CMVN stats for the train, dev and test subsets
steps/compute_cmvn_stats.sh data/dev exp/make_mfcc/dev $mfccdir
steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test $mfccdir
steps/compute_cmvn_stats.sh data/sup exp/make_mfcc/sup $mfccdir
steps/compute_cmvn_stats.sh data/h5 exp/make_mfcc/h5 $mfccdir
steps/compute_cmvn_stats.sh data/train exp/make_mfcc/train $mfccdir

# Again from Dan's recipe: Reduced monophone training data
# Now-- there are 1.6 million utterances, and we want to start the monophone training
# on relatively short utterances (easier to align), but not only the very shortest
# ones (mostly uh-huh). So take the 100k shortest ones, and then take 10k random
# utterances from those.
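# A minimal sketch of how such a subset could be built with utils/subset_data_dir.sh,
# kept commented out because this recipe as written trains the monophone system on
# data/train directly; the data/train_100kshort and data/train_10k names are illustrative:
# utils/subset_data_dir.sh --shortest data/train 100000 data/train_100kshort
# utils/subset_data_dir.sh data/train_100kshort 10000 data/train_10k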
steps/train_mono.sh --nj 10 --cmd "$train_cmd" \
  data/train data/lang exp/mono0a

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/mono0a exp/mono0a_ali || exit 1;

steps/train_deltas.sh --cmd "$train_cmd" \
  1000 10000 data/train data/lang exp/mono0a_ali exp/tri1 || exit 1;

(utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph
 steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
   exp/tri1/graph data/dev exp/tri1/decode_dev)&

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri1 exp/tri1_ali || exit 1;

steps/train_deltas.sh --cmd "$train_cmd" \
  1400 15000 data/train data/lang exp/tri1_ali exp/tri2 || exit 1;

(
  utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1;
  steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1;
)&

steps/align_si.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri2 exp/tri2_ali || exit 1;

# Train tri3a, which is LDA+MLLT, on 100k data.
steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=3 --right-context=3" \
  1800 20000 data/train data/lang exp/tri2_ali exp/tri3a || exit 1;

(
  utils/mkgraph.sh data/lang_test exp/tri3a exp/tri3a/graph || exit 1;
  steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri3a/graph data/dev exp/tri3a/decode_dev || exit 1;
)&

# Next we'll use fMLLR and train with SAT (i.e. on
# fMLLR features)
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri3a exp/tri3a_ali || exit 1;

steps/train_sat.sh --cmd "$train_cmd" \
  2200 25000 data/train data/lang exp/tri3a_ali exp/tri4a || exit 1;

(
  utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph
  steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri4a/graph data/dev exp/tri4a/decode_dev
)&

steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train data/lang exp/tri4a exp/tri4a_ali || exit 1;

# Train a larger SAT system (more leaves and Gaussians)
steps/train_sat.sh --cmd "$train_cmd" \
  2600 30000 data/train data/lang exp/tri4a_ali exp/tri5a || exit 1;

(
  utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph
  steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri5a/graph data/dev exp/tri5a/decode_dev
)&

(
  steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri5a/graph data/test exp/tri5a/decode_test
  # Decode Supplement and H5
  steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri5a/graph data/sup exp/tri5a/decode_sup
  steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
    exp/tri5a/graph data/h5 exp/tri5a/decode_h5
)&

dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \
  --parallel-opts "--num-threads 16" --cmd "queue.pl --mem 1G")

dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \
  --parallel-opts "--gpu 1" --cmd "queue.pl --mem 1G")
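# Note: exp/tri5a_ali is consumed by the ensemble training below but does not
# appear to be created earlier in this script. If it is missing, it can be
# produced with the same fMLLR alignment step used above; a sketch mirroring
# the tri4a call (kept commented out):
# steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
#   data/train data/lang exp/tri5a exp/tri5a_ali || exit 1;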
steps/nnet2/train_pnorm_ensemble.sh \
  --mix-up 5000 --initial-learning-rate 0.008 --final-learning-rate 0.0008 \
  --num-hidden-layers 4 --pnorm-input-dim 2000 --pnorm-output-dim 200 \
  --cmd "$train_cmd" \
  "${dnn_gpu_parallel_opts[@]}" \
  --ensemble-size 4 --initial-beta 0.1 --final-beta 5 \
  data/train data/lang exp/tri5a_ali exp/tri6a_dnn

(
  steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \
    --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev \
    exp/tri5a/graph data/dev exp/tri6a_dnn/decode_dev
) &

# Decode test sets
(
  steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \
    --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_test \
    exp/tri5a/graph data/test exp/tri6a_dnn/decode_test
  steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \
    --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_sup \
    exp/tri5a/graph data/sup exp/tri6a_dnn/decode_sup
  steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parallel-opts " --num-threads 4" \
    --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_h5 \
    exp/tri5a/graph data/h5 exp/tri6a_dnn/decode_h5
) &

wait

# (TDNN + iVectors) training
# Note that the alignments used by run_tdnn.sh come from the pnorm-ensemble model.
# If you choose to skip ensemble training (which is slow), use the best
# fMLLR alignments available (tri4a).
# You can modify this in local/nnet3/run_tdnn.sh
local/nnet3/run_tdnn.sh

exit 0;
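# Once decoding has finished, word error rates can be inspected with the standard
# Kaldi helper; a sketch (adjust the experiment directories as needed):
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done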