Blame view
egs/gale_arabic/s5b/run.sh
3.83 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
#!/bin/bash -e # Copyright 2014 QCRI (author: Ahmed Ali) # Apache 2.0 num_jobs=120 num_decode_jobs=40 decode_gmm=true stage=0 overwrite=false dir1=/export/corpora/LDC/LDC2013S02/ dir2=/export/corpora/LDC/LDC2013S07/ dir3=/export/corpora/LDC/LDC2014S07/ text1=/export/corpora/LDC/LDC2013T17/ text2=/export/corpora/LDC/LDC2013T04/ text3=/export/corpora/LDC/LDC2014T17/ galeData=GALE . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system. ## This relates to the queue. . ./path.sh . ./utils/parse_options.sh # e.g. this parses the above options # if supplied. if [ $stage -le 0 ]; then if [ -f data/train/text ] && ! $overwrite; then echo "$0: Not processing, probably script have run from wrong stage" echo "Exiting with status 1 to avoid data corruption" exit 1; fi echo "$0: Preparing data..." local/prepare_data.sh --dir1 $dir1 --dir2 $dir2 --dir3 $dir3 \ --text1 $text1 --text2 $text2 --text3 $text3 echo "$0: Preparing lexicon and LM..." local/prepare_dict.sh utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang local/prepare_lm.sh utils/format_lm.sh data/lang data/local/lm/lm.gz \ data/local/dict/lexicon.txt data/lang_test fi mfccdir=mfcc if [ $stage -le 1 ]; then echo "$0: Preparing the test and train feature files..." for x in train test ; do steps/make_mfcc.sh --cmd "$train_cmd" --nj $num_jobs \ data/$x exp/make_mfcc/$x $mfccdir utils/fix_data_dir.sh data/$x # some files fail to get mfcc for many reasons steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir done fi if [ $stage -le 2 ]; then echo "$0: creating sub-set and training monophone system" utils/subset_data_dir.sh data/train 10000 data/train.10K || exit 1; steps/train_mono.sh --nj 40 --cmd "$train_cmd" \ data/train.10K data/lang exp/mono || exit 1; fi if [ $stage -le 3 ]; then echo "$0: Aligning data using monophone system" steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ data/train data/lang exp/mono exp/mono_ali || exit 1; echo "$0: training triphone system with delta features" steps/train_deltas.sh --cmd "$train_cmd" \ 2500 30000 data/train data/lang exp/mono_ali exp/tri1 || exit 1; fi if [ $stage -le 4 ] && $decode_gmm; then utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ exp/tri1/graph data/test exp/tri1/decode fi if [ $stage -le 5 ]; then echo "$0: Aligning data and retraining and realigning with lda_mllt" steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ data/train data/lang exp/tri1 exp/tri1_ali || exit 1; steps/train_lda_mllt.sh --cmd "$train_cmd" 4000 50000 \ data/train data/lang exp/tri1_ali exp/tri2b || exit 1; fi if [ $stage -le 6 ] && $decode_gmm; then utils/mkgraph.sh data/lang_test exp/tri2b exp/tri2b/graph steps/decode.sh --nj $num_decode_jobs --cmd "$decode_cmd" \ exp/tri2b/graph data/test exp/tri2b/decode fi if [ $stage -le 7 ]; then echo "$0: Aligning data and retraining and realigning with sat_basis" steps/align_si.sh --nj $num_jobs --cmd "$train_cmd" \ data/train data/lang exp/tri2b exp/tri2b_ali || exit 1; steps/train_sat_basis.sh --cmd "$train_cmd" \ 5000 100000 data/train data/lang exp/tri2b_ali exp/tri3b || exit 1; steps/align_fmllr.sh --nj $num_jobs --cmd "$train_cmd" \ data/train data/lang exp/tri3b exp/tri3b_ali || exit 1; fi if [ $stage -le 8 ] && $decode_gmm; then utils/mkgraph.sh data/lang_test exp/tri3b exp/tri3b/graph steps/decode_fmllr.sh --nj $num_decode_jobs --cmd \ "$decode_cmd" exp/tri3b/graph data/test exp/tri3b/decode fi if [ $stage -le 9 ]; then echo "$0: Training a regular chain model using the e2e alignments..." local/chain/run_tdnn.sh fi echo "$0: training succedded" exit 0 |