egs/iam/v1/run.sh
#!/bin/bash
# Copyright  2017  Chun Chieh Chang
#            2017  Ashish Arora
#            2017  Hossein Hadian

set -e
stage=0
nj=20
decode_gmm=false
username=
password=
# iam_database points to the database path on the JHU grid. If you have not
# already downloaded the database you can set it to a local directory
# like "data/download" and follow the instructions
# in "local/prepare_data.sh" to download the database:
iam_database=/export/corpora5/handwriting_ocr/IAM

# wellington_database points to the database path on the JHU grid. The Wellington
# corpus contains two directories WWC and WSC (Wellington Written and Spoken Corpus).
# This corpus is of written NZ English that can be purchased here:
# "https://www.victoria.ac.nz/lals/resources/corpora-default"
wellington_database=/export/corpora5/Wellington/WWC/

train_set=train_aug
process_aachen_split=false
overwrite=false

. ./cmd.sh    ## You'll want to change cmd.sh to something that will work on your system.
              ## This relates to the queue.
. ./path.sh
. ./utils/parse_options.sh  # e.g. this parses the above options
                            # if supplied.
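# A hypothetical invocation, shown as a sketch only: the paths and credentials
# below are placeholders. Setting --iam_database to "data/download" makes
# local/prepare_data.sh download the corpus, which requires your own IAM
# registration username and password.
#   ./run.sh --stage 0 --nj 20 \
#     --iam_database data/download \
#     --username <your-iam-username> --password <your-iam-password>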
./local/check_tools.sh

if [ $stage -le 0 ]; then
  if [ -f data/train/text ] && ! $overwrite; then
    echo "$0: Not processing; the script has probably been run from the wrong stage."
    echo "Exiting with status 1 to avoid data corruption"
    exit 1;
  fi

  echo "$0: Preparing data..."
  local/prepare_data.sh --download-dir "$iam_database" \
    --wellington-dir "$wellington_database" \
    --username "$username" --password "$password" \
    --process_aachen_split $process_aachen_split
fi
mkdir -p data/{train,test,val}/data

if [ $stage -le 1 ]; then
  echo "$0: $(date) stage 1: getting allowed image widths for e2e training..."
  image/get_image2num_frames.py --feat-dim 40 data/train  # This will be needed for the next command
  # The next command creates an "allowed_lengths.txt" file in data/train,
  # which will be used by local/make_features.py to force the images to
  # have allowed lengths. The allowed lengths are spaced by a 10% difference in length.
  image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train

  echo "$0: $(date) Extracting features, creating feats.scp file"
  local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/train
  steps/compute_cmvn_stats.sh data/train || exit 1;
  for set in val test; do
    local/extract_features.sh --nj $nj --cmd "$cmd" --augment true \
      --feat-dim 40 data/${set}
    steps/compute_cmvn_stats.sh data/${set} || exit 1;
  done
  utils/fix_data_dir.sh data/train
fi

if [ $stage -le 2 ]; then
  for set in train; do
    echo "$0: $(date) stage 2: Performing augmentation; this will double the training data"
    local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/${set} data/${set}_aug data
    steps/compute_cmvn_stats.sh data/${set}_aug || exit 1;
  done
fi

if [ $stage -le 3 ]; then
  echo "$0: Estimating a language model for decoding..."
  # We do this stage before dict preparation because prepare_dict.sh
  # generates the lexicon from pocolm's wordlist.
  local/train_lm.sh --vocab-size 50k
fi

if [ $stage -le 4 ]; then
  echo "$0: Preparing dictionary and lang..."
  # This is for training. Use a large vocab size, e.g. 500k, to include all the
  # training words:
  local/prepare_dict.sh --vocab-size 500k --dir data/local/dict  # this is for training
  utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.95 \
    data/local/dict "<unk>" data/lang/temp data/lang
  silphonelist=`cat data/lang/phones/silence.csl`
  nonsilphonelist=`cat data/lang/phones/nonsilence.csl`
  local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt >data/lang/topo

  # This is for decoding. We use a 50k lexicon to be consistent with the papers
  # reporting WERs on IAM:
  local/prepare_dict.sh --vocab-size 50k --dir data/local/dict_50k  # this is for decoding
  utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 --sil-prob 0.95 \
    data/local/dict_50k "<unk>" data/lang_test/temp data/lang_test
  utils/format_lm.sh data/lang_test data/local/local_lm/data/arpa/3gram_big.arpa.gz \
    data/local/dict_50k/lexicon.txt data/lang_test

  echo "$0: Preparing the unk model for open-vocab decoding..."
  utils/lang/make_unk_lm.sh --ngram-order 4 --num-extra-ngrams 7500 \
    data/local/dict_50k exp/unk_lang_model
  utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 \
    --unk-fst exp/unk_lang_model/unk_fst.txt \
    data/local/dict_50k "<unk>" data/lang_unk/temp data/lang_unk
  silphonelist=`cat data/lang/phones/silence.csl`
  nonsilphonelist=`cat data/lang/phones/nonsilence.csl`
  local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang_unk/phones.txt >data/lang_unk/topo
  cp data/lang_test/G.fst data/lang_unk/G.fst
fi

if [ $stage -le 4 ]; then
  steps/train_mono.sh --nj $nj --cmd "$cmd" --totgauss 10000 data/$train_set \
    data/lang exp/mono
fi

if [ $stage -le 5 ] && $decode_gmm; then
  utils/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph
  steps/decode.sh --nj $nj --cmd "$cmd" exp/mono/graph data/test \
    exp/mono/decode_test
fi

if [ $stage -le 6 ]; then
  steps/align_si.sh --nj $nj --cmd "$cmd" data/$train_set data/lang \
    exp/mono exp/mono_ali
  steps/train_deltas.sh --cmd "$cmd" 500 20000 data/$train_set data/lang \
    exp/mono_ali exp/tri
fi

if [ $stage -le 7 ] && $decode_gmm; then
  utils/mkgraph.sh data/lang_test exp/tri exp/tri/graph
  steps/decode.sh --nj $nj --cmd "$cmd" exp/tri/graph data/test \
    exp/tri/decode_test
fi

if [ $stage -le 8 ]; then
  steps/align_si.sh --nj $nj --cmd "$cmd" data/$train_set data/lang \
    exp/tri exp/tri_ali
  steps/train_lda_mllt.sh --cmd "$cmd" \
    --splice-opts "--left-context=3 --right-context=3" 500 20000 \
    data/$train_set data/lang exp/tri_ali exp/tri2
fi

if [ $stage -le 9 ] && $decode_gmm; then
  utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph
  steps/decode.sh --nj $nj --cmd "$cmd" exp/tri2/graph \
    data/test exp/tri2/decode_test
fi

if [ $stage -le 10 ]; then
  steps/align_fmllr.sh --nj $nj --cmd "$cmd" --use-graphs true \
    data/$train_set data/lang exp/tri2 exp/tri2_ali
  steps/train_sat.sh --cmd "$cmd" 500 20000 \
    data/$train_set data/lang exp/tri2_ali exp/tri3
fi

if [ $stage -le 11 ] && $decode_gmm; then
  utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph
  steps/decode_fmllr.sh --nj $nj --cmd "$cmd" exp/tri3/graph \
    data/test exp/tri3/decode_test
fi

if [ $stage -le 12 ]; then
  steps/align_fmllr.sh --nj $nj --cmd "$cmd" --use-graphs true \
    data/$train_set data/lang exp/tri3 exp/tri3_ali
fi

if [ $stage -le 13 ]; then
  local/chain/run_cnn.sh --lang-test lang_unk --train_set $train_set
fi

if [ $stage -le 14 ]; then
  local/chain/run_cnn_chainali.sh --chain-model-dir exp/chain/cnn_1a --stage 2 --train_set $train_set
fi
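# Hypothetical usage notes (a sketch; adjust stage numbers and options to your
# setup): once the earlier stages have completed, the script can be resumed
# from a later stage, and the intermediate GMM systems can also be decoded by
# enabling the decode_gmm option defined at the top of this script, e.g.:
#   ./run.sh --stage 13 --train_set train_aug   # rerun only the chain-model stages
#   ./run.sh --decode_gmm true                  # also decode the mono/tri GMM systems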