egs/iam/v2/run_end2end.sh
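The recipe is stage-based: every variable set at the top of the script can be overridden on the command line, since utils/parse_options.sh is sourced below. For example (illustrative values), to resume from feature extraction with 30 parallel jobs:

    ./run_end2end.sh --stage 1 --nj 30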
#!/bin/bash
# Copyright 2017  Hossein Hadian

set -e
stage=0
nj=20
username=
password=
process_aachen_split=false
overwrite=false
# iam_database points to the database path on the JHU grid. If you have not
# already downloaded the database, you can set it to a local directory
# like "data/download" and follow the instructions
# in "local/prepare_data.sh" to download the database:
iam_database=/export/corpora5/handwriting_ocr/IAM

# wellington_database points to the database path on the JHU grid. The Wellington
# corpus contains two directories, WWC and WSC (the Wellington Written and
# Spoken Corpora). This corpus of written NZ English can be purchased here:
# "https://www.victoria.ac.nz/lals/resources/corpora-default"
wellington_database=/export/corpora5/Wellington/WWC/

. ./cmd.sh  ## You'll want to change cmd.sh to something that will work on
            ## your system. This relates to the queue.
. ./path.sh
. ./utils/parse_options.sh  # e.g. this parses the above options if supplied.

./local/check_tools.sh

if [ $stage -le 0 ]; then
  if [ -f data/train/text ] && ! $overwrite; then
    echo "$0: Not processing; the script was probably run from the wrong stage."
    echo "Exiting with status 1 to avoid data corruption."
    exit 1;
  fi
  echo "$0: Preparing data..."
  local/prepare_data.sh --download-dir "$iam_database" \
    --wellington-dir "$wellington_database" \
    --username "$username" --password "$password" \
    --process_aachen_split $process_aachen_split
fi

mkdir -p data/{train,test}/data
if [ $stage -le 1 ]; then
  echo "$(date) stage 1: getting allowed image widths for e2e training..."
  image/get_image2num_frames.py --feat-dim 40 data/train  # needed by the next command
  # The next command creates an "allowed_lengths.txt" file in data/train,
  # which local/make_features.py uses to force the images to have allowed
  # lengths. Consecutive allowed lengths differ by 10%.
  image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train

  echo "$(date) Extracting features, creating feats.scp file"
  local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/train
  steps/compute_cmvn_stats.sh data/train || exit 1;
  for set in val test; do
    local/extract_features.sh --nj $nj --cmd "$cmd" --augment true \
      --feat-dim 40 data/${set}
    steps/compute_cmvn_stats.sh data/${set} || exit 1;
  done
  utils/fix_data_dir.sh data/train
fi

if [ $stage -le 2 ]; then
  for set in train; do
    echo "$(date) stage 2: Performing augmentation; this will double the training data"
    local/augment_data.sh --nj $nj --cmd "$cmd" --feat-dim 40 \
      data/${set} data/${set}_aug data
    steps/compute_cmvn_stats.sh data/${set}_aug || exit 1;
  done
fi

if [ $stage -le 3 ]; then
  echo "$0: Preparing BPE..."
  # Get the non-silence phones, i.e. the set of unique characters that occur
  # in the training transcripts.
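  # Illustrative sketch (not actual corpus data): if the training text held
  # only the line "the cat", the inline Python below would emit the unique
  # characters "t h e c a" on a single line. These single characters seed
  # the vocabulary on which learn_bpe.py then learns its subword merges.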
  cut -d' ' -f2- data/train/text | \
    python3 <(
cat << "END"
import os, sys, io
infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
phone_dict = dict()
for line in infile:
    line_vect = line.strip().split()
    for word in line_vect:
        for phone in word:
            phone_dict[phone] = phone
for phone in phone_dict.keys():
    output.write(phone + ' ')
END
) > data/local/phones.txt

  cut -d' ' -f2- data/train/text > data/local/train_data.txt
  cat data/local/phones.txt data/local/train_data.txt | \
    utils/lang/bpe/prepend_words.py | \
    utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt

  for set in test train val train_aug; do
    cut -d' ' -f1 data/$set/text > data/$set/ids
    cut -d' ' -f2- data/$set/text | \
      utils/lang/bpe/prepend_words.py | \
      utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt | \
      sed 's/@@//g' > data/$set/bpe_text

    mv data/$set/text data/$set/text.old
    paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text
  done
fi

if [ $stage -le 4 ]; then
  echo "$0: Estimating a language model for decoding..."
  local/train_lm.sh
fi

if [ $stage -le 5 ]; then
  echo "$0: Preparing dictionary and lang..."
  local/prepare_dict.sh
  # This recipe uses byte-pair encoding; the silences are part of the words'
  # pronunciations, so we set --sil-prob to 0.0.
  utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 \
    --sil-prob 0.0 --position-dependent-phones false \
    data/local/dict "<sil>" data/lang/temp data/lang

  silphonelist=$(cat data/lang/phones/silence.csl)
  nonsilphonelist=$(cat data/lang/phones/nonsilence.csl)
  local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt > data/lang/topo
  utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang

  utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \
    data/local/dict/lexicon.txt data/lang
  utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \
    data/lang data/lang_rescore_6g
fi

if [ $stage -le 6 ]; then
  echo "$0: Calling the flat-start chain recipe..."
  local/chain/run_e2e_cnn.sh --train_set train_aug
fi

if [ $stage -le 7 ]; then
  echo "$0: Aligning the training data using the e2e chain model..."
  steps/nnet3/align.sh --nj 50 --cmd "$cmd" \
    --use-gpu false \
    --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \
    data/train_aug data/lang exp/chain/e2e_cnn_1b exp/chain/e2e_ali_train
fi

if [ $stage -le 8 ]; then
  echo "$0: Building a tree and training a regular chain model using the e2e alignments..."
  local/chain/run_cnn_e2eali.sh --train_set train_aug
fi
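# For reference, a sketch of what stage 3 does to a transcript line
# (illustrative, assuming "th" ended up among the learned BPE merges):
#   "the cat"  -> prepend_words.py  -> "|the |cat"
#   -> apply_bpe.py -c data/local/bpe.txt -> "|th@@ e |c@@ a@@ t"
#   -> sed 's/@@//g' -> "|th e |c a t"
# i.e. the "|" marks word beginnings, so word boundaries survive the
# subword segmentation and can be recovered after decoding.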