egs/bentham/v1/run_end2end.sh
#!/bin/bash
# Copyright 2018  Ashish Arora (Johns Hopkins University)
#           2018  Desh Raj    (Johns Hopkins University)

set -e
stage=0
nj=20

# bentham_hwr_database points to the official database path on the JHU grid. If you have
# not already downloaded the data, you will have to download it first, name the Images and
# Ground Truth zipped files images.zip and gt.zip, and then point the path below to the
# location where your zipped files are present on the grid.
bentham_hwr_database=/export/corpora5/handwriting_ocr/hwr1/ICDAR-HTR-Competition-2015

# bentham_text_corpus points to the database path on the JHU grid.
# It contains all of the written works of Bentham, and can be used to train
# an LM for the HWR task. We have provided a script which downloads the data
# and saves it to the location provided below.
bentham_text_corpus=/export/corpora5/handwriting_ocr/hwr1/ICDAR-HTR-Competition-2015/Bentham-Text

. ./cmd.sh   ## You'll want to change cmd.sh to something that will work on your system.
             ## This relates to the queue.
. ./path.sh
. ./utils/parse_options.sh  # e.g. this parses the above options if supplied.

./local/check_tools.sh

if [ $stage -le 0 ]; then
  echo "$0: Preparing data..."
  local/prepare_data.sh --database-dir $bentham_hwr_database \
    --text-corpus-dir $bentham_text_corpus
fi

if [ $stage -le 1 ]; then
  image/get_image2num_frames.py data/train  # This will be needed for the next command.
  # The next command creates an "allowed_lengths.txt" file in data/train,
  # which will be used by local/make_features.py to enforce that the images
  # have allowed lengths. The allowed lengths are spaced apart by a 10%
  # difference in length.
  image/get_allowed_lengths.py --frame-subsampling-factor 4 10 data/train

  echo "$(date) Extracting features, creating feats.scp file"
  for dataset in train val test; do
    local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim 40 data/$dataset
    steps/compute_cmvn_stats.sh data/$dataset
  done
  utils/fix_data_dir.sh data/train
fi

if [ $stage -le 2 ]; then
  echo "$0: Preparing BPE..."
  # Collect the set of non-silence phones, i.e. the individual characters
  # seen in the training transcripts.
  cut -d' ' -f2- data/train/text | \
    python3 <( cat << "END"
import sys, io
# Read and write the transcripts as UTF-8.
infile = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
output = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
phone_dict = dict()
for line in infile:
    for word in line.strip().split():
        for phone in word:
            phone_dict[phone] = phone
for phone in phone_dict.keys():
    output.write(phone + ' ')
END
  ) > data/local/phones.txt

  cut -d' ' -f2- data/train/text > data/local/train_data.txt
  cat data/local/phones.txt data/local/train_data.txt | \
    utils/lang/bpe/prepend_words.py | \
    utils/lang/bpe/learn_bpe.py -s 700 > data/local/bpe.txt

  for set in test train val; do
    cut -d' ' -f1 data/$set/text > data/$set/ids
    cut -d' ' -f2- data/$set/text | \
      utils/lang/bpe/prepend_words.py | \
      utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt | \
      sed 's/@@//g' > data/$set/bpe_text
    mv data/$set/text data/$set/text.old
    paste -d' ' data/$set/ids data/$set/bpe_text > data/$set/text
  done
fi
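# A quick way to sanity-check the BPE round-trip above (a sketch, not part of
# the recipe; the exact subword splits depend on the merges learned into
# data/local/bpe.txt) is to pipe a sample line through the same commands:
#
#   echo "the quick brown fox" | \
#     utils/lang/bpe/prepend_words.py | \
#     utils/lang/bpe/apply_bpe.py -c data/local/bpe.txt | \
#     sed 's/@@//g'
#
# prepend_words.py marks each word with a leading "|" boundary symbol,
# apply_bpe.py splits words into subword units joined by "@@", and the sed
# command strips the "@@" markers so each unit becomes its own token.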
if [ $stage -le 3 ]; then
  echo "$0: Estimating a language model for decoding..."
  local/train_lm.sh
fi

if [ $stage -le 4 ]; then
  echo "$0: Preparing dictionary and lang..."
  local/prepare_dict.sh
  # This recipe uses byte-pair encoding; the silences are part of the words'
  # pronunciations, so we set --sil-prob to 0.0.
  utils/prepare_lang.sh --num-sil-states 4 --num-nonsil-states 8 \
    --sil-prob 0.0 --position-dependent-phones false \
    data/local/dict "<sil>" data/lang/temp data/lang

  silphonelist=`cat data/lang/phones/silence.csl`
  nonsilphonelist=`cat data/lang/phones/nonsilence.csl`
  local/gen_topo.py 8 4 4 $nonsilphonelist $silphonelist data/lang/phones.txt > data/lang/topo
  utils/lang/bpe/add_final_optional_silence.sh --final-sil-prob 0.5 data/lang

  utils/format_lm.sh data/lang data/local/local_lm/data/arpa/6gram_big.arpa.gz \
    data/local/dict/lexicon.txt data/lang
  utils/build_const_arpa_lm.sh data/local/local_lm/data/arpa/6gram_unpruned.arpa.gz \
    data/lang data/lang_rescore_6g
fi

if [ $stage -le 5 ]; then
  echo "$0: Calling the flat-start chain recipe..."
  local/chain/run_e2e_cnn.sh
fi

if [ $stage -le 6 ]; then
  echo "$0: Aligning the training data using the e2e chain model..."
  steps/nnet3/align.sh --nj 50 --cmd "$cmd" \
    --use-gpu false \
    --scale-opts '--transition-scale=1.0 --self-loop-scale=1.0 --acoustic-scale=1.0' \
    data/train data/lang exp/chain/e2e_cnn_1a exp/chain/e2e_ali_train
fi

if [ $stage -le 7 ]; then
  echo "$0: Building a tree and training a regular chain model using the e2e alignments..."
  local/chain/run_cnn_e2eali.sh
fi
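# Note: utils/parse_options.sh turns the variables defined at the top (stage,
# nj, and the database paths) into command-line flags, so an interrupted run
# can be resumed from a later stage with, e.g. (illustrative values):
#
#   ./run_end2end.sh --stage 4 --nj 30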