Blame view
egs/hub4_english/s5/local/format_lms.sh
1.59 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
#!/bin/bash
#
# Copyright 2014 Nickolay V. Shmyrev
# Apache 2.0
#
# Formats the "small" and "big" 4-gram ARPA language models found under
# $local_lm_dir/data/arpa into lang directories derived from data/lang_nosp:
#   data/lang_nosp${lang_suffix}/G.fst     - compiled from the small LM
#   data/lang_nosp${lang_suffix}_rescore/  - built from the big LM by
#                                            utils/build_const_arpa_lm.sh
# Outputs that are already newer than their inputs are not rebuilt.
#
# lang_suffix and local_lm_dir are assigned before utils/parse_options.sh is
# sourced, presumably so they can be overridden from the command line
# (NOTE(review): confirm against parse_options.sh).

[ -f ./path.sh ] && . ./path.sh

set -e -o pipefail -u

lang_suffix=_test
local_lm_dir=data/local/local_lm

. utils/parse_options.sh

#arpa_lm=$local_lm_dir/data/arpa/4gram.arpa.gz
small_arpa_lm=$local_lm_dir/data/arpa/4gram_small.arpa.gz
big_arpa_lm=$local_lm_dir/data/arpa/4gram_big.arpa.gz

# Directories used below; named once so the paths printed in messages always
# match the paths that are actually tested and written.
src_lang=data/lang_nosp
test_lang=${src_lang}${lang_suffix}
rescore_lang=${test_lang}_rescore

# Fail early, on stderr, if any required input is missing.
for f in "$small_arpa_lm" "$big_arpa_lm" "$src_lang/words.txt"; do
  if [ ! -f "$f" ]; then
    echo "$0: expected file $f to exist" >&2
    exit 1
  fi
done

# Already enabled above; kept as a harmless re-assertion after sourcing
# utils/parse_options.sh.
set -e

# -T treats the destination as the directory itself, so the contents of
# $src_lang are merged into $test_lang rather than nested one level below it.
cp -rT "$src_lang/" "$test_lang"

# G.fst is compiled against words.txt as its symbol table, so it is stale if
# either the small LM or words.txt is newer than it.
if [ -f "$test_lang/G.fst" ] && [ "$test_lang/G.fst" -nt "$small_arpa_lm" ] && \
   [ "$test_lang/G.fst" -nt "$src_lang/words.txt" ]; then
  echo "$0: not regenerating $test_lang/G.fst as it already exists and "
  echo ".. is newer than the source LM and words.txt."
else
  # The trailing '|' makes the input a command whose stdout is read as the
  # ARPA file; '#0' is quoted only for readability (a '#' in the middle of a
  # word does not start a comment).
  arpa2fst --disambig-symbol='#0' --read-symbol-table="$src_lang/words.txt" \
    "gunzip -c $small_arpa_lm|" "$test_lang/G.fst"
  echo "$0: Checking how stochastic G is (the first of these numbers should be small):"
  # Informational only: a non-zero status here must not abort under set -e.
  fstisstochastic "$test_lang/G.fst" || true
  utils/validate_lang.pl --skip-determinization-check "$test_lang"
fi

# Rebuild the rescoring directory unless its G.carpa is newer than both the
# big LM and words.txt.
if [ -f "$rescore_lang/G.carpa" ] && [ "$rescore_lang/G.carpa" -nt "$big_arpa_lm" ] && \
   [ "$rescore_lang/G.carpa" -nt "$src_lang/words.txt" ]; then
  echo "$0: not regenerating $rescore_lang/ as it seems to already be up to date."
else
  utils/build_const_arpa_lm.sh "$big_arpa_lm" "$src_lang" "$rescore_lang" || exit 1
fi

exit 0