prepare_lm.sh
900 Bytes
#!/bin/bash
#
# Copyright 2014 Nickolay V. Shmyrev
# Apache 2.0
# Prepares the language-model directories used for decoding and rescoring
# from the Cantab-TEDLIUM ARPA models stored under db/cantab-TEDLIUM:
#
#   data/lang_nosp_test     fresh copy of data/lang_nosp with a G.fst compiled
#                           from cantab-TEDLIUM-pruned.lm3.gz
#   data/lang_nosp_rescore  built by utils/build_const_arpa_lm.sh from
#                           cantab-TEDLIUM-unpruned.lm4.gz, only when the
#                           directory does not exist yet
#
# All paths are relative, so the script must be run from the directory that
# contains data/, db/ and utils/. Takes no arguments.
# Exit status: 0 on success, 1 if an input file is missing or a build step fails.

# Pick up the environment setup (e.g. PATH) when a path.sh is present.
if [ -f path.sh ]; then . ./path.sh; fi

# Let the gunzip | arpa2fst pipeline below fail when either stage fails;
# by default only the status of the last stage would be seen.
set -o pipefail

src_lang=data/lang_nosp
test_lang=data/lang_nosp_test
rescore_lang=data/lang_nosp_rescore

arpa_lm=db/cantab-TEDLIUM/cantab-TEDLIUM-pruned.lm3.gz
if [ ! -f "$arpa_lm" ]; then
  echo "No such file $arpa_lm" >&2
  exit 1
fi

# Always rebuild the test directory from scratch as a copy of the base lang dir.
rm -rf "$test_lang"
cp -r "$src_lang" "$test_lang" || exit 1

# Compile the gzipped ARPA LM into G.fst using the copied word symbol table.
# Stop here on failure so no missing or partial G.fst is passed on.
gunzip -c "$arpa_lm" \
  | arpa2fst --disambig-symbol='#0' \
      --read-symbol-table="$test_lang/words.txt" - "$test_lang/G.fst" \
  || exit 1

# Informational report only: the exit status is intentionally not checked.
echo "$0: Checking how stochastic G is (the first of these numbers should be small):"
fstisstochastic "$test_lang/G.fst"

utils/validate_lang.pl "$test_lang" || exit 1

# The rescoring directory is built only once; an existing directory is reused
# as-is, even though the test directory above was just regenerated.
# NOTE(review): if build_const_arpa_lm.sh can leave a partial directory behind
# on failure, a rerun would skip it here - confirm and remove it by hand then.
if [ ! -d "$rescore_lang" ]; then
  big_arpa_lm=db/cantab-TEDLIUM/cantab-TEDLIUM-unpruned.lm4.gz
  if [ ! -f "$big_arpa_lm" ]; then
    echo "No such file $big_arpa_lm" >&2
    exit 1
  fi
  utils/build_const_arpa_lm.sh "$big_arpa_lm" "$test_lang" "$rescore_lang" || exit 1
fi

exit 0