Blame view
egs/mgb5/s5/local/prepare_lm.sh
810 Bytes
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
#!/bin/bash

# Copyright 2019 QCRI (Author: Ahmed Ali)
# Apache 2.0

# Builds the language-model side of the decoding setup:
#   1. runs local/train_lms_srilm.sh on data/train/text (the LMs are later
#      read from data/srilm);
#   2. copies data/lang to data/lang_test and runs utils/format_lm.sh on it
#      with the 3-gram LM;
#   3. copies data/lang to data/lang_big, runs utils/format_lm.sh on it with
#      the 4-gram LM, then runs utils/build_const_arpa_lm.sh on the same LM.
#
# All paths are relative, so this must be run from the recipe directory that
# contains path.sh, local/, utils/ and data/.
# Any existing data/lang_test and data/lang_big directories are deleted and
# rebuilt from data/lang.

set -e -o pipefail

# Print a message on stderr and abort with a non-zero status.
# Defined here, before path.sh is sourced, so that the failure branch of the
# sourcing line below has a working handler (previously `die` was undefined
# and a missing path.sh produced "die: command not found" instead).
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# To create G.fst from ARPA language model
. ./path.sh || die "path.sh expected"

local/train_lms_srilm.sh --train-text data/train/text data/ data/srilm

# For basic decoding, let's use only a trigram LM.
# Start from a fresh copy of data/lang so stale files never leak in.
if [ -d data/lang_test/ ]; then
  rm -rf data/lang_test
fi
cp -R data/lang data/lang_test
lm=data/srilm/3gram.me.gz
utils/format_lm.sh data/lang_test "$lm" data/local/dict/lexicon.txt data/lang_test

# For decoding with a bigger LM, we build a 4-gram using the same
# transcription text.
if [ -d data/lang_big ]; then
  rm -rf data/lang_big
fi
cp -R data/lang data/lang_big
lm=data/srilm/4gram.me.gz
utils/format_lm.sh data/lang_big "$lm" data/local/dict/lexicon.txt data/lang_big

# Input and output lang directories are deliberately the same path here.
utils/build_const_arpa_lm.sh "$lm" data/lang_big data/lang_big

exit 0