Blame view
egs/aspire/s5/local/fisher_create_test_lang.sh
1.62 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
#!/bin/bash
#
# Builds the test-time language directories:
#   data/lang_test    - a copy of data/lang plus G.fst compiled from the
#                       3-gram ARPA LM.
#   data/lang_test_fg - output directory handed to
#                       utils/build_const_arpa_lm.sh together with the
#                       4-gram ARPA LM.
# All paths are relative, so run this from the recipe directory.
# path.sh is sourced when present.
#
# Exit status: 0 on success, 1 if a required input is missing or a build
# step fails. The sanity checks in the middle only print diagnostics and
# never abort the script.

# Make a pipeline fail when any stage fails, so a broken gunzip or
# fsttablecompose is not hidden by the last command of the pipe.
set -o pipefail

if [ -f path.sh ]; then . ./path.sh; fi

mkdir -p data/lang_test || exit 1

arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
if [ ! -f "$arpa_lm" ]; then
  echo No such file "$arpa_lm"
  exit 1
fi

# -T: treat data/lang_test as the destination itself rather than copying
# data/lang into it as a subdirectory.
cp -rT data/lang data/lang_test || exit 1

# Compile the ARPA LM into G.fst. Without a valid G.fst nothing below is
# meaningful, so stop here on failure.
if ! gunzip -c "$arpa_lm" | \
  arpa2fst '--disambig-symbol=#0' \
    --read-symbol-table=data/lang_test/words.txt - data/lang_test/G.fst; then
  echo "$0: failed to build data/lang_test/G.fst from $arpa_lm" >&2
  exit 1
fi

# ---- Diagnostics only: failures below are reported but are not fatal. ----

echo "Checking how stochastic G is (the first of these numbers should be small):"
fstisstochastic data/lang_test/G.fst

## Check lexicon.
## just have a look and make sure it seems sane.
echo "First few lines of lexicon FST:"
fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt \
  data/lang/L.fst | head

echo Performing further checks

# Checking that G.fst is determinizable.
fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G.

# Checking that L_disambig.fst is determinizable.
fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L.

# Checking that disambiguated lexicon times G is determinizable
# Note: we do this with fstdeterminizestar not fstdeterminize, as
# fstdeterminize was taking forever (presumably relates to a bug
# in this version of OpenFst that makes determinization slow for
# some case).
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
  fstdeterminizestar >/dev/null || echo Error

# Checking that LG is stochastic:
fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \
  fstisstochastic || echo "[log:] LG is not stochastic"

# ---- End of diagnostics. ----

# Build the 4-gram directory; only report success if this step worked.
if ! utils/build_const_arpa_lm.sh \
  data/local/lm/4gram-mincount/lm_unpruned.gz data/lang data/lang_test_fg; then
  echo "$0: utils/build_const_arpa_lm.sh failed" >&2
  exit 1
fi

echo "$0 succeeded"