Blame view
tools/lia_ltbox/lia_biglex/script/make_arpa_sri.csh~
1008 Bytes
e6be5137b reinitialized pro... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
#!/bin/csh
#
# make_arpa_sri.csh - build a sorted ARPA language model with SRILM.
#
# Usage: make_arpa_sri.csh <lexicon> <corpus> <size n>
#
#   <lexicon>  vocabulary file handed to ngram-count via -vocab
#   <corpus>   text corpus, fed through unephraseparligne before counting
#   <size n>   n-gram order handed to ngram-count (-order) and sort_arpa (-n)
#
# Required environment variables:
#   SRILM_BIN  directory containing the SRILM ngram-count binary
#   LIA_TAGG   root of the LIA tools (bin/unephraseparligne, bin/sort_arpa)
#
# Files written in the current directory, where C is `basename <corpus> .txt`
# and L is `basename <lexicon> .lex`:
#   C.L.sri.n<n>.ngrams     n-gram counts
#   C.L.sri.n<n>.arpa       ARPA model as written by ngram-count
#   C.L.sri.n<n>.sort.arpa  ARPA model after sort_arpa
#
# Exit status: 0 on success, 1 on a usage error, a missing prerequisite,
# or when one of the steps leaves no (or an empty) output file.

if ( $#argv != 3 ) then
    echo 'Syntax: make_arpa_sri.csh <lexicon> <corpus> <size n>'
    exit 1
endif

# Fail with a readable message instead of csh's "Undefined variable" abort.
if ( ! $?SRILM_BIN ) then
    echo "make_arpa_sri.csh: environment variable SRILM_BIN is not set"
    exit 1
endif
if ( ! $?LIA_TAGG ) then
    echo "make_arpa_sri.csh: environment variable LIA_TAGG is not set"
    exit 1
endif

if ( ! -r "$1" ) then
    echo "make_arpa_sri.csh: cannot read lexicon: $1"
    exit 1
endif
if ( ! -r "$2" ) then
    echo "make_arpa_sri.csh: cannot read corpus: $2"
    exit 1
endif

# set sribin = /laboinfo/TAP/TOOLS/SRI-LM-1.4.2/bin/i686
set sribin = $SRILM_BIN

echo "=> make ARPA LM with: LEXICON=$1 CORPUS=$2"

set corpus_name  = `basename "$2" ".txt"`
set lexicon_name = `basename "$1" ".lex"`

# Common prefix of the three output files.
set out = "${corpus_name}.${lexicon_name}.sri.n$3"

# Start from a clean slate so the existence checks below are meaningful.
rm -f "${out}.ngrams" "${out}.arpa" "${out}.sort.arpa"

# Step 1: preprocess the corpus and count n-grams restricted to the lexicon.
$LIA_TAGG/bin/unephraseparligne -remove_cc -cut 80 < "$2" | \
    ${sribin}/ngram-count -order $3 \
        -text - \
        -sort \
        -vocab "$1" \
        -unk -map-unk "<UNK>" \
        -write "${out}.ngrams"

# Outputs are checked by file rather than by $status: the exit codes of the
# helper binaries are not known from this script.
if ( ! -e "${out}.ngrams" || -z "${out}.ngrams" ) then
    echo "make_arpa_sri.csh: n-gram counting produced no output (${out}.ngrams)"
    exit 1
endif

# Step 2: estimate the language model from the counts.
# NOTE(review): the numeric values following -kndiscount1/2/3 are kept
# verbatim from the original script; confirm against the ngram-count manual
# of the SRILM version in use that these switches take an argument.
${sribin}/ngram-count -order $3 \
    -read "${out}.ngrams" \
    -lm "${out}.arpa" \
    -kndiscount -kndiscount1 3 -kndiscount2 0 -kndiscount3 0 \
    -unk -vocab "$1" -map-unk "<UNK>"

if ( ! -e "${out}.arpa" || -z "${out}.arpa" ) then
    echo "make_arpa_sri.csh: LM estimation produced no output (${out}.arpa)"
    exit 1
endif

# Step 3: post-process the ARPA file with sort_arpa.
$LIA_TAGG/bin/sort_arpa -n $3 -unk < "${out}.arpa" > "${out}.sort.arpa"

# The redirection always creates the file, so only emptiness signals failure.
if ( -z "${out}.sort.arpa" ) then
    echo "make_arpa_sri.csh: sort_arpa produced an empty file (${out}.sort.arpa)"
    exit 1
endif

echo ' -> done'