#!/bin/csh
#
# make_arpa_sri.csh - build an ARPA n-gram language model with SRILM's
# ngram-count, then post-process it with LIA_TAGG's sort_arpa.
#
# Usage: make_arpa_sri.csh <lexicon.lex> <corpus.txt> <order>
#
#   $1  vocabulary file passed to ngram-count via -vocab; its basename
#       (with a trailing ".lex" removed) is used in the output file names
#   $2  text corpus; its basename (with a trailing ".txt" removed) is used
#       in the output file names
#   $3  n-gram order, passed to ngram-count -order and to sort_arpa -n
#
# Required environment variables:
#   SRILM_BIN  directory containing the SRILM ngram-count binary
#   LIA_TAGG   LIA_TAGG root; bin/unephraseparligne and bin/sort_arpa are used
#
# Files written in the current directory (removed first if they exist):
#   <corpus>.<lexicon>.sri.n<order>.ngrams      n-gram counts
#   <corpus>.<lexicon>.sri.n<order>.arpa        model produced by ngram-count
#   <corpus>.<lexicon>.sri.n<order>.sort.arpa   output of sort_arpa
#
# Exit status: 0 on success, 1 on a usage error, a missing environment
# variable, or the failure of any processing stage.

if ( $#argv != 3 ) then
    echo 'Syntax: make_arpa_sri.csh <lexicon.lex> <corpus.txt> <order>'
    # A bare "exit" would return the status of the echo above (0).
    exit 1
endif

if ( ! $?SRILM_BIN ) then
    echo 'make_arpa_sri.csh: SRILM_BIN is not set'
    exit 1
endif

if ( ! $?LIA_TAGG ) then
    echo 'make_arpa_sri.csh: LIA_TAGG is not set'
    exit 1
endif

# Example of a hard-coded location that was used before SRILM_BIN:
# set sribin = /laboinfo/TAP/TOOLS/SRI-LM-1.4.2/bin/i686
set sribin = "$SRILM_BIN"

echo "=> make ARPA LM with: LEXICON=$1 CORPUS=$2"

set CORPUS = `basename "$2" ".txt"`
set LEXICON = `basename "$1" ".lex"`

# Common stem shared by the three output files.
set prefix = "${CORPUS}.${LEXICON}.sri.n${3}"

# Start from a clean slate so a failed run never leaves stale results behind.
rm -f "$prefix.ngrams" "$prefix.arpa" "$prefix.sort.arpa"

# Stage 1: pre-process the corpus and count n-grams restricted to the lexicon.
# NOTE(review): -map-unk is given an empty string here; this may be a
# "<...>" token lost in transit - confirm the intended unknown-word label.
"$LIA_TAGG/bin/unephraseparligne" -remove_cc -cut 80 < "$2" | \
    "$sribin/ngram-count" -order "$3" \
        -text - \
        -sort \
        -vocab "$1" \
        -unk -map-unk "" \
        -write "$prefix.ngrams"
if ( $status != 0 ) then
    echo "make_arpa_sri.csh: n-gram counting failed"
    exit 1
endif

# Stage 2: estimate the language model from the counts written by stage 1.
"$sribin/ngram-count" -order "$3" \
    -read "$prefix.ngrams" \
    -lm "$prefix.arpa" \
    -kndiscount -kndiscount1 3 -kndiscount2 0 -kndiscount3 0 \
    -unk -vocab "$1" -map-unk ""
if ( $status != 0 ) then
    echo "make_arpa_sri.csh: language model estimation failed"
    exit 1
endif

# Stage 3: run the ARPA file through LIA_TAGG's sort_arpa.
"$LIA_TAGG/bin/sort_arpa" -n "$3" -unk < "$prefix.arpa" > "$prefix.sort.arpa"
if ( $status != 0 ) then
    echo "make_arpa_sri.csh: sort_arpa failed"
    exit 1
endif

echo ' -> done'