Blame view

tools/lia_ltbox/lia_biglex/script/make_arpa_sri.csh 1008 Bytes
e6be5137b   Jean-François Rey   reinitialized pro...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
  #!/bin/csh
  
  # Require exactly three arguments: <lexicon> <corpus> <size n>.
  # On a usage error, print the syntax and leave with a non-zero status so
  # that a calling script can detect the failure.  (A bare `exit` would
  # report the status of the preceding echo, i.e. success.)
  if ( $#argv != 3 ) then
   echo 'Syntax: make_arpa_sri.csh <lexicon> <corpus> <size n>'
   exit 1
  endif
  
  # Directory containing the SRILM executables (ngram-count is run from it),
  # taken from the SRILM_BIN environment variable.
  # Former hard-coded location, kept for reference:
  #   /laboinfo/TAP/TOOLS/SRI-LM-1.4.2/bin/i686
  if ( ! $?SRILM_BIN ) then
   echo 'make_arpa_sri.csh: environment variable SRILM_BIN is not set'
   exit 1
  endif
  # Quoted so that the value is assigned as a single word.
  set sribin = "$SRILM_BIN"
  
  # Build an ARPA n-gram language model from a corpus and a lexicon.
  #   $1 - lexicon file, passed to ngram-count as -vocab
  #   $2 - corpus file, fed to unephraseparligne on standard input
  #   $3 - n-gram order, passed as -order to ngram-count and -n to sort_arpa
  # Requires $sribin (set earlier in this script) and the LIA_TAGG
  # environment variable.
  # Outputs are written to the current directory under the common stem
  #   <corpus>.<lexicon>.sri.n<order>
  # where <corpus> is the basename of $2 without ".txt" and <lexicon> the
  # basename of $1 without ".lex":
  #   <stem>.ngrams     n-gram counts
  #   <stem>.arpa       model written by ngram-count -lm
  #   <stem>.sort.arpa  output of sort_arpa run on <stem>.arpa
  # Each step is validated by checking that its output file exists and is
  # not empty; the exit codes of the external tools are not relied upon.

  # Check every prerequisite before removing the results of a previous run.
  if ( ! $?LIA_TAGG ) then
   echo 'make_arpa_sri.csh: environment variable LIA_TAGG is not set'
   exit 1
  endif
  if ( ! -r "$1" ) then
   echo "make_arpa_sri.csh: cannot read lexicon file $1"
   exit 1
  endif
  if ( ! -r "$2" ) then
   echo "make_arpa_sri.csh: cannot read corpus file $2"
   exit 1
  endif

  echo "=> make ARPA LM with: LEXICON=$1  CORPUS=$2"
  set CORPUS = `basename "$2" ".txt"`
  set LEXICON = `basename "$1" ".lex"`
  set stem = "${CORPUS}.${LEXICON}.sri.n${3}"

  # Start from a clean slate so that stale files cannot pass for new ones.
  rm -f "${stem}.ngrams" "${stem}.arpa" "${stem}.sort.arpa"

  # Step 1: preprocess the corpus and count n-grams restricted to the lexicon.
  $LIA_TAGG/bin/unephraseparligne -remove_cc -cut 80 < "$2" | \
   $sribin/ngram-count -order "$3" \
    -text - \
    -sort \
    -vocab "$1" \
    -unk -map-unk "<UNK>" \
    -write "${stem}.ngrams"
  if ( ! -e "${stem}.ngrams" || -z "${stem}.ngrams" ) then
   echo "make_arpa_sri.csh: n-gram counting failed, ${stem}.ngrams is missing or empty"
   exit 1
  endif

  # Step 2: estimate the language model from the counts.
  $sribin/ngram-count -order "$3" \
   -read "${stem}.ngrams" \
   -lm "${stem}.arpa" \
   -kndiscount -kndiscount1 0 -kndiscount2 0 -kndiscount3 0 \
   -unk -vocab "$1" -map-unk "<UNK>"
  if ( ! -e "${stem}.arpa" || -z "${stem}.arpa" ) then
   echo "make_arpa_sri.csh: model estimation failed, ${stem}.arpa is missing or empty"
   exit 1
  endif

  # Step 3: produce the sorted ARPA file.
  $LIA_TAGG/bin/sort_arpa -n "$3" -unk < "${stem}.arpa" > "${stem}.sort.arpa"
  if ( -z "${stem}.sort.arpa" ) then
   echo "make_arpa_sri.csh: sort_arpa produced an empty ${stem}.sort.arpa"
   exit 1
  endif

  echo '    -> done'