#!/bin/tcsh
#
# lia_ne_tagg_ctm - run the LIA_NE named-entity tagging chain on a CTM file.
#
# Syntax: lia_ne_tagg_ctm <file in ctm> <file out ctm-ne> [-post] [-nocap]
#
#   <file in ctm>      input file, read by from_ctm2token
#   <file out ctm-ne>  where the final result is moved
#   -post              also run postprocess_ne_ester on the tagged output
#   -nocap             pass -minuscule to tagg_xml_tk and use the ".minu"
#                      form lexicon for the post-processing step
#
# The two options are only recognised as 3rd or 4th argument (either order);
# anything else in those positions is silently ignored.
#
# Environment: LIA_NE must point at the LIA_NE installation (bin/, script/
# and lex_data/ are looked up below it).
#
# Intermediate files are written next to the input file as <input>.tk,
# <input>.tk2, <input>.ne and <input>.ctm-ne; the first three are removed
# at the end.
#
# NOTE(review): the exit status of the individual tools is not checked, so
# a failing step does not stop the chain - confirm the tools return
# meaningful exit codes before adding such checks.

if ( $#argv < 2 ) then
 echo 'Syntax: lia_ne_tagg_ctm <file in ctm> <file out ctm-ne> [-post] [-nocap]'
 # Report the usage error to the caller instead of echo's success status.
 exit 1
endif

# Fail early with a readable message rather than tcsh's generic
# "Undefined variable" abort at the first use of $LIA_NE.
if ( ! $?LIA_NE ) then
 echo 'lia_ne_tagg_ctm: the LIA_NE environment variable is not set'
 exit 1
endif

set POST = 0
set FORM = "$LIA_NE/lex_data/ne_form.all"
set IFMINU = ""

# $3 / $4 expand to an empty string when the argument is absent.
if ( "$3" == "-post" || "$4" == "-post" ) then
 set POST = 1
endif

if ( "$3" == "-nocap" || "$4" == "-nocap" ) then
 set FORM = "$LIA_NE/lex_data/ne_form.all.minu"
 set IFMINU = "-minuscule"
endif

set PATTERN = "$LIA_NE/lex_data/pattern_correct_ester.txt"
# set PATTERN = $LIA_NE/pattern_correct_ester_basic.txt

# Quoted so that file names containing blanks stay a single word.
set NAME = "$1"
set FILEOUT = "$2"

# Process CTM
#
# CTM -> token file, then (judging by the script names) ISO-8859 -> UTF-8.
"$LIA_NE/bin/from_ctm2token" < "$NAME" > "${NAME}.tk"
"$LIA_NE/script/iso88592utf8" < "${NAME}.tk" > "${NAME}.tk2"

# $IFMINU is intentionally left unquoted: when empty it must vanish from the
# command line instead of being passed as an empty argument.
"$LIA_NE/bin/tagg_xml_tk" -doc "${NAME}.tk2" -ne "${NAME}.ne" $IFMINU

# Merge tokens and entities back into CTM form and convert the encoding back.
"$LIA_NE/bin/from_token2ctm_ne" -tk "${NAME}.tk2" -ne "${NAME}.ne" | "$LIA_NE/script/utf82iso8859" > "${NAME}.ctm-ne"

if ( $POST ) then
 # Post-process into a scratch file, then replace the tagged output with it.
 "$LIA_NE/bin/postprocess_ne_ester" -file "${NAME}.ctm-ne" -ctm -pattern "$PATTERN" \
 	 -gene "$LIA_NE/lex_data/generalize_lex.lst" -form "$FORM" > "${NAME}.ctm-ne2"
 mv "${NAME}.ctm-ne2" "${NAME}.ctm-ne"
endif

# When the requested output already is <input>.ctm-ne there is nothing to
# move (mv would only complain that both names are the same file).
if ( "$FILEOUT" != "${NAME}.ctm-ne" ) then
 mv "${NAME}.ctm-ne" "$FILEOUT"
endif

# Remove the intermediates only. <input>.ctm-ne is deliberately not listed:
# after a successful mv it no longer exists, and when it is the requested
# output (or the mv failed) deleting it would destroy the only result.
rm -f "${NAME}.tk" "${NAME}.tk2" "${NAME}.ne"