lia_ne_tagg_ctm
1.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/bin/tcsh
#
# lia_ne_tagg_ctm - run the LIA_NE tool chain on a CTM file.
#
# Usage: lia_ne_tagg_ctm <file in ctm> <file out ctm-ne> [-post] [-nocap]
#
#   <file in ctm>      input file; intermediate files are written next to it
#                      as <file in ctm>.tk, .tk2, .ne and .ctm-ne
#   <file out ctm-ne>  path the final result is moved to
#   -post              additionally run postprocess_ne_ester on the result
#   -nocap             pass -minuscule to tagg_xml_tk and use the
#                      ne_form.all.minu form file for the -post step
#
# The two options are only recognised as the 3rd or 4th argument.
#
# Environment: LIA_NE must point at the installation directory holding the
# bin/, script/ and lex_data/ sub-directories used below.
#
# Exit status: 1 on a usage error or when LIA_NE is not set.
# NOTE(review): the exit statuses of the individual tools are not checked,
# because nothing here shows that they return meaningful codes - confirm
# before adding error handling.

if ( $#argv < 2 ) then
    echo 'Syntax: lia_ne_tagg_ctm <file in ctm> <file out ctm-ne> [-post] [-nocap]'
    # Report misuse to the caller instead of exiting with status 0.
    exit 1
endif

# Fail with a readable message rather than the terse Undefined variable abort.
if ( ! $?LIA_NE ) then
    echo 'lia_ne_tagg_ctm: the LIA_NE environment variable is not set'
    exit 1
endif

# Defaults: no post-processing, regular form file, no extra tagger flag.
set POST = 0
set NOCAP = 0
set FORM = "${LIA_NE}/lex_data/ne_form.all"
set IFMINU = ""

if ( "$3" == "-post" || "$4" == "-post" ) then
    set POST = 1
endif

if ( "$3" == "-nocap" || "$4" == "-nocap" ) then
    # NOCAP is kept as a flag for symmetry with POST; it is not read later
    # in this script.
    set NOCAP = 1
    set FORM = "${LIA_NE}/lex_data/ne_form.all.minu"
    set IFMINU = "-minuscule"
endif

set PATTERN = "${LIA_NE}/lex_data/pattern_correct_ester.txt"
# Alternative pattern file:
# set PATTERN = $LIA_NE/pattern_correct_ester_basic.txt

set NAME = "$1"
set FILEOUT = "$2"

# Process CTM
#
# 1. from_ctm2token reads the input on stdin and writes NAME.tk.
"${LIA_NE}/bin/from_ctm2token" < "${NAME}" > "${NAME}.tk"

# 2. iso88592utf8 filters NAME.tk into NAME.tk2 (presumably an ISO-8859 to
#    UTF-8 conversion, judging by the script name - confirm).
"${LIA_NE}/script/iso88592utf8" < "${NAME}.tk" > "${NAME}.tk2"

# 3. tagg_xml_tk reads NAME.tk2 and is given NAME.ne as its -ne file.
#    IFMINU is left unquoted on purpose so that it vanishes when empty.
"${LIA_NE}/bin/tagg_xml_tk" -doc "${NAME}.tk2" -ne "${NAME}.ne" $IFMINU

# 4. from_token2ctm_ne combines NAME.tk2 and NAME.ne; its output is piped
#    through utf82iso8859 into NAME.ctm-ne.
"${LIA_NE}/bin/from_token2ctm_ne" -tk "${NAME}.tk2" -ne "${NAME}.ne" | "${LIA_NE}/script/utf82iso8859" > "${NAME}.ctm-ne"

# 5. Optional post-processing; the result replaces NAME.ctm-ne.
if ( $POST ) then
    "${LIA_NE}/bin/postprocess_ne_ester" -file "${NAME}.ctm-ne" -ctm -pattern "${PATTERN}" \
        -gene "${LIA_NE}/lex_data/generalize_lex.lst" -form "${FORM}" > "${NAME}.ctm-ne2"
    mv "${NAME}.ctm-ne2" "${NAME}.ctm-ne"
endif

# Deliver the result. When the requested output path already is NAME.ctm-ne
# there is nothing to move; skipping mv avoids its same-file error.
if ( "${FILEOUT}" != "${NAME}.ctm-ne" ) then
    mv "${NAME}.ctm-ne" "${FILEOUT}"
endif

# Remove the intermediate files only. NAME.ctm-ne is deliberately not listed:
# after a successful mv it no longer exists, and if the mv failed or the
# output path is NAME.ctm-ne itself, deleting it would destroy the result.
rm -f "${NAME}.tk" "${NAME}.tk2" "${NAME}.ne"