lia_make_datalex.csh
1.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/bin/csh
#
# Build the lexicon resources for the tagger & bracketer.
#
# Usage: lia_make_datalex.csh <lexicon.txt> <LM3class.arpa> [-accent]
#
# Parameters:
#   $1 = lexicon.txt (word, POS, freq, lemma)
#   $2 = 3-class LM in ARPA format
#   $3 = optional [-accent]: also build the resources for the French
#        re-accentuation tool
#
# Environment:
#   LIA_TAGG  directory whose bin/ subdirectory holds the lia_* tools
#
# Exit status: 1 when fewer than two arguments are given.

if ( $#argv < 2 ) then
    echo 'Syntax: lia_make_datalex.csh <lexicon.txt> <LM3class.arpa> [-accent]'
    exit 1
endif

# Keep the arguments in named, always-quoted variables so that paths
# containing blanks or glob characters survive word splitting.
set lex = "$1"
set lm = "$2"

# Evaluate the optional third argument exactly once.  An out-of-range $3
# expands to nothing in csh, so an unquoted `if ( $3 == "-accent" )` turns
# into `if ( == "-accent" )` and aborts with "if: Expression Syntax."
# whenever the option is omitted.
set accent = 0
if ( $#argv >= 3 ) then
    if ( "$3" == "-accent" ) set accent = 1
endif

echo "Make lexicon"
# `>!` forces overwriting even when the caller's environment sets noclobber.
$LIA_TAGG/bin/lia_eclate_lexique_union < "$lex" >! "${lex}.compte"
if ( $accent ) then
    $LIA_TAGG/bin/lia_eclate_lexique_union < "$lex" | $LIA_TAGG/bin/lia_produit_lex_reacc >! "${lex}.accent.compte"
endif

echo " - graph lexicon"
# Unique, sorted values of the first tab-separated field of the .compte file.
cut -f1 "${lex}.compte" | sort -u >! "${lex}.graf"
$LIA_TAGG/bin/lia_rajoute_code < "${lex}.graf" >! "${lex}.sirlex"
$LIA_TAGG/bin/lia_compile_lexique "${lex}.sirlex"
if ( $accent ) then
    echo " - accent"
    # Same steps as above, applied to the accent variant of the lexicon.
    cut -f1 "${lex}.accent.compte" | sort -u >! "${lex}.accent.graf"
    $LIA_TAGG/bin/lia_rajoute_code < "${lex}.accent.graf" >! "${lex}.accent.sirlex"
    $LIA_TAGG/bin/lia_compile_lexique "${lex}.accent.sirlex"
endif

echo " - class lexicon"
$LIA_TAGG/bin/lia_extract_lex_from_arpa < "$lm" >! "${lm}.sirlex"
$LIA_TAGG/bin/lia_compile_ml "${lm}.sirlex" "$lm" log_10 3g -dicho

echo "Make PMC model"
echo " - lemma"
$LIA_TAGG/bin/lia_compile_pmc "${lex}.sirlex" "${lm}.sirlex" "${lex}.compte" lemme log_10 "$lex"
if ( $accent ) then
    echo " - accent"
    $LIA_TAGG/bin/lia_compile_pmc "${lex}.accent.sirlex" "${lm}.sirlex" "${lex}.accent.compte" lemme log_10 "${lex}.accent"
endif

# Cleanup: remove the intermediate .compte / .sirlex files.  The .graf files
# are left in place, as in the original script.
echo Nettoyage
rm "${lex}.compte" "${lex}.sirlex" "${lm}.sirlex"
if ( $accent ) then
    rm "${lex}.accent.compte" "${lex}.accent.sirlex"
endif
echo Termine