;; example.glm (3.27 KB)
;; File et94_3.glm, Updated 950117
;; Global rules for mapping one lexical equivalent to another,
;; for November 1994 CSR Hub and Spoke Evaluation Test Material
;; (Applies to .lsn, not .dot)
;; Rule-set header directives (the lines beginning with "*"):
;;   name        - identifier of this rule set
;;   desc        - human-readable description of this rule set
;;   copy_no_hit - presumably 'T' means text that no rule matches is copied
;;                 through unchanged; TODO confirm against the filter tool's
;;                 documentation
* name "et94_3.glm"
* desc "November 1994 Eval Test Global Orthographic Map"
* copy_no_hit = 'T'
;;
;; Verbal Punctuation equivalences
;; Each left side is a verbalized-punctuation token: the punctuation mark
;; fused to its spoken name, with hyphens joining multi-word names. Each right
;; side is the spoken name written as plain space-separated words.
!EXCLAMATION-POINT => EXCLAMATION POINT
"BEGIN-QUOTE => BEGIN QUOTE
"CLOSE-QUOTE => CLOSE QUOTE
"CLOSE-QUOTES => CLOSE QUOTES
"DOUBLE-QUOTE => DOUBLE QUOTE
"END-QUOTE => END QUOTE
"OPEN-QUOTE => OPEN QUOTE
"QUOTE => QUOTE
"UNQUOTE => UNQUOTE
#SHARP-SIGN => SHARP SIGN
%PERCENT => PERCENT
&AMPERSAND => AMPERSAND
'SINGLE-QUOTE => SINGLE QUOTE
(LEFT-PAREN => LEFT PAREN
(OPEN-PARENTHESIS => OPEN PARENTHESIS
(PARENTHESES => PARENTHESES
)CLOSE-PARENTHESIS => CLOSE PARENTHESIS
)RIGHT-PAREN => RIGHT PAREN
,COMMA => COMMA
--DASH => DASH
-DASH => DASH
-HYPHEN => HYPHEN
.DECIMAL => DECIMAL
.PERIOD => PERIOD
.POINT => POINT
/SLASH => SLASH
:COLON => COLON
;SEMI-COLON => SEMICOLON
?QUESTION-MARK => QUESTION MARK
END-QUOTE => END QUOTE
{LEFT-BRACE => LEFT BRACE
}RIGHT-BRACE => RIGHT BRACE
;;
;; Both ways in Training .lsn's, with no syntactic/semantic difference:
;; Each rule maps one spelling of a compound (split or joined) onto the other,
;; so that both variants are treated as the same lexical item.
AD WEEK => ADWEEK
AIR LINES => AIRLINES
ANTI DRUG => ANTIDRUG
AUTO MAKERS => AUTOMAKERS
BRIEF CASES => BRIEFCASES
BUY BACK => BUYBACK
BUY OUT => BUYOUT
CEASEFIRE => CEASE FIRE
COUNTER PRODUCTIVE => COUNTERPRODUCTIVE
FREE WHEELING => FREEWHEELING
HEALTHCARE => HEALTH CARE
HOTDOG => HOT DOG
LAP TOP => LAPTOP
LAP TOPS => LAPTOPS
LIGHT WEIGHT => LIGHTWEIGHT
MID AFTERNOON => MIDAFTERNOON
MID YEAR => MIDYEAR
MOUSE TRAP => MOUSETRAP
MULTI MILLION => MULTIMILLION
OUT STRIPPED => OUTSTRIPPED
PAPER WORK => PAPERWORK
POWER BOOK => POWERBOOK
POWER BOOKS => POWERBOOKS
PRE TAX => PRETAX
RE ELECTION => REELECTION
SEVEN FOLD => SEVENFOLD
SHORTSELLERS => SHORT SELLERS
STING RAY => STINGRAY
STORY BOARD => STORYBOARD
STORY BOARDS => STORYBOARDS
TRACK BALL => TRACKBALL
TRACK BALLS => TRACKBALLS
TWENTY SOMETHING => TWENTYSOMETHING
UNDER FUNDED => UNDERFUNDED
UNDER FUNDING => UNDERFUNDING
UNDER WAY => UNDERWAY
VIDEO TAPE => VIDEOTAPE
WAL MART => WALMART
WORK FORCE => WORKFORCE
WORLD WIDE => WORLDWIDE
;;
;; Both ways in test data with no syntactic/semantic difference:
;; The single rule below maps the form with a trailing period onto the form
;; without it.
BESICORP. => BESICORP
;;
;; Denise's tilde-words that appear in test, treated as verbalized punctuation:
;; Each rule drops the leading "~" and writes the hyphen-joined name as plain
;; space-separated words.
~EARNINGS => EARNINGS
~END-OF-TOPIC => END OF TOPIC
~GRAPH => GRAPH
~NEW-PARAGRAPH => NEW PARAGRAPH
~NEW-SENTENCE => NEW SENTENCE
~NEW-TOPIC => NEW TOPIC
~OPENING => OPENING
~PARAGRAPH => PARAGRAPH
;;
;; NO SEMANTIC OR ACOUSTIC DIFFERENCE *in this corpus* :
;; Mostly split-prefix compounds mapped to their joined spelling, plus a
;; spelling variant (GREENE) and an abbreviation expansion (ST.). Per the
;; heading, these equivalences are only claimed to hold for this corpus.
;; NOTE(review): "RE ELECTION" below repeats a rule that already appears in
;; the "Both ways in Training .lsn's" section; presumably redundant -- confirm
;; whether the filter tool tolerates duplicate rules before removing either.
CO AUTHOR => COAUTHOR
DE FACTO => DEFACTO
FEATHER WEIGHT => FEATHERWEIGHT
FLY WHEEL => FLYWHEEL
GREENE => GREEN
MIS INFORMATION => MISINFORMATION
NON GOVERNMENT => NONGOVERNMENT
OVER ALLOTMENT => OVERALLOTMENT
PRE SPLIT => PRESPLIT
RE DIRECTED => REDIRECTED
RE ELECTION => REELECTION
RE INVENTED => REINVENTED
ST. => SAINT
TELE COMMUTING => TELECOMMUTING
;;
;; BOTH WAYS IN COMMON DICTIONARY
;; Each rule maps one accepted spelling onto the other so that both score as
;; the same word.
RAQUET => RACKET
RAQUETS => RACKETS
ROM'S => ROMS
;;
;; Test cases
;; These rules exercise several rule forms:
;;   - both sides double-quoted with explicit surrounding spaces (OK)
;;   - a contraction expanded into two words (SHE'S)
;;   - bracketed rules whose right side lists "{a / b}" alternatives and which
;;     end in "/ [ ] __ [ ]"; presumably a left/right context specification
;;     with empty contexts -- TODO confirm against the filter documentation
;; NOTE(review): the last rule writes "__A" where the rule before it writes
;; "__"; it is unclear from this file whether that is intentional -- confirm.
" OK " => " OKAY "
SHE'S => SHE IS
[S.'S] => [{S.'S / S. IS / S. HAS}] / [ ] __ [ ]
[RECURSIVE] => [{RECURSIVE/RECUR {SIVE / SI VE}}] / [ ] __A [ ]