Blame view

tools/sctk-2.4.10/src/hubscr/test1-sastt.base/example.glm 3.27 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
  ;; File et94_3.glm, Updated 950117
  ;; Global rules for mapping each lexical variant to its equivalent form,
  ;; for the November 1994 CSR Hub and Spoke Evaluation Test Material.
  ;; (Applies to .lsn files, not .dot files.)
  * name "et94_3.glm"
  * desc "November 1994 Eval Test Global Orthographic Map"
  * copy_no_hit = 'T'
  ;;
  ;; Verbal Punctuation equivalences
   !EXCLAMATION-POINT => EXCLAMATION POINT
   "BEGIN-QUOTE => BEGIN QUOTE
   "CLOSE-QUOTE  => CLOSE QUOTE
   "CLOSE-QUOTES => CLOSE QUOTES
   "DOUBLE-QUOTE => DOUBLE QUOTE
   "END-QUOTE    => END QUOTE
   "OPEN-QUOTE => OPEN QUOTE
   "QUOTE   => QUOTE
   "UNQUOTE      => UNQUOTE
   #SHARP-SIGN => SHARP SIGN
   %PERCENT => PERCENT
   &AMPERSAND => AMPERSAND
   'SINGLE-QUOTE  => SINGLE QUOTE
   (LEFT-PAREN  => LEFT PAREN
   (OPEN-PARENTHESIS => OPEN PARENTHESIS
   (PARENTHESES => PARENTHESES
   )CLOSE-PARENTHESIS => CLOSE PARENTHESIS
   )RIGHT-PAREN => RIGHT PAREN
   ,COMMA   => COMMA
   --DASH   => DASH
   -DASH => DASH
   -HYPHEN  => HYPHEN
   .DECIMAL => DECIMAL
   .PERIOD  => PERIOD
   .POINT   => POINT
   /SLASH => SLASH
   :COLON   => COLON
   ;SEMI-COLON  => SEMICOLON
   ?QUESTION-MARK => QUESTION MARK
   END-QUOTE => END QUOTE
   {LEFT-BRACE  => LEFT BRACE
   }RIGHT-BRACE => RIGHT BRACE
  ;;
  ;; Both forms occur in the training .lsn files, with no syntactic/semantic difference:
   AD WEEK => ADWEEK
   AIR LINES => AIRLINES
   ANTI DRUG  => ANTIDRUG
   AUTO MAKERS => AUTOMAKERS
   BRIEF CASES => BRIEFCASES
   BUY BACK => BUYBACK
   BUY OUT => BUYOUT
   CEASEFIRE => CEASE FIRE
   COUNTER PRODUCTIVE => COUNTERPRODUCTIVE
   FREE WHEELING => FREEWHEELING
   HEALTHCARE => HEALTH CARE
   HOTDOG => HOT DOG
   LAP TOP => LAPTOP
   LAP TOPS => LAPTOPS
   LIGHT WEIGHT => LIGHTWEIGHT
   MID AFTERNOON => MIDAFTERNOON
   MID YEAR => MIDYEAR
   MOUSE TRAP => MOUSETRAP
   MULTI MILLION => MULTIMILLION
   OUT STRIPPED => OUTSTRIPPED
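  ;; NOTE(review): the rule below maps PAPERWORK to itself, so it has no effect;
  ;; presumably one side was meant to be PAPER WORK -- confirm against the original map.
   PAPERWORK => PAPERWORK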
   POWER BOOK => POWERBOOK
   POWER BOOKS => POWERBOOKS
   PRE TAX => PRETAX
   RE ELECTION => REELECTION
   SEVEN FOLD => SEVENFOLD
   SHORTSELLERS => SHORT SELLERS
   STING RAY => STINGRAY
   STORY BOARD => STORYBOARD
   STORY BOARDS => STORYBOARDS 
   TRACK BALL => TRACKBALL
   TRACK BALLS => TRACKBALLS
   TWENTY SOMETHING => TWENTYSOMETHING
   UNDER FUNDED => UNDERFUNDED
   UNDER FUNDING => UNDERFUNDING
   UNDER WAY => UNDERWAY
   VIDEO TAPE => VIDEOTAPE
   WAL MART => WALMART
   WORK FORCE => WORKFORCE
   WORLD WIDE => WORLDWIDE
  ;;
  ;; Both forms occur in the test data, with no syntactic/semantic difference:
   BESICORP. => BESICORP
  ;;
  ;; Denise's tilde-words that appear in the test data, treated as verbalized punctuation:
  ~EARNINGS => EARNINGS
  ~END-OF-TOPIC => END OF TOPIC
  ~GRAPH        => GRAPH
  ~NEW-PARAGRAPH => NEW PARAGRAPH
  ~NEW-SENTENCE => NEW SENTENCE
  ~NEW-TOPIC    => NEW TOPIC
  ~OPENING => OPENING
  ~PARAGRAPH    => PARAGRAPH
  ;;
  ;; NO SEMANTIC OR ACOUSTIC DIFFERENCE *in this corpus* :
   CO AUTHOR => COAUTHOR
   DE FACTO => DEFACTO
   FEATHER WEIGHT => FEATHERWEIGHT
   FLY WHEEL => FLYWHEEL
   GREENE => GREEN
   MIS INFORMATION => MISINFORMATION
   NON GOVERNMENT => NONGOVERNMENT
   OVER ALLOTMENT => OVERALLOTMENT
   PRE SPLIT => PRESPLIT
   RE DIRECTED => REDIRECTED
   RE ELECTION => REELECTION
   RE INVENTED => REINVENTED
   ST. => SAINT
   TELE COMMUTING => TELECOMMUTING
  ;;
  ;; BOTH WAYS IN COMMON DICTIONARY
   RAQUET => RACKET
   RAQUETS => RACKETS
   ROM'S => ROMS
  ;;
  ;; Test cases
   " OK " => " OKAY "
  SHE'S => SHE IS
  [S.'S] => [{S.'S / S. IS / S. HAS}] / [ ] __ [ ]
  [RECURSIVE] => [{RECURSIVE/RECUR {SIVE / SI VE}}] / [ ] __A [ ]