Commit a79a228434f1eebcb042bcc576a3a0d6476789e4

Authored by Romain Deveaud
1 parent 87316a38a5
Exists in master

new TreeTagger module. tagger-chunker parsing & doc.

Showing 44 changed files with 1597 additions and 208 deletions Inline Diff

doc/classes/RIR.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>Module: RIR [RDoc Documentation]</title> 5 <title>Module: RIR [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="classHeader"> 45 <div id="classHeader">
46 <table class="header-table"> 46 <table class="header-table">
47 <tr class="top-aligned-row"> 47 <tr class="top-aligned-row">
48 <td><strong>Module</strong></td> 48 <td><strong>Module</strong></td>
49 <td class="class-name-in-header">RIR</td> 49 <td class="class-name-in-header">RIR</td>
50 </tr> 50 </tr>
51 <tr class="top-aligned-row"> 51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td> 52 <td><strong>In:</strong></td>
53 <td> 53 <td>
54 54
55 55
56 <a href="../files/lib/rir/corpus_rb.html"> 56 <a href="../files/lib/rir/ttagger_rb.html">
57 57
58 lib/rir/corpus.rb 58 lib/rir/ttagger.rb
59 59
60 </a> 60 </a>
61 61
62 62
63 <br /> 63 <br />
64 64
65 65
66 <a href="../files/lib/rir/query_rb.html"> 66 <a href="../files/lib/rir/query_rb.html">
67 67
68 lib/rir/query.rb 68 lib/rir/query.rb
69 69
70 </a> 70 </a>
71 71
72 72
73 <br /> 73 <br />
74 74
75 75
76 <a href="../files/lib/rir/string_rb.html"> 76 <a href="../files/lib/rir/document_rb.html">
77 77
78 lib/rir/string.rb 78 lib/rir/document.rb
79 79
80 </a> 80 </a>
81 81
82 82
83 <br /> 83 <br />
84 84
85 </td> 85 </td>
86 </tr> 86 </tr>
87 87
88 88
89 </table> 89 </table>
90 </div> 90 </div>
91 <!-- banner header --> 91 <!-- banner header -->
92 92
93 <div id="bodyContent"> 93 <div id="bodyContent">
94 94
95 <div id="contextContent"> 95 <div id="contextContent">
96 96
97 <div id="description"> 97 <div id="description">
98 <p> 98 <p>
99 This file is a part of an Information Retrieval oriented Ruby library 99 This file is a part of an Information Retrieval oriented Ruby library
100 </p> 100 </p>
101 <p> 101 <p>
102 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> 102 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
103 </p> 103 </p>
104 <p> 104 <p>
105 This program is free software: you can redistribute it and/or modify it 105 This program is free software: you can redistribute it and/or modify it
106 under the terms of the GNU General Public License as published by the Free 106 under the terms of the GNU General Public License as published by the Free
107 Software Foundation, either version 3 of the License, or (at your option) 107 Software Foundation, either version 3 of the License, or (at your option)
108 any later version. 108 any later version.
109 </p> 109 </p>
110 <p> 110 <p>
111 This program is distributed in the hope that it will be useful, but WITHOUT 111 This program is distributed in the hope that it will be useful, but WITHOUT
112 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 112 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
113 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 113 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
114 more details. 114 more details.
115 </p> 115 </p>
116 <p> 116 <p>
117 You should have received a copy of the GNU General Public License along 117 You should have received a copy of the GNU General Public License along
118 with this program. If not, see <<a 118 with this program. If not, see <<a
119 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. 119 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
120 </p> 120 </p>
121 <hr size="1"></hr><p> 121 <hr size="1"></hr><p>
122 This file is a part of an Information Retrieval oriented Ruby library 122 This file is a part of an Information Retrieval oriented Ruby library
123 </p> 123 </p>
124 <p> 124 <p>
125 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> 125 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
126 </p> 126 </p>
127 <p> 127 <p>
128 This program is free software: you can redistribute it and/or modify it 128 This program is free software: you can redistribute it and/or modify it
129 under the terms of the GNU General Public License as published by the Free 129 under the terms of the GNU General Public License as published by the Free
130 Software Foundation, either version 3 of the License, or (at your option) 130 Software Foundation, either version 3 of the License, or (at your option)
131 any later version. 131 any later version.
132 </p> 132 </p>
133 <p> 133 <p>
134 This program is distributed in the hope that it will be useful, but WITHOUT 134 This program is distributed in the hope that it will be useful, but WITHOUT
135 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 135 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
136 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 136 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
137 more details. 137 more details.
138 </p> 138 </p>
139 <p> 139 <p>
140 You should have received a copy of the GNU General Public License along 140 You should have received a copy of the GNU General Public License along
141 with this program. If not, see <<a 141 with this program. If not, see <<a
142 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. 142 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
143 </p> 143 </p>
144 <hr size="1"></hr><p> 144 <hr size="1"></hr><p>
145 This file is a part of an Information Retrieval oriented Ruby library 145 General module for many purposes related to Information Retrieval.
146 </p> 146 </p>
147 <p>
148 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
149 </p>
150 <p>
151 This program is free software: you can redistribute it and/or modify it
152 under the terms of the GNU General Public License as published by the Free
153 Software Foundation, either version 3 of the License, or (at your option)
154 any later version.
155 </p>
156 <p>
157 This program is distributed in the hope that it will be useful, but WITHOUT
158 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
159 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
160 more details.
161 </p>
162 <p>
163 You should have received a copy of the GNU General Public License along
164 with this program. If not, see <<a
165 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
166 </p>
167 147
168 </div> 148 </div>
169 149
170 </div> 150 </div>
171 151
172 152
173 </div> 153 </div>
174 154
175 <!-- if includes --> 155 <!-- if includes -->
176 156
177 <div id="section"> 157 <div id="section">
178 158
179 <div id="class-list"> 159 <div id="class-list">
180 <h3 class="section-bar">Classes and Modules</h3> 160 <h3 class="section-bar">Classes and Modules</h3>
181 161
182 Module <a href="RIR/Indri.html" class="link">RIR::Indri</a><br /> 162 Module <a href="RIR/Indri.html" class="link">RIR::Indri</a><br />
183 Class <a href="RIR/Corpus.html" class="link">RIR::Corpus</a><br /> 163 Module <a href="RIR/TreeTagger.html" class="link">RIR::TreeTagger</a><br />
164 Class <a href="RIR/Document.html" class="link">RIR::Document</a><br />
184 Class <a href="RIR/Query.html" class="link">RIR::Query</a><br /> 165 Class <a href="RIR/Query.html" class="link">RIR::Query</a><br />
166 Class <a href="RIR/WebDocument.html" class="link">RIR::WebDocument</a><br />
167 Class <a href="RIR/WikipediaPage.html" class="link">RIR::WikipediaPage</a><br />
185 168
186 </div>
187
188 <div id="constants-list">
189 <h3 class="section-bar">Constants</h3>
190
191 <div class="name-list">
192 <table summary="Constants">
193
194 <tr class="top-aligned-row context-row">
195 <td class="context-item-name">Stoplist</td>
196 <td>=</td>
197 <td class="context-item-value">[ &quot;a&quot;, &quot;anything&quot;, &quot;anyway&quot;, &quot;anywhere&quot;, &quot;apart&quot;, &quot;are&quot;, &quot;around&quot;, &quot;as&quot;, &quot;at&quot;, &quot;av&quot;, &quot;be&quot;, &quot;became&quot;, &quot;because&quot;, &quot;become&quot;, &quot;becomes&quot;, &quot;becoming&quot;, &quot;been&quot;, &quot;before&quot;, &quot;beforehand&quot;, &quot;behind&quot;, &quot;being&quot;, &quot;below&quot;, &quot;beside&quot;, &quot;besides&quot;, &quot;between&quot;, &quot;beyond&quot;, &quot;both&quot;, &quot;but&quot;, &quot;by&quot;, &quot;can&quot;, &quot;cannot&quot;, &quot;canst&quot;, &quot;certain&quot;, &quot;cf&quot;, &quot;choose&quot;, &quot;contrariwise&quot;, &quot;cos&quot;, &quot;could&quot;, &quot;cu&quot;, &quot;day&quot;, &quot;do&quot;, &quot;does&quot;, &quot;doesn't&quot;, &quot;doing&quot;, &quot;dost&quot;, &quot;doth&quot;, &quot;double&quot;, &quot;down&quot;, &quot;dual&quot;, &quot;during&quot;, &quot;each&quot;, &quot;either&quot;, &quot;else&quot;, &quot;elsewhere&quot;, &quot;enough&quot;, &quot;et&quot;, &quot;etc&quot;, &quot;even&quot;, &quot;ever&quot;, &quot;every&quot;, &quot;everybody&quot;, &quot;everyone&quot;, &quot;everything&quot;, &quot;everywhere&quot;, &quot;except&quot;, &quot;excepted&quot;, &quot;excepting&quot;, &quot;exception&quot;, &quot;exclude&quot;, &quot;excluding&quot;, &quot;exclusive&quot;, &quot;far&quot;, &quot;farther&quot;, &quot;farthest&quot;, &quot;few&quot;, &quot;ff&quot;, &quot;first&quot;, &quot;for&quot;, &quot;formerly&quot;, &quot;forth&quot;, &quot;forward&quot;, &quot;from&quot;, &quot;front&quot;, &quot;further&quot;, &quot;furthermore&quot;, &quot;furthest&quot;, &quot;get&quot;, &quot;go&quot;, &quot;had&quot;, &quot;halves&quot;, &quot;hardly&quot;, &quot;has&quot;, &quot;hast&quot;, &quot;hath&quot;, &quot;have&quot;, &quot;he&quot;, &quot;hence&quot;, &quot;henceforth&quot;, &quot;her&quot;, &quot;here&quot;, &quot;hereabouts&quot;, 
&quot;hereafter&quot;, &quot;hereby&quot;, &quot;herein&quot;, &quot;hereto&quot;, &quot;hereupon&quot;, &quot;hers&quot;, &quot;herself&quot;, &quot;him&quot;, &quot;himself&quot;, &quot;hindmost&quot;, &quot;his&quot;, &quot;hither&quot;, &quot;hitherto&quot;, &quot;how&quot;, &quot;however&quot;, &quot;howsoever&quot;, &quot;i&quot;, &quot;ie&quot;, &quot;if&quot;, &quot;in&quot;, &quot;inasmuch&quot;, &quot;inc&quot;, &quot;include&quot;, &quot;included&quot;, &quot;including&quot;, &quot;indeed&quot;, &quot;indoors&quot;, &quot;inside&quot;, &quot;insomuch&quot;, &quot;instead&quot;, &quot;into&quot;, &quot;inward&quot;, &quot;inwards&quot;, &quot;is&quot;, &quot;it&quot;, &quot;its&quot;, &quot;itself&quot;, &quot;just&quot;, &quot;kind&quot;, &quot;kg&quot;, &quot;km&quot;, &quot;last&quot;, &quot;latter&quot;, &quot;latterly&quot;, &quot;less&quot;, &quot;lest&quot;, &quot;let&quot;, &quot;like&quot;, &quot;little&quot;, &quot;ltd&quot;, &quot;many&quot;, &quot;may&quot;, &quot;maybe&quot;, &quot;me&quot;, &quot;meantime&quot;, &quot;meanwhile&quot;, &quot;might&quot;, &quot;moreover&quot;, &quot;most&quot;, &quot;mostly&quot;, &quot;more&quot;, &quot;mr&quot;, &quot;mrs&quot;, &quot;ms&quot;, &quot;much&quot;, &quot;must&quot;, &quot;my&quot;, &quot;myself&quot;, &quot;namely&quot;, &quot;need&quot;, &quot;neither&quot;, &quot;never&quot;, &quot;nevertheless&quot;, &quot;next&quot;, &quot;no&quot;, &quot;nobody&quot;, &quot;none&quot;, &quot;nonetheless&quot;, &quot;noone&quot;, &quot;nope&quot;, &quot;nor&quot;, &quot;not&quot;, &quot;nothing&quot;, &quot;notwithstanding&quot;, &quot;now&quot;, &quot;nowadays&quot;, &quot;nowhere&quot;, &quot;of&quot;, &quot;off&quot;, &quot;often&quot;, &quot;ok&quot;, &quot;on&quot;, &quot;once&quot;, &quot;one&quot;, &quot;only&quot;, &quot;onto&quot;, &quot;or&quot;, &quot;other&quot;, &quot;others&quot;, &quot;otherwise&quot;, &quot;ought&quot;, &quot;our&quot;, &quot;ours&quot;, &quot;ourselves&quot;, 
&quot;out&quot;, &quot;outside&quot;, &quot;over&quot;, &quot;own&quot;, &quot;per&quot;, &quot;perhaps&quot;, &quot;plenty&quot;, &quot;provide&quot;, &quot;quite&quot;, &quot;rather&quot;, &quot;really&quot;, &quot;round&quot;, &quot;said&quot;, &quot;sake&quot;, &quot;same&quot;, &quot;sang&quot;, &quot;save&quot;, &quot;saw&quot;, &quot;see&quot;, &quot;seeing&quot;, &quot;seem&quot;, &quot;seemed&quot;, &quot;seeming&quot;, &quot;seems&quot;, &quot;seen&quot;, &quot;seldom&quot;, &quot;selves&quot;, &quot;sent&quot;, &quot;several&quot;, &quot;shalt&quot;, &quot;she&quot;, &quot;should&quot;, &quot;shown&quot;, &quot;sideways&quot;, &quot;since&quot;, &quot;slept&quot;, &quot;slew&quot;, &quot;slung&quot;, &quot;slunk&quot;, &quot;smote&quot;, &quot;so&quot;, &quot;some&quot;, &quot;somebody&quot;, &quot;somehow&quot;, &quot;someone&quot;, &quot;something&quot;, &quot;sometime&quot;, &quot;sometimes&quot;, &quot;somewhat&quot;, &quot;somewhere&quot;, &quot;spake&quot;, &quot;spat&quot;, &quot;spoke&quot;, &quot;spoken&quot;, &quot;sprang&quot;, &quot;sprung&quot;, &quot;stave&quot;, &quot;staves&quot;, &quot;still&quot;, &quot;such&quot;, &quot;supposing&quot;, &quot;than&quot;, &quot;that&quot;, &quot;the&quot;, &quot;thee&quot;, &quot;their&quot;, &quot;them&quot;, &quot;themselves&quot;, &quot;then&quot;, &quot;thence&quot;, &quot;thenceforth&quot;, &quot;there&quot;, &quot;thereabout&quot;, &quot;thereabouts&quot;, &quot;thereafter&quot;, &quot;thereby&quot;, &quot;therefore&quot;, &quot;therein&quot;, &quot;thereof&quot;, &quot;thereon&quot;, &quot;thereto&quot;, &quot;thereupon&quot;, &quot;these&quot;, &quot;they&quot;, &quot;this&quot;, &quot;those&quot;, &quot;thou&quot;, &quot;though&quot;, &quot;thrice&quot;, &quot;through&quot;, &quot;throughout&quot;, &quot;thru&quot;, &quot;thus&quot;, &quot;thy&quot;, &quot;thyself&quot;, &quot;till&quot;, &quot;to&quot;, &quot;together&quot;, &quot;too&quot;, &quot;toward&quot;, &quot;towards&quot;, 
&quot;ugh&quot;, &quot;unable&quot;, &quot;under&quot;, &quot;underneath&quot;, &quot;unless&quot;, &quot;unlike&quot;, &quot;until&quot;, &quot;up&quot;, &quot;upon&quot;, &quot;upward&quot;, &quot;upwards&quot;, &quot;us&quot;, &quot;use&quot;, &quot;used&quot;, &quot;using&quot;, &quot;very&quot;, &quot;via&quot;, &quot;vs&quot;, &quot;want&quot;, &quot;was&quot;, &quot;we&quot;, &quot;week&quot;, &quot;well&quot;, &quot;were&quot;, &quot;what&quot;, &quot;whatever&quot;, &quot;whatsoever&quot;, &quot;when&quot;, &quot;whence&quot;, &quot;whenever&quot;, &quot;whensoever&quot;, &quot;where&quot;, &quot;whereabouts&quot;, &quot;whereafter&quot;, &quot;whereas&quot;, &quot;whereat&quot;, &quot;whereby&quot;, &quot;wherefore&quot;, &quot;wherefrom&quot;, &quot;wherein&quot;, &quot;whereinto&quot;, &quot;whereof&quot;, &quot;whereon&quot;, &quot;wheresoever&quot;, &quot;whereto&quot;, &quot;whereunto&quot;, &quot;whereupon&quot;, &quot;wherever&quot;, &quot;wherewith&quot;, &quot;whether&quot;, &quot;whew&quot;, &quot;which&quot;, &quot;whichever&quot;, &quot;whichsoever&quot;, &quot;while&quot;, &quot;whilst&quot;, &quot;whither&quot;, &quot;who&quot;, &quot;whoa&quot;, &quot;whoever&quot;, &quot;whole&quot;, &quot;whom&quot;, &quot;whomever&quot;, &quot;whomsoever&quot;, &quot;whose&quot;, &quot;whosoever&quot;, &quot;why&quot;, &quot;will&quot;, &quot;wilt&quot;, &quot;with&quot;, &quot;within&quot;, &quot;without&quot;, &quot;worse&quot;, &quot;worst&quot;, &quot;would&quot;, &quot;wow&quot;, &quot;ye&quot;, &quot;yet&quot;, &quot;year&quot;, &quot;yippee&quot;, &quot;you&quot;, &quot;your&quot;, &quot;yours&quot;, &quot;yourself&quot;, &quot;yourselves&quot; ]</td>
198
199 <td>&nbsp;</td>
200 <td class="context-item-desc">
201 These are the default stopwords provided by Lemur.
202
203 </td>
204
205 </tr>
206
207 </table>
208 </div>
209 </div> 169 </div>
210 170
211 171
212 172
213 173
214 <!-- if method_list --> 174 <!-- if method_list -->
215 175
216 176
217 177
218 178
219 </div> 179 </div>
220 180
221 <div id="validator-badges"> 181 <div id="validator-badges">
222 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 182 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
223 </div> 183 </div>
224 184
doc/classes/RIR/Document.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>Class: RIR::Document [RDoc Documentation]</title> 5 <title>Class: RIR::Document [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="classHeader"> 45 <div id="classHeader">
46 <table class="header-table"> 46 <table class="header-table">
47 <tr class="top-aligned-row"> 47 <tr class="top-aligned-row">
48 <td><strong>Class</strong></td> 48 <td><strong>Class</strong></td>
49 <td class="class-name-in-header">RIR::Document</td> 49 <td class="class-name-in-header">RIR::Document</td>
50 </tr> 50 </tr>
51 <tr class="top-aligned-row"> 51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td> 52 <td><strong>In:</strong></td>
53 <td> 53 <td>
54 54
55 55
56 <a href="../../files/lib/rir/document_rb.html"> 56 <a href="../../files/lib/rir/document_rb.html">
57 57
58 lib/rir/document.rb 58 lib/rir/document.rb
59 59
60 </a> 60 </a>
61 61
62 62
63 <br /> 63 <br />
64 64
65 </td> 65 </td>
66 </tr> 66 </tr>
67 67
68 68
69 <tr class="top-aligned-row"> 69 <tr class="top-aligned-row">
70 <td><strong>Parent:</strong></td> 70 <td><strong>Parent:</strong></td>
71 <td> 71 <td>
72 72
73 Object 73 Object
74 74
75 </td> 75 </td>
76 </tr> 76 </tr>
77 77
78 </table> 78 </table>
79 </div> 79 </div>
80 <!-- banner header --> 80 <!-- banner header -->
81 81
82 <div id="bodyContent"> 82 <div id="bodyContent">
83 83
84 <div id="contextContent"> 84 <div id="contextContent">
85 85
86 <div id="description"> 86 <div id="description">
87 <p> 87 <p>
88 A <a href="Document.html">Document</a> is a bag of words and is constructed 88 A <a href="Document.html">Document</a> is a bag of words and is constructed
89 from a string. 89 from a string.
90 </p> 90 </p>
91 91
92 </div> 92 </div>
93 93
94 </div> 94 </div>
95 95
96 96
97 <div id="method-list"> 97 <div id="method-list">
98 <h3 class="section-bar">Methods</h3> 98 <h3 class="section-bar">Methods</h3>
99 99
100 <div class="name-list"> 100 <div class="name-list">
101 101
102 <a href="#M000021">count_words</a>&nbsp;&nbsp; 102 <a href="#M000010">count_words</a>&nbsp;&nbsp;
103 103
104 <a href="#M000022">entropy</a>&nbsp;&nbsp; 104 <a href="#M000011">entropy</a>&nbsp;&nbsp;
105 105
106 <a href="#M000019">format_words</a>&nbsp;&nbsp; 106 <a href="#M000008">format_words</a>&nbsp;&nbsp;
107 107
108 <a href="#M000023">new</a>&nbsp;&nbsp; 108 <a href="#M000013">new</a>&nbsp;&nbsp;
109 109
110 <a href="#M000020">ngrams</a>&nbsp;&nbsp; 110 <a href="#M000009">ngrams</a>&nbsp;&nbsp;
111 111
112 <a href="#M000012">tf</a>&nbsp;&nbsp;
113
112 </div> 114 </div>
113 </div> 115 </div>
114 116
115 </div> 117 </div>
116 118
117 <!-- if includes --> 119 <!-- if includes -->
118 120
119 <div id="section"> 121 <div id="section">
120 122
121 123
122 124
123 <div id="attribute-list"> 125 <div id="attribute-list">
124 <h3 class="section-bar">Attributes</h3> 126 <h3 class="section-bar">Attributes</h3>
125 127
126 <div class="name-list"> 128 <div class="name-list">
127 <table> 129 <table>
128 130
129 <tr class="top-aligned-row context-row"> 131 <tr class="top-aligned-row context-row">
130 <td class="context-item-name">doc_content</td> 132 <td class="context-item-name">doc_content</td>
131 133
132 <td class="context-item-value">&nbsp;[R]&nbsp;</td> 134 <td class="context-item-value">&nbsp;[R]&nbsp;</td>
133 135
134 <td class="context-item-desc"></td> 136 <td class="context-item-desc"></td>
135 </tr> 137 </tr>
136 138
137 <tr class="top-aligned-row context-row"> 139 <tr class="top-aligned-row context-row">
138 <td class="context-item-name">words</td> 140 <td class="context-item-name">words</td>
139 141
140 <td class="context-item-value">&nbsp;[R]&nbsp;</td> 142 <td class="context-item-value">&nbsp;[R]&nbsp;</td>
141 143
142 <td class="context-item-desc"></td> 144 <td class="context-item-desc"></td>
143 </tr> 145 </tr>
144 146
145 </table> 147 </table>
146 </div> 148 </div>
147 </div> 149 </div>
148 150
149 151
150 <!-- if method_list --> 152 <!-- if method_list -->
151 153
152 <div id="methods"> 154 <div id="methods">
153 155
154 <h3 class="section-bar">Public Class methods</h3> 156 <h3 class="section-bar">Public Class methods</h3>
155 157
156 158
157 <div id="method-M000023" class="method-detail"> 159 <div id="method-M000013" class="method-detail">
158 <a name="M000023"></a> 160 <a name="M000013"></a>
159 161
160 <div class="method-heading"> 162 <div class="method-heading">
161 163
162 <a href="Document.src/M000023.html" target="Code" class="method-signature" 164 <a href="Document.src/M000013.html" target="Code" class="method-signature"
163 onclick="popupCode('Document.src/M000023.html');return false;"> 165 onclick="popupCode('Document.src/M000013.html');return false;">
164 166
165 <span class="method-name">new</span><span class="method-args">(content)</span> 167 <span class="method-name">new</span><span class="method-args">(content)</span>
166 168
167 </a> 169 </a>
168 170
169 </div> 171 </div>
170 172
171 <div class="method-description"> 173 <div class="method-description">
172 174
173 </div> 175 </div>
174 </div> 176 </div>
175 177
176 178
177 <h3 class="section-bar">Public Instance methods</h3> 179 <h3 class="section-bar">Public Instance methods</h3>
178 180
179 181
180 <div id="method-M000021" class="method-detail"> 182 <div id="method-M000010" class="method-detail">
181 <a name="M000021"></a> 183 <a name="M000010"></a>
182 184
183 <div class="method-heading"> 185 <div class="method-heading">
184 186
185 <a href="Document.src/M000021.html" target="Code" class="method-signature" 187 <a href="Document.src/M000010.html" target="Code" class="method-signature"
186 onclick="popupCode('Document.src/M000021.html');return false;"> 188 onclick="popupCode('Document.src/M000010.html');return false;">
187 189
188 <span class="method-name">count_words</span><span class="method-args">()</span> 190 <span class="method-name">count_words</span><span class="method-args">()</span>
189 191
190 </a> 192 </a>
191 193
192 </div> 194 </div>
193 195
194 <div class="method-description"> 196 <div class="method-description">
195 197
196 <p> 198 <p>
197 Returns a Hash containing the words and their associated counts in the 199 Returns a Hash containing the words and their associated counts in the
198 current <a href="Document.html">Document</a>. 200 current <a href="Document.html">Document</a>.
199 </p> 201 </p>
200 <pre> 202 <pre>
201 count_words #=&gt; { &quot;guitar&quot;=&gt;1, &quot;bass&quot;=&gt;3, &quot;album&quot;=&gt;20, ... } 203 count_words #=&gt; { &quot;guitar&quot;=&gt;1, &quot;bass&quot;=&gt;3, &quot;album&quot;=&gt;20, ... }
202 </pre> 204 </pre>
203 205
204 </div> 206 </div>
205 </div> 207 </div>
206 208
207 209
208 <div id="method-M000022" class="method-detail"> 210 <div id="method-M000011" class="method-detail">
209 <a name="M000022"></a> 211 <a name="M000011"></a>
210 212
211 <div class="method-heading"> 213 <div class="method-heading">
212 214
213 <a href="Document.src/M000022.html" target="Code" class="method-signature" 215 <a href="Document.src/M000011.html" target="Code" class="method-signature"
214 onclick="popupCode('Document.src/M000022.html');return false;"> 216 onclick="popupCode('Document.src/M000011.html');return false;">
215 217
216 <span class="method-name">entropy</span><span class="method-args">(s)</span> 218 <span class="method-name">entropy</span><span class="method-args">(s)</span>
217 219
218 </a> 220 </a>
219 221
220 </div> 222 </div>
221 223
222 <div class="method-description"> 224 <div class="method-description">
223 225
224 <p> 226 <p>
225 Computes the entropy of a given string <tt>s</tt> inside the document. 227 Computes the entropy of a given string <tt>s</tt> inside the document.
226 </p> 228 </p>
227 <p> 229 <p>
228 If the string parameter is composed of many words (i.e. tokens separated by 230 If the string parameter is composed of many words (i.e. tokens separated by
229 whitespace(s)), it is considered as an ngram. 231 whitespace(s)), it is considered as an ngram.
230 </p> 232 </p>
231 <pre> 233 <pre>
232 entropy(&quot;guitar&quot;) #=&gt; 0.00389919463243839 234 entropy(&quot;guitar&quot;) #=&gt; 0.00432114812727959
235 entropy(&quot;dillinger escape plan&quot;) #=&gt; 0.265862076325102
233 </pre> 236 </pre>
234 237
235 </div> 238 </div>
236 </div> 239 </div>
237 240
238 241
239 <div id="method-M000020" class="method-detail"> 242 <div id="method-M000009" class="method-detail">
240 <a name="M000020"></a> 243 <a name="M000009"></a>
241 244
242 <div class="method-heading"> 245 <div class="method-heading">
243 246
244 <a href="Document.src/M000020.html" target="Code" class="method-signature" 247 <a href="Document.src/M000009.html" target="Code" class="method-signature"
245 onclick="popupCode('Document.src/M000020.html');return false;"> 248 onclick="popupCode('Document.src/M000009.html');return false;">
246 249
247 <span class="method-name">ngrams</span><span class="method-args">(n)</span> 250 <span class="method-name">ngrams</span><span class="method-args">(n)</span>
248 251
249 </a> 252 </a>
250 253
251 </div> 254 </div>
252 255
253 <div class="method-description"> 256 <div class="method-description">
254 257
255 <p> 258 <p>
256 Returns an Array containing the <tt>n</tt>-grams (words) from the current 259 Returns an Array containing the <tt>n</tt>-grams (words) from the current
257 <a href="Document.html">Document</a>. 260 <a href="Document.html">Document</a>.
258 </p> 261 </p>
259 <pre> 262 <pre>
260 ngrams(2) #=&gt; [&quot;the free&quot;, &quot;free encyclopedia&quot;, &quot;encyclopedia var&quot;, &quot;var skin&quot;, ...] 263 ngrams(2) #=&gt; [&quot;the free&quot;, &quot;free encyclopedia&quot;, &quot;encyclopedia var&quot;, &quot;var skin&quot;, ...]
261 </pre> 264 </pre>
262 265
263 </div> 266 </div>
264 </div> 267 </div>
265 268
266 269
270 <div id="method-M000012" class="method-detail">
271 <a name="M000012"></a>
272
273 <div class="method-heading">
274
275 <a href="Document.src/M000012.html" target="Code" class="method-signature"
276 onclick="popupCode('Document.src/M000012.html');return false;">
277
278 <span class="method-name">tf</span><span class="method-args">(s)</span>
279
280 </a>
281
282 </div>
283
284 <div class="method-description">
285
286 <p>
287 Computes the term frequency of a given <b>word</b> <tt>s</tt>.
288 </p>
289 <pre>
290 tf(&quot;guitar&quot;) #=&gt; 0.000380372765310004
291 </pre>
292
293 </div>
294 </div>
295
296
267 <h3 class="section-bar">Protected Instance methods</h3> 297 <h3 class="section-bar">Protected Instance methods</h3>
268 298
269 299
270 <div id="method-M000019" class="method-detail"> 300 <div id="method-M000008" class="method-detail">
271 <a name="M000019"></a> 301 <a name="M000008"></a>
272 302
273 <div class="method-heading"> 303 <div class="method-heading">
274 304
275 <a href="Document.src/M000019.html" target="Code" class="method-signature" 305 <a href="Document.src/M000008.html" target="Code" class="method-signature"
276 onclick="popupCode('Document.src/M000019.html');return false;"> 306 onclick="popupCode('Document.src/M000008.html');return false;">
277 307
278 <span class="method-name">format_words</span><span class="method-args">()</span> 308 <span class="method-name">format_words</span><span class="method-args">()</span>
279 309
280 </a> 310 </a>
281 311
282 </div> 312 </div>
283 313
284 <div class="method-description"> 314 <div class="method-description">
285 315
286 <p> 316 <p>
287 Any non-word characters are removed from the words (see <a 317 Any non-word characters are removed from the words (see <a
288 href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a> 318 href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a>
289 and the <tt>\W</tt> special escape). 319 and the <tt>\W</tt> special escape).
290 </p> 320 </p>
291 <p> 321 <p>
292 Protected function, only meant to be called at initialization. 322 Protected function, only meant to be called at initialization.
293 </p> 323 </p>
294 324
295 </div> 325 </div>
296 </div> 326 </div>
297 327
298 328
299 329
300 </div> 330 </div>
301 331
302 332
303 333
304 334
305 </div> 335 </div>
306 336
307 <div id="validator-badges"> 337 <div id="validator-badges">
308 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 338 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
309 </div> 339 </div>
310 340
311 </body> 341 </body>
312 </html> 342 </html>
313 343
doc/classes/RIR/Document.src/M000008.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>format_words (RIR::Document)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span>
12 <span class="ruby-identifier">wo</span> = []
13
14 <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
15 <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span>
16 <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>.<span class="ruby-identifier">downcase</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span>
17 <span class="ruby-keyword kw">end</span>
18 <span class="ruby-keyword kw">end</span>
19
20 <span class="ruby-identifier">wo</span>
21 <span class="ruby-keyword kw">end</span></pre>
22 </body>
23 </html>
24
doc/classes/RIR/Document.src/M000009.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>ngrams (RIR::Document)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>)
12 <span class="ruby-identifier">window</span> = []
13 <span class="ruby-identifier">ngrams_array</span> = []
14
15 <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
16 <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>)
17 <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span>
18 <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">&quot; &quot;</span>)
19 <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>)
20 <span class="ruby-keyword kw">end</span>
21 <span class="ruby-keyword kw">end</span>
22
23 <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span>
24 <span class="ruby-keyword kw">end</span></pre>
25 </body>
26 </html>
27
doc/classes/RIR/Document.src/M000010.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>format_words (RIR::Document)</title> 5 <title>count_words (RIR::Document)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> 7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head> 8 </head>
9 <body class="standalone-code"> 9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span> 10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span> 11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span>
12 <span class="ruby-identifier">wo</span> = [] 12 <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> }
13 <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> }
13 14
14 <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> 15 <span class="ruby-identifier">counts</span>
15 <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span>
16 <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span>
17 <span class="ruby-keyword kw">end</span>
18 <span class="ruby-keyword kw">end</span>
19
20 <span class="ruby-identifier">wo</span>
21 <span class="ruby-keyword kw">end</span></pre> 16 <span class="ruby-keyword kw">end</span></pre>
22 </body> 17 </body>
23 </html> 18 </html>
doc/classes/RIR/Document.src/M000011.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>ngrams (RIR::Document)</title> 5 <title>entropy (RIR::Document)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> 7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head> 8 </head>
9 <body class="standalone-code"> 9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span> 10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 78</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>) 11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>)
12 <span class="ruby-identifier">window</span> = [] 12 <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span>
13 <span class="ruby-identifier">ngrams_array</span> = [] 13 <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span>
14 14
15 <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> 15 <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
16 <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>) 16 <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span>
17 <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span> 17 <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>)
18 <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">&quot; &quot;</span>)
19 <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>)
20 <span class="ruby-keyword kw">end</span>
21 <span class="ruby-keyword kw">end</span> 18 <span class="ruby-keyword kw">end</span>
22 19
23 <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span> 20 <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span>
21 <span class="ruby-identifier">en</span>
24 <span class="ruby-keyword kw">end</span></pre> 22 <span class="ruby-keyword kw">end</span></pre>
25 </body> 23 </body>
26 </html> 24 </html>
doc/classes/RIR/Document.src/M000012.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>count_words (RIR::Document)</title> 5 <title>tf (RIR::Document)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> 7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head> 8 </head>
9 <body class="standalone-code"> 9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span> 10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 94</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span> 11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">tf</span>(<span class="ruby-identifier">s</span>)
12 <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> } 12 <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span>[<span class="ruby-identifier">s</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">size</span>.<span class="ruby-identifier">to_f</span>
13 <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> }
14
15 <span class="ruby-identifier">counts</span>
16 <span class="ruby-keyword kw">end</span></pre> 13 <span class="ruby-keyword kw">end</span></pre>
17 </body> 14 </body>
18 </html> 15 </html>
19 16
doc/classes/RIR/Document.src/M000013.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>entropy (RIR::Document)</title> 5 <title>new (RIR::Document)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> 7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head> 8 </head>
9 <body class="standalone-code"> 9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 77</span> 10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 99</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>) 11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">content</span>)
12 <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span> 12 <span class="ruby-ivar">@doc_content</span> = <span class="ruby-identifier">content</span>
13 <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span> 13 <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">format_words</span>
14
15 <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
16 <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span>
17 <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>)
18 <span class="ruby-keyword kw">end</span>
19
20 <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span>
21 <span class="ruby-identifier">en</span>
22 <span class="ruby-keyword kw">end</span></pre> 14 <span class="ruby-keyword kw">end</span></pre>
23 </body> 15 </body>
24 </html> 16 </html>
25 17
doc/classes/RIR/Indri/IndriQuery.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>Class: RIR::Indri::IndriQuery [RDoc Documentation]</title> 5 <title>Class: RIR::Indri::IndriQuery [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="classHeader"> 45 <div id="classHeader">
46 <table class="header-table"> 46 <table class="header-table">
47 <tr class="top-aligned-row"> 47 <tr class="top-aligned-row">
48 <td><strong>Class</strong></td> 48 <td><strong>Class</strong></td>
49 <td class="class-name-in-header">RIR::Indri::IndriQuery</td> 49 <td class="class-name-in-header">RIR::Indri::IndriQuery</td>
50 </tr> 50 </tr>
51 <tr class="top-aligned-row"> 51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td> 52 <td><strong>In:</strong></td>
53 <td> 53 <td>
54 54
55 55
56 <a href="../../../files/lib/rir/query_rb.html"> 56 <a href="../../../files/lib/rir/query_rb.html">
57 57
58 lib/rir/query.rb 58 lib/rir/query.rb
59 59
60 </a> 60 </a>
61 61
62 62
63 <br /> 63 <br />
64 64
65 </td> 65 </td>
66 </tr> 66 </tr>
67 67
68 68
69 <tr class="top-aligned-row"> 69 <tr class="top-aligned-row">
70 <td><strong>Parent:</strong></td> 70 <td><strong>Parent:</strong></td>
71 <td> 71 <td>
72 72
73 <a href="../Query.html"> 73 <a href="../Query.html">
74 74
75 RIR::Query 75 RIR::Query
76 76
77 </a> 77 </a>
78 78
79 </td> 79 </td>
80 </tr> 80 </tr>
81 81
82 </table> 82 </table>
83 </div> 83 </div>
84 <!-- banner header --> 84 <!-- banner header -->
85 85
86 <div id="bodyContent"> 86 <div id="bodyContent">
87 87
88 <div id="contextContent"> 88 <div id="contextContent">
89 89
90 </div> 90 </div>
91 91
92 92
93 <div id="method-list"> 93 <div id="method-list">
94 <h3 class="section-bar">Methods</h3> 94 <h3 class="section-bar">Methods</h3>
95 95
96 <div class="name-list"> 96 <div class="name-list">
97 97
98 <a href="#M000014">new</a>&nbsp;&nbsp; 98 <a href="#M000006">new</a>&nbsp;&nbsp;
99 99
100 <a href="#M000015">to_s</a>&nbsp;&nbsp; 100 <a href="#M000007">to_s</a>&nbsp;&nbsp;
101 101
102 </div> 102 </div>
103 </div> 103 </div>
104 104
105 </div> 105 </div>
106 106
107 <!-- if includes --> 107 <!-- if includes -->
108 108
109 <div id="section"> 109 <div id="section">
110 110
111 111
112 112
113 <div id="attribute-list"> 113 <div id="attribute-list">
114 <h3 class="section-bar">Attributes</h3> 114 <h3 class="section-bar">Attributes</h3>
115 115
116 <div class="name-list"> 116 <div class="name-list">
117 <table> 117 <table>
118 118
119 <tr class="top-aligned-row context-row"> 119 <tr class="top-aligned-row context-row">
120 <td class="context-item-name">id</td> 120 <td class="context-item-name">id</td>
121 121
122 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 122 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
123 123
124 <td class="context-item-desc"></td> 124 <td class="context-item-desc"></td>
125 </tr> 125 </tr>
126 126
127 <tr class="top-aligned-row context-row"> 127 <tr class="top-aligned-row context-row">
128 <td class="context-item-name">params</td> 128 <td class="context-item-name">params</td>
129 129
130 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 130 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
131 131
132 <td class="context-item-desc"></td> 132 <td class="context-item-desc"></td>
133 </tr> 133 </tr>
134 134
135 <tr class="top-aligned-row context-row"> 135 <tr class="top-aligned-row context-row">
136 <td class="context-item-name">query</td> 136 <td class="context-item-name">query</td>
137 137
138 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 138 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
139 139
140 <td class="context-item-desc"></td> 140 <td class="context-item-desc"></td>
141 </tr> 141 </tr>
142 142
143 <tr class="top-aligned-row context-row"> 143 <tr class="top-aligned-row context-row">
144 <td class="context-item-name">rule</td> 144 <td class="context-item-name">rule</td>
145 145
146 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 146 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
147 147
148 <td class="context-item-desc"></td> 148 <td class="context-item-desc"></td>
149 </tr> 149 </tr>
150 150
151 </table> 151 </table>
152 </div> 152 </div>
153 </div> 153 </div>
154 154
155 155
156 <!-- if method_list --> 156 <!-- if method_list -->
157 157
158 <div id="methods"> 158 <div id="methods">
159 159
160 <h3 class="section-bar">Public Class methods</h3> 160 <h3 class="section-bar">Public Class methods</h3>
161 161
162 162
163 <div id="method-M000014" class="method-detail"> 163 <div id="method-M000006" class="method-detail">
164 <a name="M000014"></a> 164 <a name="M000006"></a>
165 165
166 <div class="method-heading"> 166 <div class="method-heading">
167 167
168 <a href="IndriQuery.src/M000014.html" target="Code" class="method-signature" 168 <a href="IndriQuery.src/M000006.html" target="Code" class="method-signature"
169 onclick="popupCode('IndriQuery.src/M000014.html');return false;"> 169 onclick="popupCode('IndriQuery.src/M000006.html');return false;">
170 170
171 <span class="method-name">new</span><span class="method-args">(id,query,params)</span> 171 <span class="method-name">new</span><span class="method-args">(id,query,params)</span>
172 172
173 </a> 173 </a>
174 174
175 </div> 175 </div>
176 176
177 <div class="method-description"> 177 <div class="method-description">
178 178
179 </div> 179 </div>
180 </div> 180 </div>
181 181
182 182
183 <h3 class="section-bar">Public Instance methods</h3> 183 <h3 class="section-bar">Public Instance methods</h3>
184 184
185 185
186 <div id="method-M000015" class="method-detail"> 186 <div id="method-M000007" class="method-detail">
187 <a name="M000015"></a> 187 <a name="M000007"></a>
188 188
189 <div class="method-heading"> 189 <div class="method-heading">
190 190
191 <a href="IndriQuery.src/M000015.html" target="Code" class="method-signature" 191 <a href="IndriQuery.src/M000007.html" target="Code" class="method-signature"
192 onclick="popupCode('IndriQuery.src/M000015.html');return false;"> 192 onclick="popupCode('IndriQuery.src/M000007.html');return false;">
193 193
194 <span class="method-name">to_s</span><span class="method-args">()</span> 194 <span class="method-name">to_s</span><span class="method-args">()</span>
195 195
196 </a> 196 </a>
197 197
198 </div> 198 </div>
199 199
200 <div class="method-description"> 200 <div class="method-description">
201 201
202 </div> 202 </div>
203 </div> 203 </div>
204 204
205 205
206 206
207 </div> 207 </div>
208 208
209 209
210 210
211 211
212 </div> 212 </div>
213 213
214 <div id="validator-badges"> 214 <div id="validator-badges">
215 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 215 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
216 </div> 216 </div>
217 217
218 </body> 218 </body>
219 </html> 219 </html>
220 220
doc/classes/RIR/Indri/IndriQuery.src/M000006.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>new (RIR::Indri::IndriQuery)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/query.rb, line 62</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">id</span>,<span class="ruby-identifier">query</span>,<span class="ruby-identifier">params</span>)
12 <span class="ruby-ivar">@params</span> = <span class="ruby-identifier">params</span>
13 <span class="ruby-comment cmt"># Here we set the default retrieval model as Language Modeling</span>
14 <span class="ruby-comment cmt"># with a Dirichlet smoothing at 2500.</span>
15 <span class="ruby-comment cmt"># TODO: maybe a Rule class...</span>
16 <span class="ruby-ivar">@params</span>.<span class="ruby-identifier">rule</span> = <span class="ruby-value str">'method:dirichlet,mu:2500'</span> <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@params</span>.<span class="ruby-identifier">rule</span>.<span class="ruby-identifier">nil?</span>
17
18 <span class="ruby-ivar">@id</span> = <span class="ruby-identifier">id</span>
19 <span class="ruby-ivar">@query</span> = <span class="ruby-identifier">query</span>
20 <span class="ruby-keyword kw">end</span></pre>
21 </body>
22 </html>
23
doc/classes/RIR/Indri/IndriQuery.src/M000007.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>to_s (RIR::Indri::IndriQuery)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/query.rb, line 73</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">to_s</span>
12 <span class="ruby-identifier">h</span> = <span class="ruby-ivar">@params</span>.<span class="ruby-identifier">to_s</span>
13 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">&quot;&lt;query&gt;\n&quot;</span>
14 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;number&gt;#{@id}&lt;/number&gt;\n&quot;</span>
15 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;text&gt;#{@query}&lt;/text&gt;\n&quot;</span>
16 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">&quot;&lt;/query&gt;\n&quot;</span>
17 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">&quot;&lt;/parameters&gt;&quot;</span>
18
19 <span class="ruby-identifier">h</span>
20 <span class="ruby-keyword kw">end</span></pre>
21 </body>
22 </html>
23
doc/classes/RIR/Indri/Parameters.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>Class: RIR::Indri::Parameters [RDoc Documentation]</title> 5 <title>Class: RIR::Indri::Parameters [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="classHeader"> 45 <div id="classHeader">
46 <table class="header-table"> 46 <table class="header-table">
47 <tr class="top-aligned-row"> 47 <tr class="top-aligned-row">
48 <td><strong>Class</strong></td> 48 <td><strong>Class</strong></td>
49 <td class="class-name-in-header">RIR::Indri::Parameters</td> 49 <td class="class-name-in-header">RIR::Indri::Parameters</td>
50 </tr> 50 </tr>
51 <tr class="top-aligned-row"> 51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td> 52 <td><strong>In:</strong></td>
53 <td> 53 <td>
54 54
55 55
56 <a href="../../../files/lib/rir/query_rb.html"> 56 <a href="../../../files/lib/rir/query_rb.html">
57 57
58 lib/rir/query.rb 58 lib/rir/query.rb
59 59
60 </a> 60 </a>
61 61
62 62
63 <br /> 63 <br />
64 64
65 </td> 65 </td>
66 </tr> 66 </tr>
67 67
68 68
69 <tr class="top-aligned-row"> 69 <tr class="top-aligned-row">
70 <td><strong>Parent:</strong></td> 70 <td><strong>Parent:</strong></td>
71 <td> 71 <td>
72 72
73 Object 73 Object
74 74
75 </td> 75 </td>
76 </tr> 76 </tr>
77 77
78 </table> 78 </table>
79 </div> 79 </div>
80 <!-- banner header --> 80 <!-- banner header -->
81 81
82 <div id="bodyContent"> 82 <div id="bodyContent">
83 83
84 <div id="contextContent"> 84 <div id="contextContent">
85 85
86 </div> 86 </div>
87 87
88 88
89 <div id="method-list"> 89 <div id="method-list">
90 <h3 class="section-bar">Methods</h3> 90 <h3 class="section-bar">Methods</h3>
91 91
92 <div class="name-list"> 92 <div class="name-list">
93 93
94 <a href="#M000012">new</a>&nbsp;&nbsp; 94 <a href="#M000004">new</a>&nbsp;&nbsp;
95 95
96 <a href="#M000013">to_s</a>&nbsp;&nbsp; 96 <a href="#M000005">to_s</a>&nbsp;&nbsp;
97 97
98 </div> 98 </div>
99 </div> 99 </div>
100 100
101 </div> 101 </div>
102 102
103 <!-- if includes --> 103 <!-- if includes -->
104 104
105 <div id="section"> 105 <div id="section">
106 106
107 107
108 108
109 <div id="attribute-list"> 109 <div id="attribute-list">
110 <h3 class="section-bar">Attributes</h3> 110 <h3 class="section-bar">Attributes</h3>
111 111
112 <div class="name-list"> 112 <div class="name-list">
113 <table> 113 <table>
114 114
115 <tr class="top-aligned-row context-row"> 115 <tr class="top-aligned-row context-row">
116 <td class="context-item-name">baseline</td> 116 <td class="context-item-name">baseline</td>
117 117
118 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 118 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
119 119
120 <td class="context-item-desc"></td> 120 <td class="context-item-desc"></td>
121 </tr> 121 </tr>
122 122
123 <tr class="top-aligned-row context-row"> 123 <tr class="top-aligned-row context-row">
124 <td class="context-item-name">corpus</td> 124 <td class="context-item-name">count</td>
125 125
126 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 126 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
127 127
128 <td class="context-item-desc"></td> 128 <td class="context-item-desc"></td>
129 </tr> 129 </tr>
130 130
131 <tr class="top-aligned-row context-row"> 131 <tr class="top-aligned-row context-row">
132 <td class="context-item-name">count</td> 132 <td class="context-item-name">index_path</td>
133 133
134 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 134 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
135 135
136 <td class="context-item-desc"></td> 136 <td class="context-item-desc"></td>
137 </tr> 137 </tr>
138 138
139 <tr class="top-aligned-row context-row"> 139 <tr class="top-aligned-row context-row">
140 <td class="context-item-name">memory</td> 140 <td class="context-item-name">memory</td>
141 141
142 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 142 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
143 143
144 <td class="context-item-desc"></td> 144 <td class="context-item-desc"></td>
145 </tr> 145 </tr>
146 146
147 <tr class="top-aligned-row context-row"> 147 <tr class="top-aligned-row context-row">
148 <td class="context-item-name">offset</td> 148 <td class="context-item-name">offset</td>
149 149
150 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 150 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
151 151
152 <td class="context-item-desc"></td> 152 <td class="context-item-desc"></td>
153 </tr> 153 </tr>
154 154
155 <tr class="top-aligned-row context-row"> 155 <tr class="top-aligned-row context-row">
156 <td class="context-item-name">print_docs</td> 156 <td class="context-item-name">print_docs</td>
157 157
158 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 158 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
159 159
160 <td class="context-item-desc"></td> 160 <td class="context-item-desc"></td>
161 </tr> 161 </tr>
162 162
163 <tr class="top-aligned-row context-row"> 163 <tr class="top-aligned-row context-row">
164 <td class="context-item-name">print_query</td> 164 <td class="context-item-name">print_query</td>
165 165
166 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 166 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
167 167
168 <td class="context-item-desc"></td> 168 <td class="context-item-desc"></td>
169 </tr> 169 </tr>
170 170
171 <tr class="top-aligned-row context-row"> 171 <tr class="top-aligned-row context-row">
172 <td class="context-item-name">rule</td> 172 <td class="context-item-name">rule</td>
173 173
174 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 174 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
175 175
176 <td class="context-item-desc"></td> 176 <td class="context-item-desc"></td>
177 </tr> 177 </tr>
178 178
179 <tr class="top-aligned-row context-row"> 179 <tr class="top-aligned-row context-row">
180 <td class="context-item-name">run_id</td> 180 <td class="context-item-name">run_id</td>
181 181
182 <td class="context-item-value">&nbsp;[RW]&nbsp;</td> 182 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
183 183
184 <td class="context-item-desc"></td> 184 <td class="context-item-desc"></td>
185 </tr> 185 </tr>
186 186
187 </table> 187 </table>
188 </div> 188 </div>
189 </div> 189 </div>
190 190
191 191
192 <!-- if method_list --> 192 <!-- if method_list -->
193 193
194 <div id="methods"> 194 <div id="methods">
195 195
196 <h3 class="section-bar">Public Class methods</h3> 196 <h3 class="section-bar">Public Class methods</h3>
197 197
198 198
199 <div id="method-M000012" class="method-detail"> 199 <div id="method-M000004" class="method-detail">
200 <a name="M000012"></a> 200 <a name="M000004"></a>
201 201
202 <div class="method-heading"> 202 <div class="method-heading">
203 203
204 <a href="Parameters.src/M000012.html" target="Code" class="method-signature" 204 <a href="Parameters.src/M000004.html" target="Code" class="method-signature"
205 onclick="popupCode('Parameters.src/M000012.html');return false;"> 205 onclick="popupCode('Parameters.src/M000004.html');return false;">
206 206
207 <span class="method-name">new</span><span class="method-args">(corpus,mem=&quot;1g&quot;,count=&quot;1000&quot;,offset=&quot;1&quot;,run_id=&quot;default&quot;,print_query=false,print_docs=false)</span> 207 <span class="method-name">new</span><span class="method-args">(corpus,mem=&quot;1g&quot;,count=&quot;1000&quot;,offset=&quot;1&quot;,run_id=&quot;default&quot;,print_query=false,print_docs=false)</span>
208 208
209 </a> 209 </a>
210 210
211 </div> 211 </div>
212 212
213 <div class="method-description"> 213 <div class="method-description">
214 214
215 </div> 215 </div>
216 </div> 216 </div>
217 217
218 218
219 <h3 class="section-bar">Public Instance methods</h3> 219 <h3 class="section-bar">Public Instance methods</h3>
220 220
221 221
222 <div id="method-M000013" class="method-detail"> 222 <div id="method-M000005" class="method-detail">
223 <a name="M000013"></a> 223 <a name="M000005"></a>
224 224
225 <div class="method-heading"> 225 <div class="method-heading">
226 226
227 <a href="Parameters.src/M000013.html" target="Code" class="method-signature" 227 <a href="Parameters.src/M000005.html" target="Code" class="method-signature"
228 onclick="popupCode('Parameters.src/M000013.html');return false;"> 228 onclick="popupCode('Parameters.src/M000005.html');return false;">
229 229
230 <span class="method-name">to_s</span><span class="method-args">()</span> 230 <span class="method-name">to_s</span><span class="method-args">()</span>
231 231
232 </a> 232 </a>
233 233
234 </div> 234 </div>
235 235
236 <div class="method-description"> 236 <div class="method-description">
237 237
238 </div> 238 </div>
239 </div> 239 </div>
240 240
241 241
242 242
243 </div> 243 </div>
244 244
245 245
246 246
247 247
248 </div> 248 </div>
249 249
250 <div id="validator-badges"> 250 <div id="validator-badges">
251 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 251 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
252 </div> 252 </div>
253 253
254 </body> 254 </body>
255 </html> 255 </html>
256 256
doc/classes/RIR/Indri/Parameters.src/M000004.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>new (RIR::Indri::Parameters)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/query.rb, line 30</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">corpus</span>,<span class="ruby-identifier">mem</span>=<span class="ruby-value str">&quot;1g&quot;</span>,<span class="ruby-identifier">count</span>=<span class="ruby-value str">&quot;1000&quot;</span>,<span class="ruby-identifier">offset</span>=<span class="ruby-value str">&quot;1&quot;</span>,<span class="ruby-identifier">run_id</span>=<span class="ruby-value str">&quot;default&quot;</span>,<span class="ruby-identifier">print_query</span>=<span class="ruby-keyword kw">false</span>,<span class="ruby-identifier">print_docs</span>=<span class="ruby-keyword kw">false</span>)
12 <span class="ruby-ivar">@index_path</span> = <span class="ruby-identifier">corpus</span>
13 <span class="ruby-ivar">@memory</span> = <span class="ruby-identifier">mem</span>
14 <span class="ruby-ivar">@count</span> = <span class="ruby-identifier">count</span>
15 <span class="ruby-ivar">@offset</span> = <span class="ruby-identifier">offset</span>
16 <span class="ruby-ivar">@run_id</span> = <span class="ruby-identifier">run_id</span>
17 <span class="ruby-ivar">@print_query</span> = <span class="ruby-identifier">print_query</span> <span class="ruby-value">? </span><span class="ruby-value str">&quot;true&quot;</span> <span class="ruby-operator">:</span> <span class="ruby-value str">&quot;false&quot;</span>
18 <span class="ruby-ivar">@print_docs</span> = <span class="ruby-identifier">print_docs</span> <span class="ruby-value">? </span><span class="ruby-value str">&quot;true&quot;</span> <span class="ruby-operator">:</span> <span class="ruby-value str">&quot;false&quot;</span>
19 <span class="ruby-keyword kw">end</span></pre>
20 </body>
21 </html>
22
doc/classes/RIR/Indri/Parameters.src/M000005.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>to_s (RIR::Indri::Parameters)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/query.rb, line 40</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">to_s</span>
12 <span class="ruby-identifier">h</span> = <span class="ruby-value str">&quot;&lt;parameters&gt;\n&quot;</span>
13 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;memory&gt;#{@memory}&lt;/memory&gt;\n&quot;</span>
14 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;index&gt;#{@index_path}&lt;/index&gt;\n&quot;</span>
15 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;count&gt;#{@count}&lt;/count&gt;\n&quot;</span>
16 <span class="ruby-keyword kw">unless</span> <span class="ruby-ivar">@baseline</span>.<span class="ruby-identifier">nil?</span>
17 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;baseline&gt;#{@baseline}&lt;/baseline&gt;\n&quot;</span>
18 <span class="ruby-keyword kw">else</span>
19 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;rule&gt;#{@rule}&lt;/rule&gt;\n&quot;</span>
20 <span class="ruby-keyword kw">end</span>
21 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;queryOffset&gt;#{@offset}&lt;/queryOffset&gt;\n&quot;</span>
22 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;runID&gt;#{@run_id}&lt;/runID&gt;\n&quot;</span>
23 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;printQuery&gt;#{@print_query}&lt;/printQuery&gt;\n&quot;</span>
24 <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;printDocuments&gt;#{@print_docs}&lt;/printDocuments&gt;\n&quot;</span>
25
26 <span class="ruby-identifier">h</span>
27 <span class="ruby-keyword kw">end</span></pre>
28 </body>
29 </html>
30
doc/classes/RIR/TreeTagger.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>Module: RIR::TreeTagger [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript">
10 // <![CDATA[
11
12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 }
15
16 function toggleCode( id ) {
17 if ( document.getElementById )
18 elem = document.getElementById( id );
19 else if ( document.all )
20 elem = eval( "document.all." + id );
21 else
22 return false;
23
24 elemStyle = elem.style;
25
26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block"
28 } else {
29 elemStyle.display = "none"
30 }
31
32 return true;
33 }
34
35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37
38 // ]]>
39 </script>
40
41 </head>
42 <body>
43
44
45 <div id="classHeader">
46 <table class="header-table">
47 <tr class="top-aligned-row">
48 <td><strong>Module</strong></td>
49 <td class="class-name-in-header">RIR::TreeTagger</td>
50 </tr>
51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td>
53 <td>
54
55
56 <a href="../../files/lib/rir/ttagger_rb.html">
57
58 lib/rir/ttagger.rb
59
60 </a>
61
62
63 <br />
64
65 </td>
66 </tr>
67
68
69 </table>
70 </div>
71 <!-- banner header -->
72
73 <div id="bodyContent">
74
75 <div id="contextContent">
76
77 <div id="description">
78 <p>
79 Module grouping the classes that work with TreeTagger output.
80 </p>
81 <p>
82 See <a
83 href="http://www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/DecisionTreeTagger.html">www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/DecisionTreeTagger.html</a>
84 </p>
85
86 </div>
87
88 </div>
89
90
91 </div>
92
93 <!-- if includes -->
94
95 <div id="section">
96
97 <div id="class-list">
98 <h3 class="section-bar">Classes and Modules</h3>
99
100 Class <a href="TreeTagger/Chunk.html" class="link">RIR::TreeTagger::Chunk</a><br />
101 Class <a href="TreeTagger/TaggerChunker.html" class="link">RIR::TreeTagger::TaggerChunker</a><br />
102 Class <a href="TreeTagger/TaggerChunkerEnglish.html" class="link">RIR::TreeTagger::TaggerChunkerEnglish</a><br />
103 Class <a href="TreeTagger/TaggerChunkerFrench.html" class="link">RIR::TreeTagger::TaggerChunkerFrench</a><br />
104 Class <a href="TreeTagger/TaggerChunkerGerman.html" class="link">RIR::TreeTagger::TaggerChunkerGerman</a><br />
105
106 </div>
107
108
109
110
111 <!-- if method_list -->
112
113
114
115
116 </div>
117
118 <div id="validator-badges">
119 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
120 </div>
121
122 </body>
123 </html>
124
doc/classes/RIR/TreeTagger/Chunk.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>Class: RIR::TreeTagger::Chunk [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript">
10 // <![CDATA[
11
12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 }
15
16 function toggleCode( id ) {
17 if ( document.getElementById )
18 elem = document.getElementById( id );
19 else if ( document.all )
20 elem = eval( "document.all." + id );
21 else
22 return false;
23
24 elemStyle = elem.style;
25
26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block"
28 } else {
29 elemStyle.display = "none"
30 }
31
32 return true;
33 }
34
35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37
38 // ]]>
39 </script>
40
41 </head>
42 <body>
43
44
45 <div id="classHeader">
46 <table class="header-table">
47 <tr class="top-aligned-row">
48 <td><strong>Class</strong></td>
49 <td class="class-name-in-header">RIR::TreeTagger::Chunk</td>
50 </tr>
51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td>
53 <td>
54
55
56 <a href="../../../files/lib/rir/ttagger_rb.html">
57
58 lib/rir/ttagger.rb
59
60 </a>
61
62
63 <br />
64
65 </td>
66 </tr>
67
68
69 <tr class="top-aligned-row">
70 <td><strong>Parent:</strong></td>
71 <td>
72
73 Object
74
75 </td>
76 </tr>
77
78 </table>
79 </div>
80 <!-- banner header -->
81
82 <div id="bodyContent">
83
84 <div id="contextContent">
85
86 <div id="description">
87 <p>
88 Represents a <a href="Chunk.html">Chunk</a> extracted when parsing a <a
89 href="TaggerChunker.html">TaggerChunker</a> file.
90 </p>
91
92 </div>
93
94 </div>
95
96
97 <div id="method-list">
98 <h3 class="section-bar">Methods</h3>
99
100 <div class="name-list">
101
102 <a href="#M000003">new</a>&nbsp;&nbsp;
103
104 </div>
105 </div>
106
107 </div>
108
109 <!-- if includes -->
110
111 <div id="section">
112
113
114
115 <div id="attribute-list">
116 <h3 class="section-bar">Attributes</h3>
117
118 <div class="name-list">
119 <table>
120
121 <tr class="top-aligned-row context-row">
122 <td class="context-item-name">tag</td>
123
124 <td class="context-item-value">&nbsp;[R]&nbsp;</td>
125
126 <td class="context-item-desc"></td>
127 </tr>
128
129 <tr class="top-aligned-row context-row">
130 <td class="context-item-name">words</td>
131
132 <td class="context-item-value">&nbsp;[R]&nbsp;</td>
133
134 <td class="context-item-desc"></td>
135 </tr>
136
137 </table>
138 </div>
139 </div>
140
141
142 <!-- if method_list -->
143
144 <div id="methods">
145
146 <h3 class="section-bar">Public Class methods</h3>
147
148
149 <div id="method-M000003" class="method-detail">
150 <a name="M000003"></a>
151
152 <div class="method-heading">
153
154 <a href="Chunk.src/M000003.html" target="Code" class="method-signature"
155 onclick="popupCode('Chunk.src/M000003.html');return false;">
156
157 <span class="method-name">new</span><span class="method-args">(str,tag)</span>
158
159 </a>
160
161 </div>
162
163 <div class="method-description">
164
165 <p>
166 <tt>str</tt> is a string of whitespace-separated terms. For <tt>tag</tt>, see <a
167 href="ftp://ftp.ims.uni-stuttgart.de/pub/corpora/chunker-tagset-english.txt">ftp.ims.uni-stuttgart.de/pub/corpora/chunker-tagset-english.txt</a>
168 </p>
169
170 </div>
171 </div>
172
173
174
175 </div>
176
177
178
179
180 </div>
181
182 <div id="validator-badges">
183 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
184 </div>
185
186 </body>
187 </html>
188
doc/classes/RIR/TreeTagger/Chunk.src/M000003.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>new (RIR::TreeTagger::Chunk)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/ttagger.rb, line 86</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span> <span class="ruby-identifier">str</span>,<span class="ruby-identifier">tag</span>
12 <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">str</span>.<span class="ruby-identifier">split</span>
13 <span class="ruby-ivar">@tag</span> = <span class="ruby-identifier">tag</span>[<span class="ruby-value">1</span><span class="ruby-operator">..</span><span class="ruby-value">-2</span>]
14 <span class="ruby-keyword kw">end</span></pre>
15 </body>
16 </html>
17
doc/classes/RIR/TreeTagger/TaggerChunker.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>Class: RIR::TreeTagger::TaggerChunker [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript">
10 // <![CDATA[
11
12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 }
15
16 function toggleCode( id ) {
17 if ( document.getElementById )
18 elem = document.getElementById( id );
19 else if ( document.all )
20 elem = eval( "document.all." + id );
21 else
22 return false;
23
24 elemStyle = elem.style;
25
26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block"
28 } else {
29 elemStyle.display = "none"
30 }
31
32 return true;
33 }
34
35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37
38 // ]]>
39 </script>
40
41 </head>
42 <body>
43
44
45 <div id="classHeader">
46 <table class="header-table">
47 <tr class="top-aligned-row">
48 <td><strong>Class</strong></td>
49 <td class="class-name-in-header">RIR::TreeTagger::TaggerChunker</td>
50 </tr>
51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td>
53 <td>
54
55
56 <a href="../../../files/lib/rir/ttagger_rb.html">
57
58 lib/rir/ttagger.rb
59
60 </a>
61
62
63 <br />
64
65 </td>
66 </tr>
67
68
69 <tr class="top-aligned-row">
70 <td><strong>Parent:</strong></td>
71 <td>
72
73 Object
74
75 </td>
76 </tr>
77
78 </table>
79 </div>
80 <!-- banner header -->
81
82 <div id="bodyContent">
83
84 <div id="contextContent">
85
86 <div id="description">
87 <p>
88 This class handles generic parsing of tagger-chunker outputs.
89 </p>
90
91 </div>
92
93 </div>
94
95
96 <div id="method-list">
97 <h3 class="section-bar">Methods</h3>
98
99 <div class="name-list">
100
101 <a href="#M000002">new</a>&nbsp;&nbsp;
102
103 <a href="#M000001">parse</a>&nbsp;&nbsp;
104
105 </div>
106 </div>
107
108 </div>
109
110 <!-- if includes -->
111
112 <div id="section">
113
114
115
116 <div id="attribute-list">
117 <h3 class="section-bar">Attributes</h3>
118
119 <div class="name-list">
120 <table>
121
122 <tr class="top-aligned-row context-row">
123 <td class="context-item-name">chunks</td>
124
125 <td class="context-item-value">&nbsp;[R]&nbsp;</td>
126
127 <td class="context-item-desc"></td>
128 </tr>
129
130 <tr class="top-aligned-row context-row">
131 <td class="context-item-name">file</td>
132
133 <td class="context-item-value">&nbsp;[R]&nbsp;</td>
134
135 <td class="context-item-desc"></td>
136 </tr>
137
138 </table>
139 </div>
140 </div>
141
142
143 <!-- if method_list -->
144
145 <div id="methods">
146
147 <h3 class="section-bar">Public Class methods</h3>
148
149
150 <div id="method-M000002" class="method-detail">
151 <a name="M000002"></a>
152
153 <div class="method-heading">
154
155 <a href="TaggerChunker.src/M000002.html" target="Code" class="method-signature"
156 onclick="popupCode('TaggerChunker.src/M000002.html');return false;">
157
158 <span class="method-name">new</span><span class="method-args">(chunk_file)</span>
159
160 </a>
161
162 </div>
163
164 <div class="method-description">
165
166 <p>
167 Initializes parsing. <tt>chunk_file</tt> must be a valid path to a file
168 containing the output of a <tt>tagger-chunker-</tt> script.
169 </p>
170 <pre>
171 TaggerChunker.new(&quot;ttout/2010020&quot;) #=&gt; #&lt;RIR::TreeTagger::TaggerChunker:0x92fd088 @chunks=[#&lt;RIR::TreeTagger::Chunk:0x8ec5a10 @words=[&quot;robert&quot;, &quot;schumann&quot;], @tag=&quot;NC&quot;&gt;, ...] ...&gt;
172 </pre>
173
174 </div>
175 </div>
176
177
178 <div id="method-M000001" class="method-detail">
179 <a name="M000001"></a>
180
181 <div class="method-heading">
182
183 <a href="TaggerChunker.src/M000001.html" target="Code" class="method-signature"
184 onclick="popupCode('TaggerChunker.src/M000001.html');return false;">
185
186 <span class="method-name">parse</span><span class="method-args">(chunk_lines)</span>
187
188 </a>
189
190 </div>
191
192 <div class="method-description">
193
194 <p>
195 Parses the lines of a tagger-chunker output and returns an Array of <a
196 href="Chunk.html">Chunk</a> objects.
197 </p>
198
199 </div>
200 </div>
201
202
203
204 </div>
205
206
207
208
209 </div>
210
211 <div id="validator-badges">
212 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
213 </div>
214
215 </body>
216 </html>
217
doc/classes/RIR/TreeTagger/TaggerChunker.src/M000001.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>parse (RIR::TreeTagger::TaggerChunker)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/ttagger.rb, line 33</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">parse</span> <span class="ruby-identifier">chunk_lines</span>
12 <span class="ruby-identifier">open</span> = <span class="ruby-keyword kw">false</span>
13 <span class="ruby-identifier">tag</span> = <span class="ruby-keyword kw">nil</span>
14
15 <span class="ruby-identifier">chunks</span> = []
16 <span class="ruby-identifier">words</span> = []
17
18 <span class="ruby-identifier">chunk_lines</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">l</span><span class="ruby-operator">|</span>
19 <span class="ruby-identifier">l</span>.<span class="ruby-identifier">chomp!</span>
20 <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">l</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^&lt;\w+&gt;$/</span>
21 <span class="ruby-identifier">open</span> = <span class="ruby-keyword kw">true</span>
22 <span class="ruby-identifier">tag</span> = <span class="ruby-identifier">l</span>
23 <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">l</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^&lt;\/\w+&gt;$/</span>
24 <span class="ruby-keyword kw">if</span> <span class="ruby-operator">!</span><span class="ruby-identifier">words</span>.<span class="ruby-identifier">empty?</span> <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">open</span> <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">l</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">tag</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/&lt;/</span>, <span class="ruby-value str">'&lt;/'</span>)
25 <span class="ruby-identifier">open</span> = <span class="ruby-keyword kw">false</span>
26 <span class="ruby-identifier">chunks</span>.<span class="ruby-identifier">push</span> <span class="ruby-constant">Chunk</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">words</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">&quot; &quot;</span>), <span class="ruby-identifier">tag</span>)
27 <span class="ruby-identifier">words</span>.<span class="ruby-identifier">clear</span>
28 <span class="ruby-keyword kw">else</span>
29 <span class="ruby-keyword kw">next</span>
30 <span class="ruby-keyword kw">end</span>
31 <span class="ruby-keyword kw">else</span>
32 <span class="ruby-identifier">words</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">l</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">first</span>)
33 <span class="ruby-keyword kw">end</span>
34 <span class="ruby-keyword kw">end</span>
35
36 <span class="ruby-identifier">chunks</span>
37 <span class="ruby-keyword kw">end</span></pre>
38 </body>
39 </html>
40
doc/classes/RIR/TreeTagger/TaggerChunker.src/M000002.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>new (RIR::TreeTagger::TaggerChunker)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/ttagger.rb, line 65</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span> <span class="ruby-identifier">chunk_file</span>
12 <span class="ruby-ivar">@chunks</span> = <span class="ruby-constant">TaggerChunker</span>.<span class="ruby-identifier">parse</span> <span class="ruby-constant">File</span>.<span class="ruby-identifier">open</span>(<span class="ruby-identifier">chunk_file</span>).<span class="ruby-identifier">readlines</span>
13 <span class="ruby-keyword kw">end</span></pre>
14 </body>
15 </html>
16
doc/classes/RIR/TreeTagger/TaggerChunkerEnglish.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>Class: RIR::TreeTagger::TaggerChunkerEnglish [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript">
10 // <![CDATA[
11
12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 }
15
16 function toggleCode( id ) {
17 if ( document.getElementById )
18 elem = document.getElementById( id );
19 else if ( document.all )
20 elem = eval( "document.all." + id );
21 else
22 return false;
23
24 elemStyle = elem.style;
25
26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block"
28 } else {
29 elemStyle.display = "none"
30 }
31
32 return true;
33 }
34
35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37
38 // ]]>
39 </script>
40
41 </head>
42 <body>
43
44
45 <div id="classHeader">
46 <table class="header-table">
47 <tr class="top-aligned-row">
48 <td><strong>Class</strong></td>
49 <td class="class-name-in-header">RIR::TreeTagger::TaggerChunkerEnglish</td>
50 </tr>
51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td>
53 <td>
54
55
56 <a href="../../../files/lib/rir/ttagger_rb.html">
57
58 lib/rir/ttagger.rb
59
60 </a>
61
62
63 <br />
64
65 </td>
66 </tr>
67
68
69 <tr class="top-aligned-row">
70 <td><strong>Parent:</strong></td>
71 <td>
72
73 <a href="TaggerChunker.html">
74
75 RIR::TreeTagger::TaggerChunker
76
77 </a>
78
79 </td>
80 </tr>
81
82 </table>
83 </div>
84 <!-- banner header -->
85
86 <div id="bodyContent">
87
88 <div id="contextContent">
89
90 </div>
91
92
93 </div>
94
95 <!-- if includes -->
96
97 <div id="section">
98
99
100
101
102 <!-- if method_list -->
103
104
105
106
107 </div>
108
109 <div id="validator-badges">
110 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
111 </div>
112
113 </body>
114 </html>
115
doc/classes/RIR/TreeTagger/TaggerChunkerFrench.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>Class: RIR::TreeTagger::TaggerChunkerFrench [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript">
10 // <![CDATA[
11
12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 }
15
16 function toggleCode( id ) {
17 if ( document.getElementById )
18 elem = document.getElementById( id );
19 else if ( document.all )
20 elem = eval( "document.all." + id );
21 else
22 return false;
23
24 elemStyle = elem.style;
25
26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block"
28 } else {
29 elemStyle.display = "none"
30 }
31
32 return true;
33 }
34
35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37
38 // ]]>
39 </script>
40
41 </head>
42 <body>
43
44
45 <div id="classHeader">
46 <table class="header-table">
47 <tr class="top-aligned-row">
48 <td><strong>Class</strong></td>
49 <td class="class-name-in-header">RIR::TreeTagger::TaggerChunkerFrench</td>
50 </tr>
51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td>
53 <td>
54
55
56 <a href="../../../files/lib/rir/ttagger_rb.html">
57
58 lib/rir/ttagger.rb
59
60 </a>
61
62
63 <br />
64
65 </td>
66 </tr>
67
68
69 <tr class="top-aligned-row">
70 <td><strong>Parent:</strong></td>
71 <td>
72
73 <a href="TaggerChunker.html">
74
75 RIR::TreeTagger::TaggerChunker
76
77 </a>
78
79 </td>
80 </tr>
81
82 </table>
83 </div>
84 <!-- banner header -->
85
86 <div id="bodyContent">
87
88 <div id="contextContent">
89
90 </div>
91
92
93 </div>
94
95 <!-- if includes -->
96
97 <div id="section">
98
99
100
101
102 <!-- if method_list -->
103
104
105
106
107 </div>
108
109 <div id="validator-badges">
110 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
111 </div>
112
113 </body>
114 </html>
115
doc/classes/RIR/TreeTagger/TaggerChunkerGerman.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>Class: RIR::TreeTagger::TaggerChunkerGerman [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript">
10 // <![CDATA[
11
12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 }
15
16 function toggleCode( id ) {
17 if ( document.getElementById )
18 elem = document.getElementById( id );
19 else if ( document.all )
20 elem = eval( "document.all." + id );
21 else
22 return false;
23
24 elemStyle = elem.style;
25
26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block"
28 } else {
29 elemStyle.display = "none"
30 }
31
32 return true;
33 }
34
35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37
38 // ]]>
39 </script>
40
41 </head>
42 <body>
43
44
45 <div id="classHeader">
46 <table class="header-table">
47 <tr class="top-aligned-row">
48 <td><strong>Class</strong></td>
49 <td class="class-name-in-header">RIR::TreeTagger::TaggerChunkerGerman</td>
50 </tr>
51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td>
53 <td>
54
55
56 <a href="../../../files/lib/rir/ttagger_rb.html">
57
58 lib/rir/ttagger.rb
59
60 </a>
61
62
63 <br />
64
65 </td>
66 </tr>
67
68
69 <tr class="top-aligned-row">
70 <td><strong>Parent:</strong></td>
71 <td>
72
73 <a href="TaggerChunker.html">
74
75 RIR::TreeTagger::TaggerChunker
76
77 </a>
78
79 </td>
80 </tr>
81
82 </table>
83 </div>
84 <!-- banner header -->
85
86 <div id="bodyContent">
87
88 <div id="contextContent">
89
90 </div>
91
92
93 </div>
94
95 <!-- if includes -->
96
97 <div id="section">
98
99
100
101
102 <!-- if method_list -->
103
104
105
106
107 </div>
108
109 <div id="validator-badges">
110 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
111 </div>
112
113 </body>
114 </html>
115
doc/classes/RIR/WebDocument.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>Class: RIR::WebDocument [RDoc Documentation]</title> 5 <title>Class: RIR::WebDocument [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="classHeader"> 45 <div id="classHeader">
46 <table class="header-table"> 46 <table class="header-table">
47 <tr class="top-aligned-row"> 47 <tr class="top-aligned-row">
48 <td><strong>Class</strong></td> 48 <td><strong>Class</strong></td>
49 <td class="class-name-in-header">RIR::WebDocument</td> 49 <td class="class-name-in-header">RIR::WebDocument</td>
50 </tr> 50 </tr>
51 <tr class="top-aligned-row"> 51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td> 52 <td><strong>In:</strong></td>
53 <td> 53 <td>
54 54
55 55
56 <a href="../../files/lib/rir/document_rb.html"> 56 <a href="../../files/lib/rir/document_rb.html">
57 57
58 lib/rir/document.rb 58 lib/rir/document.rb
59 59
60 </a> 60 </a>
61 61
62 62
63 <br /> 63 <br />
64 64
65 </td> 65 </td>
66 </tr> 66 </tr>
67 67
68 68
69 <tr class="top-aligned-row"> 69 <tr class="top-aligned-row">
70 <td><strong>Parent:</strong></td> 70 <td><strong>Parent:</strong></td>
71 <td> 71 <td>
72 72
73 <a href="Document.html"> 73 <a href="Document.html">
74 74
75 RIR::Document 75 RIR::Document
76 76
77 </a> 77 </a>
78 78
79 </td> 79 </td>
80 </tr> 80 </tr>
81 81
82 </table> 82 </table>
83 </div> 83 </div>
84 <!-- banner header --> 84 <!-- banner header -->
85 85
86 <div id="bodyContent"> 86 <div id="bodyContent">
87 87
88 <div id="contextContent"> 88 <div id="contextContent">
89 89
90 <div id="description"> 90 <div id="description">
91 <p> 91 <p>
92 A <a href="WebDocument.html">WebDocument</a> is a <a 92 A <a href="WebDocument.html">WebDocument</a> is a <a
93 href="Document.html">Document</a> with a <tt>url</tt>. 93 href="Document.html">Document</a> with a <tt>url</tt>.
94 </p> 94 </p>
95 95
96 </div> 96 </div>
97 97
98 </div> 98 </div>
99 99
100 100
101 <div id="method-list"> 101 <div id="method-list">
102 <h3 class="section-bar">Methods</h3> 102 <h3 class="section-bar">Methods</h3>
103 103
104 <div class="name-list"> 104 <div class="name-list">
105 105
106 <a href="#M000024">get_content</a>&nbsp;&nbsp; 106 <a href="#M000014">get_content</a>&nbsp;&nbsp;
107 107
108 <a href="#M000025">new</a>&nbsp;&nbsp; 108 <a href="#M000015">new</a>&nbsp;&nbsp;
109 109
110 </div> 110 </div>
111 </div> 111 </div>
112 112
113 </div> 113 </div>
114 114
115 <!-- if includes --> 115 <!-- if includes -->
116 116
117 <div id="section"> 117 <div id="section">
118 118
119 119
120 120
121 <div id="attribute-list"> 121 <div id="attribute-list">
122 <h3 class="section-bar">Attributes</h3> 122 <h3 class="section-bar">Attributes</h3>
123 123
124 <div class="name-list"> 124 <div class="name-list">
125 <table> 125 <table>
126 126
127 <tr class="top-aligned-row context-row"> 127 <tr class="top-aligned-row context-row">
128 <td class="context-item-name">url</td> 128 <td class="context-item-name">url</td>
129 129
130 <td class="context-item-value">&nbsp;[R]&nbsp;</td> 130 <td class="context-item-value">&nbsp;[R]&nbsp;</td>
131 131
132 <td class="context-item-desc"></td> 132 <td class="context-item-desc"></td>
133 </tr> 133 </tr>
134 134
135 </table> 135 </table>
136 </div> 136 </div>
137 </div> 137 </div>
138 138
139 139
140 <!-- if method_list --> 140 <!-- if method_list -->
141 141
142 <div id="methods"> 142 <div id="methods">
143 143
144 <h3 class="section-bar">Public Class methods</h3> 144 <h3 class="section-bar">Public Class methods</h3>
145 145
146 146
147 <div id="method-M000024" class="method-detail"> 147 <div id="method-M000014" class="method-detail">
148 <a name="M000024"></a> 148 <a name="M000014"></a>
149 149
150 <div class="method-heading"> 150 <div class="method-heading">
151 151
152 <a href="WebDocument.src/M000024.html" target="Code" class="method-signature" 152 <a href="WebDocument.src/M000014.html" target="Code" class="method-signature"
153 onclick="popupCode('WebDocument.src/M000024.html');return false;"> 153 onclick="popupCode('WebDocument.src/M000014.html');return false;">
154 154
155 <span class="method-name">get_content</span><span class="method-args">(url)</span> 155 <span class="method-name">get_content</span><span class="method-args">(url)</span>
156 156
157 </a> 157 </a>
158 158
159 </div> 159 </div>
160 160
161 <div class="method-description"> 161 <div class="method-description">
162 162
163 <p> 163 <p>
164 Returns the HTML text from the page of a given <tt>url</tt>. 164 Returns the HTML text from the page of a given <tt>url</tt>.
165 </p> 165 </p>
166 166
167 </div> 167 </div>
168 </div> 168 </div>
169 169
170 170
171 <div id="method-M000025" class="method-detail"> 171 <div id="method-M000015" class="method-detail">
172 <a name="M000025"></a> 172 <a name="M000015"></a>
173 173
174 <div class="method-heading"> 174 <div class="method-heading">
175 175
176 <a href="WebDocument.src/M000025.html" target="Code" class="method-signature" 176 <a href="WebDocument.src/M000015.html" target="Code" class="method-signature"
177 onclick="popupCode('WebDocument.src/M000025.html');return false;"> 177 onclick="popupCode('WebDocument.src/M000015.html');return false;">
178 178
179 <span class="method-name">new</span><span class="method-args">(url)</span> 179 <span class="method-name">new</span><span class="method-args">(url)</span>
180 180
181 </a> 181 </a>
182 182
183 </div> 183 </div>
184 184
185 <div class="method-description"> 185 <div class="method-description">
186 186
187 <p> 187 <p>
188 <a href="WebDocument.html">WebDocument</a> constructor, the content of the 188 <a href="WebDocument.html">WebDocument</a> constructor, the content of the
189 <a href="Document.html">Document</a> is the HTML page without the tags. 189 <a href="Document.html">Document</a> is the HTML page without the tags.
190 </p> 190 </p>
191 191
192 </div> 192 </div>
193 </div> 193 </div>
194 194
195 195
196 196
197 </div> 197 </div>
198 198
199 199
200 200
201 201
202 </div> 202 </div>
203 203
204 <div id="validator-badges"> 204 <div id="validator-badges">
205 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 205 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
206 </div> 206 </div>
207 207
208 </body> 208 </body>
209 </html> 209 </html>
210 210
doc/classes/RIR/WebDocument.src/M000014.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>get_content (RIR::WebDocument)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 112</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>)
12 <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span>
13 <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>))
14 <span class="ruby-keyword kw">end</span></pre>
15 </body>
16 </html>
17
doc/classes/RIR/WebDocument.src/M000015.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>get_content (RIR::WebDocument)</title> 5 <title>new (RIR::WebDocument)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> 7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head> 8 </head>
9 <body class="standalone-code"> 9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 105</span> 10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 119</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>) 11 <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">url</span>)
12 <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span> 12 <span class="ruby-ivar">@url</span> = <span class="ruby-identifier">url</span>
13 <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>)) 13 <span class="ruby-keyword kw">super</span> <span class="ruby-constant">WebDocument</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>).<span class="ruby-identifier">strip_javascripts</span>.<span class="ruby-identifier">strip_stylesheets</span>.<span class="ruby-identifier">strip_xml_tags</span>
14 <span class="ruby-keyword kw">end</span></pre> 14 <span class="ruby-keyword kw">end</span></pre>
15 </body> 15 </body>
16 </html> 16 </html>
17 17
doc/classes/RIR/WikipediaPage.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>Class: RIR::WikipediaPage [RDoc Documentation]</title> 5 <title>Class: RIR::WikipediaPage [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="classHeader"> 45 <div id="classHeader">
46 <table class="header-table"> 46 <table class="header-table">
47 <tr class="top-aligned-row"> 47 <tr class="top-aligned-row">
48 <td><strong>Class</strong></td> 48 <td><strong>Class</strong></td>
49 <td class="class-name-in-header">RIR::WikipediaPage</td> 49 <td class="class-name-in-header">RIR::WikipediaPage</td>
50 </tr> 50 </tr>
51 <tr class="top-aligned-row"> 51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td> 52 <td><strong>In:</strong></td>
53 <td> 53 <td>
54 54
55 55
56 <a href="../../files/lib/rir/document_rb.html"> 56 <a href="../../files/lib/rir/document_rb.html">
57 57
58 lib/rir/document.rb 58 lib/rir/document.rb
59 59
60 </a> 60 </a>
61 61
62 62
63 <br /> 63 <br />
64 64
65 </td> 65 </td>
66 </tr> 66 </tr>
67 67
68 68
69 <tr class="top-aligned-row"> 69 <tr class="top-aligned-row">
70 <td><strong>Parent:</strong></td> 70 <td><strong>Parent:</strong></td>
71 <td> 71 <td>
72 72
73 <a href="WebDocument.html"> 73 <a href="WebDocument.html">
74 74
75 RIR::WebDocument 75 RIR::WebDocument
76 76
77 </a> 77 </a>
78 78
79 </td> 79 </td>
80 </tr> 80 </tr>
81 81
82 </table> 82 </table>
83 </div> 83 </div>
84 <!-- banner header --> 84 <!-- banner header -->
85 85
86 <div id="bodyContent"> 86 <div id="bodyContent">
87 87
88 <div id="contextContent"> 88 <div id="contextContent">
89 89
90 <div id="description"> 90 <div id="description">
91 <p> 91 <p>
92 A <a href="WikipediaPage.html">WikipediaPage</a> is a <a 92 A <a href="WikipediaPage.html">WikipediaPage</a> is a <a
93 href="WebDocument.html">WebDocument</a>. 93 href="WebDocument.html">WebDocument</a>.
94 </p> 94 </p>
95 95
96 </div> 96 </div>
97 97
98 </div> 98 </div>
99 99
100 100
101 <div id="method-list"> 101 <div id="method-list">
102 <h3 class="section-bar">Methods</h3> 102 <h3 class="section-bar">Methods</h3>
103 103
104 <div class="name-list"> 104 <div class="name-list">
105 105
106 <a href="#M000027">get_url</a>&nbsp;&nbsp; 106 <a href="#M000017">get_url</a>&nbsp;&nbsp;
107 107
108 <a href="#M000028">search_homepage</a>&nbsp;&nbsp; 108 <a href="#M000018">search_homepage</a>&nbsp;&nbsp;
109 109
110 <a href="#M000026">search_wikipedia_titles</a>&nbsp;&nbsp; 110 <a href="#M000016">search_wikipedia_titles</a>&nbsp;&nbsp;
111 111
112 </div> 112 </div>
113 </div> 113 </div>
114 114
115 </div> 115 </div>
116 116
117 <!-- if includes --> 117 <!-- if includes -->
118 118
119 <div id="section"> 119 <div id="section">
120 120
121 121
122 122
123 123
124 <!-- if method_list --> 124 <!-- if method_list -->
125 125
126 <div id="methods"> 126 <div id="methods">
127 127
128 <h3 class="section-bar">Public Class methods</h3> 128 <h3 class="section-bar">Public Class methods</h3>
129 129
130 130
131 <div id="method-M000027" class="method-detail"> 131 <div id="method-M000017" class="method-detail">
132 <a name="M000027"></a> 132 <a name="M000017"></a>
133 133
134 <div class="method-heading"> 134 <div class="method-heading">
135 135
136 <a href="WikipediaPage.src/M000027.html" target="Code" class="method-signature" 136 <a href="WikipediaPage.src/M000017.html" target="Code" class="method-signature"
137 onclick="popupCode('WikipediaPage.src/M000027.html');return false;"> 137 onclick="popupCode('WikipediaPage.src/M000017.html');return false;">
138 138
139 <span class="method-name">get_url</span><span class="method-args">(name)</span> 139 <span class="method-name">get_url</span><span class="method-args">(name)</span>
140 140
141 </a> 141 </a>
142 142
143 </div> 143 </div>
144 144
145 <div class="method-description"> 145 <div class="method-description">
146 146
147 </div> 147 </div>
148 </div> 148 </div>
149 149
150 150
151 <div id="method-M000028" class="method-detail"> 151 <div id="method-M000018" class="method-detail">
152 <a name="M000028"></a> 152 <a name="M000018"></a>
153 153
154 <div class="method-heading"> 154 <div class="method-heading">
155 155
156 <a href="WikipediaPage.src/M000028.html" target="Code" class="method-signature" 156 <a href="WikipediaPage.src/M000018.html" target="Code" class="method-signature"
157 onclick="popupCode('WikipediaPage.src/M000028.html');return false;"> 157 onclick="popupCode('WikipediaPage.src/M000018.html');return false;">
158 158
159 <span class="method-name">search_homepage</span><span class="method-args">(name)</span> 159 <span class="method-name">search_homepage</span><span class="method-args">(name)</span>
160 160
161 </a> 161 </a>
162 162
163 </div> 163 </div>
164 164
165 <div class="method-description"> 165 <div class="method-description">
166 166
167 </div> 167 </div>
168 </div> 168 </div>
169 169
170 170
171 <div id="method-M000026" class="method-detail"> 171 <div id="method-M000016" class="method-detail">
172 <a name="M000026"></a> 172 <a name="M000016"></a>
173 173
174 <div class="method-heading"> 174 <div class="method-heading">
175 175
176 <a href="WikipediaPage.src/M000026.html" target="Code" class="method-signature" 176 <a href="WikipediaPage.src/M000016.html" target="Code" class="method-signature"
177 onclick="popupCode('WikipediaPage.src/M000026.html');return false;"> 177 onclick="popupCode('WikipediaPage.src/M000016.html');return false;">
178 178
179 <span class="method-name">search_wikipedia_titles</span><span class="method-args">(name)</span> 179 <span class="method-name">search_wikipedia_titles</span><span class="method-args">(name)</span>
180 180
181 </a> 181 </a>
182 182
183 </div> 183 </div>
184 184
185 <div class="method-description"> 185 <div class="method-description">
186 186
187 </div> 187 </div>
188 </div> 188 </div>
189 189
190 190
191 191
192 </div> 192 </div>
193 193
194 194
195 195
196 196
197 </div> 197 </div>
198 198
199 <div id="validator-badges"> 199 <div id="validator-badges">
200 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 200 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
201 </div> 201 </div>
202 202
203 </body> 203 </body>
204 </html> 204 </html>
205 205
doc/classes/RIR/WikipediaPage.src/M000016.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>search_wikipedia_titles (RIR::WikipediaPage)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 132</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">search_wikipedia_titles</span>(<span class="ruby-identifier">name</span>)
12 <span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">&quot;Bad encoding&quot;</span>, <span class="ruby-identifier">name</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">isutf8</span>
13
14 <span class="ruby-identifier">res</span> = <span class="ruby-constant">REXML</span><span class="ruby-operator">::</span><span class="ruby-constant">Document</span>.<span class="ruby-identifier">new</span>(<span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>( <span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span> <span class="ruby-node">&quot;http://en.wikipedia.org/w/api.php?action=query&amp;list=search&amp;srsearch=#{URI.escape name}&amp;format=xml&quot;</span> ).<span class="ruby-identifier">toutf8</span>).<span class="ruby-identifier">elements</span>[<span class="ruby-value str">'api/query/search'</span>]
15
16 <span class="ruby-identifier">res</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">e</span><span class="ruby-operator">|</span> <span class="ruby-identifier">e</span>.<span class="ruby-identifier">attributes</span>[<span class="ruby-value str">'title'</span>] } <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">res</span>.<span class="ruby-identifier">nil?</span>
17 <span class="ruby-keyword kw">end</span></pre>
18 </body>
19 </html>
20
doc/classes/RIR/WikipediaPage.src/M000017.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>get_url (RIR::WikipediaPage)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 140</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_url</span>(<span class="ruby-identifier">name</span>)
12 <span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">&quot;Bad encoding&quot;</span>, <span class="ruby-identifier">name</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">isutf8</span>
13
14 <span class="ruby-identifier">atts</span> = <span class="ruby-constant">REXML</span><span class="ruby-operator">::</span><span class="ruby-constant">Document</span>.<span class="ruby-identifier">new</span>(<span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>( <span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span> <span class="ruby-node">&quot;http://en.wikipedia.org/w/api.php?action=query&amp;titles=#{URI.escape name}&amp;inprop=url&amp;prop=info&amp;format=xml&quot;</span> ).<span class="ruby-identifier">toutf8</span>).<span class="ruby-identifier">elements</span>[<span class="ruby-value str">'api/query/pages/page'</span>].<span class="ruby-identifier">attributes</span>
15
16 <span class="ruby-identifier">atts</span>[<span class="ruby-value str">'fullurl'</span>] <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">atts</span>[<span class="ruby-value str">'missing'</span>].<span class="ruby-identifier">nil?</span>
17 <span class="ruby-keyword kw">end</span></pre>
18 </body>
19 </html>
20
doc/classes/RIR/WikipediaPage.src/M000018.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>search_homepage (RIR::WikipediaPage)</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 </head>
9 <body class="standalone-code">
10 <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 148</span>
11 <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">search_homepage</span>(<span class="ruby-identifier">name</span>)
12 <span class="ruby-identifier">title</span> = <span class="ruby-constant">WikipediaPage</span>.<span class="ruby-identifier">search_wikipedia_titles</span> <span class="ruby-identifier">name</span>
13
14 <span class="ruby-constant">WikipediaPage</span>.<span class="ruby-identifier">new</span>(<span class="ruby-constant">WikipediaPage</span>.<span class="ruby-identifier">get_url</span> <span class="ruby-identifier">title</span>[<span class="ruby-value">0</span>]) <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">title</span>.<span class="ruby-identifier">nil?</span> <span class="ruby-operator">||</span> <span class="ruby-identifier">title</span>.<span class="ruby-identifier">empty?</span>
15 <span class="ruby-keyword kw">end</span></pre>
16 </body>
17 </html>
18
1 Tue, 23 Nov 2010 18:20:46 +0100 1 Thu, 25 Nov 2010 17:01:52 +0100
2 2
doc/files/lib/rir/document_rb.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>File: document.rb [RDoc Documentation]</title> 5 <title>File: document.rb [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="fileHeader"> 45 <div id="fileHeader">
46 <h1>document.rb</h1> 46 <h1>document.rb</h1>
47 <table class="header-table"> 47 <table class="header-table">
48 <tr class="top-aligned-row"> 48 <tr class="top-aligned-row">
49 <td><strong>Path:</strong></td> 49 <td><strong>Path:</strong></td>
50 <td>lib/rir/document.rb 50 <td>lib/rir/document.rb
51 51
52 </td> 52 </td>
53 </tr> 53 </tr>
54 <tr class="top-aligned-row"> 54 <tr class="top-aligned-row">
55 <td><strong>Last Update:</strong></td> 55 <td><strong>Last Update:</strong></td>
56 <td>2010-11-23 18:14:13 +0100</td> 56 <td>2010-11-25 16:04:20 +0100</td>
57 </tr> 57 </tr>
58 </table> 58 </table>
59 </div> 59 </div>
60 <!-- banner header --> 60 <!-- banner header -->
61 61
62 <div id="bodyContent"> 62 <div id="bodyContent">
63 63
64 <div id="contextContent"> 64 <div id="contextContent">
65 65
66 <div id="description"> 66 <div id="description">
67 <p> 67 <p>
68 This file is a part of an Information Retrieval oriented Ruby library 68 This file is a part of an Information Retrieval oriented Ruby library
69 </p> 69 </p>
70 <p> 70 <p>
71 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> 71 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
72 </p> 72 </p>
73 <p> 73 <p>
74 This program is free software: you can redistribute it and/or modify it 74 This program is free software: you can redistribute it and/or modify it
75 under the terms of the GNU General Public License as published by the Free 75 under the terms of the GNU General Public License as published by the Free
76 Software Foundation, either version 3 of the License, or (at your option) 76 Software Foundation, either version 3 of the License, or (at your option)
77 any later version. 77 any later version.
78 </p> 78 </p>
79 <p> 79 <p>
80 This program is distributed in the hope that it will be useful, but WITHOUT 80 This program is distributed in the hope that it will be useful, but WITHOUT
81 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 81 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
82 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 82 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
83 more details. 83 more details.
84 </p> 84 </p>
85 <p> 85 <p>
86 You should have received a copy of the GNU General Public License along 86 You should have received a copy of the GNU General Public License along
87 with this program. If not, see <<a 87 with this program. If not, see <<a
88 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. 88 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
89 </p> 89 </p>
90 90
91 </div> 91 </div>
92 92
93 <div id="requires-list"> 93 <div id="requires-list">
94 <h3 class="section-bar">Required files</h3> 94 <h3 class="section-bar">Required files</h3>
95 95
96 <div class="name-list"> 96 <div class="name-list">
97 97
98 net/http&nbsp;&nbsp; 98 net/http&nbsp;&nbsp;
99 99
100 rexml/document&nbsp;&nbsp; 100 rexml/document&nbsp;&nbsp;
101 101
102 net/http&nbsp;&nbsp; 102 net/http&nbsp;&nbsp;
103 103
104 kconv&nbsp;&nbsp; 104 kconv&nbsp;&nbsp;
105 105
106 </div> 106 </div>
107 </div> 107 </div>
108 108
109 </div> 109 </div>
110 110
111 111
112 </div> 112 </div>
113 113
114 <!-- if includes --> 114 <!-- if includes -->
115 115
116 <div id="section"> 116 <div id="section">
117 117
118 118
119 119
120 120
121 <!-- if method_list --> 121 <!-- if method_list -->
122 122
123 123
124 124
125 125
126 </div> 126 </div>
127 127
128 <div id="validator-badges"> 128 <div id="validator-badges">
129 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 129 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
130 </div> 130 </div>
131 131
132 </body> 132 </body>
133 </html> 133 </html>
134 134
doc/files/lib/rir/query_rb.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>File: query.rb [RDoc Documentation]</title> 5 <title>File: query.rb [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="fileHeader"> 45 <div id="fileHeader">
46 <h1>query.rb</h1> 46 <h1>query.rb</h1>
47 <table class="header-table"> 47 <table class="header-table">
48 <tr class="top-aligned-row"> 48 <tr class="top-aligned-row">
49 <td><strong>Path:</strong></td> 49 <td><strong>Path:</strong></td>
50 <td>lib/rir/query.rb 50 <td>lib/rir/query.rb
51 51
52 </td> 52 </td>
53 </tr> 53 </tr>
54 <tr class="top-aligned-row"> 54 <tr class="top-aligned-row">
55 <td><strong>Last Update:</strong></td> 55 <td><strong>Last Update:</strong></td>
56 <td>2010-11-23 18:20:30 +0100</td> 56 <td>2010-11-25 13:25:18 +0100</td>
57 </tr> 57 </tr>
58 </table> 58 </table>
59 </div> 59 </div>
60 <!-- banner header --> 60 <!-- banner header -->
61 61
62 <div id="bodyContent"> 62 <div id="bodyContent">
63 63
64 <div id="contextContent"> 64 <div id="contextContent">
65 65
66 <div id="description"> 66 <div id="description">
67 <p> 67 <p>
68 This file is a part of an Information Retrieval oriented Ruby library 68 This file is a part of an Information Retrieval oriented Ruby library
69 </p> 69 </p>
70 <p> 70 <p>
71 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> 71 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
72 </p> 72 </p>
73 <p> 73 <p>
74 This program is free software: you can redistribute it and/or modify it 74 This program is free software: you can redistribute it and/or modify it
75 under the terms of the GNU General Public License as published by the Free 75 under the terms of the GNU General Public License as published by the Free
76 Software Foundation, either version 3 of the License, or (at your option) 76 Software Foundation, either version 3 of the License, or (at your option)
77 any later version. 77 any later version.
78 </p> 78 </p>
79 <p> 79 <p>
80 This program is distributed in the hope that it will be useful, but WITHOUT 80 This program is distributed in the hope that it will be useful, but WITHOUT
81 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 81 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
82 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 82 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
83 more details. 83 more details.
84 </p> 84 </p>
85 <p> 85 <p>
86 You should have received a copy of the GNU General Public License along 86 You should have received a copy of the GNU General Public License along
87 with this program. If not, see <<a 87 with this program. If not, see <<a
88 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. 88 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
89 </p> 89 </p>
90 90
91 </div> 91 </div>
92 92
93 </div> 93 </div>
94 94
95 95
96 </div> 96 </div>
97 97
98 <!-- if includes --> 98 <!-- if includes -->
99 99
100 <div id="section"> 100 <div id="section">
101 101
102 102
103 103
104 104
105 <!-- if method_list --> 105 <!-- if method_list -->
106 106
107 107
108 108
109 109
110 </div> 110 </div>
111 111
112 <div id="validator-badges"> 112 <div id="validator-badges">
113 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 113 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
114 </div> 114 </div>
115 115
116 </body> 116 </body>
117 </html> 117 </html>
118 118
doc/files/lib/rir/ttagger_rb.html
File was created 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head>
5 <title>File: ttagger.rb [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript">
10 // <![CDATA[
11
12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 }
15
16 function toggleCode( id ) {
17 if ( document.getElementById )
18 elem = document.getElementById( id );
19 else if ( document.all )
20 elem = eval( "document.all." + id );
21 else
22 return false;
23
24 elemStyle = elem.style;
25
26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block"
28 } else {
29 elemStyle.display = "none"
30 }
31
32 return true;
33 }
34
35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37
38 // ]]>
39 </script>
40
41 </head>
42 <body>
43
44
45 <div id="fileHeader">
46 <h1>ttagger.rb</h1>
47 <table class="header-table">
48 <tr class="top-aligned-row">
49 <td><strong>Path:</strong></td>
50 <td>lib/rir/ttagger.rb
51
52 </td>
53 </tr>
54 <tr class="top-aligned-row">
55 <td><strong>Last Update:</strong></td>
56 <td>2010-11-25 17:01:46 +0100</td>
57 </tr>
58 </table>
59 </div>
60 <!-- banner header -->
61
62 <div id="bodyContent">
63
64 <div id="contextContent">
65
66 <div id="description">
67 <p>
68 This file is a part of an Information Retrieval oriented Ruby library
69 </p>
70 <p>
71 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
72 </p>
73 <p>
74 This program is free software: you can redistribute it and/or modify it
75 under the terms of the GNU General Public License as published by the Free
76 Software Foundation, either version 3 of the License, or (at your option)
77 any later version.
78 </p>
79 <p>
80 This program is distributed in the hope that it will be useful, but WITHOUT
81 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
82 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
83 more details.
84 </p>
85 <p>
86 You should have received a copy of the GNU General Public License along
87 with this program. If not, see <<a
88 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
89 </p>
90 <hr size="1"></hr><p>
91 This file is a part of an Information Retrieval oriented Ruby library
92 </p>
93 <p>
94 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
95 </p>
96 <p>
97 This program is free software: you can redistribute it and/or modify it
98 under the terms of the GNU General Public License as published by the Free
99 Software Foundation, either version 3 of the License, or (at your option)
100 any later version.
101 </p>
102 <p>
103 This program is distributed in the hope that it will be useful, but WITHOUT
104 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
105 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
106 more details.
107 </p>
108 <p>
109 You should have received a copy of the GNU General Public License along
110 with this program. If not, see <<a
111 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
112 </p>
113 <hr size="1"></hr><p>
114 General module for many purposes related to Information Retrieval.
115 </p>
116
117 </div>
118
119 </div>
120
121
122 </div>
123
124 <!-- if includes -->
125
126 <div id="section">
127
128
129
130
131 <!-- if method_list -->
132
133
134
135
136 </div>
137
138 <div id="validator-badges">
139 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
140 </div>
141
142 </body>
143 </html>
144
doc/files/lib/rir_rb.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>File: rir.rb [RDoc Documentation]</title> 5 <title>File: rir.rb [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="fileHeader"> 45 <div id="fileHeader">
46 <h1>rir.rb</h1> 46 <h1>rir.rb</h1>
47 <table class="header-table"> 47 <table class="header-table">
48 <tr class="top-aligned-row"> 48 <tr class="top-aligned-row">
49 <td><strong>Path:</strong></td> 49 <td><strong>Path:</strong></td>
50 <td>lib/rir.rb 50 <td>lib/rir.rb
51 51
52 </td> 52 </td>
53 </tr> 53 </tr>
54 <tr class="top-aligned-row"> 54 <tr class="top-aligned-row">
55 <td><strong>Last Update:</strong></td> 55 <td><strong>Last Update:</strong></td>
56 <td>2010-11-19 11:27:16 +0100</td> 56 <td>2010-11-25 15:44:52 +0100</td>
57 </tr> 57 </tr>
58 </table> 58 </table>
59 </div> 59 </div>
60 <!-- banner header --> 60 <!-- banner header -->
61 61
62 <div id="bodyContent"> 62 <div id="bodyContent">
63 63
64 <div id="contextContent"> 64 <div id="contextContent">
65 65
66 <div id="requires-list"> 66 <div id="requires-list">
67 <h3 class="section-bar">Required files</h3> 67 <h3 class="section-bar">Required files</h3>
68 68
69 <div class="name-list"> 69 <div class="name-list">
70 70
71 rir/document&nbsp;&nbsp; 71 rir/document&nbsp;&nbsp;
72 72
73 rir/string&nbsp;&nbsp; 73 rir/string&nbsp;&nbsp;
74 74
75 rir/query&nbsp;&nbsp; 75 rir/query&nbsp;&nbsp;
76 76
77 rir/corpus&nbsp;&nbsp; 77 rir/corpus&nbsp;&nbsp;
78 78
79 rir/regexp&nbsp;&nbsp; 79 rir/regexp&nbsp;&nbsp;
80
81 rir/ttagger&nbsp;&nbsp;
80 82
81 </div> 83 </div>
82 </div> 84 </div>
83 85
84 </div> 86 </div>
85 87
86 88
87 </div> 89 </div>
88 90
89 <!-- if includes --> 91 <!-- if includes -->
90 92
91 <div id="section"> 93 <div id="section">
92 94
93 95
94 96
95 97
96 <!-- if method_list --> 98 <!-- if method_list -->
97 99
98 100
99 101
100 102
101 </div> 103 </div>
102 104
103 <div id="validator-badges"> 105 <div id="validator-badges">
104 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 106 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
105 </div> 107 </div>
106 108
107 </body> 109 </body>
108 </html> 110 </html>
109 111
doc/fr_class_index.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <!-- 4 <!--
5 5
6 Classes [RDoc Documentation] 6 Classes [RDoc Documentation]
7 7
8 --> 8 -->
9 <head> 9 <head>
10 <title>Classes [RDoc Documentation]</title> 10 <title>Classes [RDoc Documentation]</title>
11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> 12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
13 <base target="docwin" /> 13 <base target="docwin" />
14 </head> 14 </head>
15 <body> 15 <body>
16 <div class="index"> 16 <div class="index">
17 <h1 class="section-bar">Classes</h1> 17 <h1 class="section-bar">Classes</h1>
18 <div id="index-entries"> 18 <div id="index-entries">
19 19
20 <a href="classes/RIR.html">RIR</a><br /> 20 <a href="classes/RIR.html">RIR</a><br />
21 21
22 <a href="classes/RIR/Corpus.html">RIR::Corpus</a><br /> 22 <a href="classes/RIR/Document.html">RIR::Document</a><br />
23 23
24 <a href="classes/RIR/Indri.html">RIR::Indri</a><br /> 24 <a href="classes/RIR/Indri.html">RIR::Indri</a><br />
25 25
26 <a href="classes/RIR/Indri/IndriQuery.html">RIR::Indri::IndriQuery</a><br /> 26 <a href="classes/RIR/Indri/IndriQuery.html">RIR::Indri::IndriQuery</a><br />
27 27
28 <a href="classes/RIR/Indri/Parameters.html">RIR::Indri::Parameters</a><br /> 28 <a href="classes/RIR/Indri/Parameters.html">RIR::Indri::Parameters</a><br />
29 29
30 <a href="classes/RIR/Query.html">RIR::Query</a><br /> 30 <a href="classes/RIR/Query.html">RIR::Query</a><br />
31 31
32 <a href="classes/String.html">String</a><br /> 32 <a href="classes/RIR/TreeTagger.html">RIR::TreeTagger</a><br />
33
34 <a href="classes/RIR/TreeTagger/Chunk.html">RIR::TreeTagger::Chunk</a><br />
35
36 <a href="classes/RIR/TreeTagger/TaggerChunker.html">RIR::TreeTagger::TaggerChunker</a><br />
37
38 <a href="classes/RIR/TreeTagger/TaggerChunkerEnglish.html">RIR::TreeTagger::TaggerChunkerEnglish</a><br />
39
40 <a href="classes/RIR/TreeTagger/TaggerChunkerFrench.html">RIR::TreeTagger::TaggerChunkerFrench</a><br />
41
42 <a href="classes/RIR/TreeTagger/TaggerChunkerGerman.html">RIR::TreeTagger::TaggerChunkerGerman</a><br />
43
44 <a href="classes/RIR/WebDocument.html">RIR::WebDocument</a><br />
45
46 <a href="classes/RIR/WikipediaPage.html">RIR::WikipediaPage</a><br />
33 47
34 </div> 48 </div>
35 </div> 49 </div>
36 </body> 50 </body>
37 </html> 51 </html>
38 52
doc/fr_file_index.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <!-- 4 <!--
5 5
6 Files [RDoc Documentation] 6 Files [RDoc Documentation]
7 7
8 --> 8 -->
9 <head> 9 <head>
10 <title>Files [RDoc Documentation]</title> 10 <title>Files [RDoc Documentation]</title>
11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> 12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
13 <base target="docwin" /> 13 <base target="docwin" />
14 </head> 14 </head>
15 <body> 15 <body>
16 <div class="index"> 16 <div class="index">
17 <h1 class="section-bar">Files</h1> 17 <h1 class="section-bar">Files</h1>
18 <div id="index-entries"> 18 <div id="index-entries">
19 19
20 <a href="files/lib/rir/corpus_rb.html">lib/rir/corpus.rb</a><br /> 20 <a href="files/lib/rir_rb.html">lib/rir.rb</a><br />
21 21
22 <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br />
23
22 <a href="files/lib/rir/query_rb.html">lib/rir/query.rb</a><br /> 24 <a href="files/lib/rir/query_rb.html">lib/rir/query.rb</a><br />
23 25
24 <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br /> 26 <a href="files/lib/rir/ttagger_rb.html">lib/rir/ttagger.rb</a><br />
25 27
26 </div> 28 </div>
27 </div> 29 </div>
28 </body> 30 </body>
29 </html> 31 </html>
30 32
doc/fr_method_index.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <!-- 4 <!--
5 5
6 Methods [RDoc Documentation] 6 Methods [RDoc Documentation]
7 7
8 --> 8 -->
9 <head> 9 <head>
10 <title>Methods [RDoc Documentation]</title> 10 <title>Methods [RDoc Documentation]</title>
11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> 12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
13 <base target="docwin" /> 13 <base target="docwin" />
14 </head> 14 </head>
15 <body> 15 <body>
16 <div class="index"> 16 <div class="index">
17 <h1 class="section-bar">Methods</h1> 17 <h1 class="section-bar">Methods</h1>
18 <div id="index-entries"> 18 <div id="index-entries">
19 19
20 <a href="classes/String.html#M000011">extract_xmltags_values (String)</a><br /> 20 <a href="classes/RIR/Document.html#M000010">count_words (RIR::Document)</a><br />
21 21
22 <a href="classes/RIR/Corpus.html#M000017">files (RIR::Corpus)</a><br /> 22 <a href="classes/RIR/Document.html#M000011">entropy (RIR::Document)</a><br />
23 23
24 <a href="classes/String.html#M000001">is_stopword? (String)</a><br /> 24 <a href="classes/RIR/Document.html#M000008">format_words (RIR::Document)</a><br />
25 25
26 <a href="classes/RIR/Corpus.html#M000016">new (RIR::Corpus)</a><br /> 26 <a href="classes/RIR/WebDocument.html#M000014">get_content (RIR::WebDocument)</a><br />
27 27
28 <a href="classes/RIR/Indri/Parameters.html#M000012">new (RIR::Indri::Parameters)</a><br /> 28 <a href="classes/RIR/WikipediaPage.html#M000017">get_url (RIR::WikipediaPage)</a><br />
29 29
30 <a href="classes/RIR/Indri/IndriQuery.html#M000014">new (RIR::Indri::IndriQuery)</a><br /> 30 <a href="classes/RIR/WebDocument.html#M000015">new (RIR::WebDocument)</a><br />
31 31
32 <a href="classes/String.html#M000002">remove_special_characters (String)</a><br /> 32 <a href="classes/RIR/Indri/IndriQuery.html#M000006">new (RIR::Indri::IndriQuery)</a><br />
33 33
34 <a href="classes/String.html#M000006">strip_javascripts (String)</a><br /> 34 <a href="classes/RIR/Indri/Parameters.html#M000004">new (RIR::Indri::Parameters)</a><br />
35 35
36 <a href="classes/String.html#M000005">strip_javascripts! (String)</a><br /> 36 <a href="classes/RIR/Document.html#M000013">new (RIR::Document)</a><br />
37 37
38 <a href="classes/String.html#M000010">strip_punctuation (String)</a><br /> 38 <a href="classes/RIR/TreeTagger/TaggerChunker.html#M000002">new (RIR::TreeTagger::TaggerChunker)</a><br />
39 39
40 <a href="classes/String.html#M000009">strip_punctuation! (String)</a><br /> 40 <a href="classes/RIR/TreeTagger/Chunk.html#M000003">new (RIR::TreeTagger::Chunk)</a><br />
41 41
42 <a href="classes/String.html#M000008">strip_stylesheets (String)</a><br /> 42 <a href="classes/RIR/Document.html#M000009">ngrams (RIR::Document)</a><br />
43 43
44 <a href="classes/String.html#M000007">strip_stylesheets! (String)</a><br /> 44 <a href="classes/RIR/TreeTagger/TaggerChunker.html#M000001">parse (RIR::TreeTagger::TaggerChunker)</a><br />
45 45
46 <a href="classes/String.html#M000004">strip_xml_tags (String)</a><br /> 46 <a href="classes/RIR/WikipediaPage.html#M000018">search_homepage (RIR::WikipediaPage)</a><br />
47 47
48 <a href="classes/String.html#M000003">strip_xml_tags! (String)</a><br /> 48 <a href="classes/RIR/WikipediaPage.html#M000016">search_wikipedia_titles (RIR::WikipediaPage)</a><br />
49 49
50 <a href="classes/RIR/Indri/Parameters.html#M000013">to_s (RIR::Indri::Parameters)</a><br /> 50 <a href="classes/RIR/Document.html#M000012">tf (RIR::Document)</a><br />
51 51
52 <a href="classes/RIR/Indri/IndriQuery.html#M000015">to_s (RIR::Indri::IndriQuery)</a><br /> 52 <a href="classes/RIR/Indri/Parameters.html#M000005">to_s (RIR::Indri::Parameters)</a><br />
53
54 <a href="classes/RIR/Indri/IndriQuery.html#M000007">to_s (RIR::Indri::IndriQuery)</a><br />
53 55
54 </div> 56 </div>
55 </div> 57 </div>
56 </body> 58 </body>
57 </html> 59 </html>
58 60
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <!-- 4 <!--
5 5
6 RDoc Documentation 6 RDoc Documentation
7 7
8 --> 8 -->
9 <head> 9 <head>
10 <title>RDoc Documentation</title> 10 <title>RDoc Documentation</title>
11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
12 </head> 12 </head>
13 <frameset rows="20%, 80%"> 13 <frameset rows="20%, 80%">
14 <frameset cols="25%,35%,45%"> 14 <frameset cols="25%,35%,45%">
15 <frame src="fr_file_index.html" title="Files" name="Files" /> 15 <frame src="fr_file_index.html" title="Files" name="Files" />
16 <frame src="fr_class_index.html" name="Classes" /> 16 <frame src="fr_class_index.html" name="Classes" />
17 <frame src="fr_method_index.html" name="Methods" /> 17 <frame src="fr_method_index.html" name="Methods" />
18 </frameset> 18 </frameset>
19 <frame src="files/lib/rir/corpus_rb.html" name="docwin" /> 19 <frame src="files/lib/rir_rb.html" name="docwin" />
20 </frameset> 20 </frameset>
21 </html> 21 </html>
22 22
1 #!/usr/bin/env ruby 1 #!/usr/bin/env ruby
2 2
3 require 'rir/document' 3 require 'rir/document'
4 require 'rir/string' 4 require 'rir/string'
5 require 'rir/query' 5 require 'rir/query'
6 require 'rir/corpus' 6 require 'rir/corpus'
7 require 'rir/regexp' 7 require 'rir/regexp'
8 require 'rir/ttagger'
8 9
1 #!/usr/bin/env ruby 1 #!/usr/bin/env ruby
2 2
3 # This file is a part of an Information Retrieval oriented Ruby library 3 # This file is a part of an Information Retrieval oriented Ruby library
4 # 4 #
5 # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> 5 # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
6 # 6 #
7 # This program is free software: you can redistribute it and/or modify 7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by 8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or 9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version. 10 # (at your option) any later version.
11 # 11 #
12 # This program is distributed in the hope that it will be useful, 12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details. 15 # GNU General Public License for more details.
16 # 16 #
17 # You should have received a copy of the GNU General Public License 17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19
20 # General module for many purposes related to Information Retrieval. 20 # General module for many purposes related to Information Retrieval.
21 module RIR 21 module RIR
22 22
# A Document is a bag of words and is constructed from a string.
#
# The raw text is kept in +doc_content+; +words+ holds the lowercased tokens
# extracted from it, in their order of appearance.
class Document
  attr_reader :words, :doc_content

  # Builds a Document from +content+ (a String) and tokenizes it right away.
  def initialize(content)
    @doc_content = content
    @words = format_words
  end

  # Returns an Array containing the distinct +n+-grams (words) from the
  # current Document, in order of first appearance.
  #
  # An empty Array is returned when +n+ is not a positive Integer or when the
  # Document holds fewer than +n+ words.
  #
  #   ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...]
  def ngrams(n)
    # each_cons raises on non-positive sizes, so reject them up front.
    return [] unless n.is_a?(Integer) && n > 0

    @words.each_cons(n).map { |gram| gram.join(" ") }.uniq
  end

  # Returns a Hash containing the words and their associated counts in the
  # current Document. Looking up a word that is absent yields 0.
  #
  #   count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... }
  def count_words
    # A plain default value (rather than a default block) keeps lookups of
    # unknown words from inserting new keys into the returned Hash.
    counts = Hash.new(0)
    @words.each { |w| counts[w] += 1 }

    counts
  end

  # Computes the entropy of a given string +s+ inside the document.
  #
  # If the string parameter is composed of many words (i.e. tokens separated
  # by whitespace(s)), it is considered as an ngram and the contributions of
  # its words are summed. Words are compared as given: +words+ are stored
  # lowercased, so +s+ should be lowercase too.
  #
  # Words that do not occur in the document contribute 0 (the usual
  # 0*log(0) = 0 convention), and an empty document yields 0.0.
  #
  #   entropy("guitar")                 #=> 0.00432114812727959
  #   entropy("dillinger escape plan")  #=> 0.265862076325102
  def entropy(s)
    total = @words.size.to_f
    return 0.0 if total.zero?

    counts = count_words

    s.split.inject(0.0) do |en, w|
      p_wi = counts[w] / total
      # Math.log2(0) is -Infinity and 0 * -Infinity is NaN: skip absent words.
      p_wi > 0 ? en - p_wi * Math.log2(p_wi) : en
    end
  end

  # Computes the term frequency of a given *word* +s+, i.e. its number of
  # occurrences divided by the number of words of the document.
  # Returns 0.0 for an empty document.
  #
  #   tf("guitar") #=> 0.000380372765310004
  def tf(s)
    return 0.0 if @words.empty?

    count_words[s].to_f / @words.size
  end

  protected

  # Splits +doc_content+ on non-word characters (see
  # http://perldoc.perl.org/perlre.html and the \\W special escape), keeps
  # only the tokens that contain at least one ASCII letter and lowercases them.
  #
  # Protected function, only meant to be called at the initialization.
  def format_words
    @doc_content.scan(/\w+/).select { |token| token =~ /[a-zA-Z]/ }.map { |token| token.downcase }
  end
end
99 106
# A WebDocument is a Document with a +url+.
class WebDocument < Document
  # Loaded once with the class instead of on every call to get_content.
  require 'net/http'

  attr_reader :url

  # Returns the HTML text from the page of a given +url+ (fetched with a
  # plain HTTP GET).
  def self.get_content(url)
    Net::HTTP.get(URI.parse(url))
  end

  # WebDocument constructor, the content of the Document is the HTML page
  # without the tags.
  #
  # The page is cleaned with the String helpers +strip_javascripts+,
  # +strip_stylesheets+ and +strip_xml_tags+, which are not defined in this
  # class (presumably provided by rir/string -- confirm).
  def initialize(url)
    @url = url
    page = WebDocument.get_content(url)
    super page.strip_javascripts.strip_stylesheets.strip_xml_tags
  end
end
117 124
118 # A WikipediaPage is a WebDocument. 125 # A WikipediaPage is a WebDocument.
119 class WikipediaPage < WebDocument 126 class WikipediaPage < WebDocument
120 require 'rexml/document' 127 require 'rexml/document'
121 require 'net/http' 128 require 'net/http'
122 require 'kconv' 129 require 'kconv'
123 130
124 131
# Searches the English Wikipedia for +name+ and returns an Array with the
# titles of the matching pages, or nil when the response holds no
# <tt>api/query/search</tt> element.
#
# Raises an ArgumentError when +name+ is not UTF-8 encoded.
def self.search_wikipedia_titles(name)
  # The offending value goes into the message: a third argument to raise
  # would be taken as the backtrace, not as extra information.
  raise ArgumentError, "Bad encoding: #{name.inspect}" unless name.isutf8

  # encode_www_form_component also escapes '&', '+', '#'..., which would
  # otherwise corrupt the query string.
  uri = URI.parse("http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.encode_www_form_component(name)}&format=xml")
  res = REXML::Document.new(Net::HTTP.get(uri).toutf8).elements['api/query/search']

  res.collect { |e| e.attributes['title'] } unless res.nil?
end
130 139
# Returns the full URL of the English Wikipedia page titled +name+, or nil
# when the page is reported as missing or the response holds no page element.
#
# Raises an ArgumentError when +name+ is not UTF-8 encoded.
def self.get_url(name)
  # The offending value goes into the message: a third argument to raise
  # would be taken as the backtrace, not as extra information.
  raise ArgumentError, "Bad encoding: #{name.inspect}" unless name.isutf8

  # encode_www_form_component also escapes '&', '+', '#'..., which would
  # otherwise corrupt the query string (e.g. for a title such as "C++").
  uri = URI.parse("http://en.wikipedia.org/w/api.php?action=query&titles=#{URI.encode_www_form_component(name)}&inprop=url&prop=info&format=xml")
  page = REXML::Document.new(Net::HTTP.get(uri).toutf8).elements['api/query/pages/page']
  return nil if page.nil?

  atts = page.attributes
  atts['fullurl'] if atts['missing'].nil?
end
136 147
# Builds the WikipediaPage of the first search result for +name+.
#
# Returns nil when the search yields no title or when no URL can be
# resolved for the first title.
def self.search_homepage(name)
  titles = WikipediaPage.search_wikipedia_titles(name)
  return nil if titles.nil? || titles.empty?

  # get_url answers nil for a missing page; do not try to fetch a nil URL.
  url = WikipediaPage.get_url(titles.first)
  WikipediaPage.new(url) unless url.nil?
end
146 153
147 # def initialize(name) 154 # def initialize(name)
148 # title = WikipediaPage.search_wikipedia_titles name 155 # title = WikipediaPage.search_wikipedia_titles name
149 # raise ArgumentError, "No page found" if title.empty? 156 # raise ArgumentError, "No page found" if title.empty?
150 # super WikipediaPage.get_url title[0] 157 # super WikipediaPage.get_url title[0]
1 #!/usr/bin/env ruby 1 #!/usr/bin/env ruby
2 2
3 # This file is a part of an Information Retrieval oriented Ruby library 3 # This file is a part of an Information Retrieval oriented Ruby library
4 # 4 #
5 # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> 5 # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
6 # 6 #
7 # This program is free software: you can redistribute it and/or modify 7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by 8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or 9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version. 10 # (at your option) any later version.
11 # 11 #
12 # This program is distributed in the hope that it will be useful, 12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details. 15 # GNU General Public License for more details.
16 # 16 #
17 # You should have received a copy of the GNU General Public License 17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19
module RIR

  # Base class of every query; it carries no behavior of its own.
  class Query
  end

  # Helpers producing Indri parameter/query files.
  module Indri

    # Holds the settings written at the top of an Indri parameter file.
    class Parameters
      attr_accessor :index_path, :memory, :count, :offset, :run_id, :print_query, :print_docs, :rule, :baseline

      # +corpus+ is stored as +index_path+ and written in the <index> tag.
      # +print_query+ and +print_docs+ are turned into the strings "true" or
      # "false"; every other argument is stored as given.
      def initialize(corpus,mem="1g",count="1000",offset="1",run_id="default",print_query=false,print_docs=false)
        @index_path  = corpus
        @memory      = mem
        @count       = count
        @offset      = offset
        @run_id      = run_id
        @print_query = print_query ? "true" : "false"
        @print_docs  = print_docs  ? "true" : "false"
      end

      # Returns the parameters as XML, one tag per line.
      #
      # The opening <parameters> tag is deliberately left unclosed here:
      # IndriQuery#to_s appends the query and the closing tag.
      # A <baseline> tag is written when +baseline+ is set, a <rule> tag
      # otherwise.
      def to_s
        retrieval_model =
          if @baseline.nil?
            "<rule>#{@rule}</rule>"
          else
            "<baseline>#{@baseline}</baseline>"
          end

        [
          "<parameters>",
          "<memory>#{@memory}</memory>",
          "<index>#{@index_path}</index>",
          "<count>#{@count}</count>",
          retrieval_model,
          "<queryOffset>#{@offset}</queryOffset>",
          "<runID>#{@run_id}</runID>",
          "<printQuery>#{@print_query}</printQuery>",
          "<printDocuments>#{@print_docs}</printDocuments>"
        ].join("\n") + "\n"
      end
    end

    # A query (+id+ and +query+ text) bound to the Parameters it runs with.
    class IndriQuery < Query
      attr_accessor :id, :query, :params, :rule

      # Note that +params+ is modified: when it has no rule yet, its rule is
      # set to the default retrieval model.
      def initialize(id,query,params)
        @params = params
        # Here we set the default retrieval model as Language Modeling
        # with a Dirichlet smoothing at 2500.
        # TODO: maybe a Rule class...
        @params.rule = 'method:dirichlet,mu:2500' if @params.rule.nil?

        @id    = id
        @query = query
      end

      # Returns the complete parameter file: the parameters, followed by the
      # <query> block and the closing </parameters> tag.
      def to_s
        @params.to_s + [
          "<query>",
          "<number>#{@id}</number>",
          "<text>#{@query}</text>",
          "</query>",
          "</parameters>"
        ].join("\n")
      end
    end

  end
end
87 87
File was created 1 #!/usr/bin/env ruby
2
3 # This file is a part of an Information Retrieval oriented Ruby library
4 #
5 # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
module RIR

  # TreeTagger-related stuff module.
  #
  # See http://www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/DecisionTreeTagger.html
  module TreeTagger

    # This class handles generic parsing of tagger-chunker outputs.
    class TaggerChunker
      attr_reader :chunks, :file

      # Parses a tagger-chunker output and returns an Array of Chunk.
      #
      # +chunk_lines+ is a collection of lines (Strings); they are not
      # modified. A line made of <TAG> opens a chunk, the matching </TAG>
      # closes it, and the first whitespace-separated field of every line in
      # between is one word of the chunk.
      #
      # Only the innermost chunk is reported: lines found outside of a chunk,
      # or before a nested opening tag, are ignored, and so are chunks
      # without any word.
      def self.parse chunk_lines
        in_chunk = false
        tag      = nil

        chunks = []
        words  = []

        chunk_lines.each do |raw|
          # chomp (not chomp!) leaves the caller's strings untouched and
          # also works on frozen strings.
          line = raw.chomp

          if line =~ /\A<\w+>\z/
            in_chunk = true
            tag      = line
            # Drop whatever was gathered for an enclosing, still open chunk.
            words = []
          elsif line =~ /\A<\/\w+>\z/
            # A closing tag only counts when it matches the last opened tag.
            next unless in_chunk && line == tag.sub('<', '</')

            chunks.push Chunk.new(words.join(" "), tag) unless words.empty?
            in_chunk = false
            words    = []
          elsif in_chunk
            token = line.split.first
            # Blank lines have no first field.
            words.push(token) unless token.nil?
          end
        end

        chunks
      end

      # Initializes parsing. +chunk_file+ is the output of +tagger-chunker+ and must
      # be a valid path to the file. The path is kept in +file+.
      #
      #   TaggerChunker.new("ttout/2010020") #=> #<RIR::TreeTagger::TaggerChunker:0x92fd088 @chunks=[#<RIR::TreeTagger::Chunk:0x8ec5a10 @words=["robert", "schumann"], @tag="NC">, ...] ...>
      def initialize chunk_file
        @file   = chunk_file
        # File.readlines closes the file once it has been read.
        @chunks = TaggerChunker.parse File.readlines(chunk_file)
      end

    end

    # Placeholder for English-specific parsing; behaves like TaggerChunker.
    class TaggerChunkerEnglish < TaggerChunker
    end

    # Placeholder for French-specific parsing; behaves like TaggerChunker.
    class TaggerChunkerFrench < TaggerChunker
    end

    # Placeholder for German-specific parsing; behaves like TaggerChunker.
    class TaggerChunkerGerman < TaggerChunker
    end

    # Represents a Chunk extracted when parsing a TaggerChunker file.
    class Chunk
      attr_reader :words, :tag

      # +str+ are whitespace-separated terms, stored as an Array in +words+.
      # +tag+ is the opening tag line (e.g. "<NC>"); its first and last
      # characters are removed before it is stored.
      # For the tagset see : ftp://ftp.ims.uni-stuttgart.de/pub/corpora/chunker-tagset-english.txt
      def initialize str,tag
        @words = str.split
        @tag   = tag[1..-2]
      end
    end

  end
end
94
# Demo script: makes the bundled lib/ directory loadable, downloads a
# Wikipedia page and prints two statistics computed on its text.
lib_dir = File.expand_path("lib", File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir)

require 'rir'

page = RIR::WikipediaPage.new("http://en.wikipedia.org/wiki/The_Dillinger_Escape_Plan")

# Entropy of a multi-word string, then the term frequency of a single word.
p page.entropy("dillinger escape plan")
p page.tf("guitar")