Commit fd4cb285a4975c9a0b6624e93a42eb9fa812fee4

Authored by Romain Deveaud
1 parent b843bae6b0
Exists in master

doc changes + document and string improvements

Showing 14 changed files with 63 additions and 26 deletions Inline Diff

doc/classes/String.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>Class: String [RDoc Documentation]</title> 5 <title>Class: String [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="classHeader"> 45 <div id="classHeader">
46 <table class="header-table"> 46 <table class="header-table">
47 <tr class="top-aligned-row"> 47 <tr class="top-aligned-row">
48 <td><strong>Class</strong></td> 48 <td><strong>Class</strong></td>
49 <td class="class-name-in-header">String</td> 49 <td class="class-name-in-header">String</td>
50 </tr> 50 </tr>
51 <tr class="top-aligned-row"> 51 <tr class="top-aligned-row">
52 <td><strong>In:</strong></td> 52 <td><strong>In:</strong></td>
53 <td> 53 <td>
54 54
55 55
56 <a href="../files/lib/rir/string_rb.html"> 56 <a href="../files/lib/rir/string_rb.html">
57 57
58 lib/rir/string.rb 58 lib/rir/string.rb
59 59
60 </a> 60 </a>
61 61
62 62
63 <br /> 63 <br />
64 64
65 </td> 65 </td>
66 </tr> 66 </tr>
67 67
68 68
69 <tr class="top-aligned-row"> 69 <tr class="top-aligned-row">
70 <td><strong>Parent:</strong></td> 70 <td><strong>Parent:</strong></td>
71 <td> 71 <td>
72 72
73 Object 73 Object
74 74
75 </td> 75 </td>
76 </tr> 76 </tr>
77 77
78 </table> 78 </table>
79 </div> 79 </div>
80 <!-- banner header --> 80 <!-- banner header -->
81 81
82 <div id="bodyContent"> 82 <div id="bodyContent">
83 83
84 <div id="contextContent"> 84 <div id="contextContent">
85 85
86 <div id="description"> 86 <div id="description">
87 <p> 87 <p>
88 Extension of the standard class <a href="String.html">String</a> with 88 Extension of the standard class <a href="String.html">String</a> with
89 useful functions. 89 useful functions.
90 </p> 90 </p>
91 91
92 </div> 92 </div>
93 93
94 </div> 94 </div>
95 95
96 96
97 <div id="method-list"> 97 <div id="method-list">
98 <h3 class="section-bar">Methods</h3> 98 <h3 class="section-bar">Methods</h3>
99 99
100 <div class="name-list"> 100 <div class="name-list">
101 101
102 <a href="#M000009">extract_xmltags_values</a>&nbsp;&nbsp; 102 <a href="#M000009">extract_xmltags_values</a>&nbsp;&nbsp;
103 103
104 <a href="#M000001">is_stopword?</a>&nbsp;&nbsp; 104 <a href="#M000001">is_stopword?</a>&nbsp;&nbsp;
105 105
106 <a href="#M000002">remove_special_characters</a>&nbsp;&nbsp; 106 <a href="#M000002">remove_special_characters</a>&nbsp;&nbsp;
107 107
108 <a href="#M000006">strip_javascripts</a>&nbsp;&nbsp; 108 <a href="#M000006">strip_javascripts</a>&nbsp;&nbsp;
109 109
110 <a href="#M000005">strip_javascripts!</a>&nbsp;&nbsp; 110 <a href="#M000005">strip_javascripts!</a>&nbsp;&nbsp;
111 111
112 <a href="#M000008">strip_stylesheets</a>&nbsp;&nbsp; 112 <a href="#M000008">strip_stylesheets</a>&nbsp;&nbsp;
113 113
114 <a href="#M000007">strip_stylesheets!</a>&nbsp;&nbsp; 114 <a href="#M000007">strip_stylesheets!</a>&nbsp;&nbsp;
115 115
116 <a href="#M000004">strip_xml_tags</a>&nbsp;&nbsp; 116 <a href="#M000004">strip_xml_tags</a>&nbsp;&nbsp;
117 117
118 <a href="#M000003">strip_xml_tags!</a>&nbsp;&nbsp; 118 <a href="#M000003">strip_xml_tags!</a>&nbsp;&nbsp;
119 119
120 </div> 120 </div>
121 </div> 121 </div>
122 122
123 </div> 123 </div>
124 124
125 <!-- if includes --> 125 <!-- if includes -->
126 126
127 <div id="includes"> 127 <div id="includes">
128 <h3 class="section-bar">Included Modules</h3> 128 <h3 class="section-bar">Included Modules</h3>
129 129
130 <div id="includes-list"> 130 <div id="includes-list">
131 131
132 <span class="include-name"><a href="Rir.html">Rir</a></span> 132 <span class="include-name"><a href="RIR.html">RIR</a></span>
133 133
134 </div> 134 </div>
135 </div> 135 </div>
136 136
137 <div id="section"> 137 <div id="section">
138 138
139 139
140 140
141 141
142 <!-- if method_list --> 142 <!-- if method_list -->
143 143
144 <div id="methods"> 144 <div id="methods">
145 145
146 <h3 class="section-bar">Public Instance methods</h3> 146 <h3 class="section-bar">Public Instance methods</h3>
147 147
148 148
149 <div id="method-M000009" class="method-detail"> 149 <div id="method-M000009" class="method-detail">
150 <a name="M000009"></a> 150 <a name="M000009"></a>
151 151
152 <div class="method-heading"> 152 <div class="method-heading">
153 153
154 <a href="String.src/M000009.html" target="Code" class="method-signature" 154 <a href="String.src/M000009.html" target="Code" class="method-signature"
155 onclick="popupCode('String.src/M000009.html');return false;"> 155 onclick="popupCode('String.src/M000009.html');return false;">
156 156
157 <span class="method-name">extract_xmltags_values</span><span class="method-args">(tag_name)</span> 157 <span class="method-name">extract_xmltags_values</span><span class="method-args">(tag_name)</span>
158 158
159 </a> 159 </a>
160 160
161 </div> 161 </div>
162 162
163 <div class="method-description"> 163 <div class="method-description">
164 164
165 <p> 165 <p>
166 Returns the text values inside all occurrences of an XML tag in <tt>self</tt> 166 Returns the text values inside all occurrences of an XML tag in <tt>self</tt>
167 </p> 167 </p>
168 <pre> 168 <pre>
169 s = &quot;four-piece in &lt;a href='#'&gt;Indianapolis&lt;/a&gt;, &lt;a href='#'&gt;Indiana&lt;/a&gt; at the Murat Theatre&quot; 169 s = &quot;four-piece in &lt;a href='#'&gt;Indianapolis&lt;/a&gt;, &lt;a href='#'&gt;Indiana&lt;/a&gt; at the Murat Theatre&quot;
170 s.extract_xmltags_values 'a' #=&gt; [&quot;Indianapolis&quot;, &quot;Indiana&quot;] 170 s.extract_xmltags_values 'a' #=&gt; [&quot;Indianapolis&quot;, &quot;Indiana&quot;]
171 </pre> 171 </pre>
172 172
173 </div> 173 </div>
174 </div> 174 </div>
175 175
176 176
177 <div id="method-M000001" class="method-detail"> 177 <div id="method-M000001" class="method-detail">
178 <a name="M000001"></a> 178 <a name="M000001"></a>
179 179
180 <div class="method-heading"> 180 <div class="method-heading">
181 181
182 <a href="String.src/M000001.html" target="Code" class="method-signature" 182 <a href="String.src/M000001.html" target="Code" class="method-signature"
183 onclick="popupCode('String.src/M000001.html');return false;"> 183 onclick="popupCode('String.src/M000001.html');return false;">
184 184
185 <span class="method-name">is_stopword?</span><span class="method-args">()</span> 185 <span class="method-name">is_stopword?</span><span class="method-args">()</span>
186 186
187 </a> 187 </a>
188 188
189 </div> 189 </div>
190 190
191 <div class="method-description"> 191 <div class="method-description">
192 192
193 <p> 193 <p>
194 Returns <tt>true</tt> if <tt>self</tt> belongs to Rir::Stoplist, 194 Returns <tt>true</tt> if <tt>self</tt> belongs to Rir::Stoplist,
195 <tt>false</tt> otherwise. 195 <tt>false</tt> otherwise.
196 </p> 196 </p>
197 197
198 </div> 198 </div>
199 </div> 199 </div>
200 200
201 201
202 <div id="method-M000002" class="method-detail"> 202 <div id="method-M000002" class="method-detail">
203 <a name="M000002"></a> 203 <a name="M000002"></a>
204 204
205 <div class="method-heading"> 205 <div class="method-heading">
206 206
207 <a href="String.src/M000002.html" target="Code" class="method-signature" 207 <a href="String.src/M000002.html" target="Code" class="method-signature"
208 onclick="popupCode('String.src/M000002.html');return false;"> 208 onclick="popupCode('String.src/M000002.html');return false;">
209 209
210 <span class="method-name">remove_special_characters</span><span class="method-args">()</span> 210 <span class="method-name">remove_special_characters</span><span class="method-args">()</span>
211 211
212 </a> 212 </a>
213 213
214 </div> 214 </div>
215 215
216 <div class="method-description"> 216 <div class="method-description">
217 217
218 <p> 218 <p>
219 Do not use. TODO: revamp. Find out why this function is here. 219 Do not use. TODO: revamp. Find out why this function is here.
220 </p> 220 </p>
221 221
222 </div> 222 </div>
223 </div> 223 </div>
224 224
225 225
226 <div id="method-M000006" class="method-detail"> 226 <div id="method-M000006" class="method-detail">
227 <a name="M000006"></a> 227 <a name="M000006"></a>
228 228
229 <div class="method-heading"> 229 <div class="method-heading">
230 230
231 <a href="String.src/M000006.html" target="Code" class="method-signature" 231 <a href="String.src/M000006.html" target="Code" class="method-signature"
232 onclick="popupCode('String.src/M000006.html');return false;"> 232 onclick="popupCode('String.src/M000006.html');return false;">
233 233
234 <span class="method-name">strip_javascripts</span><span class="method-args">()</span> 234 <span class="method-name">strip_javascripts</span><span class="method-args">()</span>
235 235
236 </a> 236 </a>
237 237
238 </div> 238 </div>
239 239
240 <div class="method-description"> 240 <div class="method-description">
241 241
242 <p> 242 <p>
243 Removes all Javascript sources from <tt>self</tt>. 243 Removes all Javascript sources from <tt>self</tt>.
244 </p> 244 </p>
245 <pre> 245 <pre>
246 s = &quot;&lt;script type='text/javascript'&gt; 246 s = &quot;&lt;script type='text/javascript'&gt;
247 var skin='vector', 247 var skin='vector',
248 stylepath='http://bits.wikimedia.org/skins-1.5' 248 stylepath='http://bits.wikimedia.org/skins-1.5'
249 &lt;/script&gt; 249 &lt;/script&gt;
250 250
251 test&quot; 251 test&quot;
252 s.strip_javascripts #=&gt; &quot;test&quot; 252 s.strip_javascripts #=&gt; &quot;test&quot;
253 </pre> 253 </pre>
254 254
255 </div> 255 </div>
256 </div> 256 </div>
257 257
258 258
259 <div id="method-M000005" class="method-detail"> 259 <div id="method-M000005" class="method-detail">
260 <a name="M000005"></a> 260 <a name="M000005"></a>
261 261
262 <div class="method-heading"> 262 <div class="method-heading">
263 263
264 <a href="String.src/M000005.html" target="Code" class="method-signature" 264 <a href="String.src/M000005.html" target="Code" class="method-signature"
265 onclick="popupCode('String.src/M000005.html');return false;"> 265 onclick="popupCode('String.src/M000005.html');return false;">
266 266
267 <span class="method-name">strip_javascripts!</span><span class="method-args">()</span> 267 <span class="method-name">strip_javascripts!</span><span class="method-args">()</span>
268 268
269 </a> 269 </a>
270 270
271 </div> 271 </div>
272 272
273 <div class="method-description"> 273 <div class="method-description">
274 274
275 <p> 275 <p>
276 Removes all Javascript sources from <tt>self</tt>. 276 Removes all Javascript sources from <tt>self</tt>.
277 </p> 277 </p>
278 <pre> 278 <pre>
279 s = &quot;&lt;script type='text/javascript'&gt; 279 s = &quot;&lt;script type='text/javascript'&gt;
280 var skin='vector', 280 var skin='vector',
281 stylepath='http://bits.wikimedia.org/skins-1.5' 281 stylepath='http://bits.wikimedia.org/skins-1.5'
282 &lt;/script&gt; 282 &lt;/script&gt;
283 283
284 test&quot; 284 test&quot;
285 s.strip_javascripts! 285 s.strip_javascripts!
286 s #=&gt; &quot;test&quot; 286 s #=&gt; &quot;test&quot;
287 </pre> 287 </pre>
288 288
289 </div> 289 </div>
290 </div> 290 </div>
291 291
292 292
293 <div id="method-M000008" class="method-detail"> 293 <div id="method-M000008" class="method-detail">
294 <a name="M000008"></a> 294 <a name="M000008"></a>
295 295
296 <div class="method-heading"> 296 <div class="method-heading">
297 297
298 <a href="String.src/M000008.html" target="Code" class="method-signature" 298 <a href="String.src/M000008.html" target="Code" class="method-signature"
299 onclick="popupCode('String.src/M000008.html');return false;"> 299 onclick="popupCode('String.src/M000008.html');return false;">
300 300
301 <span class="method-name">strip_stylesheets</span><span class="method-args">()</span> 301 <span class="method-name">strip_stylesheets</span><span class="method-args">()</span>
302 302
303 </a> 303 </a>
304 304
305 </div> 305 </div>
306 306
307 <div class="method-description"> 307 <div class="method-description">
308 308
309 </div> 309 </div>
310 </div> 310 </div>
311 311
312 312
313 <div id="method-M000007" class="method-detail"> 313 <div id="method-M000007" class="method-detail">
314 <a name="M000007"></a> 314 <a name="M000007"></a>
315 315
316 <div class="method-heading"> 316 <div class="method-heading">
317 317
318 <a href="String.src/M000007.html" target="Code" class="method-signature" 318 <a href="String.src/M000007.html" target="Code" class="method-signature"
319 onclick="popupCode('String.src/M000007.html');return false;"> 319 onclick="popupCode('String.src/M000007.html');return false;">
320 320
321 <span class="method-name">strip_stylesheets!</span><span class="method-args">()</span> 321 <span class="method-name">strip_stylesheets!</span><span class="method-args">()</span>
322 322
323 </a> 323 </a>
324 324
325 </div> 325 </div>
326 326
327 <div class="method-description"> 327 <div class="method-description">
328 328
329 </div> 329 </div>
330 </div> 330 </div>
331 331
332 332
333 <div id="method-M000004" class="method-detail"> 333 <div id="method-M000004" class="method-detail">
334 <a name="M000004"></a> 334 <a name="M000004"></a>
335 335
336 <div class="method-heading"> 336 <div class="method-heading">
337 337
338 <a href="String.src/M000004.html" target="Code" class="method-signature" 338 <a href="String.src/M000004.html" target="Code" class="method-signature"
339 onclick="popupCode('String.src/M000004.html');return false;"> 339 onclick="popupCode('String.src/M000004.html');return false;">
340 340
341 <span class="method-name">strip_xml_tags</span><span class="method-args">()</span> 341 <span class="method-name">strip_xml_tags</span><span class="method-args">()</span>
342 342
343 </a> 343 </a>
344 344
345 </div> 345 </div>
346 346
347 <div class="method-description"> 347 <div class="method-description">
348 348
349 <p> 349 <p>
350 Removes all XML-like tags from <tt>self</tt>. 350 Removes all XML-like tags from <tt>self</tt>.
351 </p> 351 </p>
352 <pre> 352 <pre>
353 s = &quot;&lt;html&gt;&lt;body&gt;test&lt;/body&gt;&lt;/html&gt;&quot; 353 s = &quot;&lt;html&gt;&lt;body&gt;test&lt;/body&gt;&lt;/html&gt;&quot;
354 s.strip_xml_tags #=&gt; &quot;test&quot; 354 s.strip_xml_tags #=&gt; &quot;test&quot;
355 s #=&gt; &quot;&lt;html&gt;&lt;body&gt;test&lt;/body&gt;&lt;/html&gt;&quot; 355 s #=&gt; &quot;&lt;html&gt;&lt;body&gt;test&lt;/body&gt;&lt;/html&gt;&quot;
356 </pre> 356 </pre>
357 357
358 </div> 358 </div>
359 </div> 359 </div>
360 360
361 361
362 <div id="method-M000003" class="method-detail"> 362 <div id="method-M000003" class="method-detail">
363 <a name="M000003"></a> 363 <a name="M000003"></a>
364 364
365 <div class="method-heading"> 365 <div class="method-heading">
366 366
367 <a href="String.src/M000003.html" target="Code" class="method-signature" 367 <a href="String.src/M000003.html" target="Code" class="method-signature"
368 onclick="popupCode('String.src/M000003.html');return false;"> 368 onclick="popupCode('String.src/M000003.html');return false;">
369 369
370 <span class="method-name">strip_xml_tags!</span><span class="method-args">()</span> 370 <span class="method-name">strip_xml_tags!</span><span class="method-args">()</span>
371 371
372 </a> 372 </a>
373 373
374 </div> 374 </div>
375 375
376 <div class="method-description"> 376 <div class="method-description">
377 377
378 <p> 378 <p>
379 Removes all XML-like tags from <tt>self</tt>. 379 Removes all XML-like tags from <tt>self</tt>.
380 </p> 380 </p>
381 <pre> 381 <pre>
382 s = &quot;&lt;html&gt;&lt;body&gt;test&lt;/body&gt;&lt;/html&gt;&quot; 382 s = &quot;&lt;html&gt;&lt;body&gt;test&lt;/body&gt;&lt;/html&gt;&quot;
383 s.strip_xml_tags! 383 s.strip_xml_tags!
384 s #=&gt; &quot;test&quot; 384 s #=&gt; &quot;test&quot;
385 </pre> 385 </pre>
386 386
387 </div> 387 </div>
388 </div> 388 </div>
389 389
390 390
391 391
392 </div> 392 </div>
393 393
394 394
395 395
396 396
397 </div> 397 </div>
398 398
399 <div id="validator-badges"> 399 <div id="validator-badges">
400 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 400 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
401 </div> 401 </div>
402 402
403 </body> 403 </body>
404 </html> 404 </html>
405 405
1 Fri, 05 Nov 2010 14:41:10 +0100 1 Fri, 05 Nov 2010 15:06:41 +0100
2 2
doc/files/README_markdown.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>File: README.markdown [RDoc Documentation]</title> 5 <title>File: README.markdown [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="fileHeader"> 45 <div id="fileHeader">
46 <h1>README.markdown</h1> 46 <h1>README.markdown</h1>
47 <table class="header-table"> 47 <table class="header-table">
48 <tr class="top-aligned-row"> 48 <tr class="top-aligned-row">
49 <td><strong>Path:</strong></td> 49 <td><strong>Path:</strong></td>
50 <td>README.markdown 50 <td>README.markdown
51 51
52 </td> 52 </td>
53 </tr> 53 </tr>
54 <tr class="top-aligned-row"> 54 <tr class="top-aligned-row">
55 <td><strong>Last Update:</strong></td> 55 <td><strong>Last Update:</strong></td>
56 <td>2010-11-05 14:40:41 +0100</td> 56 <td>2010-11-05 14:46:27 +0100</td>
57 </tr> 57 </tr>
58 </table> 58 </table>
59 </div> 59 </div>
60 <!-- banner header --> 60 <!-- banner header -->
61 61
62 <div id="bodyContent"> 62 <div id="bodyContent">
63 63
64 <div id="contextContent"> 64 <div id="contextContent">
65
66 <div id="description">
67 <p>
68 # Ruby Information Retrieval (rIR)
69 </p>
70 <p>
71 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
72 </p>
73 <p>
74 License
75 </p>
76 <h6>=</h6>
77 <p>
78 This program is free software: you can redistribute it and/or modify it
79 under the terms of the GNU General Public License as published by the Free
80 Software Foundation, either version 3 of the License, or (at your option)
81 any later version.
82 </p>
83 <p>
84 This program is distributed in the hope that it will be useful, but WITHOUT
85 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
86 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
87 more details.
88 </p>
89 <p>
90 You should have received a copy of the GNU General Public License along
91 with this program. If not, see <<a
92 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
93 </p>
94
95 </div>
65 96
66 </div> 97 </div>
67 98
68 99
69 </div> 100 </div>
70 101
71 <!-- if includes --> 102 <!-- if includes -->
72 103
73 <div id="section"> 104 <div id="section">
74 105
75 106
76 107
77 108
78 <!-- if method_list --> 109 <!-- if method_list -->
79 110
80 111
81 112
82 113
83 </div> 114 </div>
84 115
85 <div id="validator-badges"> 116 <div id="validator-badges">
86 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 117 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
87 </div> 118 </div>
88 119
89 </body> 120 </body>
90 </html> 121 </html>
91 122
doc/files/lib/rir/document_rb.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>File: document.rb [RDoc Documentation]</title> 5 <title>File: document.rb [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="fileHeader"> 45 <div id="fileHeader">
46 <h1>document.rb</h1> 46 <h1>document.rb</h1>
47 <table class="header-table"> 47 <table class="header-table">
48 <tr class="top-aligned-row"> 48 <tr class="top-aligned-row">
49 <td><strong>Path:</strong></td> 49 <td><strong>Path:</strong></td>
50 <td>lib/rir/document.rb 50 <td>lib/rir/document.rb
51 51
52 </td> 52 </td>
53 </tr> 53 </tr>
54 <tr class="top-aligned-row"> 54 <tr class="top-aligned-row">
55 <td><strong>Last Update:</strong></td> 55 <td><strong>Last Update:</strong></td>
56 <td>2010-11-05 14:39:35 +0100</td> 56 <td>2010-11-05 15:06:24 +0100</td>
57 </tr> 57 </tr>
58 </table> 58 </table>
59 </div> 59 </div>
60 <!-- banner header --> 60 <!-- banner header -->
61 61
62 <div id="bodyContent"> 62 <div id="bodyContent">
63 63
64 <div id="contextContent"> 64 <div id="contextContent">
65 65
66 <div id="description"> 66 <div id="description">
67 <p> 67 <p>
68 This file is a part of an Information Retrieval oriented Ruby library 68 This file is a part of an Information Retrieval oriented Ruby library
69 </p> 69 </p>
70 <p> 70 <p>
71 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> 71 Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
72 </p> 72 </p>
73 <p> 73 <p>
74 This program is free software: you can redistribute it and/or modify it 74 This program is free software: you can redistribute it and/or modify it
75 under the terms of the GNU General Public License as published by the Free 75 under the terms of the GNU General Public License as published by the Free
76 Software Foundation, either version 3 of the License, or (at your option) 76 Software Foundation, either version 3 of the License, or (at your option)
77 any later version. 77 any later version.
78 </p> 78 </p>
79 <p> 79 <p>
80 This program is distributed in the hope that it will be useful, but WITHOUT 80 This program is distributed in the hope that it will be useful, but WITHOUT
81 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 81 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
82 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 82 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
83 more details. 83 more details.
84 </p> 84 </p>
85 <p> 85 <p>
86 You should have received a copy of the GNU General Public License along 86 You should have received a copy of the GNU General Public License along
87 with this program. If not, see <<a 87 with this program. If not, see <<a
88 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. 88 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
89 </p> 89 </p>
90 90
91 </div> 91 </div>
92 92
93 <div id="requires-list"> 93 <div id="requires-list">
94 <h3 class="section-bar">Required files</h3> 94 <h3 class="section-bar">Required files</h3>
95 95
96 <div class="name-list"> 96 <div class="name-list">
97 97
98 net/http&nbsp;&nbsp; 98 net/http&nbsp;&nbsp;
99 99
100 </div> 100 </div>
101 </div> 101 </div>
102 102
103 </div> 103 </div>
104 104
105 105
106 </div> 106 </div>
107 107
108 <!-- if includes --> 108 <!-- if includes -->
109 109
110 <div id="section"> 110 <div id="section">
111 111
112 112
113 113
114 114
115 <!-- if method_list --> 115 <!-- if method_list -->
116 116
117 117
118 118
119 119
120 </div> 120 </div>
121 121
122 <div id="validator-badges"> 122 <div id="validator-badges">
123 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 123 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
124 </div> 124 </div>
125 125
126 </body> 126 </body>
127 </html> 127 </html>
128 128
doc/files/lib/rir/string_rb.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>File: string.rb [RDoc Documentation]</title> 5 <title>File: string.rb [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="fileHeader"> 45 <div id="fileHeader">
46 <h1>string.rb</h1> 46 <h1>string.rb</h1>
47 <table class="header-table"> 47 <table class="header-table">
48 <tr class="top-aligned-row"> 48 <tr class="top-aligned-row">
49 <td><strong>Path:</strong></td> 49 <td><strong>Path:</strong></td>
50 <td>lib/rir/string.rb 50 <td>lib/rir/string.rb
51 51
52 </td> 52 </td>
53 </tr> 53 </tr>
54 <tr class="top-aligned-row"> 54 <tr class="top-aligned-row">
55 <td><strong>Last Update:</strong></td> 55 <td><strong>Last Update:</strong></td>
56 <td>2010-11-05 14:39:35 +0100</td> 56 <td>2010-11-05 15:06:35 +0100</td>
57 </tr> 57 </tr>
58 </table> 58 </table>
59 </div> 59 </div>
60 <!-- banner header --> 60 <!-- banner header -->
61 61
62 <div id="bodyContent"> 62 <div id="bodyContent">
63 63
64 <div id="contextContent"> 64 <div id="contextContent">
65 65
66 <div id="description"> 66 <div id="description">
67 <p> 67 <p>
68 This file is a part of an Information Retrieval oriented Ruby library 68 This file is a part of an Information Retrieval oriented Ruby library
69 </p> 69 </p>
70 <p> 70 <p>
71 Copyright (C) 2010-2011 Romain Deveaud &lt;romain.deveaud@gmail.com&gt; 71 Copyright (C) 2010-2011 Romain Deveaud &lt;romain.deveaud@gmail.com&gt;
72 </p> 72 </p>
73 <p> 73 <p>
74 This program is free software: you can redistribute it and/or modify it 74 This program is free software: you can redistribute it and/or modify it
75 under the terms of the GNU General Public License as published by the Free 75 under the terms of the GNU General Public License as published by the Free
76 Software Foundation, either version 3 of the License, or (at your option) 76 Software Foundation, either version 3 of the License, or (at your option)
77 any later version. 77 any later version.
78 </p> 78 </p>
79 <p> 79 <p>
80 This program is distributed in the hope that it will be useful, but WITHOUT 80 This program is distributed in the hope that it will be useful, but WITHOUT
81 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 81 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
82 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 82 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
83 more details. 83 more details.
84 </p> 84 </p>
85 <p> 85 <p>
86 You should have received a copy of the GNU General Public License along 86 You should have received a copy of the GNU General Public License along
87 with this program. If not, see &lt;<a 87 with this program. If not, see &lt;<a
88 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>&gt;. 88 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>&gt;.
89 </p> 89 </p>
90 90
91 </div> 91 </div>
92 92
93 <div id="requires-list"> 93 <div id="requires-list">
94 <h3 class="section-bar">Required files</h3> 94 <h3 class="section-bar">Required files</h3>
95 95
96 <div class="name-list"> 96 <div class="name-list">
97 97
98 cgi&nbsp;&nbsp; 98 cgi&nbsp;&nbsp;
99 99
100 kconv&nbsp;&nbsp; 100 kconv&nbsp;&nbsp;
101 101
102 </div> 102 </div>
103 </div> 103 </div>
104 104
105 </div> 105 </div>
106 106
107 107
108 </div> 108 </div>
109 109
110 <!-- if includes --> 110 <!-- if includes -->
111 111
112 <div id="section"> 112 <div id="section">
113 113
114 114
115 115
116 116
117 <!-- if method_list --> 117 <!-- if method_list -->
118 118
119 119
120 120
121 121
122 </div> 122 </div>
123 123
124 <div id="validator-badges"> 124 <div id="validator-badges">
125 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 125 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
126 </div> 126 </div>
127 127
128 </body> 128 </body>
129 </html> 129 </html>
130 130
doc/files/main_rb.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <head> 4 <head>
5 <title>File: main.rb [RDoc Documentation]</title> 5 <title>File: main.rb [RDoc Documentation]</title>
6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 <meta http-equiv="Content-Script-Type" content="text/javascript" /> 7 <meta http-equiv="Content-Script-Type" content="text/javascript" />
8 <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> 8 <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
9 <script type="text/javascript"> 9 <script type="text/javascript">
10 // <![CDATA[ 10 // <![CDATA[
11 11
12 function popupCode( url ) { 12 function popupCode( url ) {
13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") 13 window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14 } 14 }
15 15
16 function toggleCode( id ) { 16 function toggleCode( id ) {
17 if ( document.getElementById ) 17 if ( document.getElementById )
18 elem = document.getElementById( id ); 18 elem = document.getElementById( id );
19 else if ( document.all ) 19 else if ( document.all )
20 elem = eval( "document.all." + id ); 20 elem = eval( "document.all." + id );
21 else 21 else
22 return false; 22 return false;
23 23
24 elemStyle = elem.style; 24 elemStyle = elem.style;
25 25
26 if ( elemStyle.display != "block" ) { 26 if ( elemStyle.display != "block" ) {
27 elemStyle.display = "block" 27 elemStyle.display = "block"
28 } else { 28 } else {
29 elemStyle.display = "none" 29 elemStyle.display = "none"
30 } 30 }
31 31
32 return true; 32 return true;
33 } 33 }
34 34
35 // Make codeblocks hidden by default 35 // Make codeblocks hidden by default
36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) 36 document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37 37
38 // ]]> 38 // ]]>
39 </script> 39 </script>
40 40
41 </head> 41 </head>
42 <body> 42 <body>
43 43
44 44
45 <div id="fileHeader"> 45 <div id="fileHeader">
46 <h1>main.rb</h1> 46 <h1>main.rb</h1>
47 <table class="header-table"> 47 <table class="header-table">
48 <tr class="top-aligned-row"> 48 <tr class="top-aligned-row">
49 <td><strong>Path:</strong></td> 49 <td><strong>Path:</strong></td>
50 <td>main.rb 50 <td>main.rb
51 51
52 </td> 52 </td>
53 </tr> 53 </tr>
54 <tr class="top-aligned-row"> 54 <tr class="top-aligned-row">
55 <td><strong>Last Update:</strong></td> 55 <td><strong>Last Update:</strong></td>
56 <td>2010-11-05 14:40:11 +0100</td> 56 <td>2010-11-05 15:05:38 +0100</td>
57 </tr> 57 </tr>
58 </table> 58 </table>
59 </div> 59 </div>
60 <!-- banner header --> 60 <!-- banner header -->
61 61
62 <div id="bodyContent"> 62 <div id="bodyContent">
63 63
64 <div id="contextContent"> 64 <div id="contextContent">
65 65
66 <div id="requires-list"> 66 <div id="requires-list">
67 <h3 class="section-bar">Required files</h3> 67 <h3 class="section-bar">Required files</h3>
68 68
69 <div class="name-list"> 69 <div class="name-list">
70 70
71 rir&nbsp;&nbsp; 71 rir&nbsp;&nbsp;
72 72
73 </div> 73 </div>
74 </div> 74 </div>
75 75
76 </div> 76 </div>
77 77
78 78
79 </div> 79 </div>
80 80
81 <!-- if includes --> 81 <!-- if includes -->
82 82
83 <div id="section"> 83 <div id="section">
84 84
85 85
86 86
87 87
88 <!-- if method_list --> 88 <!-- if method_list -->
89 89
90 90
91 91
92 92
93 </div> 93 </div>
94 94
95 <div id="validator-badges"> 95 <div id="validator-badges">
96 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> 96 <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
97 </div> 97 </div>
98 98
99 </body> 99 </body>
100 </html> 100 </html>
101 101
doc/fr_class_index.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <!-- 4 <!--
5 5
6 Classes [RDoc Documentation] 6 Classes [RDoc Documentation]
7 7
8 --> 8 -->
9 <head> 9 <head>
10 <title>Classes [RDoc Documentation]</title> 10 <title>Classes [RDoc Documentation]</title>
11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> 12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
13 <base target="docwin" /> 13 <base target="docwin" />
14 </head> 14 </head>
15 <body> 15 <body>
16 <div class="index"> 16 <div class="index">
17 <h1 class="section-bar">Classes</h1> 17 <h1 class="section-bar">Classes</h1>
18 <div id="index-entries"> 18 <div id="index-entries">
19 19
20 <a href="classes/Rir.html">Rir</a><br /> 20 <a href="classes/RIR.html">RIR</a><br />
21 21
22 <a href="classes/Rir/Document.html">Rir::Document</a><br /> 22 <a href="classes/RIR/Document.html">RIR::Document</a><br />
23 23
24 <a href="classes/Rir/WebDocument.html">Rir::WebDocument</a><br /> 24 <a href="classes/RIR/WebDocument.html">RIR::WebDocument</a><br />
25 25
26 <a href="classes/Rir/WikipediaPage.html">Rir::WikipediaPage</a><br /> 26 <a href="classes/RIR/WikipediaPage.html">RIR::WikipediaPage</a><br />
27 27
28 <a href="classes/String.html">String</a><br /> 28 <a href="classes/String.html">String</a><br />
29 29
30 </div> 30 </div>
31 </div> 31 </div>
32 </body> 32 </body>
33 </html> 33 </html>
34 34
doc/fr_file_index.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <!-- 4 <!--
5 5
6 Files [RDoc Documentation] 6 Files [RDoc Documentation]
7 7
8 --> 8 -->
9 <head> 9 <head>
10 <title>Files [RDoc Documentation]</title> 10 <title>Files [RDoc Documentation]</title>
11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> 12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
13 <base target="docwin" /> 13 <base target="docwin" />
14 </head> 14 </head>
15 <body> 15 <body>
16 <div class="index"> 16 <div class="index">
17 <h1 class="section-bar">Files</h1> 17 <h1 class="section-bar">Files</h1>
18 <div id="index-entries"> 18 <div id="index-entries">
19 19
20 <a href="files/README_markdown.html">README.markdown</a><br />
21
22 <a href="files/lib/rir_rb.html">lib/rir.rb</a><br />
23
24 <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br /> 20 <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br />
25 21
26 <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br /> 22 <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br />
27 23
28 <a href="files/main_rb.html">main.rb</a><br /> 24 <a href="files/main_rb.html">main.rb</a><br />
29 25
30 </div> 26 </div>
31 </div> 27 </div>
32 </body> 28 </body>
33 </html> 29 </html>
34 30
doc/fr_method_index.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <!-- 4 <!--
5 5
6 Methods [RDoc Documentation] 6 Methods [RDoc Documentation]
7 7
8 --> 8 -->
9 <head> 9 <head>
10 <title>Methods [RDoc Documentation]</title> 10 <title>Methods [RDoc Documentation]</title>
11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> 12 <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
13 <base target="docwin" /> 13 <base target="docwin" />
14 </head> 14 </head>
15 <body> 15 <body>
16 <div class="index"> 16 <div class="index">
17 <h1 class="section-bar">Methods</h1> 17 <h1 class="section-bar">Methods</h1>
18 <div id="index-entries"> 18 <div id="index-entries">
19 19
20 <a href="classes/Rir/Document.html#M000012">count_words (Rir::Document)</a><br /> 20 <a href="classes/RIR/Document.html#M000012">count_words (RIR::Document)</a><br />
21 21
22 <a href="classes/Rir/Document.html#M000013">entropy (Rir::Document)</a><br /> 22 <a href="classes/RIR/Document.html#M000013">entropy (RIR::Document)</a><br />
23 23
24 <a href="classes/String.html#M000009">extract_xmltags_values (String)</a><br /> 24 <a href="classes/String.html#M000009">extract_xmltags_values (String)</a><br />
25 25
26 <a href="classes/Rir/Document.html#M000010">format_words (Rir::Document)</a><br /> 26 <a href="classes/RIR/Document.html#M000010">format_words (RIR::Document)</a><br />
27 27
28 <a href="classes/Rir/WebDocument.html#M000015">get_content (Rir::WebDocument)</a><br /> 28 <a href="classes/RIR/WebDocument.html#M000015">get_content (RIR::WebDocument)</a><br />
29 29
30 <a href="classes/String.html#M000001">is_stopword? (String)</a><br /> 30 <a href="classes/String.html#M000001">is_stopword? (String)</a><br />
31 31
32 <a href="classes/Rir/WebDocument.html#M000016">new (Rir::WebDocument)</a><br /> 32 <a href="classes/RIR/WebDocument.html#M000016">new (RIR::WebDocument)</a><br />
33 33
34 <a href="classes/Rir/Document.html#M000014">new (Rir::Document)</a><br /> 34 <a href="classes/RIR/Document.html#M000014">new (RIR::Document)</a><br />
35 35
36 <a href="classes/Rir/Document.html#M000011">ngrams (Rir::Document)</a><br /> 36 <a href="classes/RIR/Document.html#M000011">ngrams (RIR::Document)</a><br />
37 37
38 <a href="classes/String.html#M000002">remove_special_characters (String)</a><br /> 38 <a href="classes/String.html#M000002">remove_special_characters (String)</a><br />
39 39
40 <a href="classes/String.html#M000006">strip_javascripts (String)</a><br /> 40 <a href="classes/String.html#M000006">strip_javascripts (String)</a><br />
41 41
42 <a href="classes/String.html#M000005">strip_javascripts! (String)</a><br /> 42 <a href="classes/String.html#M000005">strip_javascripts! (String)</a><br />
43 43
44 <a href="classes/String.html#M000008">strip_stylesheets (String)</a><br /> 44 <a href="classes/String.html#M000008">strip_stylesheets (String)</a><br />
45 45
46 <a href="classes/String.html#M000007">strip_stylesheets! (String)</a><br /> 46 <a href="classes/String.html#M000007">strip_stylesheets! (String)</a><br />
47 47
48 <a href="classes/String.html#M000004">strip_xml_tags (String)</a><br /> 48 <a href="classes/String.html#M000004">strip_xml_tags (String)</a><br />
49 49
50 <a href="classes/String.html#M000003">strip_xml_tags! (String)</a><br /> 50 <a href="classes/String.html#M000003">strip_xml_tags! (String)</a><br />
51 51
52 </div> 52 </div>
53 </div> 53 </div>
54 </body> 54 </body>
55 </html> 55 </html>
56 56
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 <!-- 4 <!--
5 5
6 RDoc Documentation 6 RDoc Documentation
7 7
8 --> 8 -->
9 <head> 9 <head>
10 <title>RDoc Documentation</title> 10 <title>RDoc Documentation</title>
11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 11 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
12 </head> 12 </head>
13 <frameset rows="20%, 80%"> 13 <frameset rows="20%, 80%">
14 <frameset cols="25%,35%,45%"> 14 <frameset cols="25%,35%,45%">
15 <frame src="fr_file_index.html" title="Files" name="Files" /> 15 <frame src="fr_file_index.html" title="Files" name="Files" />
16 <frame src="fr_class_index.html" name="Classes" /> 16 <frame src="fr_class_index.html" name="Classes" />
17 <frame src="fr_method_index.html" name="Methods" /> 17 <frame src="fr_method_index.html" name="Methods" />
18 </frameset> 18 </frameset>
19 <frame src="files/README_markdown.html" name="docwin" /> 19 <frame src="files/lib/rir/string_rb.html" name="docwin" />
20 </frameset> 20 </frameset>
21 </html> 21 </html>
22 22
1 #!/usr/bin/env ruby 1 #!/usr/bin/env ruby
2 2
3 require 'rir/document' 3 require 'rir/document'
4 require 'rir/string' 4 require 'rir/string'
5 require 'rir/query'
5 6
1 #!/usr/bin/env ruby 1 #!/usr/bin/env ruby
2 2
3 # This file is a part of an Information Retrieval oriented Ruby library 3 # This file is a part of an Information Retrieval oriented Ruby library
4 # 4 #
5 # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> 5 # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
6 # 6 #
7 # This program is free software: you can redistribute it and/or modify 7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by 8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or 9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version. 10 # (at your option) any later version.
11 # 11 #
12 # This program is distributed in the hope that it will be useful, 12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details. 15 # GNU General Public License for more details.
16 # 16 #
17 # You should have received a copy of the GNU General Public License 17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19
20 # General module for many purposes related to Information Retrieval. 20 # General module for many purposes related to Information Retrieval.
21 module Rir 21 module RIR
22 22
23 # A Document is a bag of words and is constructed from a string. 23 # A Document is a bag of words and is constructed from a string.
24 class Document 24 class Document
25 attr_reader :words, :doc_content 25 attr_reader :words, :doc_content
26 26
27 # Any non-word characters are removed from the words (see http://perldoc.perl.org/perlre.html 27 # Any non-word characters are removed from the words (see http://perldoc.perl.org/perlre.html
28 # and the \\W special escape). 28 # and the \\W special escape).
29 # 29 #
30 # Protected function, only meant to be called at initialization. 30 # Protected function, only meant to be called at initialization.
31 def format_words 31 def format_words
32 wo = [] 32 wo = []
33 33
34 @doc_content.split.each do |w| 34 @doc_content.split.each do |w|
35 w.split(/\W/).each do |sw| 35 w.split(/\W/).each do |sw|
36 wo.push(sw) if sw =~ /[a-zA-Z]/ 36 wo.push(sw) if sw =~ /[a-zA-Z]/
37 end 37 end
38 end 38 end
39 39
40 wo 40 wo
41 end 41 end
42 42
43 # Returns an Array containing the +n+-grams (words) from the current Document. 43 # Returns an Array containing the +n+-grams (words) from the current Document.
44 # 44 #
45 # ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...] 45 # ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...]
46 def ngrams(n) 46 def ngrams(n)
47 window = [] 47 window = []
48 ngrams_array = [] 48 ngrams_array = []
49 49
50 @words.each do |w| 50 @words.each do |w|
51 window.push(w) 51 window.push(w)
52 if window.size == n 52 if window.size == n
53 ngrams_array.push window.join(" ") 53 ngrams_array.push window.join(" ")
54 window.delete_at(0) 54 window.delete_at(0)
55 end 55 end
56 end 56 end
57 57
58 ngrams_array.uniq 58 ngrams_array.uniq
59 end 59 end
60 60
61 # Returns a Hash containing the words and their associated counts in the current Document. 61 # Returns a Hash containing the words and their associated counts in the current Document.
62 # 62 #
63 # count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } 63 # count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... }
64 def count_words 64 def count_words
65 counts = Hash.new { |h,k| h[k] = 0 } 65 counts = Hash.new { |h,k| h[k] = 0 }
66 @words.each { |w| counts[w.downcase] += 1 } 66 @words.each { |w| counts[w.downcase] += 1 }
67 67
68 counts 68 counts
69 end 69 end
70 70
71 # Computes the entropy of a given string +s+ inside the document. 71 # Computes the entropy of a given string +s+ inside the document.
72 # 72 #
73 # If the string parameter is composed of many words (i.e. tokens separated 73 # If the string parameter is composed of many words (i.e. tokens separated
74 # by whitespace(s)), it is considered as an ngram. 74 # by whitespace(s)), it is considered as an ngram.
75 # 75 #
76 # entropy("guitar") #=> 0.00389919463243839 76 # entropy("guitar") #=> 0.00389919463243839
77 def entropy(s) 77 def entropy(s)
78 en = 0.0 78 en = 0.0
79 counts = self.count_words 79 counts = self.count_words
80 80
81 s.split.each do |w| 81 s.split.each do |w|
82 p_wi = counts[w].to_f/@words.count.to_f 82 p_wi = counts[w].to_f/@words.count.to_f
83 en += p_wi*Math.log2(p_wi) 83 en += p_wi*Math.log2(p_wi)
84 end 84 end
85 85
86 en *= -1 86 en *= -1
87 en 87 en
88 end 88 end
89 89
90 90
91 91
92 def initialize(content) 92 def initialize(content)
93 @doc_content = content 93 @doc_content = content
94 @words = format_words 94 @words = format_words
95 end 95 end
96 96
97 protected :format_words 97 protected :format_words
98 end 98 end
99 99
100 # A WebDocument is a Document with a +url+. 100 # A WebDocument is a Document with a +url+.
101 class WebDocument < Document 101 class WebDocument < Document
102 attr_reader :url 102 attr_reader :url
103 103
104 # Returns the HTML text from the page of a given +url+. 104 # Returns the HTML text from the page of a given +url+.
105 def self.get_content(url) 105 def self.get_content(url)
106 require 'net/http' 106 require 'net/http'
107 Net::HTTP.get(URI.parse(url)) 107 Net::HTTP.get(URI.parse(url))
108 end 108 end
109 109
110 # WebDocument constructor, the content of the Document is the HTML page 110 # WebDocument constructor, the content of the Document is the HTML page
111 # without the tags. 111 # without the tags.
112 def initialize(url) 112 def initialize(url)
113 @url = url 113 @url = url
114 super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags 114 super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags
115 end 115 end
116 end 116 end
117 117
118 # A WikipediaPage is a WebDocument. 118 # A WikipediaPage is a WebDocument.
119 class WikipediaPage < WebDocument 119 class WikipediaPage < WebDocument
120 end 120 end
121 end 121 end
122 122
1 #!/usr/bin/env ruby 1 #!/usr/bin/env ruby
2 2
3 # This file is a part of an Information Retrieval oriented Ruby library 3 # This file is a part of an Information Retrieval oriented Ruby library
4 # 4 #
5 # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> 5 # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
6 # 6 #
7 # This program is free software: you can redistribute it and/or modify 7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by 8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or 9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version. 10 # (at your option) any later version.
11 # 11 #
12 # This program is distributed in the hope that it will be useful, 12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details. 15 # GNU General Public License for more details.
16 # 16 #
17 # You should have received a copy of the GNU General Public License 17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19
20 # General module for many purposes related to Information Retrieval. 20 # General module for many purposes related to Information Retrieval.
21 module Rir 21 module RIR
22 22
23 # These are the default stopwords provided by Lemur. 23 # These are the default stopwords provided by Lemur.
24 Stoplist = [ 24 Stoplist = [
25 "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", 25 "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av",
26 "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", 26 "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand",
27 "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", 27 "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by",
28 "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", 28 "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu",
29 "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", 29 "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during",
30 "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", 30 "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every",
31 "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", 31 "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting",
32 "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", 32 "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff",
33 "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", 33 "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore",
34 "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", 34 "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he",
35 "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", 35 "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto",
36 "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", 36 "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto",
37 "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", 37 "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include",
38 "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", 38 "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into",
39 "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", 39 "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last",
40 "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", 40 "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe",
41 "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", 41 "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs",
42 "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", 42 "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless",
43 "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", 43 "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing",
44 "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", 44 "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once",
45 "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", 45 "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours",
46 "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", "provide", "quite", 46 "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", "provide", "quite",
47 "rather", "really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", 47 "rather", "really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing",
48 "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", 48 "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt",
49 "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", 49 "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote",
50 "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", 50 "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes",
51 "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", 51 "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave",
52 "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", 52 "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them",
53 "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", 53 "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts",
54 "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", 54 "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon",
55 "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", 55 "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru",
56 "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", 56 "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh",
57 "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", 57 "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward",
58 "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", 58 "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week",
59 "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", 59 "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever",
60 "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", 60 "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore",
61 "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", 61 "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto",
62 "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", 62 "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever",
63 "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", 63 "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom",
64 "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", 64 "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within",
65 "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", 65 "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your",
66 "yours", "yourself", "yourselves" 66 "yours", "yourself", "yourselves"
67 ] 67 ]
68 68
69 69
70 end 70 end
71 71
72 # Extension of the standard class String with useful functions. 72 # Extension of the standard class String with useful functions.
73 class String 73 class String
74 include Rir 74 include RIR
75 75
76 # Returns +true+ if +self+ belongs to Rir::Stoplist, +false+ otherwise. 76 # Returns +true+ if +self+ belongs to Rir::Stoplist, +false+ otherwise.
77 def is_stopword? 77 def is_stopword?
78 Stoplist.include?(self.downcase) 78 Stoplist.include?(self.downcase)
79 end 79 end
80 80
81 # Do not use. 81 # Do not use.
82 # TODO: revamp. Find out why this function is here. 82 # TODO: revamp. Find out why this function is here.
83 def remove_special_characters 83 def remove_special_characters
84 self.split.collect { |w| w.gsub(/\W/,' ').split.collect { |w| w.gsub(/\W/,' ').strip.sub(/\A.\z/, '')}.join(' ').strip.sub(/\A.\z/, '')}.join(' ') 84 self.split.collect { |w| w.gsub(/\W/,' ').split.collect { |w| w.gsub(/\W/,' ').strip.sub(/\A.\z/, '')}.join(' ').strip.sub(/\A.\z/, '')}.join(' ')
85 end 85 end
86 86
87 # Removes all XML-like tags from +self+. 87 # Removes all XML-like tags from +self+.
88 # 88 #
89 # s = "<html><body>test</body></html>" 89 # s = "<html><body>test</body></html>"
90 # s.strip_xml_tags! 90 # s.strip_xml_tags!
91 # s #=> "test" 91 # s #=> "test"
92 def strip_xml_tags! 92 def strip_xml_tags!
93 replace strip_with_pattern /<\/?[^>]*>/ 93 replace strip_with_pattern /<\/?[^>]*>/
94 end 94 end
95 95
96 # Removes all XML-like tags from +self+. 96 # Removes all XML-like tags from +self+.
97 # 97 #
98 # s = "<html><body>test</body></html>" 98 # s = "<html><body>test</body></html>"
99 # s.strip_xml_tags #=> "test" 99 # s.strip_xml_tags #=> "test"
100 # s #=> "<html><body>test</body></html>" 100 # s #=> "<html><body>test</body></html>"
101 def strip_xml_tags 101 def strip_xml_tags
102 dup.strip_xml_tags! 102 dup.strip_xml_tags!
103 end 103 end
104 104
105 # Removes all Javascript sources from +self+. 105 # Removes all Javascript sources from +self+.
106 # 106 #
107 # s = "<script type='text/javascript'> 107 # s = "<script type='text/javascript'>
108 # var skin='vector', 108 # var skin='vector',
109 # stylepath='http://bits.wikimedia.org/skins-1.5' 109 # stylepath='http://bits.wikimedia.org/skins-1.5'
110 # </script> 110 # </script>
111 # 111 #
112 # test" 112 # test"
113 # s.strip_javascripts! 113 # s.strip_javascripts!
114 # s #=> "test" 114 # s #=> "test"
115 def strip_javascripts! 115 def strip_javascripts!
116 replace strip_with_pattern /<script type="text\/javascript">(.+?)<\/script>/m 116 replace strip_with_pattern /<script type="text\/javascript">(.+?)<\/script>/m
117 end 117 end
118 118
119 # Removes all Javascript sources from +self+. 119 # Removes all Javascript sources from +self+.
120 # 120 #
121 # s = "<script type='text/javascript'> 121 # s = "<script type='text/javascript'>
122 # var skin='vector', 122 # var skin='vector',
123 # stylepath='http://bits.wikimedia.org/skins-1.5' 123 # stylepath='http://bits.wikimedia.org/skins-1.5'
124 # </script> 124 # </script>
125 # 125 #
126 # test" 126 # test"
127 # s.strip_javascripts #=> "test" 127 # s.strip_javascripts #=> "test"
128 def strip_javascripts 128 def strip_javascripts
129 dup.strip_javascripts! 129 dup.strip_javascripts!
130 end 130 end
131 131
132 def strip_stylesheets! 132 def strip_stylesheets!
133 # TODO: revamp. Not sure what it is. 133 # TODO: revamp. Not sure what it is.
134 replace strip_with_pattern /<style type="text\/css">(.+?)<\/style>/m 134 replace strip_with_pattern /<style type="text\/css">(.+?)<\/style>/m
135 end 135 end
136 136
137 def strip_stylesheets 137 def strip_stylesheets
138 dup.strip_stylesheets! 138 dup.strip_stylesheets!
139 end 139 end
140 140
141 # Returns the text values inside all occurrences of an XML tag in +self+ 141 # Returns the text values inside all occurrences of an XML tag in +self+
142 # 142 #
143 # s = "four-piece in <a href='#'>Indianapolis</a>, <a href='#'>Indiana</a> at the Murat Theatre" 143 # s = "four-piece in <a href='#'>Indianapolis</a>, <a href='#'>Indiana</a> at the Murat Theatre"
144 # s.extract_xmltags_values 'a' #=> ["Indianapolis", "Indiana"] 144 # s.extract_xmltags_values 'a' #=> ["Indianapolis", "Indiana"]
145 def extract_xmltags_values(tag_name) 145 def extract_xmltags_values(tag_name)
146 self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten 146 self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten
147 end 147 end
148 148
149 private
150 def strip_with_pattern(pattern) 149 def strip_with_pattern(pattern)
151 require 'cgi' 150 require 'cgi'
152 require 'kconv' 151 require 'kconv'
153 CGI::unescapeHTML(self.gsub(pattern,"")).toutf8 152 CGI::unescapeHTML(self.gsub(pattern,"")).toutf8
154 end 153 end
154
155 private :strip_with_pattern
155 end 156 end
1 $LOAD_PATH.unshift File.expand_path(File.join(File.dirname(__FILE__), "lib")) 1 $LOAD_PATH.unshift File.expand_path(File.join(File.dirname(__FILE__), "lib"))
2 2
3 require 'rir' 3 require 'rir'
4
5 w = RIR::WikipediaPage.new("http://en.wikipedia.org/wiki/The_Dillinger_Escape_Plan")
6 p w.entropy("guitar")
7
8 params = RIR::Indri::Parameters.new("path_vers_mon_index")
9 p params.rule
10 q = RIR::Indri::IndriQuery.new("pouet", "bla", params)
11 puts q
4 12