Commit a79a228434f1eebcb042bcc576a3a0d6476789e4

Authored by Romain Deveaud
1 parent 87316a38a5
Exists in master

new TreeTagger module. tagger-chunker parsing & doc.

Showing 44 changed files with 1597 additions and 208 deletions Side-by-side Diff

doc/classes/RIR.html
... ... @@ -53,9 +53,9 @@
53 53 <td>
54 54  
55 55  
56   - <a href="../files/lib/rir/corpus_rb.html">
  56 + <a href="../files/lib/rir/ttagger_rb.html">
57 57  
58   - lib/rir/corpus.rb
  58 + lib/rir/ttagger.rb
59 59  
60 60 </a>
61 61  
62 62  
... ... @@ -73,9 +73,9 @@
73 73 <br />
74 74  
75 75  
76   - <a href="../files/lib/rir/string_rb.html">
  76 + <a href="../files/lib/rir/document_rb.html">
77 77  
78   - lib/rir/string.rb
  78 + lib/rir/document.rb
79 79  
80 80 </a>
81 81  
82 82  
... ... @@ -142,28 +142,8 @@
142 142 href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
143 143 </p>
144 144 <hr size="1"></hr><p>
145   -This file is a part of an Information Retrieval oriented Ruby library
  145 +General module for many purposes related to Information Retrieval.
146 146 </p>
147   -<p>
148   -Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
149   -</p>
150   -<p>
151   -This program is free software: you can redistribute it and/or modify it
152   -under the terms of the GNU General Public License as published by the Free
153   -Software Foundation, either version 3 of the License, or (at your option)
154   -any later version.
155   -</p>
156   -<p>
157   -This program is distributed in the hope that it will be useful, but WITHOUT
158   -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
159   -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
160   -more details.
161   -</p>
162   -<p>
163   -You should have received a copy of the GNU General Public License along
164   -with this program. If not, see <<a
165   -href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
166   -</p>
167 147  
168 148 </div>
169 149  
170 150  
171 151  
... ... @@ -180,32 +160,12 @@
180 160 <h3 class="section-bar">Classes and Modules</h3>
181 161  
182 162 Module <a href="RIR/Indri.html" class="link">RIR::Indri</a><br />
183   -Class <a href="RIR/Corpus.html" class="link">RIR::Corpus</a><br />
  163 +Module <a href="RIR/TreeTagger.html" class="link">RIR::TreeTagger</a><br />
  164 +Class <a href="RIR/Document.html" class="link">RIR::Document</a><br />
184 165 Class <a href="RIR/Query.html" class="link">RIR::Query</a><br />
  166 +Class <a href="RIR/WebDocument.html" class="link">RIR::WebDocument</a><br />
  167 +Class <a href="RIR/WikipediaPage.html" class="link">RIR::WikipediaPage</a><br />
185 168  
186   - </div>
187   -
188   - <div id="constants-list">
189   - <h3 class="section-bar">Constants</h3>
190   -
191   - <div class="name-list">
192   - <table summary="Constants">
193   -
194   - <tr class="top-aligned-row context-row">
195   - <td class="context-item-name">Stoplist</td>
196   - <td>=</td>
197   - <td class="context-item-value">[ &quot;a&quot;, &quot;anything&quot;, &quot;anyway&quot;, &quot;anywhere&quot;, &quot;apart&quot;, &quot;are&quot;, &quot;around&quot;, &quot;as&quot;, &quot;at&quot;, &quot;av&quot;, &quot;be&quot;, &quot;became&quot;, &quot;because&quot;, &quot;become&quot;, &quot;becomes&quot;, &quot;becoming&quot;, &quot;been&quot;, &quot;before&quot;, &quot;beforehand&quot;, &quot;behind&quot;, &quot;being&quot;, &quot;below&quot;, &quot;beside&quot;, &quot;besides&quot;, &quot;between&quot;, &quot;beyond&quot;, &quot;both&quot;, &quot;but&quot;, &quot;by&quot;, &quot;can&quot;, &quot;cannot&quot;, &quot;canst&quot;, &quot;certain&quot;, &quot;cf&quot;, &quot;choose&quot;, &quot;contrariwise&quot;, &quot;cos&quot;, &quot;could&quot;, &quot;cu&quot;, &quot;day&quot;, &quot;do&quot;, &quot;does&quot;, &quot;doesn't&quot;, &quot;doing&quot;, &quot;dost&quot;, &quot;doth&quot;, &quot;double&quot;, &quot;down&quot;, &quot;dual&quot;, &quot;during&quot;, &quot;each&quot;, &quot;either&quot;, &quot;else&quot;, &quot;elsewhere&quot;, &quot;enough&quot;, &quot;et&quot;, &quot;etc&quot;, &quot;even&quot;, &quot;ever&quot;, &quot;every&quot;, &quot;everybody&quot;, &quot;everyone&quot;, &quot;everything&quot;, &quot;everywhere&quot;, &quot;except&quot;, &quot;excepted&quot;, &quot;excepting&quot;, &quot;exception&quot;, &quot;exclude&quot;, &quot;excluding&quot;, &quot;exclusive&quot;, &quot;far&quot;, &quot;farther&quot;, &quot;farthest&quot;, &quot;few&quot;, &quot;ff&quot;, &quot;first&quot;, &quot;for&quot;, &quot;formerly&quot;, &quot;forth&quot;, &quot;forward&quot;, &quot;from&quot;, &quot;front&quot;, &quot;further&quot;, &quot;furthermore&quot;, &quot;furthest&quot;, &quot;get&quot;, &quot;go&quot;, &quot;had&quot;, &quot;halves&quot;, &quot;hardly&quot;, &quot;has&quot;, &quot;hast&quot;, &quot;hath&quot;, &quot;have&quot;, &quot;he&quot;, &quot;hence&quot;, &quot;henceforth&quot;, &quot;her&quot;, &quot;here&quot;, 
&quot;hereabouts&quot;, &quot;hereafter&quot;, &quot;hereby&quot;, &quot;herein&quot;, &quot;hereto&quot;, &quot;hereupon&quot;, &quot;hers&quot;, &quot;herself&quot;, &quot;him&quot;, &quot;himself&quot;, &quot;hindmost&quot;, &quot;his&quot;, &quot;hither&quot;, &quot;hitherto&quot;, &quot;how&quot;, &quot;however&quot;, &quot;howsoever&quot;, &quot;i&quot;, &quot;ie&quot;, &quot;if&quot;, &quot;in&quot;, &quot;inasmuch&quot;, &quot;inc&quot;, &quot;include&quot;, &quot;included&quot;, &quot;including&quot;, &quot;indeed&quot;, &quot;indoors&quot;, &quot;inside&quot;, &quot;insomuch&quot;, &quot;instead&quot;, &quot;into&quot;, &quot;inward&quot;, &quot;inwards&quot;, &quot;is&quot;, &quot;it&quot;, &quot;its&quot;, &quot;itself&quot;, &quot;just&quot;, &quot;kind&quot;, &quot;kg&quot;, &quot;km&quot;, &quot;last&quot;, &quot;latter&quot;, &quot;latterly&quot;, &quot;less&quot;, &quot;lest&quot;, &quot;let&quot;, &quot;like&quot;, &quot;little&quot;, &quot;ltd&quot;, &quot;many&quot;, &quot;may&quot;, &quot;maybe&quot;, &quot;me&quot;, &quot;meantime&quot;, &quot;meanwhile&quot;, &quot;might&quot;, &quot;moreover&quot;, &quot;most&quot;, &quot;mostly&quot;, &quot;more&quot;, &quot;mr&quot;, &quot;mrs&quot;, &quot;ms&quot;, &quot;much&quot;, &quot;must&quot;, &quot;my&quot;, &quot;myself&quot;, &quot;namely&quot;, &quot;need&quot;, &quot;neither&quot;, &quot;never&quot;, &quot;nevertheless&quot;, &quot;next&quot;, &quot;no&quot;, &quot;nobody&quot;, &quot;none&quot;, &quot;nonetheless&quot;, &quot;noone&quot;, &quot;nope&quot;, &quot;nor&quot;, &quot;not&quot;, &quot;nothing&quot;, &quot;notwithstanding&quot;, &quot;now&quot;, &quot;nowadays&quot;, &quot;nowhere&quot;, &quot;of&quot;, &quot;off&quot;, &quot;often&quot;, &quot;ok&quot;, &quot;on&quot;, &quot;once&quot;, &quot;one&quot;, &quot;only&quot;, &quot;onto&quot;, &quot;or&quot;, &quot;other&quot;, &quot;others&quot;, &quot;otherwise&quot;, &quot;ought&quot;, &quot;our&quot;, &quot;ours&quot;, 
&quot;ourselves&quot;, &quot;out&quot;, &quot;outside&quot;, &quot;over&quot;, &quot;own&quot;, &quot;per&quot;, &quot;perhaps&quot;, &quot;plenty&quot;, &quot;provide&quot;, &quot;quite&quot;, &quot;rather&quot;, &quot;really&quot;, &quot;round&quot;, &quot;said&quot;, &quot;sake&quot;, &quot;same&quot;, &quot;sang&quot;, &quot;save&quot;, &quot;saw&quot;, &quot;see&quot;, &quot;seeing&quot;, &quot;seem&quot;, &quot;seemed&quot;, &quot;seeming&quot;, &quot;seems&quot;, &quot;seen&quot;, &quot;seldom&quot;, &quot;selves&quot;, &quot;sent&quot;, &quot;several&quot;, &quot;shalt&quot;, &quot;she&quot;, &quot;should&quot;, &quot;shown&quot;, &quot;sideways&quot;, &quot;since&quot;, &quot;slept&quot;, &quot;slew&quot;, &quot;slung&quot;, &quot;slunk&quot;, &quot;smote&quot;, &quot;so&quot;, &quot;some&quot;, &quot;somebody&quot;, &quot;somehow&quot;, &quot;someone&quot;, &quot;something&quot;, &quot;sometime&quot;, &quot;sometimes&quot;, &quot;somewhat&quot;, &quot;somewhere&quot;, &quot;spake&quot;, &quot;spat&quot;, &quot;spoke&quot;, &quot;spoken&quot;, &quot;sprang&quot;, &quot;sprung&quot;, &quot;stave&quot;, &quot;staves&quot;, &quot;still&quot;, &quot;such&quot;, &quot;supposing&quot;, &quot;than&quot;, &quot;that&quot;, &quot;the&quot;, &quot;thee&quot;, &quot;their&quot;, &quot;them&quot;, &quot;themselves&quot;, &quot;then&quot;, &quot;thence&quot;, &quot;thenceforth&quot;, &quot;there&quot;, &quot;thereabout&quot;, &quot;thereabouts&quot;, &quot;thereafter&quot;, &quot;thereby&quot;, &quot;therefore&quot;, &quot;therein&quot;, &quot;thereof&quot;, &quot;thereon&quot;, &quot;thereto&quot;, &quot;thereupon&quot;, &quot;these&quot;, &quot;they&quot;, &quot;this&quot;, &quot;those&quot;, &quot;thou&quot;, &quot;though&quot;, &quot;thrice&quot;, &quot;through&quot;, &quot;throughout&quot;, &quot;thru&quot;, &quot;thus&quot;, &quot;thy&quot;, &quot;thyself&quot;, &quot;till&quot;, &quot;to&quot;, &quot;together&quot;, &quot;too&quot;, &quot;toward&quot;, 
&quot;towards&quot;, &quot;ugh&quot;, &quot;unable&quot;, &quot;under&quot;, &quot;underneath&quot;, &quot;unless&quot;, &quot;unlike&quot;, &quot;until&quot;, &quot;up&quot;, &quot;upon&quot;, &quot;upward&quot;, &quot;upwards&quot;, &quot;us&quot;, &quot;use&quot;, &quot;used&quot;, &quot;using&quot;, &quot;very&quot;, &quot;via&quot;, &quot;vs&quot;, &quot;want&quot;, &quot;was&quot;, &quot;we&quot;, &quot;week&quot;, &quot;well&quot;, &quot;were&quot;, &quot;what&quot;, &quot;whatever&quot;, &quot;whatsoever&quot;, &quot;when&quot;, &quot;whence&quot;, &quot;whenever&quot;, &quot;whensoever&quot;, &quot;where&quot;, &quot;whereabouts&quot;, &quot;whereafter&quot;, &quot;whereas&quot;, &quot;whereat&quot;, &quot;whereby&quot;, &quot;wherefore&quot;, &quot;wherefrom&quot;, &quot;wherein&quot;, &quot;whereinto&quot;, &quot;whereof&quot;, &quot;whereon&quot;, &quot;wheresoever&quot;, &quot;whereto&quot;, &quot;whereunto&quot;, &quot;whereupon&quot;, &quot;wherever&quot;, &quot;wherewith&quot;, &quot;whether&quot;, &quot;whew&quot;, &quot;which&quot;, &quot;whichever&quot;, &quot;whichsoever&quot;, &quot;while&quot;, &quot;whilst&quot;, &quot;whither&quot;, &quot;who&quot;, &quot;whoa&quot;, &quot;whoever&quot;, &quot;whole&quot;, &quot;whom&quot;, &quot;whomever&quot;, &quot;whomsoever&quot;, &quot;whose&quot;, &quot;whosoever&quot;, &quot;why&quot;, &quot;will&quot;, &quot;wilt&quot;, &quot;with&quot;, &quot;within&quot;, &quot;without&quot;, &quot;worse&quot;, &quot;worst&quot;, &quot;would&quot;, &quot;wow&quot;, &quot;ye&quot;, &quot;yet&quot;, &quot;year&quot;, &quot;yippee&quot;, &quot;you&quot;, &quot;your&quot;, &quot;yours&quot;, &quot;yourself&quot;, &quot;yourselves&quot; ]</td>
198   -
199   - <td>&nbsp;</td>
200   - <td class="context-item-desc">
201   -These are the default stopwords provided by Lemur.
202   -
203   -</td>
204   -
205   - </tr>
206   -
207   - </table>
208   - </div>
209 169 </div>
210 170  
211 171  
doc/classes/RIR/Document.html
... ... @@ -99,16 +99,18 @@
99 99  
100 100 <div class="name-list">
101 101  
102   - <a href="#M000021">count_words</a>&nbsp;&nbsp;
  102 + <a href="#M000010">count_words</a>&nbsp;&nbsp;
103 103  
104   - <a href="#M000022">entropy</a>&nbsp;&nbsp;
  104 + <a href="#M000011">entropy</a>&nbsp;&nbsp;
105 105  
106   - <a href="#M000019">format_words</a>&nbsp;&nbsp;
  106 + <a href="#M000008">format_words</a>&nbsp;&nbsp;
107 107  
108   - <a href="#M000023">new</a>&nbsp;&nbsp;
  108 + <a href="#M000013">new</a>&nbsp;&nbsp;
109 109  
110   - <a href="#M000020">ngrams</a>&nbsp;&nbsp;
  110 + <a href="#M000009">ngrams</a>&nbsp;&nbsp;
111 111  
  112 + <a href="#M000012">tf</a>&nbsp;&nbsp;
  113 +
112 114 </div>
113 115 </div>
114 116  
115 117  
... ... @@ -154,13 +156,13 @@
154 156 <h3 class="section-bar">Public Class methods</h3>
155 157  
156 158  
157   - <div id="method-M000023" class="method-detail">
158   - <a name="M000023"></a>
  159 + <div id="method-M000013" class="method-detail">
  160 + <a name="M000013"></a>
159 161  
160 162 <div class="method-heading">
161 163  
162   - <a href="Document.src/M000023.html" target="Code" class="method-signature"
163   - onclick="popupCode('Document.src/M000023.html');return false;">
  164 + <a href="Document.src/M000013.html" target="Code" class="method-signature"
  165 + onclick="popupCode('Document.src/M000013.html');return false;">
164 166  
165 167 <span class="method-name">new</span><span class="method-args">(content)</span>
166 168  
167 169  
... ... @@ -177,13 +179,13 @@
177 179 <h3 class="section-bar">Public Instance methods</h3>
178 180  
179 181  
180   - <div id="method-M000021" class="method-detail">
181   - <a name="M000021"></a>
  182 + <div id="method-M000010" class="method-detail">
  183 + <a name="M000010"></a>
182 184  
183 185 <div class="method-heading">
184 186  
185   - <a href="Document.src/M000021.html" target="Code" class="method-signature"
186   - onclick="popupCode('Document.src/M000021.html');return false;">
  187 + <a href="Document.src/M000010.html" target="Code" class="method-signature"
  188 + onclick="popupCode('Document.src/M000010.html');return false;">
187 189  
188 190 <span class="method-name">count_words</span><span class="method-args">()</span>
189 191  
190 192  
... ... @@ -205,13 +207,13 @@
205 207 </div>
206 208  
207 209  
208   - <div id="method-M000022" class="method-detail">
209   - <a name="M000022"></a>
  210 + <div id="method-M000011" class="method-detail">
  211 + <a name="M000011"></a>
210 212  
211 213 <div class="method-heading">
212 214  
213   - <a href="Document.src/M000022.html" target="Code" class="method-signature"
214   - onclick="popupCode('Document.src/M000022.html');return false;">
  215 + <a href="Document.src/M000011.html" target="Code" class="method-signature"
  216 + onclick="popupCode('Document.src/M000011.html');return false;">
215 217  
216 218 <span class="method-name">entropy</span><span class="method-args">(s)</span>
217 219  
218 220  
219 221  
... ... @@ -229,20 +231,21 @@
229 231 whitespace(s)), it is considered as an ngram.
230 232 </p>
231 233 <pre>
232   - entropy(&quot;guitar&quot;) #=&gt; 0.00389919463243839
  234 + entropy(&quot;guitar&quot;) #=&gt; 0.00432114812727959
  235 + entropy(&quot;dillinger escape plan&quot;) #=&gt; 0.265862076325102
233 236 </pre>
234 237  
235 238 </div>
236 239 </div>
237 240  
238 241  
239   - <div id="method-M000020" class="method-detail">
240   - <a name="M000020"></a>
  242 + <div id="method-M000009" class="method-detail">
  243 + <a name="M000009"></a>
241 244  
242 245 <div class="method-heading">
243 246  
244   - <a href="Document.src/M000020.html" target="Code" class="method-signature"
245   - onclick="popupCode('Document.src/M000020.html');return false;">
  247 + <a href="Document.src/M000009.html" target="Code" class="method-signature"
  248 + onclick="popupCode('Document.src/M000009.html');return false;">
246 249  
247 250 <span class="method-name">ngrams</span><span class="method-args">(n)</span>
248 251  
249 252  
250 253  
... ... @@ -264,16 +267,43 @@
264 267 </div>
265 268  
266 269  
  270 + <div id="method-M000012" class="method-detail">
  271 + <a name="M000012"></a>
  272 +
  273 + <div class="method-heading">
  274 +
  275 + <a href="Document.src/M000012.html" target="Code" class="method-signature"
  276 + onclick="popupCode('Document.src/M000012.html');return false;">
  277 +
  278 + <span class="method-name">tf</span><span class="method-args">(s)</span>
  279 +
  280 + </a>
  281 +
  282 + </div>
  283 +
  284 + <div class="method-description">
  285 +
  286 + <p>
  287 +Computes the term frequency of a given <b>word</b> <tt>s</tt>.
  288 +</p>
  289 +<pre>
  290 + tf(&quot;guitar&quot;) #=&gt; 0.000380372765310004
  291 +</pre>
  292 +
  293 + </div>
  294 + </div>
  295 +
  296 +
267 297 <h3 class="section-bar">Protected Instance methods</h3>
268 298  
269 299  
270   - <div id="method-M000019" class="method-detail">
271   - <a name="M000019"></a>
  300 + <div id="method-M000008" class="method-detail">
  301 + <a name="M000008"></a>
272 302  
273 303 <div class="method-heading">
274 304  
275   - <a href="Document.src/M000019.html" target="Code" class="method-signature"
276   - onclick="popupCode('Document.src/M000019.html');return false;">
  305 + <a href="Document.src/M000008.html" target="Code" class="method-signature"
  306 + onclick="popupCode('Document.src/M000008.html');return false;">
277 307  
278 308 <span class="method-name">format_words</span><span class="method-args">()</span>
279 309  
doc/classes/RIR/Document.src/M000008.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>format_words (RIR::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span>
  12 + <span class="ruby-identifier">wo</span> = []
  13 +
  14 + <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
  15 + <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span>
  16 + <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>.<span class="ruby-identifier">downcase</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span>
  17 + <span class="ruby-keyword kw">end</span>
  18 + <span class="ruby-keyword kw">end</span>
  19 +
  20 + <span class="ruby-identifier">wo</span>
  21 + <span class="ruby-keyword kw">end</span></pre>
  22 +</body>
  23 +</html>
doc/classes/RIR/Document.src/M000009.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>ngrams (RIR::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>)
  12 + <span class="ruby-identifier">window</span> = []
  13 + <span class="ruby-identifier">ngrams_array</span> = []
  14 +
  15 + <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
  16 + <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>)
  17 + <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span>
  18 + <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">&quot; &quot;</span>)
  19 + <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>)
  20 + <span class="ruby-keyword kw">end</span>
  21 + <span class="ruby-keyword kw">end</span>
  22 +
  23 + <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span>
  24 + <span class="ruby-keyword kw">end</span></pre>
  25 +</body>
  26 +</html>
doc/classes/RIR/Document.src/M000010.html
... ... @@ -2,22 +2,17 @@
2 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 4 <head>
5   - <title>format_words (RIR::Document)</title>
  5 + <title>count_words (RIR::Document)</title>
6 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 8 </head>
9 9 <body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span>
12   - <span class="ruby-identifier">wo</span> = []
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span>
  12 + <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> }
  13 + <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> }
13 14  
14   - <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
15   - <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span>
16   - <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span>
17   - <span class="ruby-keyword kw">end</span>
18   - <span class="ruby-keyword kw">end</span>
19   -
20   - <span class="ruby-identifier">wo</span>
  15 + <span class="ruby-identifier">counts</span>
21 16 <span class="ruby-keyword kw">end</span></pre>
22 17 </body>
23 18 </html>
doc/classes/RIR/Document.src/M000011.html
... ... @@ -2,25 +2,23 @@
2 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 4 <head>
5   - <title>ngrams (RIR::Document)</title>
  5 + <title>entropy (RIR::Document)</title>
6 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 8 </head>
9 9 <body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>)
12   - <span class="ruby-identifier">window</span> = []
13   - <span class="ruby-identifier">ngrams_array</span> = []
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 78</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>)
  12 + <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span>
  13 + <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span>
14 14  
15   - <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
16   - <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>)
17   - <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span>
18   - <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">&quot; &quot;</span>)
19   - <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>)
20   - <span class="ruby-keyword kw">end</span>
  15 + <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
  16 + <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span>
  17 + <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>)
21 18 <span class="ruby-keyword kw">end</span>
22 19  
23   - <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span>
  20 + <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span>
  21 + <span class="ruby-identifier">en</span>
24 22 <span class="ruby-keyword kw">end</span></pre>
25 23 </body>
26 24 </html>
doc/classes/RIR/Document.src/M000012.html
... ... @@ -2,17 +2,14 @@
2 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 4 <head>
5   - <title>count_words (RIR::Document)</title>
  5 + <title>tf (RIR::Document)</title>
6 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 8 </head>
9 9 <body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span>
12   - <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> }
13   - <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> }
14   -
15   - <span class="ruby-identifier">counts</span>
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 94</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">tf</span>(<span class="ruby-identifier">s</span>)
  12 + <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span>[<span class="ruby-identifier">s</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">size</span>.<span class="ruby-identifier">to_f</span>
16 13 <span class="ruby-keyword kw">end</span></pre>
17 14 </body>
18 15 </html>
doc/classes/RIR/Document.src/M000013.html
... ... @@ -2,23 +2,15 @@
2 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 4 <head>
5   - <title>entropy (RIR::Document)</title>
  5 + <title>new (RIR::Document)</title>
6 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 8 </head>
9 9 <body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 77</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>)
12   - <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span>
13   - <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span>
14   -
15   - <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
16   - <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span>
17   - <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>)
18   - <span class="ruby-keyword kw">end</span>
19   -
20   - <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span>
21   - <span class="ruby-identifier">en</span>
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 99</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">content</span>)
  12 + <span class="ruby-ivar">@doc_content</span> = <span class="ruby-identifier">content</span>
  13 + <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">format_words</span>
22 14 <span class="ruby-keyword kw">end</span></pre>
23 15 </body>
24 16 </html>
doc/classes/RIR/Indri/IndriQuery.html
... ... @@ -95,9 +95,9 @@
95 95  
96 96 <div class="name-list">
97 97  
98   - <a href="#M000014">new</a>&nbsp;&nbsp;
  98 + <a href="#M000006">new</a>&nbsp;&nbsp;
99 99  
100   - <a href="#M000015">to_s</a>&nbsp;&nbsp;
  100 + <a href="#M000007">to_s</a>&nbsp;&nbsp;
101 101  
102 102 </div>
103 103 </div>
104 104  
... ... @@ -160,13 +160,13 @@
160 160 <h3 class="section-bar">Public Class methods</h3>
161 161  
162 162  
163   - <div id="method-M000014" class="method-detail">
164   - <a name="M000014"></a>
  163 + <div id="method-M000006" class="method-detail">
  164 + <a name="M000006"></a>
165 165  
166 166 <div class="method-heading">
167 167  
168   - <a href="IndriQuery.src/M000014.html" target="Code" class="method-signature"
169   - onclick="popupCode('IndriQuery.src/M000014.html');return false;">
  168 + <a href="IndriQuery.src/M000006.html" target="Code" class="method-signature"
  169 + onclick="popupCode('IndriQuery.src/M000006.html');return false;">
170 170  
171 171 <span class="method-name">new</span><span class="method-args">(id,query,params)</span>
172 172  
173 173  
... ... @@ -183,13 +183,13 @@
183 183 <h3 class="section-bar">Public Instance methods</h3>
184 184  
185 185  
186   - <div id="method-M000015" class="method-detail">
187   - <a name="M000015"></a>
  186 + <div id="method-M000007" class="method-detail">
  187 + <a name="M000007"></a>
188 188  
189 189 <div class="method-heading">
190 190  
191   - <a href="IndriQuery.src/M000015.html" target="Code" class="method-signature"
192   - onclick="popupCode('IndriQuery.src/M000015.html');return false;">
  191 + <a href="IndriQuery.src/M000007.html" target="Code" class="method-signature"
  192 + onclick="popupCode('IndriQuery.src/M000007.html');return false;">
193 193  
194 194 <span class="method-name">to_s</span><span class="method-args">()</span>
195 195  
doc/classes/RIR/Indri/IndriQuery.src/M000006.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>new (RIR::Indri::IndriQuery)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/query.rb, line 62</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">id</span>,<span class="ruby-identifier">query</span>,<span class="ruby-identifier">params</span>)
  12 + <span class="ruby-ivar">@params</span> = <span class="ruby-identifier">params</span>
  13 + <span class="ruby-comment cmt"># Here we set the default retrieval model as Language Modeling</span>
  14 + <span class="ruby-comment cmt"># with a Dirichlet smoothing at 2500.</span>
  15 + <span class="ruby-comment cmt"># TODO: maybe a Rule class...</span>
  16 + <span class="ruby-ivar">@params</span>.<span class="ruby-identifier">rule</span> = <span class="ruby-value str">'method:dirichlet,mu:2500'</span> <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@params</span>.<span class="ruby-identifier">rule</span>.<span class="ruby-identifier">nil?</span>
  17 +
  18 + <span class="ruby-ivar">@id</span> = <span class="ruby-identifier">id</span>
  19 + <span class="ruby-ivar">@query</span> = <span class="ruby-identifier">query</span>
  20 + <span class="ruby-keyword kw">end</span></pre>
  21 +</body>
  22 +</html>
doc/classes/RIR/Indri/IndriQuery.src/M000007.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>to_s (RIR::Indri::IndriQuery)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/query.rb, line 73</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">to_s</span>
  12 + <span class="ruby-identifier">h</span> = <span class="ruby-ivar">@params</span>.<span class="ruby-identifier">to_s</span>
  13 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">&quot;&lt;query&gt;\n&quot;</span>
  14 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;number&gt;#{@id}&lt;/number&gt;\n&quot;</span>
  15 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;text&gt;#{@query}&lt;/text&gt;\n&quot;</span>
  16 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">&quot;&lt;/query&gt;\n&quot;</span>
  17 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">&quot;&lt;/parameters&gt;&quot;</span>
  18 +
  19 + <span class="ruby-identifier">h</span>
  20 + <span class="ruby-keyword kw">end</span></pre>
  21 +</body>
  22 +</html>
doc/classes/RIR/Indri/Parameters.html
... ... @@ -91,9 +91,9 @@
91 91  
92 92 <div class="name-list">
93 93  
94   - <a href="#M000012">new</a>&nbsp;&nbsp;
  94 + <a href="#M000004">new</a>&nbsp;&nbsp;
95 95  
96   - <a href="#M000013">to_s</a>&nbsp;&nbsp;
  96 + <a href="#M000005">to_s</a>&nbsp;&nbsp;
97 97  
98 98 </div>
99 99 </div>
... ... @@ -121,7 +121,7 @@
121 121 </tr>
122 122  
123 123 <tr class="top-aligned-row context-row">
124   - <td class="context-item-name">corpus</td>
  124 + <td class="context-item-name">count</td>
125 125  
126 126 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
127 127  
... ... @@ -129,7 +129,7 @@
129 129 </tr>
130 130  
131 131 <tr class="top-aligned-row context-row">
132   - <td class="context-item-name">count</td>
  132 + <td class="context-item-name">index_path</td>
133 133  
134 134 <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
135 135  
136 136  
... ... @@ -196,13 +196,13 @@
196 196 <h3 class="section-bar">Public Class methods</h3>
197 197  
198 198  
199   - <div id="method-M000012" class="method-detail">
200   - <a name="M000012"></a>
  199 + <div id="method-M000004" class="method-detail">
  200 + <a name="M000004"></a>
201 201  
202 202 <div class="method-heading">
203 203  
204   - <a href="Parameters.src/M000012.html" target="Code" class="method-signature"
205   - onclick="popupCode('Parameters.src/M000012.html');return false;">
  204 + <a href="Parameters.src/M000004.html" target="Code" class="method-signature"
  205 + onclick="popupCode('Parameters.src/M000004.html');return false;">
206 206  
207 207 <span class="method-name">new</span><span class="method-args">(corpus,mem=&quot;1g&quot;,count=&quot;1000&quot;,offset=&quot;1&quot;,run_id=&quot;default&quot;,print_query=false,print_docs=false)</span>
208 208  
209 209  
... ... @@ -219,13 +219,13 @@
219 219 <h3 class="section-bar">Public Instance methods</h3>
220 220  
221 221  
222   - <div id="method-M000013" class="method-detail">
223   - <a name="M000013"></a>
  222 + <div id="method-M000005" class="method-detail">
  223 + <a name="M000005"></a>
224 224  
225 225 <div class="method-heading">
226 226  
227   - <a href="Parameters.src/M000013.html" target="Code" class="method-signature"
228   - onclick="popupCode('Parameters.src/M000013.html');return false;">
  227 + <a href="Parameters.src/M000005.html" target="Code" class="method-signature"
  228 + onclick="popupCode('Parameters.src/M000005.html');return false;">
229 229  
230 230 <span class="method-name">to_s</span><span class="method-args">()</span>
231 231  
doc/classes/RIR/Indri/Parameters.src/M000004.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>new (RIR::Indri::Parameters)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/query.rb, line 30</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">corpus</span>,<span class="ruby-identifier">mem</span>=<span class="ruby-value str">&quot;1g&quot;</span>,<span class="ruby-identifier">count</span>=<span class="ruby-value str">&quot;1000&quot;</span>,<span class="ruby-identifier">offset</span>=<span class="ruby-value str">&quot;1&quot;</span>,<span class="ruby-identifier">run_id</span>=<span class="ruby-value str">&quot;default&quot;</span>,<span class="ruby-identifier">print_query</span>=<span class="ruby-keyword kw">false</span>,<span class="ruby-identifier">print_docs</span>=<span class="ruby-keyword kw">false</span>)
  12 + <span class="ruby-ivar">@index_path</span> = <span class="ruby-identifier">corpus</span>
  13 + <span class="ruby-ivar">@memory</span> = <span class="ruby-identifier">mem</span>
  14 + <span class="ruby-ivar">@count</span> = <span class="ruby-identifier">count</span>
  15 + <span class="ruby-ivar">@offset</span> = <span class="ruby-identifier">offset</span>
  16 + <span class="ruby-ivar">@run_id</span> = <span class="ruby-identifier">run_id</span>
  17 + <span class="ruby-ivar">@print_query</span> = <span class="ruby-identifier">print_query</span> <span class="ruby-value">? </span><span class="ruby-value str">&quot;true&quot;</span> <span class="ruby-operator">:</span> <span class="ruby-value str">&quot;false&quot;</span>
  18 + <span class="ruby-ivar">@print_docs</span> = <span class="ruby-identifier">print_docs</span> <span class="ruby-value">? </span><span class="ruby-value str">&quot;true&quot;</span> <span class="ruby-operator">:</span> <span class="ruby-value str">&quot;false&quot;</span>
  19 + <span class="ruby-keyword kw">end</span></pre>
  20 +</body>
  21 +</html>
doc/classes/RIR/Indri/Parameters.src/M000005.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>to_s (RIR::Indri::Parameters)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/query.rb, line 40</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">to_s</span>
  12 + <span class="ruby-identifier">h</span> = <span class="ruby-value str">&quot;&lt;parameters&gt;\n&quot;</span>
  13 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;memory&gt;#{@memory}&lt;/memory&gt;\n&quot;</span>
  14 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;index&gt;#{@index_path}&lt;/index&gt;\n&quot;</span>
  15 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;count&gt;#{@count}&lt;/count&gt;\n&quot;</span>
  16 + <span class="ruby-keyword kw">unless</span> <span class="ruby-ivar">@baseline</span>.<span class="ruby-identifier">nil?</span>
  17 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;baseline&gt;#{@baseline}&lt;/baseline&gt;\n&quot;</span>
  18 + <span class="ruby-keyword kw">else</span>
  19 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;rule&gt;#{@rule}&lt;/rule&gt;\n&quot;</span>
  20 + <span class="ruby-keyword kw">end</span>
  21 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;queryOffset&gt;#{@offset}&lt;/queryOffset&gt;\n&quot;</span>
  22 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;runID&gt;#{@run_id}&lt;/runID&gt;\n&quot;</span>
  23 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;printQuery&gt;#{@print_query}&lt;/printQuery&gt;\n&quot;</span>
  24 + <span class="ruby-identifier">h</span> <span class="ruby-operator">+=</span> <span class="ruby-node">&quot;&lt;printDocuments&gt;#{@print_docs}&lt;/printDocuments&gt;\n&quot;</span>
  25 +
  26 + <span class="ruby-identifier">h</span>
  27 + <span class="ruby-keyword kw">end</span></pre>
  28 +</body>
  29 +</html>
doc/classes/RIR/TreeTagger.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Module: RIR::TreeTagger [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Module</strong></td>
  49 + <td class="class-name-in-header">RIR::TreeTagger</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../files/lib/rir/ttagger_rb.html">
  57 +
  58 + lib/rir/ttagger.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + </table>
  70 + </div>
  71 + <!-- banner header -->
  72 +
  73 + <div id="bodyContent">
  74 +
  75 + <div id="contextContent">
  76 +
  77 + <div id="description">
  78 + <p>
  79 +Module grouping TreeTagger-related functionality.
  80 +</p>
  81 +<p>
  82 +See <a
  83 +href="http://www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/DecisionTreeTagger.html">www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/DecisionTreeTagger.html</a>
  84 +</p>
  85 +
  86 + </div>
  87 +
  88 + </div>
  89 +
  90 +
  91 + </div>
  92 +
  93 + <!-- if includes -->
  94 +
  95 + <div id="section">
  96 +
  97 + <div id="class-list">
  98 + <h3 class="section-bar">Classes and Modules</h3>
  99 +
  100 + Class <a href="TreeTagger/Chunk.html" class="link">RIR::TreeTagger::Chunk</a><br />
  101 +Class <a href="TreeTagger/TaggerChunker.html" class="link">RIR::TreeTagger::TaggerChunker</a><br />
  102 +Class <a href="TreeTagger/TaggerChunkerEnglish.html" class="link">RIR::TreeTagger::TaggerChunkerEnglish</a><br />
  103 +Class <a href="TreeTagger/TaggerChunkerFrench.html" class="link">RIR::TreeTagger::TaggerChunkerFrench</a><br />
  104 +Class <a href="TreeTagger/TaggerChunkerGerman.html" class="link">RIR::TreeTagger::TaggerChunkerGerman</a><br />
  105 +
  106 + </div>
  107 +
  108 +
  109 +
  110 +
  111 + <!-- if method_list -->
  112 +
  113 +
  114 +
  115 +
  116 + </div>
  117 +
  118 +<div id="validator-badges">
  119 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  120 +</div>
  121 +
  122 +</body>
  123 +</html>
doc/classes/RIR/TreeTagger/Chunk.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: RIR::TreeTagger::Chunk [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">RIR::TreeTagger::Chunk</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../../files/lib/rir/ttagger_rb.html">
  57 +
  58 + lib/rir/ttagger.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + Object
  74 +
  75 + </td>
  76 + </tr>
  77 +
  78 + </table>
  79 + </div>
  80 + <!-- banner header -->
  81 +
  82 + <div id="bodyContent">
  83 +
  84 + <div id="contextContent">
  85 +
  86 + <div id="description">
  87 + <p>
  88 +Represents a <a href="Chunk.html">Chunk</a> extracted when parsing a <a
  89 +href="TaggerChunker.html">TaggerChunker</a> file.
  90 +</p>
  91 +
  92 + </div>
  93 +
  94 + </div>
  95 +
  96 +
  97 + <div id="method-list">
  98 + <h3 class="section-bar">Methods</h3>
  99 +
  100 + <div class="name-list">
  101 +
  102 + <a href="#M000003">new</a>&nbsp;&nbsp;
  103 +
  104 + </div>
  105 + </div>
  106 +
  107 + </div>
  108 +
  109 + <!-- if includes -->
  110 +
  111 + <div id="section">
  112 +
  113 +
  114 +
  115 + <div id="attribute-list">
  116 + <h3 class="section-bar">Attributes</h3>
  117 +
  118 + <div class="name-list">
  119 + <table>
  120 +
  121 + <tr class="top-aligned-row context-row">
  122 + <td class="context-item-name">tag</td>
  123 +
  124 + <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  125 +
  126 + <td class="context-item-desc"></td>
  127 + </tr>
  128 +
  129 + <tr class="top-aligned-row context-row">
  130 + <td class="context-item-name">words</td>
  131 +
  132 + <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  133 +
  134 + <td class="context-item-desc"></td>
  135 + </tr>
  136 +
  137 + </table>
  138 + </div>
  139 + </div>
  140 +
  141 +
  142 + <!-- if method_list -->
  143 +
  144 + <div id="methods">
  145 +
  146 + <h3 class="section-bar">Public Class methods</h3>
  147 +
  148 +
  149 + <div id="method-M000003" class="method-detail">
  150 + <a name="M000003"></a>
  151 +
  152 + <div class="method-heading">
  153 +
  154 + <a href="Chunk.src/M000003.html" target="Code" class="method-signature"
  155 + onclick="popupCode('Chunk.src/M000003.html');return false;">
  156 +
  157 + <span class="method-name">new</span><span class="method-args">(str,tag)</span>
  158 +
  159 + </a>
  160 +
  161 + </div>
  162 +
  163 + <div class="method-description">
  164 +
  165 + <p>
  166 +<tt>str</tt> is a string of whitespace-separated terms. For <tt>tag</tt>, see <a
  167 +href="ftp://ftp.ims.uni-stuttgart.de/pub/corpora/chunker-tagset-english.txt">ftp.ims.uni-stuttgart.de/pub/corpora/chunker-tagset-english.txt</a>
  168 +</p>
  169 +
  170 + </div>
  171 + </div>
  172 +
  173 +
  174 +
  175 + </div>
  176 +
  177 +
  178 +
  179 +
  180 + </div>
  181 +
  182 +<div id="validator-badges">
  183 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  184 +</div>
  185 +
  186 +</body>
  187 +</html>
doc/classes/RIR/TreeTagger/Chunk.src/M000003.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>new (RIR::TreeTagger::Chunk)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/ttagger.rb, line 86</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span> <span class="ruby-identifier">str</span>,<span class="ruby-identifier">tag</span>
  12 + <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">str</span>.<span class="ruby-identifier">split</span>
  13 + <span class="ruby-ivar">@tag</span> = <span class="ruby-identifier">tag</span>[<span class="ruby-value">1</span><span class="ruby-operator">..</span><span class="ruby-value">-2</span>]
  14 + <span class="ruby-keyword kw">end</span></pre>
  15 +</body>
  16 +</html>
doc/classes/RIR/TreeTagger/TaggerChunker.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: RIR::TreeTagger::TaggerChunker [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">RIR::TreeTagger::TaggerChunker</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../../files/lib/rir/ttagger_rb.html">
  57 +
  58 + lib/rir/ttagger.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + Object
  74 +
  75 + </td>
  76 + </tr>
  77 +
  78 + </table>
  79 + </div>
  80 + <!-- banner header -->
  81 +
  82 + <div id="bodyContent">
  83 +
  84 + <div id="contextContent">
  85 +
  86 + <div id="description">
  87 + <p>
  88 +This class handles generic parsing of tagger-chunker outputs.
  89 +</p>
  90 +
  91 + </div>
  92 +
  93 + </div>
  94 +
  95 +
  96 + <div id="method-list">
  97 + <h3 class="section-bar">Methods</h3>
  98 +
  99 + <div class="name-list">
  100 +
  101 + <a href="#M000002">new</a>&nbsp;&nbsp;
  102 +
  103 + <a href="#M000001">parse</a>&nbsp;&nbsp;
  104 +
  105 + </div>
  106 + </div>
  107 +
  108 + </div>
  109 +
  110 + <!-- if includes -->
  111 +
  112 + <div id="section">
  113 +
  114 +
  115 +
  116 + <div id="attribute-list">
  117 + <h3 class="section-bar">Attributes</h3>
  118 +
  119 + <div class="name-list">
  120 + <table>
  121 +
  122 + <tr class="top-aligned-row context-row">
  123 + <td class="context-item-name">chunks</td>
  124 +
  125 + <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  126 +
  127 + <td class="context-item-desc"></td>
  128 + </tr>
  129 +
  130 + <tr class="top-aligned-row context-row">
  131 + <td class="context-item-name">file</td>
  132 +
  133 + <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  134 +
  135 + <td class="context-item-desc"></td>
  136 + </tr>
  137 +
  138 + </table>
  139 + </div>
  140 + </div>
  141 +
  142 +
  143 + <!-- if method_list -->
  144 +
  145 + <div id="methods">
  146 +
  147 + <h3 class="section-bar">Public Class methods</h3>
  148 +
  149 +
  150 + <div id="method-M000002" class="method-detail">
  151 + <a name="M000002"></a>
  152 +
  153 + <div class="method-heading">
  154 +
  155 + <a href="TaggerChunker.src/M000002.html" target="Code" class="method-signature"
  156 + onclick="popupCode('TaggerChunker.src/M000002.html');return false;">
  157 +
  158 + <span class="method-name">new</span><span class="method-args">(chunk_file)</span>
  159 +
  160 + </a>
  161 +
  162 + </div>
  163 +
  164 + <div class="method-description">
  165 +
  166 + <p>
  167 +Initializes parsing. <tt>chunk_file</tt> must be a valid path to a file
  168 +containing the output of a TreeTagger <tt>tagger-chunker</tt> script.
  169 +</p>
  170 +<pre>
  171 + TaggerChunker.new(&quot;ttout/2010020&quot;) #=&gt; #&lt;RIR::TreeTagger::TaggerChunker:0x92fd088 @chunks=[#&lt;RIR::TreeTagger::Chunk:0x8ec5a10 @words=[&quot;robert&quot;, &quot;schumann&quot;], @tag=&quot;NC&quot;&gt;, ...] ...&gt;
  172 +</pre>
  173 +
  174 + </div>
  175 + </div>
  176 +
  177 +
  178 + <div id="method-M000001" class="method-detail">
  179 + <a name="M000001"></a>
  180 +
  181 + <div class="method-heading">
  182 +
  183 + <a href="TaggerChunker.src/M000001.html" target="Code" class="method-signature"
  184 + onclick="popupCode('TaggerChunker.src/M000001.html');return false;">
  185 +
  186 + <span class="method-name">parse</span><span class="method-args">(chunk_lines)</span>
  187 +
  188 + </a>
  189 +
  190 + </div>
  191 +
  192 + <div class="method-description">
  193 +
  194 + <p>
  195 +Parses a tagger-chunker output and returns an Array of <a
  196 +href="Chunk.html">Chunk</a>.
  197 +</p>
  198 +
  199 + </div>
  200 + </div>
  201 +
  202 +
  203 +
  204 + </div>
  205 +
  206 +
  207 +
  208 +
  209 + </div>
  210 +
  211 +<div id="validator-badges">
  212 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  213 +</div>
  214 +
  215 +</body>
  216 +</html>
doc/classes/RIR/TreeTagger/TaggerChunker.src/M000001.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>parse (RIR::TreeTagger::TaggerChunker)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/ttagger.rb, line 33</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">parse</span> <span class="ruby-identifier">chunk_lines</span>
  12 + <span class="ruby-identifier">open</span> = <span class="ruby-keyword kw">false</span>
  13 + <span class="ruby-identifier">tag</span> = <span class="ruby-keyword kw">nil</span>
  14 +
  15 + <span class="ruby-identifier">chunks</span> = []
  16 + <span class="ruby-identifier">words</span> = []
  17 +
  18 + <span class="ruby-identifier">chunk_lines</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">l</span><span class="ruby-operator">|</span>
  19 + <span class="ruby-identifier">l</span>.<span class="ruby-identifier">chomp!</span>
  20 + <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">l</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^&lt;\w+&gt;$/</span>
  21 + <span class="ruby-identifier">open</span> = <span class="ruby-keyword kw">true</span>
  22 + <span class="ruby-identifier">tag</span> = <span class="ruby-identifier">l</span>
  23 + <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">l</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^&lt;\/\w+&gt;$/</span>
  24 + <span class="ruby-keyword kw">if</span> <span class="ruby-operator">!</span><span class="ruby-identifier">words</span>.<span class="ruby-identifier">empty?</span> <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">open</span> <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">l</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">tag</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/&lt;/</span>, <span class="ruby-value str">'&lt;/'</span>)
  25 + <span class="ruby-identifier">open</span> = <span class="ruby-keyword kw">false</span>
  26 + <span class="ruby-identifier">chunks</span>.<span class="ruby-identifier">push</span> <span class="ruby-constant">Chunk</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">words</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">&quot; &quot;</span>), <span class="ruby-identifier">tag</span>)
  27 + <span class="ruby-identifier">words</span>.<span class="ruby-identifier">clear</span>
  28 + <span class="ruby-keyword kw">else</span>
  29 + <span class="ruby-keyword kw">next</span>
  30 + <span class="ruby-keyword kw">end</span>
  31 + <span class="ruby-keyword kw">else</span>
  32 + <span class="ruby-identifier">words</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">l</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">first</span>)
  33 + <span class="ruby-keyword kw">end</span>
  34 + <span class="ruby-keyword kw">end</span>
  35 +
  36 + <span class="ruby-identifier">chunks</span>
  37 + <span class="ruby-keyword kw">end</span></pre>
  38 +</body>
  39 +</html>
doc/classes/RIR/TreeTagger/TaggerChunker.src/M000002.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>new (RIR::TreeTagger::TaggerChunker)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/ttagger.rb, line 65</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span> <span class="ruby-identifier">chunk_file</span>
  12 + <span class="ruby-ivar">@chunks</span> = <span class="ruby-constant">TaggerChunker</span>.<span class="ruby-identifier">parse</span> <span class="ruby-constant">File</span>.<span class="ruby-identifier">open</span>(<span class="ruby-identifier">chunk_file</span>).<span class="ruby-identifier">readlines</span>
  13 + <span class="ruby-keyword kw">end</span></pre>
  14 +</body>
  15 +</html>
doc/classes/RIR/TreeTagger/TaggerChunkerEnglish.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: RIR::TreeTagger::TaggerChunkerEnglish [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">RIR::TreeTagger::TaggerChunkerEnglish</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../../files/lib/rir/ttagger_rb.html">
  57 +
  58 + lib/rir/ttagger.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + <a href="TaggerChunker.html">
  74 +
  75 + RIR::TreeTagger::TaggerChunker
  76 +
  77 + </a>
  78 +
  79 + </td>
  80 + </tr>
  81 +
  82 + </table>
  83 + </div>
  84 + <!-- banner header -->
  85 +
  86 + <div id="bodyContent">
  87 +
  88 + <div id="contextContent">
  89 +
  90 + </div>
  91 +
  92 +
  93 + </div>
  94 +
  95 + <!-- if includes -->
  96 +
  97 + <div id="section">
  98 +
  99 +
  100 +
  101 +
  102 + <!-- if method_list -->
  103 +
  104 +
  105 +
  106 +
  107 + </div>
  108 +
  109 +<div id="validator-badges">
  110 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  111 +</div>
  112 +
  113 +</body>
  114 +</html>
doc/classes/RIR/TreeTagger/TaggerChunkerFrench.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: RIR::TreeTagger::TaggerChunkerFrench [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">RIR::TreeTagger::TaggerChunkerFrench</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../../files/lib/rir/ttagger_rb.html">
  57 +
  58 + lib/rir/ttagger.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + <a href="TaggerChunker.html">
  74 +
  75 + RIR::TreeTagger::TaggerChunker
  76 +
  77 + </a>
  78 +
  79 + </td>
  80 + </tr>
  81 +
  82 + </table>
  83 + </div>
  84 + <!-- banner header -->
  85 +
  86 + <div id="bodyContent">
  87 +
  88 + <div id="contextContent">
  89 +
  90 + </div>
  91 +
  92 +
  93 + </div>
  94 +
  95 + <!-- if includes -->
  96 +
  97 + <div id="section">
  98 +
  99 +
  100 +
  101 +
  102 + <!-- if method_list -->
  103 +
  104 +
  105 +
  106 +
  107 + </div>
  108 +
  109 +<div id="validator-badges">
  110 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  111 +</div>
  112 +
  113 +</body>
  114 +</html>
doc/classes/RIR/TreeTagger/TaggerChunkerGerman.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: RIR::TreeTagger::TaggerChunkerGerman [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">RIR::TreeTagger::TaggerChunkerGerman</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../../files/lib/rir/ttagger_rb.html">
  57 +
  58 + lib/rir/ttagger.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + <a href="TaggerChunker.html">
  74 +
  75 + RIR::TreeTagger::TaggerChunker
  76 +
  77 + </a>
  78 +
  79 + </td>
  80 + </tr>
  81 +
  82 + </table>
  83 + </div>
  84 + <!-- banner header -->
  85 +
  86 + <div id="bodyContent">
  87 +
  88 + <div id="contextContent">
  89 +
  90 + </div>
  91 +
  92 +
  93 + </div>
  94 +
  95 + <!-- if includes -->
  96 +
  97 + <div id="section">
  98 +
  99 +
  100 +
  101 +
  102 + <!-- if method_list -->
  103 +
  104 +
  105 +
  106 +
  107 + </div>
  108 +
  109 +<div id="validator-badges">
  110 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  111 +</div>
  112 +
  113 +</body>
  114 +</html>
doc/classes/RIR/WebDocument.html
... ... @@ -103,9 +103,9 @@
103 103  
104 104 <div class="name-list">
105 105  
106   - <a href="#M000024">get_content</a>&nbsp;&nbsp;
  106 + <a href="#M000014">get_content</a>&nbsp;&nbsp;
107 107  
108   - <a href="#M000025">new</a>&nbsp;&nbsp;
  108 + <a href="#M000015">new</a>&nbsp;&nbsp;
109 109  
110 110 </div>
111 111 </div>
112 112  
... ... @@ -144,13 +144,13 @@
144 144 <h3 class="section-bar">Public Class methods</h3>
145 145  
146 146  
147   - <div id="method-M000024" class="method-detail">
148   - <a name="M000024"></a>
  147 + <div id="method-M000014" class="method-detail">
  148 + <a name="M000014"></a>
149 149  
150 150 <div class="method-heading">
151 151  
152   - <a href="WebDocument.src/M000024.html" target="Code" class="method-signature"
153   - onclick="popupCode('WebDocument.src/M000024.html');return false;">
  152 + <a href="WebDocument.src/M000014.html" target="Code" class="method-signature"
  153 + onclick="popupCode('WebDocument.src/M000014.html');return false;">
154 154  
155 155 <span class="method-name">get_content</span><span class="method-args">(url)</span>
156 156  
157 157  
... ... @@ -168,13 +168,13 @@
168 168 </div>
169 169  
170 170  
171   - <div id="method-M000025" class="method-detail">
172   - <a name="M000025"></a>
  171 + <div id="method-M000015" class="method-detail">
  172 + <a name="M000015"></a>
173 173  
174 174 <div class="method-heading">
175 175  
176   - <a href="WebDocument.src/M000025.html" target="Code" class="method-signature"
177   - onclick="popupCode('WebDocument.src/M000025.html');return false;">
  176 + <a href="WebDocument.src/M000015.html" target="Code" class="method-signature"
  177 + onclick="popupCode('WebDocument.src/M000015.html');return false;">
178 178  
179 179 <span class="method-name">new</span><span class="method-args">(url)</span>
180 180  
doc/classes/RIR/WebDocument.src/M000014.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>get_content (RIR::WebDocument)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 112</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>)
  12 + <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span>
  13 + <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>))
  14 + <span class="ruby-keyword kw">end</span></pre>
  15 +</body>
  16 +</html>
doc/classes/RIR/WebDocument.src/M000015.html
... ... @@ -2,15 +2,15 @@
2 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3 3 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4 4 <head>
5   - <title>get_content (RIR::WebDocument)</title>
  5 + <title>new (RIR::WebDocument)</title>
6 6 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7 7 <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8 8 </head>
9 9 <body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 105</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>)
12   - <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span>
13   - <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>))
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 119</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">url</span>)
  12 + <span class="ruby-ivar">@url</span> = <span class="ruby-identifier">url</span>
  13 + <span class="ruby-keyword kw">super</span> <span class="ruby-constant">WebDocument</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>).<span class="ruby-identifier">strip_javascripts</span>.<span class="ruby-identifier">strip_stylesheets</span>.<span class="ruby-identifier">strip_xml_tags</span>
14 14 <span class="ruby-keyword kw">end</span></pre>
15 15 </body>
16 16 </html>
doc/classes/RIR/WikipediaPage.html
... ... @@ -103,11 +103,11 @@
103 103  
104 104 <div class="name-list">
105 105  
106   - <a href="#M000027">get_url</a>&nbsp;&nbsp;
  106 + <a href="#M000017">get_url</a>&nbsp;&nbsp;
107 107  
108   - <a href="#M000028">search_homepage</a>&nbsp;&nbsp;
  108 + <a href="#M000018">search_homepage</a>&nbsp;&nbsp;
109 109  
110   - <a href="#M000026">search_wikipedia_titles</a>&nbsp;&nbsp;
  110 + <a href="#M000016">search_wikipedia_titles</a>&nbsp;&nbsp;
111 111  
112 112 </div>
113 113 </div>
114 114  
... ... @@ -128,13 +128,13 @@
128 128 <h3 class="section-bar">Public Class methods</h3>
129 129  
130 130  
131   - <div id="method-M000027" class="method-detail">
132   - <a name="M000027"></a>
  131 + <div id="method-M000017" class="method-detail">
  132 + <a name="M000017"></a>
133 133  
134 134 <div class="method-heading">
135 135  
136   - <a href="WikipediaPage.src/M000027.html" target="Code" class="method-signature"
137   - onclick="popupCode('WikipediaPage.src/M000027.html');return false;">
  136 + <a href="WikipediaPage.src/M000017.html" target="Code" class="method-signature"
  137 + onclick="popupCode('WikipediaPage.src/M000017.html');return false;">
138 138  
139 139 <span class="method-name">get_url</span><span class="method-args">(name)</span>
140 140  
141 141  
... ... @@ -148,13 +148,13 @@
148 148 </div>
149 149  
150 150  
151   - <div id="method-M000028" class="method-detail">
152   - <a name="M000028"></a>
  151 + <div id="method-M000018" class="method-detail">
  152 + <a name="M000018"></a>
153 153  
154 154 <div class="method-heading">
155 155  
156   - <a href="WikipediaPage.src/M000028.html" target="Code" class="method-signature"
157   - onclick="popupCode('WikipediaPage.src/M000028.html');return false;">
  156 + <a href="WikipediaPage.src/M000018.html" target="Code" class="method-signature"
  157 + onclick="popupCode('WikipediaPage.src/M000018.html');return false;">
158 158  
159 159 <span class="method-name">search_homepage</span><span class="method-args">(name)</span>
160 160  
161 161  
... ... @@ -168,13 +168,13 @@
168 168 </div>
169 169  
170 170  
171   - <div id="method-M000026" class="method-detail">
172   - <a name="M000026"></a>
  171 + <div id="method-M000016" class="method-detail">
  172 + <a name="M000016"></a>
173 173  
174 174 <div class="method-heading">
175 175  
176   - <a href="WikipediaPage.src/M000026.html" target="Code" class="method-signature"
177   - onclick="popupCode('WikipediaPage.src/M000026.html');return false;">
  176 + <a href="WikipediaPage.src/M000016.html" target="Code" class="method-signature"
  177 + onclick="popupCode('WikipediaPage.src/M000016.html');return false;">
178 178  
179 179 <span class="method-name">search_wikipedia_titles</span><span class="method-args">(name)</span>
180 180  
doc/classes/RIR/WikipediaPage.src/M000016.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>search_wikipedia_titles (RIR::WikipediaPage)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 132</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">search_wikipedia_titles</span>(<span class="ruby-identifier">name</span>)
  12 + <span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">&quot;Bad encoding&quot;</span>, <span class="ruby-identifier">name</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">isutf8</span>
  13 +
  14 + <span class="ruby-identifier">res</span> = <span class="ruby-constant">REXML</span><span class="ruby-operator">::</span><span class="ruby-constant">Document</span>.<span class="ruby-identifier">new</span>(<span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>( <span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span> <span class="ruby-node">&quot;http://en.wikipedia.org/w/api.php?action=query&amp;list=search&amp;srsearch=#{URI.escape name}&amp;format=xml&quot;</span> ).<span class="ruby-identifier">toutf8</span>).<span class="ruby-identifier">elements</span>[<span class="ruby-value str">'api/query/search'</span>]
  15 +
  16 + <span class="ruby-identifier">res</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">e</span><span class="ruby-operator">|</span> <span class="ruby-identifier">e</span>.<span class="ruby-identifier">attributes</span>[<span class="ruby-value str">'title'</span>] } <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">res</span>.<span class="ruby-identifier">nil?</span>
  17 + <span class="ruby-keyword kw">end</span></pre>
  18 +</body>
  19 +</html>
doc/classes/RIR/WikipediaPage.src/M000017.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>get_url (RIR::WikipediaPage)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 140</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_url</span>(<span class="ruby-identifier">name</span>)
  12 + <span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>, <span class="ruby-value str">&quot;Bad encoding&quot;</span>, <span class="ruby-identifier">name</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">isutf8</span>
  13 +
  14 + <span class="ruby-identifier">atts</span> = <span class="ruby-constant">REXML</span><span class="ruby-operator">::</span><span class="ruby-constant">Document</span>.<span class="ruby-identifier">new</span>(<span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>( <span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span> <span class="ruby-node">&quot;http://en.wikipedia.org/w/api.php?action=query&amp;titles=#{URI.escape name}&amp;inprop=url&amp;prop=info&amp;format=xml&quot;</span> ).<span class="ruby-identifier">toutf8</span>).<span class="ruby-identifier">elements</span>[<span class="ruby-value str">'api/query/pages/page'</span>].<span class="ruby-identifier">attributes</span>
  15 +
  16 + <span class="ruby-identifier">atts</span>[<span class="ruby-value str">'fullurl'</span>] <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">atts</span>[<span class="ruby-value str">'missing'</span>].<span class="ruby-identifier">nil?</span>
  17 + <span class="ruby-keyword kw">end</span></pre>
  18 +</body>
  19 +</html>
doc/classes/RIR/WikipediaPage.src/M000018.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>search_homepage (RIR::WikipediaPage)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 148</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">search_homepage</span>(<span class="ruby-identifier">name</span>)
  12 + <span class="ruby-identifier">title</span> = <span class="ruby-constant">WikipediaPage</span>.<span class="ruby-identifier">search_wikipedia_titles</span> <span class="ruby-identifier">name</span>
  13 +
  14 + <span class="ruby-constant">WikipediaPage</span>.<span class="ruby-identifier">new</span>(<span class="ruby-constant">WikipediaPage</span>.<span class="ruby-identifier">get_url</span> <span class="ruby-identifier">title</span>[<span class="ruby-value">0</span>]) <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">title</span>.<span class="ruby-identifier">nil?</span> <span class="ruby-operator">||</span> <span class="ruby-identifier">title</span>.<span class="ruby-identifier">empty?</span>
  15 + <span class="ruby-keyword kw">end</span></pre>
  16 +</body>
  17 +</html>
1   -Tue, 23 Nov 2010 18:20:46 +0100
  1 +Thu, 25 Nov 2010 17:01:52 +0100
doc/files/lib/rir/document_rb.html
... ... @@ -53,7 +53,7 @@
53 53 </tr>
54 54 <tr class="top-aligned-row">
55 55 <td><strong>Last Update:</strong></td>
56   - <td>2010-11-23 18:14:13 +0100</td>
  56 + <td>2010-11-25 16:04:20 +0100</td>
57 57 </tr>
58 58 </table>
59 59 </div>
doc/files/lib/rir/query_rb.html
... ... @@ -53,7 +53,7 @@
53 53 </tr>
54 54 <tr class="top-aligned-row">
55 55 <td><strong>Last Update:</strong></td>
56   - <td>2010-11-23 18:20:30 +0100</td>
  56 + <td>2010-11-25 13:25:18 +0100</td>
57 57 </tr>
58 58 </table>
59 59 </div>
doc/files/lib/rir/ttagger_rb.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>File: ttagger.rb [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="fileHeader">
  46 + <h1>ttagger.rb</h1>
  47 + <table class="header-table">
  48 + <tr class="top-aligned-row">
  49 + <td><strong>Path:</strong></td>
  50 + <td>lib/rir/ttagger.rb
  51 +
  52 + </td>
  53 + </tr>
  54 + <tr class="top-aligned-row">
  55 + <td><strong>Last Update:</strong></td>
  56 + <td>2010-11-25 17:01:46 +0100</td>
  57 + </tr>
  58 + </table>
  59 + </div>
  60 + <!-- banner header -->
  61 +
  62 + <div id="bodyContent">
  63 +
  64 + <div id="contextContent">
  65 +
  66 + <div id="description">
  67 + <p>
  68 +This file is a part of an Information Retrieval oriented Ruby library
  69 +</p>
  70 +<p>
  71 +Copyright (C) 2010-2011 Romain Deveaud &lt;romain.deveaud@gmail.com&gt;
  72 +</p>
  73 +<p>
  74 +This program is free software: you can redistribute it and/or modify it
  75 +under the terms of the GNU General Public License as published by the Free
  76 +Software Foundation, either version 3 of the License, or (at your option)
  77 +any later version.
  78 +</p>
  79 +<p>
  80 +This program is distributed in the hope that it will be useful, but WITHOUT
  81 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  82 +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  83 +more details.
  84 +</p>
  85 +<p>
  86 +You should have received a copy of the GNU General Public License along
  87 +with this program. If not, see &lt;<a
  88 +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>&gt;.
  89 +</p>
  90 +<hr size="1"></hr><p>
  91 +This file is a part of an Information Retrieval oriented Ruby library
  92 +</p>
  93 +<p>
  94 +Copyright (C) 2010-2011 Romain Deveaud &lt;romain.deveaud@gmail.com&gt;
  95 +</p>
  96 +<p>
  97 +This program is free software: you can redistribute it and/or modify it
  98 +under the terms of the GNU General Public License as published by the Free
  99 +Software Foundation, either version 3 of the License, or (at your option)
  100 +any later version.
  101 +</p>
  102 +<p>
  103 +This program is distributed in the hope that it will be useful, but WITHOUT
  104 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  105 +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  106 +more details.
  107 +</p>
  108 +<p>
  109 +You should have received a copy of the GNU General Public License along
  110 +with this program. If not, see &lt;<a
  111 +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>&gt;.
  112 +</p>
  113 +<hr size="1"></hr><p>
  114 +General module for many purposes related to Information Retrieval.
  115 +</p>
  116 +
  117 + </div>
  118 +
  119 + </div>
  120 +
  121 +
  122 + </div>
  123 +
  124 + <!-- if includes -->
  125 +
  126 + <div id="section">
  127 +
  128 +
  129 +
  130 +
  131 + <!-- if method_list -->
  132 +
  133 +
  134 +
  135 +
  136 + </div>
  137 +
  138 +<div id="validator-badges">
  139 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  140 +</div>
  141 +
  142 +</body>
  143 +</html>
doc/files/lib/rir_rb.html
... ... @@ -53,7 +53,7 @@
53 53 </tr>
54 54 <tr class="top-aligned-row">
55 55 <td><strong>Last Update:</strong></td>
56   - <td>2010-11-19 11:27:16 +0100</td>
  56 + <td>2010-11-25 15:44:52 +0100</td>
57 57 </tr>
58 58 </table>
59 59 </div>
... ... @@ -77,6 +77,8 @@
77 77 rir/corpus&nbsp;&nbsp;
78 78  
79 79 rir/regexp&nbsp;&nbsp;
  80 +
  81 + rir/ttagger&nbsp;&nbsp;
80 82  
81 83 </div>
82 84 </div>
doc/fr_class_index.html
... ... @@ -19,7 +19,7 @@
19 19  
20 20 <a href="classes/RIR.html">RIR</a><br />
21 21  
22   - <a href="classes/RIR/Corpus.html">RIR::Corpus</a><br />
  22 + <a href="classes/RIR/Document.html">RIR::Document</a><br />
23 23  
24 24 <a href="classes/RIR/Indri.html">RIR::Indri</a><br />
25 25  
... ... @@ -29,7 +29,21 @@
29 29  
30 30 <a href="classes/RIR/Query.html">RIR::Query</a><br />
31 31  
32   - <a href="classes/String.html">String</a><br />
  32 + <a href="classes/RIR/TreeTagger.html">RIR::TreeTagger</a><br />
  33 +
  34 + <a href="classes/RIR/TreeTagger/Chunk.html">RIR::TreeTagger::Chunk</a><br />
  35 +
  36 + <a href="classes/RIR/TreeTagger/TaggerChunker.html">RIR::TreeTagger::TaggerChunker</a><br />
  37 +
  38 + <a href="classes/RIR/TreeTagger/TaggerChunkerEnglish.html">RIR::TreeTagger::TaggerChunkerEnglish</a><br />
  39 +
  40 + <a href="classes/RIR/TreeTagger/TaggerChunkerFrench.html">RIR::TreeTagger::TaggerChunkerFrench</a><br />
  41 +
  42 + <a href="classes/RIR/TreeTagger/TaggerChunkerGerman.html">RIR::TreeTagger::TaggerChunkerGerman</a><br />
  43 +
  44 + <a href="classes/RIR/WebDocument.html">RIR::WebDocument</a><br />
  45 +
  46 + <a href="classes/RIR/WikipediaPage.html">RIR::WikipediaPage</a><br />
33 47  
34 48 </div>
35 49 </div>
doc/fr_file_index.html
... ... @@ -17,11 +17,13 @@
17 17 <h1 class="section-bar">Files</h1>
18 18 <div id="index-entries">
19 19  
20   - <a href="files/lib/rir/corpus_rb.html">lib/rir/corpus.rb</a><br />
  20 + <a href="files/lib/rir_rb.html">lib/rir.rb</a><br />
21 21  
  22 + <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br />
  23 +
22 24 <a href="files/lib/rir/query_rb.html">lib/rir/query.rb</a><br />
23 25  
24   - <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br />
  26 + <a href="files/lib/rir/ttagger_rb.html">lib/rir/ttagger.rb</a><br />
25 27  
26 28 </div>
27 29 </div>
doc/fr_method_index.html
... ... @@ -17,39 +17,41 @@
17 17 <h1 class="section-bar">Methods</h1>
18 18 <div id="index-entries">
19 19  
20   - <a href="classes/String.html#M000011">extract_xmltags_values (String)</a><br />
  20 + <a href="classes/RIR/Document.html#M000010">count_words (RIR::Document)</a><br />
21 21  
22   - <a href="classes/RIR/Corpus.html#M000017">files (RIR::Corpus)</a><br />
  22 + <a href="classes/RIR/Document.html#M000011">entropy (RIR::Document)</a><br />
23 23  
24   - <a href="classes/String.html#M000001">is_stopword? (String)</a><br />
  24 + <a href="classes/RIR/Document.html#M000008">format_words (RIR::Document)</a><br />
25 25  
26   - <a href="classes/RIR/Corpus.html#M000016">new (RIR::Corpus)</a><br />
  26 + <a href="classes/RIR/WebDocument.html#M000014">get_content (RIR::WebDocument)</a><br />
27 27  
28   - <a href="classes/RIR/Indri/Parameters.html#M000012">new (RIR::Indri::Parameters)</a><br />
  28 + <a href="classes/RIR/WikipediaPage.html#M000017">get_url (RIR::WikipediaPage)</a><br />
29 29  
30   - <a href="classes/RIR/Indri/IndriQuery.html#M000014">new (RIR::Indri::IndriQuery)</a><br />
  30 + <a href="classes/RIR/WebDocument.html#M000015">new (RIR::WebDocument)</a><br />
31 31  
32   - <a href="classes/String.html#M000002">remove_special_characters (String)</a><br />
  32 + <a href="classes/RIR/Indri/IndriQuery.html#M000006">new (RIR::Indri::IndriQuery)</a><br />
33 33  
34   - <a href="classes/String.html#M000006">strip_javascripts (String)</a><br />
  34 + <a href="classes/RIR/Indri/Parameters.html#M000004">new (RIR::Indri::Parameters)</a><br />
35 35  
36   - <a href="classes/String.html#M000005">strip_javascripts! (String)</a><br />
  36 + <a href="classes/RIR/Document.html#M000013">new (RIR::Document)</a><br />
37 37  
38   - <a href="classes/String.html#M000010">strip_punctuation (String)</a><br />
  38 + <a href="classes/RIR/TreeTagger/TaggerChunker.html#M000002">new (RIR::TreeTagger::TaggerChunker)</a><br />
39 39  
40   - <a href="classes/String.html#M000009">strip_punctuation! (String)</a><br />
  40 + <a href="classes/RIR/TreeTagger/Chunk.html#M000003">new (RIR::TreeTagger::Chunk)</a><br />
41 41  
42   - <a href="classes/String.html#M000008">strip_stylesheets (String)</a><br />
  42 + <a href="classes/RIR/Document.html#M000009">ngrams (RIR::Document)</a><br />
43 43  
44   - <a href="classes/String.html#M000007">strip_stylesheets! (String)</a><br />
  44 + <a href="classes/RIR/TreeTagger/TaggerChunker.html#M000001">parse (RIR::TreeTagger::TaggerChunker)</a><br />
45 45  
46   - <a href="classes/String.html#M000004">strip_xml_tags (String)</a><br />
  46 + <a href="classes/RIR/WikipediaPage.html#M000018">search_homepage (RIR::WikipediaPage)</a><br />
47 47  
48   - <a href="classes/String.html#M000003">strip_xml_tags! (String)</a><br />
  48 + <a href="classes/RIR/WikipediaPage.html#M000016">search_wikipedia_titles (RIR::WikipediaPage)</a><br />
49 49  
50   - <a href="classes/RIR/Indri/Parameters.html#M000013">to_s (RIR::Indri::Parameters)</a><br />
  50 + <a href="classes/RIR/Document.html#M000012">tf (RIR::Document)</a><br />
51 51  
52   - <a href="classes/RIR/Indri/IndriQuery.html#M000015">to_s (RIR::Indri::IndriQuery)</a><br />
  52 + <a href="classes/RIR/Indri/Parameters.html#M000005">to_s (RIR::Indri::Parameters)</a><br />
  53 +
  54 + <a href="classes/RIR/Indri/IndriQuery.html#M000007">to_s (RIR::Indri::IndriQuery)</a><br />
53 55  
54 56 </div>
55 57 </div>
... ... @@ -16,7 +16,7 @@
16 16 <frame src="fr_class_index.html" name="Classes" />
17 17 <frame src="fr_method_index.html" name="Methods" />
18 18 </frameset>
19   - <frame src="files/lib/rir/corpus_rb.html" name="docwin" />
  19 + <frame src="files/lib/rir_rb.html" name="docwin" />
20 20 </frameset>
21 21 </html>
... ... @@ -5,4 +5,5 @@
5 5 require 'rir/query'
6 6 require 'rir/corpus'
7 7 require 'rir/regexp'
  8 +require 'rir/ttagger'
... ... @@ -73,7 +73,8 @@
73 73 # If the string parameter is composed of many words (i.e. tokens separated
74 74 # by whitespace(s)), it is considered as an ngram.
75 75 #
76   - # entropy("guitar") #=> 0.00389919463243839
  76 + # entropy("guitar") #=> 0.00432114812727959
  77 + # entropy("dillinger escape plan") #=> 0.265862076325102
77 78 def entropy(s)
78 79 en = 0.0
79 80 counts = self.count_words
... ... @@ -87,6 +88,12 @@
87 88 en
88 89 end
89 90  
  91 + # Computes the term frequency of a given *word* +s+.
  92 + #
  93 + # tf("guitar") #=> 0.000380372765310004
  94 + def tf(s)
  95 + self.count_words[s].to_f/@words.size.to_f
  96 + end
90 97  
91 98  
92 99 def initialize(content)
93 100  
94 101  
95 102  
96 103  
... ... @@ -123,25 +130,25 @@
123 130  
124 131  
# Queries the English Wikipedia API (action=query, list=search) for +name+
# and collects the +title+ attribute of every element found under
# <tt>api/query/search</tt> in the XML response.
#
# Returns nil when the response has no <tt>api/query/search</tt> element.
# Raises ArgumentError when +name+ is not reported as UTF-8 by +isutf8+.
def self.search_wikipedia_titles(name)
  # The offending value goes in the message: the third argument of +raise+
  # is the backtrace, so passing +name+ there replaced the real backtrace.
  raise ArgumentError, "Bad encoding: #{name.inspect}" unless name.isutf8

  uri = URI.parse "http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.escape name}&format=xml"
  res = REXML::Document.new(Net::HTTP.get(uri).toutf8).elements['api/query/search']

  res.collect { |e| e.attributes['title'] } unless res.nil?
end
130 139  
# Asks the English Wikipedia API (action=query, prop=info, inprop=url) for
# the page titled +name+ and returns its +fullurl+ attribute.
#
# Returns nil when the response has no <tt>api/query/pages/page</tt> element
# or when that element carries a +missing+ attribute.
# Raises ArgumentError when +name+ is not reported as UTF-8 by +isutf8+.
def self.get_url(name)
  # The offending value goes in the message: the third argument of +raise+
  # is the backtrace, so passing +name+ there replaced the real backtrace.
  raise ArgumentError, "Bad encoding: #{name.inspect}" unless name.isutf8

  uri = URI.parse "http://en.wikipedia.org/w/api.php?action=query&titles=#{URI.escape name}&inprop=url&prop=info&format=xml"
  page = REXML::Document.new(Net::HTTP.get(uri).toutf8).elements['api/query/pages/page']

  # An unexpected response (no page element) used to raise NoMethodError.
  return nil if page.nil?

  atts = page.attributes
  atts['fullurl'] if atts['missing'].nil?
end
136 147  
# Looks +name+ up with +search_wikipedia_titles+, resolves the first title
# with +get_url+ and builds a WikipediaPage from that URL.
#
# Returns nil when the search yields no title or when the first title has
# no URL.
def self.search_homepage(name)
  titles = WikipediaPage.search_wikipedia_titles name
  return nil if titles.nil? || titles.empty?

  # get_url answers nil for a missing page; do not hand nil to the constructor.
  url = WikipediaPage.get_url titles[0]
  WikipediaPage.new(url) unless url.nil?
end
146 153  
147 154 # def initialize(name)
... ... @@ -25,10 +25,10 @@
25 25 module Indri
26 26  
27 27 class Parameters
28   - attr_accessor :corpus, :memory, :count, :offset, :run_id, :print_query, :print_docs, :rule, :baseline
  28 + attr_accessor :index_path, :memory, :count, :offset, :run_id, :print_query, :print_docs, :rule, :baseline
29 29  
30 30 def initialize(corpus,mem="1g",count="1000",offset="1",run_id="default",print_query=false,print_docs=false)
31   - @corpus = corpus
  31 + @index_path = corpus
32 32 @memory = mem
33 33 @count = count
34 34 @offset = offset
... ... @@ -40,7 +40,7 @@
40 40 def to_s
41 41 h = "<parameters>\n"
42 42 h += "<memory>#{@memory}</memory>\n"
43   - h += "<index>#{@corpus}</index>\n"
  43 + h += "<index>#{@index_path}</index>\n"
44 44 h += "<count>#{@count}</count>\n"
45 45 unless @baseline.nil?
46 46 h += "<baseline>#{@baseline}</baseline>\n"
  1 +#!/usr/bin/env ruby
  2 +
  3 +# This file is a part of an Information Retrieval oriented Ruby library
  4 +#
  5 +# Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
  6 +#
  7 +# This program is free software: you can redistribute it and/or modify
  8 +# it under the terms of the GNU General Public License as published by
  9 +# the Free Software Foundation, either version 3 of the License, or
  10 +# (at your option) any later version.
  11 +#
  12 +# This program is distributed in the hope that it will be useful,
  13 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 +# GNU General Public License for more details.
  16 +#
  17 +# You should have received a copy of the GNU General Public License
  18 +# along with this program. If not, see <http://www.gnu.org/licenses/>.
  19 +
  20 +module RIR
  21 +
  22 + # TreeTagger-related stuff module.
  23 + #
  24 + # See http://www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/DecisionTreeTagger.html
  25 + module TreeTagger
  26 +
  27 + # This class handles generic parsing of tagger-chunker outputs.
  28 + class TaggerChunker
  29 + attr_reader :chunks, :file
  30 +
  31 +
  32 + # Parses a tagger-chunker output and returns an Array of Chunk.
  33 + def self.parse chunk_lines
  34 + open = false
  35 + tag = nil
  36 +
  37 + chunks = []
  38 + words = []
  39 +
  40 + chunk_lines.each do |l|
  41 + l.chomp!
  42 + if l =~ /^<\w+>$/
  43 + open = true
  44 + tag = l
  45 + elsif l =~ /^<\/\w+>$/
  46 + if !words.empty? && open && l == tag.sub(/</, '</')
  47 + open = false
  48 + chunks.push Chunk.new(words.join(" "), tag)
  49 + words.clear
  50 + else
  51 + next
  52 + end
  53 + else
  54 + words.push(l.split.first)
  55 + end
  56 + end
  57 +
  58 + chunks
  59 + end
  60 +
  61 + # Initializes parsing. +chunk_file+ is the output of +tagger-chunker-+ and must
  62 + # be a valid path to the file.
  63 + #
  64 + # TaggerChunker.new("ttout/2010020") #=> #<RIR::TreeTagger::TaggerChunker:0x92fd088 @chunks=[#<RIR::TreeTagger::Chunk:0x8ec5a10 @words=["robert", "schumann"], @tag="NC">, ...] ...>
  65 + def initialize chunk_file
  66 + @chunks = TaggerChunker.parse File.open(chunk_file).readlines
  67 + end
  68 +
  69 + end
  70 +
  71 + class TaggerChunkerEnglish < TaggerChunker
  72 + end
  73 +
  74 + class TaggerChunkerFrench < TaggerChunker
  75 + end
  76 +
  77 + class TaggerChunkerGerman < TaggerChunker
  78 + end
  79 +
  80 + # Represents a Chunk extracted when parsing a TaggerChunker file.
  81 + class Chunk
  82 + attr_reader :words, :tag
  83 +
  84 + # +str+ are whitespace-separated terms.
  85 + # +tag+ see : ftp://ftp.ims.uni-stuttgart.de/pub/corpora/chunker-tagset-english.txt
  86 + def initialize str,tag
  87 + @words = str.split
  88 + @tag = tag[1..-2]
  89 + end
  90 + end
  91 +
  92 + end
  93 +end
... ... @@ -3,5 +3,6 @@
3 3 require 'rir'
4 4  
5 5 w = RIR::WikipediaPage.new("http://en.wikipedia.org/wiki/The_Dillinger_Escape_Plan")
6   -p w.entropy("guitar")
  6 +p w.entropy("dillinger escape plan")
  7 +p w.tf("guitar")