Commit bc18b961bdd99fd9f3bb51dc8fd6e88757de3eb8
1 parent
3e81fa06a9
Exists in
master
more doc
Showing 25 changed files with 316 additions and 85 deletions Side-by-side Diff
- doc/classes/RIR.html
- doc/classes/RIR/Document.html
- doc/classes/RIR/WebDocument.html
- doc/classes/RIR/WikipediaPage.html
- doc/classes/String.html
- doc/classes/String.src/M000001.html
- doc/classes/String.src/M000002.html
- doc/classes/String.src/M000003.html
- doc/classes/String.src/M000004.html
- doc/classes/String.src/M000005.html
- doc/classes/String.src/M000006.html
- doc/classes/String.src/M000007.html
- doc/classes/String.src/M000008.html
- doc/classes/String.src/M000009.html
- doc/created.rid
- doc/files/lib/rir/document_rb.html
- doc/files/lib/rir/string_rb.html
- doc/files/lib/rir_rb.html
- doc/fr_class_index.html
- doc/fr_file_index.html
- doc/fr_method_index.html
- doc/index.html
- lib/rir/corpus.rb
- lib/rir/query.rb
- lib/rir/string.rb
doc/classes/RIR.html
... | ... | @@ -53,9 +53,9 @@ |
53 | 53 | <td> |
54 | 54 | |
55 | 55 | |
56 | - <a href="../files/lib/rir/string_rb.html"> | |
56 | + <a href="../files/lib/rir/corpus_rb.html"> | |
57 | 57 | |
58 | - lib/rir/string.rb | |
58 | + lib/rir/corpus.rb | |
59 | 59 | |
60 | 60 | </a> |
61 | 61 | |
62 | 62 | |
63 | 63 | |
... | ... | @@ -63,15 +63,25 @@ |
63 | 63 | <br /> |
64 | 64 | |
65 | 65 | |
66 | - <a href="../files/lib/rir/document_rb.html"> | |
66 | + <a href="../files/lib/rir/query_rb.html"> | |
67 | 67 | |
68 | - lib/rir/document.rb | |
68 | + lib/rir/query.rb | |
69 | 69 | |
70 | 70 | </a> |
71 | 71 | |
72 | 72 | |
73 | 73 | <br /> |
74 | 74 | |
75 | + | |
76 | + <a href="../files/lib/rir/string_rb.html"> | |
77 | + | |
78 | + lib/rir/string.rb | |
79 | + | |
80 | + </a> | |
81 | + | |
82 | + | |
83 | + <br /> | |
84 | + | |
75 | 85 | </td> |
76 | 86 | </tr> |
77 | 87 | |
78 | 88 | |
79 | 89 | |
80 | 90 | |
... | ... | @@ -86,11 +96,74 @@ |
86 | 96 | |
87 | 97 | <div id="description"> |
88 | 98 | <p> |
89 | -General module for many purposes related to Information Retrieval. | |
99 | +This file is a part of an Information Retrieval oriented Ruby library | |
90 | 100 | </p> |
101 | +<p> | |
102 | +Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | |
103 | +</p> | |
104 | +<p> | |
105 | +This program is free software: you can redistribute it and/or modify it | |
106 | +under the terms of the GNU General Public License as published by the Free | |
107 | +Software Foundation, either version 3 of the License, or (at your option) | |
108 | +any later version. | |
109 | +</p> | |
110 | +<p> | |
111 | +This program is distributed in the hope that it will be useful, but WITHOUT | |
112 | +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
113 | +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
114 | +more details. | |
115 | +</p> | |
116 | +<p> | |
117 | +You should have received a copy of the GNU General Public License along | |
118 | +with this program. If not, see <<a | |
119 | +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. | |
120 | +</p> | |
91 | 121 | <hr size="1"></hr><p> |
92 | -General module for many purposes related to Information Retrieval. | |
122 | +This file is a part of an Information Retrieval oriented Ruby library | |
93 | 123 | </p> |
124 | +<p> | |
125 | +Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | |
126 | +</p> | |
127 | +<p> | |
128 | +This program is free software: you can redistribute it and/or modify it | |
129 | +under the terms of the GNU General Public License as published by the Free | |
130 | +Software Foundation, either version 3 of the License, or (at your option) | |
131 | +any later version. | |
132 | +</p> | |
133 | +<p> | |
134 | +This program is distributed in the hope that it will be useful, but WITHOUT | |
135 | +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
136 | +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
137 | +more details. | |
138 | +</p> | |
139 | +<p> | |
140 | +You should have received a copy of the GNU General Public License along | |
141 | +with this program. If not, see <<a | |
142 | +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. | |
143 | +</p> | |
144 | +<hr size="1"></hr><p> | |
145 | +This file is a part of an Information Retrieval oriented Ruby library | |
146 | +</p> | |
147 | +<p> | |
148 | +Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | |
149 | +</p> | |
150 | +<p> | |
151 | +This program is free software: you can redistribute it and/or modify it | |
152 | +under the terms of the GNU General Public License as published by the Free | |
153 | +Software Foundation, either version 3 of the License, or (at your option) | |
154 | +any later version. | |
155 | +</p> | |
156 | +<p> | |
157 | +This program is distributed in the hope that it will be useful, but WITHOUT | |
158 | +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
159 | +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
160 | +more details. | |
161 | +</p> | |
162 | +<p> | |
163 | +You should have received a copy of the GNU General Public License along | |
164 | +with this program. If not, see <<a | |
165 | +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. | |
166 | +</p> | |
94 | 167 | |
95 | 168 | </div> |
96 | 169 | |
... | ... | @@ -106,9 +179,9 @@ |
106 | 179 | <div id="class-list"> |
107 | 180 | <h3 class="section-bar">Classes and Modules</h3> |
108 | 181 | |
109 | - Class <a href="RIR/Document.html" class="link">RIR::Document</a><br /> | |
110 | -Class <a href="RIR/WebDocument.html" class="link">RIR::WebDocument</a><br /> | |
111 | -Class <a href="RIR/WikipediaPage.html" class="link">RIR::WikipediaPage</a><br /> | |
182 | + Module <a href="RIR/Indri.html" class="link">RIR::Indri</a><br /> | |
183 | +Class <a href="RIR/Corpus.html" class="link">RIR::Corpus</a><br /> | |
184 | +Class <a href="RIR/Query.html" class="link">RIR::Query</a><br /> | |
112 | 185 | |
113 | 186 | </div> |
114 | 187 |
doc/classes/RIR/Document.html
... | ... | @@ -99,15 +99,15 @@ |
99 | 99 | |
100 | 100 | <div class="name-list"> |
101 | 101 | |
102 | - <a href="#M000012">count_words</a> | |
102 | + <a href="#M000021">count_words</a> | |
103 | 103 | |
104 | - <a href="#M000013">entropy</a> | |
104 | + <a href="#M000022">entropy</a> | |
105 | 105 | |
106 | - <a href="#M000010">format_words</a> | |
106 | + <a href="#M000019">format_words</a> | |
107 | 107 | |
108 | - <a href="#M000014">new</a> | |
108 | + <a href="#M000023">new</a> | |
109 | 109 | |
110 | - <a href="#M000011">ngrams</a> | |
110 | + <a href="#M000020">ngrams</a> | |
111 | 111 | |
112 | 112 | </div> |
113 | 113 | </div> |
114 | 114 | |
... | ... | @@ -154,13 +154,13 @@ |
154 | 154 | <h3 class="section-bar">Public Class methods</h3> |
155 | 155 | |
156 | 156 | |
157 | - <div id="method-M000014" class="method-detail"> | |
158 | - <a name="M000014"></a> | |
157 | + <div id="method-M000023" class="method-detail"> | |
158 | + <a name="M000023"></a> | |
159 | 159 | |
160 | 160 | <div class="method-heading"> |
161 | 161 | |
162 | - <a href="Document.src/M000014.html" target="Code" class="method-signature" | |
163 | - onclick="popupCode('Document.src/M000014.html');return false;"> | |
162 | + <a href="Document.src/M000023.html" target="Code" class="method-signature" | |
163 | + onclick="popupCode('Document.src/M000023.html');return false;"> | |
164 | 164 | |
165 | 165 | <span class="method-name">new</span><span class="method-args">(content)</span> |
166 | 166 | |
167 | 167 | |
... | ... | @@ -177,13 +177,13 @@ |
177 | 177 | <h3 class="section-bar">Public Instance methods</h3> |
178 | 178 | |
179 | 179 | |
180 | - <div id="method-M000012" class="method-detail"> | |
181 | - <a name="M000012"></a> | |
180 | + <div id="method-M000021" class="method-detail"> | |
181 | + <a name="M000021"></a> | |
182 | 182 | |
183 | 183 | <div class="method-heading"> |
184 | 184 | |
185 | - <a href="Document.src/M000012.html" target="Code" class="method-signature" | |
186 | - onclick="popupCode('Document.src/M000012.html');return false;"> | |
185 | + <a href="Document.src/M000021.html" target="Code" class="method-signature" | |
186 | + onclick="popupCode('Document.src/M000021.html');return false;"> | |
187 | 187 | |
188 | 188 | <span class="method-name">count_words</span><span class="method-args">()</span> |
189 | 189 | |
190 | 190 | |
... | ... | @@ -205,13 +205,13 @@ |
205 | 205 | </div> |
206 | 206 | |
207 | 207 | |
208 | - <div id="method-M000013" class="method-detail"> | |
209 | - <a name="M000013"></a> | |
208 | + <div id="method-M000022" class="method-detail"> | |
209 | + <a name="M000022"></a> | |
210 | 210 | |
211 | 211 | <div class="method-heading"> |
212 | 212 | |
213 | - <a href="Document.src/M000013.html" target="Code" class="method-signature" | |
214 | - onclick="popupCode('Document.src/M000013.html');return false;"> | |
213 | + <a href="Document.src/M000022.html" target="Code" class="method-signature" | |
214 | + onclick="popupCode('Document.src/M000022.html');return false;"> | |
215 | 215 | |
216 | 216 | <span class="method-name">entropy</span><span class="method-args">(s)</span> |
217 | 217 | |
218 | 218 | |
... | ... | @@ -236,13 +236,13 @@ |
236 | 236 | </div> |
237 | 237 | |
238 | 238 | |
239 | - <div id="method-M000011" class="method-detail"> | |
240 | - <a name="M000011"></a> | |
239 | + <div id="method-M000020" class="method-detail"> | |
240 | + <a name="M000020"></a> | |
241 | 241 | |
242 | 242 | <div class="method-heading"> |
243 | 243 | |
244 | - <a href="Document.src/M000011.html" target="Code" class="method-signature" | |
245 | - onclick="popupCode('Document.src/M000011.html');return false;"> | |
244 | + <a href="Document.src/M000020.html" target="Code" class="method-signature" | |
245 | + onclick="popupCode('Document.src/M000020.html');return false;"> | |
246 | 246 | |
247 | 247 | <span class="method-name">ngrams</span><span class="method-args">(n)</span> |
248 | 248 | |
249 | 249 | |
... | ... | @@ -267,13 +267,13 @@ |
267 | 267 | <h3 class="section-bar">Protected Instance methods</h3> |
268 | 268 | |
269 | 269 | |
270 | - <div id="method-M000010" class="method-detail"> | |
271 | - <a name="M000010"></a> | |
270 | + <div id="method-M000019" class="method-detail"> | |
271 | + <a name="M000019"></a> | |
272 | 272 | |
273 | 273 | <div class="method-heading"> |
274 | 274 | |
275 | - <a href="Document.src/M000010.html" target="Code" class="method-signature" | |
276 | - onclick="popupCode('Document.src/M000010.html');return false;"> | |
275 | + <a href="Document.src/M000019.html" target="Code" class="method-signature" | |
276 | + onclick="popupCode('Document.src/M000019.html');return false;"> | |
277 | 277 | |
278 | 278 | <span class="method-name">format_words</span><span class="method-args">()</span> |
279 | 279 |
doc/classes/RIR/WebDocument.html
... | ... | @@ -103,9 +103,9 @@ |
103 | 103 | |
104 | 104 | <div class="name-list"> |
105 | 105 | |
106 | - <a href="#M000015">get_content</a> | |
106 | + <a href="#M000024">get_content</a> | |
107 | 107 | |
108 | - <a href="#M000016">new</a> | |
108 | + <a href="#M000025">new</a> | |
109 | 109 | |
110 | 110 | </div> |
111 | 111 | </div> |
112 | 112 | |
... | ... | @@ -144,13 +144,13 @@ |
144 | 144 | <h3 class="section-bar">Public Class methods</h3> |
145 | 145 | |
146 | 146 | |
147 | - <div id="method-M000015" class="method-detail"> | |
148 | - <a name="M000015"></a> | |
147 | + <div id="method-M000024" class="method-detail"> | |
148 | + <a name="M000024"></a> | |
149 | 149 | |
150 | 150 | <div class="method-heading"> |
151 | 151 | |
152 | - <a href="WebDocument.src/M000015.html" target="Code" class="method-signature" | |
153 | - onclick="popupCode('WebDocument.src/M000015.html');return false;"> | |
152 | + <a href="WebDocument.src/M000024.html" target="Code" class="method-signature" | |
153 | + onclick="popupCode('WebDocument.src/M000024.html');return false;"> | |
154 | 154 | |
155 | 155 | <span class="method-name">get_content</span><span class="method-args">(url)</span> |
156 | 156 | |
157 | 157 | |
... | ... | @@ -168,13 +168,13 @@ |
168 | 168 | </div> |
169 | 169 | |
170 | 170 | |
171 | - <div id="method-M000016" class="method-detail"> | |
172 | - <a name="M000016"></a> | |
171 | + <div id="method-M000025" class="method-detail"> | |
172 | + <a name="M000025"></a> | |
173 | 173 | |
174 | 174 | <div class="method-heading"> |
175 | 175 | |
176 | - <a href="WebDocument.src/M000016.html" target="Code" class="method-signature" | |
177 | - onclick="popupCode('WebDocument.src/M000016.html');return false;"> | |
176 | + <a href="WebDocument.src/M000025.html" target="Code" class="method-signature" | |
177 | + onclick="popupCode('WebDocument.src/M000025.html');return false;"> | |
178 | 178 | |
179 | 179 | <span class="method-name">new</span><span class="method-args">(url)</span> |
180 | 180 |
doc/classes/RIR/WikipediaPage.html
... | ... | @@ -98,6 +98,20 @@ |
98 | 98 | </div> |
99 | 99 | |
100 | 100 | |
101 | + <div id="method-list"> | |
102 | + <h3 class="section-bar">Methods</h3> | |
103 | + | |
104 | + <div class="name-list"> | |
105 | + | |
106 | + <a href="#M000027">get_url</a> | |
107 | + | |
108 | + <a href="#M000028">search_homepage</a> | |
109 | + | |
110 | + <a href="#M000026">search_wikipedia_titles</a> | |
111 | + | |
112 | + </div> | |
113 | + </div> | |
114 | + | |
101 | 115 | </div> |
102 | 116 | |
103 | 117 | <!-- if includes --> |
... | ... | @@ -108,6 +122,74 @@ |
108 | 122 | |
109 | 123 | |
110 | 124 | <!-- if method_list --> |
125 | + | |
126 | + <div id="methods"> | |
127 | + | |
128 | + <h3 class="section-bar">Public Class methods</h3> | |
129 | + | |
130 | + | |
131 | + <div id="method-M000027" class="method-detail"> | |
132 | + <a name="M000027"></a> | |
133 | + | |
134 | + <div class="method-heading"> | |
135 | + | |
136 | + <a href="WikipediaPage.src/M000027.html" target="Code" class="method-signature" | |
137 | + onclick="popupCode('WikipediaPage.src/M000027.html');return false;"> | |
138 | + | |
139 | + <span class="method-name">get_url</span><span class="method-args">(name)</span> | |
140 | + | |
141 | + </a> | |
142 | + | |
143 | + </div> | |
144 | + | |
145 | + <div class="method-description"> | |
146 | + | |
147 | + </div> | |
148 | + </div> | |
149 | + | |
150 | + | |
151 | + <div id="method-M000028" class="method-detail"> | |
152 | + <a name="M000028"></a> | |
153 | + | |
154 | + <div class="method-heading"> | |
155 | + | |
156 | + <a href="WikipediaPage.src/M000028.html" target="Code" class="method-signature" | |
157 | + onclick="popupCode('WikipediaPage.src/M000028.html');return false;"> | |
158 | + | |
159 | + <span class="method-name">search_homepage</span><span class="method-args">(name)</span> | |
160 | + | |
161 | + </a> | |
162 | + | |
163 | + </div> | |
164 | + | |
165 | + <div class="method-description"> | |
166 | + | |
167 | + </div> | |
168 | + </div> | |
169 | + | |
170 | + | |
171 | + <div id="method-M000026" class="method-detail"> | |
172 | + <a name="M000026"></a> | |
173 | + | |
174 | + <div class="method-heading"> | |
175 | + | |
176 | + <a href="WikipediaPage.src/M000026.html" target="Code" class="method-signature" | |
177 | + onclick="popupCode('WikipediaPage.src/M000026.html');return false;"> | |
178 | + | |
179 | + <span class="method-name">search_wikipedia_titles</span><span class="method-args">(name)</span> | |
180 | + | |
181 | + </a> | |
182 | + | |
183 | + </div> | |
184 | + | |
185 | + <div class="method-description"> | |
186 | + | |
187 | + </div> | |
188 | + </div> | |
189 | + | |
190 | + | |
191 | + | |
192 | + </div> | |
111 | 193 | |
112 | 194 | |
113 | 195 |
doc/classes/String.html
... | ... | @@ -99,7 +99,7 @@ |
99 | 99 | |
100 | 100 | <div class="name-list"> |
101 | 101 | |
102 | - <a href="#M000009">extract_xmltags_values</a> | |
102 | + <a href="#M000011">extract_xmltags_values</a> | |
103 | 103 | |
104 | 104 | <a href="#M000001">is_stopword?</a> |
105 | 105 | |
... | ... | @@ -109,6 +109,10 @@ |
109 | 109 | |
110 | 110 | <a href="#M000005">strip_javascripts!</a> |
111 | 111 | |
112 | + <a href="#M000010">strip_punctuation</a> | |
113 | + | |
114 | + <a href="#M000009">strip_punctuation!</a> | |
115 | + | |
112 | 116 | <a href="#M000008">strip_stylesheets</a> |
113 | 117 | |
114 | 118 | <a href="#M000007">strip_stylesheets!</a> |
115 | 119 | |
... | ... | @@ -146,13 +150,13 @@ |
146 | 150 | <h3 class="section-bar">Public Instance methods</h3> |
147 | 151 | |
148 | 152 | |
149 | - <div id="method-M000009" class="method-detail"> | |
150 | - <a name="M000009"></a> | |
153 | + <div id="method-M000011" class="method-detail"> | |
154 | + <a name="M000011"></a> | |
151 | 155 | |
152 | 156 | <div class="method-heading"> |
153 | 157 | |
154 | - <a href="String.src/M000009.html" target="Code" class="method-signature" | |
155 | - onclick="popupCode('String.src/M000009.html');return false;"> | |
158 | + <a href="String.src/M000011.html" target="Code" class="method-signature" | |
159 | + onclick="popupCode('String.src/M000011.html');return false;"> | |
156 | 160 | |
157 | 161 | <span class="method-name">extract_xmltags_values</span><span class="method-args">(tag_name)</span> |
158 | 162 | |
... | ... | @@ -284,6 +288,63 @@ |
284 | 288 | test" |
285 | 289 | s.strip_javascripts! |
286 | 290 | s #=> "test" |
291 | +</pre> | |
292 | + | |
293 | + </div> | |
294 | + </div> | |
295 | + | |
296 | + | |
297 | + <div id="method-M000010" class="method-detail"> | |
298 | + <a name="M000010"></a> | |
299 | + | |
300 | + <div class="method-heading"> | |
301 | + | |
302 | + <a href="String.src/M000010.html" target="Code" class="method-signature" | |
303 | + onclick="popupCode('String.src/M000010.html');return false;"> | |
304 | + | |
305 | + <span class="method-name">strip_punctuation</span><span class="method-args">()</span> | |
306 | + | |
307 | + </a> | |
308 | + | |
309 | + </div> | |
310 | + | |
311 | + <div class="method-description"> | |
312 | + | |
313 | + <p> | |
314 | +Removes punctuation from <tt>self</tt>. | |
315 | +</p> | |
316 | +<pre> | |
317 | + s = "hello, world. how are you?!" | |
318 | + s.strip_punctuation # => "hello world how are you" | |
319 | +</pre> | |
320 | + | |
321 | + </div> | |
322 | + </div> | |
323 | + | |
324 | + | |
325 | + <div id="method-M000009" class="method-detail"> | |
326 | + <a name="M000009"></a> | |
327 | + | |
328 | + <div class="method-heading"> | |
329 | + | |
330 | + <a href="String.src/M000009.html" target="Code" class="method-signature" | |
331 | + onclick="popupCode('String.src/M000009.html');return false;"> | |
332 | + | |
333 | + <span class="method-name">strip_punctuation!</span><span class="method-args">()</span> | |
334 | + | |
335 | + </a> | |
336 | + | |
337 | + </div> | |
338 | + | |
339 | + <div class="method-description"> | |
340 | + | |
341 | + <p> | |
342 | +Removes punctuation from <tt>self</tt>. | |
343 | +</p> | |
344 | +<pre> | |
345 | + s = "hello, world. how are you?!" | |
346 | + s.strip_punctuation! | |
347 | + s # => "hello world how are you" | |
287 | 348 | </pre> |
288 | 349 | |
289 | 350 | </div> |
doc/classes/String.src/M000001.html
... | ... | @@ -7,7 +7,7 @@ |
7 | 7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> |
8 | 8 | </head> |
9 | 9 | <body class="standalone-code"> |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 77</span> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 76</span> | |
11 | 11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">is_stopword?</span> |
12 | 12 | <span class="ruby-constant">Stoplist</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">downcase</span>) |
13 | 13 | <span class="ruby-keyword kw">end</span></pre> |
doc/classes/String.src/M000002.html
... | ... | @@ -7,7 +7,7 @@ |
7 | 7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> |
8 | 8 | </head> |
9 | 9 | <body class="standalone-code"> |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 83</span> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 82</span> | |
11 | 11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">remove_special_characters</span> |
12 | 12 | <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/\W/</span>,<span class="ruby-value str">' '</span>).<span class="ruby-identifier">split</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/\W/</span>,<span class="ruby-value str">' '</span>).<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/\A.\z/</span>, <span class="ruby-value str">''</span>)}.<span class="ruby-identifier">join</span>(<span class="ruby-value str">' '</span>).<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/\A.\z/</span>, <span class="ruby-value str">''</span>)}.<span class="ruby-identifier">join</span>(<span class="ruby-value str">' '</span>) |
13 | 13 | <span class="ruby-keyword kw">end</span></pre> |
doc/classes/String.src/M000003.html
... | ... | @@ -7,7 +7,7 @@ |
7 | 7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> |
8 | 8 | </head> |
9 | 9 | <body class="standalone-code"> |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 92</span> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 91</span> | |
11 | 11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_xml_tags!</span> |
12 | 12 | <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator"><</span>\<span class="ruby-regexp re">/?[^>]*>/</span> |
13 | 13 | <span class="ruby-keyword kw">end</span></pre> |
doc/classes/String.src/M000004.html
... | ... | @@ -7,7 +7,7 @@ |
7 | 7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> |
8 | 8 | </head> |
9 | 9 | <body class="standalone-code"> |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 101</span> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 100</span> | |
11 | 11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_xml_tags</span> |
12 | 12 | <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_xml_tags!</span> |
13 | 13 | <span class="ruby-keyword kw">end</span></pre> |
doc/classes/String.src/M000005.html
... | ... | @@ -7,7 +7,7 @@ |
7 | 7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> |
8 | 8 | </head> |
9 | 9 | <body class="standalone-code"> |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 115</span> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 114</span> | |
11 | 11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_javascripts!</span> |
12 | 12 | <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator"><</span><span class="ruby-identifier">script</span> <span class="ruby-identifier">type</span>=<span class="ruby-value str">"text\/javascript"</span><span class="ruby-operator">></span>(.<span class="ruby-operator">+</span><span class="ruby-value">?)</span><span class="ruby-operator"><</span>\<span class="ruby-regexp re">/script>/</span><span class="ruby-identifier">m</span> |
13 | 13 | <span class="ruby-keyword kw">end</span></pre> |
doc/classes/String.src/M000006.html
... | ... | @@ -7,7 +7,7 @@ |
7 | 7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> |
8 | 8 | </head> |
9 | 9 | <body class="standalone-code"> |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 128</span> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 127</span> | |
11 | 11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_javascripts</span> |
12 | 12 | <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_javascripts!</span> |
13 | 13 | <span class="ruby-keyword kw">end</span></pre> |
doc/classes/String.src/M000007.html
... | ... | @@ -7,7 +7,7 @@ |
7 | 7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> |
8 | 8 | </head> |
9 | 9 | <body class="standalone-code"> |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 132</span> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 131</span> | |
11 | 11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_stylesheets!</span> |
12 | 12 | <span class="ruby-comment cmt"># TODO: rewamp. dunno what is it.</span> |
13 | 13 | <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator"><</span><span class="ruby-identifier">style</span> <span class="ruby-identifier">type</span>=<span class="ruby-value str">"text\/css"</span><span class="ruby-operator">></span>(.<span class="ruby-operator">+</span><span class="ruby-value">?)</span><span class="ruby-operator"><</span>\<span class="ruby-regexp re">/style>/</span><span class="ruby-identifier">m</span> |
doc/classes/String.src/M000008.html
... | ... | @@ -7,7 +7,7 @@ |
7 | 7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> |
8 | 8 | </head> |
9 | 9 | <body class="standalone-code"> |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 137</span> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 136</span> | |
11 | 11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_stylesheets</span> |
12 | 12 | <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_stylesheets!</span> |
13 | 13 | <span class="ruby-keyword kw">end</span></pre> |
doc/classes/String.src/M000009.html
... | ... | @@ -2,14 +2,14 @@ |
2 | 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
3 | 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
4 | 4 | <head> |
5 | - <title>extract_xmltags_values (String)</title> | |
5 | + <title>strip_punctuation! (String)</title> | |
6 | 6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
7 | 7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> |
8 | 8 | </head> |
9 | 9 | <body class="standalone-code"> |
10 | 10 | <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 145</span> |
11 | - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">extract_xmltags_values</span>(<span class="ruby-identifier">tag_name</span>) | |
12 | - <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">scan</span>(<span class="ruby-node">/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/</span>).<span class="ruby-identifier">flatten</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_punctuation!</span> | |
12 | + <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span>[<span class="ruby-operator">^</span><span class="ruby-identifier">a</span><span class="ruby-operator">-</span><span class="ruby-identifier">zA</span><span class="ruby-operator">-</span><span class="ruby-constant">Z0</span><span class="ruby-operator">-</span><span class="ruby-value">9</span>\<span class="ruby-operator">-</span>\<span class="ruby-identifier">s</span>]<span class="ruby-operator">/</span> | |
13 | 13 | <span class="ruby-keyword kw">end</span></pre> |
14 | 14 | </body> |
15 | 15 | </html> |
doc/created.rid
doc/files/lib/rir/document_rb.html
... | ... | @@ -53,7 +53,7 @@ |
53 | 53 | </tr> |
54 | 54 | <tr class="top-aligned-row"> |
55 | 55 | <td><strong>Last Update:</strong></td> |
56 | - <td>2010-11-05 15:06:24 +0100</td> | |
56 | + <td>2010-11-23 18:14:13 +0100</td> | |
57 | 57 | </tr> |
58 | 58 | </table> |
59 | 59 | </div> |
... | ... | @@ -96,6 +96,12 @@ |
96 | 96 | <div class="name-list"> |
97 | 97 | |
98 | 98 | net/http |
99 | + | |
100 | + rexml/document | |
101 | + | |
102 | + net/http | |
103 | + | |
104 | + kconv | |
99 | 105 | |
100 | 106 | </div> |
101 | 107 | </div> |
doc/files/lib/rir/string_rb.html
doc/files/lib/rir_rb.html
... | ... | @@ -53,7 +53,7 @@ |
53 | 53 | </tr> |
54 | 54 | <tr class="top-aligned-row"> |
55 | 55 | <td><strong>Last Update:</strong></td> |
56 | - <td>2010-11-05 14:39:35 +0100</td> | |
56 | + <td>2010-11-19 11:27:16 +0100</td> | |
57 | 57 | </tr> |
58 | 58 | </table> |
59 | 59 | </div> |
... | ... | @@ -71,6 +71,12 @@ |
71 | 71 | rir/document |
72 | 72 | |
73 | 73 | rir/string |
74 | + | |
75 | + rir/query | |
76 | + | |
77 | + rir/corpus | |
78 | + | |
79 | + rir/regexp | |
74 | 80 | |
75 | 81 | </div> |
76 | 82 | </div> |
doc/fr_class_index.html
... | ... | @@ -19,11 +19,15 @@ |
19 | 19 | |
20 | 20 | <a href="classes/RIR.html">RIR</a><br /> |
21 | 21 | |
22 | - <a href="classes/RIR/Document.html">RIR::Document</a><br /> | |
22 | + <a href="classes/RIR/Corpus.html">RIR::Corpus</a><br /> | |
23 | 23 | |
24 | - <a href="classes/RIR/WebDocument.html">RIR::WebDocument</a><br /> | |
24 | + <a href="classes/RIR/Indri.html">RIR::Indri</a><br /> | |
25 | 25 | |
26 | - <a href="classes/RIR/WikipediaPage.html">RIR::WikipediaPage</a><br /> | |
26 | + <a href="classes/RIR/Indri/IndriQuery.html">RIR::Indri::IndriQuery</a><br /> | |
27 | + | |
28 | + <a href="classes/RIR/Indri/Parameters.html">RIR::Indri::Parameters</a><br /> | |
29 | + | |
30 | + <a href="classes/RIR/Query.html">RIR::Query</a><br /> | |
27 | 31 | |
28 | 32 | <a href="classes/String.html">String</a><br /> |
29 | 33 |
doc/fr_file_index.html
... | ... | @@ -17,11 +17,11 @@ |
17 | 17 | <h1 class="section-bar">Files</h1> |
18 | 18 | <div id="index-entries"> |
19 | 19 | |
20 | - <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br /> | |
20 | + <a href="files/lib/rir/corpus_rb.html">lib/rir/corpus.rb</a><br /> | |
21 | 21 | |
22 | - <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br /> | |
22 | + <a href="files/lib/rir/query_rb.html">lib/rir/query.rb</a><br /> | |
23 | 23 | |
24 | - <a href="files/main_rb.html">main.rb</a><br /> | |
24 | + <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br /> | |
25 | 25 | |
26 | 26 | </div> |
27 | 27 | </div> |
doc/fr_method_index.html
... | ... | @@ -17,23 +17,17 @@ |
17 | 17 | <h1 class="section-bar">Methods</h1> |
18 | 18 | <div id="index-entries"> |
19 | 19 | |
20 | - <a href="classes/RIR/Document.html#M000012">count_words (RIR::Document)</a><br /> | |
20 | + <a href="classes/String.html#M000011">extract_xmltags_values (String)</a><br /> | |
21 | 21 | |
22 | - <a href="classes/RIR/Document.html#M000013">entropy (RIR::Document)</a><br /> | |
22 | + <a href="classes/RIR/Corpus.html#M000017">files (RIR::Corpus)</a><br /> | |
23 | 23 | |
24 | - <a href="classes/String.html#M000009">extract_xmltags_values (String)</a><br /> | |
25 | - | |
26 | - <a href="classes/RIR/Document.html#M000010">format_words (RIR::Document)</a><br /> | |
27 | - | |
28 | - <a href="classes/RIR/WebDocument.html#M000015">get_content (RIR::WebDocument)</a><br /> | |
29 | - | |
30 | 24 | <a href="classes/String.html#M000001">is_stopword? (String)</a><br /> |
31 | 25 | |
32 | - <a href="classes/RIR/WebDocument.html#M000016">new (RIR::WebDocument)</a><br /> | |
26 | + <a href="classes/RIR/Corpus.html#M000016">new (RIR::Corpus)</a><br /> | |
33 | 27 | |
34 | - <a href="classes/RIR/Document.html#M000014">new (RIR::Document)</a><br /> | |
28 | + <a href="classes/RIR/Indri/Parameters.html#M000012">new (RIR::Indri::Parameters)</a><br /> | |
35 | 29 | |
36 | - <a href="classes/RIR/Document.html#M000011">ngrams (RIR::Document)</a><br /> | |
30 | + <a href="classes/RIR/Indri/IndriQuery.html#M000014">new (RIR::Indri::IndriQuery)</a><br /> | |
37 | 31 | |
38 | 32 | <a href="classes/String.html#M000002">remove_special_characters (String)</a><br /> |
39 | 33 | |
... | ... | @@ -41,6 +35,10 @@ |
41 | 35 | |
42 | 36 | <a href="classes/String.html#M000005">strip_javascripts! (String)</a><br /> |
43 | 37 | |
38 | + <a href="classes/String.html#M000010">strip_punctuation (String)</a><br /> | |
39 | + | |
40 | + <a href="classes/String.html#M000009">strip_punctuation! (String)</a><br /> | |
41 | + | |
44 | 42 | <a href="classes/String.html#M000008">strip_stylesheets (String)</a><br /> |
45 | 43 | |
46 | 44 | <a href="classes/String.html#M000007">strip_stylesheets! (String)</a><br /> |
... | ... | @@ -48,6 +46,10 @@ |
48 | 46 | <a href="classes/String.html#M000004">strip_xml_tags (String)</a><br /> |
49 | 47 | |
50 | 48 | <a href="classes/String.html#M000003">strip_xml_tags! (String)</a><br /> |
49 | + | |
50 | + <a href="classes/RIR/Indri/Parameters.html#M000013">to_s (RIR::Indri::Parameters)</a><br /> | |
51 | + | |
52 | + <a href="classes/RIR/Indri/IndriQuery.html#M000015">to_s (RIR::Indri::IndriQuery)</a><br /> | |
51 | 53 | |
52 | 54 | </div> |
53 | 55 | </div> |
doc/index.html
... | ... | @@ -16,7 +16,7 @@ |
16 | 16 | <frame src="fr_class_index.html" name="Classes" /> |
17 | 17 | <frame src="fr_method_index.html" name="Methods" /> |
18 | 18 | </frameset> |
19 | - <frame src="files/lib/rir/string_rb.html" name="docwin" /> | |
19 | + <frame src="files/lib/rir/corpus_rb.html" name="docwin" /> | |
20 | 20 | </frameset> |
21 | 21 | </html> |
lib/rir/corpus.rb
lib/rir/query.rb
lib/rir/string.rb
... | ... | @@ -17,7 +17,6 @@ |
17 | 17 | # You should have received a copy of the GNU General Public License |
18 | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 | |
20 | -# General module for many purposes related to Information Retrieval. | |
21 | 20 | module RIR |
22 | 21 | |
23 | 22 | # These are the default stopwords provided by Lemur. |