Commit 7043da90bf781276184a770f306cfe7b59c17d5a
0 parents
Exists in
master
first commit
Showing 37 changed files with 2767 additions and 0 deletions Side-by-side Diff
- README.markdown
- doc/classes/Rir.html
- doc/classes/Rir/Document.html
- doc/classes/Rir/Document.src/M000010.html
- doc/classes/Rir/Document.src/M000011.html
- doc/classes/Rir/Document.src/M000012.html
- doc/classes/Rir/Document.src/M000013.html
- doc/classes/Rir/Document.src/M000014.html
- doc/classes/Rir/WebDocument.html
- doc/classes/Rir/WebDocument.src/M000015.html
- doc/classes/Rir/WebDocument.src/M000016.html
- doc/classes/Rir/WikipediaPage.html
- doc/classes/String.html
- doc/classes/String.src/M000001.html
- doc/classes/String.src/M000002.html
- doc/classes/String.src/M000003.html
- doc/classes/String.src/M000004.html
- doc/classes/String.src/M000005.html
- doc/classes/String.src/M000006.html
- doc/classes/String.src/M000007.html
- doc/classes/String.src/M000008.html
- doc/classes/String.src/M000009.html
- doc/created.rid
- doc/files/README_markdown.html
- doc/files/lib/rir/document_rb.html
- doc/files/lib/rir/string_rb.html
- doc/files/lib/rir_rb.html
- doc/files/main_rb.html
- doc/fr_class_index.html
- doc/fr_file_index.html
- doc/fr_method_index.html
- doc/index.html
- doc/rdoc-style.css
- lib/rir.rb
- lib/rir/document.rb
- lib/rir/string.rb
- main.rb
README.markdown
... | ... | @@ -0,0 +1,19 @@ |
1 | +# Ruby Information Retrieval (rIR) | |
2 | + | |
3 | +Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | |
4 | + | |
5 | +License | |
6 | +======= | |
7 | + | |
8 | +This program is free software: you can redistribute it and/or modify | |
9 | +it under the terms of the GNU General Public License as published by | |
10 | +the Free Software Foundation, either version 3 of the License, or | |
11 | +(at your option) any later version. | |
12 | + | |
13 | +This program is distributed in the hope that it will be useful, | |
14 | +but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | +GNU General Public License for more details. | |
17 | + | |
18 | +You should have received a copy of the GNU General Public License | |
19 | +along with this program. If not, see <http://www.gnu.org/licenses/>. |
doc/classes/Rir.html
... | ... | @@ -0,0 +1,153 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>Module: Rir [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="classHeader"> | |
46 | + <table class="header-table"> | |
47 | + <tr class="top-aligned-row"> | |
48 | + <td><strong>Module</strong></td> | |
49 | + <td class="class-name-in-header">Rir</td> | |
50 | + </tr> | |
51 | + <tr class="top-aligned-row"> | |
52 | + <td><strong>In:</strong></td> | |
53 | + <td> | |
54 | + | |
55 | + | |
56 | + <a href="../files/lib/rir/string_rb.html"> | |
57 | + | |
58 | + lib/rir/string.rb | |
59 | + | |
60 | + </a> | |
61 | + | |
62 | + | |
63 | + <br /> | |
64 | + | |
65 | + | |
66 | + <a href="../files/lib/rir/document_rb.html"> | |
67 | + | |
68 | + lib/rir/document.rb | |
69 | + | |
70 | + </a> | |
71 | + | |
72 | + | |
73 | + <br /> | |
74 | + | |
75 | + </td> | |
76 | + </tr> | |
77 | + | |
78 | + | |
79 | + </table> | |
80 | + </div> | |
81 | + <!-- banner header --> | |
82 | + | |
83 | + <div id="bodyContent"> | |
84 | + | |
85 | + <div id="contextContent"> | |
86 | + | |
87 | + <div id="description"> | |
88 | + <p> | |
89 | +General module for many purposes related to Information Retrieval. | |
90 | +</p> | |
91 | +<hr size="1"></hr><p> | |
92 | +General module for many purposes related to Information Retrieval. | |
93 | +</p> | |
94 | + | |
95 | + </div> | |
96 | + | |
97 | + </div> | |
98 | + | |
99 | + | |
100 | + </div> | |
101 | + | |
102 | + <!-- if includes --> | |
103 | + | |
104 | + <div id="section"> | |
105 | + | |
106 | + <div id="class-list"> | |
107 | + <h3 class="section-bar">Classes and Modules</h3> | |
108 | + | |
109 | + Class <a href="Rir/Document.html" class="link">Rir::Document</a><br /> | |
110 | +Class <a href="Rir/WebDocument.html" class="link">Rir::WebDocument</a><br /> | |
111 | +Class <a href="Rir/WikipediaPage.html" class="link">Rir::WikipediaPage</a><br /> | |
112 | + | |
113 | + </div> | |
114 | + | |
115 | + <div id="constants-list"> | |
116 | + <h3 class="section-bar">Constants</h3> | |
117 | + | |
118 | + <div class="name-list"> | |
119 | + <table summary="Constants"> | |
120 | + | |
121 | + <tr class="top-aligned-row context-row"> | |
122 | + <td class="context-item-name">Stoplist</td> | |
123 | + <td>=</td> | |
124 | + <td class="context-item-value">[ "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", 
"provide", "quite", "rather", "really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", "yours", "yourself", "yourselves" ]</td> | |
125 | + | |
126 | + <td> </td> | |
127 | + <td class="context-item-desc"> | |
128 | +These are the default stopwords provided by Lemur. | |
129 | + | |
130 | +</td> | |
131 | + | |
132 | + </tr> | |
133 | + | |
134 | + </table> | |
135 | + </div> | |
136 | + </div> | |
137 | + | |
138 | + | |
139 | + | |
140 | + | |
141 | + <!-- if method_list --> | |
142 | + | |
143 | + | |
144 | + | |
145 | + | |
146 | + </div> | |
147 | + | |
148 | +<div id="validator-badges"> | |
149 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
150 | +</div> | |
151 | + | |
152 | +</body> | |
153 | +</html> |
doc/classes/Rir/Document.html
... | ... | @@ -0,0 +1,312 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>Class: Rir::Document [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="classHeader"> | |
46 | + <table class="header-table"> | |
47 | + <tr class="top-aligned-row"> | |
48 | + <td><strong>Class</strong></td> | |
49 | + <td class="class-name-in-header">Rir::Document</td> | |
50 | + </tr> | |
51 | + <tr class="top-aligned-row"> | |
52 | + <td><strong>In:</strong></td> | |
53 | + <td> | |
54 | + | |
55 | + | |
56 | + <a href="../../files/lib/rir/document_rb.html"> | |
57 | + | |
58 | + lib/rir/document.rb | |
59 | + | |
60 | + </a> | |
61 | + | |
62 | + | |
63 | + <br /> | |
64 | + | |
65 | + </td> | |
66 | + </tr> | |
67 | + | |
68 | + | |
69 | + <tr class="top-aligned-row"> | |
70 | + <td><strong>Parent:</strong></td> | |
71 | + <td> | |
72 | + | |
73 | + Object | |
74 | + | |
75 | + </td> | |
76 | + </tr> | |
77 | + | |
78 | + </table> | |
79 | + </div> | |
80 | + <!-- banner header --> | |
81 | + | |
82 | + <div id="bodyContent"> | |
83 | + | |
84 | + <div id="contextContent"> | |
85 | + | |
86 | + <div id="description"> | |
87 | + <p> | |
88 | +A <a href="Document.html">Document</a> is a bag of words and is constructed | |
89 | +from a string. | |
90 | +</p> | |
91 | + | |
92 | + </div> | |
93 | + | |
94 | + </div> | |
95 | + | |
96 | + | |
97 | + <div id="method-list"> | |
98 | + <h3 class="section-bar">Methods</h3> | |
99 | + | |
100 | + <div class="name-list"> | |
101 | + | |
102 | + <a href="#M000012">count_words</a> | |
103 | + | |
104 | + <a href="#M000013">entropy</a> | |
105 | + | |
106 | + <a href="#M000010">format_words</a> | |
107 | + | |
108 | + <a href="#M000014">new</a> | |
109 | + | |
110 | + <a href="#M000011">ngrams</a> | |
111 | + | |
112 | + </div> | |
113 | + </div> | |
114 | + | |
115 | + </div> | |
116 | + | |
117 | + <!-- if includes --> | |
118 | + | |
119 | + <div id="section"> | |
120 | + | |
121 | + | |
122 | + | |
123 | + <div id="attribute-list"> | |
124 | + <h3 class="section-bar">Attributes</h3> | |
125 | + | |
126 | + <div class="name-list"> | |
127 | + <table> | |
128 | + | |
129 | + <tr class="top-aligned-row context-row"> | |
130 | + <td class="context-item-name">doc_content</td> | |
131 | + | |
132 | + <td class="context-item-value"> [R] </td> | |
133 | + | |
134 | + <td class="context-item-desc"></td> | |
135 | + </tr> | |
136 | + | |
137 | + <tr class="top-aligned-row context-row"> | |
138 | + <td class="context-item-name">words</td> | |
139 | + | |
140 | + <td class="context-item-value"> [R] </td> | |
141 | + | |
142 | + <td class="context-item-desc"></td> | |
143 | + </tr> | |
144 | + | |
145 | + </table> | |
146 | + </div> | |
147 | + </div> | |
148 | + | |
149 | + | |
150 | + <!-- if method_list --> | |
151 | + | |
152 | + <div id="methods"> | |
153 | + | |
154 | + <h3 class="section-bar">Public Class methods</h3> | |
155 | + | |
156 | + | |
157 | + <div id="method-M000014" class="method-detail"> | |
158 | + <a name="M000014"></a> | |
159 | + | |
160 | + <div class="method-heading"> | |
161 | + | |
162 | + <a href="Document.src/M000014.html" target="Code" class="method-signature" | |
163 | + onclick="popupCode('Document.src/M000014.html');return false;"> | |
164 | + | |
165 | + <span class="method-name">new</span><span class="method-args">(content)</span> | |
166 | + | |
167 | + </a> | |
168 | + | |
169 | + </div> | |
170 | + | |
171 | + <div class="method-description"> | |
172 | + | |
173 | + </div> | |
174 | + </div> | |
175 | + | |
176 | + | |
177 | + <h3 class="section-bar">Public Instance methods</h3> | |
178 | + | |
179 | + | |
180 | + <div id="method-M000012" class="method-detail"> | |
181 | + <a name="M000012"></a> | |
182 | + | |
183 | + <div class="method-heading"> | |
184 | + | |
185 | + <a href="Document.src/M000012.html" target="Code" class="method-signature" | |
186 | + onclick="popupCode('Document.src/M000012.html');return false;"> | |
187 | + | |
188 | + <span class="method-name">count_words</span><span class="method-args">()</span> | |
189 | + | |
190 | + </a> | |
191 | + | |
192 | + </div> | |
193 | + | |
194 | + <div class="method-description"> | |
195 | + | |
196 | + <p> | |
197 | +Returns a Hash containing the words and their associated counts in the | |
198 | +current <a href="Document.html">Document</a>. | |
199 | +</p> | |
200 | +<pre> | |
201 | + count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } | |
202 | +</pre> | |
203 | + | |
204 | + </div> | |
205 | + </div> | |
206 | + | |
207 | + | |
208 | + <div id="method-M000013" class="method-detail"> | |
209 | + <a name="M000013"></a> | |
210 | + | |
211 | + <div class="method-heading"> | |
212 | + | |
213 | + <a href="Document.src/M000013.html" target="Code" class="method-signature" | |
214 | + onclick="popupCode('Document.src/M000013.html');return false;"> | |
215 | + | |
216 | + <span class="method-name">entropy</span><span class="method-args">(s)</span> | |
217 | + | |
218 | + </a> | |
219 | + | |
220 | + </div> | |
221 | + | |
222 | + <div class="method-description"> | |
223 | + | |
224 | + <p> | |
225 | +Computes the entropy of a given string <tt>s</tt> inside the document. | |
226 | +</p> | |
227 | +<p> | |
228 | +If the string parameter is composed of several words (i.e. tokens separated by | |
229 | +whitespace), it is treated as an n-gram. | |
230 | +</p> | |
231 | +<pre> | |
232 | + entropy("guitar") #=> 0.00389919463243839 | |
233 | +</pre> | |
234 | + | |
235 | + </div> | |
236 | + </div> | |
237 | + | |
238 | + | |
239 | + <div id="method-M000011" class="method-detail"> | |
240 | + <a name="M000011"></a> | |
241 | + | |
242 | + <div class="method-heading"> | |
243 | + | |
244 | + <a href="Document.src/M000011.html" target="Code" class="method-signature" | |
245 | + onclick="popupCode('Document.src/M000011.html');return false;"> | |
246 | + | |
247 | + <span class="method-name">ngrams</span><span class="method-args">(n)</span> | |
248 | + | |
249 | + </a> | |
250 | + | |
251 | + </div> | |
252 | + | |
253 | + <div class="method-description"> | |
254 | + | |
255 | + <p> | |
256 | +Returns an Array containing the <tt>n</tt>-grams (words) from the current | |
257 | +<a href="Document.html">Document</a>. | |
258 | +</p> | |
259 | +<pre> | |
260 | + ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...] | |
261 | +</pre> | |
262 | + | |
263 | + </div> | |
264 | + </div> | |
265 | + | |
266 | + | |
267 | + <h3 class="section-bar">Protected Instance methods</h3> | |
268 | + | |
269 | + | |
270 | + <div id="method-M000010" class="method-detail"> | |
271 | + <a name="M000010"></a> | |
272 | + | |
273 | + <div class="method-heading"> | |
274 | + | |
275 | + <a href="Document.src/M000010.html" target="Code" class="method-signature" | |
276 | + onclick="popupCode('Document.src/M000010.html');return false;"> | |
277 | + | |
278 | + <span class="method-name">format_words</span><span class="method-args">()</span> | |
279 | + | |
280 | + </a> | |
281 | + | |
282 | + </div> | |
283 | + | |
284 | + <div class="method-description"> | |
285 | + | |
286 | + <p> | |
287 | +Any non-word characters are removed from the words (see <a | |
288 | +href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a> | |
289 | +and the \W special escape). | |
290 | +</p> | |
291 | +<p> | |
292 | +Protected function, only meant to be called at initialization. | |
293 | +</p> | |
294 | + | |
295 | + </div> | |
296 | + </div> | |
297 | + | |
298 | + | |
299 | + | |
300 | + </div> | |
301 | + | |
302 | + | |
303 | + | |
304 | + | |
305 | + </div> | |
306 | + | |
307 | +<div id="validator-badges"> | |
308 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
309 | +</div> | |
310 | + | |
311 | +</body> | |
312 | +</html> |
doc/classes/Rir/Document.src/M000010.html
... | ... | @@ -0,0 +1,23 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>format_words (Rir::Document)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span> | |
12 | + <span class="ruby-identifier">wo</span> = [] | |
13 | + | |
14 | + <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | |
15 | + <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span> | |
16 | + <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span> | |
17 | + <span class="ruby-keyword kw">end</span> | |
18 | + <span class="ruby-keyword kw">end</span> | |
19 | + | |
20 | + <span class="ruby-identifier">wo</span> | |
21 | + <span class="ruby-keyword kw">end</span></pre> | |
22 | +</body> | |
23 | +</html> |
doc/classes/Rir/Document.src/M000011.html
... | ... | @@ -0,0 +1,26 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>ngrams (Rir::Document)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>) | |
12 | + <span class="ruby-identifier">window</span> = [] | |
13 | + <span class="ruby-identifier">ngrams_array</span> = [] | |
14 | + | |
15 | + <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | |
16 | + <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>) | |
17 | + <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span> | |
18 | + <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">" "</span>) | |
19 | + <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>) | |
20 | + <span class="ruby-keyword kw">end</span> | |
21 | + <span class="ruby-keyword kw">end</span> | |
22 | + | |
23 | + <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span> | |
24 | + <span class="ruby-keyword kw">end</span></pre> | |
25 | +</body> | |
26 | +</html> |
doc/classes/Rir/Document.src/M000012.html
... | ... | @@ -0,0 +1,18 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>count_words (Rir::Document)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span> | |
12 | + <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> } | |
13 | + <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> } | |
14 | + | |
15 | + <span class="ruby-identifier">counts</span> | |
16 | + <span class="ruby-keyword kw">end</span></pre> | |
17 | +</body> | |
18 | +</html> |
doc/classes/Rir/Document.src/M000013.html
... | ... | @@ -0,0 +1,24 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>entropy (Rir::Document)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 77</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>) | |
12 | + <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span> | |
13 | + <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span> | |
14 | + | |
15 | + <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | |
16 | + <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span> | |
17 | + <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>) | |
18 | + <span class="ruby-keyword kw">end</span> | |
19 | + | |
20 | + <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span> | |
21 | + <span class="ruby-identifier">en</span> | |
22 | + <span class="ruby-keyword kw">end</span></pre> | |
23 | +</body> | |
24 | +</html> |
doc/classes/Rir/Document.src/M000014.html
... | ... | @@ -0,0 +1,16 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>new (Rir::Document)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 92</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">content</span>) | |
12 | + <span class="ruby-ivar">@doc_content</span> = <span class="ruby-identifier">content</span> | |
13 | + <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">format_words</span> | |
14 | + <span class="ruby-keyword kw">end</span></pre> | |
15 | +</body> | |
16 | +</html> |
doc/classes/Rir/WebDocument.html
... | ... | @@ -0,0 +1,209 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>Class: Rir::WebDocument [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="classHeader"> | |
46 | + <table class="header-table"> | |
47 | + <tr class="top-aligned-row"> | |
48 | + <td><strong>Class</strong></td> | |
49 | + <td class="class-name-in-header">Rir::WebDocument</td> | |
50 | + </tr> | |
51 | + <tr class="top-aligned-row"> | |
52 | + <td><strong>In:</strong></td> | |
53 | + <td> | |
54 | + | |
55 | + | |
56 | + <a href="../../files/lib/rir/document_rb.html"> | |
57 | + | |
58 | + lib/rir/document.rb | |
59 | + | |
60 | + </a> | |
61 | + | |
62 | + | |
63 | + <br /> | |
64 | + | |
65 | + </td> | |
66 | + </tr> | |
67 | + | |
68 | + | |
69 | + <tr class="top-aligned-row"> | |
70 | + <td><strong>Parent:</strong></td> | |
71 | + <td> | |
72 | + | |
73 | + <a href="Document.html"> | |
74 | + | |
75 | + Rir::Document | |
76 | + | |
77 | + </a> | |
78 | + | |
79 | + </td> | |
80 | + </tr> | |
81 | + | |
82 | + </table> | |
83 | + </div> | |
84 | + <!-- banner header --> | |
85 | + | |
86 | + <div id="bodyContent"> | |
87 | + | |
88 | + <div id="contextContent"> | |
89 | + | |
90 | + <div id="description"> | |
91 | + <p> | |
92 | +A <a href="WebDocument.html">WebDocument</a> is a <a | |
93 | +href="Document.html">Document</a> with a <tt>url</tt>. | |
94 | +</p> | |
95 | + | |
96 | + </div> | |
97 | + | |
98 | + </div> | |
99 | + | |
100 | + | |
101 | + <div id="method-list"> | |
102 | + <h3 class="section-bar">Methods</h3> | |
103 | + | |
104 | + <div class="name-list"> | |
105 | + | |
106 | + <a href="#M000015">get_content</a> | |
107 | + | |
108 | + <a href="#M000016">new</a> | |
109 | + | |
110 | + </div> | |
111 | + </div> | |
112 | + | |
113 | + </div> | |
114 | + | |
115 | + <!-- if includes --> | |
116 | + | |
117 | + <div id="section"> | |
118 | + | |
119 | + | |
120 | + | |
121 | + <div id="attribute-list"> | |
122 | + <h3 class="section-bar">Attributes</h3> | |
123 | + | |
124 | + <div class="name-list"> | |
125 | + <table> | |
126 | + | |
127 | + <tr class="top-aligned-row context-row"> | |
128 | + <td class="context-item-name">url</td> | |
129 | + | |
130 | + <td class="context-item-value"> [R] </td> | |
131 | + | |
132 | + <td class="context-item-desc"></td> | |
133 | + </tr> | |
134 | + | |
135 | + </table> | |
136 | + </div> | |
137 | + </div> | |
138 | + | |
139 | + | |
140 | + <!-- if method_list --> | |
141 | + | |
142 | + <div id="methods"> | |
143 | + | |
144 | + <h3 class="section-bar">Public Class methods</h3> | |
145 | + | |
146 | + | |
147 | + <div id="method-M000015" class="method-detail"> | |
148 | + <a name="M000015"></a> | |
149 | + | |
150 | + <div class="method-heading"> | |
151 | + | |
152 | + <a href="WebDocument.src/M000015.html" target="Code" class="method-signature" | |
153 | + onclick="popupCode('WebDocument.src/M000015.html');return false;"> | |
154 | + | |
155 | + <span class="method-name">get_content</span><span class="method-args">(url)</span> | |
156 | + | |
157 | + </a> | |
158 | + | |
159 | + </div> | |
160 | + | |
161 | + <div class="method-description"> | |
162 | + | |
163 | + <p> | |
164 | +Returns the HTML text from the page of a given <tt>url</tt>. | |
165 | +</p> | |
166 | + | |
167 | + </div> | |
168 | + </div> | |
169 | + | |
170 | + | |
171 | + <div id="method-M000016" class="method-detail"> | |
172 | + <a name="M000016"></a> | |
173 | + | |
174 | + <div class="method-heading"> | |
175 | + | |
176 | + <a href="WebDocument.src/M000016.html" target="Code" class="method-signature" | |
177 | + onclick="popupCode('WebDocument.src/M000016.html');return false;"> | |
178 | + | |
179 | + <span class="method-name">new</span><span class="method-args">(url)</span> | |
180 | + | |
181 | + </a> | |
182 | + | |
183 | + </div> | |
184 | + | |
185 | + <div class="method-description"> | |
186 | + | |
187 | + <p> | |
188 | +<a href="WebDocument.html">WebDocument</a> constructor, the content of the | |
189 | +<a href="Document.html">Document</a> is the HTML page without the tags. | |
190 | +</p> | |
191 | + | |
192 | + </div> | |
193 | + </div> | |
194 | + | |
195 | + | |
196 | + | |
197 | + </div> | |
198 | + | |
199 | + | |
200 | + | |
201 | + | |
202 | + </div> | |
203 | + | |
204 | +<div id="validator-badges"> | |
205 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
206 | +</div> | |
207 | + | |
208 | +</body> | |
209 | +</html> |
doc/classes/Rir/WebDocument.src/M000015.html
... | ... | @@ -0,0 +1,16 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>get_content (Rir::WebDocument)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 105</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>) | |
12 | + <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span> | |
13 | + <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>)) | |
14 | + <span class="ruby-keyword kw">end</span></pre> | |
15 | +</body> | |
16 | +</html> |
doc/classes/Rir/WebDocument.src/M000016.html
... | ... | @@ -0,0 +1,16 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>new (Rir::WebDocument)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 112</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">url</span>) | |
12 | + <span class="ruby-ivar">@url</span> = <span class="ruby-identifier">url</span> | |
13 | + <span class="ruby-keyword kw">super</span> <span class="ruby-constant">WebDocument</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>).<span class="ruby-identifier">strip_javascripts</span>.<span class="ruby-identifier">strip_stylesheets</span>.<span class="ruby-identifier">strip_xml_tags</span> | |
14 | + <span class="ruby-keyword kw">end</span></pre> | |
15 | +</body> | |
16 | +</html> |
doc/classes/Rir/WikipediaPage.html
... | ... | @@ -0,0 +1,122 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>Class: Rir::WikipediaPage [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="classHeader"> | |
46 | + <table class="header-table"> | |
47 | + <tr class="top-aligned-row"> | |
48 | + <td><strong>Class</strong></td> | |
49 | + <td class="class-name-in-header">Rir::WikipediaPage</td> | |
50 | + </tr> | |
51 | + <tr class="top-aligned-row"> | |
52 | + <td><strong>In:</strong></td> | |
53 | + <td> | |
54 | + | |
55 | + | |
56 | + <a href="../../files/lib/rir/document_rb.html"> | |
57 | + | |
58 | + lib/rir/document.rb | |
59 | + | |
60 | + </a> | |
61 | + | |
62 | + | |
63 | + <br /> | |
64 | + | |
65 | + </td> | |
66 | + </tr> | |
67 | + | |
68 | + | |
69 | + <tr class="top-aligned-row"> | |
70 | + <td><strong>Parent:</strong></td> | |
71 | + <td> | |
72 | + | |
73 | + <a href="WebDocument.html"> | |
74 | + | |
75 | + Rir::WebDocument | |
76 | + | |
77 | + </a> | |
78 | + | |
79 | + </td> | |
80 | + </tr> | |
81 | + | |
82 | + </table> | |
83 | + </div> | |
84 | + <!-- banner header --> | |
85 | + | |
86 | + <div id="bodyContent"> | |
87 | + | |
88 | + <div id="contextContent"> | |
89 | + | |
90 | + <div id="description"> | |
91 | + <p> | |
92 | +A <a href="WikipediaPage.html">WikipediaPage</a> is a <a | |
93 | +href="WebDocument.html">WebDocument</a>. | |
94 | +</p> | |
95 | + | |
96 | + </div> | |
97 | + | |
98 | + </div> | |
99 | + | |
100 | + | |
101 | + </div> | |
102 | + | |
103 | + <!-- if includes --> | |
104 | + | |
105 | + <div id="section"> | |
106 | + | |
107 | + | |
108 | + | |
109 | + | |
110 | + <!-- if method_list --> | |
111 | + | |
112 | + | |
113 | + | |
114 | + | |
115 | + </div> | |
116 | + | |
117 | +<div id="validator-badges"> | |
118 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
119 | +</div> | |
120 | + | |
121 | +</body> | |
122 | +</html> |
doc/classes/String.html
... | ... | @@ -0,0 +1,404 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>Class: String [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="classHeader"> | |
46 | + <table class="header-table"> | |
47 | + <tr class="top-aligned-row"> | |
48 | + <td><strong>Class</strong></td> | |
49 | + <td class="class-name-in-header">String</td> | |
50 | + </tr> | |
51 | + <tr class="top-aligned-row"> | |
52 | + <td><strong>In:</strong></td> | |
53 | + <td> | |
54 | + | |
55 | + | |
56 | + <a href="../files/lib/rir/string_rb.html"> | |
57 | + | |
58 | + lib/rir/string.rb | |
59 | + | |
60 | + </a> | |
61 | + | |
62 | + | |
63 | + <br /> | |
64 | + | |
65 | + </td> | |
66 | + </tr> | |
67 | + | |
68 | + | |
69 | + <tr class="top-aligned-row"> | |
70 | + <td><strong>Parent:</strong></td> | |
71 | + <td> | |
72 | + | |
73 | + Object | |
74 | + | |
75 | + </td> | |
76 | + </tr> | |
77 | + | |
78 | + </table> | |
79 | + </div> | |
80 | + <!-- banner header --> | |
81 | + | |
82 | + <div id="bodyContent"> | |
83 | + | |
84 | + <div id="contextContent"> | |
85 | + | |
86 | + <div id="description"> | |
87 | + <p> | |
88 | +Extension of the standard class <a href="String.html">String</a> with | |
89 | +useful functions. | |
90 | +</p> | |
91 | + | |
92 | + </div> | |
93 | + | |
94 | + </div> | |
95 | + | |
96 | + | |
97 | + <div id="method-list"> | |
98 | + <h3 class="section-bar">Methods</h3> | |
99 | + | |
100 | + <div class="name-list"> | |
101 | + | |
102 | + <a href="#M000009">extract_xmltags_values</a> | |
103 | + | |
104 | + <a href="#M000001">is_stopword?</a> | |
105 | + | |
106 | + <a href="#M000002">remove_special_characters</a> | |
107 | + | |
108 | + <a href="#M000006">strip_javascripts</a> | |
109 | + | |
110 | + <a href="#M000005">strip_javascripts!</a> | |
111 | + | |
112 | + <a href="#M000008">strip_stylesheets</a> | |
113 | + | |
114 | + <a href="#M000007">strip_stylesheets!</a> | |
115 | + | |
116 | + <a href="#M000004">strip_xml_tags</a> | |
117 | + | |
118 | + <a href="#M000003">strip_xml_tags!</a> | |
119 | + | |
120 | + </div> | |
121 | + </div> | |
122 | + | |
123 | + </div> | |
124 | + | |
125 | + <!-- if includes --> | |
126 | + | |
127 | + <div id="includes"> | |
128 | + <h3 class="section-bar">Included Modules</h3> | |
129 | + | |
130 | + <div id="includes-list"> | |
131 | + | |
132 | + <span class="include-name"><a href="Rir.html">Rir</a></span> | |
133 | + | |
134 | + </div> | |
135 | + </div> | |
136 | + | |
137 | + <div id="section"> | |
138 | + | |
139 | + | |
140 | + | |
141 | + | |
142 | + <!-- if method_list --> | |
143 | + | |
144 | + <div id="methods"> | |
145 | + | |
146 | + <h3 class="section-bar">Public Instance methods</h3> | |
147 | + | |
148 | + | |
149 | + <div id="method-M000009" class="method-detail"> | |
150 | + <a name="M000009"></a> | |
151 | + | |
152 | + <div class="method-heading"> | |
153 | + | |
154 | + <a href="String.src/M000009.html" target="Code" class="method-signature" | |
155 | + onclick="popupCode('String.src/M000009.html');return false;"> | |
156 | + | |
157 | + <span class="method-name">extract_xmltags_values</span><span class="method-args">(tag_name)</span> | |
158 | + | |
159 | + </a> | |
160 | + | |
161 | + </div> | |
162 | + | |
163 | + <div class="method-description"> | |
164 | + | |
165 | + <p> | |
166 | +Returns the text values inside all occurrences of an XML tag in <tt>self</tt>. | |
167 | +</p> | |
168 | +<pre> | |
169 | + s = "four-piece in <a href='#'>Indianapolis</a>, <a href='#'>Indiana</a> at the Murat Theatre" | |
170 | + s.extract_xmltags_values 'a' #=> ["Indianapolis", "Indiana"] | |
171 | +</pre> | |
172 | + | |
173 | + </div> | |
174 | + </div> | |
175 | + | |
176 | + | |
177 | + <div id="method-M000001" class="method-detail"> | |
178 | + <a name="M000001"></a> | |
179 | + | |
180 | + <div class="method-heading"> | |
181 | + | |
182 | + <a href="String.src/M000001.html" target="Code" class="method-signature" | |
183 | + onclick="popupCode('String.src/M000001.html');return false;"> | |
184 | + | |
185 | + <span class="method-name">is_stopword?</span><span class="method-args">()</span> | |
186 | + | |
187 | + </a> | |
188 | + | |
189 | + </div> | |
190 | + | |
191 | + <div class="method-description"> | |
192 | + | |
193 | + <p> | |
194 | +Returns <tt>true</tt> if <tt>self</tt> belongs to Rir::Stoplist, | |
195 | +<tt>false</tt> otherwise. | |
196 | +</p> | |
197 | + | |
198 | + </div> | |
199 | + </div> | |
200 | + | |
201 | + | |
202 | + <div id="method-M000002" class="method-detail"> | |
203 | + <a name="M000002"></a> | |
204 | + | |
205 | + <div class="method-heading"> | |
206 | + | |
207 | + <a href="String.src/M000002.html" target="Code" class="method-signature" | |
208 | + onclick="popupCode('String.src/M000002.html');return false;"> | |
209 | + | |
210 | + <span class="method-name">remove_special_characters</span><span class="method-args">()</span> | |
211 | + | |
212 | + </a> | |
213 | + | |
214 | + </div> | |
215 | + | |
216 | + <div class="method-description"> | |
217 | + | |
218 | + <p> | |
219 | +Do not use. TODO: revamp; find out why this function is here. | |
220 | +</p> | |
221 | + | |
222 | + </div> | |
223 | + </div> | |
224 | + | |
225 | + | |
226 | + <div id="method-M000006" class="method-detail"> | |
227 | + <a name="M000006"></a> | |
228 | + | |
229 | + <div class="method-heading"> | |
230 | + | |
231 | + <a href="String.src/M000006.html" target="Code" class="method-signature" | |
232 | + onclick="popupCode('String.src/M000006.html');return false;"> | |
233 | + | |
234 | + <span class="method-name">strip_javascripts</span><span class="method-args">()</span> | |
235 | + | |
236 | + </a> | |
237 | + | |
238 | + </div> | |
239 | + | |
240 | + <div class="method-description"> | |
241 | + | |
242 | + <p> | |
243 | +Removes all JavaScript sources from <tt>self</tt>. | |
244 | +</p> | |
245 | +<pre> | |
246 | + s = "<script type='text/javascript'> | |
247 | + var skin='vector', | |
248 | + stylepath='http://bits.wikimedia.org/skins-1.5' | |
249 | + </script> | |
250 | + | |
251 | + test" | |
252 | + s.strip_javascripts #=> "test" | |
253 | +</pre> | |
254 | + | |
255 | + </div> | |
256 | + </div> | |
257 | + | |
258 | + | |
259 | + <div id="method-M000005" class="method-detail"> | |
260 | + <a name="M000005"></a> | |
261 | + | |
262 | + <div class="method-heading"> | |
263 | + | |
264 | + <a href="String.src/M000005.html" target="Code" class="method-signature" | |
265 | + onclick="popupCode('String.src/M000005.html');return false;"> | |
266 | + | |
267 | + <span class="method-name">strip_javascripts!</span><span class="method-args">()</span> | |
268 | + | |
269 | + </a> | |
270 | + | |
271 | + </div> | |
272 | + | |
273 | + <div class="method-description"> | |
274 | + | |
275 | + <p> | |
276 | +Removes all JavaScript sources from <tt>self</tt>. | |
277 | +</p> | |
278 | +<pre> | |
279 | + s = "<script type='text/javascript'> | |
280 | + var skin='vector', | |
281 | + stylepath='http://bits.wikimedia.org/skins-1.5' | |
282 | + </script> | |
283 | + | |
284 | + test" | |
285 | + s.strip_javascripts! | |
286 | + s #=> "test" | |
287 | +</pre> | |
288 | + | |
289 | + </div> | |
290 | + </div> | |
291 | + | |
292 | + | |
293 | + <div id="method-M000008" class="method-detail"> | |
294 | + <a name="M000008"></a> | |
295 | + | |
296 | + <div class="method-heading"> | |
297 | + | |
298 | + <a href="String.src/M000008.html" target="Code" class="method-signature" | |
299 | + onclick="popupCode('String.src/M000008.html');return false;"> | |
300 | + | |
301 | + <span class="method-name">strip_stylesheets</span><span class="method-args">()</span> | |
302 | + | |
303 | + </a> | |
304 | + | |
305 | + </div> | |
306 | + | |
307 | + <div class="method-description"> | |
308 | + | |
309 | + </div> | |
310 | + </div> | |
311 | + | |
312 | + | |
313 | + <div id="method-M000007" class="method-detail"> | |
314 | + <a name="M000007"></a> | |
315 | + | |
316 | + <div class="method-heading"> | |
317 | + | |
318 | + <a href="String.src/M000007.html" target="Code" class="method-signature" | |
319 | + onclick="popupCode('String.src/M000007.html');return false;"> | |
320 | + | |
321 | + <span class="method-name">strip_stylesheets!</span><span class="method-args">()</span> | |
322 | + | |
323 | + </a> | |
324 | + | |
325 | + </div> | |
326 | + | |
327 | + <div class="method-description"> | |
328 | + | |
329 | + </div> | |
330 | + </div> | |
331 | + | |
332 | + | |
333 | + <div id="method-M000004" class="method-detail"> | |
334 | + <a name="M000004"></a> | |
335 | + | |
336 | + <div class="method-heading"> | |
337 | + | |
338 | + <a href="String.src/M000004.html" target="Code" class="method-signature" | |
339 | + onclick="popupCode('String.src/M000004.html');return false;"> | |
340 | + | |
341 | + <span class="method-name">strip_xml_tags</span><span class="method-args">()</span> | |
342 | + | |
343 | + </a> | |
344 | + | |
345 | + </div> | |
346 | + | |
347 | + <div class="method-description"> | |
348 | + | |
349 | + <p> | |
350 | +Removes all XML-like tags from <tt>self</tt>. | |
351 | +</p> | |
352 | +<pre> | |
353 | + s = "<html><body>test</body></html>" | |
354 | + s.strip_xml_tags #=> "test" | |
355 | + s #=> "<html><body>test</body></html>" | |
356 | +</pre> | |
357 | + | |
358 | + </div> | |
359 | + </div> | |
360 | + | |
361 | + | |
362 | + <div id="method-M000003" class="method-detail"> | |
363 | + <a name="M000003"></a> | |
364 | + | |
365 | + <div class="method-heading"> | |
366 | + | |
367 | + <a href="String.src/M000003.html" target="Code" class="method-signature" | |
368 | + onclick="popupCode('String.src/M000003.html');return false;"> | |
369 | + | |
370 | + <span class="method-name">strip_xml_tags!</span><span class="method-args">()</span> | |
371 | + | |
372 | + </a> | |
373 | + | |
374 | + </div> | |
375 | + | |
376 | + <div class="method-description"> | |
377 | + | |
378 | + <p> | |
379 | +Removes all XML-like tags from <tt>self</tt>. | |
380 | +</p> | |
381 | +<pre> | |
382 | + s = "<html><body>test</body></html>" | |
383 | + s.strip_xml_tags! | |
384 | + s #=> "test" | |
385 | +</pre> | |
386 | + | |
387 | + </div> | |
388 | + </div> | |
389 | + | |
390 | + | |
391 | + | |
392 | + </div> | |
393 | + | |
394 | + | |
395 | + | |
396 | + | |
397 | + </div> | |
398 | + | |
399 | +<div id="validator-badges"> | |
400 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
401 | +</div> | |
402 | + | |
403 | +</body> | |
404 | +</html> |
doc/classes/String.src/M000001.html
... | ... | @@ -0,0 +1,15 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>is_stopword? (String)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 77</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">is_stopword?</span> | |
12 | + <span class="ruby-constant">Stoplist</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">downcase</span>) | |
13 | + <span class="ruby-keyword kw">end</span></pre> | |
14 | +</body> | |
15 | +</html> |
doc/classes/String.src/M000002.html
... | ... | @@ -0,0 +1,15 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>remove_special_characters (String)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 83</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">remove_special_characters</span> | |
12 | + <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/\W/</span>,<span class="ruby-value str">' '</span>).<span class="ruby-identifier">split</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/\W/</span>,<span class="ruby-value str">' '</span>).<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/\A.\z/</span>, <span class="ruby-value str">''</span>)}.<span class="ruby-identifier">join</span>(<span class="ruby-value str">' '</span>).<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/\A.\z/</span>, <span class="ruby-value str">''</span>)}.<span class="ruby-identifier">join</span>(<span class="ruby-value str">' '</span>) | |
13 | + <span class="ruby-keyword kw">end</span></pre> | |
14 | +</body> | |
15 | +</html> |
doc/classes/String.src/M000003.html
... | ... | @@ -0,0 +1,15 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>strip_xml_tags! (String)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 92</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_xml_tags!</span> | |
12 | + <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator"><</span>\<span class="ruby-regexp re">/?[^>]*>/</span> | |
13 | + <span class="ruby-keyword kw">end</span></pre> | |
14 | +</body> | |
15 | +</html> |
doc/classes/String.src/M000004.html
... | ... | @@ -0,0 +1,15 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>strip_xml_tags (String)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 101</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_xml_tags</span> | |
12 | + <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_xml_tags!</span> | |
13 | + <span class="ruby-keyword kw">end</span></pre> | |
14 | +</body> | |
15 | +</html> |
doc/classes/String.src/M000005.html
... | ... | @@ -0,0 +1,15 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>strip_javascripts! (String)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 115</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_javascripts!</span> | |
12 | + <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator"><</span><span class="ruby-identifier">script</span> <span class="ruby-identifier">type</span>=<span class="ruby-value str">"text\/javascript"</span><span class="ruby-operator">></span>(.<span class="ruby-operator">+</span><span class="ruby-value">?)</span><span class="ruby-operator"><</span>\<span class="ruby-regexp re">/script>/</span><span class="ruby-identifier">m</span> | |
13 | + <span class="ruby-keyword kw">end</span></pre> | |
14 | +</body> | |
15 | +</html> |
doc/classes/String.src/M000006.html
... | ... | @@ -0,0 +1,15 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>strip_javascripts (String)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 128</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_javascripts</span> | |
12 | + <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_javascripts!</span> | |
13 | + <span class="ruby-keyword kw">end</span></pre> | |
14 | +</body> | |
15 | +</html> |
doc/classes/String.src/M000007.html
... | ... | @@ -0,0 +1,16 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>strip_stylesheets! (String)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 132</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_stylesheets!</span> | |
12 | + <span class="ruby-comment cmt"># TODO: rewamp. dunno what is it.</span> | |
13 | + <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator"><</span><span class="ruby-identifier">style</span> <span class="ruby-identifier">type</span>=<span class="ruby-value str">"text\/css"</span><span class="ruby-operator">></span>(.<span class="ruby-operator">+</span><span class="ruby-value">?)</span><span class="ruby-operator"><</span>\<span class="ruby-regexp re">/style>/</span><span class="ruby-identifier">m</span> | |
14 | + <span class="ruby-keyword kw">end</span></pre> | |
15 | +</body> | |
16 | +</html> |
doc/classes/String.src/M000008.html
... | ... | @@ -0,0 +1,15 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>strip_stylesheets (String)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 137</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_stylesheets</span> | |
12 | + <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_stylesheets!</span> | |
13 | + <span class="ruby-keyword kw">end</span></pre> | |
14 | +</body> | |
15 | +</html> |
doc/classes/String.src/M000009.html
... | ... | @@ -0,0 +1,15 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>extract_xmltags_values (String)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 145</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">extract_xmltags_values</span>(<span class="ruby-identifier">tag_name</span>) | |
12 | + <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">scan</span>(<span class="ruby-node">/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/</span>).<span class="ruby-identifier">flatten</span> | |
13 | + <span class="ruby-keyword kw">end</span></pre> | |
14 | +</body> | |
15 | +</html> |
doc/created.rid
... | ... | @@ -0,0 +1 @@ |
1 | +Fri, 05 Nov 2010 14:41:10 +0100 |
doc/files/README_markdown.html
... | ... | @@ -0,0 +1,90 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>File: README.markdown [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="fileHeader"> | |
46 | + <h1>README.markdown</h1> | |
47 | + <table class="header-table"> | |
48 | + <tr class="top-aligned-row"> | |
49 | + <td><strong>Path:</strong></td> | |
50 | + <td>README.markdown | |
51 | + | |
52 | + </td> | |
53 | + </tr> | |
54 | + <tr class="top-aligned-row"> | |
55 | + <td><strong>Last Update:</strong></td> | |
56 | + <td>2010-11-05 14:40:41 +0100</td> | |
57 | + </tr> | |
58 | + </table> | |
59 | + </div> | |
60 | + <!-- banner header --> | |
61 | + | |
62 | + <div id="bodyContent"> | |
63 | + | |
64 | + <div id="contextContent"> | |
65 | + | |
66 | + </div> | |
67 | + | |
68 | + | |
69 | + </div> | |
70 | + | |
71 | + <!-- if includes --> | |
72 | + | |
73 | + <div id="section"> | |
74 | + | |
75 | + | |
76 | + | |
77 | + | |
78 | + <!-- if method_list --> | |
79 | + | |
80 | + | |
81 | + | |
82 | + | |
83 | + </div> | |
84 | + | |
85 | +<div id="validator-badges"> | |
86 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
87 | +</div> | |
88 | + | |
89 | +</body> | |
90 | +</html> |
doc/files/lib/rir/document_rb.html
... | ... | @@ -0,0 +1,127 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>File: document.rb [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="fileHeader"> | |
46 | + <h1>document.rb</h1> | |
47 | + <table class="header-table"> | |
48 | + <tr class="top-aligned-row"> | |
49 | + <td><strong>Path:</strong></td> | |
50 | + <td>lib/rir/document.rb | |
51 | + | |
52 | + </td> | |
53 | + </tr> | |
54 | + <tr class="top-aligned-row"> | |
55 | + <td><strong>Last Update:</strong></td> | |
56 | + <td>2010-11-05 14:39:35 +0100</td> | |
57 | + </tr> | |
58 | + </table> | |
59 | + </div> | |
60 | + <!-- banner header --> | |
61 | + | |
62 | + <div id="bodyContent"> | |
63 | + | |
64 | + <div id="contextContent"> | |
65 | + | |
66 | + <div id="description"> | |
67 | + <p> | |
68 | +This file is a part of an Information Retrieval oriented Ruby library | |
69 | +</p> | |
70 | +<p> | |
71 | +Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | |
72 | +</p> | |
73 | +<p> | |
74 | +This program is free software: you can redistribute it and/or modify it | |
75 | +under the terms of the GNU General Public License as published by the Free | |
76 | +Software Foundation, either version 3 of the License, or (at your option) | |
77 | +any later version. | |
78 | +</p> | |
79 | +<p> | |
80 | +This program is distributed in the hope that it will be useful, but WITHOUT | |
81 | +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
82 | +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
83 | +more details. | |
84 | +</p> | |
85 | +<p> | |
86 | +You should have received a copy of the GNU General Public License along | |
87 | +with this program. If not, see <<a | |
88 | +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. | |
89 | +</p> | |
90 | + | |
91 | + </div> | |
92 | + | |
93 | + <div id="requires-list"> | |
94 | + <h3 class="section-bar">Required files</h3> | |
95 | + | |
96 | + <div class="name-list"> | |
97 | + | |
98 | + net/http | |
99 | + | |
100 | + </div> | |
101 | + </div> | |
102 | + | |
103 | + </div> | |
104 | + | |
105 | + | |
106 | + </div> | |
107 | + | |
108 | + <!-- if includes --> | |
109 | + | |
110 | + <div id="section"> | |
111 | + | |
112 | + | |
113 | + | |
114 | + | |
115 | + <!-- if method_list --> | |
116 | + | |
117 | + | |
118 | + | |
119 | + | |
120 | + </div> | |
121 | + | |
122 | +<div id="validator-badges"> | |
123 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
124 | +</div> | |
125 | + | |
126 | +</body> | |
127 | +</html> |
doc/files/lib/rir/string_rb.html
... | ... | @@ -0,0 +1,129 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>File: string.rb [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="fileHeader"> | |
46 | + <h1>string.rb</h1> | |
47 | + <table class="header-table"> | |
48 | + <tr class="top-aligned-row"> | |
49 | + <td><strong>Path:</strong></td> | |
50 | + <td>lib/rir/string.rb | |
51 | + | |
52 | + </td> | |
53 | + </tr> | |
54 | + <tr class="top-aligned-row"> | |
55 | + <td><strong>Last Update:</strong></td> | |
56 | + <td>2010-11-05 14:39:35 +0100</td> | |
57 | + </tr> | |
58 | + </table> | |
59 | + </div> | |
60 | + <!-- banner header --> | |
61 | + | |
62 | + <div id="bodyContent"> | |
63 | + | |
64 | + <div id="contextContent"> | |
65 | + | |
66 | + <div id="description"> | |
67 | + <p> | |
68 | +This file is a part of an Information Retrieval oriented Ruby library | |
69 | +</p> | |
70 | +<p> | |
71 | +Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | |
72 | +</p> | |
73 | +<p> | |
74 | +This program is free software: you can redistribute it and/or modify it | |
75 | +under the terms of the GNU General Public License as published by the Free | |
76 | +Software Foundation, either version 3 of the License, or (at your option) | |
77 | +any later version. | |
78 | +</p> | |
79 | +<p> | |
80 | +This program is distributed in the hope that it will be useful, but WITHOUT | |
81 | +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
82 | +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
83 | +more details. | |
84 | +</p> | |
85 | +<p> | |
86 | +You should have received a copy of the GNU General Public License along | |
87 | +with this program. If not, see <<a | |
88 | +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. | |
89 | +</p> | |
90 | + | |
91 | + </div> | |
92 | + | |
93 | + <div id="requires-list"> | |
94 | + <h3 class="section-bar">Required files</h3> | |
95 | + | |
96 | + <div class="name-list"> | |
97 | + | |
98 | + cgi | |
99 | + | |
100 | + kconv | |
101 | + | |
102 | + </div> | |
103 | + </div> | |
104 | + | |
105 | + </div> | |
106 | + | |
107 | + | |
108 | + </div> | |
109 | + | |
110 | + <!-- if includes --> | |
111 | + | |
112 | + <div id="section"> | |
113 | + | |
114 | + | |
115 | + | |
116 | + | |
117 | + <!-- if method_list --> | |
118 | + | |
119 | + | |
120 | + | |
121 | + | |
122 | + </div> | |
123 | + | |
124 | +<div id="validator-badges"> | |
125 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
126 | +</div> | |
127 | + | |
128 | +</body> | |
129 | +</html> |
doc/files/lib/rir_rb.html
... | ... | @@ -0,0 +1,102 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>File: rir.rb [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="fileHeader"> | |
46 | + <h1>rir.rb</h1> | |
47 | + <table class="header-table"> | |
48 | + <tr class="top-aligned-row"> | |
49 | + <td><strong>Path:</strong></td> | |
50 | + <td>lib/rir.rb | |
51 | + | |
52 | + </td> | |
53 | + </tr> | |
54 | + <tr class="top-aligned-row"> | |
55 | + <td><strong>Last Update:</strong></td> | |
56 | + <td>2010-11-05 14:39:35 +0100</td> | |
57 | + </tr> | |
58 | + </table> | |
59 | + </div> | |
60 | + <!-- banner header --> | |
61 | + | |
62 | + <div id="bodyContent"> | |
63 | + | |
64 | + <div id="contextContent"> | |
65 | + | |
66 | + <div id="requires-list"> | |
67 | + <h3 class="section-bar">Required files</h3> | |
68 | + | |
69 | + <div class="name-list"> | |
70 | + | |
71 | + rir/document | |
72 | + | |
73 | + rir/string | |
74 | + | |
75 | + </div> | |
76 | + </div> | |
77 | + | |
78 | + </div> | |
79 | + | |
80 | + | |
81 | + </div> | |
82 | + | |
83 | + <!-- if includes --> | |
84 | + | |
85 | + <div id="section"> | |
86 | + | |
87 | + | |
88 | + | |
89 | + | |
90 | + <!-- if method_list --> | |
91 | + | |
92 | + | |
93 | + | |
94 | + | |
95 | + </div> | |
96 | + | |
97 | +<div id="validator-badges"> | |
98 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
99 | +</div> | |
100 | + | |
101 | +</body> | |
102 | +</html> |
doc/files/main_rb.html
... | ... | @@ -0,0 +1,100 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>File: main.rb [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="fileHeader"> | |
46 | + <h1>main.rb</h1> | |
47 | + <table class="header-table"> | |
48 | + <tr class="top-aligned-row"> | |
49 | + <td><strong>Path:</strong></td> | |
50 | + <td>main.rb | |
51 | + | |
52 | + </td> | |
53 | + </tr> | |
54 | + <tr class="top-aligned-row"> | |
55 | + <td><strong>Last Update:</strong></td> | |
56 | + <td>2010-11-05 14:40:11 +0100</td> | |
57 | + </tr> | |
58 | + </table> | |
59 | + </div> | |
60 | + <!-- banner header --> | |
61 | + | |
62 | + <div id="bodyContent"> | |
63 | + | |
64 | + <div id="contextContent"> | |
65 | + | |
66 | + <div id="requires-list"> | |
67 | + <h3 class="section-bar">Required files</h3> | |
68 | + | |
69 | + <div class="name-list"> | |
70 | + | |
71 | + rir | |
72 | + | |
73 | + </div> | |
74 | + </div> | |
75 | + | |
76 | + </div> | |
77 | + | |
78 | + | |
79 | + </div> | |
80 | + | |
81 | + <!-- if includes --> | |
82 | + | |
83 | + <div id="section"> | |
84 | + | |
85 | + | |
86 | + | |
87 | + | |
88 | + <!-- if method_list --> | |
89 | + | |
90 | + | |
91 | + | |
92 | + | |
93 | + </div> | |
94 | + | |
95 | +<div id="validator-badges"> | |
96 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
97 | +</div> | |
98 | + | |
99 | +</body> | |
100 | +</html> |
doc/fr_class_index.html
... | ... | @@ -0,0 +1,33 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<!-- | |
5 | + | |
6 | + Classes [RDoc Documentation] | |
7 | + | |
8 | + --> | |
9 | +<head> | |
10 | + <title>Classes [RDoc Documentation]</title> | |
11 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
12 | + <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> | |
13 | + <base target="docwin" /> | |
14 | +</head> | |
15 | +<body> | |
16 | +<div class="index"> | |
17 | + <h1 class="section-bar">Classes</h1> | |
18 | + <div id="index-entries"> | |
19 | + | |
20 | + <a href="classes/Rir.html">Rir</a><br /> | |
21 | + | |
22 | + <a href="classes/Rir/Document.html">Rir::Document</a><br /> | |
23 | + | |
24 | + <a href="classes/Rir/WebDocument.html">Rir::WebDocument</a><br /> | |
25 | + | |
26 | + <a href="classes/Rir/WikipediaPage.html">Rir::WikipediaPage</a><br /> | |
27 | + | |
28 | + <a href="classes/String.html">String</a><br /> | |
29 | + | |
30 | + </div> | |
31 | +</div> | |
32 | +</body> | |
33 | +</html> |
doc/fr_file_index.html
... | ... | @@ -0,0 +1,33 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<!-- | |
5 | + | |
6 | + Files [RDoc Documentation] | |
7 | + | |
8 | + --> | |
9 | +<head> | |
10 | + <title>Files [RDoc Documentation]</title> | |
11 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
12 | + <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> | |
13 | + <base target="docwin" /> | |
14 | +</head> | |
15 | +<body> | |
16 | +<div class="index"> | |
17 | + <h1 class="section-bar">Files</h1> | |
18 | + <div id="index-entries"> | |
19 | + | |
20 | + <a href="files/README_markdown.html">README.markdown</a><br /> | |
21 | + | |
22 | + <a href="files/lib/rir_rb.html">lib/rir.rb</a><br /> | |
23 | + | |
24 | + <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br /> | |
25 | + | |
26 | + <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br /> | |
27 | + | |
28 | + <a href="files/main_rb.html">main.rb</a><br /> | |
29 | + | |
30 | + </div> | |
31 | +</div> | |
32 | +</body> | |
33 | +</html> |
doc/fr_method_index.html
... | ... | @@ -0,0 +1,55 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<!-- | |
5 | + | |
6 | + Methods [RDoc Documentation] | |
7 | + | |
8 | + --> | |
9 | +<head> | |
10 | + <title>Methods [RDoc Documentation]</title> | |
11 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
12 | + <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> | |
13 | + <base target="docwin" /> | |
14 | +</head> | |
15 | +<body> | |
16 | +<div class="index"> | |
17 | + <h1 class="section-bar">Methods</h1> | |
18 | + <div id="index-entries"> | |
19 | + | |
20 | + <a href="classes/Rir/Document.html#M000012">count_words (Rir::Document)</a><br /> | |
21 | + | |
22 | + <a href="classes/Rir/Document.html#M000013">entropy (Rir::Document)</a><br /> | |
23 | + | |
24 | + <a href="classes/String.html#M000009">extract_xmltags_values (String)</a><br /> | |
25 | + | |
26 | + <a href="classes/Rir/Document.html#M000010">format_words (Rir::Document)</a><br /> | |
27 | + | |
28 | + <a href="classes/Rir/WebDocument.html#M000015">get_content (Rir::WebDocument)</a><br /> | |
29 | + | |
30 | + <a href="classes/String.html#M000001">is_stopword? (String)</a><br /> | |
31 | + | |
32 | + <a href="classes/Rir/WebDocument.html#M000016">new (Rir::WebDocument)</a><br /> | |
33 | + | |
34 | + <a href="classes/Rir/Document.html#M000014">new (Rir::Document)</a><br /> | |
35 | + | |
36 | + <a href="classes/Rir/Document.html#M000011">ngrams (Rir::Document)</a><br /> | |
37 | + | |
38 | + <a href="classes/String.html#M000002">remove_special_characters (String)</a><br /> | |
39 | + | |
40 | + <a href="classes/String.html#M000006">strip_javascripts (String)</a><br /> | |
41 | + | |
42 | + <a href="classes/String.html#M000005">strip_javascripts! (String)</a><br /> | |
43 | + | |
44 | + <a href="classes/String.html#M000008">strip_stylesheets (String)</a><br /> | |
45 | + | |
46 | + <a href="classes/String.html#M000007">strip_stylesheets! (String)</a><br /> | |
47 | + | |
48 | + <a href="classes/String.html#M000004">strip_xml_tags (String)</a><br /> | |
49 | + | |
50 | + <a href="classes/String.html#M000003">strip_xml_tags! (String)</a><br /> | |
51 | + | |
52 | + </div> | |
53 | +</div> | |
54 | +</body> | |
55 | +</html> |
doc/index.html
... | ... | @@ -0,0 +1,21 @@ |
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<!-- | |
5 | + | |
6 | + RDoc Documentation | |
7 | + | |
8 | + --> | |
9 | +<head> | |
10 | + <title>RDoc Documentation</title> | |
11 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
12 | +</head> | |
13 | +<frameset rows="20%, 80%"> | |
14 | + <frameset cols="25%,35%,45%"> | |
15 | + <frame src="fr_file_index.html" title="Files" name="Files" /> | |
16 | + <frame src="fr_class_index.html" name="Classes" /> | |
17 | + <frame src="fr_method_index.html" name="Methods" /> | |
18 | + </frameset> | |
19 | + <frame src="files/README_markdown.html" name="docwin" /> | |
20 | +</frameset> | |
21 | +</html> |
doc/rdoc-style.css
... | ... | @@ -0,0 +1,299 @@ |
1 | +body { | |
2 | + font-family: Verdana,Arial,Helvetica,sans-serif; | |
3 | + font-size: 90%; | |
4 | + margin: 0; | |
5 | + margin-left: 40px; | |
6 | + padding: 0; | |
7 | + background: white; | |
8 | + color: black; | |
9 | +} | |
10 | + | |
11 | +h1, h2, h3, h4 { | |
12 | + margin: 0; | |
13 | + background: transparent; | |
14 | +} | |
15 | + | |
16 | +h1 { | |
17 | + font-size: 150%; | |
18 | +} | |
19 | + | |
20 | +h2,h3,h4 { | |
21 | + margin-top: 1em; | |
22 | +} | |
23 | + | |
24 | +:link, :visited { | |
25 | + background: #eef; | |
26 | + color: #039; | |
27 | + text-decoration: none; | |
28 | +} | |
29 | + | |
30 | +:link:hover, :visited:hover { | |
31 | + background: #039; | |
32 | + color: #eef; | |
33 | +} | |
34 | + | |
35 | +/* Override the base stylesheet's Anchor inside a table cell */ | |
36 | +td > :link, td > :visited { | |
37 | + background: transparent; | |
38 | + color: #039; | |
39 | + text-decoration: none; | |
40 | +} | |
41 | + | |
42 | +/* and inside a section title */ | |
43 | +.section-title > :link, .section-title > :visited { | |
44 | + background: transparent; | |
45 | + color: #eee; | |
46 | + text-decoration: none; | |
47 | +} | |
48 | + | |
49 | +/* === Structural elements =================================== */ | |
50 | + | |
51 | +.index { | |
52 | + margin: 0; | |
53 | + margin-left: -40px; | |
54 | + padding: 0; | |
55 | + font-size: 90%; | |
56 | +} | |
57 | + | |
58 | +.index :link, .index :visited { | |
59 | + margin-left: 0.7em; | |
60 | +} | |
61 | + | |
62 | +.index .section-bar { | |
63 | + margin-left: 0px; | |
64 | + padding-left: 0.7em; | |
65 | + background: #ccc; | |
66 | + font-size: small; | |
67 | +} | |
68 | + | |
69 | +#classHeader, #fileHeader { | |
70 | + width: auto; | |
71 | + color: white; | |
72 | + padding: 0.5em 1.5em 0.5em 1.5em; | |
73 | + margin: 0; | |
74 | + margin-left: -40px; | |
75 | + border-bottom: 3px solid #006; | |
76 | +} | |
77 | + | |
78 | +#classHeader :link, #fileHeader :link, | |
79 | +#classHeader :visited, #fileHeader :visited { | |
80 | + background: inherit; | |
81 | + color: white; | |
82 | +} | |
83 | + | |
84 | +#classHeader td, #fileHeader td { | |
85 | + background: inherit; | |
86 | + color: white; | |
87 | +} | |
88 | + | |
89 | +#fileHeader { | |
90 | + background: #057; | |
91 | +} | |
92 | + | |
93 | +#classHeader { | |
94 | + background: #048; | |
95 | +} | |
96 | + | |
97 | +.class-name-in-header { | |
98 | + font-size: 180%; | |
99 | + font-weight: bold; | |
100 | +} | |
101 | + | |
102 | +#bodyContent { | |
103 | + padding: 0 1.5em 0 1.5em; | |
104 | +} | |
105 | + | |
106 | +#description { | |
107 | + padding: 0.5em 1.5em; | |
108 | + background: #efefef; | |
109 | + border: 1px dotted #999; | |
110 | +} | |
111 | + | |
112 | +#description h1, #description h2, #description h3, | |
113 | +#description h4, #description h5, #description h6 { | |
114 | + color: #125; | |
115 | + background: transparent; | |
116 | +} | |
117 | + | |
118 | +#validator-badges { | |
119 | + text-align: center; | |
120 | +} | |
121 | + | |
122 | +#validator-badges img { | |
123 | + border: 0; | |
124 | +} | |
125 | + | |
126 | +#copyright { | |
127 | + color: #333; | |
128 | + background: #efefef; | |
129 | + font: 0.75em sans-serif; | |
130 | + margin-top: 5em; | |
131 | + margin-bottom: 0; | |
132 | + padding: 0.5em 2em; | |
133 | +} | |
134 | + | |
135 | +/* === Classes =================================== */ | |
136 | + | |
137 | +table.header-table { | |
138 | + color: white; | |
139 | + font-size: small; | |
140 | +} | |
141 | + | |
142 | +.type-note { | |
143 | + font-size: small; | |
144 | + color: #dedede; | |
145 | +} | |
146 | + | |
147 | +.section-bar { | |
148 | + color: #333; | |
149 | + border-bottom: 1px solid #999; | |
150 | + margin-left: -20px; | |
151 | +} | |
152 | + | |
153 | +.section-title { | |
154 | + background: #79a; | |
155 | + color: #eee; | |
156 | + padding: 3px; | |
157 | + margin-top: 2em; | |
158 | + margin-left: -30px; | |
159 | + border: 1px solid #999; | |
160 | +} | |
161 | + | |
162 | +.top-aligned-row { | |
163 | + vertical-align: top | |
164 | +} | |
165 | + | |
166 | +.bottom-aligned-row { | |
167 | + vertical-align: bottom | |
168 | +} | |
169 | + | |
170 | +#diagram img { | |
171 | + border: 0; | |
172 | +} | |
173 | + | |
174 | +/* --- Context section classes ----------------------- */ | |
175 | + | |
176 | +.context-row { } | |
177 | + | |
178 | +.context-item-name { | |
179 | + font-family: monospace; | |
180 | + font-weight: bold; | |
181 | + color: black; | |
182 | +} | |
183 | + | |
184 | +.context-item-value { | |
185 | + font-size: small; | |
186 | + color: #448; | |
187 | +} | |
188 | + | |
189 | +.context-item-desc { | |
190 | + color: #333; | |
191 | + padding-left: 2em; | |
192 | +} | |
193 | + | |
194 | +/* --- Method classes -------------------------- */ | |
195 | + | |
196 | +.method-detail { | |
197 | + background: #efefef; | |
198 | + padding: 0; | |
199 | + margin-top: 0.5em; | |
200 | + margin-bottom: 1em; | |
201 | + border: 1px dotted #ccc; | |
202 | +} | |
203 | + | |
204 | +.method-heading { | |
205 | + color: black; | |
206 | + background: #ccc; | |
207 | + border-bottom: 1px solid #666; | |
208 | + padding: 0.2em 0.5em 0 0.5em; | |
209 | +} | |
210 | + | |
211 | +.method-signature { | |
212 | + color: black; | |
213 | + background: inherit; | |
214 | +} | |
215 | + | |
216 | +.method-name { | |
217 | + font-weight: bold; | |
218 | +} | |
219 | + | |
220 | +.method-args { | |
221 | + font-style: italic; | |
222 | +} | |
223 | + | |
224 | +.method-description { | |
225 | + padding: 0 0.5em 0 0.5em; | |
226 | +} | |
227 | + | |
228 | +/* --- Source code sections -------------------- */ | |
229 | + | |
230 | +:link.source-toggle, :visited.source-toggle { | |
231 | + font-size: 90%; | |
232 | +} | |
233 | + | |
234 | +div.method-source-code { | |
235 | + background: #262626; | |
236 | + color: #ffdead; | |
237 | + margin: 1em; | |
238 | + padding: 0.5em; | |
239 | + border: 1px dashed #999; | |
240 | + overflow: auto; | |
241 | +} | |
242 | + | |
243 | +div.method-source-code pre { | |
244 | + color: #ffdead; | |
245 | +} | |
246 | + | |
247 | +/* --- Ruby keyword styles --------------------- */ | |
248 | + | |
249 | +.standalone-code { | |
250 | + background: #221111; | |
251 | + color: #ffdead; | |
252 | + overflow: auto; | |
253 | +} | |
254 | + | |
255 | +.ruby-constant { | |
256 | + color: #7fffd4; | |
257 | + background: transparent; | |
258 | +} | |
259 | + | |
260 | +.ruby-keyword { | |
261 | + color: #00ffff; | |
262 | + background: transparent; | |
263 | +} | |
264 | + | |
265 | +.ruby-ivar { | |
266 | + color: #eedd82; | |
267 | + background: transparent; | |
268 | +} | |
269 | + | |
270 | +.ruby-operator { | |
271 | + color: #00ffee; | |
272 | + background: transparent; | |
273 | +} | |
274 | + | |
275 | +.ruby-identifier { | |
276 | + color: #ffdead; | |
277 | + background: transparent; | |
278 | +} | |
279 | + | |
280 | +.ruby-node { | |
281 | + color: #ffa07a; | |
282 | + background: transparent; | |
283 | +} | |
284 | + | |
285 | +.ruby-comment { | |
286 | + color: #b22222; | |
287 | + font-weight: bold; | |
288 | + background: transparent; | |
289 | +} | |
290 | + | |
291 | +.ruby-regexp { | |
292 | + color: #ffa07a; | |
293 | + background: transparent; | |
294 | +} | |
295 | + | |
296 | +.ruby-value { | |
297 | + color: #7fffd4; | |
298 | + background: transparent; | |
299 | +} |
lib/rir.rb
lib/rir/document.rb
... | ... | @@ -0,0 +1,121 @@ |
1 | +#!/usr/bin/env ruby | |
2 | + | |
3 | +# This file is a part of an Information Retrieval oriented Ruby library | |
4 | +# | |
5 | +# Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | |
6 | +# | |
7 | +# This program is free software: you can redistribute it and/or modify | |
8 | +# it under the terms of the GNU General Public License as published by | |
9 | +# the Free Software Foundation, either version 3 of the License, or | |
10 | +# (at your option) any later version. | |
11 | +# | |
12 | +# This program is distributed in the hope that it will be useful, | |
13 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | +# GNU General Public License for more details. | |
16 | +# | |
17 | +# You should have received a copy of the GNU General Public License | |
18 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | + | |
20 | +# General module for many purposes related to Information Retrieval. | |
21 | +module Rir | |
22 | + | |
# A Document is a bag of words and is constructed from a string.
#
# The raw text is kept in +doc_content+; the tokens extracted from it at
# construction time are kept in +words+ (original case preserved).
class Document
  attr_reader :words, :doc_content

  # Builds a Document from the String +content+ and tokenizes it immediately.
  def initialize(content)
    @doc_content = content
    @words = format_words
  end

  # Returns an Array containing the distinct +n+-grams (words joined by a
  # single space) of the current Document, in order of first appearance.
  #
  # An empty Array is returned when +n+ is not a positive Integer or when the
  # Document holds fewer than +n+ words.
  #
  #   ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...]
  def ngrams(n)
    return [] unless n.is_a?(Integer) && n >= 1

    @words.each_cons(n).map { |gram| gram.join(' ') }.uniq
  end

  # Returns a Hash containing the (downcased) words and their associated
  # counts in the current Document. Unknown words read as 0 and looking one
  # up does not add it to the Hash.
  #
  #   count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... }
  def count_words
    @words.each_with_object(Hash.new(0)) { |word, counts| counts[word.downcase] += 1 }
  end

  # Computes the entropy of a given string +s+ inside the document, i.e. the
  # sum of -p(w) * log2(p(w)) over the whitespace-separated tokens of +s+,
  # where p(w) is the relative frequency of +w+ among the document's words.
  #
  # If the string parameter is composed of many words (i.e. tokens separated
  # by whitespace(s)), it is considered as an ngram.
  #
  # Tokens are matched case-insensitively because count_words downcases its
  # keys. Tokens absent from the document contribute 0 (instead of the NaN
  # that 0 * log2(0) would produce), and an empty document yields 0.0.
  #
  #   entropy("guitar") #=> 0.00389919463243839
  def entropy(s)
    total = @words.size.to_f
    return 0.0 if total.zero?

    counts = count_words
    s.split.inject(0.0) do |sum, token|
      occurrences = counts[token.downcase]
      next sum if occurrences.zero?

      probability = occurrences / total
      sum - probability * Math.log2(probability)
    end
  end

  protected

  # Splits the content on whitespace, then on every non-word character (\W),
  # and keeps only the pieces that contain at least one ASCII letter, so
  # purely numeric tokens and empty pieces are dropped.
  #
  # Protected function, only meant to be called at the initialization.
  def format_words
    @doc_content.split.flat_map { |chunk| chunk.split(/\W/) }.grep(/[a-zA-Z]/)
  end
end
99 | + | |
# A WebDocument is a Document with a +url+.
#
# Its content is the page fetched from +url+ with scripts, stylesheets and
# markup removed by the String extensions strip_javascripts,
# strip_stylesheets and strip_xml_tags.
class WebDocument < Document
  attr_reader :url

  # Returns the HTML text from the page of a given +url+ (a String that
  # URI.parse accepts).
  #
  # This is a plain Net::HTTP.get call: no explicit timeout, status check or
  # error handling is added here, so failures surface as exceptions.
  def self.get_content(url)
    # Loaded lazily so that plain Documents never pull in the HTTP stack.
    require 'net/http'
    Net::HTTP.get(URI.parse(url))
  end

  # WebDocument constructor, the content of the Document is the HTML page
  # without the tags.
  #
  # The page is fetched through +self.class+ so that a subclass may override
  # get_content and have its own fetching logic honoured.
  def initialize(url)
    @url = url
    page = self.class.get_content(url)
    super(page.strip_javascripts.strip_stylesheets.strip_xml_tags)
  end
end
117 | + | |
118 | + # A WikipediaPage is a WebDocument; it currently adds no behavior of its own. | |
119 | + class WikipediaPage < WebDocument | |
120 | + end | |
121 | +end |
lib/rir/string.rb
... | ... | @@ -0,0 +1,155 @@ |
1 | +#!/usr/bin/env ruby | |
2 | + | |
3 | +# This file is a part of an Information Retrieval oriented Ruby library | |
4 | +# | |
5 | +# Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | |
6 | +# | |
7 | +# This program is free software: you can redistribute it and/or modify | |
8 | +# it under the terms of the GNU General Public License as published by | |
9 | +# the Free Software Foundation, either version 3 of the License, or | |
10 | +# (at your option) any later version. | |
11 | +# | |
12 | +# This program is distributed in the hope that it will be useful, | |
13 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | +# GNU General Public License for more details. | |
16 | +# | |
17 | +# You should have received a copy of the GNU General Public License | |
18 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. | |
19 | + | |
# General module for many purposes related to Information Retrieval.
module Rir
  # These are the default stopwords provided by Lemur.
  #
  # All entries are lowercase; callers are expected to downcase a word before
  # looking it up. The Array is frozen so that this shared constant cannot be
  # modified by accident at runtime.
  #
  # NOTE(review): the list jumps straight from "a" to "anything"; entries such
  # as "about" or "and" may have been lost - confirm against the upstream list.
  Stoplist = %w[
    a anything anyway anywhere apart are around as at av
    be became because become becomes becoming been before beforehand
    behind being below beside besides between beyond both but by
    can cannot canst certain cf choose contrariwise cos could cu
    day do does doesn't doing dost doth double down dual during
    each either else elsewhere enough et etc even ever every
    everybody everyone everything everywhere except excepted excepting
    exception exclude excluding exclusive far farther farthest few ff
    first for formerly forth forward from front further furthermore
    furthest get go had halves hardly has hast hath have he
    hence henceforth her here hereabouts hereafter hereby herein hereto
    hereupon hers herself him himself hindmost his hither hitherto
    how however howsoever i ie if in inasmuch inc include
    included including indeed indoors inside insomuch instead into
    inward inwards is it its itself just kind kg km last
    latter latterly less lest let like little ltd many may maybe
    me meantime meanwhile might moreover most mostly more mr mrs
    ms much must my myself namely need neither never nevertheless
    next no nobody none nonetheless noone nope nor not nothing
    notwithstanding now nowadays nowhere of off often ok on once
    one only onto or other others otherwise ought our ours
    ourselves out outside over own per perhaps plenty provide quite
    rather really round said sake same sang save saw see seeing
    seem seemed seeming seems seen seldom selves sent several shalt
    she should shown sideways since slept slew slung slunk smote
    so some somebody somehow someone something sometime sometimes
    somewhat somewhere spake spat spoke spoken sprang sprung stave
    staves still such supposing than that the thee their them
    themselves then thence thenceforth there thereabout thereabouts
    thereafter thereby therefore therein thereof thereon thereto thereupon
    these they this those thou though thrice through throughout thru
    thus thy thyself till to together too toward towards ugh
    unable under underneath unless unlike until up upon upward
    upwards us use used using very via vs want was we week
    well were what whatever whatsoever when whence whenever whensoever
    where whereabouts whereafter whereas whereat whereby wherefore
    wherefrom wherein whereinto whereof whereon wheresoever whereto
    whereunto whereupon wherever wherewith whether whew which whichever
    whichsoever while whilst whither who whoa whoever whole whom
    whomever whomsoever whose whosoever why will wilt with within
    without worse worst would wow ye yet year yippee you your
    yours yourself yourselves
  ].freeze
end
71 | + | |
# Extension of the standard class String with useful functions for cleaning
# up text before it is turned into a bag of words.
#
# NOTE: every strip_* method goes through strip_with_pattern, which also
# unescapes HTML entities and converts the result to UTF-8. Chaining several
# strip_* calls therefore unescapes entities more than once, and text such as
# "&lt;b&gt;" unescaped by an earlier call looks like a tag to a later
# strip_xml_tags call.
class String
  include Rir

  # Returns +true+ if +self+ belongs to Rir::Stoplist, +false+ otherwise.
  # The comparison is done on the downcased string.
  def is_stopword?
    Stoplist.include?(downcase)
  end

  # Do not use.
  # TODO: revamp. Find out why this function is here.
  #
  # Current behaviour: each whitespace-separated word has its non-word
  # characters turned into separators; resulting pieces that are exactly one
  # character long are blanked; everything is joined back with single spaces
  # (blanked pieces can leave extra spaces behind).
  def remove_special_characters
    cleaned = split.map do |word|
      pieces = word.gsub(/\W/, ' ').split.map { |piece| piece.sub(/\A.\z/, '') }
      pieces.join(' ').strip.sub(/\A.\z/, '')
    end
    cleaned.join(' ')
  end

  # Removes all XML-like tags from +self+ (in place) and returns +self+.
  #
  #   s = "<html><body>test</body></html>"
  #   s.strip_xml_tags!
  #   s #=> "test"
  def strip_xml_tags!
    replace(strip_with_pattern(%r{</?[^>]*>}))
  end

  # Returns a copy of +self+ with all XML-like tags removed.
  #
  #   s = "<html><body>test</body></html>"
  #   s.strip_xml_tags #=> "test"
  #   s #=> "<html><body>test</body></html>"
  def strip_xml_tags
    dup.strip_xml_tags!
  end

  # Removes all <script> elements (tags and enclosed source) from +self+,
  # in place, and returns +self+. The match is case-insensitive, spans
  # several lines, and does not depend on the attributes of the opening tag
  # or on how they are quoted; empty elements are removed as well.
  # Whitespace outside the element is left untouched.
  #
  #   s = "<script type='text/javascript'>
  #          var skin='vector',
  #          stylepath='http://bits.wikimedia.org/skins-1.5'
  #        </script>test"
  #   s.strip_javascripts!
  #   s #=> "test"
  def strip_javascripts!
    replace(strip_with_pattern(%r{<script(?:\s[^>]*)?>.*?</script\s*>}mi))
  end

  # Returns a copy of +self+ with all <script> elements removed.
  #
  #   s = "<script type='text/javascript'>var skin='vector'</script>test"
  #   s.strip_javascripts #=> "test"
  #   s #=> "<script type='text/javascript'>var skin='vector'</script>test"
  def strip_javascripts
    dup.strip_javascripts!
  end

  # Removes all <style> elements (tags and enclosed CSS) from +self+, in
  # place, and returns +self+. Same matching rules as strip_javascripts!.
  #
  #   s = "<style type='text/css'>body { color: red; }</style>test"
  #   s.strip_stylesheets!
  #   s #=> "test"
  def strip_stylesheets!
    replace(strip_with_pattern(%r{<style(?:\s[^>]*)?>.*?</style\s*>}mi))
  end

  # Returns a copy of +self+ with all <style> elements removed.
  def strip_stylesheets
    dup.strip_stylesheets!
  end

  # Returns the text values inside all occurrences of a XML tag in +self+.
  #
  # +tag_name+ is matched literally (it is regexp-escaped) and as a whole
  # name, so 'a' does not match <abbr>. Values may span several lines.
  # Elements with an empty body are not reported.
  #
  #   s = "four-piece in <a href='#'>Indianapolis</a>, <a href='#'>Indiana</a> at the Murat Theatre"
  #   s.extract_xmltags_values 'a' #=> ["Indianapolis", "Indiana"]
  def extract_xmltags_values(tag_name)
    tag = Regexp.escape(tag_name.to_s)
    scan(%r{<#{tag}(?:\s[^>]*)?>(.+?)</#{tag}\s*>}m).flatten
  end

  private

  # Returns a new String: +self+ with every match of +pattern+ removed, HTML
  # entities unescaped and the result converted to UTF-8.
  def strip_with_pattern(pattern)
    # Required lazily so that loading this file does not depend on these
    # libraries unless a strip_* method is actually called.
    require 'cgi'
    require 'kconv'
    CGI.unescapeHTML(gsub(pattern, '')).toutf8
  end
end
main.rb