Commit 7043da90bf781276184a770f306cfe7b59c17d5a
0 parents
Exists in
master
first commit
Showing 37 changed files with 2767 additions and 0 deletions Inline Diff
- README.markdown
- doc/classes/Rir.html
- doc/classes/Rir/Document.html
- doc/classes/Rir/Document.src/M000010.html
- doc/classes/Rir/Document.src/M000011.html
- doc/classes/Rir/Document.src/M000012.html
- doc/classes/Rir/Document.src/M000013.html
- doc/classes/Rir/Document.src/M000014.html
- doc/classes/Rir/WebDocument.html
- doc/classes/Rir/WebDocument.src/M000015.html
- doc/classes/Rir/WebDocument.src/M000016.html
- doc/classes/Rir/WikipediaPage.html
- doc/classes/String.html
- doc/classes/String.src/M000001.html
- doc/classes/String.src/M000002.html
- doc/classes/String.src/M000003.html
- doc/classes/String.src/M000004.html
- doc/classes/String.src/M000005.html
- doc/classes/String.src/M000006.html
- doc/classes/String.src/M000007.html
- doc/classes/String.src/M000008.html
- doc/classes/String.src/M000009.html
- doc/created.rid
- doc/files/README_markdown.html
- doc/files/lib/rir/document_rb.html
- doc/files/lib/rir/string_rb.html
- doc/files/lib/rir_rb.html
- doc/files/main_rb.html
- doc/fr_class_index.html
- doc/fr_file_index.html
- doc/fr_method_index.html
- doc/index.html
- doc/rdoc-style.css
- lib/rir.rb
- lib/rir/document.rb
- lib/rir/string.rb
- main.rb
README.markdown
File was created | 1 | # Ruby Information Retrieval (rIR) | |
2 | |||
3 | Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | ||
4 | |||
5 | License | ||
6 | ======= | ||
7 | |||
8 | This program is free software: you can redistribute it and/or modify | ||
9 | it under the terms of the GNU General Public License as published by | ||
10 | the Free Software Foundation, either version 3 of the License, or | ||
11 | (at your option) any later version. | ||
12 | |||
13 | This program is distributed in the hope that it will be useful, | ||
14 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | GNU General Public License for more details. | ||
17 | |||
18 | You should have received a copy of the GNU General Public License | ||
19 | along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
20 |
doc/classes/Rir.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Module: Rir [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Module</strong></td> | ||
49 | <td class="class-name-in-header">Rir</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../files/lib/rir/string_rb.html"> | ||
57 | |||
58 | lib/rir/string.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | |||
66 | <a href="../files/lib/rir/document_rb.html"> | ||
67 | |||
68 | lib/rir/document.rb | ||
69 | |||
70 | </a> | ||
71 | |||
72 | |||
73 | <br /> | ||
74 | |||
75 | </td> | ||
76 | </tr> | ||
77 | |||
78 | |||
79 | </table> | ||
80 | </div> | ||
81 | <!-- banner header --> | ||
82 | |||
83 | <div id="bodyContent"> | ||
84 | |||
85 | <div id="contextContent"> | ||
86 | |||
87 | <div id="description"> | ||
88 | <p> | ||
89 | General module for many purposes related to Information Retrieval. | ||
90 | </p> | ||
91 | <hr size="1"></hr><p> | ||
92 | General module for many purposes related to Information Retrieval. | ||
93 | </p> | ||
94 | |||
95 | </div> | ||
96 | |||
97 | </div> | ||
98 | |||
99 | |||
100 | </div> | ||
101 | |||
102 | <!-- if includes --> | ||
103 | |||
104 | <div id="section"> | ||
105 | |||
106 | <div id="class-list"> | ||
107 | <h3 class="section-bar">Classes and Modules</h3> | ||
108 | |||
109 | Class <a href="Rir/Document.html" class="link">Rir::Document</a><br /> | ||
110 | Class <a href="Rir/WebDocument.html" class="link">Rir::WebDocument</a><br /> | ||
111 | Class <a href="Rir/WikipediaPage.html" class="link">Rir::WikipediaPage</a><br /> | ||
112 | |||
113 | </div> | ||
114 | |||
115 | <div id="constants-list"> | ||
116 | <h3 class="section-bar">Constants</h3> | ||
117 | |||
118 | <div class="name-list"> | ||
119 | <table summary="Constants"> | ||
120 | |||
121 | <tr class="top-aligned-row context-row"> | ||
122 | <td class="context-item-name">Stoplist</td> | ||
123 | <td>=</td> | ||
124 | <td class="context-item-value">[ "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", 
"provide", "quite", "rather", "really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", "yours", "yourself", "yourselves" ]</td> | ||
125 | |||
126 | <td> </td> | ||
127 | <td class="context-item-desc"> | ||
128 | These are the default stopwords provided by Lemur. | ||
129 | |||
130 | </td> | ||
131 | |||
132 | </tr> | ||
133 | |||
134 | </table> | ||
135 | </div> | ||
136 | </div> | ||
137 | |||
138 | |||
139 | |||
140 | |||
141 | <!-- if method_list --> | ||
142 | |||
143 | |||
144 | |||
145 | |||
146 | </div> | ||
147 | |||
148 | <div id="validator-badges"> | ||
149 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
150 | </div> | ||
151 | |||
152 | </body> | ||
153 | </html> | ||
154 |
doc/classes/Rir/Document.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Class: Rir::Document [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Class</strong></td> | ||
49 | <td class="class-name-in-header">Rir::Document</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../../files/lib/rir/document_rb.html"> | ||
57 | |||
58 | lib/rir/document.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | </td> | ||
66 | </tr> | ||
67 | |||
68 | |||
69 | <tr class="top-aligned-row"> | ||
70 | <td><strong>Parent:</strong></td> | ||
71 | <td> | ||
72 | |||
73 | Object | ||
74 | |||
75 | </td> | ||
76 | </tr> | ||
77 | |||
78 | </table> | ||
79 | </div> | ||
80 | <!-- banner header --> | ||
81 | |||
82 | <div id="bodyContent"> | ||
83 | |||
84 | <div id="contextContent"> | ||
85 | |||
86 | <div id="description"> | ||
87 | <p> | ||
88 | A <a href="Document.html">Document</a> is a bag of words and is constructed | ||
89 | from a string. | ||
90 | </p> | ||
91 | |||
92 | </div> | ||
93 | |||
94 | </div> | ||
95 | |||
96 | |||
97 | <div id="method-list"> | ||
98 | <h3 class="section-bar">Methods</h3> | ||
99 | |||
100 | <div class="name-list"> | ||
101 | |||
102 | <a href="#M000012">count_words</a> | ||
103 | |||
104 | <a href="#M000013">entropy</a> | ||
105 | |||
106 | <a href="#M000010">format_words</a> | ||
107 | |||
108 | <a href="#M000014">new</a> | ||
109 | |||
110 | <a href="#M000011">ngrams</a> | ||
111 | |||
112 | </div> | ||
113 | </div> | ||
114 | |||
115 | </div> | ||
116 | |||
117 | <!-- if includes --> | ||
118 | |||
119 | <div id="section"> | ||
120 | |||
121 | |||
122 | |||
123 | <div id="attribute-list"> | ||
124 | <h3 class="section-bar">Attributes</h3> | ||
125 | |||
126 | <div class="name-list"> | ||
127 | <table> | ||
128 | |||
129 | <tr class="top-aligned-row context-row"> | ||
130 | <td class="context-item-name">doc_content</td> | ||
131 | |||
132 | <td class="context-item-value"> [R] </td> | ||
133 | |||
134 | <td class="context-item-desc"></td> | ||
135 | </tr> | ||
136 | |||
137 | <tr class="top-aligned-row context-row"> | ||
138 | <td class="context-item-name">words</td> | ||
139 | |||
140 | <td class="context-item-value"> [R] </td> | ||
141 | |||
142 | <td class="context-item-desc"></td> | ||
143 | </tr> | ||
144 | |||
145 | </table> | ||
146 | </div> | ||
147 | </div> | ||
148 | |||
149 | |||
150 | <!-- if method_list --> | ||
151 | |||
152 | <div id="methods"> | ||
153 | |||
154 | <h3 class="section-bar">Public Class methods</h3> | ||
155 | |||
156 | |||
157 | <div id="method-M000014" class="method-detail"> | ||
158 | <a name="M000014"></a> | ||
159 | |||
160 | <div class="method-heading"> | ||
161 | |||
162 | <a href="Document.src/M000014.html" target="Code" class="method-signature" | ||
163 | onclick="popupCode('Document.src/M000014.html');return false;"> | ||
164 | |||
165 | <span class="method-name">new</span><span class="method-args">(content)</span> | ||
166 | |||
167 | </a> | ||
168 | |||
169 | </div> | ||
170 | |||
171 | <div class="method-description"> | ||
172 | |||
173 | </div> | ||
174 | </div> | ||
175 | |||
176 | |||
177 | <h3 class="section-bar">Public Instance methods</h3> | ||
178 | |||
179 | |||
180 | <div id="method-M000012" class="method-detail"> | ||
181 | <a name="M000012"></a> | ||
182 | |||
183 | <div class="method-heading"> | ||
184 | |||
185 | <a href="Document.src/M000012.html" target="Code" class="method-signature" | ||
186 | onclick="popupCode('Document.src/M000012.html');return false;"> | ||
187 | |||
188 | <span class="method-name">count_words</span><span class="method-args">()</span> | ||
189 | |||
190 | </a> | ||
191 | |||
192 | </div> | ||
193 | |||
194 | <div class="method-description"> | ||
195 | |||
196 | <p> | ||
197 | Returns a Hash containing the words and their associated counts in the | ||
198 | current <a href="Document.html">Document</a>. | ||
199 | </p> | ||
200 | <pre> | ||
201 | count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } | ||
202 | </pre> | ||
203 | |||
204 | </div> | ||
205 | </div> | ||
206 | |||
207 | |||
208 | <div id="method-M000013" class="method-detail"> | ||
209 | <a name="M000013"></a> | ||
210 | |||
211 | <div class="method-heading"> | ||
212 | |||
213 | <a href="Document.src/M000013.html" target="Code" class="method-signature" | ||
214 | onclick="popupCode('Document.src/M000013.html');return false;"> | ||
215 | |||
216 | <span class="method-name">entropy</span><span class="method-args">(s)</span> | ||
217 | |||
218 | </a> | ||
219 | |||
220 | </div> | ||
221 | |||
222 | <div class="method-description"> | ||
223 | |||
224 | <p> | ||
225 | Computes the entropy of a given string <tt>s</tt> inside the document. | ||
226 | </p> | ||
227 | <p> | ||
228 | If the string parameter is composed of several words (i.e. tokens separated by | ||
229 | whitespace), it is treated as an n-gram. | ||
230 | </p> | ||
231 | <pre> | ||
232 | entropy("guitar") #=> 0.00389919463243839 | ||
233 | </pre> | ||
234 | |||
235 | </div> | ||
236 | </div> | ||
237 | |||
238 | |||
239 | <div id="method-M000011" class="method-detail"> | ||
240 | <a name="M000011"></a> | ||
241 | |||
242 | <div class="method-heading"> | ||
243 | |||
244 | <a href="Document.src/M000011.html" target="Code" class="method-signature" | ||
245 | onclick="popupCode('Document.src/M000011.html');return false;"> | ||
246 | |||
247 | <span class="method-name">ngrams</span><span class="method-args">(n)</span> | ||
248 | |||
249 | </a> | ||
250 | |||
251 | </div> | ||
252 | |||
253 | <div class="method-description"> | ||
254 | |||
255 | <p> | ||
256 | Returns an Array containing the <tt>n</tt>-grams (words) from the current | ||
257 | <a href="Document.html">Document</a>. | ||
258 | </p> | ||
259 | <pre> | ||
260 | ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...] | ||
261 | </pre> | ||
262 | |||
263 | </div> | ||
264 | </div> | ||
265 | |||
266 | |||
267 | <h3 class="section-bar">Protected Instance methods</h3> | ||
268 | |||
269 | |||
270 | <div id="method-M000010" class="method-detail"> | ||
271 | <a name="M000010"></a> | ||
272 | |||
273 | <div class="method-heading"> | ||
274 | |||
275 | <a href="Document.src/M000010.html" target="Code" class="method-signature" | ||
276 | onclick="popupCode('Document.src/M000010.html');return false;"> | ||
277 | |||
278 | <span class="method-name">format_words</span><span class="method-args">()</span> | ||
279 | |||
280 | </a> | ||
281 | |||
282 | </div> | ||
283 | |||
284 | <div class="method-description"> | ||
285 | |||
286 | <p> | ||
287 | Any non-word characters are removed from the words (see <a | ||
288 | href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a> | ||
289 | and the <tt>\W</tt> special escape). | ||
290 | </p> | ||
291 | <p> | ||
292 | Protected function, only meant to be called during initialization. | ||
293 | </p> | ||
294 | |||
295 | </div> | ||
296 | </div> | ||
297 | |||
298 | |||
299 | |||
300 | </div> | ||
301 | |||
302 | |||
303 | |||
304 | |||
305 | </div> | ||
306 | |||
307 | <div id="validator-badges"> | ||
308 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
309 | </div> | ||
310 | |||
311 | </body> | ||
312 | </html> | ||
313 |
doc/classes/Rir/Document.src/M000010.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>format_words (Rir::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span> | ||
12 | <span class="ruby-identifier">wo</span> = [] | ||
13 | |||
14 | <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | ||
15 | <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span> | ||
16 | <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span> | ||
17 | <span class="ruby-keyword kw">end</span> | ||
18 | <span class="ruby-keyword kw">end</span> | ||
19 | |||
20 | <span class="ruby-identifier">wo</span> | ||
21 | <span class="ruby-keyword kw">end</span></pre> | ||
22 | </body> | ||
23 | </html> | ||
24 |
doc/classes/Rir/Document.src/M000011.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>ngrams (Rir::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>) | ||
12 | <span class="ruby-identifier">window</span> = [] | ||
13 | <span class="ruby-identifier">ngrams_array</span> = [] | ||
14 | |||
15 | <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | ||
16 | <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>) | ||
17 | <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span> | ||
18 | <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">" "</span>) | ||
19 | <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>) | ||
20 | <span class="ruby-keyword kw">end</span> | ||
21 | <span class="ruby-keyword kw">end</span> | ||
22 | |||
23 | <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span> | ||
24 | <span class="ruby-keyword kw">end</span></pre> | ||
25 | </body> | ||
26 | </html> | ||
27 |
doc/classes/Rir/Document.src/M000012.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>count_words (Rir::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span> | ||
12 | <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> } | ||
13 | <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> } | ||
14 | |||
15 | <span class="ruby-identifier">counts</span> | ||
16 | <span class="ruby-keyword kw">end</span></pre> | ||
17 | </body> | ||
18 | </html> | ||
19 |
doc/classes/Rir/Document.src/M000013.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>entropy (Rir::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 77</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>) | ||
12 | <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span> | ||
13 | <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span> | ||
14 | |||
15 | <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | ||
16 | <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span> | ||
17 | <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>) | ||
18 | <span class="ruby-keyword kw">end</span> | ||
19 | |||
20 | <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span> | ||
21 | <span class="ruby-identifier">en</span> | ||
22 | <span class="ruby-keyword kw">end</span></pre> | ||
23 | </body> | ||
24 | </html> | ||
25 |
doc/classes/Rir/Document.src/M000014.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>new (Rir::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 92</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">content</span>) | ||
12 | <span class="ruby-ivar">@doc_content</span> = <span class="ruby-identifier">content</span> | ||
13 | <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">format_words</span> | ||
14 | <span class="ruby-keyword kw">end</span></pre> | ||
15 | </body> | ||
16 | </html> | ||
17 |
doc/classes/Rir/WebDocument.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Class: Rir::WebDocument [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Class</strong></td> | ||
49 | <td class="class-name-in-header">Rir::WebDocument</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../../files/lib/rir/document_rb.html"> | ||
57 | |||
58 | lib/rir/document.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | </td> | ||
66 | </tr> | ||
67 | |||
68 | |||
69 | <tr class="top-aligned-row"> | ||
70 | <td><strong>Parent:</strong></td> | ||
71 | <td> | ||
72 | |||
73 | <a href="Document.html"> | ||
74 | |||
75 | Rir::Document | ||
76 | |||
77 | </a> | ||
78 | |||
79 | </td> | ||
80 | </tr> | ||
81 | |||
82 | </table> | ||
83 | </div> | ||
84 | <!-- banner header --> | ||
85 | |||
86 | <div id="bodyContent"> | ||
87 | |||
88 | <div id="contextContent"> | ||
89 | |||
90 | <div id="description"> | ||
91 | <p> | ||
92 | A <a href="WebDocument.html">WebDocument</a> is a <a | ||
93 | href="Document.html">Document</a> with a <tt>url</tt>. | ||
94 | </p> | ||
95 | |||
96 | </div> | ||
97 | |||
98 | </div> | ||
99 | |||
100 | |||
101 | <div id="method-list"> | ||
102 | <h3 class="section-bar">Methods</h3> | ||
103 | |||
104 | <div class="name-list"> | ||
105 | |||
106 | <a href="#M000015">get_content</a> | ||
107 | |||
108 | <a href="#M000016">new</a> | ||
109 | |||
110 | </div> | ||
111 | </div> | ||
112 | |||
113 | </div> | ||
114 | |||
115 | <!-- if includes --> | ||
116 | |||
117 | <div id="section"> | ||
118 | |||
119 | |||
120 | |||
121 | <div id="attribute-list"> | ||
122 | <h3 class="section-bar">Attributes</h3> | ||
123 | |||
124 | <div class="name-list"> | ||
125 | <table> | ||
126 | |||
127 | <tr class="top-aligned-row context-row"> | ||
128 | <td class="context-item-name">url</td> | ||
129 | |||
130 | <td class="context-item-value"> [R] </td> | ||
131 | |||
132 | <td class="context-item-desc"></td> | ||
133 | </tr> | ||
134 | |||
135 | </table> | ||
136 | </div> | ||
137 | </div> | ||
138 | |||
139 | |||
140 | <!-- if method_list --> | ||
141 | |||
142 | <div id="methods"> | ||
143 | |||
144 | <h3 class="section-bar">Public Class methods</h3> | ||
145 | |||
146 | |||
147 | <div id="method-M000015" class="method-detail"> | ||
148 | <a name="M000015"></a> | ||
149 | |||
150 | <div class="method-heading"> | ||
151 | |||
152 | <a href="WebDocument.src/M000015.html" target="Code" class="method-signature" | ||
153 | onclick="popupCode('WebDocument.src/M000015.html');return false;"> | ||
154 | |||
155 | <span class="method-name">get_content</span><span class="method-args">(url)</span> | ||
156 | |||
157 | </a> | ||
158 | |||
159 | </div> | ||
160 | |||
161 | <div class="method-description"> | ||
162 | |||
163 | <p> | ||
164 | Returns the HTML text from the page of a given <tt>url</tt>. | ||
165 | </p> | ||
166 | |||
167 | </div> | ||
168 | </div> | ||
169 | |||
170 | |||
171 | <div id="method-M000016" class="method-detail"> | ||
172 | <a name="M000016"></a> | ||
173 | |||
174 | <div class="method-heading"> | ||
175 | |||
176 | <a href="WebDocument.src/M000016.html" target="Code" class="method-signature" | ||
177 | onclick="popupCode('WebDocument.src/M000016.html');return false;"> | ||
178 | |||
179 | <span class="method-name">new</span><span class="method-args">(url)</span> | ||
180 | |||
181 | </a> | ||
182 | |||
183 | </div> | ||
184 | |||
185 | <div class="method-description"> | ||
186 | |||
187 | <p> | ||
188 | <a href="WebDocument.html">WebDocument</a> constructor, the content of the | ||
189 | <a href="Document.html">Document</a> is the HTML page without the tags. | ||
190 | </p> | ||
191 | |||
192 | </div> | ||
193 | </div> | ||
194 | |||
195 | |||
196 | |||
197 | </div> | ||
198 | |||
199 | |||
200 | |||
201 | |||
202 | </div> | ||
203 | |||
204 | <div id="validator-badges"> | ||
205 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
206 | </div> | ||
207 | |||
208 | </body> | ||
209 | </html> | ||
210 |
doc/classes/Rir/WebDocument.src/M000015.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>get_content (Rir::WebDocument)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 105</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>) | ||
12 | <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span> | ||
13 | <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>)) | ||
14 | <span class="ruby-keyword kw">end</span></pre> | ||
15 | </body> | ||
16 | </html> | ||
17 |
doc/classes/Rir/WebDocument.src/M000016.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>new (Rir::WebDocument)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 112</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">url</span>) | ||
12 | <span class="ruby-ivar">@url</span> = <span class="ruby-identifier">url</span> | ||
13 | <span class="ruby-keyword kw">super</span> <span class="ruby-constant">WebDocument</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>).<span class="ruby-identifier">strip_javascripts</span>.<span class="ruby-identifier">strip_stylesheets</span>.<span class="ruby-identifier">strip_xml_tags</span> | ||
14 | <span class="ruby-keyword kw">end</span></pre> | ||
15 | </body> | ||
16 | </html> | ||
17 |
doc/classes/Rir/WikipediaPage.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Class: Rir::WikipediaPage [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Class</strong></td> | ||
49 | <td class="class-name-in-header">Rir::WikipediaPage</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../../files/lib/rir/document_rb.html"> | ||
57 | |||
58 | lib/rir/document.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | </td> | ||
66 | </tr> | ||
67 | |||
68 | |||
69 | <tr class="top-aligned-row"> | ||
70 | <td><strong>Parent:</strong></td> | ||
71 | <td> | ||
72 | |||
73 | <a href="WebDocument.html"> | ||
74 | |||
75 | Rir::WebDocument | ||
76 | |||
77 | </a> | ||
78 | |||
79 | </td> | ||
80 | </tr> | ||
81 | |||
82 | </table> | ||
83 | </div> | ||
84 | <!-- banner header --> | ||
85 | |||
86 | <div id="bodyContent"> | ||
87 | |||
88 | <div id="contextContent"> | ||
89 | |||
90 | <div id="description"> | ||
91 | <p> | ||
92 | A <a href="WikipediaPage.html">WikipediaPage</a> is a <a | ||
93 | href="WebDocument.html">WebDocument</a>. | ||
94 | </p> | ||
95 | |||
96 | </div> | ||
97 | |||
98 | </div> | ||
99 | |||
100 | |||
101 | </div> | ||
102 | |||
103 | <!-- if includes --> | ||
104 | |||
105 | <div id="section"> | ||
106 | |||
107 | |||
108 | |||
109 | |||
110 | <!-- if method_list --> | ||
111 | |||
112 | |||
113 | |||
114 | |||
115 | </div> | ||
116 | |||
117 | <div id="validator-badges"> | ||
118 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
119 | </div> | ||
120 | |||
121 | </body> | ||
122 | </html> | ||
123 |
doc/classes/String.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Class: String [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Class</strong></td> | ||
49 | <td class="class-name-in-header">String</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../files/lib/rir/string_rb.html"> | ||
57 | |||
58 | lib/rir/string.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | </td> | ||
66 | </tr> | ||
67 | |||
68 | |||
69 | <tr class="top-aligned-row"> | ||
70 | <td><strong>Parent:</strong></td> | ||
71 | <td> | ||
72 | |||
73 | Object | ||
74 | |||
75 | </td> | ||
76 | </tr> | ||
77 | |||
78 | </table> | ||
79 | </div> | ||
80 | <!-- banner header --> | ||
81 | |||
82 | <div id="bodyContent"> | ||
83 | |||
84 | <div id="contextContent"> | ||
85 | |||
86 | <div id="description"> | ||
87 | <p> | ||
88 | Extension of the standard class <a href="String.html">String</a> with | ||
89 | useful functions. | ||
90 | </p> | ||
91 | |||
92 | </div> | ||
93 | |||
94 | </div> | ||
95 | |||
96 | |||
97 | <div id="method-list"> | ||
98 | <h3 class="section-bar">Methods</h3> | ||
99 | |||
100 | <div class="name-list"> | ||
101 | |||
102 | <a href="#M000009">extract_xmltags_values</a> | ||
103 | |||
104 | <a href="#M000001">is_stopword?</a> | ||
105 | |||
106 | <a href="#M000002">remove_special_characters</a> | ||
107 | |||
108 | <a href="#M000006">strip_javascripts</a> | ||
109 | |||
110 | <a href="#M000005">strip_javascripts!</a> | ||
111 | |||
112 | <a href="#M000008">strip_stylesheets</a> | ||
113 | |||
114 | <a href="#M000007">strip_stylesheets!</a> | ||
115 | |||
116 | <a href="#M000004">strip_xml_tags</a> | ||
117 | |||
118 | <a href="#M000003">strip_xml_tags!</a> | ||
119 | |||
120 | </div> | ||
121 | </div> | ||
122 | |||
123 | </div> | ||
124 | |||
125 | <!-- if includes --> | ||
126 | |||
127 | <div id="includes"> | ||
128 | <h3 class="section-bar">Included Modules</h3> | ||
129 | |||
130 | <div id="includes-list"> | ||
131 | |||
132 | <span class="include-name"><a href="Rir.html">Rir</a></span> | ||
133 | |||
134 | </div> | ||
135 | </div> | ||
136 | |||
137 | <div id="section"> | ||
138 | |||
139 | |||
140 | |||
141 | |||
142 | <!-- if method_list --> | ||
143 | |||
144 | <div id="methods"> | ||
145 | |||
146 | <h3 class="section-bar">Public Instance methods</h3> | ||
147 | |||
148 | |||
149 | <div id="method-M000009" class="method-detail"> | ||
150 | <a name="M000009"></a> | ||
151 | |||
152 | <div class="method-heading"> | ||
153 | |||
154 | <a href="String.src/M000009.html" target="Code" class="method-signature" | ||
155 | onclick="popupCode('String.src/M000009.html');return false;"> | ||
156 | |||
157 | <span class="method-name">extract_xmltags_values</span><span class="method-args">(tag_name)</span> | ||
158 | |||
159 | </a> | ||
160 | |||
161 | </div> | ||
162 | |||
163 | <div class="method-description"> | ||
164 | |||
165 | <p> | ||
166 | Returns the text values inside all occurrences of an XML tag in <tt>self</tt> | ||
167 | </p> | ||
168 | <pre> | ||
169 | s = "four-piece in <a href='#'>Indianapolis</a>, <a href='#'>Indiana</a> at the Murat Theatre" | ||
170 | s.extract_xmltags_values 'a' #=> ["Indianapolis", "Indiana"] | ||
171 | </pre> | ||
172 | |||
173 | </div> | ||
174 | </div> | ||
175 | |||
176 | |||
177 | <div id="method-M000001" class="method-detail"> | ||
178 | <a name="M000001"></a> | ||
179 | |||
180 | <div class="method-heading"> | ||
181 | |||
182 | <a href="String.src/M000001.html" target="Code" class="method-signature" | ||
183 | onclick="popupCode('String.src/M000001.html');return false;"> | ||
184 | |||
185 | <span class="method-name">is_stopword?</span><span class="method-args">()</span> | ||
186 | |||
187 | </a> | ||
188 | |||
189 | </div> | ||
190 | |||
191 | <div class="method-description"> | ||
192 | |||
193 | <p> | ||
194 | Returns <tt>true</tt> if <tt>self</tt> belongs to Rir::Stoplist, | ||
195 | <tt>false</tt> otherwise. | ||
196 | </p> | ||
197 | |||
198 | </div> | ||
199 | </div> | ||
200 | |||
201 | |||
202 | <div id="method-M000002" class="method-detail"> | ||
203 | <a name="M000002"></a> | ||
204 | |||
205 | <div class="method-heading"> | ||
206 | |||
207 | <a href="String.src/M000002.html" target="Code" class="method-signature" | ||
208 | onclick="popupCode('String.src/M000002.html');return false;"> | ||
209 | |||
210 | <span class="method-name">remove_special_characters</span><span class="method-args">()</span> | ||
211 | |||
212 | </a> | ||
213 | |||
214 | </div> | ||
215 | |||
216 | <div class="method-description"> | ||
217 | |||
218 | <p> | ||
219 | Do not use. TODO: revamp. Find out why this function is here. | ||
220 | </p> | ||
221 | |||
222 | </div> | ||
223 | </div> | ||
224 | |||
225 | |||
226 | <div id="method-M000006" class="method-detail"> | ||
227 | <a name="M000006"></a> | ||
228 | |||
229 | <div class="method-heading"> | ||
230 | |||
231 | <a href="String.src/M000006.html" target="Code" class="method-signature" | ||
232 | onclick="popupCode('String.src/M000006.html');return false;"> | ||
233 | |||
234 | <span class="method-name">strip_javascripts</span><span class="method-args">()</span> | ||
235 | |||
236 | </a> | ||
237 | |||
238 | </div> | ||
239 | |||
240 | <div class="method-description"> | ||
241 | |||
242 | <p> | ||
243 | Removes all Javascript sources from <tt>self</tt>. | ||
244 | </p> | ||
245 | <pre> | ||
246 | s = "<script type='text/javascript'> | ||
247 | var skin='vector', | ||
248 | stylepath='http://bits.wikimedia.org/skins-1.5' | ||
249 | </script> | ||
250 | |||
251 | test" | ||
252 | s.strip_javascripts #=> "test" | ||
253 | </pre> | ||
254 | |||
255 | </div> | ||
256 | </div> | ||
257 | |||
258 | |||
259 | <div id="method-M000005" class="method-detail"> | ||
260 | <a name="M000005"></a> | ||
261 | |||
262 | <div class="method-heading"> | ||
263 | |||
264 | <a href="String.src/M000005.html" target="Code" class="method-signature" | ||
265 | onclick="popupCode('String.src/M000005.html');return false;"> | ||
266 | |||
267 | <span class="method-name">strip_javascripts!</span><span class="method-args">()</span> | ||
268 | |||
269 | </a> | ||
270 | |||
271 | </div> | ||
272 | |||
273 | <div class="method-description"> | ||
274 | |||
275 | <p> | ||
276 | Removes all Javascript sources from <tt>self</tt>. | ||
277 | </p> | ||
278 | <pre> | ||
279 | s = "<script type='text/javascript'> | ||
280 | var skin='vector', | ||
281 | stylepath='http://bits.wikimedia.org/skins-1.5' | ||
282 | </script> | ||
283 | |||
284 | test" | ||
285 | s.strip_javascripts! | ||
286 | s #=> "test" | ||
287 | </pre> | ||
288 | |||
289 | </div> | ||
290 | </div> | ||
291 | |||
292 | |||
293 | <div id="method-M000008" class="method-detail"> | ||
294 | <a name="M000008"></a> | ||
295 | |||
296 | <div class="method-heading"> | ||
297 | |||
298 | <a href="String.src/M000008.html" target="Code" class="method-signature" | ||
299 | onclick="popupCode('String.src/M000008.html');return false;"> | ||
300 | |||
301 | <span class="method-name">strip_stylesheets</span><span class="method-args">()</span> | ||
302 | |||
303 | </a> | ||
304 | |||
305 | </div> | ||
306 | |||
307 | <div class="method-description"> | ||
308 | |||
309 | </div> | ||
310 | </div> | ||
311 | |||
312 | |||
313 | <div id="method-M000007" class="method-detail"> | ||
314 | <a name="M000007"></a> | ||
315 | |||
316 | <div class="method-heading"> | ||
317 | |||
318 | <a href="String.src/M000007.html" target="Code" class="method-signature" | ||
319 | onclick="popupCode('String.src/M000007.html');return false;"> | ||
320 | |||
321 | <span class="method-name">strip_stylesheets!</span><span class="method-args">()</span> | ||
322 | |||
323 | </a> | ||
324 | |||
325 | </div> | ||
326 | |||
327 | <div class="method-description"> | ||
328 | |||
329 | </div> | ||
330 | </div> | ||
331 | |||
332 | |||
333 | <div id="method-M000004" class="method-detail"> | ||
334 | <a name="M000004"></a> | ||
335 | |||
336 | <div class="method-heading"> | ||
337 | |||
338 | <a href="String.src/M000004.html" target="Code" class="method-signature" | ||
339 | onclick="popupCode('String.src/M000004.html');return false;"> | ||
340 | |||
341 | <span class="method-name">strip_xml_tags</span><span class="method-args">()</span> | ||
342 | |||
343 | </a> | ||
344 | |||
345 | </div> | ||
346 | |||
347 | <div class="method-description"> | ||
348 | |||
349 | <p> | ||
350 | Removes all XML-like tags from <tt>self</tt>. | ||
351 | </p> | ||
352 | <pre> | ||
353 | s = "<html><body>test</body></html>" | ||
354 | s.strip_xml_tags #=> "test" | ||
355 | s #=> "<html><body>test</body></html>" | ||
356 | </pre> | ||
357 | |||
358 | </div> | ||
359 | </div> | ||
360 | |||
361 | |||
362 | <div id="method-M000003" class="method-detail"> | ||
363 | <a name="M000003"></a> | ||
364 | |||
365 | <div class="method-heading"> | ||
366 | |||
367 | <a href="String.src/M000003.html" target="Code" class="method-signature" | ||
368 | onclick="popupCode('String.src/M000003.html');return false;"> | ||
369 | |||
370 | <span class="method-name">strip_xml_tags!</span><span class="method-args">()</span> | ||
371 | |||
372 | </a> | ||
373 | |||
374 | </div> | ||
375 | |||
376 | <div class="method-description"> | ||
377 | |||
378 | <p> | ||
379 | Removes all XML-like tags from <tt>self</tt>. | ||
380 | </p> | ||
381 | <pre> | ||
382 | s = "<html><body>test</body></html>" | ||
383 | s.strip_xml_tags! | ||
384 | s #=> "test" | ||
385 | </pre> | ||
386 | |||
387 | </div> | ||
388 | </div> | ||
389 | |||
390 | |||
391 | |||
392 | </div> | ||
393 | |||
394 | |||
395 | |||
396 | |||
397 | </div> | ||
398 | |||
399 | <div id="validator-badges"> | ||
400 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
401 | </div> | ||
402 | |||
403 | </body> | ||
404 | </html> | ||
405 |
doc/classes/String.src/M000001.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>is_stopword? (String)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 77</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">is_stopword?</span> | ||
12 | <span class="ruby-constant">Stoplist</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">downcase</span>) | ||
13 | <span class="ruby-keyword kw">end</span></pre> | ||
14 | </body> | ||
15 | </html> | ||
16 |
doc/classes/String.src/M000002.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>remove_special_characters (String)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 83</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">remove_special_characters</span> | ||
12 | <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/\W/</span>,<span class="ruby-value str">' '</span>).<span class="ruby-identifier">split</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/\W/</span>,<span class="ruby-value str">' '</span>).<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/\A.\z/</span>, <span class="ruby-value str">''</span>)}.<span class="ruby-identifier">join</span>(<span class="ruby-value str">' '</span>).<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/\A.\z/</span>, <span class="ruby-value str">''</span>)}.<span class="ruby-identifier">join</span>(<span class="ruby-value str">' '</span>) | ||
13 | <span class="ruby-keyword kw">end</span></pre> | ||
14 | </body> | ||
15 | </html> | ||
16 |
doc/classes/String.src/M000003.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>strip_xml_tags! (String)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 92</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_xml_tags!</span> | ||
12 | <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator"><</span>\<span class="ruby-regexp re">/?[^>]*>/</span> | ||
13 | <span class="ruby-keyword kw">end</span></pre> | ||
14 | </body> | ||
15 | </html> | ||
16 |
doc/classes/String.src/M000004.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>strip_xml_tags (String)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 101</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_xml_tags</span> | ||
12 | <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_xml_tags!</span> | ||
13 | <span class="ruby-keyword kw">end</span></pre> | ||
14 | </body> | ||
15 | </html> | ||
16 |
doc/classes/String.src/M000005.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>strip_javascripts! (String)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 115</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_javascripts!</span> | ||
12 | <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator"><</span><span class="ruby-identifier">script</span> <span class="ruby-identifier">type</span>=<span class="ruby-value str">"text\/javascript"</span><span class="ruby-operator">></span>(.<span class="ruby-operator">+</span><span class="ruby-value">?)</span><span class="ruby-operator"><</span>\<span class="ruby-regexp re">/script>/</span><span class="ruby-identifier">m</span> | ||
13 | <span class="ruby-keyword kw">end</span></pre> | ||
14 | </body> | ||
15 | </html> | ||
16 |
doc/classes/String.src/M000006.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>strip_javascripts (String)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 128</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_javascripts</span> | ||
12 | <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_javascripts!</span> | ||
13 | <span class="ruby-keyword kw">end</span></pre> | ||
14 | </body> | ||
15 | </html> | ||
16 |
doc/classes/String.src/M000007.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>strip_stylesheets! (String)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 132</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_stylesheets!</span> | ||
12 | <span class="ruby-comment cmt"># TODO: rewamp. dunno what is it.</span> | ||
13 | <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator"><</span><span class="ruby-identifier">style</span> <span class="ruby-identifier">type</span>=<span class="ruby-value str">"text\/css"</span><span class="ruby-operator">></span>(.<span class="ruby-operator">+</span><span class="ruby-value">?)</span><span class="ruby-operator"><</span>\<span class="ruby-regexp re">/style>/</span><span class="ruby-identifier">m</span> | ||
14 | <span class="ruby-keyword kw">end</span></pre> | ||
15 | </body> | ||
16 | </html> | ||
17 |
doc/classes/String.src/M000008.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>strip_stylesheets (String)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 137</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_stylesheets</span> | ||
12 | <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_stylesheets!</span> | ||
13 | <span class="ruby-keyword kw">end</span></pre> | ||
14 | </body> | ||
15 | </html> | ||
16 |
doc/classes/String.src/M000009.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>extract_xmltags_values (String)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 145</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">extract_xmltags_values</span>(<span class="ruby-identifier">tag_name</span>) | ||
12 | <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">scan</span>(<span class="ruby-node">/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/</span>).<span class="ruby-identifier">flatten</span> | ||
13 | <span class="ruby-keyword kw">end</span></pre> | ||
14 | </body> | ||
15 | </html> | ||
16 |
doc/created.rid
File was created | 1 | Fri, 05 Nov 2010 14:41:10 +0100 | |
2 |
doc/files/README_markdown.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>File: README.markdown [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="fileHeader"> | ||
46 | <h1>README.markdown</h1> | ||
47 | <table class="header-table"> | ||
48 | <tr class="top-aligned-row"> | ||
49 | <td><strong>Path:</strong></td> | ||
50 | <td>README.markdown | ||
51 | |||
52 | </td> | ||
53 | </tr> | ||
54 | <tr class="top-aligned-row"> | ||
55 | <td><strong>Last Update:</strong></td> | ||
56 | <td>2010-11-05 14:40:41 +0100</td> | ||
57 | </tr> | ||
58 | </table> | ||
59 | </div> | ||
60 | <!-- banner header --> | ||
61 | |||
62 | <div id="bodyContent"> | ||
63 | |||
64 | <div id="contextContent"> | ||
65 | |||
66 | </div> | ||
67 | |||
68 | |||
69 | </div> | ||
70 | |||
71 | <!-- if includes --> | ||
72 | |||
73 | <div id="section"> | ||
74 | |||
75 | |||
76 | |||
77 | |||
78 | <!-- if method_list --> | ||
79 | |||
80 | |||
81 | |||
82 | |||
83 | </div> | ||
84 | |||
85 | <div id="validator-badges"> | ||
86 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
87 | </div> | ||
88 | |||
89 | </body> | ||
90 | </html> | ||
91 |
doc/files/lib/rir/document_rb.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>File: document.rb [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="fileHeader"> | ||
46 | <h1>document.rb</h1> | ||
47 | <table class="header-table"> | ||
48 | <tr class="top-aligned-row"> | ||
49 | <td><strong>Path:</strong></td> | ||
50 | <td>lib/rir/document.rb | ||
51 | |||
52 | </td> | ||
53 | </tr> | ||
54 | <tr class="top-aligned-row"> | ||
55 | <td><strong>Last Update:</strong></td> | ||
56 | <td>2010-11-05 14:39:35 +0100</td> | ||
57 | </tr> | ||
58 | </table> | ||
59 | </div> | ||
60 | <!-- banner header --> | ||
61 | |||
62 | <div id="bodyContent"> | ||
63 | |||
64 | <div id="contextContent"> | ||
65 | |||
66 | <div id="description"> | ||
67 | <p> | ||
68 | This file is a part of an Information Retrieval oriented Ruby library | ||
69 | </p> | ||
70 | <p> | ||
71 | Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | ||
72 | </p> | ||
73 | <p> | ||
74 | This program is free software: you can redistribute it and/or modify it | ||
75 | under the terms of the GNU General Public License as published by the Free | ||
76 | Software Foundation, either version 3 of the License, or (at your option) | ||
77 | any later version. | ||
78 | </p> | ||
79 | <p> | ||
80 | This program is distributed in the hope that it will be useful, but WITHOUT | ||
81 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
82 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
83 | more details. | ||
84 | </p> | ||
85 | <p> | ||
86 | You should have received a copy of the GNU General Public License along | ||
87 | with this program. If not, see <<a | ||
88 | href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. | ||
89 | </p> | ||
90 | |||
91 | </div> | ||
92 | |||
93 | <div id="requires-list"> | ||
94 | <h3 class="section-bar">Required files</h3> | ||
95 | |||
96 | <div class="name-list"> | ||
97 | |||
98 | net/http | ||
99 | |||
100 | </div> | ||
101 | </div> | ||
102 | |||
103 | </div> | ||
104 | |||
105 | |||
106 | </div> | ||
107 | |||
108 | <!-- if includes --> | ||
109 | |||
110 | <div id="section"> | ||
111 | |||
112 | |||
113 | |||
114 | |||
115 | <!-- if method_list --> | ||
116 | |||
117 | |||
118 | |||
119 | |||
120 | </div> | ||
121 | |||
122 | <div id="validator-badges"> | ||
123 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
124 | </div> | ||
125 | |||
126 | </body> | ||
127 | </html> | ||
128 |
doc/files/lib/rir/string_rb.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>File: string.rb [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="fileHeader"> | ||
46 | <h1>string.rb</h1> | ||
47 | <table class="header-table"> | ||
48 | <tr class="top-aligned-row"> | ||
49 | <td><strong>Path:</strong></td> | ||
50 | <td>lib/rir/string.rb | ||
51 | |||
52 | </td> | ||
53 | </tr> | ||
54 | <tr class="top-aligned-row"> | ||
55 | <td><strong>Last Update:</strong></td> | ||
56 | <td>2010-11-05 14:39:35 +0100</td> | ||
57 | </tr> | ||
58 | </table> | ||
59 | </div> | ||
60 | <!-- banner header --> | ||
61 | |||
62 | <div id="bodyContent"> | ||
63 | |||
64 | <div id="contextContent"> | ||
65 | |||
66 | <div id="description"> | ||
67 | <p> | ||
68 | This file is a part of an Information Retrieval oriented Ruby library | ||
69 | </p> | ||
70 | <p> | ||
71 | Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | ||
72 | </p> | ||
73 | <p> | ||
74 | This program is free software: you can redistribute it and/or modify it | ||
75 | under the terms of the GNU General Public License as published by the Free | ||
76 | Software Foundation, either version 3 of the License, or (at your option) | ||
77 | any later version. | ||
78 | </p> | ||
79 | <p> | ||
80 | This program is distributed in the hope that it will be useful, but WITHOUT | ||
81 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
82 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
83 | more details. | ||
84 | </p> | ||
85 | <p> | ||
86 | You should have received a copy of the GNU General Public License along | ||
87 | with this program. If not, see <<a | ||
88 | href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. | ||
89 | </p> | ||
90 | |||
91 | </div> | ||
92 | |||
93 | <div id="requires-list"> | ||
94 | <h3 class="section-bar">Required files</h3> | ||
95 | |||
96 | <div class="name-list"> | ||
97 | |||
98 | cgi | ||
99 | |||
100 | kconv | ||
101 | |||
102 | </div> | ||
103 | </div> | ||
104 | |||
105 | </div> | ||
106 | |||
107 | |||
108 | </div> | ||
109 | |||
110 | <!-- if includes --> | ||
111 | |||
112 | <div id="section"> | ||
113 | |||
114 | |||
115 | |||
116 | |||
117 | <!-- if method_list --> | ||
118 | |||
119 | |||
120 | |||
121 | |||
122 | </div> | ||
123 | |||
124 | <div id="validator-badges"> | ||
125 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
126 | </div> | ||
127 | |||
128 | </body> | ||
129 | </html> | ||
130 |
doc/files/lib/rir_rb.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>File: rir.rb [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="fileHeader"> | ||
46 | <h1>rir.rb</h1> | ||
47 | <table class="header-table"> | ||
48 | <tr class="top-aligned-row"> | ||
49 | <td><strong>Path:</strong></td> | ||
50 | <td>lib/rir.rb | ||
51 | |||
52 | </td> | ||
53 | </tr> | ||
54 | <tr class="top-aligned-row"> | ||
55 | <td><strong>Last Update:</strong></td> | ||
56 | <td>2010-11-05 14:39:35 +0100</td> | ||
57 | </tr> | ||
58 | </table> | ||
59 | </div> | ||
60 | <!-- banner header --> | ||
61 | |||
62 | <div id="bodyContent"> | ||
63 | |||
64 | <div id="contextContent"> | ||
65 | |||
66 | <div id="requires-list"> | ||
67 | <h3 class="section-bar">Required files</h3> | ||
68 | |||
69 | <div class="name-list"> | ||
70 | |||
71 | rir/document | ||
72 | |||
73 | rir/string | ||
74 | |||
75 | </div> | ||
76 | </div> | ||
77 | |||
78 | </div> | ||
79 | |||
80 | |||
81 | </div> | ||
82 | |||
83 | <!-- if includes --> | ||
84 | |||
85 | <div id="section"> | ||
86 | |||
87 | |||
88 | |||
89 | |||
90 | <!-- if method_list --> | ||
91 | |||
92 | |||
93 | |||
94 | |||
95 | </div> | ||
96 | |||
97 | <div id="validator-badges"> | ||
98 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
99 | </div> | ||
100 | |||
101 | </body> | ||
102 | </html> | ||
103 |
doc/files/main_rb.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>File: main.rb [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="fileHeader"> | ||
46 | <h1>main.rb</h1> | ||
47 | <table class="header-table"> | ||
48 | <tr class="top-aligned-row"> | ||
49 | <td><strong>Path:</strong></td> | ||
50 | <td>main.rb | ||
51 | |||
52 | </td> | ||
53 | </tr> | ||
54 | <tr class="top-aligned-row"> | ||
55 | <td><strong>Last Update:</strong></td> | ||
56 | <td>2010-11-05 14:40:11 +0100</td> | ||
57 | </tr> | ||
58 | </table> | ||
59 | </div> | ||
60 | <!-- banner header --> | ||
61 | |||
62 | <div id="bodyContent"> | ||
63 | |||
64 | <div id="contextContent"> | ||
65 | |||
66 | <div id="requires-list"> | ||
67 | <h3 class="section-bar">Required files</h3> | ||
68 | |||
69 | <div class="name-list"> | ||
70 | |||
71 | rir | ||
72 | |||
73 | </div> | ||
74 | </div> | ||
75 | |||
76 | </div> | ||
77 | |||
78 | |||
79 | </div> | ||
80 | |||
81 | <!-- if includes --> | ||
82 | |||
83 | <div id="section"> | ||
84 | |||
85 | |||
86 | |||
87 | |||
88 | <!-- if method_list --> | ||
89 | |||
90 | |||
91 | |||
92 | |||
93 | </div> | ||
94 | |||
95 | <div id="validator-badges"> | ||
96 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
97 | </div> | ||
98 | |||
99 | </body> | ||
100 | </html> | ||
101 |
doc/fr_class_index.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <!-- | ||
5 | |||
6 | Classes [RDoc Documentation] | ||
7 | |||
8 | --> | ||
9 | <head> | ||
10 | <title>Classes [RDoc Documentation]</title> | ||
11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
12 | <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> | ||
13 | <base target="docwin" /> | ||
14 | </head> | ||
15 | <body> | ||
16 | <div class="index"> | ||
17 | <h1 class="section-bar">Classes</h1> | ||
18 | <div id="index-entries"> | ||
19 | |||
20 | <a href="classes/Rir.html">Rir</a><br /> | ||
21 | |||
22 | <a href="classes/Rir/Document.html">Rir::Document</a><br /> | ||
23 | |||
24 | <a href="classes/Rir/WebDocument.html">Rir::WebDocument</a><br /> | ||
25 | |||
26 | <a href="classes/Rir/WikipediaPage.html">Rir::WikipediaPage</a><br /> | ||
27 | |||
28 | <a href="classes/String.html">String</a><br /> | ||
29 | |||
30 | </div> | ||
31 | </div> | ||
32 | </body> | ||
33 | </html> | ||
34 |
doc/fr_file_index.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <!-- | ||
5 | |||
6 | Files [RDoc Documentation] | ||
7 | |||
8 | --> | ||
9 | <head> | ||
10 | <title>Files [RDoc Documentation]</title> | ||
11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
12 | <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> | ||
13 | <base target="docwin" /> | ||
14 | </head> | ||
15 | <body> | ||
16 | <div class="index"> | ||
17 | <h1 class="section-bar">Files</h1> | ||
18 | <div id="index-entries"> | ||
19 | |||
20 | <a href="files/README_markdown.html">README.markdown</a><br /> | ||
21 | |||
22 | <a href="files/lib/rir_rb.html">lib/rir.rb</a><br /> | ||
23 | |||
24 | <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br /> | ||
25 | |||
26 | <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br /> | ||
27 | |||
28 | <a href="files/main_rb.html">main.rb</a><br /> | ||
29 | |||
30 | </div> | ||
31 | </div> | ||
32 | </body> | ||
33 | </html> | ||
34 |
doc/fr_method_index.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <!-- | ||
5 | |||
6 | Methods [RDoc Documentation] | ||
7 | |||
8 | --> | ||
9 | <head> | ||
10 | <title>Methods [RDoc Documentation]</title> | ||
11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
12 | <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> | ||
13 | <base target="docwin" /> | ||
14 | </head> | ||
15 | <body> | ||
16 | <div class="index"> | ||
17 | <h1 class="section-bar">Methods</h1> | ||
18 | <div id="index-entries"> | ||
19 | |||
20 | <a href="classes/Rir/Document.html#M000012">count_words (Rir::Document)</a><br /> | ||
21 | |||
22 | <a href="classes/Rir/Document.html#M000013">entropy (Rir::Document)</a><br /> | ||
23 | |||
24 | <a href="classes/String.html#M000009">extract_xmltags_values (String)</a><br /> | ||
25 | |||
26 | <a href="classes/Rir/Document.html#M000010">format_words (Rir::Document)</a><br /> | ||
27 | |||
28 | <a href="classes/Rir/WebDocument.html#M000015">get_content (Rir::WebDocument)</a><br /> | ||
29 | |||
30 | <a href="classes/String.html#M000001">is_stopword? (String)</a><br /> | ||
31 | |||
32 | <a href="classes/Rir/WebDocument.html#M000016">new (Rir::WebDocument)</a><br /> | ||
33 | |||
34 | <a href="classes/Rir/Document.html#M000014">new (Rir::Document)</a><br /> | ||
35 | |||
36 | <a href="classes/Rir/Document.html#M000011">ngrams (Rir::Document)</a><br /> | ||
37 | |||
38 | <a href="classes/String.html#M000002">remove_special_characters (String)</a><br /> | ||
39 | |||
40 | <a href="classes/String.html#M000006">strip_javascripts (String)</a><br /> | ||
41 | |||
42 | <a href="classes/String.html#M000005">strip_javascripts! (String)</a><br /> | ||
43 | |||
44 | <a href="classes/String.html#M000008">strip_stylesheets (String)</a><br /> | ||
45 | |||
46 | <a href="classes/String.html#M000007">strip_stylesheets! (String)</a><br /> | ||
47 | |||
48 | <a href="classes/String.html#M000004">strip_xml_tags (String)</a><br /> | ||
49 | |||
50 | <a href="classes/String.html#M000003">strip_xml_tags! (String)</a><br /> | ||
51 | |||
52 | </div> | ||
53 | </div> | ||
54 | </body> | ||
55 | </html> | ||
56 |
doc/index.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <!-- | ||
5 | |||
6 | RDoc Documentation | ||
7 | |||
8 | --> | ||
9 | <head> | ||
10 | <title>RDoc Documentation</title> | ||
11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
12 | </head> | ||
13 | <frameset rows="20%, 80%"> | ||
14 | <frameset cols="25%,35%,45%"> | ||
15 | <frame src="fr_file_index.html" title="Files" name="Files" /> | ||
16 | <frame src="fr_class_index.html" name="Classes" /> | ||
17 | <frame src="fr_method_index.html" name="Methods" /> | ||
18 | </frameset> | ||
19 | <frame src="files/README_markdown.html" name="docwin" /> | ||
20 | </frameset> | ||
21 | </html> | ||
22 |
doc/rdoc-style.css
File was created | 1 | body { | |
2 | font-family: Verdana,Arial,Helvetica,sans-serif; | ||
3 | font-size: 90%; | ||
4 | margin: 0; | ||
5 | margin-left: 40px; | ||
6 | padding: 0; | ||
7 | background: white; | ||
8 | color: black; | ||
9 | } | ||
10 | |||
11 | h1, h2, h3, h4 { | ||
12 | margin: 0; | ||
13 | background: transparent; | ||
14 | } | ||
15 | |||
16 | h1 { | ||
17 | font-size: 150%; | ||
18 | } | ||
19 | |||
20 | h2,h3,h4 { | ||
21 | margin-top: 1em; | ||
22 | } | ||
23 | |||
24 | :link, :visited { | ||
25 | background: #eef; | ||
26 | color: #039; | ||
27 | text-decoration: none; | ||
28 | } | ||
29 | |||
30 | :link:hover, :visited:hover { | ||
31 | background: #039; | ||
32 | color: #eef; | ||
33 | } | ||
34 | |||
35 | /* Override the base stylesheet's Anchor inside a table cell */ | ||
36 | td > :link, td > :visited { | ||
37 | background: transparent; | ||
38 | color: #039; | ||
39 | text-decoration: none; | ||
40 | } | ||
41 | |||
42 | /* and inside a section title */ | ||
43 | .section-title > :link, .section-title > :visited { | ||
44 | background: transparent; | ||
45 | color: #eee; | ||
46 | text-decoration: none; | ||
47 | } | ||
48 | |||
49 | /* === Structural elements =================================== */ | ||
50 | |||
51 | .index { | ||
52 | margin: 0; | ||
53 | margin-left: -40px; | ||
54 | padding: 0; | ||
55 | font-size: 90%; | ||
56 | } | ||
57 | |||
58 | .index :link, .index :visited { | ||
59 | margin-left: 0.7em; | ||
60 | } | ||
61 | |||
62 | .index .section-bar { | ||
63 | margin-left: 0px; | ||
64 | padding-left: 0.7em; | ||
65 | background: #ccc; | ||
66 | font-size: small; | ||
67 | } | ||
68 | |||
69 | #classHeader, #fileHeader { | ||
70 | width: auto; | ||
71 | color: white; | ||
72 | padding: 0.5em 1.5em 0.5em 1.5em; | ||
73 | margin: 0; | ||
74 | margin-left: -40px; | ||
75 | border-bottom: 3px solid #006; | ||
76 | } | ||
77 | |||
78 | #classHeader :link, #fileHeader :link, | ||
79 | #classHeader :visited, #fileHeader :visited { | ||
80 | background: inherit; | ||
81 | color: white; | ||
82 | } | ||
83 | |||
84 | #classHeader td, #fileHeader td { | ||
85 | background: inherit; | ||
86 | color: white; | ||
87 | } | ||
88 | |||
89 | #fileHeader { | ||
90 | background: #057; | ||
91 | } | ||
92 | |||
93 | #classHeader { | ||
94 | background: #048; | ||
95 | } | ||
96 | |||
97 | .class-name-in-header { | ||
98 | font-size: 180%; | ||
99 | font-weight: bold; | ||
100 | } | ||
101 | |||
102 | #bodyContent { | ||
103 | padding: 0 1.5em 0 1.5em; | ||
104 | } | ||
105 | |||
106 | #description { | ||
107 | padding: 0.5em 1.5em; | ||
108 | background: #efefef; | ||
109 | border: 1px dotted #999; | ||
110 | } | ||
111 | |||
112 | #description h1, #description h2, #description h3, | ||
113 | #description h4, #description h5, #description h6 { | ||
114 | color: #125; | ||
115 | background: transparent; | ||
116 | } | ||
117 | |||
118 | #validator-badges { | ||
119 | text-align: center; | ||
120 | } | ||
121 | |||
122 | #validator-badges img { | ||
123 | border: 0; | ||
124 | } | ||
125 | |||
126 | #copyright { | ||
127 | color: #333; | ||
128 | background: #efefef; | ||
129 | font: 0.75em sans-serif; | ||
130 | margin-top: 5em; | ||
131 | margin-bottom: 0; | ||
132 | padding: 0.5em 2em; | ||
133 | } | ||
134 | |||
135 | /* === Classes =================================== */ | ||
136 | |||
137 | table.header-table { | ||
138 | color: white; | ||
139 | font-size: small; | ||
140 | } | ||
141 | |||
142 | .type-note { | ||
143 | font-size: small; | ||
144 | color: #dedede; | ||
145 | } | ||
146 | |||
147 | .section-bar { | ||
148 | color: #333; | ||
149 | border-bottom: 1px solid #999; | ||
150 | margin-left: -20px; | ||
151 | } | ||
152 | |||
153 | .section-title { | ||
154 | background: #79a; | ||
155 | color: #eee; | ||
156 | padding: 3px; | ||
157 | margin-top: 2em; | ||
158 | margin-left: -30px; | ||
159 | border: 1px solid #999; | ||
160 | } | ||
161 | |||
162 | .top-aligned-row { | ||
163 | vertical-align: top | ||
164 | } | ||
165 | |||
166 | .bottom-aligned-row { | ||
167 | vertical-align: bottom | ||
168 | } | ||
169 | |||
170 | #diagram img { | ||
171 | border: 0; | ||
172 | } | ||
173 | |||
174 | /* --- Context section classes ----------------------- */ | ||
175 | |||
176 | .context-row { } | ||
177 | |||
178 | .context-item-name { | ||
179 | font-family: monospace; | ||
180 | font-weight: bold; | ||
181 | color: black; | ||
182 | } | ||
183 | |||
184 | .context-item-value { | ||
185 | font-size: small; | ||
186 | color: #448; | ||
187 | } | ||
188 | |||
189 | .context-item-desc { | ||
190 | color: #333; | ||
191 | padding-left: 2em; | ||
192 | } | ||
193 | |||
194 | /* --- Method classes -------------------------- */ | ||
195 | |||
196 | .method-detail { | ||
197 | background: #efefef; | ||
198 | padding: 0; | ||
199 | margin-top: 0.5em; | ||
200 | margin-bottom: 1em; | ||
201 | border: 1px dotted #ccc; | ||
202 | } | ||
203 | |||
204 | .method-heading { | ||
205 | color: black; | ||
206 | background: #ccc; | ||
207 | border-bottom: 1px solid #666; | ||
208 | padding: 0.2em 0.5em 0 0.5em; | ||
209 | } | ||
210 | |||
211 | .method-signature { | ||
212 | color: black; | ||
213 | background: inherit; | ||
214 | } | ||
215 | |||
216 | .method-name { | ||
217 | font-weight: bold; | ||
218 | } | ||
219 | |||
220 | .method-args { | ||
221 | font-style: italic; | ||
222 | } | ||
223 | |||
224 | .method-description { | ||
225 | padding: 0 0.5em 0 0.5em; | ||
226 | } | ||
227 | |||
228 | /* --- Source code sections -------------------- */ | ||
229 | |||
230 | :link.source-toggle, :visited.source-toggle { | ||
231 | font-size: 90%; | ||
232 | } | ||
233 | |||
234 | div.method-source-code { | ||
235 | background: #262626; | ||
236 | color: #ffdead; | ||
237 | margin: 1em; | ||
238 | padding: 0.5em; | ||
239 | border: 1px dashed #999; | ||
240 | overflow: auto; | ||
241 | } | ||
242 | |||
243 | div.method-source-code pre { | ||
244 | color: #ffdead; | ||
245 | } | ||
246 | |||
247 | /* --- Ruby keyword styles --------------------- */ | ||
248 | |||
249 | .standalone-code { | ||
250 | background: #221111; | ||
251 | color: #ffdead; | ||
252 | overflow: auto; | ||
253 | } | ||
254 | |||
255 | .ruby-constant { | ||
256 | color: #7fffd4; | ||
257 | background: transparent; | ||
258 | } | ||
259 | |||
260 | .ruby-keyword { | ||
261 | color: #00ffff; | ||
262 | background: transparent; | ||
263 | } | ||
264 | |||
265 | .ruby-ivar { | ||
266 | color: #eedd82; | ||
267 | background: transparent; | ||
268 | } | ||
269 | |||
270 | .ruby-operator { | ||
271 | color: #00ffee; | ||
272 | background: transparent; | ||
273 | } | ||
274 | |||
275 | .ruby-identifier { | ||
276 | color: #ffdead; | ||
277 | background: transparent; | ||
278 | } | ||
279 | |||
280 | .ruby-node { | ||
281 | color: #ffa07a; | ||
282 | background: transparent; | ||
283 | } | ||
284 | |||
285 | .ruby-comment { | ||
286 | color: #b22222; | ||
287 | font-weight: bold; | ||
288 | background: transparent; | ||
289 | } | ||
290 | |||
291 | .ruby-regexp { | ||
292 | color: #ffa07a; | ||
293 | background: transparent; | ||
294 | } | ||
295 | |||
296 | .ruby-value { | ||
297 | color: #7fffd4; | ||
298 | background: transparent; | ||
299 | } | ||
300 |
lib/rir.rb
File was created | 1 | #!/usr/bin/env ruby | |
2 | |||
3 | require 'rir/document' | ||
4 | require 'rir/string' | ||
5 |
lib/rir/document.rb
File was created | 1 | #!/usr/bin/env ruby | |
2 | |||
3 | # This file is a part of an Information Retrieval oriented Ruby library | ||
4 | # | ||
5 | # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | ||
6 | # | ||
7 | # This program is free software: you can redistribute it and/or modify | ||
8 | # it under the terms of the GNU General Public License as published by | ||
9 | # the Free Software Foundation, either version 3 of the License, or | ||
10 | # (at your option) any later version. | ||
11 | # | ||
12 | # This program is distributed in the hope that it will be useful, | ||
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | # GNU General Public License for more details. | ||
16 | # | ||
17 | # You should have received a copy of the GNU General Public License | ||
18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | |||
20 | # General module for many purposes related to Information Retrieval. | ||
21 | module Rir | ||
22 | |||
23 | # A Document is a bag of words and is constructed from a string. | ||
24 | class Document | ||
25 | attr_reader :words, :doc_content | ||
26 | |||
27 | # Any non-word characters are removed from the words (see http://perldoc.perl.org/perlre.html | ||
28 | # and the \\W special escape). | ||
29 | # | ||
30 | # Protected function, only meant to by called at the initialization. | ||
31 | def format_words | ||
32 | wo = [] | ||
33 | |||
34 | @doc_content.split.each do |w| | ||
35 | w.split(/\W/).each do |sw| | ||
36 | wo.push(sw) if sw =~ /[a-zA-Z]/ | ||
37 | end | ||
38 | end | ||
39 | |||
40 | wo | ||
41 | end | ||
42 | |||
43 | # Returns an Array containing the +n+-grams (words) from the current Document. | ||
44 | # | ||
45 | # ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...] | ||
46 | def ngrams(n) | ||
47 | window = [] | ||
48 | ngrams_array = [] | ||
49 | |||
50 | @words.each do |w| | ||
51 | window.push(w) | ||
52 | if window.size == n | ||
53 | ngrams_array.push window.join(" ") | ||
54 | window.delete_at(0) | ||
55 | end | ||
56 | end | ||
57 | |||
58 | ngrams_array.uniq | ||
59 | end | ||
60 | |||
61 | # Returns a Hash containing the words and their associated counts in the current Document. | ||
62 | # | ||
63 | # count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } | ||
64 | def count_words | ||
65 | counts = Hash.new { |h,k| h[k] = 0 } | ||
66 | @words.each { |w| counts[w.downcase] += 1 } | ||
67 | |||
68 | counts | ||
69 | end | ||
70 | |||
71 | # Computes the entropy of a given string +s+ inside the document. | ||
72 | # | ||
73 | # If the string parameter is composed of many words (i.e. tokens separated | ||
74 | # by whitespace(s)), it is considered as an ngram. | ||
75 | # | ||
76 | # entropy("guitar") #=> 0.00389919463243839 | ||
77 | def entropy(s) | ||
78 | en = 0.0 | ||
79 | counts = self.count_words | ||
80 | |||
81 | s.split.each do |w| | ||
82 | p_wi = counts[w].to_f/@words.count.to_f | ||
83 | en += p_wi*Math.log2(p_wi) | ||
84 | end | ||
85 | |||
86 | en *= -1 | ||
87 | en | ||
88 | end | ||
89 | |||
90 | |||
91 | |||
92 | def initialize(content) | ||
93 | @doc_content = content | ||
94 | @words = format_words | ||
95 | end | ||
96 | |||
97 | protected :format_words | ||
98 | end | ||
99 | |||
# A WebDocument is a Document with a +url+.
class WebDocument < Document
  attr_reader :url

  # Downloads the page located at +url+ with a plain HTTP GET and returns
  # its body (the HTML text) as a String.
  def self.get_content(url)
    require 'net/http'
    location = URI.parse(url)
    Net::HTTP.get(location)
  end

  # WebDocument constructor. The content handed to Document is the HTML page
  # with its scripts, stylesheets and tags removed, in that order.
  def initialize(url)
    @url = url
    html = WebDocument.get_content(url)
    text = html.strip_javascripts.strip_stylesheets.strip_xml_tags
    super(text)
  end
end
117 | |||
# A WikipediaPage is a WebDocument; it currently adds no behaviour of its own.
class WikipediaPage < WebDocument; end
120 | end | ||
121 | end | ||
122 |
lib/rir/string.rb
File was created | 1 | #!/usr/bin/env ruby | |
2 | |||
3 | # This file is a part of an Information Retrieval oriented Ruby library | ||
4 | # | ||
5 | # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | ||
6 | # | ||
7 | # This program is free software: you can redistribute it and/or modify | ||
8 | # it under the terms of the GNU General Public License as published by | ||
9 | # the Free Software Foundation, either version 3 of the License, or | ||
10 | # (at your option) any later version. | ||
11 | # | ||
12 | # This program is distributed in the hope that it will be useful, | ||
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | # GNU General Public License for more details. | ||
16 | # | ||
17 | # You should have received a copy of the GNU General Public License | ||
18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
19 | |||
# General module for many purposes related to Information Retrieval.
module Rir

  # These are the default stopwords provided by Lemur.
  # All entries are lowercase; String#is_stopword? downcases before lookup.
  Stoplist = %w[
    a anything anyway anywhere apart are around as at av
    be became because become becomes becoming been before beforehand
    behind being below beside besides between beyond both but by
    can cannot canst certain cf choose contrariwise cos could cu
    day do does doesn't doing dost doth double down dual during
    each either else elsewhere enough et etc even ever every
    everybody everyone everything everywhere except excepted excepting
    exception exclude excluding exclusive far farther farthest few ff
    first for formerly forth forward from front further furthermore
    furthest get go had halves hardly has hast hath have he
    hence henceforth her here hereabouts hereafter hereby herein hereto
    hereupon hers herself him himself hindmost his hither hitherto
    how however howsoever i ie if in inasmuch inc include
    included including indeed indoors inside insomuch instead into
    inward inwards is it its itself just kind kg km last
    latter latterly less lest let like little ltd many may maybe
    me meantime meanwhile might moreover most mostly more mr mrs
    ms much must my myself namely need neither never nevertheless
    next no nobody none nonetheless noone nope nor not nothing
    notwithstanding now nowadays nowhere of off often ok on once
    one only onto or other others otherwise ought our ours
    ourselves out outside over own per perhaps plenty provide quite
    rather really round said sake same sang save saw see seeing
    seem seemed seeming seems seen seldom selves sent several shalt
    she should shown sideways since slept slew slung slunk smote
    so some somebody somehow someone something sometime sometimes
    somewhat somewhere spake spat spoke spoken sprang sprung stave
    staves still such supposing than that the thee their them
    themselves then thence thenceforth there thereabout thereabouts
    thereafter thereby therefore therein thereof thereon thereto thereupon
    these they this those thou though thrice through throughout thru
    thus thy thyself till to together too toward towards ugh
    unable under underneath unless unlike until up upon upward
    upwards us use used using very via vs want was we week
    well were what whatever whatsoever when whence whenever whensoever
    where whereabouts whereafter whereas whereat whereby wherefore
    wherefrom wherein whereinto whereof whereon wheresoever whereto
    whereunto whereupon wherever wherewith whether whew which whichever
    whichsoever while whilst whither who whoa whoever whole whom
    whomever whomsoever whose whosoever why will wilt with within
    without worse worst would wow ye yet year yippee you your
    yours yourself yourselves
  ]

end

# Extension of the standard class String with useful functions.
class String
  include Rir

  # Returns +true+ if +self+ belongs to Rir::Stoplist, +false+ otherwise.
  # The lookup is case-insensitive: +self+ is downcased first.
  def is_stopword?
    Stoplist.include?(downcase)
  end

  # Do not use.
  # TODO: revamp. find why this function is here.
  #
  # Current behaviour: every whitespace-separated token has its non-word
  # characters turned into separators, the resulting pieces that are a single
  # character long are emptied, and everything is joined back with single
  # spaces (emptied pieces still leave their separating spaces behind).
  def remove_special_characters
    split.map do |token|
      # After the gsub + split each piece holds word characters only, so the
      # only remaining clean-up is dropping one-character pieces.
      pieces = token.gsub(/\W/, ' ').split.map { |piece| piece.sub(/\A.\z/, '') }
      pieces.join(' ').strip.sub(/\A.\z/, '')
    end.join(' ')
  end

  # Removes all XML-like tags from +self+, in place, and returns +self+.
  #
  #   s = "<html><body>test</body></html>"
  #   s.strip_xml_tags!
  #   s #=> "test"
  def strip_xml_tags!
    replace(strip_with_pattern(/<\/?[^>]*>/))
  end

  # Returns a copy of +self+ with all XML-like tags removed.
  #
  #   s = "<html><body>test</body></html>"
  #   s.strip_xml_tags #=> "test"
  #   s #=> "<html><body>test</body></html>"
  def strip_xml_tags
    dup.strip_xml_tags!
  end

  # Removes all <script> elements (opening tag, source and closing tag) from
  # +self+, in place, and returns +self+.
  #
  # The opening tag may carry any attributes with any quoting, the element
  # may be empty or span several lines, and tag names are matched
  # case-insensitively.
  #
  #   s = "<script type='text/javascript'>var skin='vector';</script>test"
  #   s.strip_javascripts!
  #   s #=> "test"
  def strip_javascripts!
    replace(strip_with_pattern(/<script\b[^>]*>.*?<\/script\s*>/mi))
  end

  # Returns a copy of +self+ with all <script> elements removed.
  #
  #   s = "<script type='text/javascript'>var skin='vector';</script>test"
  #   s.strip_javascripts #=> "test"
  def strip_javascripts
    dup.strip_javascripts!
  end

  # Removes all <style> elements (opening tag, CSS source and closing tag)
  # from +self+, in place, and returns +self+. Matching rules are the same as
  # for +strip_javascripts!+.
  #
  #   s = "<style type='text/css'>p { color: red }</style>test"
  #   s.strip_stylesheets!
  #   s #=> "test"
  def strip_stylesheets!
    replace(strip_with_pattern(/<style\b[^>]*>.*?<\/style\s*>/mi))
  end

  # Returns a copy of +self+ with all <style> elements removed.
  def strip_stylesheets
    dup.strip_stylesheets!
  end

  # Returns the text values inside all occurrences of a XML tag in +self+.
  #
  # +tag_name+ is taken literally (regexp metacharacters are escaped) and
  # must match the whole tag name, so 'a' does not match <abbr>.
  #
  #   s = "four-piece in <a href='#'>Indianapolis</a>, <a href='#'>Indiana</a> at the Murat Theatre"
  #   s.extract_xmltags_values 'a' #=> ["Indianapolis", "Indiana"]
  def extract_xmltags_values(tag_name)
    tag = Regexp.escape(tag_name.to_s)
    scan(/<#{tag}\b[^>]*>(.+?)<\/#{tag}>/).flatten
  end

  private

  # Returns a new String: +self+ without the parts matching +pattern+, with
  # HTML entities unescaped and converted to UTF-8 by Kconv.
  #
  # NOTE(review): entities are unescaped on every call, so chaining several
  # strip_* helpers unescapes the text more than once.
  def strip_with_pattern(pattern)
    # Loaded lazily so that merely requiring this file stays cheap.
    require 'cgi'
    require 'kconv'
    CGI.unescapeHTML(gsub(pattern, '')).toutf8
  end
end
156 |
main.rb
File was created | 1 | $LOAD_PATH.unshift File.expand_path(File.join(File.dirname(__FILE__), "lib")) | |
2 | |||
3 | require 'rir' | ||
4 |