Commit 35f45ab54d40489d2fe5d4fc5d39c40290635dea
1 parent
7043da90bf
Exists in
master
changing the main module name, generating RDoc
Showing 22 changed files with 935 additions and 935 deletions Inline Diff
- doc/classes/RIR.html
- doc/classes/RIR/Document.html
- doc/classes/RIR/Document.src/M000010.html
- doc/classes/RIR/Document.src/M000011.html
- doc/classes/RIR/Document.src/M000012.html
- doc/classes/RIR/Document.src/M000013.html
- doc/classes/RIR/Document.src/M000014.html
- doc/classes/RIR/WebDocument.html
- doc/classes/RIR/WebDocument.src/M000015.html
- doc/classes/RIR/WebDocument.src/M000016.html
- doc/classes/RIR/WikipediaPage.html
- doc/classes/Rir.html
- doc/classes/Rir/Document.html
- doc/classes/Rir/Document.src/M000010.html
- doc/classes/Rir/Document.src/M000011.html
- doc/classes/Rir/Document.src/M000012.html
- doc/classes/Rir/Document.src/M000013.html
- doc/classes/Rir/Document.src/M000014.html
- doc/classes/Rir/WebDocument.html
- doc/classes/Rir/WebDocument.src/M000015.html
- doc/classes/Rir/WebDocument.src/M000016.html
- doc/classes/Rir/WikipediaPage.html
doc/classes/RIR.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Module: RIR [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Module</strong></td> | ||
49 | <td class="class-name-in-header">RIR</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../files/lib/rir/string_rb.html"> | ||
57 | |||
58 | lib/rir/string.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | |||
66 | <a href="../files/lib/rir/document_rb.html"> | ||
67 | |||
68 | lib/rir/document.rb | ||
69 | |||
70 | </a> | ||
71 | |||
72 | |||
73 | <br /> | ||
74 | |||
75 | </td> | ||
76 | </tr> | ||
77 | |||
78 | |||
79 | </table> | ||
80 | </div> | ||
81 | <!-- banner header --> | ||
82 | |||
83 | <div id="bodyContent"> | ||
84 | |||
85 | <div id="contextContent"> | ||
86 | |||
87 | <div id="description"> | ||
88 | <p> | ||
89 | General module for many purposes related to Information Retrieval. | ||
90 | </p> | ||
91 | <hr size="1"></hr><p> | ||
92 | General module for many purposes related to Information Retrieval. | ||
93 | </p> | ||
94 | |||
95 | </div> | ||
96 | |||
97 | </div> | ||
98 | |||
99 | |||
100 | </div> | ||
101 | |||
102 | <!-- if includes --> | ||
103 | |||
104 | <div id="section"> | ||
105 | |||
106 | <div id="class-list"> | ||
107 | <h3 class="section-bar">Classes and Modules</h3> | ||
108 | |||
109 | Class <a href="RIR/Document.html" class="link">RIR::Document</a><br /> | ||
110 | Class <a href="RIR/WebDocument.html" class="link">RIR::WebDocument</a><br /> | ||
111 | Class <a href="RIR/WikipediaPage.html" class="link">RIR::WikipediaPage</a><br /> | ||
112 | |||
113 | </div> | ||
114 | |||
115 | <div id="constants-list"> | ||
116 | <h3 class="section-bar">Constants</h3> | ||
117 | |||
118 | <div class="name-list"> | ||
119 | <table summary="Constants"> | ||
120 | |||
121 | <tr class="top-aligned-row context-row"> | ||
122 | <td class="context-item-name">Stoplist</td> | ||
123 | <td>=</td> | ||
124 | <td class="context-item-value">[ "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", 
"provide", "quite", "rather", "really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", "yours", "yourself", "yourselves" ]</td> | ||
125 | |||
126 | <td> </td> | ||
127 | <td class="context-item-desc"> | ||
128 | These are the default stopwords provided by Lemur. | ||
129 | |||
130 | </td> | ||
131 | |||
132 | </tr> | ||
133 | |||
134 | </table> | ||
135 | </div> | ||
136 | </div> | ||
137 | |||
138 | |||
139 | |||
140 | |||
141 | <!-- if method_list --> | ||
142 | |||
143 | |||
144 | |||
145 | |||
146 | </div> | ||
147 | |||
148 | <div id="validator-badges"> | ||
149 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
150 | </div> | ||
151 | |||
152 | </body> | ||
153 | </html> | ||
154 |
doc/classes/RIR/Document.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Class: RIR::Document [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Class</strong></td> | ||
49 | <td class="class-name-in-header">RIR::Document</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../../files/lib/rir/document_rb.html"> | ||
57 | |||
58 | lib/rir/document.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | </td> | ||
66 | </tr> | ||
67 | |||
68 | |||
69 | <tr class="top-aligned-row"> | ||
70 | <td><strong>Parent:</strong></td> | ||
71 | <td> | ||
72 | |||
73 | Object | ||
74 | |||
75 | </td> | ||
76 | </tr> | ||
77 | |||
78 | </table> | ||
79 | </div> | ||
80 | <!-- banner header --> | ||
81 | |||
82 | <div id="bodyContent"> | ||
83 | |||
84 | <div id="contextContent"> | ||
85 | |||
86 | <div id="description"> | ||
87 | <p> | ||
88 | A <a href="Document.html">Document</a> is a bag of words and is constructed | ||
89 | from a string. | ||
90 | </p> | ||
91 | |||
92 | </div> | ||
93 | |||
94 | </div> | ||
95 | |||
96 | |||
97 | <div id="method-list"> | ||
98 | <h3 class="section-bar">Methods</h3> | ||
99 | |||
100 | <div class="name-list"> | ||
101 | |||
102 | <a href="#M000012">count_words</a> | ||
103 | |||
104 | <a href="#M000013">entropy</a> | ||
105 | |||
106 | <a href="#M000010">format_words</a> | ||
107 | |||
108 | <a href="#M000014">new</a> | ||
109 | |||
110 | <a href="#M000011">ngrams</a> | ||
111 | |||
112 | </div> | ||
113 | </div> | ||
114 | |||
115 | </div> | ||
116 | |||
117 | <!-- if includes --> | ||
118 | |||
119 | <div id="section"> | ||
120 | |||
121 | |||
122 | |||
123 | <div id="attribute-list"> | ||
124 | <h3 class="section-bar">Attributes</h3> | ||
125 | |||
126 | <div class="name-list"> | ||
127 | <table> | ||
128 | |||
129 | <tr class="top-aligned-row context-row"> | ||
130 | <td class="context-item-name">doc_content</td> | ||
131 | |||
132 | <td class="context-item-value"> [R] </td> | ||
133 | |||
134 | <td class="context-item-desc"></td> | ||
135 | </tr> | ||
136 | |||
137 | <tr class="top-aligned-row context-row"> | ||
138 | <td class="context-item-name">words</td> | ||
139 | |||
140 | <td class="context-item-value"> [R] </td> | ||
141 | |||
142 | <td class="context-item-desc"></td> | ||
143 | </tr> | ||
144 | |||
145 | </table> | ||
146 | </div> | ||
147 | </div> | ||
148 | |||
149 | |||
150 | <!-- if method_list --> | ||
151 | |||
152 | <div id="methods"> | ||
153 | |||
154 | <h3 class="section-bar">Public Class methods</h3> | ||
155 | |||
156 | |||
157 | <div id="method-M000014" class="method-detail"> | ||
158 | <a name="M000014"></a> | ||
159 | |||
160 | <div class="method-heading"> | ||
161 | |||
162 | <a href="Document.src/M000014.html" target="Code" class="method-signature" | ||
163 | onclick="popupCode('Document.src/M000014.html');return false;"> | ||
164 | |||
165 | <span class="method-name">new</span><span class="method-args">(content)</span> | ||
166 | |||
167 | </a> | ||
168 | |||
169 | </div> | ||
170 | |||
171 | <div class="method-description"> | ||
172 | |||
173 | </div> | ||
174 | </div> | ||
175 | |||
176 | |||
177 | <h3 class="section-bar">Public Instance methods</h3> | ||
178 | |||
179 | |||
180 | <div id="method-M000012" class="method-detail"> | ||
181 | <a name="M000012"></a> | ||
182 | |||
183 | <div class="method-heading"> | ||
184 | |||
185 | <a href="Document.src/M000012.html" target="Code" class="method-signature" | ||
186 | onclick="popupCode('Document.src/M000012.html');return false;"> | ||
187 | |||
188 | <span class="method-name">count_words</span><span class="method-args">()</span> | ||
189 | |||
190 | </a> | ||
191 | |||
192 | </div> | ||
193 | |||
194 | <div class="method-description"> | ||
195 | |||
196 | <p> | ||
197 | Returns a Hash containing the words and their associated counts in the | ||
198 | current <a href="Document.html">Document</a>. | ||
199 | </p> | ||
200 | <pre> | ||
201 | count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } | ||
202 | </pre> | ||
203 | |||
204 | </div> | ||
205 | </div> | ||
206 | |||
207 | |||
208 | <div id="method-M000013" class="method-detail"> | ||
209 | <a name="M000013"></a> | ||
210 | |||
211 | <div class="method-heading"> | ||
212 | |||
213 | <a href="Document.src/M000013.html" target="Code" class="method-signature" | ||
214 | onclick="popupCode('Document.src/M000013.html');return false;"> | ||
215 | |||
216 | <span class="method-name">entropy</span><span class="method-args">(s)</span> | ||
217 | |||
218 | </a> | ||
219 | |||
220 | </div> | ||
221 | |||
222 | <div class="method-description"> | ||
223 | |||
224 | <p> | ||
225 | Computes the entropy of a given string <tt>s</tt> inside the document. | ||
226 | </p> | ||
227 | <p> | ||
228 | If the string parameter is composed of several words (i.e. tokens separated by | ||
229 | whitespace), it is treated as an n-gram. | ||
230 | </p> | ||
231 | <pre> | ||
232 | entropy("guitar") #=> 0.00389919463243839 | ||
233 | </pre> | ||
234 | |||
235 | </div> | ||
236 | </div> | ||
237 | |||
238 | |||
239 | <div id="method-M000011" class="method-detail"> | ||
240 | <a name="M000011"></a> | ||
241 | |||
242 | <div class="method-heading"> | ||
243 | |||
244 | <a href="Document.src/M000011.html" target="Code" class="method-signature" | ||
245 | onclick="popupCode('Document.src/M000011.html');return false;"> | ||
246 | |||
247 | <span class="method-name">ngrams</span><span class="method-args">(n)</span> | ||
248 | |||
249 | </a> | ||
250 | |||
251 | </div> | ||
252 | |||
253 | <div class="method-description"> | ||
254 | |||
255 | <p> | ||
256 | Returns an Array containing the <tt>n</tt>-grams (words) from the current | ||
257 | <a href="Document.html">Document</a>. | ||
258 | </p> | ||
259 | <pre> | ||
260 | ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...] | ||
261 | </pre> | ||
262 | |||
263 | </div> | ||
264 | </div> | ||
265 | |||
266 | |||
267 | <h3 class="section-bar">Protected Instance methods</h3> | ||
268 | |||
269 | |||
270 | <div id="method-M000010" class="method-detail"> | ||
271 | <a name="M000010"></a> | ||
272 | |||
273 | <div class="method-heading"> | ||
274 | |||
275 | <a href="Document.src/M000010.html" target="Code" class="method-signature" | ||
276 | onclick="popupCode('Document.src/M000010.html');return false;"> | ||
277 | |||
278 | <span class="method-name">format_words</span><span class="method-args">()</span> | ||
279 | |||
280 | </a> | ||
281 | |||
282 | </div> | ||
283 | |||
284 | <div class="method-description"> | ||
285 | |||
286 | <p> | ||
287 | Any non-word characters are removed from the words (see <a | ||
288 | href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a> | ||
289 | and the \W special escape). | ||
290 | </p> | ||
291 | <p> | ||
292 | Protected function, only meant to be called during initialization. | ||
293 | </p> | ||
294 | |||
295 | </div> | ||
296 | </div> | ||
297 | |||
298 | |||
299 | |||
300 | </div> | ||
301 | |||
302 | |||
303 | |||
304 | |||
305 | </div> | ||
306 | |||
307 | <div id="validator-badges"> | ||
308 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
309 | </div> | ||
310 | |||
311 | </body> | ||
312 | </html> | ||
313 |
doc/classes/RIR/Document.src/M000010.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>format_words (RIR::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span> | ||
12 | <span class="ruby-identifier">wo</span> = [] | ||
13 | |||
14 | <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | ||
15 | <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span> | ||
16 | <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span> | ||
17 | <span class="ruby-keyword kw">end</span> | ||
18 | <span class="ruby-keyword kw">end</span> | ||
19 | |||
20 | <span class="ruby-identifier">wo</span> | ||
21 | <span class="ruby-keyword kw">end</span></pre> | ||
22 | </body> | ||
23 | </html> | ||
24 |
doc/classes/RIR/Document.src/M000011.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>ngrams (RIR::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>) | ||
12 | <span class="ruby-identifier">window</span> = [] | ||
13 | <span class="ruby-identifier">ngrams_array</span> = [] | ||
14 | |||
15 | <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | ||
16 | <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>) | ||
17 | <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span> | ||
18 | <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">" "</span>) | ||
19 | <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>) | ||
20 | <span class="ruby-keyword kw">end</span> | ||
21 | <span class="ruby-keyword kw">end</span> | ||
22 | |||
23 | <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span> | ||
24 | <span class="ruby-keyword kw">end</span></pre> | ||
25 | </body> | ||
26 | </html> | ||
27 |
doc/classes/RIR/Document.src/M000012.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>count_words (RIR::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span> | ||
12 | <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> } | ||
13 | <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> } | ||
14 | |||
15 | <span class="ruby-identifier">counts</span> | ||
16 | <span class="ruby-keyword kw">end</span></pre> | ||
17 | </body> | ||
18 | </html> | ||
19 |
doc/classes/RIR/Document.src/M000013.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>entropy (RIR::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 77</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>) | ||
12 | <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span> | ||
13 | <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span> | ||
14 | |||
15 | <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | ||
16 | <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span> | ||
17 | <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>) | ||
18 | <span class="ruby-keyword kw">end</span> | ||
19 | |||
20 | <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span> | ||
21 | <span class="ruby-identifier">en</span> | ||
22 | <span class="ruby-keyword kw">end</span></pre> | ||
23 | </body> | ||
24 | </html> | ||
25 |
doc/classes/RIR/Document.src/M000014.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>new (RIR::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 92</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">content</span>) | ||
12 | <span class="ruby-ivar">@doc_content</span> = <span class="ruby-identifier">content</span> | ||
13 | <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">format_words</span> | ||
14 | <span class="ruby-keyword kw">end</span></pre> | ||
15 | </body> | ||
16 | </html> | ||
17 |
doc/classes/RIR/WebDocument.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Class: RIR::WebDocument [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Class</strong></td> | ||
49 | <td class="class-name-in-header">RIR::WebDocument</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../../files/lib/rir/document_rb.html"> | ||
57 | |||
58 | lib/rir/document.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | </td> | ||
66 | </tr> | ||
67 | |||
68 | |||
69 | <tr class="top-aligned-row"> | ||
70 | <td><strong>Parent:</strong></td> | ||
71 | <td> | ||
72 | |||
73 | <a href="Document.html"> | ||
74 | |||
75 | RIR::Document | ||
76 | |||
77 | </a> | ||
78 | |||
79 | </td> | ||
80 | </tr> | ||
81 | |||
82 | </table> | ||
83 | </div> | ||
84 | <!-- banner header --> | ||
85 | |||
86 | <div id="bodyContent"> | ||
87 | |||
88 | <div id="contextContent"> | ||
89 | |||
90 | <div id="description"> | ||
91 | <p> | ||
92 | A <a href="WebDocument.html">WebDocument</a> is a <a | ||
93 | href="Document.html">Document</a> with a <tt>url</tt>. | ||
94 | </p> | ||
95 | |||
96 | </div> | ||
97 | |||
98 | </div> | ||
99 | |||
100 | |||
101 | <div id="method-list"> | ||
102 | <h3 class="section-bar">Methods</h3> | ||
103 | |||
104 | <div class="name-list"> | ||
105 | |||
106 | <a href="#M000015">get_content</a> | ||
107 | |||
108 | <a href="#M000016">new</a> | ||
109 | |||
110 | </div> | ||
111 | </div> | ||
112 | |||
113 | </div> | ||
114 | |||
115 | <!-- if includes --> | ||
116 | |||
117 | <div id="section"> | ||
118 | |||
119 | |||
120 | |||
121 | <div id="attribute-list"> | ||
122 | <h3 class="section-bar">Attributes</h3> | ||
123 | |||
124 | <div class="name-list"> | ||
125 | <table> | ||
126 | |||
127 | <tr class="top-aligned-row context-row"> | ||
128 | <td class="context-item-name">url</td> | ||
129 | |||
130 | <td class="context-item-value"> [R] </td> | ||
131 | |||
132 | <td class="context-item-desc"></td> | ||
133 | </tr> | ||
134 | |||
135 | </table> | ||
136 | </div> | ||
137 | </div> | ||
138 | |||
139 | |||
140 | <!-- if method_list --> | ||
141 | |||
142 | <div id="methods"> | ||
143 | |||
144 | <h3 class="section-bar">Public Class methods</h3> | ||
145 | |||
146 | |||
147 | <div id="method-M000015" class="method-detail"> | ||
148 | <a name="M000015"></a> | ||
149 | |||
150 | <div class="method-heading"> | ||
151 | |||
152 | <a href="WebDocument.src/M000015.html" target="Code" class="method-signature" | ||
153 | onclick="popupCode('WebDocument.src/M000015.html');return false;"> | ||
154 | |||
155 | <span class="method-name">get_content</span><span class="method-args">(url)</span> | ||
156 | |||
157 | </a> | ||
158 | |||
159 | </div> | ||
160 | |||
161 | <div class="method-description"> | ||
162 | |||
163 | <p> | ||
164 | Returns the HTML text from the page of a given <tt>url</tt>. | ||
165 | </p> | ||
166 | |||
167 | </div> | ||
168 | </div> | ||
169 | |||
170 | |||
171 | <div id="method-M000016" class="method-detail"> | ||
172 | <a name="M000016"></a> | ||
173 | |||
174 | <div class="method-heading"> | ||
175 | |||
176 | <a href="WebDocument.src/M000016.html" target="Code" class="method-signature" | ||
177 | onclick="popupCode('WebDocument.src/M000016.html');return false;"> | ||
178 | |||
179 | <span class="method-name">new</span><span class="method-args">(url)</span> | ||
180 | |||
181 | </a> | ||
182 | |||
183 | </div> | ||
184 | |||
185 | <div class="method-description"> | ||
186 | |||
187 | <p> | ||
188 | <a href="WebDocument.html">WebDocument</a> constructor. The content of the | ||
189 | <a href="Document.html">Document</a> is the HTML page without the tags. | ||
190 | </p> | ||
191 | |||
192 | </div> | ||
193 | </div> | ||
194 | |||
195 | |||
196 | |||
197 | </div> | ||
198 | |||
199 | |||
200 | |||
201 | |||
202 | </div> | ||
203 | |||
204 | <div id="validator-badges"> | ||
205 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
206 | </div> | ||
207 | |||
208 | </body> | ||
209 | </html> | ||
210 |
doc/classes/RIR/WebDocument.src/M000015.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>get_content (RIR::WebDocument)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 105</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>) | ||
12 | <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span> | ||
13 | <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>)) | ||
14 | <span class="ruby-keyword kw">end</span></pre> | ||
15 | </body> | ||
16 | </html> | ||
17 |
doc/classes/RIR/WebDocument.src/M000016.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>new (RIR::WebDocument)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 112</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">url</span>) | ||
12 | <span class="ruby-ivar">@url</span> = <span class="ruby-identifier">url</span> | ||
13 | <span class="ruby-keyword kw">super</span> <span class="ruby-constant">WebDocument</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>).<span class="ruby-identifier">strip_javascripts</span>.<span class="ruby-identifier">strip_stylesheets</span>.<span class="ruby-identifier">strip_xml_tags</span> | ||
14 | <span class="ruby-keyword kw">end</span></pre> | ||
15 | </body> | ||
16 | </html> | ||
17 |
doc/classes/RIR/WikipediaPage.html
File was created | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Class: RIR::WikipediaPage [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Class</strong></td> | ||
49 | <td class="class-name-in-header">RIR::WikipediaPage</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../../files/lib/rir/document_rb.html"> | ||
57 | |||
58 | lib/rir/document.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | </td> | ||
66 | </tr> | ||
67 | |||
68 | |||
69 | <tr class="top-aligned-row"> | ||
70 | <td><strong>Parent:</strong></td> | ||
71 | <td> | ||
72 | |||
73 | <a href="WebDocument.html"> | ||
74 | |||
75 | RIR::WebDocument | ||
76 | |||
77 | </a> | ||
78 | |||
79 | </td> | ||
80 | </tr> | ||
81 | |||
82 | </table> | ||
83 | </div> | ||
84 | <!-- banner header --> | ||
85 | |||
86 | <div id="bodyContent"> | ||
87 | |||
88 | <div id="contextContent"> | ||
89 | |||
90 | <div id="description"> | ||
91 | <p> | ||
92 | A <a href="WikipediaPage.html">WikipediaPage</a> is a <a | ||
93 | href="WebDocument.html">WebDocument</a>. | ||
94 | </p> | ||
95 | |||
96 | </div> | ||
97 | |||
98 | </div> | ||
99 | |||
100 | |||
101 | </div> | ||
102 | |||
103 | <!-- if includes --> | ||
104 | |||
105 | <div id="section"> | ||
106 | |||
107 | |||
108 | |||
109 | |||
110 | <!-- if method_list --> | ||
111 | |||
112 | |||
113 | |||
114 | |||
115 | </div> | ||
116 | |||
117 | <div id="validator-badges"> | ||
118 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
119 | </div> | ||
120 | |||
121 | </body> | ||
122 | </html> | ||
123 |
doc/classes/Rir.html
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | File was deleted | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Module: Rir [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Module</strong></td> | ||
49 | <td class="class-name-in-header">Rir</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../files/lib/rir/string_rb.html"> | ||
57 | |||
58 | lib/rir/string.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | |||
66 | <a href="../files/lib/rir/document_rb.html"> | ||
67 | |||
68 | lib/rir/document.rb | ||
69 | |||
70 | </a> | ||
71 | |||
72 | |||
73 | <br /> | ||
74 | |||
75 | </td> | ||
76 | </tr> | ||
77 | |||
78 | |||
79 | </table> | ||
80 | </div> | ||
81 | <!-- banner header --> | ||
82 | |||
83 | <div id="bodyContent"> | ||
84 | |||
85 | <div id="contextContent"> | ||
86 | |||
87 | <div id="description"> | ||
88 | <p> | ||
89 | General module for many purposes related to Information Retrieval. | ||
90 | </p> | ||
91 | <hr size="1"></hr><p> | ||
92 | General module for many purposes related to Information Retrieval. | ||
93 | </p> | ||
94 | |||
95 | </div> | ||
96 | |||
97 | </div> | ||
98 | |||
99 | |||
100 | </div> | ||
101 | |||
102 | <!-- if includes --> | ||
103 | |||
104 | <div id="section"> | ||
105 | |||
106 | <div id="class-list"> | ||
107 | <h3 class="section-bar">Classes and Modules</h3> | ||
108 | |||
109 | Class <a href="Rir/Document.html" class="link">Rir::Document</a><br /> | ||
110 | Class <a href="Rir/WebDocument.html" class="link">Rir::WebDocument</a><br /> | ||
111 | Class <a href="Rir/WikipediaPage.html" class="link">Rir::WikipediaPage</a><br /> | ||
112 | |||
113 | </div> | ||
114 | |||
115 | <div id="constants-list"> | ||
116 | <h3 class="section-bar">Constants</h3> | ||
117 | |||
118 | <div class="name-list"> | ||
119 | <table summary="Constants"> | ||
120 | |||
121 | <tr class="top-aligned-row context-row"> | ||
122 | <td class="context-item-name">Stoplist</td> | ||
123 | <td>=</td> | ||
124 | <td class="context-item-value">[ "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", 
"provide", "quite", "rather", "really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", "yours", "yourself", "yourselves" ]</td> | ||
125 | |||
126 | <td> </td> | ||
127 | <td class="context-item-desc"> | ||
128 | These are the default stopwords provided by Lemur. | ||
129 | |||
130 | </td> | ||
131 | |||
132 | </tr> | ||
133 | |||
134 | </table> | ||
135 | </div> | ||
136 | </div> | ||
137 | |||
138 | |||
139 | |||
140 | |||
141 | <!-- if method_list --> | ||
142 | |||
143 | |||
144 | |||
145 | |||
146 | </div> | ||
147 | |||
148 | <div id="validator-badges"> | ||
149 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
150 | </div> | ||
151 | |||
152 | </body> | ||
153 | </html> | ||
154 | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
doc/classes/Rir/Document.html
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | File was deleted | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Class: Rir::Document [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Class</strong></td> | ||
49 | <td class="class-name-in-header">Rir::Document</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../../files/lib/rir/document_rb.html"> | ||
57 | |||
58 | lib/rir/document.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | </td> | ||
66 | </tr> | ||
67 | |||
68 | |||
69 | <tr class="top-aligned-row"> | ||
70 | <td><strong>Parent:</strong></td> | ||
71 | <td> | ||
72 | |||
73 | Object | ||
74 | |||
75 | </td> | ||
76 | </tr> | ||
77 | |||
78 | </table> | ||
79 | </div> | ||
80 | <!-- banner header --> | ||
81 | |||
82 | <div id="bodyContent"> | ||
83 | |||
84 | <div id="contextContent"> | ||
85 | |||
86 | <div id="description"> | ||
87 | <p> | ||
88 | A <a href="Document.html">Document</a> is a bag of words and is constructed | ||
89 | from a string. | ||
90 | </p> | ||
91 | |||
92 | </div> | ||
93 | |||
94 | </div> | ||
95 | |||
96 | |||
97 | <div id="method-list"> | ||
98 | <h3 class="section-bar">Methods</h3> | ||
99 | |||
100 | <div class="name-list"> | ||
101 | |||
102 | <a href="#M000012">count_words</a> | ||
103 | |||
104 | <a href="#M000013">entropy</a> | ||
105 | |||
106 | <a href="#M000010">format_words</a> | ||
107 | |||
108 | <a href="#M000014">new</a> | ||
109 | |||
110 | <a href="#M000011">ngrams</a> | ||
111 | |||
112 | </div> | ||
113 | </div> | ||
114 | |||
115 | </div> | ||
116 | |||
117 | <!-- if includes --> | ||
118 | |||
119 | <div id="section"> | ||
120 | |||
121 | |||
122 | |||
123 | <div id="attribute-list"> | ||
124 | <h3 class="section-bar">Attributes</h3> | ||
125 | |||
126 | <div class="name-list"> | ||
127 | <table> | ||
128 | |||
129 | <tr class="top-aligned-row context-row"> | ||
130 | <td class="context-item-name">doc_content</td> | ||
131 | |||
132 | <td class="context-item-value"> [R] </td> | ||
133 | |||
134 | <td class="context-item-desc"></td> | ||
135 | </tr> | ||
136 | |||
137 | <tr class="top-aligned-row context-row"> | ||
138 | <td class="context-item-name">words</td> | ||
139 | |||
140 | <td class="context-item-value"> [R] </td> | ||
141 | |||
142 | <td class="context-item-desc"></td> | ||
143 | </tr> | ||
144 | |||
145 | </table> | ||
146 | </div> | ||
147 | </div> | ||
148 | |||
149 | |||
150 | <!-- if method_list --> | ||
151 | |||
152 | <div id="methods"> | ||
153 | |||
154 | <h3 class="section-bar">Public Class methods</h3> | ||
155 | |||
156 | |||
157 | <div id="method-M000014" class="method-detail"> | ||
158 | <a name="M000014"></a> | ||
159 | |||
160 | <div class="method-heading"> | ||
161 | |||
162 | <a href="Document.src/M000014.html" target="Code" class="method-signature" | ||
163 | onclick="popupCode('Document.src/M000014.html');return false;"> | ||
164 | |||
165 | <span class="method-name">new</span><span class="method-args">(content)</span> | ||
166 | |||
167 | </a> | ||
168 | |||
169 | </div> | ||
170 | |||
171 | <div class="method-description"> | ||
172 | |||
173 | </div> | ||
174 | </div> | ||
175 | |||
176 | |||
177 | <h3 class="section-bar">Public Instance methods</h3> | ||
178 | |||
179 | |||
180 | <div id="method-M000012" class="method-detail"> | ||
181 | <a name="M000012"></a> | ||
182 | |||
183 | <div class="method-heading"> | ||
184 | |||
185 | <a href="Document.src/M000012.html" target="Code" class="method-signature" | ||
186 | onclick="popupCode('Document.src/M000012.html');return false;"> | ||
187 | |||
188 | <span class="method-name">count_words</span><span class="method-args">()</span> | ||
189 | |||
190 | </a> | ||
191 | |||
192 | </div> | ||
193 | |||
194 | <div class="method-description"> | ||
195 | |||
196 | <p> | ||
197 | Returns a Hash containing the words and their associated counts in the | ||
198 | current <a href="Document.html">Document</a>. | ||
199 | </p> | ||
200 | <pre> | ||
201 | count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } | ||
202 | </pre> | ||
203 | |||
204 | </div> | ||
205 | </div> | ||
206 | |||
207 | |||
208 | <div id="method-M000013" class="method-detail"> | ||
209 | <a name="M000013"></a> | ||
210 | |||
211 | <div class="method-heading"> | ||
212 | |||
213 | <a href="Document.src/M000013.html" target="Code" class="method-signature" | ||
214 | onclick="popupCode('Document.src/M000013.html');return false;"> | ||
215 | |||
216 | <span class="method-name">entropy</span><span class="method-args">(s)</span> | ||
217 | |||
218 | </a> | ||
219 | |||
220 | </div> | ||
221 | |||
222 | <div class="method-description"> | ||
223 | |||
224 | <p> | ||
225 | Computes the entropy of a given string <tt>s</tt> inside the document. | ||
226 | </p> | ||
227 | <p> | ||
228 | If the string parameter is composed of several words (i.e. tokens separated by | ||
229 | whitespace), it is treated as an n-gram. | ||
230 | </p> | ||
231 | <pre> | ||
232 | entropy("guitar") #=> 0.00389919463243839 | ||
233 | </pre> | ||
234 | |||
235 | </div> | ||
236 | </div> | ||
237 | |||
238 | |||
239 | <div id="method-M000011" class="method-detail"> | ||
240 | <a name="M000011"></a> | ||
241 | |||
242 | <div class="method-heading"> | ||
243 | |||
244 | <a href="Document.src/M000011.html" target="Code" class="method-signature" | ||
245 | onclick="popupCode('Document.src/M000011.html');return false;"> | ||
246 | |||
247 | <span class="method-name">ngrams</span><span class="method-args">(n)</span> | ||
248 | |||
249 | </a> | ||
250 | |||
251 | </div> | ||
252 | |||
253 | <div class="method-description"> | ||
254 | |||
255 | <p> | ||
256 | Returns an Array containing the <tt>n</tt>-grams (words) from the current | ||
257 | <a href="Document.html">Document</a>. | ||
258 | </p> | ||
259 | <pre> | ||
260 | ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...] | ||
261 | </pre> | ||
262 | |||
263 | </div> | ||
264 | </div> | ||
265 | |||
266 | |||
267 | <h3 class="section-bar">Protected Instance methods</h3> | ||
268 | |||
269 | |||
270 | <div id="method-M000010" class="method-detail"> | ||
271 | <a name="M000010"></a> | ||
272 | |||
273 | <div class="method-heading"> | ||
274 | |||
275 | <a href="Document.src/M000010.html" target="Code" class="method-signature" | ||
276 | onclick="popupCode('Document.src/M000010.html');return false;"> | ||
277 | |||
278 | <span class="method-name">format_words</span><span class="method-args">()</span> | ||
279 | |||
280 | </a> | ||
281 | |||
282 | </div> | ||
283 | |||
284 | <div class="method-description"> | ||
285 | |||
286 | <p> | ||
287 | Any non-word characters are removed from the words (see <a | ||
288 | href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a> | ||
289 | and the <tt>\W</tt> special escape). | ||
290 | </p> | ||
291 | <p> | ||
292 | Protected function, only meant to be called at initialization. | ||
293 | </p> | ||
294 | |||
295 | </div> | ||
296 | </div> | ||
297 | |||
298 | |||
299 | |||
300 | </div> | ||
301 | |||
302 | |||
303 | |||
304 | |||
305 | </div> | ||
306 | |||
307 | <div id="validator-badges"> | ||
308 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
309 | </div> | ||
310 | |||
311 | </body> | ||
312 | </html> | ||
313 | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
doc/classes/Rir/Document.src/M000010.html
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | File was deleted | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>format_words (Rir::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span> | ||
12 | <span class="ruby-identifier">wo</span> = [] | ||
13 | |||
14 | <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | ||
15 | <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span> | ||
16 | <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span> | ||
17 | <span class="ruby-keyword kw">end</span> | ||
18 | <span class="ruby-keyword kw">end</span> | ||
19 | |||
20 | <span class="ruby-identifier">wo</span> | ||
21 | <span class="ruby-keyword kw">end</span></pre> | ||
22 | </body> | ||
23 | </html> | ||
24 | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
doc/classes/Rir/Document.src/M000011.html
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | File was deleted | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>ngrams (Rir::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>) | ||
12 | <span class="ruby-identifier">window</span> = [] | ||
13 | <span class="ruby-identifier">ngrams_array</span> = [] | ||
14 | |||
15 | <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | ||
16 | <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>) | ||
17 | <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span> | ||
18 | <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">" "</span>) | ||
19 | <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>) | ||
20 | <span class="ruby-keyword kw">end</span> | ||
21 | <span class="ruby-keyword kw">end</span> | ||
22 | |||
23 | <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span> | ||
24 | <span class="ruby-keyword kw">end</span></pre> | ||
25 | </body> | ||
26 | </html> | ||
27 | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
doc/classes/Rir/Document.src/M000012.html
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | File was deleted | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>count_words (Rir::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span> | ||
12 | <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> } | ||
13 | <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> } | ||
14 | |||
15 | <span class="ruby-identifier">counts</span> | ||
16 | <span class="ruby-keyword kw">end</span></pre> | ||
17 | </body> | ||
18 | </html> | ||
19 | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
doc/classes/Rir/Document.src/M000013.html
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | File was deleted | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>entropy (Rir::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 77</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>) | ||
12 | <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span> | ||
13 | <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span> | ||
14 | |||
15 | <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | ||
16 | <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span> | ||
17 | <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>) | ||
18 | <span class="ruby-keyword kw">end</span> | ||
19 | |||
20 | <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span> | ||
21 | <span class="ruby-identifier">en</span> | ||
22 | <span class="ruby-keyword kw">end</span></pre> | ||
23 | </body> | ||
24 | </html> | ||
25 | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
doc/classes/Rir/Document.src/M000014.html
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | File was deleted | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>new (Rir::Document)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 92</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">content</span>) | ||
12 | <span class="ruby-ivar">@doc_content</span> = <span class="ruby-identifier">content</span> | ||
13 | <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">format_words</span> | ||
14 | <span class="ruby-keyword kw">end</span></pre> | ||
15 | </body> | ||
16 | </html> | ||
17 | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
doc/classes/Rir/WebDocument.html
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | File was deleted | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Class: Rir::WebDocument [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Class</strong></td> | ||
49 | <td class="class-name-in-header">Rir::WebDocument</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../../files/lib/rir/document_rb.html"> | ||
57 | |||
58 | lib/rir/document.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | </td> | ||
66 | </tr> | ||
67 | |||
68 | |||
69 | <tr class="top-aligned-row"> | ||
70 | <td><strong>Parent:</strong></td> | ||
71 | <td> | ||
72 | |||
73 | <a href="Document.html"> | ||
74 | |||
75 | Rir::Document | ||
76 | |||
77 | </a> | ||
78 | |||
79 | </td> | ||
80 | </tr> | ||
81 | |||
82 | </table> | ||
83 | </div> | ||
84 | <!-- banner header --> | ||
85 | |||
86 | <div id="bodyContent"> | ||
87 | |||
88 | <div id="contextContent"> | ||
89 | |||
90 | <div id="description"> | ||
91 | <p> | ||
92 | A <a href="WebDocument.html">WebDocument</a> is a <a | ||
93 | href="Document.html">Document</a> with a <tt>url</tt>. | ||
94 | </p> | ||
95 | |||
96 | </div> | ||
97 | |||
98 | </div> | ||
99 | |||
100 | |||
101 | <div id="method-list"> | ||
102 | <h3 class="section-bar">Methods</h3> | ||
103 | |||
104 | <div class="name-list"> | ||
105 | |||
106 | <a href="#M000015">get_content</a> | ||
107 | |||
108 | <a href="#M000016">new</a> | ||
109 | |||
110 | </div> | ||
111 | </div> | ||
112 | |||
113 | </div> | ||
114 | |||
115 | <!-- if includes --> | ||
116 | |||
117 | <div id="section"> | ||
118 | |||
119 | |||
120 | |||
121 | <div id="attribute-list"> | ||
122 | <h3 class="section-bar">Attributes</h3> | ||
123 | |||
124 | <div class="name-list"> | ||
125 | <table> | ||
126 | |||
127 | <tr class="top-aligned-row context-row"> | ||
128 | <td class="context-item-name">url</td> | ||
129 | |||
130 | <td class="context-item-value"> [R] </td> | ||
131 | |||
132 | <td class="context-item-desc"></td> | ||
133 | </tr> | ||
134 | |||
135 | </table> | ||
136 | </div> | ||
137 | </div> | ||
138 | |||
139 | |||
140 | <!-- if method_list --> | ||
141 | |||
142 | <div id="methods"> | ||
143 | |||
144 | <h3 class="section-bar">Public Class methods</h3> | ||
145 | |||
146 | |||
147 | <div id="method-M000015" class="method-detail"> | ||
148 | <a name="M000015"></a> | ||
149 | |||
150 | <div class="method-heading"> | ||
151 | |||
152 | <a href="WebDocument.src/M000015.html" target="Code" class="method-signature" | ||
153 | onclick="popupCode('WebDocument.src/M000015.html');return false;"> | ||
154 | |||
155 | <span class="method-name">get_content</span><span class="method-args">(url)</span> | ||
156 | |||
157 | </a> | ||
158 | |||
159 | </div> | ||
160 | |||
161 | <div class="method-description"> | ||
162 | |||
163 | <p> | ||
164 | Returns the HTML text from the page of a given <tt>url</tt>. | ||
165 | </p> | ||
166 | |||
167 | </div> | ||
168 | </div> | ||
169 | |||
170 | |||
171 | <div id="method-M000016" class="method-detail"> | ||
172 | <a name="M000016"></a> | ||
173 | |||
174 | <div class="method-heading"> | ||
175 | |||
176 | <a href="WebDocument.src/M000016.html" target="Code" class="method-signature" | ||
177 | onclick="popupCode('WebDocument.src/M000016.html');return false;"> | ||
178 | |||
179 | <span class="method-name">new</span><span class="method-args">(url)</span> | ||
180 | |||
181 | </a> | ||
182 | |||
183 | </div> | ||
184 | |||
185 | <div class="method-description"> | ||
186 | |||
187 | <p> | ||
188 | <a href="WebDocument.html">WebDocument</a> constructor, the content of the | ||
189 | <a href="Document.html">Document</a> is the HTML page without the tags. | ||
190 | </p> | ||
191 | |||
192 | </div> | ||
193 | </div> | ||
194 | |||
195 | |||
196 | |||
197 | </div> | ||
198 | |||
199 | |||
200 | |||
201 | |||
202 | </div> | ||
203 | |||
204 | <div id="validator-badges"> | ||
205 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
206 | </div> | ||
207 | |||
208 | </body> | ||
209 | </html> | ||
210 | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
doc/classes/Rir/WebDocument.src/M000015.html
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | File was deleted | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>get_content (Rir::WebDocument)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 105</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>) | ||
12 | <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span> | ||
13 | <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>)) | ||
14 | <span class="ruby-keyword kw">end</span></pre> | ||
15 | </body> | ||
16 | </html> | ||
17 | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
doc/classes/Rir/WebDocument.src/M000016.html
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | File was deleted | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>new (Rir::WebDocument)</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | ||
8 | </head> | ||
9 | <body class="standalone-code"> | ||
10 | <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 112</span> | ||
11 | <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">url</span>) | ||
12 | <span class="ruby-ivar">@url</span> = <span class="ruby-identifier">url</span> | ||
13 | <span class="ruby-keyword kw">super</span> <span class="ruby-constant">WebDocument</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>).<span class="ruby-identifier">strip_javascripts</span>.<span class="ruby-identifier">strip_stylesheets</span>.<span class="ruby-identifier">strip_xml_tags</span> | ||
14 | <span class="ruby-keyword kw">end</span></pre> | ||
15 | </body> | ||
16 | </html> | ||
17 | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
doc/classes/Rir/WikipediaPage.html
1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | File was deleted | |
2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | ||
3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | ||
4 | <head> | ||
5 | <title>Class: Rir::WikipediaPage [RDoc Documentation]</title> | ||
6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | ||
8 | <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | ||
9 | <script type="text/javascript"> | ||
10 | // <![CDATA[ | ||
11 | |||
12 | function popupCode( url ) { | ||
13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | ||
14 | } | ||
15 | |||
16 | function toggleCode( id ) { | ||
17 | if ( document.getElementById ) | ||
18 | elem = document.getElementById( id ); | ||
19 | else if ( document.all ) | ||
20 | elem = eval( "document.all." + id ); | ||
21 | else | ||
22 | return false; | ||
23 | |||
24 | elemStyle = elem.style; | ||
25 | |||
26 | if ( elemStyle.display != "block" ) { | ||
27 | elemStyle.display = "block" | ||
28 | } else { | ||
29 | elemStyle.display = "none" | ||
30 | } | ||
31 | |||
32 | return true; | ||
33 | } | ||
34 | |||
35 | // Make codeblocks hidden by default | ||
36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | ||
37 | |||
38 | // ]]> | ||
39 | </script> | ||
40 | |||
41 | </head> | ||
42 | <body> | ||
43 | |||
44 | |||
45 | <div id="classHeader"> | ||
46 | <table class="header-table"> | ||
47 | <tr class="top-aligned-row"> | ||
48 | <td><strong>Class</strong></td> | ||
49 | <td class="class-name-in-header">Rir::WikipediaPage</td> | ||
50 | </tr> | ||
51 | <tr class="top-aligned-row"> | ||
52 | <td><strong>In:</strong></td> | ||
53 | <td> | ||
54 | |||
55 | |||
56 | <a href="../../files/lib/rir/document_rb.html"> | ||
57 | |||
58 | lib/rir/document.rb | ||
59 | |||
60 | </a> | ||
61 | |||
62 | |||
63 | <br /> | ||
64 | |||
65 | </td> | ||
66 | </tr> | ||
67 | |||
68 | |||
69 | <tr class="top-aligned-row"> | ||
70 | <td><strong>Parent:</strong></td> | ||
71 | <td> | ||
72 | |||
73 | <a href="WebDocument.html"> | ||
74 | |||
75 | Rir::WebDocument | ||
76 | |||
77 | </a> | ||
78 | |||
79 | </td> | ||
80 | </tr> | ||
81 | |||
82 | </table> | ||
83 | </div> | ||
84 | <!-- banner header --> | ||
85 | |||
86 | <div id="bodyContent"> | ||
87 | |||
88 | <div id="contextContent"> | ||
89 | |||
90 | <div id="description"> | ||
91 | <p> | ||
92 | A <a href="WikipediaPage.html">WikipediaPage</a> is a <a | ||
93 | href="WebDocument.html">WebDocument</a>. | ||
94 | </p> | ||
95 | |||
96 | </div> | ||
97 | |||
98 | </div> | ||
99 | |||
100 | |||
101 | </div> | ||
102 | |||
103 | <!-- if includes --> | ||
104 | |||
105 | <div id="section"> | ||
106 | |||
107 | |||
108 | |||
109 | |||
110 | <!-- if method_list --> | ||
111 | |||
112 | |||
113 | |||
114 | |||
115 | </div> | ||
116 | |||
117 | <div id="validator-badges"> | ||
118 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | ||
119 | </div> | ||
120 | |||
121 | </body> | ||
122 | </html> | ||
123 | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |