Commit 35f45ab54d40489d2fe5d4fc5d39c40290635dea
1 parent
7043da90bf
Exists in
master
changing the main module name, generating RDoc
Showing 22 changed files with 935 additions and 935 deletions Side-by-side Diff
- doc/classes/RIR.html
- doc/classes/RIR/Document.html
- doc/classes/RIR/Document.src/M000010.html
- doc/classes/RIR/Document.src/M000011.html
- doc/classes/RIR/Document.src/M000012.html
- doc/classes/RIR/Document.src/M000013.html
- doc/classes/RIR/Document.src/M000014.html
- doc/classes/RIR/WebDocument.html
- doc/classes/RIR/WebDocument.src/M000015.html
- doc/classes/RIR/WebDocument.src/M000016.html
- doc/classes/RIR/WikipediaPage.html
- doc/classes/Rir.html
- doc/classes/Rir/Document.html
- doc/classes/Rir/Document.src/M000010.html
- doc/classes/Rir/Document.src/M000011.html
- doc/classes/Rir/Document.src/M000012.html
- doc/classes/Rir/Document.src/M000013.html
- doc/classes/Rir/Document.src/M000014.html
- doc/classes/Rir/WebDocument.html
- doc/classes/Rir/WebDocument.src/M000015.html
- doc/classes/Rir/WebDocument.src/M000016.html
- doc/classes/Rir/WikipediaPage.html
doc/classes/RIR.html
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>Module: RIR [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="classHeader"> | |
46 | + <table class="header-table"> | |
47 | + <tr class="top-aligned-row"> | |
48 | + <td><strong>Module</strong></td> | |
49 | + <td class="class-name-in-header">RIR</td> | |
50 | + </tr> | |
51 | + <tr class="top-aligned-row"> | |
52 | + <td><strong>In:</strong></td> | |
53 | + <td> | |
54 | + | |
55 | + | |
56 | + <a href="../files/lib/rir/string_rb.html"> | |
57 | + | |
58 | + lib/rir/string.rb | |
59 | + | |
60 | + </a> | |
61 | + | |
62 | + | |
63 | + <br /> | |
64 | + | |
65 | + | |
66 | + <a href="../files/lib/rir/document_rb.html"> | |
67 | + | |
68 | + lib/rir/document.rb | |
69 | + | |
70 | + </a> | |
71 | + | |
72 | + | |
73 | + <br /> | |
74 | + | |
75 | + </td> | |
76 | + </tr> | |
77 | + | |
78 | + | |
79 | + </table> | |
80 | + </div> | |
81 | + <!-- banner header --> | |
82 | + | |
83 | + <div id="bodyContent"> | |
84 | + | |
85 | + <div id="contextContent"> | |
86 | + | |
87 | + <div id="description"> | |
88 | + <p> | |
89 | +General module for many purposes related to Information Retrieval. | |
90 | +</p> | |
91 | +<hr size="1"></hr><p> | |
92 | +General module for many purposes related to Information Retrieval. | |
93 | +</p> | |
94 | + | |
95 | + </div> | |
96 | + | |
97 | + </div> | |
98 | + | |
99 | + | |
100 | + </div> | |
101 | + | |
102 | + <!-- if includes --> | |
103 | + | |
104 | + <div id="section"> | |
105 | + | |
106 | + <div id="class-list"> | |
107 | + <h3 class="section-bar">Classes and Modules</h3> | |
108 | + | |
109 | + Class <a href="RIR/Document.html" class="link">RIR::Document</a><br /> | |
110 | +Class <a href="RIR/WebDocument.html" class="link">RIR::WebDocument</a><br /> | |
111 | +Class <a href="RIR/WikipediaPage.html" class="link">RIR::WikipediaPage</a><br /> | |
112 | + | |
113 | + </div> | |
114 | + | |
115 | + <div id="constants-list"> | |
116 | + <h3 class="section-bar">Constants</h3> | |
117 | + | |
118 | + <div class="name-list"> | |
119 | + <table summary="Constants"> | |
120 | + | |
121 | + <tr class="top-aligned-row context-row"> | |
122 | + <td class="context-item-name">Stoplist</td> | |
123 | + <td>=</td> | |
124 | + <td class="context-item-value">[ "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", 
"provide", "quite", "rather", "really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", "yours", "yourself", "yourselves" ]</td> | |
125 | + | |
126 | + <td> </td> | |
127 | + <td class="context-item-desc"> | |
128 | +These are the default stopwords provided by Lemur. | |
129 | + | |
130 | +</td> | |
131 | + | |
132 | + </tr> | |
133 | + | |
134 | + </table> | |
135 | + </div> | |
136 | + </div> | |
137 | + | |
138 | + | |
139 | + | |
140 | + | |
141 | + <!-- if method_list --> | |
142 | + | |
143 | + | |
144 | + | |
145 | + | |
146 | + </div> | |
147 | + | |
148 | +<div id="validator-badges"> | |
149 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
150 | +</div> | |
151 | + | |
152 | +</body> | |
153 | +</html> |
doc/classes/RIR/Document.html
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>Class: RIR::Document [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="classHeader"> | |
46 | + <table class="header-table"> | |
47 | + <tr class="top-aligned-row"> | |
48 | + <td><strong>Class</strong></td> | |
49 | + <td class="class-name-in-header">RIR::Document</td> | |
50 | + </tr> | |
51 | + <tr class="top-aligned-row"> | |
52 | + <td><strong>In:</strong></td> | |
53 | + <td> | |
54 | + | |
55 | + | |
56 | + <a href="../../files/lib/rir/document_rb.html"> | |
57 | + | |
58 | + lib/rir/document.rb | |
59 | + | |
60 | + </a> | |
61 | + | |
62 | + | |
63 | + <br /> | |
64 | + | |
65 | + </td> | |
66 | + </tr> | |
67 | + | |
68 | + | |
69 | + <tr class="top-aligned-row"> | |
70 | + <td><strong>Parent:</strong></td> | |
71 | + <td> | |
72 | + | |
73 | + Object | |
74 | + | |
75 | + </td> | |
76 | + </tr> | |
77 | + | |
78 | + </table> | |
79 | + </div> | |
80 | + <!-- banner header --> | |
81 | + | |
82 | + <div id="bodyContent"> | |
83 | + | |
84 | + <div id="contextContent"> | |
85 | + | |
86 | + <div id="description"> | |
87 | + <p> | |
88 | +A <a href="Document.html">Document</a> is a bag of words and is constructed | |
89 | +from a string. | |
90 | +</p> | |
91 | + | |
92 | + </div> | |
93 | + | |
94 | + </div> | |
95 | + | |
96 | + | |
97 | + <div id="method-list"> | |
98 | + <h3 class="section-bar">Methods</h3> | |
99 | + | |
100 | + <div class="name-list"> | |
101 | + | |
102 | + <a href="#M000012">count_words</a> | |
103 | + | |
104 | + <a href="#M000013">entropy</a> | |
105 | + | |
106 | + <a href="#M000010">format_words</a> | |
107 | + | |
108 | + <a href="#M000014">new</a> | |
109 | + | |
110 | + <a href="#M000011">ngrams</a> | |
111 | + | |
112 | + </div> | |
113 | + </div> | |
114 | + | |
115 | + </div> | |
116 | + | |
117 | + <!-- if includes --> | |
118 | + | |
119 | + <div id="section"> | |
120 | + | |
121 | + | |
122 | + | |
123 | + <div id="attribute-list"> | |
124 | + <h3 class="section-bar">Attributes</h3> | |
125 | + | |
126 | + <div class="name-list"> | |
127 | + <table> | |
128 | + | |
129 | + <tr class="top-aligned-row context-row"> | |
130 | + <td class="context-item-name">doc_content</td> | |
131 | + | |
132 | + <td class="context-item-value"> [R] </td> | |
133 | + | |
134 | + <td class="context-item-desc"></td> | |
135 | + </tr> | |
136 | + | |
137 | + <tr class="top-aligned-row context-row"> | |
138 | + <td class="context-item-name">words</td> | |
139 | + | |
140 | + <td class="context-item-value"> [R] </td> | |
141 | + | |
142 | + <td class="context-item-desc"></td> | |
143 | + </tr> | |
144 | + | |
145 | + </table> | |
146 | + </div> | |
147 | + </div> | |
148 | + | |
149 | + | |
150 | + <!-- if method_list --> | |
151 | + | |
152 | + <div id="methods"> | |
153 | + | |
154 | + <h3 class="section-bar">Public Class methods</h3> | |
155 | + | |
156 | + | |
157 | + <div id="method-M000014" class="method-detail"> | |
158 | + <a name="M000014"></a> | |
159 | + | |
160 | + <div class="method-heading"> | |
161 | + | |
162 | + <a href="Document.src/M000014.html" target="Code" class="method-signature" | |
163 | + onclick="popupCode('Document.src/M000014.html');return false;"> | |
164 | + | |
165 | + <span class="method-name">new</span><span class="method-args">(content)</span> | |
166 | + | |
167 | + </a> | |
168 | + | |
169 | + </div> | |
170 | + | |
171 | + <div class="method-description"> | |
172 | + | |
173 | + </div> | |
174 | + </div> | |
175 | + | |
176 | + | |
177 | + <h3 class="section-bar">Public Instance methods</h3> | |
178 | + | |
179 | + | |
180 | + <div id="method-M000012" class="method-detail"> | |
181 | + <a name="M000012"></a> | |
182 | + | |
183 | + <div class="method-heading"> | |
184 | + | |
185 | + <a href="Document.src/M000012.html" target="Code" class="method-signature" | |
186 | + onclick="popupCode('Document.src/M000012.html');return false;"> | |
187 | + | |
188 | + <span class="method-name">count_words</span><span class="method-args">()</span> | |
189 | + | |
190 | + </a> | |
191 | + | |
192 | + </div> | |
193 | + | |
194 | + <div class="method-description"> | |
195 | + | |
196 | + <p> | |
197 | +Returns a Hash containing the words and their associated counts in the | |
198 | +current <a href="Document.html">Document</a>. | |
199 | +</p> | |
200 | +<pre> | |
201 | + count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } | |
202 | +</pre> | |
203 | + | |
204 | + </div> | |
205 | + </div> | |
206 | + | |
207 | + | |
208 | + <div id="method-M000013" class="method-detail"> | |
209 | + <a name="M000013"></a> | |
210 | + | |
211 | + <div class="method-heading"> | |
212 | + | |
213 | + <a href="Document.src/M000013.html" target="Code" class="method-signature" | |
214 | + onclick="popupCode('Document.src/M000013.html');return false;"> | |
215 | + | |
216 | + <span class="method-name">entropy</span><span class="method-args">(s)</span> | |
217 | + | |
218 | + </a> | |
219 | + | |
220 | + </div> | |
221 | + | |
222 | + <div class="method-description"> | |
223 | + | |
224 | + <p> | |
225 | +Computes the entropy of a given string <tt>s</tt> inside the document. | |
226 | +</p> | |
227 | +<p> | |
228 | +If the string parameter is composed of many words (i.e. tokens separated by | |
229 | +whitespace(s)), it is considered as an ngram. | |
230 | +</p> | |
231 | +<pre> | |
232 | + entropy("guitar") #=> 0.00389919463243839 | |
233 | +</pre> | |
234 | + | |
235 | + </div> | |
236 | + </div> | |
237 | + | |
238 | + | |
239 | + <div id="method-M000011" class="method-detail"> | |
240 | + <a name="M000011"></a> | |
241 | + | |
242 | + <div class="method-heading"> | |
243 | + | |
244 | + <a href="Document.src/M000011.html" target="Code" class="method-signature" | |
245 | + onclick="popupCode('Document.src/M000011.html');return false;"> | |
246 | + | |
247 | + <span class="method-name">ngrams</span><span class="method-args">(n)</span> | |
248 | + | |
249 | + </a> | |
250 | + | |
251 | + </div> | |
252 | + | |
253 | + <div class="method-description"> | |
254 | + | |
255 | + <p> | |
256 | +Returns an Array containing the <tt>n</tt>-grams (words) from the current | |
257 | +<a href="Document.html">Document</a>. | |
258 | +</p> | |
259 | +<pre> | |
260 | + ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...] | |
261 | +</pre> | |
262 | + | |
263 | + </div> | |
264 | + </div> | |
265 | + | |
266 | + | |
267 | + <h3 class="section-bar">Protected Instance methods</h3> | |
268 | + | |
269 | + | |
270 | + <div id="method-M000010" class="method-detail"> | |
271 | + <a name="M000010"></a> | |
272 | + | |
273 | + <div class="method-heading"> | |
274 | + | |
275 | + <a href="Document.src/M000010.html" target="Code" class="method-signature" | |
276 | + onclick="popupCode('Document.src/M000010.html');return false;"> | |
277 | + | |
278 | + <span class="method-name">format_words</span><span class="method-args">()</span> | |
279 | + | |
280 | + </a> | |
281 | + | |
282 | + </div> | |
283 | + | |
284 | + <div class="method-description"> | |
285 | + | |
286 | + <p> | |
287 | +Any non-word characters are removed from the words (see <a | |
288 | +href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a> | |
289 | +and the <tt>\W</tt> special escape). | |
290 | +</p> | |
291 | +<p> | |
292 | +Protected function, only meant to be called at initialization. | |
293 | +</p> | |
294 | + | |
295 | + </div> | |
296 | + </div> | |
297 | + | |
298 | + | |
299 | + | |
300 | + </div> | |
301 | + | |
302 | + | |
303 | + | |
304 | + | |
305 | + </div> | |
306 | + | |
307 | +<div id="validator-badges"> | |
308 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
309 | +</div> | |
310 | + | |
311 | +</body> | |
312 | +</html> |
doc/classes/RIR/Document.src/M000010.html
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>format_words (RIR::Document)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span> | |
12 | + <span class="ruby-identifier">wo</span> = [] | |
13 | + | |
14 | + <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | |
15 | + <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span> | |
16 | + <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span> | |
17 | + <span class="ruby-keyword kw">end</span> | |
18 | + <span class="ruby-keyword kw">end</span> | |
19 | + | |
20 | + <span class="ruby-identifier">wo</span> | |
21 | + <span class="ruby-keyword kw">end</span></pre> | |
22 | +</body> | |
23 | +</html> |
doc/classes/RIR/Document.src/M000011.html
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>ngrams (RIR::Document)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>) | |
12 | + <span class="ruby-identifier">window</span> = [] | |
13 | + <span class="ruby-identifier">ngrams_array</span> = [] | |
14 | + | |
15 | + <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | |
16 | + <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>) | |
17 | + <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span> | |
18 | + <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">" "</span>) | |
19 | + <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>) | |
20 | + <span class="ruby-keyword kw">end</span> | |
21 | + <span class="ruby-keyword kw">end</span> | |
22 | + | |
23 | + <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span> | |
24 | + <span class="ruby-keyword kw">end</span></pre> | |
25 | +</body> | |
26 | +</html> |
doc/classes/RIR/Document.src/M000012.html
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>count_words (RIR::Document)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span> | |
12 | + <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> } | |
13 | + <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> } | |
14 | + | |
15 | + <span class="ruby-identifier">counts</span> | |
16 | + <span class="ruby-keyword kw">end</span></pre> | |
17 | +</body> | |
18 | +</html> |
doc/classes/RIR/Document.src/M000013.html
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>entropy (RIR::Document)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 77</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>) | |
12 | + <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span> | |
13 | + <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span> | |
14 | + | |
15 | + <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | |
16 | + <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span> | |
17 | + <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>) | |
18 | + <span class="ruby-keyword kw">end</span> | |
19 | + | |
20 | + <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span> | |
21 | + <span class="ruby-identifier">en</span> | |
22 | + <span class="ruby-keyword kw">end</span></pre> | |
23 | +</body> | |
24 | +</html> |
doc/classes/RIR/Document.src/M000014.html
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>new (RIR::Document)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 92</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">content</span>) | |
12 | + <span class="ruby-ivar">@doc_content</span> = <span class="ruby-identifier">content</span> | |
13 | + <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">format_words</span> | |
14 | + <span class="ruby-keyword kw">end</span></pre> | |
15 | +</body> | |
16 | +</html> |
doc/classes/RIR/WebDocument.html
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>Class: RIR::WebDocument [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="classHeader"> | |
46 | + <table class="header-table"> | |
47 | + <tr class="top-aligned-row"> | |
48 | + <td><strong>Class</strong></td> | |
49 | + <td class="class-name-in-header">RIR::WebDocument</td> | |
50 | + </tr> | |
51 | + <tr class="top-aligned-row"> | |
52 | + <td><strong>In:</strong></td> | |
53 | + <td> | |
54 | + | |
55 | + | |
56 | + <a href="../../files/lib/rir/document_rb.html"> | |
57 | + | |
58 | + lib/rir/document.rb | |
59 | + | |
60 | + </a> | |
61 | + | |
62 | + | |
63 | + <br /> | |
64 | + | |
65 | + </td> | |
66 | + </tr> | |
67 | + | |
68 | + | |
69 | + <tr class="top-aligned-row"> | |
70 | + <td><strong>Parent:</strong></td> | |
71 | + <td> | |
72 | + | |
73 | + <a href="Document.html"> | |
74 | + | |
75 | + RIR::Document | |
76 | + | |
77 | + </a> | |
78 | + | |
79 | + </td> | |
80 | + </tr> | |
81 | + | |
82 | + </table> | |
83 | + </div> | |
84 | + <!-- banner header --> | |
85 | + | |
86 | + <div id="bodyContent"> | |
87 | + | |
88 | + <div id="contextContent"> | |
89 | + | |
90 | + <div id="description"> | |
91 | + <p> | |
92 | +A <a href="WebDocument.html">WebDocument</a> is a <a | |
93 | +href="Document.html">Document</a> with a <tt>url</tt>. | |
94 | +</p> | |
95 | + | |
96 | + </div> | |
97 | + | |
98 | + </div> | |
99 | + | |
100 | + | |
101 | + <div id="method-list"> | |
102 | + <h3 class="section-bar">Methods</h3> | |
103 | + | |
104 | + <div class="name-list"> | |
105 | + | |
106 | + <a href="#M000015">get_content</a> | |
107 | + | |
108 | + <a href="#M000016">new</a> | |
109 | + | |
110 | + </div> | |
111 | + </div> | |
112 | + | |
113 | + </div> | |
114 | + | |
115 | + <!-- if includes --> | |
116 | + | |
117 | + <div id="section"> | |
118 | + | |
119 | + | |
120 | + | |
121 | + <div id="attribute-list"> | |
122 | + <h3 class="section-bar">Attributes</h3> | |
123 | + | |
124 | + <div class="name-list"> | |
125 | + <table> | |
126 | + | |
127 | + <tr class="top-aligned-row context-row"> | |
128 | + <td class="context-item-name">url</td> | |
129 | + | |
130 | + <td class="context-item-value"> [R] </td> | |
131 | + | |
132 | + <td class="context-item-desc"></td> | |
133 | + </tr> | |
134 | + | |
135 | + </table> | |
136 | + </div> | |
137 | + </div> | |
138 | + | |
139 | + | |
140 | + <!-- if method_list --> | |
141 | + | |
142 | + <div id="methods"> | |
143 | + | |
144 | + <h3 class="section-bar">Public Class methods</h3> | |
145 | + | |
146 | + | |
147 | + <div id="method-M000015" class="method-detail"> | |
148 | + <a name="M000015"></a> | |
149 | + | |
150 | + <div class="method-heading"> | |
151 | + | |
152 | + <a href="WebDocument.src/M000015.html" target="Code" class="method-signature" | |
153 | + onclick="popupCode('WebDocument.src/M000015.html');return false;"> | |
154 | + | |
155 | + <span class="method-name">get_content</span><span class="method-args">(url)</span> | |
156 | + | |
157 | + </a> | |
158 | + | |
159 | + </div> | |
160 | + | |
161 | + <div class="method-description"> | |
162 | + | |
163 | + <p> | |
164 | +Returns the HTML text from the page of a given <tt>url</tt>. | |
165 | +</p> | |
166 | + | |
167 | + </div> | |
168 | + </div> | |
169 | + | |
170 | + | |
171 | + <div id="method-M000016" class="method-detail"> | |
172 | + <a name="M000016"></a> | |
173 | + | |
174 | + <div class="method-heading"> | |
175 | + | |
176 | + <a href="WebDocument.src/M000016.html" target="Code" class="method-signature" | |
177 | + onclick="popupCode('WebDocument.src/M000016.html');return false;"> | |
178 | + | |
179 | + <span class="method-name">new</span><span class="method-args">(url)</span> | |
180 | + | |
181 | + </a> | |
182 | + | |
183 | + </div> | |
184 | + | |
185 | + <div class="method-description"> | |
186 | + | |
187 | + <p> | |
188 | +<a href="WebDocument.html">WebDocument</a> constructor, the content of the | |
189 | +<a href="Document.html">Document</a> is the HTML page without the tags. | |
190 | +</p> | |
191 | + | |
192 | + </div> | |
193 | + </div> | |
194 | + | |
195 | + | |
196 | + | |
197 | + </div> | |
198 | + | |
199 | + | |
200 | + | |
201 | + | |
202 | + </div> | |
203 | + | |
204 | +<div id="validator-badges"> | |
205 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
206 | +</div> | |
207 | + | |
208 | +</body> | |
209 | +</html> |
doc/classes/RIR/WebDocument.src/M000015.html
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>get_content (RIR::WebDocument)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 105</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>) | |
12 | + <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span> | |
13 | + <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>)) | |
14 | + <span class="ruby-keyword kw">end</span></pre> | |
15 | +</body> | |
16 | +</html> |
doc/classes/RIR/WebDocument.src/M000016.html
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>new (RIR::WebDocument)</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | +</head> | |
9 | +<body class="standalone-code"> | |
10 | + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 112</span> | |
11 | + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">url</span>) | |
12 | + <span class="ruby-ivar">@url</span> = <span class="ruby-identifier">url</span> | |
13 | + <span class="ruby-keyword kw">super</span> <span class="ruby-constant">WebDocument</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>).<span class="ruby-identifier">strip_javascripts</span>.<span class="ruby-identifier">strip_stylesheets</span>.<span class="ruby-identifier">strip_xml_tags</span> | |
14 | + <span class="ruby-keyword kw">end</span></pre> | |
15 | +</body> | |
16 | +</html> |
doc/classes/RIR/WikipediaPage.html
1 | +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | +<head> | |
5 | + <title>Class: RIR::WikipediaPage [RDoc Documentation]</title> | |
6 | + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | + <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | + <script type="text/javascript"> | |
10 | + // <![CDATA[ | |
11 | + | |
12 | + function popupCode( url ) { | |
13 | + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | + } | |
15 | + | |
16 | + function toggleCode( id ) { | |
17 | + if ( document.getElementById ) | |
18 | + elem = document.getElementById( id ); | |
19 | + else if ( document.all ) | |
20 | + elem = eval( "document.all." + id ); | |
21 | + else | |
22 | + return false; | |
23 | + | |
24 | + elemStyle = elem.style; | |
25 | + | |
26 | + if ( elemStyle.display != "block" ) { | |
27 | + elemStyle.display = "block" | |
28 | + } else { | |
29 | + elemStyle.display = "none" | |
30 | + } | |
31 | + | |
32 | + return true; | |
33 | + } | |
34 | + | |
35 | + // Make codeblocks hidden by default | |
36 | + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | + | |
38 | + // ]]> | |
39 | + </script> | |
40 | + | |
41 | +</head> | |
42 | +<body> | |
43 | + | |
44 | + | |
45 | + <div id="classHeader"> | |
46 | + <table class="header-table"> | |
47 | + <tr class="top-aligned-row"> | |
48 | + <td><strong>Class</strong></td> | |
49 | + <td class="class-name-in-header">RIR::WikipediaPage</td> | |
50 | + </tr> | |
51 | + <tr class="top-aligned-row"> | |
52 | + <td><strong>In:</strong></td> | |
53 | + <td> | |
54 | + | |
55 | + | |
56 | + <a href="../../files/lib/rir/document_rb.html"> | |
57 | + | |
58 | + lib/rir/document.rb | |
59 | + | |
60 | + </a> | |
61 | + | |
62 | + | |
63 | + <br /> | |
64 | + | |
65 | + </td> | |
66 | + </tr> | |
67 | + | |
68 | + | |
69 | + <tr class="top-aligned-row"> | |
70 | + <td><strong>Parent:</strong></td> | |
71 | + <td> | |
72 | + | |
73 | + <a href="WebDocument.html"> | |
74 | + | |
75 | + RIR::WebDocument | |
76 | + | |
77 | + </a> | |
78 | + | |
79 | + </td> | |
80 | + </tr> | |
81 | + | |
82 | + </table> | |
83 | + </div> | |
84 | + <!-- banner header --> | |
85 | + | |
86 | + <div id="bodyContent"> | |
87 | + | |
88 | + <div id="contextContent"> | |
89 | + | |
90 | + <div id="description"> | |
91 | + <p> | |
92 | +A <a href="WikipediaPage.html">WikipediaPage</a> is a <a | |
93 | +href="WebDocument.html">WebDocument</a>. | |
94 | +</p> | |
95 | + | |
96 | + </div> | |
97 | + | |
98 | + </div> | |
99 | + | |
100 | + | |
101 | + </div> | |
102 | + | |
103 | + <!-- if includes --> | |
104 | + | |
105 | + <div id="section"> | |
106 | + | |
107 | + | |
108 | + | |
109 | + | |
110 | + <!-- if method_list --> | |
111 | + | |
112 | + | |
113 | + | |
114 | + | |
115 | + </div> | |
116 | + | |
117 | +<div id="validator-badges"> | |
118 | + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
119 | +</div> | |
120 | + | |
121 | +</body> | |
122 | +</html> |
doc/classes/Rir.html
1 | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | -<head> | |
5 | - <title>Module: Rir [RDoc Documentation]</title> | |
6 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | - <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | - <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | |
9 | - <script type="text/javascript"> | |
10 | - // <![CDATA[ | |
11 | - | |
12 | - function popupCode( url ) { | |
13 | - window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | - } | |
15 | - | |
16 | - function toggleCode( id ) { | |
17 | - if ( document.getElementById ) | |
18 | - elem = document.getElementById( id ); | |
19 | - else if ( document.all ) | |
20 | - elem = eval( "document.all." + id ); | |
21 | - else | |
22 | - return false; | |
23 | - | |
24 | - elemStyle = elem.style; | |
25 | - | |
26 | - if ( elemStyle.display != "block" ) { | |
27 | - elemStyle.display = "block" | |
28 | - } else { | |
29 | - elemStyle.display = "none" | |
30 | - } | |
31 | - | |
32 | - return true; | |
33 | - } | |
34 | - | |
35 | - // Make codeblocks hidden by default | |
36 | - document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | - | |
38 | - // ]]> | |
39 | - </script> | |
40 | - | |
41 | -</head> | |
42 | -<body> | |
43 | - | |
44 | - | |
45 | - <div id="classHeader"> | |
46 | - <table class="header-table"> | |
47 | - <tr class="top-aligned-row"> | |
48 | - <td><strong>Module</strong></td> | |
49 | - <td class="class-name-in-header">Rir</td> | |
50 | - </tr> | |
51 | - <tr class="top-aligned-row"> | |
52 | - <td><strong>In:</strong></td> | |
53 | - <td> | |
54 | - | |
55 | - | |
56 | - <a href="../files/lib/rir/string_rb.html"> | |
57 | - | |
58 | - lib/rir/string.rb | |
59 | - | |
60 | - </a> | |
61 | - | |
62 | - | |
63 | - <br /> | |
64 | - | |
65 | - | |
66 | - <a href="../files/lib/rir/document_rb.html"> | |
67 | - | |
68 | - lib/rir/document.rb | |
69 | - | |
70 | - </a> | |
71 | - | |
72 | - | |
73 | - <br /> | |
74 | - | |
75 | - </td> | |
76 | - </tr> | |
77 | - | |
78 | - | |
79 | - </table> | |
80 | - </div> | |
81 | - <!-- banner header --> | |
82 | - | |
83 | - <div id="bodyContent"> | |
84 | - | |
85 | - <div id="contextContent"> | |
86 | - | |
87 | - <div id="description"> | |
88 | - <p> | |
89 | -General module for many purposes related to Information Retrieval. | |
90 | -</p> | |
91 | -<hr size="1"></hr><p> | |
92 | -General module for many purposes related to Information Retrieval. | |
93 | -</p> | |
94 | - | |
95 | - </div> | |
96 | - | |
97 | - </div> | |
98 | - | |
99 | - | |
100 | - </div> | |
101 | - | |
102 | - <!-- if includes --> | |
103 | - | |
104 | - <div id="section"> | |
105 | - | |
106 | - <div id="class-list"> | |
107 | - <h3 class="section-bar">Classes and Modules</h3> | |
108 | - | |
109 | - Class <a href="Rir/Document.html" class="link">Rir::Document</a><br /> | |
110 | -Class <a href="Rir/WebDocument.html" class="link">Rir::WebDocument</a><br /> | |
111 | -Class <a href="Rir/WikipediaPage.html" class="link">Rir::WikipediaPage</a><br /> | |
112 | - | |
113 | - </div> | |
114 | - | |
115 | - <div id="constants-list"> | |
116 | - <h3 class="section-bar">Constants</h3> | |
117 | - | |
118 | - <div class="name-list"> | |
119 | - <table summary="Constants"> | |
120 | - | |
121 | - <tr class="top-aligned-row context-row"> | |
122 | - <td class="context-item-name">Stoplist</td> | |
123 | - <td>=</td> | |
124 | - <td class="context-item-value">[ "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", 
"provide", "quite", "rather", "really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", "yours", "yourself", "yourselves" ]</td> | |
125 | - | |
126 | - <td> </td> | |
127 | - <td class="context-item-desc"> | |
128 | -These are the default stopwords provided by Lemur. | |
129 | - | |
130 | -</td> | |
131 | - | |
132 | - </tr> | |
133 | - | |
134 | - </table> | |
135 | - </div> | |
136 | - </div> | |
137 | - | |
138 | - | |
139 | - | |
140 | - | |
141 | - <!-- if method_list --> | |
142 | - | |
143 | - | |
144 | - | |
145 | - | |
146 | - </div> | |
147 | - | |
148 | -<div id="validator-badges"> | |
149 | - <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
150 | -</div> | |
151 | - | |
152 | -</body> | |
153 | -</html> |
doc/classes/Rir/Document.html
1 | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | -<head> | |
5 | - <title>Class: Rir::Document [RDoc Documentation]</title> | |
6 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | - <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | - <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | - <script type="text/javascript"> | |
10 | - // <![CDATA[ | |
11 | - | |
12 | - function popupCode( url ) { | |
13 | - window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | - } | |
15 | - | |
16 | - function toggleCode( id ) { | |
17 | - if ( document.getElementById ) | |
18 | - elem = document.getElementById( id ); | |
19 | - else if ( document.all ) | |
20 | - elem = eval( "document.all." + id ); | |
21 | - else | |
22 | - return false; | |
23 | - | |
24 | - elemStyle = elem.style; | |
25 | - | |
26 | - if ( elemStyle.display != "block" ) { | |
27 | - elemStyle.display = "block" | |
28 | - } else { | |
29 | - elemStyle.display = "none" | |
30 | - } | |
31 | - | |
32 | - return true; | |
33 | - } | |
34 | - | |
35 | - // Make codeblocks hidden by default | |
36 | - document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | - | |
38 | - // ]]> | |
39 | - </script> | |
40 | - | |
41 | -</head> | |
42 | -<body> | |
43 | - | |
44 | - | |
45 | - <div id="classHeader"> | |
46 | - <table class="header-table"> | |
47 | - <tr class="top-aligned-row"> | |
48 | - <td><strong>Class</strong></td> | |
49 | - <td class="class-name-in-header">Rir::Document</td> | |
50 | - </tr> | |
51 | - <tr class="top-aligned-row"> | |
52 | - <td><strong>In:</strong></td> | |
53 | - <td> | |
54 | - | |
55 | - | |
56 | - <a href="../../files/lib/rir/document_rb.html"> | |
57 | - | |
58 | - lib/rir/document.rb | |
59 | - | |
60 | - </a> | |
61 | - | |
62 | - | |
63 | - <br /> | |
64 | - | |
65 | - </td> | |
66 | - </tr> | |
67 | - | |
68 | - | |
69 | - <tr class="top-aligned-row"> | |
70 | - <td><strong>Parent:</strong></td> | |
71 | - <td> | |
72 | - | |
73 | - Object | |
74 | - | |
75 | - </td> | |
76 | - </tr> | |
77 | - | |
78 | - </table> | |
79 | - </div> | |
80 | - <!-- banner header --> | |
81 | - | |
82 | - <div id="bodyContent"> | |
83 | - | |
84 | - <div id="contextContent"> | |
85 | - | |
86 | - <div id="description"> | |
87 | - <p> | |
88 | -A <a href="Document.html">Document</a> is a bag of words and is constructed | |
89 | -from a string. | |
90 | -</p> | |
91 | - | |
92 | - </div> | |
93 | - | |
94 | - </div> | |
95 | - | |
96 | - | |
97 | - <div id="method-list"> | |
98 | - <h3 class="section-bar">Methods</h3> | |
99 | - | |
100 | - <div class="name-list"> | |
101 | - | |
102 | - <a href="#M000012">count_words</a> | |
103 | - | |
104 | - <a href="#M000013">entropy</a> | |
105 | - | |
106 | - <a href="#M000010">format_words</a> | |
107 | - | |
108 | - <a href="#M000014">new</a> | |
109 | - | |
110 | - <a href="#M000011">ngrams</a> | |
111 | - | |
112 | - </div> | |
113 | - </div> | |
114 | - | |
115 | - </div> | |
116 | - | |
117 | - <!-- if includes --> | |
118 | - | |
119 | - <div id="section"> | |
120 | - | |
121 | - | |
122 | - | |
123 | - <div id="attribute-list"> | |
124 | - <h3 class="section-bar">Attributes</h3> | |
125 | - | |
126 | - <div class="name-list"> | |
127 | - <table> | |
128 | - | |
129 | - <tr class="top-aligned-row context-row"> | |
130 | - <td class="context-item-name">doc_content</td> | |
131 | - | |
132 | - <td class="context-item-value"> [R] </td> | |
133 | - | |
134 | - <td class="context-item-desc"></td> | |
135 | - </tr> | |
136 | - | |
137 | - <tr class="top-aligned-row context-row"> | |
138 | - <td class="context-item-name">words</td> | |
139 | - | |
140 | - <td class="context-item-value"> [R] </td> | |
141 | - | |
142 | - <td class="context-item-desc"></td> | |
143 | - </tr> | |
144 | - | |
145 | - </table> | |
146 | - </div> | |
147 | - </div> | |
148 | - | |
149 | - | |
150 | - <!-- if method_list --> | |
151 | - | |
152 | - <div id="methods"> | |
153 | - | |
154 | - <h3 class="section-bar">Public Class methods</h3> | |
155 | - | |
156 | - | |
157 | - <div id="method-M000014" class="method-detail"> | |
158 | - <a name="M000014"></a> | |
159 | - | |
160 | - <div class="method-heading"> | |
161 | - | |
162 | - <a href="Document.src/M000014.html" target="Code" class="method-signature" | |
163 | - onclick="popupCode('Document.src/M000014.html');return false;"> | |
164 | - | |
165 | - <span class="method-name">new</span><span class="method-args">(content)</span> | |
166 | - | |
167 | - </a> | |
168 | - | |
169 | - </div> | |
170 | - | |
171 | - <div class="method-description"> | |
172 | - | |
173 | - </div> | |
174 | - </div> | |
175 | - | |
176 | - | |
177 | - <h3 class="section-bar">Public Instance methods</h3> | |
178 | - | |
179 | - | |
180 | - <div id="method-M000012" class="method-detail"> | |
181 | - <a name="M000012"></a> | |
182 | - | |
183 | - <div class="method-heading"> | |
184 | - | |
185 | - <a href="Document.src/M000012.html" target="Code" class="method-signature" | |
186 | - onclick="popupCode('Document.src/M000012.html');return false;"> | |
187 | - | |
188 | - <span class="method-name">count_words</span><span class="method-args">()</span> | |
189 | - | |
190 | - </a> | |
191 | - | |
192 | - </div> | |
193 | - | |
194 | - <div class="method-description"> | |
195 | - | |
196 | - <p> | |
197 | -Returns a Hash containing the words and their associated counts in the | |
198 | -current <a href="Document.html">Document</a>. | |
199 | -</p> | |
200 | -<pre> | |
201 | - count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } | |
202 | -</pre> | |
203 | - | |
204 | - </div> | |
205 | - </div> | |
206 | - | |
207 | - | |
208 | - <div id="method-M000013" class="method-detail"> | |
209 | - <a name="M000013"></a> | |
210 | - | |
211 | - <div class="method-heading"> | |
212 | - | |
213 | - <a href="Document.src/M000013.html" target="Code" class="method-signature" | |
214 | - onclick="popupCode('Document.src/M000013.html');return false;"> | |
215 | - | |
216 | - <span class="method-name">entropy</span><span class="method-args">(s)</span> | |
217 | - | |
218 | - </a> | |
219 | - | |
220 | - </div> | |
221 | - | |
222 | - <div class="method-description"> | |
223 | - | |
224 | - <p> | |
225 | -Computes the entropy of a given string <tt>s</tt> inside the document. | |
226 | -</p> | |
227 | -<p> | |
228 | -If the string parameter is composed of many words (i.e. tokens separated by | |
229 | -whitespace(s)), it is considered as an ngram. | |
230 | -</p> | |
231 | -<pre> | |
232 | - entropy("guitar") #=> 0.00389919463243839 | |
233 | -</pre> | |
234 | - | |
235 | - </div> | |
236 | - </div> | |
237 | - | |
238 | - | |
239 | - <div id="method-M000011" class="method-detail"> | |
240 | - <a name="M000011"></a> | |
241 | - | |
242 | - <div class="method-heading"> | |
243 | - | |
244 | - <a href="Document.src/M000011.html" target="Code" class="method-signature" | |
245 | - onclick="popupCode('Document.src/M000011.html');return false;"> | |
246 | - | |
247 | - <span class="method-name">ngrams</span><span class="method-args">(n)</span> | |
248 | - | |
249 | - </a> | |
250 | - | |
251 | - </div> | |
252 | - | |
253 | - <div class="method-description"> | |
254 | - | |
255 | - <p> | |
256 | -Returns an Array containing the <tt>n</tt>-grams (words) from the current | |
257 | -<a href="Document.html">Document</a>. | |
258 | -</p> | |
259 | -<pre> | |
260 | - ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...] | |
261 | -</pre> | |
262 | - | |
263 | - </div> | |
264 | - </div> | |
265 | - | |
266 | - | |
267 | - <h3 class="section-bar">Protected Instance methods</h3> | |
268 | - | |
269 | - | |
270 | - <div id="method-M000010" class="method-detail"> | |
271 | - <a name="M000010"></a> | |
272 | - | |
273 | - <div class="method-heading"> | |
274 | - | |
275 | - <a href="Document.src/M000010.html" target="Code" class="method-signature" | |
276 | - onclick="popupCode('Document.src/M000010.html');return false;"> | |
277 | - | |
278 | - <span class="method-name">format_words</span><span class="method-args">()</span> | |
279 | - | |
280 | - </a> | |
281 | - | |
282 | - </div> | |
283 | - | |
284 | - <div class="method-description"> | |
285 | - | |
286 | - <p> | |
287 | -Any non-word characters are removed from the words (see <a | |
288 | -href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a> | |
289 | -and the \W special escape). | |
290 | -</p> | |
291 | -<p> | |
292 | -Protected function, only meant to be called at the initialization. | |
293 | -</p> | |
294 | - | |
295 | - </div> | |
296 | - </div> | |
297 | - | |
298 | - | |
299 | - | |
300 | - </div> | |
301 | - | |
302 | - | |
303 | - | |
304 | - | |
305 | - </div> | |
306 | - | |
307 | -<div id="validator-badges"> | |
308 | - <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
309 | -</div> | |
310 | - | |
311 | -</body> | |
312 | -</html> |
doc/classes/Rir/Document.src/M000010.html
1 | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | -<head> | |
5 | - <title>format_words (Rir::Document)</title> | |
6 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | -</head> | |
9 | -<body class="standalone-code"> | |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span> | |
11 | - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span> | |
12 | - <span class="ruby-identifier">wo</span> = [] | |
13 | - | |
14 | - <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | |
15 | - <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span> | |
16 | - <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span> | |
17 | - <span class="ruby-keyword kw">end</span> | |
18 | - <span class="ruby-keyword kw">end</span> | |
19 | - | |
20 | - <span class="ruby-identifier">wo</span> | |
21 | - <span class="ruby-keyword kw">end</span></pre> | |
22 | -</body> | |
23 | -</html> |
doc/classes/Rir/Document.src/M000011.html
1 | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | -<head> | |
5 | - <title>ngrams (Rir::Document)</title> | |
6 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | -</head> | |
9 | -<body class="standalone-code"> | |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span> | |
11 | - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>) | |
12 | - <span class="ruby-identifier">window</span> = [] | |
13 | - <span class="ruby-identifier">ngrams_array</span> = [] | |
14 | - | |
15 | - <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | |
16 | - <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>) | |
17 | - <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span> | |
18 | - <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">" "</span>) | |
19 | - <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>) | |
20 | - <span class="ruby-keyword kw">end</span> | |
21 | - <span class="ruby-keyword kw">end</span> | |
22 | - | |
23 | - <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span> | |
24 | - <span class="ruby-keyword kw">end</span></pre> | |
25 | -</body> | |
26 | -</html> |
doc/classes/Rir/Document.src/M000012.html
1 | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | -<head> | |
5 | - <title>count_words (Rir::Document)</title> | |
6 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | -</head> | |
9 | -<body class="standalone-code"> | |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span> | |
11 | - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span> | |
12 | - <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> } | |
13 | - <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> } | |
14 | - | |
15 | - <span class="ruby-identifier">counts</span> | |
16 | - <span class="ruby-keyword kw">end</span></pre> | |
17 | -</body> | |
18 | -</html> |
doc/classes/Rir/Document.src/M000013.html
1 | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | -<head> | |
5 | - <title>entropy (Rir::Document)</title> | |
6 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | -</head> | |
9 | -<body class="standalone-code"> | |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 77</span> | |
11 | - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>) | |
12 | - <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span> | |
13 | - <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span> | |
14 | - | |
15 | - <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> | |
16 | - <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span> | |
17 | - <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>) | |
18 | - <span class="ruby-keyword kw">end</span> | |
19 | - | |
20 | - <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span> | |
21 | - <span class="ruby-identifier">en</span> | |
22 | - <span class="ruby-keyword kw">end</span></pre> | |
23 | -</body> | |
24 | -</html> |
doc/classes/Rir/Document.src/M000014.html
1 | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | -<head> | |
5 | - <title>new (Rir::Document)</title> | |
6 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | -</head> | |
9 | -<body class="standalone-code"> | |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 92</span> | |
11 | - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">content</span>) | |
12 | - <span class="ruby-ivar">@doc_content</span> = <span class="ruby-identifier">content</span> | |
13 | - <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">format_words</span> | |
14 | - <span class="ruby-keyword kw">end</span></pre> | |
15 | -</body> | |
16 | -</html> |
doc/classes/Rir/WebDocument.html
1 | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | -<head> | |
5 | - <title>Class: Rir::WebDocument [RDoc Documentation]</title> | |
6 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | - <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | - <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | - <script type="text/javascript"> | |
10 | - // <![CDATA[ | |
11 | - | |
12 | - function popupCode( url ) { | |
13 | - window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | - } | |
15 | - | |
16 | - function toggleCode( id ) { | |
17 | - if ( document.getElementById ) | |
18 | - elem = document.getElementById( id ); | |
19 | - else if ( document.all ) | |
20 | - elem = eval( "document.all." + id ); | |
21 | - else | |
22 | - return false; | |
23 | - | |
24 | - elemStyle = elem.style; | |
25 | - | |
26 | - if ( elemStyle.display != "block" ) { | |
27 | - elemStyle.display = "block" | |
28 | - } else { | |
29 | - elemStyle.display = "none" | |
30 | - } | |
31 | - | |
32 | - return true; | |
33 | - } | |
34 | - | |
35 | - // Make codeblocks hidden by default | |
36 | - document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | - | |
38 | - // ]]> | |
39 | - </script> | |
40 | - | |
41 | -</head> | |
42 | -<body> | |
43 | - | |
44 | - | |
45 | - <div id="classHeader"> | |
46 | - <table class="header-table"> | |
47 | - <tr class="top-aligned-row"> | |
48 | - <td><strong>Class</strong></td> | |
49 | - <td class="class-name-in-header">Rir::WebDocument</td> | |
50 | - </tr> | |
51 | - <tr class="top-aligned-row"> | |
52 | - <td><strong>In:</strong></td> | |
53 | - <td> | |
54 | - | |
55 | - | |
56 | - <a href="../../files/lib/rir/document_rb.html"> | |
57 | - | |
58 | - lib/rir/document.rb | |
59 | - | |
60 | - </a> | |
61 | - | |
62 | - | |
63 | - <br /> | |
64 | - | |
65 | - </td> | |
66 | - </tr> | |
67 | - | |
68 | - | |
69 | - <tr class="top-aligned-row"> | |
70 | - <td><strong>Parent:</strong></td> | |
71 | - <td> | |
72 | - | |
73 | - <a href="Document.html"> | |
74 | - | |
75 | - Rir::Document | |
76 | - | |
77 | - </a> | |
78 | - | |
79 | - </td> | |
80 | - </tr> | |
81 | - | |
82 | - </table> | |
83 | - </div> | |
84 | - <!-- banner header --> | |
85 | - | |
86 | - <div id="bodyContent"> | |
87 | - | |
88 | - <div id="contextContent"> | |
89 | - | |
90 | - <div id="description"> | |
91 | - <p> | |
92 | -A <a href="WebDocument.html">WebDocument</a> is a <a | |
93 | -href="Document.html">Document</a> with a <tt>url</tt>. | |
94 | -</p> | |
95 | - | |
96 | - </div> | |
97 | - | |
98 | - </div> | |
99 | - | |
100 | - | |
101 | - <div id="method-list"> | |
102 | - <h3 class="section-bar">Methods</h3> | |
103 | - | |
104 | - <div class="name-list"> | |
105 | - | |
106 | - <a href="#M000015">get_content</a> | |
107 | - | |
108 | - <a href="#M000016">new</a> | |
109 | - | |
110 | - </div> | |
111 | - </div> | |
112 | - | |
113 | - </div> | |
114 | - | |
115 | - <!-- if includes --> | |
116 | - | |
117 | - <div id="section"> | |
118 | - | |
119 | - | |
120 | - | |
121 | - <div id="attribute-list"> | |
122 | - <h3 class="section-bar">Attributes</h3> | |
123 | - | |
124 | - <div class="name-list"> | |
125 | - <table> | |
126 | - | |
127 | - <tr class="top-aligned-row context-row"> | |
128 | - <td class="context-item-name">url</td> | |
129 | - | |
130 | - <td class="context-item-value"> [R] </td> | |
131 | - | |
132 | - <td class="context-item-desc"></td> | |
133 | - </tr> | |
134 | - | |
135 | - </table> | |
136 | - </div> | |
137 | - </div> | |
138 | - | |
139 | - | |
140 | - <!-- if method_list --> | |
141 | - | |
142 | - <div id="methods"> | |
143 | - | |
144 | - <h3 class="section-bar">Public Class methods</h3> | |
145 | - | |
146 | - | |
147 | - <div id="method-M000015" class="method-detail"> | |
148 | - <a name="M000015"></a> | |
149 | - | |
150 | - <div class="method-heading"> | |
151 | - | |
152 | - <a href="WebDocument.src/M000015.html" target="Code" class="method-signature" | |
153 | - onclick="popupCode('WebDocument.src/M000015.html');return false;"> | |
154 | - | |
155 | - <span class="method-name">get_content</span><span class="method-args">(url)</span> | |
156 | - | |
157 | - </a> | |
158 | - | |
159 | - </div> | |
160 | - | |
161 | - <div class="method-description"> | |
162 | - | |
163 | - <p> | |
164 | -Returns the HTML text from the page of a given <tt>url</tt>. | |
165 | -</p> | |
166 | - | |
167 | - </div> | |
168 | - </div> | |
169 | - | |
170 | - | |
171 | - <div id="method-M000016" class="method-detail"> | |
172 | - <a name="M000016"></a> | |
173 | - | |
174 | - <div class="method-heading"> | |
175 | - | |
176 | - <a href="WebDocument.src/M000016.html" target="Code" class="method-signature" | |
177 | - onclick="popupCode('WebDocument.src/M000016.html');return false;"> | |
178 | - | |
179 | - <span class="method-name">new</span><span class="method-args">(url)</span> | |
180 | - | |
181 | - </a> | |
182 | - | |
183 | - </div> | |
184 | - | |
185 | - <div class="method-description"> | |
186 | - | |
187 | - <p> | |
188 | -<a href="WebDocument.html">WebDocument</a> constructor, the content of the | |
189 | -<a href="Document.html">Document</a> is the HTML page without the tags. | |
190 | -</p> | |
191 | - | |
192 | - </div> | |
193 | - </div> | |
194 | - | |
195 | - | |
196 | - | |
197 | - </div> | |
198 | - | |
199 | - | |
200 | - | |
201 | - | |
202 | - </div> | |
203 | - | |
204 | -<div id="validator-badges"> | |
205 | - <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
206 | -</div> | |
207 | - | |
208 | -</body> | |
209 | -</html> |
doc/classes/Rir/WebDocument.src/M000015.html
1 | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | -<head> | |
5 | - <title>get_content (Rir::WebDocument)</title> | |
6 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | -</head> | |
9 | -<body class="standalone-code"> | |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 105</span> | |
11 | - <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>) | |
12 | - <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span> | |
13 | - <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>)) | |
14 | - <span class="ruby-keyword kw">end</span></pre> | |
15 | -</body> | |
16 | -</html> |
doc/classes/Rir/WebDocument.src/M000016.html
1 | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | -<head> | |
5 | - <title>new (Rir::WebDocument)</title> | |
6 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | |
8 | -</head> | |
9 | -<body class="standalone-code"> | |
10 | - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 112</span> | |
11 | - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">url</span>) | |
12 | - <span class="ruby-ivar">@url</span> = <span class="ruby-identifier">url</span> | |
13 | - <span class="ruby-keyword kw">super</span> <span class="ruby-constant">WebDocument</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>).<span class="ruby-identifier">strip_javascripts</span>.<span class="ruby-identifier">strip_stylesheets</span>.<span class="ruby-identifier">strip_xml_tags</span> | |
14 | - <span class="ruby-keyword kw">end</span></pre> | |
15 | -</body> | |
16 | -</html> |
doc/classes/Rir/WikipediaPage.html
1 | -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | |
2 | -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | |
3 | -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | |
4 | -<head> | |
5 | - <title>Class: Rir::WikipediaPage [RDoc Documentation]</title> | |
6 | - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | |
7 | - <meta http-equiv="Content-Script-Type" content="text/javascript" /> | |
8 | - <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" /> | |
9 | - <script type="text/javascript"> | |
10 | - // <![CDATA[ | |
11 | - | |
12 | - function popupCode( url ) { | |
13 | - window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | |
14 | - } | |
15 | - | |
16 | - function toggleCode( id ) { | |
17 | - if ( document.getElementById ) | |
18 | - elem = document.getElementById( id ); | |
19 | - else if ( document.all ) | |
20 | - elem = eval( "document.all." + id ); | |
21 | - else | |
22 | - return false; | |
23 | - | |
24 | - elemStyle = elem.style; | |
25 | - | |
26 | - if ( elemStyle.display != "block" ) { | |
27 | - elemStyle.display = "block" | |
28 | - } else { | |
29 | - elemStyle.display = "none" | |
30 | - } | |
31 | - | |
32 | - return true; | |
33 | - } | |
34 | - | |
35 | - // Make codeblocks hidden by default | |
36 | - document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | |
37 | - | |
38 | - // ]]> | |
39 | - </script> | |
40 | - | |
41 | -</head> | |
42 | -<body> | |
43 | - | |
44 | - | |
45 | - <div id="classHeader"> | |
46 | - <table class="header-table"> | |
47 | - <tr class="top-aligned-row"> | |
48 | - <td><strong>Class</strong></td> | |
49 | - <td class="class-name-in-header">Rir::WikipediaPage</td> | |
50 | - </tr> | |
51 | - <tr class="top-aligned-row"> | |
52 | - <td><strong>In:</strong></td> | |
53 | - <td> | |
54 | - | |
55 | - | |
56 | - <a href="../../files/lib/rir/document_rb.html"> | |
57 | - | |
58 | - lib/rir/document.rb | |
59 | - | |
60 | - </a> | |
61 | - | |
62 | - | |
63 | - <br /> | |
64 | - | |
65 | - </td> | |
66 | - </tr> | |
67 | - | |
68 | - | |
69 | - <tr class="top-aligned-row"> | |
70 | - <td><strong>Parent:</strong></td> | |
71 | - <td> | |
72 | - | |
73 | - <a href="WebDocument.html"> | |
74 | - | |
75 | - Rir::WebDocument | |
76 | - | |
77 | - </a> | |
78 | - | |
79 | - </td> | |
80 | - </tr> | |
81 | - | |
82 | - </table> | |
83 | - </div> | |
84 | - <!-- banner header --> | |
85 | - | |
86 | - <div id="bodyContent"> | |
87 | - | |
88 | - <div id="contextContent"> | |
89 | - | |
90 | - <div id="description"> | |
91 | - <p> | |
92 | -A <a href="WikipediaPage.html">WikipediaPage</a> is a <a | |
93 | -href="WebDocument.html">WebDocument</a>. | |
94 | -</p> | |
95 | - | |
96 | - </div> | |
97 | - | |
98 | - </div> | |
99 | - | |
100 | - | |
101 | - </div> | |
102 | - | |
103 | - <!-- if includes --> | |
104 | - | |
105 | - <div id="section"> | |
106 | - | |
107 | - | |
108 | - | |
109 | - | |
110 | - <!-- if method_list --> | |
111 | - | |
112 | - | |
113 | - | |
114 | - | |
115 | - </div> | |
116 | - | |
117 | -<div id="validator-badges"> | |
118 | - <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | |
119 | -</div> | |
120 | - | |
121 | -</body> | |
122 | -</html> |