Commit 35f45ab54d40489d2fe5d4fc5d39c40290635dea

Authored by Romain Deveaud
1 parent 7043da90bf
Exists in master

changing the main module name, generating RDoc

Showing 22 changed files with 935 additions and 935 deletions (side-by-side diff)

doc/classes/RIR.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Module: RIR [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Module</strong></td>
  49 + <td class="class-name-in-header">RIR</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../files/lib/rir/string_rb.html">
  57 +
  58 + lib/rir/string.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 +
  66 + <a href="../files/lib/rir/document_rb.html">
  67 +
  68 + lib/rir/document.rb
  69 +
  70 + </a>
  71 +
  72 +
  73 + <br />
  74 +
  75 + </td>
  76 + </tr>
  77 +
  78 +
  79 + </table>
  80 + </div>
  81 + <!-- banner header -->
  82 +
  83 + <div id="bodyContent">
  84 +
  85 + <div id="contextContent">
  86 +
  87 + <div id="description">
  88 + <p>
  89 +General module for many purposes related to Information Retrieval.
  90 +</p>
  91 +<hr size="1"></hr><p>
  92 +General module for many purposes related to Information Retrieval.
  93 +</p>
  94 +
  95 + </div>
  96 +
  97 + </div>
  98 +
  99 +
  100 + </div>
  101 +
  102 + <!-- if includes -->
  103 +
  104 + <div id="section">
  105 +
  106 + <div id="class-list">
  107 + <h3 class="section-bar">Classes and Modules</h3>
  108 +
  109 + Class <a href="RIR/Document.html" class="link">RIR::Document</a><br />
  110 +Class <a href="RIR/WebDocument.html" class="link">RIR::WebDocument</a><br />
  111 +Class <a href="RIR/WikipediaPage.html" class="link">RIR::WikipediaPage</a><br />
  112 +
  113 + </div>
  114 +
  115 + <div id="constants-list">
  116 + <h3 class="section-bar">Constants</h3>
  117 +
  118 + <div class="name-list">
  119 + <table summary="Constants">
  120 +
  121 + <tr class="top-aligned-row context-row">
  122 + <td class="context-item-name">Stoplist</td>
  123 + <td>=</td>
  124 + <td class="context-item-value">[ &quot;a&quot;, &quot;anything&quot;, &quot;anyway&quot;, &quot;anywhere&quot;, &quot;apart&quot;, &quot;are&quot;, &quot;around&quot;, &quot;as&quot;, &quot;at&quot;, &quot;av&quot;, &quot;be&quot;, &quot;became&quot;, &quot;because&quot;, &quot;become&quot;, &quot;becomes&quot;, &quot;becoming&quot;, &quot;been&quot;, &quot;before&quot;, &quot;beforehand&quot;, &quot;behind&quot;, &quot;being&quot;, &quot;below&quot;, &quot;beside&quot;, &quot;besides&quot;, &quot;between&quot;, &quot;beyond&quot;, &quot;both&quot;, &quot;but&quot;, &quot;by&quot;, &quot;can&quot;, &quot;cannot&quot;, &quot;canst&quot;, &quot;certain&quot;, &quot;cf&quot;, &quot;choose&quot;, &quot;contrariwise&quot;, &quot;cos&quot;, &quot;could&quot;, &quot;cu&quot;, &quot;day&quot;, &quot;do&quot;, &quot;does&quot;, &quot;doesn't&quot;, &quot;doing&quot;, &quot;dost&quot;, &quot;doth&quot;, &quot;double&quot;, &quot;down&quot;, &quot;dual&quot;, &quot;during&quot;, &quot;each&quot;, &quot;either&quot;, &quot;else&quot;, &quot;elsewhere&quot;, &quot;enough&quot;, &quot;et&quot;, &quot;etc&quot;, &quot;even&quot;, &quot;ever&quot;, &quot;every&quot;, &quot;everybody&quot;, &quot;everyone&quot;, &quot;everything&quot;, &quot;everywhere&quot;, &quot;except&quot;, &quot;excepted&quot;, &quot;excepting&quot;, &quot;exception&quot;, &quot;exclude&quot;, &quot;excluding&quot;, &quot;exclusive&quot;, &quot;far&quot;, &quot;farther&quot;, &quot;farthest&quot;, &quot;few&quot;, &quot;ff&quot;, &quot;first&quot;, &quot;for&quot;, &quot;formerly&quot;, &quot;forth&quot;, &quot;forward&quot;, &quot;from&quot;, &quot;front&quot;, &quot;further&quot;, &quot;furthermore&quot;, &quot;furthest&quot;, &quot;get&quot;, &quot;go&quot;, &quot;had&quot;, &quot;halves&quot;, &quot;hardly&quot;, &quot;has&quot;, &quot;hast&quot;, &quot;hath&quot;, &quot;have&quot;, &quot;he&quot;, &quot;hence&quot;, &quot;henceforth&quot;, &quot;her&quot;, &quot;here&quot;, 
&quot;hereabouts&quot;, &quot;hereafter&quot;, &quot;hereby&quot;, &quot;herein&quot;, &quot;hereto&quot;, &quot;hereupon&quot;, &quot;hers&quot;, &quot;herself&quot;, &quot;him&quot;, &quot;himself&quot;, &quot;hindmost&quot;, &quot;his&quot;, &quot;hither&quot;, &quot;hitherto&quot;, &quot;how&quot;, &quot;however&quot;, &quot;howsoever&quot;, &quot;i&quot;, &quot;ie&quot;, &quot;if&quot;, &quot;in&quot;, &quot;inasmuch&quot;, &quot;inc&quot;, &quot;include&quot;, &quot;included&quot;, &quot;including&quot;, &quot;indeed&quot;, &quot;indoors&quot;, &quot;inside&quot;, &quot;insomuch&quot;, &quot;instead&quot;, &quot;into&quot;, &quot;inward&quot;, &quot;inwards&quot;, &quot;is&quot;, &quot;it&quot;, &quot;its&quot;, &quot;itself&quot;, &quot;just&quot;, &quot;kind&quot;, &quot;kg&quot;, &quot;km&quot;, &quot;last&quot;, &quot;latter&quot;, &quot;latterly&quot;, &quot;less&quot;, &quot;lest&quot;, &quot;let&quot;, &quot;like&quot;, &quot;little&quot;, &quot;ltd&quot;, &quot;many&quot;, &quot;may&quot;, &quot;maybe&quot;, &quot;me&quot;, &quot;meantime&quot;, &quot;meanwhile&quot;, &quot;might&quot;, &quot;moreover&quot;, &quot;most&quot;, &quot;mostly&quot;, &quot;more&quot;, &quot;mr&quot;, &quot;mrs&quot;, &quot;ms&quot;, &quot;much&quot;, &quot;must&quot;, &quot;my&quot;, &quot;myself&quot;, &quot;namely&quot;, &quot;need&quot;, &quot;neither&quot;, &quot;never&quot;, &quot;nevertheless&quot;, &quot;next&quot;, &quot;no&quot;, &quot;nobody&quot;, &quot;none&quot;, &quot;nonetheless&quot;, &quot;noone&quot;, &quot;nope&quot;, &quot;nor&quot;, &quot;not&quot;, &quot;nothing&quot;, &quot;notwithstanding&quot;, &quot;now&quot;, &quot;nowadays&quot;, &quot;nowhere&quot;, &quot;of&quot;, &quot;off&quot;, &quot;often&quot;, &quot;ok&quot;, &quot;on&quot;, &quot;once&quot;, &quot;one&quot;, &quot;only&quot;, &quot;onto&quot;, &quot;or&quot;, &quot;other&quot;, &quot;others&quot;, &quot;otherwise&quot;, &quot;ought&quot;, &quot;our&quot;, &quot;ours&quot;, 
&quot;ourselves&quot;, &quot;out&quot;, &quot;outside&quot;, &quot;over&quot;, &quot;own&quot;, &quot;per&quot;, &quot;perhaps&quot;, &quot;plenty&quot;, &quot;provide&quot;, &quot;quite&quot;, &quot;rather&quot;, &quot;really&quot;, &quot;round&quot;, &quot;said&quot;, &quot;sake&quot;, &quot;same&quot;, &quot;sang&quot;, &quot;save&quot;, &quot;saw&quot;, &quot;see&quot;, &quot;seeing&quot;, &quot;seem&quot;, &quot;seemed&quot;, &quot;seeming&quot;, &quot;seems&quot;, &quot;seen&quot;, &quot;seldom&quot;, &quot;selves&quot;, &quot;sent&quot;, &quot;several&quot;, &quot;shalt&quot;, &quot;she&quot;, &quot;should&quot;, &quot;shown&quot;, &quot;sideways&quot;, &quot;since&quot;, &quot;slept&quot;, &quot;slew&quot;, &quot;slung&quot;, &quot;slunk&quot;, &quot;smote&quot;, &quot;so&quot;, &quot;some&quot;, &quot;somebody&quot;, &quot;somehow&quot;, &quot;someone&quot;, &quot;something&quot;, &quot;sometime&quot;, &quot;sometimes&quot;, &quot;somewhat&quot;, &quot;somewhere&quot;, &quot;spake&quot;, &quot;spat&quot;, &quot;spoke&quot;, &quot;spoken&quot;, &quot;sprang&quot;, &quot;sprung&quot;, &quot;stave&quot;, &quot;staves&quot;, &quot;still&quot;, &quot;such&quot;, &quot;supposing&quot;, &quot;than&quot;, &quot;that&quot;, &quot;the&quot;, &quot;thee&quot;, &quot;their&quot;, &quot;them&quot;, &quot;themselves&quot;, &quot;then&quot;, &quot;thence&quot;, &quot;thenceforth&quot;, &quot;there&quot;, &quot;thereabout&quot;, &quot;thereabouts&quot;, &quot;thereafter&quot;, &quot;thereby&quot;, &quot;therefore&quot;, &quot;therein&quot;, &quot;thereof&quot;, &quot;thereon&quot;, &quot;thereto&quot;, &quot;thereupon&quot;, &quot;these&quot;, &quot;they&quot;, &quot;this&quot;, &quot;those&quot;, &quot;thou&quot;, &quot;though&quot;, &quot;thrice&quot;, &quot;through&quot;, &quot;throughout&quot;, &quot;thru&quot;, &quot;thus&quot;, &quot;thy&quot;, &quot;thyself&quot;, &quot;till&quot;, &quot;to&quot;, &quot;together&quot;, &quot;too&quot;, &quot;toward&quot;, 
&quot;towards&quot;, &quot;ugh&quot;, &quot;unable&quot;, &quot;under&quot;, &quot;underneath&quot;, &quot;unless&quot;, &quot;unlike&quot;, &quot;until&quot;, &quot;up&quot;, &quot;upon&quot;, &quot;upward&quot;, &quot;upwards&quot;, &quot;us&quot;, &quot;use&quot;, &quot;used&quot;, &quot;using&quot;, &quot;very&quot;, &quot;via&quot;, &quot;vs&quot;, &quot;want&quot;, &quot;was&quot;, &quot;we&quot;, &quot;week&quot;, &quot;well&quot;, &quot;were&quot;, &quot;what&quot;, &quot;whatever&quot;, &quot;whatsoever&quot;, &quot;when&quot;, &quot;whence&quot;, &quot;whenever&quot;, &quot;whensoever&quot;, &quot;where&quot;, &quot;whereabouts&quot;, &quot;whereafter&quot;, &quot;whereas&quot;, &quot;whereat&quot;, &quot;whereby&quot;, &quot;wherefore&quot;, &quot;wherefrom&quot;, &quot;wherein&quot;, &quot;whereinto&quot;, &quot;whereof&quot;, &quot;whereon&quot;, &quot;wheresoever&quot;, &quot;whereto&quot;, &quot;whereunto&quot;, &quot;whereupon&quot;, &quot;wherever&quot;, &quot;wherewith&quot;, &quot;whether&quot;, &quot;whew&quot;, &quot;which&quot;, &quot;whichever&quot;, &quot;whichsoever&quot;, &quot;while&quot;, &quot;whilst&quot;, &quot;whither&quot;, &quot;who&quot;, &quot;whoa&quot;, &quot;whoever&quot;, &quot;whole&quot;, &quot;whom&quot;, &quot;whomever&quot;, &quot;whomsoever&quot;, &quot;whose&quot;, &quot;whosoever&quot;, &quot;why&quot;, &quot;will&quot;, &quot;wilt&quot;, &quot;with&quot;, &quot;within&quot;, &quot;without&quot;, &quot;worse&quot;, &quot;worst&quot;, &quot;would&quot;, &quot;wow&quot;, &quot;ye&quot;, &quot;yet&quot;, &quot;year&quot;, &quot;yippee&quot;, &quot;you&quot;, &quot;your&quot;, &quot;yours&quot;, &quot;yourself&quot;, &quot;yourselves&quot; ]</td>
  125 +
  126 + <td>&nbsp;</td>
  127 + <td class="context-item-desc">
  128 +These are the default stopwords provided by Lemur.
  129 +
  130 +</td>
  131 +
  132 + </tr>
  133 +
  134 + </table>
  135 + </div>
  136 + </div>
  137 +
  138 +
  139 +
  140 +
  141 + <!-- if method_list -->
  142 +
  143 +
  144 +
  145 +
  146 + </div>
  147 +
  148 +<div id="validator-badges">
  149 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  150 +</div>
  151 +
  152 +</body>
  153 +</html>
doc/classes/RIR/Document.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: RIR::Document [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">RIR::Document</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../files/lib/rir/document_rb.html">
  57 +
  58 + lib/rir/document.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + Object
  74 +
  75 + </td>
  76 + </tr>
  77 +
  78 + </table>
  79 + </div>
  80 + <!-- banner header -->
  81 +
  82 + <div id="bodyContent">
  83 +
  84 + <div id="contextContent">
  85 +
  86 + <div id="description">
  87 + <p>
  88 +A <a href="Document.html">Document</a> is a bag of words and is constructed
  89 +from a string.
  90 +</p>
  91 +
  92 + </div>
  93 +
  94 + </div>
  95 +
  96 +
  97 + <div id="method-list">
  98 + <h3 class="section-bar">Methods</h3>
  99 +
  100 + <div class="name-list">
  101 +
  102 + <a href="#M000012">count_words</a>&nbsp;&nbsp;
  103 +
  104 + <a href="#M000013">entropy</a>&nbsp;&nbsp;
  105 +
  106 + <a href="#M000010">format_words</a>&nbsp;&nbsp;
  107 +
  108 + <a href="#M000014">new</a>&nbsp;&nbsp;
  109 +
  110 + <a href="#M000011">ngrams</a>&nbsp;&nbsp;
  111 +
  112 + </div>
  113 + </div>
  114 +
  115 + </div>
  116 +
  117 + <!-- if includes -->
  118 +
  119 + <div id="section">
  120 +
  121 +
  122 +
  123 + <div id="attribute-list">
  124 + <h3 class="section-bar">Attributes</h3>
  125 +
  126 + <div class="name-list">
  127 + <table>
  128 +
  129 + <tr class="top-aligned-row context-row">
  130 + <td class="context-item-name">doc_content</td>
  131 +
  132 + <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  133 +
  134 + <td class="context-item-desc"></td>
  135 + </tr>
  136 +
  137 + <tr class="top-aligned-row context-row">
  138 + <td class="context-item-name">words</td>
  139 +
  140 + <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  141 +
  142 + <td class="context-item-desc"></td>
  143 + </tr>
  144 +
  145 + </table>
  146 + </div>
  147 + </div>
  148 +
  149 +
  150 + <!-- if method_list -->
  151 +
  152 + <div id="methods">
  153 +
  154 + <h3 class="section-bar">Public Class methods</h3>
  155 +
  156 +
  157 + <div id="method-M000014" class="method-detail">
  158 + <a name="M000014"></a>
  159 +
  160 + <div class="method-heading">
  161 +
  162 + <a href="Document.src/M000014.html" target="Code" class="method-signature"
  163 + onclick="popupCode('Document.src/M000014.html');return false;">
  164 +
  165 + <span class="method-name">new</span><span class="method-args">(content)</span>
  166 +
  167 + </a>
  168 +
  169 + </div>
  170 +
  171 + <div class="method-description">
  172 +
  173 + </div>
  174 + </div>
  175 +
  176 +
  177 + <h3 class="section-bar">Public Instance methods</h3>
  178 +
  179 +
  180 + <div id="method-M000012" class="method-detail">
  181 + <a name="M000012"></a>
  182 +
  183 + <div class="method-heading">
  184 +
  185 + <a href="Document.src/M000012.html" target="Code" class="method-signature"
  186 + onclick="popupCode('Document.src/M000012.html');return false;">
  187 +
  188 + <span class="method-name">count_words</span><span class="method-args">()</span>
  189 +
  190 + </a>
  191 +
  192 + </div>
  193 +
  194 + <div class="method-description">
  195 +
  196 + <p>
  197 +Returns a Hash containing the words and their associated counts in the
  198 +current <a href="Document.html">Document</a>.
  199 +</p>
  200 +<pre>
  201 + count_words #=&gt; { &quot;guitar&quot;=&gt;1, &quot;bass&quot;=&gt;3, &quot;album&quot;=&gt;20, ... }
  202 +</pre>
  203 +
  204 + </div>
  205 + </div>
  206 +
  207 +
  208 + <div id="method-M000013" class="method-detail">
  209 + <a name="M000013"></a>
  210 +
  211 + <div class="method-heading">
  212 +
  213 + <a href="Document.src/M000013.html" target="Code" class="method-signature"
  214 + onclick="popupCode('Document.src/M000013.html');return false;">
  215 +
  216 + <span class="method-name">entropy</span><span class="method-args">(s)</span>
  217 +
  218 + </a>
  219 +
  220 + </div>
  221 +
  222 + <div class="method-description">
  223 +
  224 + <p>
  225 +Computes the entropy of a given string <tt>s</tt> inside the document.
  226 +</p>
  227 +<p>
  228 +If the string parameter is composed of several words (i.e. tokens separated by
  229 +whitespace), it is treated as an n-gram.
  230 +</p>
  231 +<pre>
  232 + entropy(&quot;guitar&quot;) #=&gt; 0.00389919463243839
  233 +</pre>
  234 +
  235 + </div>
  236 + </div>
  237 +
  238 +
  239 + <div id="method-M000011" class="method-detail">
  240 + <a name="M000011"></a>
  241 +
  242 + <div class="method-heading">
  243 +
  244 + <a href="Document.src/M000011.html" target="Code" class="method-signature"
  245 + onclick="popupCode('Document.src/M000011.html');return false;">
  246 +
  247 + <span class="method-name">ngrams</span><span class="method-args">(n)</span>
  248 +
  249 + </a>
  250 +
  251 + </div>
  252 +
  253 + <div class="method-description">
  254 +
  255 + <p>
  256 +Returns an Array containing the <tt>n</tt>-grams (words) from the current
  257 +<a href="Document.html">Document</a>.
  258 +</p>
  259 +<pre>
  260 + ngrams(2) #=&gt; [&quot;the free&quot;, &quot;free encyclopedia&quot;, &quot;encyclopedia var&quot;, &quot;var skin&quot;, ...]
  261 +</pre>
  262 +
  263 + </div>
  264 + </div>
  265 +
  266 +
  267 + <h3 class="section-bar">Protected Instance methods</h3>
  268 +
  269 +
  270 + <div id="method-M000010" class="method-detail">
  271 + <a name="M000010"></a>
  272 +
  273 + <div class="method-heading">
  274 +
  275 + <a href="Document.src/M000010.html" target="Code" class="method-signature"
  276 + onclick="popupCode('Document.src/M000010.html');return false;">
  277 +
  278 + <span class="method-name">format_words</span><span class="method-args">()</span>
  279 +
  280 + </a>
  281 +
  282 + </div>
  283 +
  284 + <div class="method-description">
  285 +
  286 + <p>
  287 +Any non-word characters are removed from the words (see <a
  288 +href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a>
  289 +and the \W special escape).
  290 +</p>
  291 +<p>
  292 +Protected function, only meant to be called at initialization.
  293 +</p>
  294 +
  295 + </div>
  296 + </div>
  297 +
  298 +
  299 +
  300 + </div>
  301 +
  302 +
  303 +
  304 +
  305 + </div>
  306 +
  307 +<div id="validator-badges">
  308 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  309 +</div>
  310 +
  311 +</body>
  312 +</html>
doc/classes/RIR/Document.src/M000010.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>format_words (RIR::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span>
  12 + <span class="ruby-identifier">wo</span> = []
  13 +
  14 + <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
  15 + <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span>
  16 + <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span>
  17 + <span class="ruby-keyword kw">end</span>
  18 + <span class="ruby-keyword kw">end</span>
  19 +
  20 + <span class="ruby-identifier">wo</span>
  21 + <span class="ruby-keyword kw">end</span></pre>
  22 +</body>
  23 +</html>
doc/classes/RIR/Document.src/M000011.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>ngrams (RIR::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>)
  12 + <span class="ruby-identifier">window</span> = []
  13 + <span class="ruby-identifier">ngrams_array</span> = []
  14 +
  15 + <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
  16 + <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>)
  17 + <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span>
  18 + <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">&quot; &quot;</span>)
  19 + <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>)
  20 + <span class="ruby-keyword kw">end</span>
  21 + <span class="ruby-keyword kw">end</span>
  22 +
  23 + <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span>
  24 + <span class="ruby-keyword kw">end</span></pre>
  25 +</body>
  26 +</html>
doc/classes/RIR/Document.src/M000012.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>count_words (RIR::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span>
  12 + <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> }
  13 + <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> }
  14 +
  15 + <span class="ruby-identifier">counts</span>
  16 + <span class="ruby-keyword kw">end</span></pre>
  17 +</body>
  18 +</html>
doc/classes/RIR/Document.src/M000013.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>entropy (RIR::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 77</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>)
  12 + <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span>
  13 + <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span>
  14 +
  15 + <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
  16 + <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span>
  17 + <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>)
  18 + <span class="ruby-keyword kw">end</span>
  19 +
  20 + <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span>
  21 + <span class="ruby-identifier">en</span>
  22 + <span class="ruby-keyword kw">end</span></pre>
  23 +</body>
  24 +</html>
doc/classes/RIR/Document.src/M000014.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>new (RIR::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 92</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">content</span>)
  12 + <span class="ruby-ivar">@doc_content</span> = <span class="ruby-identifier">content</span>
  13 + <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">format_words</span>
  14 + <span class="ruby-keyword kw">end</span></pre>
  15 +</body>
  16 +</html>
doc/classes/RIR/WebDocument.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: RIR::WebDocument [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">RIR::WebDocument</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../files/lib/rir/document_rb.html">
  57 +
  58 + lib/rir/document.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + <a href="Document.html">
  74 +
  75 + RIR::Document
  76 +
  77 + </a>
  78 +
  79 + </td>
  80 + </tr>
  81 +
  82 + </table>
  83 + </div>
  84 + <!-- banner header -->
  85 +
  86 + <div id="bodyContent">
  87 +
  88 + <div id="contextContent">
  89 +
  90 + <div id="description">
  91 + <p>
  92 +A <a href="WebDocument.html">WebDocument</a> is a <a
  93 +href="Document.html">Document</a> with a <tt>url</tt>.
  94 +</p>
  95 +
  96 + </div>
  97 +
  98 + </div>
  99 +
  100 +
  101 + <div id="method-list">
  102 + <h3 class="section-bar">Methods</h3>
  103 +
  104 + <div class="name-list">
  105 +
  106 + <a href="#M000015">get_content</a>&nbsp;&nbsp;
  107 +
  108 + <a href="#M000016">new</a>&nbsp;&nbsp;
  109 +
  110 + </div>
  111 + </div>
  112 +
  113 + </div>
  114 +
  115 + <!-- if includes -->
  116 +
  117 + <div id="section">
  118 +
  119 +
  120 +
  121 + <div id="attribute-list">
  122 + <h3 class="section-bar">Attributes</h3>
  123 +
  124 + <div class="name-list">
  125 + <table>
  126 +
  127 + <tr class="top-aligned-row context-row">
  128 + <td class="context-item-name">url</td>
  129 +
  130 + <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  131 +
  132 + <td class="context-item-desc"></td>
  133 + </tr>
  134 +
  135 + </table>
  136 + </div>
  137 + </div>
  138 +
  139 +
  140 + <!-- if method_list -->
  141 +
  142 + <div id="methods">
  143 +
  144 + <h3 class="section-bar">Public Class methods</h3>
  145 +
  146 +
  147 + <div id="method-M000015" class="method-detail">
  148 + <a name="M000015"></a>
  149 +
  150 + <div class="method-heading">
  151 +
  152 + <a href="WebDocument.src/M000015.html" target="Code" class="method-signature"
  153 + onclick="popupCode('WebDocument.src/M000015.html');return false;">
  154 +
  155 + <span class="method-name">get_content</span><span class="method-args">(url)</span>
  156 +
  157 + </a>
  158 +
  159 + </div>
  160 +
  161 + <div class="method-description">
  162 +
  163 + <p>
  164 +Returns the HTML text from the page of a given <tt>url</tt>.
  165 +</p>
  166 +
  167 + </div>
  168 + </div>
  169 +
  170 +
  171 + <div id="method-M000016" class="method-detail">
  172 + <a name="M000016"></a>
  173 +
  174 + <div class="method-heading">
  175 +
  176 + <a href="WebDocument.src/M000016.html" target="Code" class="method-signature"
  177 + onclick="popupCode('WebDocument.src/M000016.html');return false;">
  178 +
  179 + <span class="method-name">new</span><span class="method-args">(url)</span>
  180 +
  181 + </a>
  182 +
  183 + </div>
  184 +
  185 + <div class="method-description">
  186 +
  187 + <p>
  188 +<a href="WebDocument.html">WebDocument</a> constructor, the content of the
  189 +<a href="Document.html">Document</a> is the HTML page without the tags.
  190 +</p>
  191 +
  192 + </div>
  193 + </div>
  194 +
  195 +
  196 +
  197 + </div>
  198 +
  199 +
  200 +
  201 +
  202 + </div>
  203 +
  204 +<div id="validator-badges">
  205 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  206 +</div>
  207 +
  208 +</body>
  209 +</html>
doc/classes/RIR/WebDocument.src/M000015.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>get_content (RIR::WebDocument)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 105</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>)
  12 + <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span>
  13 + <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>))
  14 + <span class="ruby-keyword kw">end</span></pre>
  15 +</body>
  16 +</html>
doc/classes/RIR/WebDocument.src/M000016.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>new (RIR::WebDocument)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 112</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">url</span>)
  12 + <span class="ruby-ivar">@url</span> = <span class="ruby-identifier">url</span>
  13 + <span class="ruby-keyword kw">super</span> <span class="ruby-constant">WebDocument</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>).<span class="ruby-identifier">strip_javascripts</span>.<span class="ruby-identifier">strip_stylesheets</span>.<span class="ruby-identifier">strip_xml_tags</span>
  14 + <span class="ruby-keyword kw">end</span></pre>
  15 +</body>
  16 +</html>
doc/classes/RIR/WikipediaPage.html
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: RIR::WikipediaPage [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">RIR::WikipediaPage</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../files/lib/rir/document_rb.html">
  57 +
  58 + lib/rir/document.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + <a href="WebDocument.html">
  74 +
  75 + RIR::WebDocument
  76 +
  77 + </a>
  78 +
  79 + </td>
  80 + </tr>
  81 +
  82 + </table>
  83 + </div>
  84 + <!-- banner header -->
  85 +
  86 + <div id="bodyContent">
  87 +
  88 + <div id="contextContent">
  89 +
  90 + <div id="description">
  91 + <p>
  92 +A <a href="WikipediaPage.html">WikipediaPage</a> is a <a
  93 +href="WebDocument.html">WebDocument</a>.
  94 +</p>
  95 +
  96 + </div>
  97 +
  98 + </div>
  99 +
  100 +
  101 + </div>
  102 +
  103 + <!-- if includes -->
  104 +
  105 + <div id="section">
  106 +
  107 +
  108 +
  109 +
  110 + <!-- if method_list -->
  111 +
  112 +
  113 +
  114 +
  115 + </div>
  116 +
  117 +<div id="validator-badges">
  118 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  119 +</div>
  120 +
  121 +</body>
  122 +</html>
doc/classes/Rir.html
1   -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2   -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3   -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4   -<head>
5   - <title>Module: Rir [RDoc Documentation]</title>
6   - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7   - <meta http-equiv="Content-Script-Type" content="text/javascript" />
8   - <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
9   - <script type="text/javascript">
10   - // <![CDATA[
11   -
12   - function popupCode( url ) {
13   - window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14   - }
15   -
16   - function toggleCode( id ) {
17   - if ( document.getElementById )
18   - elem = document.getElementById( id );
19   - else if ( document.all )
20   - elem = eval( "document.all." + id );
21   - else
22   - return false;
23   -
24   - elemStyle = elem.style;
25   -
26   - if ( elemStyle.display != "block" ) {
27   - elemStyle.display = "block"
28   - } else {
29   - elemStyle.display = "none"
30   - }
31   -
32   - return true;
33   - }
34   -
35   - // Make codeblocks hidden by default
36   - document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37   -
38   - // ]]>
39   - </script>
40   -
41   -</head>
42   -<body>
43   -
44   -
45   - <div id="classHeader">
46   - <table class="header-table">
47   - <tr class="top-aligned-row">
48   - <td><strong>Module</strong></td>
49   - <td class="class-name-in-header">Rir</td>
50   - </tr>
51   - <tr class="top-aligned-row">
52   - <td><strong>In:</strong></td>
53   - <td>
54   -
55   -
56   - <a href="../files/lib/rir/string_rb.html">
57   -
58   - lib/rir/string.rb
59   -
60   - </a>
61   -
62   -
63   - <br />
64   -
65   -
66   - <a href="../files/lib/rir/document_rb.html">
67   -
68   - lib/rir/document.rb
69   -
70   - </a>
71   -
72   -
73   - <br />
74   -
75   - </td>
76   - </tr>
77   -
78   -
79   - </table>
80   - </div>
81   - <!-- banner header -->
82   -
83   - <div id="bodyContent">
84   -
85   - <div id="contextContent">
86   -
87   - <div id="description">
88   - <p>
89   -General module for many purposes related to Information Retrieval.
90   -</p>
91   -<hr size="1"></hr><p>
92   -General module for many purposes related to Information Retrieval.
93   -</p>
94   -
95   - </div>
96   -
97   - </div>
98   -
99   -
100   - </div>
101   -
102   - <!-- if includes -->
103   -
104   - <div id="section">
105   -
106   - <div id="class-list">
107   - <h3 class="section-bar">Classes and Modules</h3>
108   -
109   - Class <a href="Rir/Document.html" class="link">Rir::Document</a><br />
110   -Class <a href="Rir/WebDocument.html" class="link">Rir::WebDocument</a><br />
111   -Class <a href="Rir/WikipediaPage.html" class="link">Rir::WikipediaPage</a><br />
112   -
113   - </div>
114   -
115   - <div id="constants-list">
116   - <h3 class="section-bar">Constants</h3>
117   -
118   - <div class="name-list">
119   - <table summary="Constants">
120   -
121   - <tr class="top-aligned-row context-row">
122   - <td class="context-item-name">Stoplist</td>
123   - <td>=</td>
124   - <td class="context-item-value">[ &quot;a&quot;, &quot;anything&quot;, &quot;anyway&quot;, &quot;anywhere&quot;, &quot;apart&quot;, &quot;are&quot;, &quot;around&quot;, &quot;as&quot;, &quot;at&quot;, &quot;av&quot;, &quot;be&quot;, &quot;became&quot;, &quot;because&quot;, &quot;become&quot;, &quot;becomes&quot;, &quot;becoming&quot;, &quot;been&quot;, &quot;before&quot;, &quot;beforehand&quot;, &quot;behind&quot;, &quot;being&quot;, &quot;below&quot;, &quot;beside&quot;, &quot;besides&quot;, &quot;between&quot;, &quot;beyond&quot;, &quot;both&quot;, &quot;but&quot;, &quot;by&quot;, &quot;can&quot;, &quot;cannot&quot;, &quot;canst&quot;, &quot;certain&quot;, &quot;cf&quot;, &quot;choose&quot;, &quot;contrariwise&quot;, &quot;cos&quot;, &quot;could&quot;, &quot;cu&quot;, &quot;day&quot;, &quot;do&quot;, &quot;does&quot;, &quot;doesn't&quot;, &quot;doing&quot;, &quot;dost&quot;, &quot;doth&quot;, &quot;double&quot;, &quot;down&quot;, &quot;dual&quot;, &quot;during&quot;, &quot;each&quot;, &quot;either&quot;, &quot;else&quot;, &quot;elsewhere&quot;, &quot;enough&quot;, &quot;et&quot;, &quot;etc&quot;, &quot;even&quot;, &quot;ever&quot;, &quot;every&quot;, &quot;everybody&quot;, &quot;everyone&quot;, &quot;everything&quot;, &quot;everywhere&quot;, &quot;except&quot;, &quot;excepted&quot;, &quot;excepting&quot;, &quot;exception&quot;, &quot;exclude&quot;, &quot;excluding&quot;, &quot;exclusive&quot;, &quot;far&quot;, &quot;farther&quot;, &quot;farthest&quot;, &quot;few&quot;, &quot;ff&quot;, &quot;first&quot;, &quot;for&quot;, &quot;formerly&quot;, &quot;forth&quot;, &quot;forward&quot;, &quot;from&quot;, &quot;front&quot;, &quot;further&quot;, &quot;furthermore&quot;, &quot;furthest&quot;, &quot;get&quot;, &quot;go&quot;, &quot;had&quot;, &quot;halves&quot;, &quot;hardly&quot;, &quot;has&quot;, &quot;hast&quot;, &quot;hath&quot;, &quot;have&quot;, &quot;he&quot;, &quot;hence&quot;, &quot;henceforth&quot;, &quot;her&quot;, &quot;here&quot;, 
&quot;hereabouts&quot;, &quot;hereafter&quot;, &quot;hereby&quot;, &quot;herein&quot;, &quot;hereto&quot;, &quot;hereupon&quot;, &quot;hers&quot;, &quot;herself&quot;, &quot;him&quot;, &quot;himself&quot;, &quot;hindmost&quot;, &quot;his&quot;, &quot;hither&quot;, &quot;hitherto&quot;, &quot;how&quot;, &quot;however&quot;, &quot;howsoever&quot;, &quot;i&quot;, &quot;ie&quot;, &quot;if&quot;, &quot;in&quot;, &quot;inasmuch&quot;, &quot;inc&quot;, &quot;include&quot;, &quot;included&quot;, &quot;including&quot;, &quot;indeed&quot;, &quot;indoors&quot;, &quot;inside&quot;, &quot;insomuch&quot;, &quot;instead&quot;, &quot;into&quot;, &quot;inward&quot;, &quot;inwards&quot;, &quot;is&quot;, &quot;it&quot;, &quot;its&quot;, &quot;itself&quot;, &quot;just&quot;, &quot;kind&quot;, &quot;kg&quot;, &quot;km&quot;, &quot;last&quot;, &quot;latter&quot;, &quot;latterly&quot;, &quot;less&quot;, &quot;lest&quot;, &quot;let&quot;, &quot;like&quot;, &quot;little&quot;, &quot;ltd&quot;, &quot;many&quot;, &quot;may&quot;, &quot;maybe&quot;, &quot;me&quot;, &quot;meantime&quot;, &quot;meanwhile&quot;, &quot;might&quot;, &quot;moreover&quot;, &quot;most&quot;, &quot;mostly&quot;, &quot;more&quot;, &quot;mr&quot;, &quot;mrs&quot;, &quot;ms&quot;, &quot;much&quot;, &quot;must&quot;, &quot;my&quot;, &quot;myself&quot;, &quot;namely&quot;, &quot;need&quot;, &quot;neither&quot;, &quot;never&quot;, &quot;nevertheless&quot;, &quot;next&quot;, &quot;no&quot;, &quot;nobody&quot;, &quot;none&quot;, &quot;nonetheless&quot;, &quot;noone&quot;, &quot;nope&quot;, &quot;nor&quot;, &quot;not&quot;, &quot;nothing&quot;, &quot;notwithstanding&quot;, &quot;now&quot;, &quot;nowadays&quot;, &quot;nowhere&quot;, &quot;of&quot;, &quot;off&quot;, &quot;often&quot;, &quot;ok&quot;, &quot;on&quot;, &quot;once&quot;, &quot;one&quot;, &quot;only&quot;, &quot;onto&quot;, &quot;or&quot;, &quot;other&quot;, &quot;others&quot;, &quot;otherwise&quot;, &quot;ought&quot;, &quot;our&quot;, &quot;ours&quot;, 
&quot;ourselves&quot;, &quot;out&quot;, &quot;outside&quot;, &quot;over&quot;, &quot;own&quot;, &quot;per&quot;, &quot;perhaps&quot;, &quot;plenty&quot;, &quot;provide&quot;, &quot;quite&quot;, &quot;rather&quot;, &quot;really&quot;, &quot;round&quot;, &quot;said&quot;, &quot;sake&quot;, &quot;same&quot;, &quot;sang&quot;, &quot;save&quot;, &quot;saw&quot;, &quot;see&quot;, &quot;seeing&quot;, &quot;seem&quot;, &quot;seemed&quot;, &quot;seeming&quot;, &quot;seems&quot;, &quot;seen&quot;, &quot;seldom&quot;, &quot;selves&quot;, &quot;sent&quot;, &quot;several&quot;, &quot;shalt&quot;, &quot;she&quot;, &quot;should&quot;, &quot;shown&quot;, &quot;sideways&quot;, &quot;since&quot;, &quot;slept&quot;, &quot;slew&quot;, &quot;slung&quot;, &quot;slunk&quot;, &quot;smote&quot;, &quot;so&quot;, &quot;some&quot;, &quot;somebody&quot;, &quot;somehow&quot;, &quot;someone&quot;, &quot;something&quot;, &quot;sometime&quot;, &quot;sometimes&quot;, &quot;somewhat&quot;, &quot;somewhere&quot;, &quot;spake&quot;, &quot;spat&quot;, &quot;spoke&quot;, &quot;spoken&quot;, &quot;sprang&quot;, &quot;sprung&quot;, &quot;stave&quot;, &quot;staves&quot;, &quot;still&quot;, &quot;such&quot;, &quot;supposing&quot;, &quot;than&quot;, &quot;that&quot;, &quot;the&quot;, &quot;thee&quot;, &quot;their&quot;, &quot;them&quot;, &quot;themselves&quot;, &quot;then&quot;, &quot;thence&quot;, &quot;thenceforth&quot;, &quot;there&quot;, &quot;thereabout&quot;, &quot;thereabouts&quot;, &quot;thereafter&quot;, &quot;thereby&quot;, &quot;therefore&quot;, &quot;therein&quot;, &quot;thereof&quot;, &quot;thereon&quot;, &quot;thereto&quot;, &quot;thereupon&quot;, &quot;these&quot;, &quot;they&quot;, &quot;this&quot;, &quot;those&quot;, &quot;thou&quot;, &quot;though&quot;, &quot;thrice&quot;, &quot;through&quot;, &quot;throughout&quot;, &quot;thru&quot;, &quot;thus&quot;, &quot;thy&quot;, &quot;thyself&quot;, &quot;till&quot;, &quot;to&quot;, &quot;together&quot;, &quot;too&quot;, &quot;toward&quot;, 
&quot;towards&quot;, &quot;ugh&quot;, &quot;unable&quot;, &quot;under&quot;, &quot;underneath&quot;, &quot;unless&quot;, &quot;unlike&quot;, &quot;until&quot;, &quot;up&quot;, &quot;upon&quot;, &quot;upward&quot;, &quot;upwards&quot;, &quot;us&quot;, &quot;use&quot;, &quot;used&quot;, &quot;using&quot;, &quot;very&quot;, &quot;via&quot;, &quot;vs&quot;, &quot;want&quot;, &quot;was&quot;, &quot;we&quot;, &quot;week&quot;, &quot;well&quot;, &quot;were&quot;, &quot;what&quot;, &quot;whatever&quot;, &quot;whatsoever&quot;, &quot;when&quot;, &quot;whence&quot;, &quot;whenever&quot;, &quot;whensoever&quot;, &quot;where&quot;, &quot;whereabouts&quot;, &quot;whereafter&quot;, &quot;whereas&quot;, &quot;whereat&quot;, &quot;whereby&quot;, &quot;wherefore&quot;, &quot;wherefrom&quot;, &quot;wherein&quot;, &quot;whereinto&quot;, &quot;whereof&quot;, &quot;whereon&quot;, &quot;wheresoever&quot;, &quot;whereto&quot;, &quot;whereunto&quot;, &quot;whereupon&quot;, &quot;wherever&quot;, &quot;wherewith&quot;, &quot;whether&quot;, &quot;whew&quot;, &quot;which&quot;, &quot;whichever&quot;, &quot;whichsoever&quot;, &quot;while&quot;, &quot;whilst&quot;, &quot;whither&quot;, &quot;who&quot;, &quot;whoa&quot;, &quot;whoever&quot;, &quot;whole&quot;, &quot;whom&quot;, &quot;whomever&quot;, &quot;whomsoever&quot;, &quot;whose&quot;, &quot;whosoever&quot;, &quot;why&quot;, &quot;will&quot;, &quot;wilt&quot;, &quot;with&quot;, &quot;within&quot;, &quot;without&quot;, &quot;worse&quot;, &quot;worst&quot;, &quot;would&quot;, &quot;wow&quot;, &quot;ye&quot;, &quot;yet&quot;, &quot;year&quot;, &quot;yippee&quot;, &quot;you&quot;, &quot;your&quot;, &quot;yours&quot;, &quot;yourself&quot;, &quot;yourselves&quot; ]</td>
125   -
126   - <td>&nbsp;</td>
127   - <td class="context-item-desc">
128   -These are the default stopwords provided by Lemur.
129   -
130   -</td>
131   -
132   - </tr>
133   -
134   - </table>
135   - </div>
136   - </div>
137   -
138   -
139   -
140   -
141   - <!-- if method_list -->
142   -
143   -
144   -
145   -
146   - </div>
147   -
148   -<div id="validator-badges">
149   - <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
150   -</div>
151   -
152   -</body>
153   -</html>
doc/classes/Rir/Document.html
1   -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2   -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3   -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4   -<head>
5   - <title>Class: Rir::Document [RDoc Documentation]</title>
6   - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7   - <meta http-equiv="Content-Script-Type" content="text/javascript" />
8   - <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
9   - <script type="text/javascript">
10   - // <![CDATA[
11   -
12   - function popupCode( url ) {
13   - window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14   - }
15   -
16   - function toggleCode( id ) {
17   - if ( document.getElementById )
18   - elem = document.getElementById( id );
19   - else if ( document.all )
20   - elem = eval( "document.all." + id );
21   - else
22   - return false;
23   -
24   - elemStyle = elem.style;
25   -
26   - if ( elemStyle.display != "block" ) {
27   - elemStyle.display = "block"
28   - } else {
29   - elemStyle.display = "none"
30   - }
31   -
32   - return true;
33   - }
34   -
35   - // Make codeblocks hidden by default
36   - document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37   -
38   - // ]]>
39   - </script>
40   -
41   -</head>
42   -<body>
43   -
44   -
45   - <div id="classHeader">
46   - <table class="header-table">
47   - <tr class="top-aligned-row">
48   - <td><strong>Class</strong></td>
49   - <td class="class-name-in-header">Rir::Document</td>
50   - </tr>
51   - <tr class="top-aligned-row">
52   - <td><strong>In:</strong></td>
53   - <td>
54   -
55   -
56   - <a href="../../files/lib/rir/document_rb.html">
57   -
58   - lib/rir/document.rb
59   -
60   - </a>
61   -
62   -
63   - <br />
64   -
65   - </td>
66   - </tr>
67   -
68   -
69   - <tr class="top-aligned-row">
70   - <td><strong>Parent:</strong></td>
71   - <td>
72   -
73   - Object
74   -
75   - </td>
76   - </tr>
77   -
78   - </table>
79   - </div>
80   - <!-- banner header -->
81   -
82   - <div id="bodyContent">
83   -
84   - <div id="contextContent">
85   -
86   - <div id="description">
87   - <p>
88   -A <a href="Document.html">Document</a> is a bag of words and is constructed
89   -from a string.
90   -</p>
91   -
92   - </div>
93   -
94   - </div>
95   -
96   -
97   - <div id="method-list">
98   - <h3 class="section-bar">Methods</h3>
99   -
100   - <div class="name-list">
101   -
102   - <a href="#M000012">count_words</a>&nbsp;&nbsp;
103   -
104   - <a href="#M000013">entropy</a>&nbsp;&nbsp;
105   -
106   - <a href="#M000010">format_words</a>&nbsp;&nbsp;
107   -
108   - <a href="#M000014">new</a>&nbsp;&nbsp;
109   -
110   - <a href="#M000011">ngrams</a>&nbsp;&nbsp;
111   -
112   - </div>
113   - </div>
114   -
115   - </div>
116   -
117   - <!-- if includes -->
118   -
119   - <div id="section">
120   -
121   -
122   -
123   - <div id="attribute-list">
124   - <h3 class="section-bar">Attributes</h3>
125   -
126   - <div class="name-list">
127   - <table>
128   -
129   - <tr class="top-aligned-row context-row">
130   - <td class="context-item-name">doc_content</td>
131   -
132   - <td class="context-item-value">&nbsp;[R]&nbsp;</td>
133   -
134   - <td class="context-item-desc"></td>
135   - </tr>
136   -
137   - <tr class="top-aligned-row context-row">
138   - <td class="context-item-name">words</td>
139   -
140   - <td class="context-item-value">&nbsp;[R]&nbsp;</td>
141   -
142   - <td class="context-item-desc"></td>
143   - </tr>
144   -
145   - </table>
146   - </div>
147   - </div>
148   -
149   -
150   - <!-- if method_list -->
151   -
152   - <div id="methods">
153   -
154   - <h3 class="section-bar">Public Class methods</h3>
155   -
156   -
157   - <div id="method-M000014" class="method-detail">
158   - <a name="M000014"></a>
159   -
160   - <div class="method-heading">
161   -
162   - <a href="Document.src/M000014.html" target="Code" class="method-signature"
163   - onclick="popupCode('Document.src/M000014.html');return false;">
164   -
165   - <span class="method-name">new</span><span class="method-args">(content)</span>
166   -
167   - </a>
168   -
169   - </div>
170   -
171   - <div class="method-description">
172   -
173   - </div>
174   - </div>
175   -
176   -
177   - <h3 class="section-bar">Public Instance methods</h3>
178   -
179   -
180   - <div id="method-M000012" class="method-detail">
181   - <a name="M000012"></a>
182   -
183   - <div class="method-heading">
184   -
185   - <a href="Document.src/M000012.html" target="Code" class="method-signature"
186   - onclick="popupCode('Document.src/M000012.html');return false;">
187   -
188   - <span class="method-name">count_words</span><span class="method-args">()</span>
189   -
190   - </a>
191   -
192   - </div>
193   -
194   - <div class="method-description">
195   -
196   - <p>
197   -Returns a Hash containing the words and their associated counts in the
198   -current <a href="Document.html">Document</a>.
199   -</p>
200   -<pre>
201   - count_words #=&gt; { &quot;guitar&quot;=&gt;1, &quot;bass&quot;=&gt;3, &quot;album&quot;=&gt;20, ... }
202   -</pre>
203   -
204   - </div>
205   - </div>
206   -
207   -
208   - <div id="method-M000013" class="method-detail">
209   - <a name="M000013"></a>
210   -
211   - <div class="method-heading">
212   -
213   - <a href="Document.src/M000013.html" target="Code" class="method-signature"
214   - onclick="popupCode('Document.src/M000013.html');return false;">
215   -
216   - <span class="method-name">entropy</span><span class="method-args">(s)</span>
217   -
218   - </a>
219   -
220   - </div>
221   -
222   - <div class="method-description">
223   -
224   - <p>
225   -Computes the entropy of a given string <tt>s</tt> inside the document.
226   -</p>
227   -<p>
228   -If the string parameter is composed of many words (i.e. tokens separated by
229   -whitespace(s)), it is considered as an ngram.
230   -</p>
231   -<pre>
232   - entropy(&quot;guitar&quot;) #=&gt; 0.00389919463243839
233   -</pre>
234   -
235   - </div>
236   - </div>
237   -
238   -
239   - <div id="method-M000011" class="method-detail">
240   - <a name="M000011"></a>
241   -
242   - <div class="method-heading">
243   -
244   - <a href="Document.src/M000011.html" target="Code" class="method-signature"
245   - onclick="popupCode('Document.src/M000011.html');return false;">
246   -
247   - <span class="method-name">ngrams</span><span class="method-args">(n)</span>
248   -
249   - </a>
250   -
251   - </div>
252   -
253   - <div class="method-description">
254   -
255   - <p>
256   -Returns an Array containing the <tt>n</tt>-grams (words) from the current
257   -<a href="Document.html">Document</a>.
258   -</p>
259   -<pre>
260   - ngrams(2) #=&gt; [&quot;the free&quot;, &quot;free encyclopedia&quot;, &quot;encyclopedia var&quot;, &quot;var skin&quot;, ...]
261   -</pre>
262   -
263   - </div>
264   - </div>
265   -
266   -
267   - <h3 class="section-bar">Protected Instance methods</h3>
268   -
269   -
270   - <div id="method-M000010" class="method-detail">
271   - <a name="M000010"></a>
272   -
273   - <div class="method-heading">
274   -
275   - <a href="Document.src/M000010.html" target="Code" class="method-signature"
276   - onclick="popupCode('Document.src/M000010.html');return false;">
277   -
278   - <span class="method-name">format_words</span><span class="method-args">()</span>
279   -
280   - </a>
281   -
282   - </div>
283   -
284   - <div class="method-description">
285   -
286   - <p>
287   -Any non-word characters are removed from the words (see <a
288   -href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a>
289   -and the \W special escape).
290   -</p>
291   -<p>
292   -Protected function, only meant to be called at initialization.
293   -</p>
294   -
295   - </div>
296   - </div>
297   -
298   -
299   -
300   - </div>
301   -
302   -
303   -
304   -
305   - </div>
306   -
307   -<div id="validator-badges">
308   - <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
309   -</div>
310   -
311   -</body>
312   -</html>
doc/classes/Rir/Document.src/M000010.html
1   -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2   -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3   -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4   -<head>
5   - <title>format_words (Rir::Document)</title>
6   - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7   - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8   -</head>
9   -<body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span>
12   - <span class="ruby-identifier">wo</span> = []
13   -
14   - <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
15   - <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span>
16   - <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span>
17   - <span class="ruby-keyword kw">end</span>
18   - <span class="ruby-keyword kw">end</span>
19   -
20   - <span class="ruby-identifier">wo</span>
21   - <span class="ruby-keyword kw">end</span></pre>
22   -</body>
23   -</html>
doc/classes/Rir/Document.src/M000011.html
1   -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2   -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3   -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4   -<head>
5   - <title>ngrams (Rir::Document)</title>
6   - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7   - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8   -</head>
9   -<body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>)
12   - <span class="ruby-identifier">window</span> = []
13   - <span class="ruby-identifier">ngrams_array</span> = []
14   -
15   - <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
16   - <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>)
17   - <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span>
18   - <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">&quot; &quot;</span>)
19   - <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>)
20   - <span class="ruby-keyword kw">end</span>
21   - <span class="ruby-keyword kw">end</span>
22   -
23   - <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span>
24   - <span class="ruby-keyword kw">end</span></pre>
25   -</body>
26   -</html>
doc/classes/Rir/Document.src/M000012.html
1   -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2   -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3   -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4   -<head>
5   - <title>count_words (Rir::Document)</title>
6   - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7   - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8   -</head>
9   -<body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span>
12   - <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> }
13   - <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> }
14   -
15   - <span class="ruby-identifier">counts</span>
16   - <span class="ruby-keyword kw">end</span></pre>
17   -</body>
18   -</html>
doc/classes/Rir/Document.src/M000013.html
1   -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2   -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3   -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4   -<head>
5   - <title>entropy (Rir::Document)</title>
6   - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7   - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8   -</head>
9   -<body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 77</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>)
12   - <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span>
13   - <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span>
14   -
15   - <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
16   - <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span>
17   - <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>)
18   - <span class="ruby-keyword kw">end</span>
19   -
20   - <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span>
21   - <span class="ruby-identifier">en</span>
22   - <span class="ruby-keyword kw">end</span></pre>
23   -</body>
24   -</html>
doc/classes/Rir/Document.src/M000014.html
1   -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2   -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3   -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4   -<head>
5   - <title>new (Rir::Document)</title>
6   - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7   - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8   -</head>
9   -<body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 92</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">content</span>)
12   - <span class="ruby-ivar">@doc_content</span> = <span class="ruby-identifier">content</span>
13   - <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">format_words</span>
14   - <span class="ruby-keyword kw">end</span></pre>
15   -</body>
16   -</html>
doc/classes/Rir/WebDocument.html
1   -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2   -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3   -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4   -<head>
5   - <title>Class: Rir::WebDocument [RDoc Documentation]</title>
6   - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7   - <meta http-equiv="Content-Script-Type" content="text/javascript" />
8   - <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
9   - <script type="text/javascript">
10   - // <![CDATA[
11   -
12   - function popupCode( url ) {
13   - window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14   - }
15   -
16   - function toggleCode( id ) {
17   - if ( document.getElementById )
18   - elem = document.getElementById( id );
19   - else if ( document.all )
20   - elem = eval( "document.all." + id );
21   - else
22   - return false;
23   -
24   - elemStyle = elem.style;
25   -
26   - if ( elemStyle.display != "block" ) {
27   - elemStyle.display = "block"
28   - } else {
29   - elemStyle.display = "none"
30   - }
31   -
32   - return true;
33   - }
34   -
35   - // Make codeblocks hidden by default
36   - document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37   -
38   - // ]]>
39   - </script>
40   -
41   -</head>
42   -<body>
43   -
44   -
45   - <div id="classHeader">
46   - <table class="header-table">
47   - <tr class="top-aligned-row">
48   - <td><strong>Class</strong></td>
49   - <td class="class-name-in-header">Rir::WebDocument</td>
50   - </tr>
51   - <tr class="top-aligned-row">
52   - <td><strong>In:</strong></td>
53   - <td>
54   -
55   -
56   - <a href="../../files/lib/rir/document_rb.html">
57   -
58   - lib/rir/document.rb
59   -
60   - </a>
61   -
62   -
63   - <br />
64   -
65   - </td>
66   - </tr>
67   -
68   -
69   - <tr class="top-aligned-row">
70   - <td><strong>Parent:</strong></td>
71   - <td>
72   -
73   - <a href="Document.html">
74   -
75   - Rir::Document
76   -
77   - </a>
78   -
79   - </td>
80   - </tr>
81   -
82   - </table>
83   - </div>
84   - <!-- banner header -->
85   -
86   - <div id="bodyContent">
87   -
88   - <div id="contextContent">
89   -
90   - <div id="description">
91   - <p>
92   -A <a href="WebDocument.html">WebDocument</a> is a <a
93   -href="Document.html">Document</a> with a <tt>url</tt>.
94   -</p>
95   -
96   - </div>
97   -
98   - </div>
99   -
100   -
101   - <div id="method-list">
102   - <h3 class="section-bar">Methods</h3>
103   -
104   - <div class="name-list">
105   -
106   - <a href="#M000015">get_content</a>&nbsp;&nbsp;
107   -
108   - <a href="#M000016">new</a>&nbsp;&nbsp;
109   -
110   - </div>
111   - </div>
112   -
113   - </div>
114   -
115   - <!-- if includes -->
116   -
117   - <div id="section">
118   -
119   -
120   -
121   - <div id="attribute-list">
122   - <h3 class="section-bar">Attributes</h3>
123   -
124   - <div class="name-list">
125   - <table>
126   -
127   - <tr class="top-aligned-row context-row">
128   - <td class="context-item-name">url</td>
129   -
130   - <td class="context-item-value">&nbsp;[R]&nbsp;</td>
131   -
132   - <td class="context-item-desc"></td>
133   - </tr>
134   -
135   - </table>
136   - </div>
137   - </div>
138   -
139   -
140   - <!-- if method_list -->
141   -
142   - <div id="methods">
143   -
144   - <h3 class="section-bar">Public Class methods</h3>
145   -
146   -
147   - <div id="method-M000015" class="method-detail">
148   - <a name="M000015"></a>
149   -
150   - <div class="method-heading">
151   -
152   - <a href="WebDocument.src/M000015.html" target="Code" class="method-signature"
153   - onclick="popupCode('WebDocument.src/M000015.html');return false;">
154   -
155   - <span class="method-name">get_content</span><span class="method-args">(url)</span>
156   -
157   - </a>
158   -
159   - </div>
160   -
161   - <div class="method-description">
162   -
163   - <p>
164   -Returns the HTML text from the page of a given <tt>url</tt>.
165   -</p>
166   -
167   - </div>
168   - </div>
169   -
170   -
171   - <div id="method-M000016" class="method-detail">
172   - <a name="M000016"></a>
173   -
174   - <div class="method-heading">
175   -
176   - <a href="WebDocument.src/M000016.html" target="Code" class="method-signature"
177   - onclick="popupCode('WebDocument.src/M000016.html');return false;">
178   -
179   - <span class="method-name">new</span><span class="method-args">(url)</span>
180   -
181   - </a>
182   -
183   - </div>
184   -
185   - <div class="method-description">
186   -
187   - <p>
188   -<a href="WebDocument.html">WebDocument</a> constructor, the content of the
189   -<a href="Document.html">Document</a> is the HTML page without the tags.
190   -</p>
191   -
192   - </div>
193   - </div>
194   -
195   -
196   -
197   - </div>
198   -
199   -
200   -
201   -
202   - </div>
203   -
204   -<div id="validator-badges">
205   - <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
206   -</div>
207   -
208   -</body>
209   -</html>
doc/classes/Rir/WebDocument.src/M000015.html
1   -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2   -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3   -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4   -<head>
5   - <title>get_content (Rir::WebDocument)</title>
6   - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7   - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8   -</head>
9   -<body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 105</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>)
12   - <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span>
13   - <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>))
14   - <span class="ruby-keyword kw">end</span></pre>
15   -</body>
16   -</html>
doc/classes/Rir/WebDocument.src/M000016.html
1   -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2   -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3   -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4   -<head>
5   - <title>new (Rir::WebDocument)</title>
6   - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7   - <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
8   -</head>
9   -<body class="standalone-code">
10   - <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 112</span>
11   - <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">url</span>)
12   - <span class="ruby-ivar">@url</span> = <span class="ruby-identifier">url</span>
13   - <span class="ruby-keyword kw">super</span> <span class="ruby-constant">WebDocument</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>).<span class="ruby-identifier">strip_javascripts</span>.<span class="ruby-identifier">strip_stylesheets</span>.<span class="ruby-identifier">strip_xml_tags</span>
14   - <span class="ruby-keyword kw">end</span></pre>
15   -</body>
16   -</html>
doc/classes/Rir/WikipediaPage.html
1   -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
2   -"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
3   -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
4   -<head>
5   - <title>Class: Rir::WikipediaPage [RDoc Documentation]</title>
6   - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
7   - <meta http-equiv="Content-Script-Type" content="text/javascript" />
8   - <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
9   - <script type="text/javascript">
10   - // <![CDATA[
11   -
12   - function popupCode( url ) {
13   - window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
14   - }
15   -
16   - function toggleCode( id ) {
17   - if ( document.getElementById )
18   - elem = document.getElementById( id );
19   - else if ( document.all )
20   - elem = eval( "document.all." + id );
21   - else
22   - return false;
23   -
24   - elemStyle = elem.style;
25   -
26   - if ( elemStyle.display != "block" ) {
27   - elemStyle.display = "block"
28   - } else {
29   - elemStyle.display = "none"
30   - }
31   -
32   - return true;
33   - }
34   -
35   - // Make codeblocks hidden by default
36   - document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
37   -
38   - // ]]>
39   - </script>
40   -
41   -</head>
42   -<body>
43   -
44   -
45   - <div id="classHeader">
46   - <table class="header-table">
47   - <tr class="top-aligned-row">
48   - <td><strong>Class</strong></td>
49   - <td class="class-name-in-header">Rir::WikipediaPage</td>
50   - </tr>
51   - <tr class="top-aligned-row">
52   - <td><strong>In:</strong></td>
53   - <td>
54   -
55   -
56   - <a href="../../files/lib/rir/document_rb.html">
57   -
58   - lib/rir/document.rb
59   -
60   - </a>
61   -
62   -
63   - <br />
64   -
65   - </td>
66   - </tr>
67   -
68   -
69   - <tr class="top-aligned-row">
70   - <td><strong>Parent:</strong></td>
71   - <td>
72   -
73   - <a href="WebDocument.html">
74   -
75   - Rir::WebDocument
76   -
77   - </a>
78   -
79   - </td>
80   - </tr>
81   -
82   - </table>
83   - </div>
84   - <!-- banner header -->
85   -
86   - <div id="bodyContent">
87   -
88   - <div id="contextContent">
89   -
90   - <div id="description">
91   - <p>
92   -A <a href="WikipediaPage.html">WikipediaPage</a> is a <a
93   -href="WebDocument.html">WebDocument</a>.
94   -</p>
95   -
96   - </div>
97   -
98   - </div>
99   -
100   -
101   - </div>
102   -
103   - <!-- if includes -->
104   -
105   - <div id="section">
106   -
107   -
108   -
109   -
110   - <!-- if method_list -->
111   -
112   -
113   -
114   -
115   - </div>
116   -
117   -<div id="validator-badges">
118   - <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
119   -</div>
120   -
121   -</body>
122   -</html>