Commit 7043da90bf781276184a770f306cfe7b59c17d5a

Authored by Romain Deveaud
0 parents
Exists in master

first commit

Showing 37 changed files with 2767 additions and 0 deletions Side-by-side Diff

... ... @@ -0,0 +1,19 @@
  1 +# Ruby Information Retrieval (rIR)
  2 +
  3 +Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
  4 +
  5 +License
  6 +=======
  7 +
  8 +This program is free software: you can redistribute it and/or modify
  9 +it under the terms of the GNU General Public License as published by
  10 +the Free Software Foundation, either version 3 of the License, or
  11 +(at your option) any later version.
  12 +
  13 +This program is distributed in the hope that it will be useful,
  14 +but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16 +GNU General Public License for more details.
  17 +
  18 +You should have received a copy of the GNU General Public License
  19 +along with this program. If not, see <http://www.gnu.org/licenses/>.
doc/classes/Rir.html
... ... @@ -0,0 +1,153 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Module: Rir [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Module</strong></td>
  49 + <td class="class-name-in-header">Rir</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../files/lib/rir/string_rb.html">
  57 +
  58 + lib/rir/string.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 +
  66 + <a href="../files/lib/rir/document_rb.html">
  67 +
  68 + lib/rir/document.rb
  69 +
  70 + </a>
  71 +
  72 +
  73 + <br />
  74 +
  75 + </td>
  76 + </tr>
  77 +
  78 +
  79 + </table>
  80 + </div>
  81 + <!-- banner header -->
  82 +
  83 + <div id="bodyContent">
  84 +
  85 + <div id="contextContent">
  86 +
  87 + <div id="description">
  88 + <p>
  89 +General module for many purposes related to Information Retrieval.
  90 +</p>
  91 +<hr size="1"></hr><p>
  92 +General module for many purposes related to Information Retrieval.
  93 +</p>
  94 +
  95 + </div>
  96 +
  97 + </div>
  98 +
  99 +
  100 + </div>
  101 +
  102 + <!-- if includes -->
  103 +
  104 + <div id="section">
  105 +
  106 + <div id="class-list">
  107 + <h3 class="section-bar">Classes and Modules</h3>
  108 +
  109 + Class <a href="Rir/Document.html" class="link">Rir::Document</a><br />
  110 +Class <a href="Rir/WebDocument.html" class="link">Rir::WebDocument</a><br />
  111 +Class <a href="Rir/WikipediaPage.html" class="link">Rir::WikipediaPage</a><br />
  112 +
  113 + </div>
  114 +
  115 + <div id="constants-list">
  116 + <h3 class="section-bar">Constants</h3>
  117 +
  118 + <div class="name-list">
  119 + <table summary="Constants">
  120 +
  121 + <tr class="top-aligned-row context-row">
  122 + <td class="context-item-name">Stoplist</td>
  123 + <td>=</td>
  124 + <td class="context-item-value">[ &quot;a&quot;, &quot;anything&quot;, &quot;anyway&quot;, &quot;anywhere&quot;, &quot;apart&quot;, &quot;are&quot;, &quot;around&quot;, &quot;as&quot;, &quot;at&quot;, &quot;av&quot;, &quot;be&quot;, &quot;became&quot;, &quot;because&quot;, &quot;become&quot;, &quot;becomes&quot;, &quot;becoming&quot;, &quot;been&quot;, &quot;before&quot;, &quot;beforehand&quot;, &quot;behind&quot;, &quot;being&quot;, &quot;below&quot;, &quot;beside&quot;, &quot;besides&quot;, &quot;between&quot;, &quot;beyond&quot;, &quot;both&quot;, &quot;but&quot;, &quot;by&quot;, &quot;can&quot;, &quot;cannot&quot;, &quot;canst&quot;, &quot;certain&quot;, &quot;cf&quot;, &quot;choose&quot;, &quot;contrariwise&quot;, &quot;cos&quot;, &quot;could&quot;, &quot;cu&quot;, &quot;day&quot;, &quot;do&quot;, &quot;does&quot;, &quot;doesn't&quot;, &quot;doing&quot;, &quot;dost&quot;, &quot;doth&quot;, &quot;double&quot;, &quot;down&quot;, &quot;dual&quot;, &quot;during&quot;, &quot;each&quot;, &quot;either&quot;, &quot;else&quot;, &quot;elsewhere&quot;, &quot;enough&quot;, &quot;et&quot;, &quot;etc&quot;, &quot;even&quot;, &quot;ever&quot;, &quot;every&quot;, &quot;everybody&quot;, &quot;everyone&quot;, &quot;everything&quot;, &quot;everywhere&quot;, &quot;except&quot;, &quot;excepted&quot;, &quot;excepting&quot;, &quot;exception&quot;, &quot;exclude&quot;, &quot;excluding&quot;, &quot;exclusive&quot;, &quot;far&quot;, &quot;farther&quot;, &quot;farthest&quot;, &quot;few&quot;, &quot;ff&quot;, &quot;first&quot;, &quot;for&quot;, &quot;formerly&quot;, &quot;forth&quot;, &quot;forward&quot;, &quot;from&quot;, &quot;front&quot;, &quot;further&quot;, &quot;furthermore&quot;, &quot;furthest&quot;, &quot;get&quot;, &quot;go&quot;, &quot;had&quot;, &quot;halves&quot;, &quot;hardly&quot;, &quot;has&quot;, &quot;hast&quot;, &quot;hath&quot;, &quot;have&quot;, &quot;he&quot;, &quot;hence&quot;, &quot;henceforth&quot;, &quot;her&quot;, &quot;here&quot;, 
&quot;hereabouts&quot;, &quot;hereafter&quot;, &quot;hereby&quot;, &quot;herein&quot;, &quot;hereto&quot;, &quot;hereupon&quot;, &quot;hers&quot;, &quot;herself&quot;, &quot;him&quot;, &quot;himself&quot;, &quot;hindmost&quot;, &quot;his&quot;, &quot;hither&quot;, &quot;hitherto&quot;, &quot;how&quot;, &quot;however&quot;, &quot;howsoever&quot;, &quot;i&quot;, &quot;ie&quot;, &quot;if&quot;, &quot;in&quot;, &quot;inasmuch&quot;, &quot;inc&quot;, &quot;include&quot;, &quot;included&quot;, &quot;including&quot;, &quot;indeed&quot;, &quot;indoors&quot;, &quot;inside&quot;, &quot;insomuch&quot;, &quot;instead&quot;, &quot;into&quot;, &quot;inward&quot;, &quot;inwards&quot;, &quot;is&quot;, &quot;it&quot;, &quot;its&quot;, &quot;itself&quot;, &quot;just&quot;, &quot;kind&quot;, &quot;kg&quot;, &quot;km&quot;, &quot;last&quot;, &quot;latter&quot;, &quot;latterly&quot;, &quot;less&quot;, &quot;lest&quot;, &quot;let&quot;, &quot;like&quot;, &quot;little&quot;, &quot;ltd&quot;, &quot;many&quot;, &quot;may&quot;, &quot;maybe&quot;, &quot;me&quot;, &quot;meantime&quot;, &quot;meanwhile&quot;, &quot;might&quot;, &quot;moreover&quot;, &quot;most&quot;, &quot;mostly&quot;, &quot;more&quot;, &quot;mr&quot;, &quot;mrs&quot;, &quot;ms&quot;, &quot;much&quot;, &quot;must&quot;, &quot;my&quot;, &quot;myself&quot;, &quot;namely&quot;, &quot;need&quot;, &quot;neither&quot;, &quot;never&quot;, &quot;nevertheless&quot;, &quot;next&quot;, &quot;no&quot;, &quot;nobody&quot;, &quot;none&quot;, &quot;nonetheless&quot;, &quot;noone&quot;, &quot;nope&quot;, &quot;nor&quot;, &quot;not&quot;, &quot;nothing&quot;, &quot;notwithstanding&quot;, &quot;now&quot;, &quot;nowadays&quot;, &quot;nowhere&quot;, &quot;of&quot;, &quot;off&quot;, &quot;often&quot;, &quot;ok&quot;, &quot;on&quot;, &quot;once&quot;, &quot;one&quot;, &quot;only&quot;, &quot;onto&quot;, &quot;or&quot;, &quot;other&quot;, &quot;others&quot;, &quot;otherwise&quot;, &quot;ought&quot;, &quot;our&quot;, &quot;ours&quot;, 
&quot;ourselves&quot;, &quot;out&quot;, &quot;outside&quot;, &quot;over&quot;, &quot;own&quot;, &quot;per&quot;, &quot;perhaps&quot;, &quot;plenty&quot;, &quot;provide&quot;, &quot;quite&quot;, &quot;rather&quot;, &quot;really&quot;, &quot;round&quot;, &quot;said&quot;, &quot;sake&quot;, &quot;same&quot;, &quot;sang&quot;, &quot;save&quot;, &quot;saw&quot;, &quot;see&quot;, &quot;seeing&quot;, &quot;seem&quot;, &quot;seemed&quot;, &quot;seeming&quot;, &quot;seems&quot;, &quot;seen&quot;, &quot;seldom&quot;, &quot;selves&quot;, &quot;sent&quot;, &quot;several&quot;, &quot;shalt&quot;, &quot;she&quot;, &quot;should&quot;, &quot;shown&quot;, &quot;sideways&quot;, &quot;since&quot;, &quot;slept&quot;, &quot;slew&quot;, &quot;slung&quot;, &quot;slunk&quot;, &quot;smote&quot;, &quot;so&quot;, &quot;some&quot;, &quot;somebody&quot;, &quot;somehow&quot;, &quot;someone&quot;, &quot;something&quot;, &quot;sometime&quot;, &quot;sometimes&quot;, &quot;somewhat&quot;, &quot;somewhere&quot;, &quot;spake&quot;, &quot;spat&quot;, &quot;spoke&quot;, &quot;spoken&quot;, &quot;sprang&quot;, &quot;sprung&quot;, &quot;stave&quot;, &quot;staves&quot;, &quot;still&quot;, &quot;such&quot;, &quot;supposing&quot;, &quot;than&quot;, &quot;that&quot;, &quot;the&quot;, &quot;thee&quot;, &quot;their&quot;, &quot;them&quot;, &quot;themselves&quot;, &quot;then&quot;, &quot;thence&quot;, &quot;thenceforth&quot;, &quot;there&quot;, &quot;thereabout&quot;, &quot;thereabouts&quot;, &quot;thereafter&quot;, &quot;thereby&quot;, &quot;therefore&quot;, &quot;therein&quot;, &quot;thereof&quot;, &quot;thereon&quot;, &quot;thereto&quot;, &quot;thereupon&quot;, &quot;these&quot;, &quot;they&quot;, &quot;this&quot;, &quot;those&quot;, &quot;thou&quot;, &quot;though&quot;, &quot;thrice&quot;, &quot;through&quot;, &quot;throughout&quot;, &quot;thru&quot;, &quot;thus&quot;, &quot;thy&quot;, &quot;thyself&quot;, &quot;till&quot;, &quot;to&quot;, &quot;together&quot;, &quot;too&quot;, &quot;toward&quot;, 
&quot;towards&quot;, &quot;ugh&quot;, &quot;unable&quot;, &quot;under&quot;, &quot;underneath&quot;, &quot;unless&quot;, &quot;unlike&quot;, &quot;until&quot;, &quot;up&quot;, &quot;upon&quot;, &quot;upward&quot;, &quot;upwards&quot;, &quot;us&quot;, &quot;use&quot;, &quot;used&quot;, &quot;using&quot;, &quot;very&quot;, &quot;via&quot;, &quot;vs&quot;, &quot;want&quot;, &quot;was&quot;, &quot;we&quot;, &quot;week&quot;, &quot;well&quot;, &quot;were&quot;, &quot;what&quot;, &quot;whatever&quot;, &quot;whatsoever&quot;, &quot;when&quot;, &quot;whence&quot;, &quot;whenever&quot;, &quot;whensoever&quot;, &quot;where&quot;, &quot;whereabouts&quot;, &quot;whereafter&quot;, &quot;whereas&quot;, &quot;whereat&quot;, &quot;whereby&quot;, &quot;wherefore&quot;, &quot;wherefrom&quot;, &quot;wherein&quot;, &quot;whereinto&quot;, &quot;whereof&quot;, &quot;whereon&quot;, &quot;wheresoever&quot;, &quot;whereto&quot;, &quot;whereunto&quot;, &quot;whereupon&quot;, &quot;wherever&quot;, &quot;wherewith&quot;, &quot;whether&quot;, &quot;whew&quot;, &quot;which&quot;, &quot;whichever&quot;, &quot;whichsoever&quot;, &quot;while&quot;, &quot;whilst&quot;, &quot;whither&quot;, &quot;who&quot;, &quot;whoa&quot;, &quot;whoever&quot;, &quot;whole&quot;, &quot;whom&quot;, &quot;whomever&quot;, &quot;whomsoever&quot;, &quot;whose&quot;, &quot;whosoever&quot;, &quot;why&quot;, &quot;will&quot;, &quot;wilt&quot;, &quot;with&quot;, &quot;within&quot;, &quot;without&quot;, &quot;worse&quot;, &quot;worst&quot;, &quot;would&quot;, &quot;wow&quot;, &quot;ye&quot;, &quot;yet&quot;, &quot;year&quot;, &quot;yippee&quot;, &quot;you&quot;, &quot;your&quot;, &quot;yours&quot;, &quot;yourself&quot;, &quot;yourselves&quot; ]</td>
  125 +
  126 + <td>&nbsp;</td>
  127 + <td class="context-item-desc">
  128 +These are the default stopwords provided by Lemur.
  129 +
  130 +</td>
  131 +
  132 + </tr>
  133 +
  134 + </table>
  135 + </div>
  136 + </div>
  137 +
  138 +
  139 +
  140 +
  141 + <!-- if method_list -->
  142 +
  143 +
  144 +
  145 +
  146 + </div>
  147 +
  148 +<div id="validator-badges">
  149 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  150 +</div>
  151 +
  152 +</body>
  153 +</html>
doc/classes/Rir/Document.html
... ... @@ -0,0 +1,312 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: Rir::Document [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">Rir::Document</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../files/lib/rir/document_rb.html">
  57 +
  58 + lib/rir/document.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + Object
  74 +
  75 + </td>
  76 + </tr>
  77 +
  78 + </table>
  79 + </div>
  80 + <!-- banner header -->
  81 +
  82 + <div id="bodyContent">
  83 +
  84 + <div id="contextContent">
  85 +
  86 + <div id="description">
  87 + <p>
  88 +A <a href="Document.html">Document</a> is a bag of words and is constructed
  89 +from a string.
  90 +</p>
  91 +
  92 + </div>
  93 +
  94 + </div>
  95 +
  96 +
  97 + <div id="method-list">
  98 + <h3 class="section-bar">Methods</h3>
  99 +
  100 + <div class="name-list">
  101 +
  102 + <a href="#M000012">count_words</a>&nbsp;&nbsp;
  103 +
  104 + <a href="#M000013">entropy</a>&nbsp;&nbsp;
  105 +
  106 + <a href="#M000010">format_words</a>&nbsp;&nbsp;
  107 +
  108 + <a href="#M000014">new</a>&nbsp;&nbsp;
  109 +
  110 + <a href="#M000011">ngrams</a>&nbsp;&nbsp;
  111 +
  112 + </div>
  113 + </div>
  114 +
  115 + </div>
  116 +
  117 + <!-- if includes -->
  118 +
  119 + <div id="section">
  120 +
  121 +
  122 +
  123 + <div id="attribute-list">
  124 + <h3 class="section-bar">Attributes</h3>
  125 +
  126 + <div class="name-list">
  127 + <table>
  128 +
  129 + <tr class="top-aligned-row context-row">
  130 + <td class="context-item-name">doc_content</td>
  131 +
  132 + <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  133 +
  134 + <td class="context-item-desc"></td>
  135 + </tr>
  136 +
  137 + <tr class="top-aligned-row context-row">
  138 + <td class="context-item-name">words</td>
  139 +
  140 + <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  141 +
  142 + <td class="context-item-desc"></td>
  143 + </tr>
  144 +
  145 + </table>
  146 + </div>
  147 + </div>
  148 +
  149 +
  150 + <!-- if method_list -->
  151 +
  152 + <div id="methods">
  153 +
  154 + <h3 class="section-bar">Public Class methods</h3>
  155 +
  156 +
  157 + <div id="method-M000014" class="method-detail">
  158 + <a name="M000014"></a>
  159 +
  160 + <div class="method-heading">
  161 +
  162 + <a href="Document.src/M000014.html" target="Code" class="method-signature"
  163 + onclick="popupCode('Document.src/M000014.html');return false;">
  164 +
  165 + <span class="method-name">new</span><span class="method-args">(content)</span>
  166 +
  167 + </a>
  168 +
  169 + </div>
  170 +
  171 + <div class="method-description">
  172 +
  173 + </div>
  174 + </div>
  175 +
  176 +
  177 + <h3 class="section-bar">Public Instance methods</h3>
  178 +
  179 +
  180 + <div id="method-M000012" class="method-detail">
  181 + <a name="M000012"></a>
  182 +
  183 + <div class="method-heading">
  184 +
  185 + <a href="Document.src/M000012.html" target="Code" class="method-signature"
  186 + onclick="popupCode('Document.src/M000012.html');return false;">
  187 +
  188 + <span class="method-name">count_words</span><span class="method-args">()</span>
  189 +
  190 + </a>
  191 +
  192 + </div>
  193 +
  194 + <div class="method-description">
  195 +
  196 + <p>
  197 +Returns a Hash containing the words and their associated counts in the
  198 +current <a href="Document.html">Document</a>.
  199 +</p>
  200 +<pre>
  201 + count_words #=&gt; { &quot;guitar&quot;=&gt;1, &quot;bass&quot;=&gt;3, &quot;album&quot;=&gt;20, ... }
  202 +</pre>
  203 +
  204 + </div>
  205 + </div>
  206 +
  207 +
  208 + <div id="method-M000013" class="method-detail">
  209 + <a name="M000013"></a>
  210 +
  211 + <div class="method-heading">
  212 +
  213 + <a href="Document.src/M000013.html" target="Code" class="method-signature"
  214 + onclick="popupCode('Document.src/M000013.html');return false;">
  215 +
  216 + <span class="method-name">entropy</span><span class="method-args">(s)</span>
  217 +
  218 + </a>
  219 +
  220 + </div>
  221 +
  222 + <div class="method-description">
  223 +
  224 + <p>
  225 +Computes the entropy of a given string <tt>s</tt> inside the document.
  226 +</p>
  227 +<p>
  228 +If the string parameter contains several words (i.e. tokens separated by
  229 +whitespace), it is treated as an n-gram.
  230 +</p>
  231 +<pre>
  232 + entropy(&quot;guitar&quot;) #=&gt; 0.00389919463243839
  233 +</pre>
  234 +
  235 + </div>
  236 + </div>
  237 +
  238 +
  239 + <div id="method-M000011" class="method-detail">
  240 + <a name="M000011"></a>
  241 +
  242 + <div class="method-heading">
  243 +
  244 + <a href="Document.src/M000011.html" target="Code" class="method-signature"
  245 + onclick="popupCode('Document.src/M000011.html');return false;">
  246 +
  247 + <span class="method-name">ngrams</span><span class="method-args">(n)</span>
  248 +
  249 + </a>
  250 +
  251 + </div>
  252 +
  253 + <div class="method-description">
  254 +
  255 + <p>
  256 +Returns an Array containing the unique <tt>n</tt>-grams (of words) from the current
  257 +<a href="Document.html">Document</a>.
  258 +</p>
  259 +<pre>
  260 + ngrams(2) #=&gt; [&quot;the free&quot;, &quot;free encyclopedia&quot;, &quot;encyclopedia var&quot;, &quot;var skin&quot;, ...]
  261 +</pre>
  262 +
  263 + </div>
  264 + </div>
  265 +
  266 +
  267 + <h3 class="section-bar">Protected Instance methods</h3>
  268 +
  269 +
  270 + <div id="method-M000010" class="method-detail">
  271 + <a name="M000010"></a>
  272 +
  273 + <div class="method-heading">
  274 +
  275 + <a href="Document.src/M000010.html" target="Code" class="method-signature"
  276 + onclick="popupCode('Document.src/M000010.html');return false;">
  277 +
  278 + <span class="method-name">format_words</span><span class="method-args">()</span>
  279 +
  280 + </a>
  281 +
  282 + </div>
  283 +
  284 + <div class="method-description">
  285 +
  286 + <p>
  287 +Any non-word characters are removed from the words (see <a
  288 +href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a>
  289 +and the \W special escape).
  290 +</p>
  291 +<p>
  292 +Protected function, only meant to be called at initialization.
  293 +</p>
  294 +
  295 + </div>
  296 + </div>
  297 +
  298 +
  299 +
  300 + </div>
  301 +
  302 +
  303 +
  304 +
  305 + </div>
  306 +
  307 +<div id="validator-badges">
  308 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  309 +</div>
  310 +
  311 +</body>
  312 +</html>
doc/classes/Rir/Document.src/M000010.html
... ... @@ -0,0 +1,23 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>format_words (Rir::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 31</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">format_words</span>
  12 + <span class="ruby-identifier">wo</span> = []
  13 +
  14 + <span class="ruby-ivar">@doc_content</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
  15 + <span class="ruby-identifier">w</span>.<span class="ruby-identifier">split</span>(<span class="ruby-regexp re">/\W/</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">sw</span><span class="ruby-operator">|</span>
  16 + <span class="ruby-identifier">wo</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">sw</span>) <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">sw</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/[a-zA-Z]/</span>
  17 + <span class="ruby-keyword kw">end</span>
  18 + <span class="ruby-keyword kw">end</span>
  19 +
  20 + <span class="ruby-identifier">wo</span>
  21 + <span class="ruby-keyword kw">end</span></pre>
  22 +</body>
  23 +</html>
doc/classes/Rir/Document.src/M000011.html
... ... @@ -0,0 +1,26 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>ngrams (Rir::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 46</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">ngrams</span>(<span class="ruby-identifier">n</span>)
  12 + <span class="ruby-identifier">window</span> = []
  13 + <span class="ruby-identifier">ngrams_array</span> = []
  14 +
  15 + <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
  16 + <span class="ruby-identifier">window</span>.<span class="ruby-identifier">push</span>(<span class="ruby-identifier">w</span>)
  17 + <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">n</span>
  18 + <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">push</span> <span class="ruby-identifier">window</span>.<span class="ruby-identifier">join</span>(<span class="ruby-value str">&quot; &quot;</span>)
  19 + <span class="ruby-identifier">window</span>.<span class="ruby-identifier">delete_at</span>(<span class="ruby-value">0</span>)
  20 + <span class="ruby-keyword kw">end</span>
  21 + <span class="ruby-keyword kw">end</span>
  22 +
  23 + <span class="ruby-identifier">ngrams_array</span>.<span class="ruby-identifier">uniq</span>
  24 + <span class="ruby-keyword kw">end</span></pre>
  25 +</body>
  26 +</html>
doc/classes/Rir/Document.src/M000012.html
... ... @@ -0,0 +1,18 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>count_words (Rir::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 64</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">count_words</span>
  12 + <span class="ruby-identifier">counts</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">h</span>,<span class="ruby-identifier">k</span><span class="ruby-operator">|</span> <span class="ruby-identifier">h</span>[<span class="ruby-identifier">k</span>] = <span class="ruby-value">0</span> }
  13 + <span class="ruby-ivar">@words</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>.<span class="ruby-identifier">downcase</span>] <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> }
  14 +
  15 + <span class="ruby-identifier">counts</span>
  16 + <span class="ruby-keyword kw">end</span></pre>
  17 +</body>
  18 +</html>
doc/classes/Rir/Document.src/M000013.html
... ... @@ -0,0 +1,24 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>entropy (Rir::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 77</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">entropy</span>(<span class="ruby-identifier">s</span>)
  12 + <span class="ruby-identifier">en</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span>
  13 + <span class="ruby-identifier">counts</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">count_words</span>
  14 +
  15 + <span class="ruby-identifier">s</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span>
  16 + <span class="ruby-identifier">p_wi</span> = <span class="ruby-identifier">counts</span>[<span class="ruby-identifier">w</span>].<span class="ruby-identifier">to_f</span><span class="ruby-operator">/</span><span class="ruby-ivar">@words</span>.<span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span>
  17 + <span class="ruby-identifier">en</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">p_wi</span><span class="ruby-operator">*</span><span class="ruby-constant">Math</span>.<span class="ruby-identifier">log2</span>(<span class="ruby-identifier">p_wi</span>)
  18 + <span class="ruby-keyword kw">end</span>
  19 +
  20 + <span class="ruby-identifier">en</span> <span class="ruby-operator">*=</span> <span class="ruby-value">-1</span>
  21 + <span class="ruby-identifier">en</span>
  22 + <span class="ruby-keyword kw">end</span></pre>
  23 +</body>
  24 +</html>
doc/classes/Rir/Document.src/M000014.html
... ... @@ -0,0 +1,16 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>new (Rir::Document)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 92</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">content</span>)
  12 + <span class="ruby-ivar">@doc_content</span> = <span class="ruby-identifier">content</span>
  13 + <span class="ruby-ivar">@words</span> = <span class="ruby-identifier">format_words</span>
  14 + <span class="ruby-keyword kw">end</span></pre>
  15 +</body>
  16 +</html>
doc/classes/Rir/WebDocument.html
... ... @@ -0,0 +1,209 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: Rir::WebDocument [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">Rir::WebDocument</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../files/lib/rir/document_rb.html">
  57 +
  58 + lib/rir/document.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + <a href="Document.html">
  74 +
  75 + Rir::Document
  76 +
  77 + </a>
  78 +
  79 + </td>
  80 + </tr>
  81 +
  82 + </table>
  83 + </div>
  84 + <!-- banner header -->
  85 +
  86 + <div id="bodyContent">
  87 +
  88 + <div id="contextContent">
  89 +
  90 + <div id="description">
  91 + <p>
  92 +A <a href="WebDocument.html">WebDocument</a> is a <a
  93 +href="Document.html">Document</a> with a <tt>url</tt>.
  94 +</p>
  95 +
  96 + </div>
  97 +
  98 + </div>
  99 +
  100 +
  101 + <div id="method-list">
  102 + <h3 class="section-bar">Methods</h3>
  103 +
  104 + <div class="name-list">
  105 +
  106 + <a href="#M000015">get_content</a>&nbsp;&nbsp;
  107 +
  108 + <a href="#M000016">new</a>&nbsp;&nbsp;
  109 +
  110 + </div>
  111 + </div>
  112 +
  113 + </div>
  114 +
  115 + <!-- if includes -->
  116 +
  117 + <div id="section">
  118 +
  119 +
  120 +
  121 + <div id="attribute-list">
  122 + <h3 class="section-bar">Attributes</h3>
  123 +
  124 + <div class="name-list">
  125 + <table>
  126 +
  127 + <tr class="top-aligned-row context-row">
  128 + <td class="context-item-name">url</td>
  129 +
  130 + <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  131 +
  132 + <td class="context-item-desc"></td>
  133 + </tr>
  134 +
  135 + </table>
  136 + </div>
  137 + </div>
  138 +
  139 +
  140 + <!-- if method_list -->
  141 +
  142 + <div id="methods">
  143 +
  144 + <h3 class="section-bar">Public Class methods</h3>
  145 +
  146 +
  147 + <div id="method-M000015" class="method-detail">
  148 + <a name="M000015"></a>
  149 +
  150 + <div class="method-heading">
  151 +
  152 + <a href="WebDocument.src/M000015.html" target="Code" class="method-signature"
  153 + onclick="popupCode('WebDocument.src/M000015.html');return false;">
  154 +
  155 + <span class="method-name">get_content</span><span class="method-args">(url)</span>
  156 +
  157 + </a>
  158 +
  159 + </div>
  160 +
  161 + <div class="method-description">
  162 +
  163 + <p>
  164 +Returns the HTML text from the page of a given <tt>url</tt>.
  165 +</p>
  166 +
  167 + </div>
  168 + </div>
  169 +
  170 +
  171 + <div id="method-M000016" class="method-detail">
  172 + <a name="M000016"></a>
  173 +
  174 + <div class="method-heading">
  175 +
  176 + <a href="WebDocument.src/M000016.html" target="Code" class="method-signature"
  177 + onclick="popupCode('WebDocument.src/M000016.html');return false;">
  178 +
  179 + <span class="method-name">new</span><span class="method-args">(url)</span>
  180 +
  181 + </a>
  182 +
  183 + </div>
  184 +
  185 + <div class="method-description">
  186 +
  187 + <p>
  188 +<a href="WebDocument.html">WebDocument</a> constructor, the content of the
  189 +<a href="Document.html">Document</a> is the HTML page without the tags.
  190 +</p>
  191 +
  192 + </div>
  193 + </div>
  194 +
  195 +
  196 +
  197 + </div>
  198 +
  199 +
  200 +
  201 +
  202 + </div>
  203 +
  204 +<div id="validator-badges">
  205 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  206 +</div>
  207 +
  208 +</body>
  209 +</html>
doc/classes/Rir/WebDocument.src/M000015.html
... ... @@ -0,0 +1,16 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>get_content (Rir::WebDocument)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 105</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>)
  12 + <span class="ruby-identifier">require</span> <span class="ruby-value str">'net/http'</span>
  13 + <span class="ruby-constant">Net</span><span class="ruby-operator">::</span><span class="ruby-constant">HTTP</span>.<span class="ruby-identifier">get</span>(<span class="ruby-constant">URI</span>.<span class="ruby-identifier">parse</span>(<span class="ruby-identifier">url</span>))
  14 + <span class="ruby-keyword kw">end</span></pre>
  15 +</body>
  16 +</html>
doc/classes/Rir/WebDocument.src/M000016.html
... ... @@ -0,0 +1,16 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>new (Rir::WebDocument)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/document.rb, line 112</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">url</span>)
  12 + <span class="ruby-ivar">@url</span> = <span class="ruby-identifier">url</span>
  13 + <span class="ruby-keyword kw">super</span> <span class="ruby-constant">WebDocument</span>.<span class="ruby-identifier">get_content</span>(<span class="ruby-identifier">url</span>).<span class="ruby-identifier">strip_javascripts</span>.<span class="ruby-identifier">strip_stylesheets</span>.<span class="ruby-identifier">strip_xml_tags</span>
  14 + <span class="ruby-keyword kw">end</span></pre>
  15 +</body>
  16 +</html>
doc/classes/Rir/WikipediaPage.html
... ... @@ -0,0 +1,122 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: Rir::WikipediaPage [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">Rir::WikipediaPage</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../../files/lib/rir/document_rb.html">
  57 +
  58 + lib/rir/document.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + <a href="WebDocument.html">
  74 +
  75 + Rir::WebDocument
  76 +
  77 + </a>
  78 +
  79 + </td>
  80 + </tr>
  81 +
  82 + </table>
  83 + </div>
  84 + <!-- banner header -->
  85 +
  86 + <div id="bodyContent">
  87 +
  88 + <div id="contextContent">
  89 +
  90 + <div id="description">
  91 + <p>
  92 +A <a href="WikipediaPage.html">WikipediaPage</a> is a <a
  93 +href="WebDocument.html">WebDocument</a>.
  94 +</p>
  95 +
  96 + </div>
  97 +
  98 + </div>
  99 +
  100 +
  101 + </div>
  102 +
  103 + <!-- if includes -->
  104 +
  105 + <div id="section">
  106 +
  107 +
  108 +
  109 +
  110 + <!-- if method_list -->
  111 +
  112 +
  113 +
  114 +
  115 + </div>
  116 +
  117 +<div id="validator-badges">
  118 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  119 +</div>
  120 +
  121 +</body>
  122 +</html>
doc/classes/String.html
... ... @@ -0,0 +1,404 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>Class: String [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="classHeader">
  46 + <table class="header-table">
  47 + <tr class="top-aligned-row">
  48 + <td><strong>Class</strong></td>
  49 + <td class="class-name-in-header">String</td>
  50 + </tr>
  51 + <tr class="top-aligned-row">
  52 + <td><strong>In:</strong></td>
  53 + <td>
  54 +
  55 +
  56 + <a href="../files/lib/rir/string_rb.html">
  57 +
  58 + lib/rir/string.rb
  59 +
  60 + </a>
  61 +
  62 +
  63 + <br />
  64 +
  65 + </td>
  66 + </tr>
  67 +
  68 +
  69 + <tr class="top-aligned-row">
  70 + <td><strong>Parent:</strong></td>
  71 + <td>
  72 +
  73 + Object
  74 +
  75 + </td>
  76 + </tr>
  77 +
  78 + </table>
  79 + </div>
  80 + <!-- banner header -->
  81 +
  82 + <div id="bodyContent">
  83 +
  84 + <div id="contextContent">
  85 +
  86 + <div id="description">
  87 + <p>
  88 +Extension of the standard class <a href="String.html">String</a> with
  89 +useful functions.
  90 +</p>
  91 +
  92 + </div>
  93 +
  94 + </div>
  95 +
  96 +
  97 + <div id="method-list">
  98 + <h3 class="section-bar">Methods</h3>
  99 +
  100 + <div class="name-list">
  101 +
  102 + <a href="#M000009">extract_xmltags_values</a>&nbsp;&nbsp;
  103 +
  104 + <a href="#M000001">is_stopword?</a>&nbsp;&nbsp;
  105 +
  106 + <a href="#M000002">remove_special_characters</a>&nbsp;&nbsp;
  107 +
  108 + <a href="#M000006">strip_javascripts</a>&nbsp;&nbsp;
  109 +
  110 + <a href="#M000005">strip_javascripts!</a>&nbsp;&nbsp;
  111 +
  112 + <a href="#M000008">strip_stylesheets</a>&nbsp;&nbsp;
  113 +
  114 + <a href="#M000007">strip_stylesheets!</a>&nbsp;&nbsp;
  115 +
  116 + <a href="#M000004">strip_xml_tags</a>&nbsp;&nbsp;
  117 +
  118 + <a href="#M000003">strip_xml_tags!</a>&nbsp;&nbsp;
  119 +
  120 + </div>
  121 + </div>
  122 +
  123 + </div>
  124 +
  125 + <!-- if includes -->
  126 +
  127 + <div id="includes">
  128 + <h3 class="section-bar">Included Modules</h3>
  129 +
  130 + <div id="includes-list">
  131 +
  132 + <span class="include-name"><a href="Rir.html">Rir</a></span>
  133 +
  134 + </div>
  135 + </div>
  136 +
  137 + <div id="section">
  138 +
  139 +
  140 +
  141 +
  142 + <!-- if method_list -->
  143 +
  144 + <div id="methods">
  145 +
  146 + <h3 class="section-bar">Public Instance methods</h3>
  147 +
  148 +
  149 + <div id="method-M000009" class="method-detail">
  150 + <a name="M000009"></a>
  151 +
  152 + <div class="method-heading">
  153 +
  154 + <a href="String.src/M000009.html" target="Code" class="method-signature"
  155 + onclick="popupCode('String.src/M000009.html');return false;">
  156 +
  157 + <span class="method-name">extract_xmltags_values</span><span class="method-args">(tag_name)</span>
  158 +
  159 + </a>
  160 +
  161 + </div>
  162 +
  163 + <div class="method-description">
  164 +
  165 + <p>
  166 +Returns the text values inside all occurrences of an XML tag in <tt>self</tt>
  167 +</p>
  168 +<pre>
  169 + s = &quot;four-piece in &lt;a href='#'&gt;Indianapolis&lt;/a&gt;, &lt;a href='#'&gt;Indiana&lt;/a&gt; at the Murat Theatre&quot;
  170 + s.extract_xmltags_values 'a' #=&gt; [&quot;Indianapolis&quot;, &quot;Indiana&quot;]
  171 +</pre>
  172 +
  173 + </div>
  174 + </div>
  175 +
  176 +
  177 + <div id="method-M000001" class="method-detail">
  178 + <a name="M000001"></a>
  179 +
  180 + <div class="method-heading">
  181 +
  182 + <a href="String.src/M000001.html" target="Code" class="method-signature"
  183 + onclick="popupCode('String.src/M000001.html');return false;">
  184 +
  185 + <span class="method-name">is_stopword?</span><span class="method-args">()</span>
  186 +
  187 + </a>
  188 +
  189 + </div>
  190 +
  191 + <div class="method-description">
  192 +
  193 + <p>
  194 +Returns <tt>true</tt> if <tt>self</tt> belongs to Rir::Stoplist,
  195 +<tt>false</tt> otherwise.
  196 +</p>
  197 +
  198 + </div>
  199 + </div>
  200 +
  201 +
  202 + <div id="method-M000002" class="method-detail">
  203 + <a name="M000002"></a>
  204 +
  205 + <div class="method-heading">
  206 +
  207 + <a href="String.src/M000002.html" target="Code" class="method-signature"
  208 + onclick="popupCode('String.src/M000002.html');return false;">
  209 +
  210 + <span class="method-name">remove_special_characters</span><span class="method-args">()</span>
  211 +
  212 + </a>
  213 +
  214 + </div>
  215 +
  216 + <div class="method-description">
  217 +
  218 + <p>
  219 +Do not use. TODO: revamp; find out why this function is here.
  220 +</p>
  221 +
  222 + </div>
  223 + </div>
  224 +
  225 +
  226 + <div id="method-M000006" class="method-detail">
  227 + <a name="M000006"></a>
  228 +
  229 + <div class="method-heading">
  230 +
  231 + <a href="String.src/M000006.html" target="Code" class="method-signature"
  232 + onclick="popupCode('String.src/M000006.html');return false;">
  233 +
  234 + <span class="method-name">strip_javascripts</span><span class="method-args">()</span>
  235 +
  236 + </a>
  237 +
  238 + </div>
  239 +
  240 + <div class="method-description">
  241 +
  242 + <p>
  243 +Removes all JavaScript sources from <tt>self</tt>.
  244 +</p>
  245 +<pre>
  246 + s = &quot;&lt;script type='text/javascript'&gt;
  247 + var skin='vector',
  248 + stylepath='http://bits.wikimedia.org/skins-1.5'
  249 + &lt;/script&gt;
  250 +
  251 + test&quot;
  252 + s.strip_javascripts #=&gt; &quot;test&quot;
  253 +</pre>
  254 +
  255 + </div>
  256 + </div>
  257 +
  258 +
  259 + <div id="method-M000005" class="method-detail">
  260 + <a name="M000005"></a>
  261 +
  262 + <div class="method-heading">
  263 +
  264 + <a href="String.src/M000005.html" target="Code" class="method-signature"
  265 + onclick="popupCode('String.src/M000005.html');return false;">
  266 +
  267 + <span class="method-name">strip_javascripts!</span><span class="method-args">()</span>
  268 +
  269 + </a>
  270 +
  271 + </div>
  272 +
  273 + <div class="method-description">
  274 +
  275 + <p>
  276 +Removes all JavaScript sources from <tt>self</tt>.
  277 +</p>
  278 +<pre>
  279 + s = &quot;&lt;script type='text/javascript'&gt;
  280 + var skin='vector',
  281 + stylepath='http://bits.wikimedia.org/skins-1.5'
  282 + &lt;/script&gt;
  283 +
  284 + test&quot;
  285 + s.strip_javascripts!
  286 + s #=&gt; &quot;test&quot;
  287 +</pre>
  288 +
  289 + </div>
  290 + </div>
  291 +
  292 +
  293 + <div id="method-M000008" class="method-detail">
  294 + <a name="M000008"></a>
  295 +
  296 + <div class="method-heading">
  297 +
  298 + <a href="String.src/M000008.html" target="Code" class="method-signature"
  299 + onclick="popupCode('String.src/M000008.html');return false;">
  300 +
  301 + <span class="method-name">strip_stylesheets</span><span class="method-args">()</span>
  302 +
  303 + </a>
  304 +
  305 + </div>
  306 +
  307 + <div class="method-description">
  308 +
  309 + </div>
  310 + </div>
  311 +
  312 +
  313 + <div id="method-M000007" class="method-detail">
  314 + <a name="M000007"></a>
  315 +
  316 + <div class="method-heading">
  317 +
  318 + <a href="String.src/M000007.html" target="Code" class="method-signature"
  319 + onclick="popupCode('String.src/M000007.html');return false;">
  320 +
  321 + <span class="method-name">strip_stylesheets!</span><span class="method-args">()</span>
  322 +
  323 + </a>
  324 +
  325 + </div>
  326 +
  327 + <div class="method-description">
  328 +
  329 + </div>
  330 + </div>
  331 +
  332 +
  333 + <div id="method-M000004" class="method-detail">
  334 + <a name="M000004"></a>
  335 +
  336 + <div class="method-heading">
  337 +
  338 + <a href="String.src/M000004.html" target="Code" class="method-signature"
  339 + onclick="popupCode('String.src/M000004.html');return false;">
  340 +
  341 + <span class="method-name">strip_xml_tags</span><span class="method-args">()</span>
  342 +
  343 + </a>
  344 +
  345 + </div>
  346 +
  347 + <div class="method-description">
  348 +
  349 + <p>
  350 +Removes all XML-like tags from <tt>self</tt>.
  351 +</p>
  352 +<pre>
  353 + s = &quot;&lt;html&gt;&lt;body&gt;test&lt;/body&gt;&lt;/html&gt;&quot;
  354 + s.strip_xml_tags #=&gt; &quot;test&quot;
  355 + s #=&gt; &quot;&lt;html&gt;&lt;body&gt;test&lt;/body&gt;&lt;/html&gt;&quot;
  356 +</pre>
  357 +
  358 + </div>
  359 + </div>
  360 +
  361 +
  362 + <div id="method-M000003" class="method-detail">
  363 + <a name="M000003"></a>
  364 +
  365 + <div class="method-heading">
  366 +
  367 + <a href="String.src/M000003.html" target="Code" class="method-signature"
  368 + onclick="popupCode('String.src/M000003.html');return false;">
  369 +
  370 + <span class="method-name">strip_xml_tags!</span><span class="method-args">()</span>
  371 +
  372 + </a>
  373 +
  374 + </div>
  375 +
  376 + <div class="method-description">
  377 +
  378 + <p>
  379 +Removes all XML-like tags from <tt>self</tt>.
  380 +</p>
  381 +<pre>
  382 + s = &quot;&lt;html&gt;&lt;body&gt;test&lt;/body&gt;&lt;/html&gt;&quot;
  383 + s.strip_xml_tags!
  384 + s #=&gt; &quot;test&quot;
  385 +</pre>
  386 +
  387 + </div>
  388 + </div>
  389 +
  390 +
  391 +
  392 + </div>
  393 +
  394 +
  395 +
  396 +
  397 + </div>
  398 +
  399 +<div id="validator-badges">
  400 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  401 +</div>
  402 +
  403 +</body>
  404 +</html>
doc/classes/String.src/M000001.html
... ... @@ -0,0 +1,15 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>is_stopword? (String)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 77</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">is_stopword?</span>
  12 + <span class="ruby-constant">Stoplist</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">downcase</span>)
  13 + <span class="ruby-keyword kw">end</span></pre>
  14 +</body>
  15 +</html>
doc/classes/String.src/M000002.html
... ... @@ -0,0 +1,15 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>remove_special_characters (String)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 83</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">remove_special_characters</span>
  12 + <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">split</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/\W/</span>,<span class="ruby-value str">' '</span>).<span class="ruby-identifier">split</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">w</span><span class="ruby-operator">|</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/\W/</span>,<span class="ruby-value str">' '</span>).<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/\A.\z/</span>, <span class="ruby-value str">''</span>)}.<span class="ruby-identifier">join</span>(<span class="ruby-value str">' '</span>).<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/\A.\z/</span>, <span class="ruby-value str">''</span>)}.<span class="ruby-identifier">join</span>(<span class="ruby-value str">' '</span>)
  13 + <span class="ruby-keyword kw">end</span></pre>
  14 +</body>
  15 +</html>
doc/classes/String.src/M000003.html
... ... @@ -0,0 +1,15 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>strip_xml_tags! (String)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 92</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_xml_tags!</span>
  12 + <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator">&lt;</span>\<span class="ruby-regexp re">/?[^&gt;]*&gt;/</span>
  13 + <span class="ruby-keyword kw">end</span></pre>
  14 +</body>
  15 +</html>
doc/classes/String.src/M000004.html
... ... @@ -0,0 +1,15 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>strip_xml_tags (String)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 101</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_xml_tags</span>
  12 + <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_xml_tags!</span>
  13 + <span class="ruby-keyword kw">end</span></pre>
  14 +</body>
  15 +</html>
doc/classes/String.src/M000005.html
... ... @@ -0,0 +1,15 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>strip_javascripts! (String)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 115</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_javascripts!</span>
  12 + <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator">&lt;</span><span class="ruby-identifier">script</span> <span class="ruby-identifier">type</span>=<span class="ruby-value str">&quot;text\/javascript&quot;</span><span class="ruby-operator">&gt;</span>(.<span class="ruby-operator">+</span><span class="ruby-value">?)</span><span class="ruby-operator">&lt;</span>\<span class="ruby-regexp re">/script&gt;/</span><span class="ruby-identifier">m</span>
  13 + <span class="ruby-keyword kw">end</span></pre>
  14 +</body>
  15 +</html>
doc/classes/String.src/M000006.html
... ... @@ -0,0 +1,15 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>strip_javascripts (String)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 128</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_javascripts</span>
  12 + <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_javascripts!</span>
  13 + <span class="ruby-keyword kw">end</span></pre>
  14 +</body>
  15 +</html>
doc/classes/String.src/M000007.html
... ... @@ -0,0 +1,16 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>strip_stylesheets! (String)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 132</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_stylesheets!</span>
  12 + <span class="ruby-comment cmt"># TODO: revamp. Not sure what this is.</span>
  13 + <span class="ruby-identifier">replace</span> <span class="ruby-identifier">strip_with_pattern</span> <span class="ruby-operator">/</span><span class="ruby-operator">&lt;</span><span class="ruby-identifier">style</span> <span class="ruby-identifier">type</span>=<span class="ruby-value str">&quot;text\/css&quot;</span><span class="ruby-operator">&gt;</span>(.<span class="ruby-operator">+</span><span class="ruby-value">?)</span><span class="ruby-operator">&lt;</span>\<span class="ruby-regexp re">/style&gt;/</span><span class="ruby-identifier">m</span>
  14 + <span class="ruby-keyword kw">end</span></pre>
  15 +</body>
  16 +</html>
doc/classes/String.src/M000008.html
... ... @@ -0,0 +1,15 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>strip_stylesheets (String)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 137</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">strip_stylesheets</span>
  12 + <span class="ruby-identifier">dup</span>.<span class="ruby-identifier">strip_stylesheets!</span>
  13 + <span class="ruby-keyword kw">end</span></pre>
  14 +</body>
  15 +</html>
doc/classes/String.src/M000009.html
... ... @@ -0,0 +1,15 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>extract_xmltags_values (String)</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  8 +</head>
  9 +<body class="standalone-code">
  10 + <pre><span class="ruby-comment cmt"># File lib/rir/string.rb, line 145</span>
  11 + <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">extract_xmltags_values</span>(<span class="ruby-identifier">tag_name</span>)
  12 + <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">scan</span>(<span class="ruby-node">/&lt;#{tag_name}.*?&gt;(.+?)&lt;\/#{tag_name}&gt;/</span>).<span class="ruby-identifier">flatten</span>
  13 + <span class="ruby-keyword kw">end</span></pre>
  14 +</body>
  15 +</html>
... ... @@ -0,0 +1 @@
  1 +Fri, 05 Nov 2010 14:41:10 +0100
doc/files/README_markdown.html
... ... @@ -0,0 +1,90 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>File: README.markdown [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="fileHeader">
  46 + <h1>README.markdown</h1>
  47 + <table class="header-table">
  48 + <tr class="top-aligned-row">
  49 + <td><strong>Path:</strong></td>
  50 + <td>README.markdown
  51 +
  52 + </td>
  53 + </tr>
  54 + <tr class="top-aligned-row">
  55 + <td><strong>Last Update:</strong></td>
  56 + <td>2010-11-05 14:40:41 +0100</td>
  57 + </tr>
  58 + </table>
  59 + </div>
  60 + <!-- banner header -->
  61 +
  62 + <div id="bodyContent">
  63 +
  64 + <div id="contextContent">
  65 +
  66 + </div>
  67 +
  68 +
  69 + </div>
  70 +
  71 + <!-- if includes -->
  72 +
  73 + <div id="section">
  74 +
  75 +
  76 +
  77 +
  78 + <!-- if method_list -->
  79 +
  80 +
  81 +
  82 +
  83 + </div>
  84 +
  85 +<div id="validator-badges">
  86 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  87 +</div>
  88 +
  89 +</body>
  90 +</html>
doc/files/lib/rir/document_rb.html
... ... @@ -0,0 +1,127 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>File: document.rb [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="fileHeader">
  46 + <h1>document.rb</h1>
  47 + <table class="header-table">
  48 + <tr class="top-aligned-row">
  49 + <td><strong>Path:</strong></td>
  50 + <td>lib/rir/document.rb
  51 +
  52 + </td>
  53 + </tr>
  54 + <tr class="top-aligned-row">
  55 + <td><strong>Last Update:</strong></td>
  56 + <td>2010-11-05 14:39:35 +0100</td>
  57 + </tr>
  58 + </table>
  59 + </div>
  60 + <!-- banner header -->
  61 +
  62 + <div id="bodyContent">
  63 +
  64 + <div id="contextContent">
  65 +
  66 + <div id="description">
  67 + <p>
  68 +This file is a part of an Information Retrieval oriented Ruby library
  69 +</p>
  70 +<p>
  71 +Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
  72 +</p>
  73 +<p>
  74 +This program is free software: you can redistribute it and/or modify it
  75 +under the terms of the GNU General Public License as published by the Free
  76 +Software Foundation, either version 3 of the License, or (at your option)
  77 +any later version.
  78 +</p>
  79 +<p>
  80 +This program is distributed in the hope that it will be useful, but WITHOUT
  81 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  82 +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  83 +more details.
  84 +</p>
  85 +<p>
  86 +You should have received a copy of the GNU General Public License along
  87 +with this program. If not, see <<a
  88 +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
  89 +</p>
  90 +
  91 + </div>
  92 +
  93 + <div id="requires-list">
  94 + <h3 class="section-bar">Required files</h3>
  95 +
  96 + <div class="name-list">
  97 +
  98 + net/http&nbsp;&nbsp;
  99 +
  100 + </div>
  101 + </div>
  102 +
  103 + </div>
  104 +
  105 +
  106 + </div>
  107 +
  108 + <!-- if includes -->
  109 +
  110 + <div id="section">
  111 +
  112 +
  113 +
  114 +
  115 + <!-- if method_list -->
  116 +
  117 +
  118 +
  119 +
  120 + </div>
  121 +
  122 +<div id="validator-badges">
  123 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  124 +</div>
  125 +
  126 +</body>
  127 +</html>
doc/files/lib/rir/string_rb.html
... ... @@ -0,0 +1,129 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>File: string.rb [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="fileHeader">
  46 + <h1>string.rb</h1>
  47 + <table class="header-table">
  48 + <tr class="top-aligned-row">
  49 + <td><strong>Path:</strong></td>
  50 + <td>lib/rir/string.rb
  51 +
  52 + </td>
  53 + </tr>
  54 + <tr class="top-aligned-row">
  55 + <td><strong>Last Update:</strong></td>
  56 + <td>2010-11-05 14:39:35 +0100</td>
  57 + </tr>
  58 + </table>
  59 + </div>
  60 + <!-- banner header -->
  61 +
  62 + <div id="bodyContent">
  63 +
  64 + <div id="contextContent">
  65 +
  66 + <div id="description">
  67 + <p>
  68 +This file is a part of an Information Retrieval oriented Ruby library
  69 +</p>
  70 +<p>
  71 +Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
  72 +</p>
  73 +<p>
  74 +This program is free software: you can redistribute it and/or modify it
  75 +under the terms of the GNU General Public License as published by the Free
  76 +Software Foundation, either version 3 of the License, or (at your option)
  77 +any later version.
  78 +</p>
  79 +<p>
  80 +This program is distributed in the hope that it will be useful, but WITHOUT
  81 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  82 +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  83 +more details.
  84 +</p>
  85 +<p>
  86 +You should have received a copy of the GNU General Public License along
  87 +with this program. If not, see <<a
  88 +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
  89 +</p>
  90 +
  91 + </div>
  92 +
  93 + <div id="requires-list">
  94 + <h3 class="section-bar">Required files</h3>
  95 +
  96 + <div class="name-list">
  97 +
  98 + cgi&nbsp;&nbsp;
  99 +
  100 + kconv&nbsp;&nbsp;
  101 +
  102 + </div>
  103 + </div>
  104 +
  105 + </div>
  106 +
  107 +
  108 + </div>
  109 +
  110 + <!-- if includes -->
  111 +
  112 + <div id="section">
  113 +
  114 +
  115 +
  116 +
  117 + <!-- if method_list -->
  118 +
  119 +
  120 +
  121 +
  122 + </div>
  123 +
  124 +<div id="validator-badges">
  125 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  126 +</div>
  127 +
  128 +</body>
  129 +</html>
doc/files/lib/rir_rb.html
... ... @@ -0,0 +1,102 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>File: rir.rb [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="fileHeader">
  46 + <h1>rir.rb</h1>
  47 + <table class="header-table">
  48 + <tr class="top-aligned-row">
  49 + <td><strong>Path:</strong></td>
  50 + <td>lib/rir.rb
  51 +
  52 + </td>
  53 + </tr>
  54 + <tr class="top-aligned-row">
  55 + <td><strong>Last Update:</strong></td>
  56 + <td>2010-11-05 14:39:35 +0100</td>
  57 + </tr>
  58 + </table>
  59 + </div>
  60 + <!-- banner header -->
  61 +
  62 + <div id="bodyContent">
  63 +
  64 + <div id="contextContent">
  65 +
  66 + <div id="requires-list">
  67 + <h3 class="section-bar">Required files</h3>
  68 +
  69 + <div class="name-list">
  70 +
  71 + rir/document&nbsp;&nbsp;
  72 +
  73 + rir/string&nbsp;&nbsp;
  74 +
  75 + </div>
  76 + </div>
  77 +
  78 + </div>
  79 +
  80 +
  81 + </div>
  82 +
  83 + <!-- if includes -->
  84 +
  85 + <div id="section">
  86 +
  87 +
  88 +
  89 +
  90 + <!-- if method_list -->
  91 +
  92 +
  93 +
  94 +
  95 + </div>
  96 +
  97 +<div id="validator-badges">
  98 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  99 +</div>
  100 +
  101 +</body>
  102 +</html>
doc/files/main_rb.html
... ... @@ -0,0 +1,100 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<head>
  5 + <title>File: main.rb [RDoc Documentation]</title>
  6 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  7 + <meta http-equiv="Content-Script-Type" content="text/javascript" />
  8 + <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
  9 + <script type="text/javascript">
  10 + // <![CDATA[
  11 +
  12 + function popupCode( url ) {
  13 + window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
  14 + }
  15 +
  16 + function toggleCode( id ) {
  17 + if ( document.getElementById )
  18 + elem = document.getElementById( id );
  19 + else if ( document.all )
  20 + elem = eval( "document.all." + id );
  21 + else
  22 + return false;
  23 +
  24 + elemStyle = elem.style;
  25 +
  26 + if ( elemStyle.display != "block" ) {
  27 + elemStyle.display = "block"
  28 + } else {
  29 + elemStyle.display = "none"
  30 + }
  31 +
  32 + return true;
  33 + }
  34 +
  35 + // Make codeblocks hidden by default
  36 + document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  37 +
  38 + // ]]>
  39 + </script>
  40 +
  41 +</head>
  42 +<body>
  43 +
  44 +
  45 + <div id="fileHeader">
  46 + <h1>main.rb</h1>
  47 + <table class="header-table">
  48 + <tr class="top-aligned-row">
  49 + <td><strong>Path:</strong></td>
  50 + <td>main.rb
  51 +
  52 + </td>
  53 + </tr>
  54 + <tr class="top-aligned-row">
  55 + <td><strong>Last Update:</strong></td>
  56 + <td>2010-11-05 14:40:11 +0100</td>
  57 + </tr>
  58 + </table>
  59 + </div>
  60 + <!-- banner header -->
  61 +
  62 + <div id="bodyContent">
  63 +
  64 + <div id="contextContent">
  65 +
  66 + <div id="requires-list">
  67 + <h3 class="section-bar">Required files</h3>
  68 +
  69 + <div class="name-list">
  70 +
  71 + rir&nbsp;&nbsp;
  72 +
  73 + </div>
  74 + </div>
  75 +
  76 + </div>
  77 +
  78 +
  79 + </div>
  80 +
  81 + <!-- if includes -->
  82 +
  83 + <div id="section">
  84 +
  85 +
  86 +
  87 +
  88 + <!-- if method_list -->
  89 +
  90 +
  91 +
  92 +
  93 + </div>
  94 +
  95 +<div id="validator-badges">
  96 + <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  97 +</div>
  98 +
  99 +</body>
  100 +</html>
doc/fr_class_index.html
... ... @@ -0,0 +1,33 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<!--
  5 +
  6 + Classes [RDoc Documentation]
  7 +
  8 + -->
  9 +<head>
  10 + <title>Classes [RDoc Documentation]</title>
  11 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  12 + <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
  13 + <base target="docwin" />
  14 +</head>
  15 +<body>
  16 +<div class="index">
  17 + <h1 class="section-bar">Classes</h1>
  18 + <div id="index-entries">
  19 +
  20 + <a href="classes/Rir.html">Rir</a><br />
  21 +
  22 + <a href="classes/Rir/Document.html">Rir::Document</a><br />
  23 +
  24 + <a href="classes/Rir/WebDocument.html">Rir::WebDocument</a><br />
  25 +
  26 + <a href="classes/Rir/WikipediaPage.html">Rir::WikipediaPage</a><br />
  27 +
  28 + <a href="classes/String.html">String</a><br />
  29 +
  30 + </div>
  31 +</div>
  32 +</body>
  33 +</html>
doc/fr_file_index.html
... ... @@ -0,0 +1,33 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<!--
  5 +
  6 + Files [RDoc Documentation]
  7 +
  8 + -->
  9 +<head>
  10 + <title>Files [RDoc Documentation]</title>
  11 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  12 + <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
  13 + <base target="docwin" />
  14 +</head>
  15 +<body>
  16 +<div class="index">
  17 + <h1 class="section-bar">Files</h1>
  18 + <div id="index-entries">
  19 +
  20 + <a href="files/README_markdown.html">README.markdown</a><br />
  21 +
  22 + <a href="files/lib/rir_rb.html">lib/rir.rb</a><br />
  23 +
  24 + <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br />
  25 +
  26 + <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br />
  27 +
  28 + <a href="files/main_rb.html">main.rb</a><br />
  29 +
  30 + </div>
  31 +</div>
  32 +</body>
  33 +</html>
doc/fr_method_index.html
... ... @@ -0,0 +1,55 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<!--
  5 +
  6 + Methods [RDoc Documentation]
  7 +
  8 + -->
  9 +<head>
  10 + <title>Methods [RDoc Documentation]</title>
  11 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  12 + <link rel="stylesheet" href="rdoc-style.css" type="text/css" />
  13 + <base target="docwin" />
  14 +</head>
  15 +<body>
  16 +<div class="index">
  17 + <h1 class="section-bar">Methods</h1>
  18 + <div id="index-entries">
  19 +
  20 + <a href="classes/Rir/Document.html#M000012">count_words (Rir::Document)</a><br />
  21 +
  22 + <a href="classes/Rir/Document.html#M000013">entropy (Rir::Document)</a><br />
  23 +
  24 + <a href="classes/String.html#M000009">extract_xmltags_values (String)</a><br />
  25 +
  26 + <a href="classes/Rir/Document.html#M000010">format_words (Rir::Document)</a><br />
  27 +
  28 + <a href="classes/Rir/WebDocument.html#M000015">get_content (Rir::WebDocument)</a><br />
  29 +
  30 + <a href="classes/String.html#M000001">is_stopword? (String)</a><br />
  31 +
  32 + <a href="classes/Rir/WebDocument.html#M000016">new (Rir::WebDocument)</a><br />
  33 +
  34 + <a href="classes/Rir/Document.html#M000014">new (Rir::Document)</a><br />
  35 +
  36 + <a href="classes/Rir/Document.html#M000011">ngrams (Rir::Document)</a><br />
  37 +
  38 + <a href="classes/String.html#M000002">remove_special_characters (String)</a><br />
  39 +
  40 + <a href="classes/String.html#M000006">strip_javascripts (String)</a><br />
  41 +
  42 + <a href="classes/String.html#M000005">strip_javascripts! (String)</a><br />
  43 +
  44 + <a href="classes/String.html#M000008">strip_stylesheets (String)</a><br />
  45 +
  46 + <a href="classes/String.html#M000007">strip_stylesheets! (String)</a><br />
  47 +
  48 + <a href="classes/String.html#M000004">strip_xml_tags (String)</a><br />
  49 +
  50 + <a href="classes/String.html#M000003">strip_xml_tags! (String)</a><br />
  51 +
  52 + </div>
  53 +</div>
  54 +</body>
  55 +</html>
... ... @@ -0,0 +1,21 @@
  1 +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
  2 +"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
  3 +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  4 +<!--
  5 +
  6 + RDoc Documentation
  7 +
  8 + -->
  9 +<head>
  10 + <title>RDoc Documentation</title>
  11 + <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  12 +</head>
  13 +<frameset rows="20%, 80%">
  14 + <frameset cols="25%,35%,45%">
  15 + <frame src="fr_file_index.html" title="Files" name="Files" />
  16 + <frame src="fr_class_index.html" name="Classes" />
  17 + <frame src="fr_method_index.html" name="Methods" />
  18 + </frameset>
  19 + <frame src="files/README_markdown.html" name="docwin" />
  20 +</frameset>
  21 +</html>
... ... @@ -0,0 +1,299 @@
  1 +body {
  2 + font-family: Verdana,Arial,Helvetica,sans-serif;
  3 + font-size: 90%;
  4 + margin: 0;
  5 + margin-left: 40px;
  6 + padding: 0;
  7 + background: white;
  8 + color: black;
  9 +}
  10 +
  11 +h1, h2, h3, h4 {
  12 + margin: 0;
  13 + background: transparent;
  14 +}
  15 +
  16 +h1 {
  17 + font-size: 150%;
  18 +}
  19 +
  20 +h2,h3,h4 {
  21 + margin-top: 1em;
  22 +}
  23 +
  24 +:link, :visited {
  25 + background: #eef;
  26 + color: #039;
  27 + text-decoration: none;
  28 +}
  29 +
  30 +:link:hover, :visited:hover {
  31 + background: #039;
  32 + color: #eef;
  33 +}
  34 +
  35 +/* Override the base stylesheet's Anchor inside a table cell */
  36 +td > :link, td > :visited {
  37 + background: transparent;
  38 + color: #039;
  39 + text-decoration: none;
  40 +}
  41 +
  42 +/* and inside a section title */
  43 +.section-title > :link, .section-title > :visited {
  44 + background: transparent;
  45 + color: #eee;
  46 + text-decoration: none;
  47 +}
  48 +
  49 +/* === Structural elements =================================== */
  50 +
  51 +.index {
  52 + margin: 0;
  53 + margin-left: -40px;
  54 + padding: 0;
  55 + font-size: 90%;
  56 +}
  57 +
  58 +.index :link, .index :visited {
  59 + margin-left: 0.7em;
  60 +}
  61 +
  62 +.index .section-bar {
  63 + margin-left: 0px;
  64 + padding-left: 0.7em;
  65 + background: #ccc;
  66 + font-size: small;
  67 +}
  68 +
  69 +#classHeader, #fileHeader {
  70 + width: auto;
  71 + color: white;
  72 + padding: 0.5em 1.5em 0.5em 1.5em;
  73 + margin: 0;
  74 + margin-left: -40px;
  75 + border-bottom: 3px solid #006;
  76 +}
  77 +
  78 +#classHeader :link, #fileHeader :link,
  79 +#classHeader :visited, #fileHeader :visited {
  80 + background: inherit;
  81 + color: white;
  82 +}
  83 +
  84 +#classHeader td, #fileHeader td {
  85 + background: inherit;
  86 + color: white;
  87 +}
  88 +
  89 +#fileHeader {
  90 + background: #057;
  91 +}
  92 +
  93 +#classHeader {
  94 + background: #048;
  95 +}
  96 +
  97 +.class-name-in-header {
  98 + font-size: 180%;
  99 + font-weight: bold;
  100 +}
  101 +
  102 +#bodyContent {
  103 + padding: 0 1.5em 0 1.5em;
  104 +}
  105 +
  106 +#description {
  107 + padding: 0.5em 1.5em;
  108 + background: #efefef;
  109 + border: 1px dotted #999;
  110 +}
  111 +
  112 +#description h1, #description h2, #description h3,
  113 +#description h4, #description h5, #description h6 {
  114 + color: #125;
  115 + background: transparent;
  116 +}
  117 +
  118 +#validator-badges {
  119 + text-align: center;
  120 +}
  121 +
  122 +#validator-badges img {
  123 + border: 0;
  124 +}
  125 +
  126 +#copyright {
  127 + color: #333;
  128 + background: #efefef;
  129 + font: 0.75em sans-serif;
  130 + margin-top: 5em;
  131 + margin-bottom: 0;
  132 + padding: 0.5em 2em;
  133 +}
  134 +
  135 +/* === Classes =================================== */
  136 +
  137 +table.header-table {
  138 + color: white;
  139 + font-size: small;
  140 +}
  141 +
  142 +.type-note {
  143 + font-size: small;
  144 + color: #dedede;
  145 +}
  146 +
  147 +.section-bar {
  148 + color: #333;
  149 + border-bottom: 1px solid #999;
  150 + margin-left: -20px;
  151 +}
  152 +
  153 +.section-title {
  154 + background: #79a;
  155 + color: #eee;
  156 + padding: 3px;
  157 + margin-top: 2em;
  158 + margin-left: -30px;
  159 + border: 1px solid #999;
  160 +}
  161 +
  162 +.top-aligned-row {
  163 + vertical-align: top
  164 +}
  165 +
  166 +.bottom-aligned-row {
  167 + vertical-align: bottom
  168 +}
  169 +
  170 +#diagram img {
  171 + border: 0;
  172 +}
  173 +
  174 +/* --- Context section classes ----------------------- */
  175 +
  176 +.context-row { }
  177 +
  178 +.context-item-name {
  179 + font-family: monospace;
  180 + font-weight: bold;
  181 + color: black;
  182 +}
  183 +
  184 +.context-item-value {
  185 + font-size: small;
  186 + color: #448;
  187 +}
  188 +
  189 +.context-item-desc {
  190 + color: #333;
  191 + padding-left: 2em;
  192 +}
  193 +
  194 +/* --- Method classes -------------------------- */
  195 +
  196 +.method-detail {
  197 + background: #efefef;
  198 + padding: 0;
  199 + margin-top: 0.5em;
  200 + margin-bottom: 1em;
  201 + border: 1px dotted #ccc;
  202 +}
  203 +
  204 +.method-heading {
  205 + color: black;
  206 + background: #ccc;
  207 + border-bottom: 1px solid #666;
  208 + padding: 0.2em 0.5em 0 0.5em;
  209 +}
  210 +
  211 +.method-signature {
  212 + color: black;
  213 + background: inherit;
  214 +}
  215 +
  216 +.method-name {
  217 + font-weight: bold;
  218 +}
  219 +
  220 +.method-args {
  221 + font-style: italic;
  222 +}
  223 +
  224 +.method-description {
  225 + padding: 0 0.5em 0 0.5em;
  226 +}
  227 +
  228 +/* --- Source code sections -------------------- */
  229 +
  230 +:link.source-toggle, :visited.source-toggle {
  231 + font-size: 90%;
  232 +}
  233 +
  234 +div.method-source-code {
  235 + background: #262626;
  236 + color: #ffdead;
  237 + margin: 1em;
  238 + padding: 0.5em;
  239 + border: 1px dashed #999;
  240 + overflow: auto;
  241 +}
  242 +
  243 +div.method-source-code pre {
  244 + color: #ffdead;
  245 +}
  246 +
  247 +/* --- Ruby keyword styles --------------------- */
  248 +
  249 +.standalone-code {
  250 + background: #221111;
  251 + color: #ffdead;
  252 + overflow: auto;
  253 +}
  254 +
  255 +.ruby-constant {
  256 + color: #7fffd4;
  257 + background: transparent;
  258 +}
  259 +
  260 +.ruby-keyword {
  261 + color: #00ffff;
  262 + background: transparent;
  263 +}
  264 +
  265 +.ruby-ivar {
  266 + color: #eedd82;
  267 + background: transparent;
  268 +}
  269 +
  270 +.ruby-operator {
  271 + color: #00ffee;
  272 + background: transparent;
  273 +}
  274 +
  275 +.ruby-identifier {
  276 + color: #ffdead;
  277 + background: transparent;
  278 +}
  279 +
  280 +.ruby-node {
  281 + color: #ffa07a;
  282 + background: transparent;
  283 +}
  284 +
  285 +.ruby-comment {
  286 + color: #b22222;
  287 + font-weight: bold;
  288 + background: transparent;
  289 +}
  290 +
  291 +.ruby-regexp {
  292 + color: #ffa07a;
  293 + background: transparent;
  294 +}
  295 +
  296 +.ruby-value {
  297 + color: #7fffd4;
  298 + background: transparent;
  299 +}
... ... @@ -0,0 +1,4 @@
  1 +#!/usr/bin/env ruby
  2 +
  3 +require 'rir/document'
  4 +require 'rir/string'
... ... @@ -0,0 +1,121 @@
  1 +#!/usr/bin/env ruby
  2 +
  3 +# This file is a part of an Information Retrieval oriented Ruby library
  4 +#
  5 +# Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
  6 +#
  7 +# This program is free software: you can redistribute it and/or modify
  8 +# it under the terms of the GNU General Public License as published by
  9 +# the Free Software Foundation, either version 3 of the License, or
  10 +# (at your option) any later version.
  11 +#
  12 +# This program is distributed in the hope that it will be useful,
  13 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 +# GNU General Public License for more details.
  16 +#
  17 +# You should have received a copy of the GNU General Public License
  18 +# along with this program. If not, see <http://www.gnu.org/licenses/>.
  19 +
  20 +# General module for many purposes related to Information Retrieval.
module Rir

  # A Document is a bag of words and is constructed from a string.
  class Document
    attr_reader :words, :doc_content

    # Builds the Document from the raw text +content+ and tokenizes it once
    # into +words+.
    def initialize(content)
      @doc_content = content
      @words = format_words
    end

    # Returns an Array containing the +n+-grams (words) from the current Document.
    # Duplicated n-grams are reported once. An +n+ that is not positive, or that
    # is larger than the number of words, yields an empty Array.
    #
    #   ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...]
    def ngrams(n)
      # each_cons raises on a non-positive size; keep returning [] instead.
      return [] unless n > 0

      @words.each_cons(n).map { |gram| gram.join(" ") }.uniq
    end

    # Returns a Hash containing the words and their associated counts in the current Document.
    # Words are lower-cased before being counted. Looking up a word that is not
    # in the Document returns 0 and does not add the word to the Hash.
    #
    #   count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... }
    def count_words
      @words.each_with_object(Hash.new(0)) { |w, counts| counts[w.downcase] += 1 }
    end

    # Computes the entropy of a given string +s+ inside the document.
    #
    # If the string parameter is composed of many words (i.e. tokens separated
    # by whitespace(s)), it is considered as an ngram.
    #
    # Tokens are matched case-insensitively, consistently with #count_words.
    # A token that does not occur in the Document contributes 0 to the result,
    # and the entropy inside an empty Document is 0.0.
    #
    #   entropy("guitar") #=> 0.00389919463243839
    def entropy(s)
      total = @words.size.to_f
      return 0.0 if total.zero?

      counts = count_words

      sum = s.split.inject(0.0) do |acc, w|
        p_wi = counts[w.downcase] / total
        # p * log2(p) tends to 0 when p tends to 0; computing it directly
        # would give 0 * -Infinity = NaN.
        p_wi > 0 ? acc + p_wi * Math.log2(p_wi) : acc
      end

      0.0 - sum
    end

    # Any non-word characters are removed from the words (see http://perldoc.perl.org/perlre.html
    # and the \\W special escape). Only tokens containing at least one ASCII
    # letter are kept.
    #
    # Protected function, only meant to be called at the initialization.
    def format_words
      @doc_content.split(/\W+/).grep(/[a-zA-Z]/)
    end

    protected :format_words
  end

  # A WebDocument is a Document with a +url+.
  class WebDocument < Document
    attr_reader :url

    # Returns the HTML text from the page of a given +url+.
    def self.get_content(url)
      # Loaded lazily so that plain Documents do not pull in the HTTP stack.
      require 'net/http'
      Net::HTTP.get(URI.parse(url))
    end

    # WebDocument constructor, the content of the Document is the HTML page
    # without the tags.
    #
    # Relies on the String extensions strip_javascripts, strip_stylesheets and
    # strip_xml_tags being loaded.
    def initialize(url)
      @url = url
      html = WebDocument.get_content(url)
      super(html.strip_javascripts.strip_stylesheets.strip_xml_tags)
    end
  end

  # A WikipediaPage is a WebDocument.
  class WikipediaPage < WebDocument
  end
end
... ... @@ -0,0 +1,155 @@
  1 +#!/usr/bin/env ruby
  2 +
  3 +# This file is a part of an Information Retrieval oriented Ruby library
  4 +#
  5 +# Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
  6 +#
  7 +# This program is free software: you can redistribute it and/or modify
  8 +# it under the terms of the GNU General Public License as published by
  9 +# the Free Software Foundation, either version 3 of the License, or
  10 +# (at your option) any later version.
  11 +#
  12 +# This program is distributed in the hope that it will be useful,
  13 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 +# GNU General Public License for more details.
  16 +#
  17 +# You should have received a copy of the GNU General Public License
  18 +# along with this program. If not, see <http://www.gnu.org/licenses/>.
  19 +
  20 +# General module for many purposes related to Information Retrieval.
module Rir

  # Default stopword list, as provided by Lemur. Every entry is lower-case.
  Stoplist = %w[
    a anything anyway anywhere apart are around as at av
    be became because become becomes becoming been before beforehand
    behind being below beside besides between beyond both but by
    can cannot canst certain cf choose contrariwise cos could cu
    day do does doesn't doing dost doth double down dual during
    each either else elsewhere enough et etc even ever every
    everybody everyone everything everywhere except excepted excepting
    exception exclude excluding exclusive far farther farthest few ff
    first for formerly forth forward from front further furthermore
    furthest get go had halves hardly has hast hath have he
    hence henceforth her here hereabouts hereafter hereby herein hereto
    hereupon hers herself him himself hindmost his hither hitherto
    how however howsoever i ie if in inasmuch inc include
    included including indeed indoors inside insomuch instead into
    inward inwards is it its itself just kind kg km last
    latter latterly less lest let like little ltd many may maybe
    me meantime meanwhile might moreover most mostly more mr mrs
    ms much must my myself namely need neither never nevertheless
    next no nobody none nonetheless noone nope nor not nothing
    notwithstanding now nowadays nowhere of off often ok on once
    one only onto or other others otherwise ought our ours
    ourselves out outside over own per perhaps plenty provide quite
    rather really round said sake same sang save saw see seeing
    seem seemed seeming seems seen seldom selves sent several shalt
    she should shown sideways since slept slew slung slunk smote
    so some somebody somehow someone something sometime sometimes
    somewhat somewhere spake spat spoke spoken sprang sprung stave
    staves still such supposing than that the thee their them
    themselves then thence thenceforth there thereabout thereabouts
    thereafter thereby therefore therein thereof thereon thereto thereupon
    these they this those thou though thrice through throughout thru
    thus thy thyself till to together too toward towards ugh
    unable under underneath unless unlike until up upon upward
    upwards us use used using very via vs want was we week
    well were what whatever whatsoever when whence whenever whensoever
    where whereabouts whereafter whereas whereat whereby wherefore
    wherefrom wherein whereinto whereof whereon wheresoever whereto
    whereunto whereupon wherever wherewith whether whew which whichever
    whichsoever while whilst whither who whoa whoever whole whom
    whomever whomsoever whose whosoever why will wilt with within
    without worse worst would wow ye yet year yippee you your
    yours yourself yourselves
  ]

end
  71 +
  72 +# Extention of the standard class String with useful function.
  73 +class String
  74 + include Rir
  75 +
  76 + # Returns +true+ if +self+ belongs to Rir::Stoplist, +false+ otherwise.
  77 + def is_stopword?
  78 + Stoplist.include?(self.downcase)
  79 + end
  80 +
  81 + # Do not use.
  82 + # TODO: rewamp. find why this function is here.
  83 + def remove_special_characters
  84 + self.split.collect { |w| w.gsub(/\W/,' ').split.collect { |w| w.gsub(/\W/,' ').strip.sub(/\A.\z/, '')}.join(' ').strip.sub(/\A.\z/, '')}.join(' ')
  85 + end
  86 +
  87 + # Removes all XML-like tags from +self+.
  88 + #
  89 + # s = "<html><body>test</body></html>"
  90 + # s.strip_xml_tags!
  91 + # s #=> "test"
  92 + def strip_xml_tags!
  93 + replace strip_with_pattern /<\/?[^>]*>/
  94 + end
  95 +
  96 + # Removes all XML-like tags from +self+.
  97 + #
  98 + # s = "<html><body>test</body></html>"
  99 + # s.strip_xml_tags #=> "test"
  100 + # s #=> "<html><body>test</body></html>"
  101 + def strip_xml_tags
  102 + dup.strip_xml_tags!
  103 + end
  104 +
  105 + # Removes all Javascript sources from +self+.
  106 + #
  107 + # s = "<script type='text/javascript'>
  108 + # var skin='vector',
  109 + # stylepath='http://bits.wikimedia.org/skins-1.5'
  110 + # </script>
  111 + #
  112 + # test"
  113 + # s.strip_javascripts!
  114 + # s #=> "test"
  115 + def strip_javascripts!
  116 + replace strip_with_pattern /<script type="text\/javascript">(.+?)<\/script>/m
  117 + end
  118 +
  119 + # Removes all Javascript sources from +self+.
  120 + #
  121 + # s = "<script type='text/javascript'>
  122 + # var skin='vector',
  123 + # stylepath='http://bits.wikimedia.org/skins-1.5'
  124 + # </script>
  125 + #
  126 + # test"
  127 + # s.strip_javascripts #=> "test"
  128 + def strip_javascripts
  129 + dup.strip_javascripts!
  130 + end
  131 +
  132 + def strip_stylesheets!
  133 + # TODO: rewamp. dunno what is it.
  134 + replace strip_with_pattern /<style type="text\/css">(.+?)<\/style>/m
  135 + end
  136 +
  137 + def strip_stylesheets
  138 + dup.strip_stylesheets!
  139 + end
  140 +
  141 + # Returns the text values inside all occurences of a XML tag in +self+
  142 + #
  143 + # s = "four-piece in <a href='#'>Indianapolis</a>, <a href='#'>Indiana</a> at the Murat Theatre"
  144 + # s.extract_xmltags_values 'a' #=> ["Indianapolis", "Indiana"]
  145 + def extract_xmltags_values(tag_name)
  146 + self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten
  147 + end
  148 +
  149 + private
  150 + def strip_with_pattern(pattern)
  151 + require 'cgi'
  152 + require 'kconv'
  153 + CGI::unescapeHTML(self.gsub(pattern,"")).toutf8
  154 + end
  155 +end
... ... @@ -0,0 +1,3 @@
  1 +$LOAD_PATH.unshift File.expand_path(File.join(File.dirname(__FILE__), "lib"))
  2 +
  3 +require 'rir'