Document.html 7.68 KB
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>Class: Mirimiri::Document [RDoc Documentation]</title>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <meta http-equiv="Content-Script-Type" content="text/javascript" />
  <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
  <script type="text/javascript">
  // <![CDATA[

  // Opens the given URL in the window named "Code", requested as a small,
  // resizable, scrollable popup without toolbar or status bar.
  function popupCode( url ) {
    var windowFeatures = "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400";
    window.open( url, "Code", windowFeatures );
  }

  /**
   * Toggles the inline `display` style of the element with the given id
   * between "block" and "none".
   *
   * Any inline value other than "block" (including the empty string of an
   * element hidden only by a stylesheet rule) is treated as hidden, so the
   * first call shows the element.
   *
   * @param id  id attribute of the element to show or hide
   * @return    true when the element was toggled; false when the browser
   *            offers no lookup API or no element with that id exists
   */
  function toggleCode( id ) {
    // Declared locally: the undeclared assignments used previously leaked
    // `elem` and `elemStyle` into the global scope.
    var elem;

    if ( document.getElementById )
      elem = document.getElementById( id );
    else if ( document.all )
      // Property lookup instead of eval(): same element for a plain id,
      // without evaluating a string built from the argument.
      elem = document.all[ id ];
    else
      return false;

    // Unknown id: report failure instead of throwing on `elem.style`.
    if ( !elem )
      return false;

    var elemStyle = elem.style;
    elemStyle.display = ( elemStyle.display != "block" ) ? "block" : "none";

    return true;
  }

  // Source-code blocks start out collapsed. The rule is written from script,
  // so it is only emitted when this script actually runs.
  (function () {
    var hiddenSourceRule = "<style type=\"text/css\">div.method-source-code { display: none }<\/style>";
    document.writeln( hiddenSourceRule );
  })();

  // ]]>
  </script>

</head>
<body>


    <div id="classHeader">
        <table class="header-table">
        <tr class="top-aligned-row">
          <td><strong>Class</strong></td>
          <td class="class-name-in-header">Mirimiri::Document</td>
        </tr>
        <tr class="top-aligned-row">
            <td><strong>In:</strong></td>
            <td>


                <a href="../../files/lib/mirimiri/document_rb.html">

                lib/mirimiri/document.rb

                </a>


        <br />

            </td>
        </tr>


        <tr class="top-aligned-row">
            <td><strong>Parent:</strong></td>
            <td>

                Object

            </td>
        </tr>

        </table>
    </div>
  <!-- banner header -->

  <div id="bodyContent">

  <div id="contextContent">

    <div id="description">
      <p>
A <a href="Document.html">Document</a> is a bag of words and is constructed
from a string.
</p>

    </div>

   </div>


    <div id="method-list">
      <h3 class="section-bar">Methods</h3>

      <div class="name-list">

        <a href="#M000024">count_words</a>&nbsp;&nbsp;

        <a href="#M000025">entropy</a>&nbsp;&nbsp;

        <a href="#M000022">format_words</a>&nbsp;&nbsp;

        <a href="#M000027">new</a>&nbsp;&nbsp;

        <a href="#M000023">ngrams</a>&nbsp;&nbsp;

        <a href="#M000026">tf</a>&nbsp;&nbsp;

      </div>
    </div>

  </div>

    <!-- if includes -->

    <div id="section">



    <div id="attribute-list">
      <h3 class="section-bar">Attributes</h3>

      <div class="name-list">
        <table>

        <tr class="top-aligned-row context-row">
          <td class="context-item-name">doc_content</td>

          <td class="context-item-value">&nbsp;[R]&nbsp;</td>

          <td class="context-item-desc"></td>
        </tr>

        <tr class="top-aligned-row context-row">
          <td class="context-item-name">words</td>

          <td class="context-item-value">&nbsp;[R]&nbsp;</td>

          <td class="context-item-desc"></td>
        </tr>

        </table>
      </div>
    </div>


    <!-- if method_list -->

    <div id="methods">

      <h3 class="section-bar">Public Class methods</h3>


      <div id="method-M000027" class="method-detail">
        <a name="M000027"></a>

        <div class="method-heading">

          <a href="Document.src/M000027.html" target="Code" class="method-signature"
            onclick="popupCode('Document.src/M000027.html');return false;">

          <span class="method-name">new</span><span class="method-args">(content)</span>

          </a>

        </div>

        <div class="method-description">

        </div>
      </div>


      <h3 class="section-bar">Public Instance methods</h3>


      <div id="method-M000024" class="method-detail">
        <a name="M000024"></a>

        <div class="method-heading">

          <a href="Document.src/M000024.html" target="Code" class="method-signature"
            onclick="popupCode('Document.src/M000024.html');return false;">

          <span class="method-name">count_words</span><span class="method-args">()</span>

          </a>

        </div>

        <div class="method-description">

          <p>
Returns a Hash containing the words and their associated counts in the
current <a href="Document.html">Document</a>.
</p>
<pre>
  count_words #=&gt; { &quot;guitar&quot;=&gt;1, &quot;bass&quot;=&gt;3, &quot;album&quot;=&gt;20, ... }
</pre>

        </div>
      </div>


      <div id="method-M000025" class="method-detail">
        <a name="M000025"></a>

        <div class="method-heading">

          <a href="Document.src/M000025.html" target="Code" class="method-signature"
            onclick="popupCode('Document.src/M000025.html');return false;">

          <span class="method-name">entropy</span><span class="method-args">(s)</span>

          </a>

        </div>

        <div class="method-description">

          <p>
Computes the entropy of a given string <tt>s</tt> inside the document.
</p>
<p>
If the string parameter is composed of several words (i.e. tokens separated by
whitespace), it is treated as an n-gram.
</p>
<pre>
  entropy(&quot;guitar&quot;) #=&gt; 0.00432114812727959
  entropy(&quot;dillinger escape plan&quot;) #=&gt; 0.265862076325102
</pre>

        </div>
      </div>


      <div id="method-M000023" class="method-detail">
        <a name="M000023"></a>

        <div class="method-heading">

          <a href="Document.src/M000023.html" target="Code" class="method-signature"
            onclick="popupCode('Document.src/M000023.html');return false;">

          <span class="method-name">ngrams</span><span class="method-args">(n)</span>

          </a>

        </div>

        <div class="method-description">

          <p>
Returns an Array containing the <tt>n</tt>-grams (words) from the current
<a href="Document.html">Document</a>.
</p>
<pre>
  ngrams(2) #=&gt; [&quot;the free&quot;, &quot;free encyclopedia&quot;, &quot;encyclopedia var&quot;, &quot;var skin&quot;, ...]
</pre>

        </div>
      </div>


      <div id="method-M000026" class="method-detail">
        <a name="M000026"></a>

        <div class="method-heading">

          <a href="Document.src/M000026.html" target="Code" class="method-signature"
            onclick="popupCode('Document.src/M000026.html');return false;">

          <span class="method-name">tf</span><span class="method-args">(s)</span>

          </a>

        </div>

        <div class="method-description">

          <p>
Computes the term frequency of a given <b>word</b> <tt>s</tt>.
</p>
<pre>
  tf(&quot;guitar&quot;) #=&gt; 0.000380372765310004
</pre>

        </div>
      </div>


      <h3 class="section-bar">Protected Instance methods</h3>


      <div id="method-M000022" class="method-detail">
        <a name="M000022"></a>

        <div class="method-heading">

          <a href="Document.src/M000022.html" target="Code" class="method-signature"
            onclick="popupCode('Document.src/M000022.html');return false;">

          <span class="method-name">format_words</span><span class="method-args">()</span>

          </a>

        </div>

        <div class="method-description">

          <p>
Any non-word characters are removed from the words (see <a
href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a>
and the <tt>\W</tt> special escape).
</p>
<p>
Protected function, only meant to be called at initialization.
</p>

        </div>
      </div>



    </div>




  </div>

<div id="validator-badges">
  <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
</div>

</body>
</html>