Blame view

doc/classes/RIR/Document.html 7.66 KB
7043da90b   Romain Deveaud   first commit
1
2
3
4
  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
  <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
  <head>
35f45ab54   Romain Deveaud   changing the main...
5
    <title>Class: RIR::Document [RDoc Documentation]</title>
7043da90b   Romain Deveaud   first commit
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <meta http-equiv="Content-Script-Type" content="text/javascript" />
    <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
    <script type="text/javascript">
    // <![CDATA[
  
    // Opens `url` in a small, resizable, scrollable popup window named
    // "Code" (no toolbar, no status bar, 400x150).
    function popupCode( url ) {
      var windowName = "Code";
      var windowFeatures = "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400";

      window.open( url, windowName, windowFeatures );
    }
  
    /*
     * Toggles the element with the given id between shown and hidden.
     *
     * If the element's inline display is anything other than "block" it is
     * set to "block"; otherwise it is set to "none".
     *
     * id - the id attribute of the element to toggle.
     *
     * Returns true when the element was toggled, false when the browser
     * offers no lookup mechanism or no element with that id exists.
     */
    function toggleCode( id ) {
      // Declared locally so the function no longer leaks globals.
      var elem;

      if ( document.getElementById ) {
        elem = document.getElementById( id );
      } else if ( document.all ) {
        // Legacy lookup: index by id instead of eval-ing a built string.
        elem = document.all[ id ];
      } else {
        return false;
      }

      // Unknown id: report failure instead of throwing on elem.style.
      if ( !elem ) {
        return false;
      }

      var elemStyle = elem.style;

      if ( elemStyle.display != "block" ) {
        elemStyle.display = "block";
      } else {
        elemStyle.display = "none";
      }

      return true;
    }
  
    // Make codeblocks hidden by default.
    // The rule is written from script while the page is being parsed, so
    // div.method-source-code is only hidden when this script actually runs.
    // The "<\/style>" escape keeps the literal from containing a raw "</".
    document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" )
  
    // ]]>
    </script>
  
  </head>
  <body>
  
  
      <div id="classHeader">
          <table class="header-table">
          <tr class="top-aligned-row">
            <td><strong>Class</strong></td>
35f45ab54   Romain Deveaud   changing the main...
49
            <td class="class-name-in-header">RIR::Document</td>
7043da90b   Romain Deveaud   first commit
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
          </tr>
          <tr class="top-aligned-row">
              <td><strong>In:</strong></td>
              <td>
  
  
                  <a href="../../files/lib/rir/document_rb.html">
  
                  lib/rir/document.rb
  
                  </a>
  
  
          <br />
  
              </td>
          </tr>
  
  
          <tr class="top-aligned-row">
              <td><strong>Parent:</strong></td>
              <td>
  
                  Object
  
              </td>
          </tr>
  
          </table>
      </div>
    <!-- banner header -->
  
    <div id="bodyContent">
  
    <div id="contextContent">
  
      <div id="description">
        <p>
  A <a href="Document.html">Document</a> is a bag of words and is constructed
  from a string.
  </p>
  
      </div>
  
     </div>
  
  
      <div id="method-list">
        <h3 class="section-bar">Methods</h3>
  
        <div class="name-list">
a79a22843   Romain Deveaud   new TreeTagger mo...
101
          <a href="#M000010">count_words</a>&nbsp;&nbsp;
7043da90b   Romain Deveaud   first commit
102

a79a22843   Romain Deveaud   new TreeTagger mo...
103
          <a href="#M000011">entropy</a>&nbsp;&nbsp;
7043da90b   Romain Deveaud   first commit
104

a79a22843   Romain Deveaud   new TreeTagger mo...
105
          <a href="#M000008">format_words</a>&nbsp;&nbsp;
7043da90b   Romain Deveaud   first commit
106

a79a22843   Romain Deveaud   new TreeTagger mo...
107
          <a href="#M000013">new</a>&nbsp;&nbsp;
7043da90b   Romain Deveaud   first commit
108

a79a22843   Romain Deveaud   new TreeTagger mo...
109
110
111
          <a href="#M000009">ngrams</a>&nbsp;&nbsp;
  
          <a href="#M000012">tf</a>&nbsp;&nbsp;
7043da90b   Romain Deveaud   first commit
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
  
        </div>
      </div>
  
    </div>
  
      <!-- if includes -->
  
      <div id="section">
  
  
  
      <div id="attribute-list">
        <h3 class="section-bar">Attributes</h3>
  
        <div class="name-list">
          <table>
  
          <tr class="top-aligned-row context-row">
            <td class="context-item-name">doc_content</td>
  
            <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  
            <td class="context-item-desc"></td>
          </tr>
  
          <tr class="top-aligned-row context-row">
            <td class="context-item-name">words</td>
  
            <td class="context-item-value">&nbsp;[R]&nbsp;</td>
  
            <td class="context-item-desc"></td>
          </tr>
  
          </table>
        </div>
      </div>
  
  
      <!-- if method_list -->
  
      <div id="methods">
  
        <h3 class="section-bar">Public Class methods</h3>
a79a22843   Romain Deveaud   new TreeTagger mo...
156
157
        <div id="method-M000013" class="method-detail">
          <a name="M000013"></a>
7043da90b   Romain Deveaud   first commit
158
159
  
          <div class="method-heading">
a79a22843   Romain Deveaud   new TreeTagger mo...
160
161
            <a href="Document.src/M000013.html" target="Code" class="method-signature"
              onclick="popupCode('Document.src/M000013.html');return false;">
7043da90b   Romain Deveaud   first commit
162
163
164
165
166
167
168
169
170
171
172
173
174
175
  
            <span class="method-name">new</span><span class="method-args">(content)</span>
  
            </a>
  
          </div>
  
          <div class="method-description">
  
          </div>
        </div>
  
  
        <h3 class="section-bar">Public Instance methods</h3>
a79a22843   Romain Deveaud   new TreeTagger mo...
176
177
        <div id="method-M000010" class="method-detail">
          <a name="M000010"></a>
7043da90b   Romain Deveaud   first commit
178
179
  
          <div class="method-heading">
a79a22843   Romain Deveaud   new TreeTagger mo...
180
181
            <a href="Document.src/M000010.html" target="Code" class="method-signature"
              onclick="popupCode('Document.src/M000010.html');return false;">
7043da90b   Romain Deveaud   first commit
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
  
            <span class="method-name">count_words</span><span class="method-args">()</span>
  
            </a>
  
          </div>
  
          <div class="method-description">
  
            <p>
  Returns a Hash containing the words and their associated counts in the
  current <a href="Document.html">Document</a>.
  </p>
  <pre>
    count_words #=&gt; { &quot;guitar&quot;=&gt;1, &quot;bass&quot;=&gt;3, &quot;album&quot;=&gt;20, ... }
  </pre>
  
          </div>
        </div>
a79a22843   Romain Deveaud   new TreeTagger mo...
201
202
        <div id="method-M000011" class="method-detail">
          <a name="M000011"></a>
7043da90b   Romain Deveaud   first commit
203
204
  
          <div class="method-heading">
a79a22843   Romain Deveaud   new TreeTagger mo...
205
206
            <a href="Document.src/M000011.html" target="Code" class="method-signature"
              onclick="popupCode('Document.src/M000011.html');return false;">
7043da90b   Romain Deveaud   first commit
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
  
            <span class="method-name">entropy</span><span class="method-args">(s)</span>
  
            </a>
  
          </div>
  
          <div class="method-description">
  
            <p>
  Computes the entropy of a given string <tt>s</tt> inside the document.
  </p>
  <p>
  If the string parameter is composed of many words (i.e. tokens separated by
  whitespace(s)), it is considered as an ngram.
  </p>
  <pre>
a79a22843   Romain Deveaud   new TreeTagger mo...
224
225
    entropy(&quot;guitar&quot;) #=&gt; 0.00432114812727959
    entropy(&quot;dillinger escape plan&quot;) #=&gt; 0.265862076325102
7043da90b   Romain Deveaud   first commit
226
227
228
229
  </pre>
  
          </div>
        </div>
a79a22843   Romain Deveaud   new TreeTagger mo...
230
231
        <div id="method-M000009" class="method-detail">
          <a name="M000009"></a>
7043da90b   Romain Deveaud   first commit
232
233
  
          <div class="method-heading">
a79a22843   Romain Deveaud   new TreeTagger mo...
234
235
            <a href="Document.src/M000009.html" target="Code" class="method-signature"
              onclick="popupCode('Document.src/M000009.html');return false;">
7043da90b   Romain Deveaud   first commit
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
  
            <span class="method-name">ngrams</span><span class="method-args">(n)</span>
  
            </a>
  
          </div>
  
          <div class="method-description">
  
            <p>
  Returns an Array containing the <tt>n</tt>-grams (words) from the current
  <a href="Document.html">Document</a>.
  </p>
  <pre>
    ngrams(2) #=&gt; [&quot;the free&quot;, &quot;free encyclopedia&quot;, &quot;encyclopedia var&quot;, &quot;var skin&quot;, ...]
  </pre>
  
          </div>
        </div>
a79a22843   Romain Deveaud   new TreeTagger mo...
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
        <div id="method-M000012" class="method-detail">
          <a name="M000012"></a>
  
          <div class="method-heading">
  
            <a href="Document.src/M000012.html" target="Code" class="method-signature"
              onclick="popupCode('Document.src/M000012.html');return false;">
  
            <span class="method-name">tf</span><span class="method-args">(s)</span>
  
            </a>
  
          </div>
  
          <div class="method-description">
  
            <p>
  Computes the term frequency of a given <b>word</b> <tt>s</tt>.
  </p>
  <pre>
    tf(&quot;guitar&quot;) #=&gt; 0.000380372765310004
  </pre>
  
          </div>
        </div>
7043da90b   Romain Deveaud   first commit
280
        <h3 class="section-bar">Protected Instance methods</h3>
a79a22843   Romain Deveaud   new TreeTagger mo...
281
282
        <div id="method-M000008" class="method-detail">
          <a name="M000008"></a>
7043da90b   Romain Deveaud   first commit
283
284
  
          <div class="method-heading">
a79a22843   Romain Deveaud   new TreeTagger mo...
285
286
            <a href="Document.src/M000008.html" target="Code" class="method-signature"
              onclick="popupCode('Document.src/M000008.html');return false;">
7043da90b   Romain Deveaud   first commit
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
  
            <span class="method-name">format_words</span><span class="method-args">()</span>
  
            </a>
  
          </div>
  
          <div class="method-description">
  
            <p>
  Any non-word characters are removed from the words (see <a
  href="http://perldoc.perl.org/perlre.html">perldoc.perl.org/perlre.html</a>
  and the \W special escape).
  </p>
  <p>
  Protected function, only meant to be called at initialization.
  </p>
  
          </div>
        </div>
  
  
  
      </div>
  
  
  
  
    </div>
  
  <div id="validator-badges">
    <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
  </div>
  
  </body>
  </html>