Commit fd4cb285a4975c9a0b6624e93a42eb9fa812fee4

Authored by Romain Deveaud
1 parent b843bae6b0
Exists in master

doc changes + document and string improvements

Showing 14 changed files with 63 additions and 26 deletions Side-by-side Diff

doc/classes/String.html
... ... @@ -129,7 +129,7 @@
129 129  
130 130 <div id="includes-list">
131 131  
132   - <span class="include-name"><a href="Rir.html">Rir</a></span>
  132 + <span class="include-name"><a href="RIR.html">RIR</a></span>
133 133  
134 134 </div>
135 135 </div>
1   -Fri, 05 Nov 2010 14:41:10 +0100
  1 +Fri, 05 Nov 2010 15:06:41 +0100
doc/files/README_markdown.html
... ... @@ -53,7 +53,7 @@
53 53 </tr>
54 54 <tr class="top-aligned-row">
55 55 <td><strong>Last Update:</strong></td>
56   - <td>2010-11-05 14:40:41 +0100</td>
  56 + <td>2010-11-05 14:46:27 +0100</td>
57 57 </tr>
58 58 </table>
59 59 </div>
... ... @@ -62,6 +62,37 @@
62 62 <div id="bodyContent">
63 63  
64 64 <div id="contextContent">
  65 +
  66 + <div id="description">
  67 + <p>
  68 +# Ruby Information Retrieval (rIR)
  69 +</p>
  70 +<p>
  71 +Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
  72 +</p>
  73 +<p>
  74 +License
  75 +</p>
  76 +<h6>=</h6>
  77 +<p>
  78 +This program is free software: you can redistribute it and/or modify it
  79 +under the terms of the GNU General Public License as published by the Free
  80 +Software Foundation, either version 3 of the License, or (at your option)
  81 +any later version.
  82 +</p>
  83 +<p>
  84 +This program is distributed in the hope that it will be useful, but WITHOUT
  85 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  86 +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  87 +more details.
  88 +</p>
  89 +<p>
  90 +You should have received a copy of the GNU General Public License along
  91 +with this program. If not, see <<a
  92 +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>.
  93 +</p>
  94 +
  95 + </div>
65 96  
66 97 </div>
67 98  
doc/files/lib/rir/document_rb.html
... ... @@ -53,7 +53,7 @@
53 53 </tr>
54 54 <tr class="top-aligned-row">
55 55 <td><strong>Last Update:</strong></td>
56   - <td>2010-11-05 14:39:35 +0100</td>
  56 + <td>2010-11-05 15:06:24 +0100</td>
57 57 </tr>
58 58 </table>
59 59 </div>
doc/files/lib/rir/string_rb.html
... ... @@ -53,7 +53,7 @@
53 53 </tr>
54 54 <tr class="top-aligned-row">
55 55 <td><strong>Last Update:</strong></td>
56   - <td>2010-11-05 14:39:35 +0100</td>
  56 + <td>2010-11-05 15:06:35 +0100</td>
57 57 </tr>
58 58 </table>
59 59 </div>
doc/files/main_rb.html
... ... @@ -53,7 +53,7 @@
53 53 </tr>
54 54 <tr class="top-aligned-row">
55 55 <td><strong>Last Update:</strong></td>
56   - <td>2010-11-05 14:40:11 +0100</td>
  56 + <td>2010-11-05 15:05:38 +0100</td>
57 57 </tr>
58 58 </table>
59 59 </div>
doc/fr_class_index.html
... ... @@ -17,13 +17,13 @@
17 17 <h1 class="section-bar">Classes</h1>
18 18 <div id="index-entries">
19 19  
20   - <a href="classes/Rir.html">Rir</a><br />
  20 + <a href="classes/RIR.html">RIR</a><br />
21 21  
22   - <a href="classes/Rir/Document.html">Rir::Document</a><br />
  22 + <a href="classes/RIR/Document.html">RIR::Document</a><br />
23 23  
24   - <a href="classes/Rir/WebDocument.html">Rir::WebDocument</a><br />
  24 + <a href="classes/RIR/WebDocument.html">RIR::WebDocument</a><br />
25 25  
26   - <a href="classes/Rir/WikipediaPage.html">Rir::WikipediaPage</a><br />
  26 + <a href="classes/RIR/WikipediaPage.html">RIR::WikipediaPage</a><br />
27 27  
28 28 <a href="classes/String.html">String</a><br />
29 29  
doc/fr_file_index.html
... ... @@ -17,10 +17,6 @@
17 17 <h1 class="section-bar">Files</h1>
18 18 <div id="index-entries">
19 19  
20   - <a href="files/README_markdown.html">README.markdown</a><br />
21   -
22   - <a href="files/lib/rir_rb.html">lib/rir.rb</a><br />
23   -
24 20 <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br />
25 21  
26 22 <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br />
doc/fr_method_index.html
... ... @@ -17,23 +17,23 @@
17 17 <h1 class="section-bar">Methods</h1>
18 18 <div id="index-entries">
19 19  
20   - <a href="classes/Rir/Document.html#M000012">count_words (Rir::Document)</a><br />
  20 + <a href="classes/RIR/Document.html#M000012">count_words (RIR::Document)</a><br />
21 21  
22   - <a href="classes/Rir/Document.html#M000013">entropy (Rir::Document)</a><br />
  22 + <a href="classes/RIR/Document.html#M000013">entropy (RIR::Document)</a><br />
23 23  
24 24 <a href="classes/String.html#M000009">extract_xmltags_values (String)</a><br />
25 25  
26   - <a href="classes/Rir/Document.html#M000010">format_words (Rir::Document)</a><br />
  26 + <a href="classes/RIR/Document.html#M000010">format_words (RIR::Document)</a><br />
27 27  
28   - <a href="classes/Rir/WebDocument.html#M000015">get_content (Rir::WebDocument)</a><br />
  28 + <a href="classes/RIR/WebDocument.html#M000015">get_content (RIR::WebDocument)</a><br />
29 29  
30 30 <a href="classes/String.html#M000001">is_stopword? (String)</a><br />
31 31  
32   - <a href="classes/Rir/WebDocument.html#M000016">new (Rir::WebDocument)</a><br />
  32 + <a href="classes/RIR/WebDocument.html#M000016">new (RIR::WebDocument)</a><br />
33 33  
34   - <a href="classes/Rir/Document.html#M000014">new (Rir::Document)</a><br />
  34 + <a href="classes/RIR/Document.html#M000014">new (RIR::Document)</a><br />
35 35  
36   - <a href="classes/Rir/Document.html#M000011">ngrams (Rir::Document)</a><br />
  36 + <a href="classes/RIR/Document.html#M000011">ngrams (RIR::Document)</a><br />
37 37  
38 38 <a href="classes/String.html#M000002">remove_special_characters (String)</a><br />
39 39  
... ... @@ -16,7 +16,7 @@
16 16 <frame src="fr_class_index.html" name="Classes" />
17 17 <frame src="fr_method_index.html" name="Methods" />
18 18 </frameset>
19   - <frame src="files/README_markdown.html" name="docwin" />
  19 + <frame src="files/lib/rir/string_rb.html" name="docwin" />
20 20 </frameset>
21 21 </html>
... ... @@ -2,4 +2,5 @@
2 2  
3 3 require 'rir/document'
4 4 require 'rir/string'
  5 +require 'rir/query'
... ... @@ -18,7 +18,7 @@
18 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19  
20 20 # General module for many purposes related to Information Retrieval.
21   -module Rir
  21 +module RIR
22 22  
23 23 # A Document is a bag of words and is constructed from a string.
24 24 class Document
... ... @@ -18,7 +18,7 @@
18 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19  
20 20 # General module for many purposes related to Information Retrieval.
21   -module Rir
  21 +module RIR
22 22  
23 23 # These are the default stopwords provided by Lemur.
24 24 Stoplist = [
... ... @@ -71,7 +71,7 @@
71 71  
72 72 # Extension of the standard class String with useful functions.
73 73 class String
74   - include Rir
  74 + include RIR
75 75  
76 76 # Returns +true+ if +self+ belongs to RIR::Stoplist, +false+ otherwise.
77 77 def is_stopword?
78 78  
... ... @@ -146,11 +146,12 @@
146 146 self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten
147 147 end
148 148  
149   - private
150 149 def strip_with_pattern(pattern)
151 150 require 'cgi'
152 151 require 'kconv'
153 152 CGI::unescapeHTML(self.gsub(pattern,"")).toutf8
154 153 end
  154 +
  155 + private :strip_with_pattern
155 156 end
1 1 $LOAD_PATH.unshift File.expand_path(File.join(File.dirname(__FILE__), "lib"))
2 2  
3 3 require 'rir'
  4 +
  5 +w = RIR::WikipediaPage.new("http://en.wikipedia.org/wiki/The_Dillinger_Escape_Plan")
  6 +p w.entropy("guitar")
  7 +
  8 +params = RIR::Indri::Parameters.new("path_vers_mon_index")
  9 +p params.rule
  10 +q = RIR::Indri::IndriQuery.new("pouet", "bla", params)
  11 +puts q