Commit fd4cb285a4975c9a0b6624e93a42eb9fa812fee4
1 parent
b843bae6b0
Exists in
master
doc changes + document and string improvements
Showing 14 changed files with 63 additions and 26 deletions Side-by-side Diff
- doc/classes/String.html
- doc/created.rid
- doc/files/README_markdown.html
- doc/files/lib/rir/document_rb.html
- doc/files/lib/rir/string_rb.html
- doc/files/main_rb.html
- doc/fr_class_index.html
- doc/fr_file_index.html
- doc/fr_method_index.html
- doc/index.html
- lib/rir.rb
- lib/rir/document.rb
- lib/rir/string.rb
- main.rb
doc/classes/String.html
doc/created.rid
doc/files/README_markdown.html
... | ... | @@ -53,7 +53,7 @@ |
53 | 53 | </tr> |
54 | 54 | <tr class="top-aligned-row"> |
55 | 55 | <td><strong>Last Update:</strong></td> |
56 | - <td>2010-11-05 14:40:41 +0100</td> | |
56 | + <td>2010-11-05 14:46:27 +0100</td> | |
57 | 57 | </tr> |
58 | 58 | </table> |
59 | 59 | </div> |
... | ... | @@ -62,6 +62,37 @@ |
62 | 62 | <div id="bodyContent"> |
63 | 63 | |
64 | 64 | <div id="contextContent"> |
65 | + | |
66 | + <div id="description"> | |
67 | + <p> | |
68 | +# Ruby Information Retrieval (rIR) | |
69 | +</p> | |
70 | +<p> | |
71 | +Copyright (C) 2010-2011 Romain Deveaud &lt;romain.deveaud@gmail.com&gt; | |
72 | +</p> | |
73 | +<p> | |
74 | +License | |
75 | +</p> | |
76 | +<h6>=</h6> | |
77 | +<p> | |
78 | +This program is free software: you can redistribute it and/or modify it | |
79 | +under the terms of the GNU General Public License as published by the Free | |
80 | +Software Foundation, either version 3 of the License, or (at your option) | |
81 | +any later version. | |
82 | +</p> | |
83 | +<p> | |
84 | +This program is distributed in the hope that it will be useful, but WITHOUT | |
85 | +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
86 | +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
87 | +more details. | |
88 | +</p> | |
89 | +<p> | |
90 | +You should have received a copy of the GNU General Public License along | |
91 | +with this program. If not, see &lt;<a | |
92 | +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>&gt;. | |
93 | +</p> | |
94 | + | |
95 | + </div> | |
65 | 96 | |
66 | 97 | </div> |
67 | 98 |
doc/files/lib/rir/document_rb.html
doc/files/lib/rir/string_rb.html
doc/files/main_rb.html
doc/fr_class_index.html
... | ... | @@ -17,13 +17,13 @@ |
17 | 17 | <h1 class="section-bar">Classes</h1> |
18 | 18 | <div id="index-entries"> |
19 | 19 | |
20 | - <a href="classes/Rir.html">Rir</a><br /> | |
20 | + <a href="classes/RIR.html">RIR</a><br /> | |
21 | 21 | |
22 | - <a href="classes/Rir/Document.html">Rir::Document</a><br /> | |
22 | + <a href="classes/RIR/Document.html">RIR::Document</a><br /> | |
23 | 23 | |
24 | - <a href="classes/Rir/WebDocument.html">Rir::WebDocument</a><br /> | |
24 | + <a href="classes/RIR/WebDocument.html">RIR::WebDocument</a><br /> | |
25 | 25 | |
26 | - <a href="classes/Rir/WikipediaPage.html">Rir::WikipediaPage</a><br /> | |
26 | + <a href="classes/RIR/WikipediaPage.html">RIR::WikipediaPage</a><br /> | |
27 | 27 | |
28 | 28 | <a href="classes/String.html">String</a><br /> |
29 | 29 |
doc/fr_file_index.html
... | ... | @@ -17,10 +17,6 @@ |
17 | 17 | <h1 class="section-bar">Files</h1> |
18 | 18 | <div id="index-entries"> |
19 | 19 | |
20 | - <a href="files/README_markdown.html">README.markdown</a><br /> | |
21 | - | |
22 | - <a href="files/lib/rir_rb.html">lib/rir.rb</a><br /> | |
23 | - | |
24 | 20 | <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br /> |
25 | 21 | |
26 | 22 | <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br /> |
doc/fr_method_index.html
... | ... | @@ -17,23 +17,23 @@ |
17 | 17 | <h1 class="section-bar">Methods</h1> |
18 | 18 | <div id="index-entries"> |
19 | 19 | |
20 | - <a href="classes/Rir/Document.html#M000012">count_words (Rir::Document)</a><br /> | |
20 | + <a href="classes/RIR/Document.html#M000012">count_words (RIR::Document)</a><br /> | |
21 | 21 | |
22 | - <a href="classes/Rir/Document.html#M000013">entropy (Rir::Document)</a><br /> | |
22 | + <a href="classes/RIR/Document.html#M000013">entropy (RIR::Document)</a><br /> | |
23 | 23 | |
24 | 24 | <a href="classes/String.html#M000009">extract_xmltags_values (String)</a><br /> |
25 | 25 | |
26 | - <a href="classes/Rir/Document.html#M000010">format_words (Rir::Document)</a><br /> | |
26 | + <a href="classes/RIR/Document.html#M000010">format_words (RIR::Document)</a><br /> | |
27 | 27 | |
28 | - <a href="classes/Rir/WebDocument.html#M000015">get_content (Rir::WebDocument)</a><br /> | |
28 | + <a href="classes/RIR/WebDocument.html#M000015">get_content (RIR::WebDocument)</a><br /> | |
29 | 29 | |
30 | 30 | <a href="classes/String.html#M000001">is_stopword? (String)</a><br /> |
31 | 31 | |
32 | - <a href="classes/Rir/WebDocument.html#M000016">new (Rir::WebDocument)</a><br /> | |
32 | + <a href="classes/RIR/WebDocument.html#M000016">new (RIR::WebDocument)</a><br /> | |
33 | 33 | |
34 | - <a href="classes/Rir/Document.html#M000014">new (Rir::Document)</a><br /> | |
34 | + <a href="classes/RIR/Document.html#M000014">new (RIR::Document)</a><br /> | |
35 | 35 | |
36 | - <a href="classes/Rir/Document.html#M000011">ngrams (Rir::Document)</a><br /> | |
36 | + <a href="classes/RIR/Document.html#M000011">ngrams (RIR::Document)</a><br /> | |
37 | 37 | |
38 | 38 | <a href="classes/String.html#M000002">remove_special_characters (String)</a><br /> |
39 | 39 |
doc/index.html
... | ... | @@ -16,7 +16,7 @@ |
16 | 16 | <frame src="fr_class_index.html" name="Classes" /> |
17 | 17 | <frame src="fr_method_index.html" name="Methods" /> |
18 | 18 | </frameset> |
19 | - <frame src="files/README_markdown.html" name="docwin" /> | |
19 | + <frame src="files/lib/rir/string_rb.html" name="docwin" /> | |
20 | 20 | </frameset> |
21 | 21 | </html> |
lib/rir.rb
lib/rir/document.rb
... | ... | @@ -18,7 +18,7 @@ |
18 | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 | |
20 | 20 | # General module for many purposes related to Information Retrieval. |
21 | -module Rir | |
21 | +module RIR | |
22 | 22 | |
23 | 23 | # A Document is a bag of words and is constructed from a string. |
24 | 24 | class Document |
lib/rir/string.rb
... | ... | @@ -18,7 +18,7 @@ |
18 | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 | |
20 | 20 | # General module for many purposes related to Information Retrieval. |
21 | -module Rir | |
21 | +module RIR | |
22 | 22 | |
23 | 23 | # These are the default stopwords provided by Lemur. |
24 | 24 | Stoplist = [ |
... | ... | @@ -71,7 +71,7 @@ |
71 | 71 | |
72 | 72 | # Extension of the standard class String with useful functions. |
73 | 73 | class String |
74 | - include Rir | |
74 | + include RIR | |
75 | 75 | |
76 | 76 | # Returns +true+ if +self+ belongs to RIR::Stoplist, +false+ otherwise. |
77 | 77 | def is_stopword? |
78 | 78 | |
... | ... | @@ -146,11 +146,12 @@ |
146 | 146 | self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten |
147 | 147 | end |
148 | 148 | |
149 | - private | |
150 | 149 | def strip_with_pattern(pattern) |
151 | 150 | require 'cgi' |
152 | 151 | require 'kconv' |
153 | 152 | CGI::unescapeHTML(self.gsub(pattern,"")).toutf8 |
154 | 153 | end |
154 | + | |
155 | + private :strip_with_pattern | |
155 | 156 | end |
main.rb
1 | 1 | $LOAD_PATH.unshift File.expand_path(File.join(File.dirname(__FILE__), "lib")) |
2 | 2 | |
3 | 3 | require 'rir' |
4 | + | |
5 | +w = RIR::WikipediaPage.new("http://en.wikipedia.org/wiki/The_Dillinger_Escape_Plan") | |
6 | +p w.entropy("guitar") | |
7 | + | |
8 | +params = RIR::Indri::Parameters.new("path_vers_mon_index") | |
9 | +p params.rule | |
10 | +q = RIR::Indri::IndriQuery.new("pouet", "bla", params) | |
11 | +puts q |