Commit fd4cb285a4975c9a0b6624e93a42eb9fa812fee4
1 parent
b843bae6b0
Exists in
master
doc changes + document and string improvements
Showing 14 changed files with 63 additions and 26 deletions Side-by-side Diff
- doc/classes/String.html
- doc/created.rid
- doc/files/README_markdown.html
- doc/files/lib/rir/document_rb.html
- doc/files/lib/rir/string_rb.html
- doc/files/main_rb.html
- doc/fr_class_index.html
- doc/fr_file_index.html
- doc/fr_method_index.html
- doc/index.html
- lib/rir.rb
- lib/rir/document.rb
- lib/rir/string.rb
- main.rb
doc/classes/String.html
doc/created.rid
doc/files/README_markdown.html
| ... | ... | @@ -53,7 +53,7 @@ |
| 53 | 53 | </tr> |
| 54 | 54 | <tr class="top-aligned-row"> |
| 55 | 55 | <td><strong>Last Update:</strong></td> |
| 56 | - <td>2010-11-05 14:40:41 +0100</td> | |
| 56 | + <td>2010-11-05 14:46:27 +0100</td> | |
| 57 | 57 | </tr> |
| 58 | 58 | </table> |
| 59 | 59 | </div> |
| ... | ... | @@ -62,6 +62,37 @@ |
| 62 | 62 | <div id="bodyContent"> |
| 63 | 63 | |
| 64 | 64 | <div id="contextContent"> |
| 65 | + | |
| 66 | + <div id="description"> | |
| 67 | + <p> | |
| 68 | +# Ruby Information Retrieval (rIR) | |
| 69 | +</p> | |
| 70 | +<p> | |
| 71 | +Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | |
| 72 | +</p> | |
| 73 | +<p> | |
| 74 | +License | |
| 75 | +</p> | |
| 76 | +<h6>=</h6> | |
| 77 | +<p> | |
| 78 | +This program is free software: you can redistribute it and/or modify it | |
| 79 | +under the terms of the GNU General Public License as published by the Free | |
| 80 | +Software Foundation, either version 3 of the License, or (at your option) | |
| 81 | +any later version. | |
| 82 | +</p> | |
| 83 | +<p> | |
| 84 | +This program is distributed in the hope that it will be useful, but WITHOUT | |
| 85 | +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
| 86 | +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
| 87 | +more details. | |
| 88 | +</p> | |
| 89 | +<p> | |
| 90 | +You should have received a copy of the GNU General Public License along | |
| 91 | +with this program. If not, see &lt;<a | |
| 92 | +href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>&gt;. | |
| 93 | +</p> | |
| 94 | + | |
| 95 | + </div> | |
| 65 | 96 | |
| 66 | 97 | </div> |
| 67 | 98 |
doc/files/lib/rir/document_rb.html
doc/files/lib/rir/string_rb.html
doc/files/main_rb.html
doc/fr_class_index.html
| ... | ... | @@ -17,13 +17,13 @@ |
| 17 | 17 | <h1 class="section-bar">Classes</h1> |
| 18 | 18 | <div id="index-entries"> |
| 19 | 19 | |
| 20 | - <a href="classes/Rir.html">Rir</a><br /> | |
| 20 | + <a href="classes/RIR.html">RIR</a><br /> | |
| 21 | 21 | |
| 22 | - <a href="classes/Rir/Document.html">Rir::Document</a><br /> | |
| 22 | + <a href="classes/RIR/Document.html">RIR::Document</a><br /> | |
| 23 | 23 | |
| 24 | - <a href="classes/Rir/WebDocument.html">Rir::WebDocument</a><br /> | |
| 24 | + <a href="classes/RIR/WebDocument.html">RIR::WebDocument</a><br /> | |
| 25 | 25 | |
| 26 | - <a href="classes/Rir/WikipediaPage.html">Rir::WikipediaPage</a><br /> | |
| 26 | + <a href="classes/RIR/WikipediaPage.html">RIR::WikipediaPage</a><br /> | |
| 27 | 27 | |
| 28 | 28 | <a href="classes/String.html">String</a><br /> |
| 29 | 29 |
doc/fr_file_index.html
| ... | ... | @@ -17,10 +17,6 @@ |
| 17 | 17 | <h1 class="section-bar">Files</h1> |
| 18 | 18 | <div id="index-entries"> |
| 19 | 19 | |
| 20 | - <a href="files/README_markdown.html">README.markdown</a><br /> | |
| 21 | - | |
| 22 | - <a href="files/lib/rir_rb.html">lib/rir.rb</a><br /> | |
| 23 | - | |
| 24 | 20 | <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br /> |
| 25 | 21 | |
| 26 | 22 | <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br /> |
doc/fr_method_index.html
| ... | ... | @@ -17,23 +17,23 @@ |
| 17 | 17 | <h1 class="section-bar">Methods</h1> |
| 18 | 18 | <div id="index-entries"> |
| 19 | 19 | |
| 20 | - <a href="classes/Rir/Document.html#M000012">count_words (Rir::Document)</a><br /> | |
| 20 | + <a href="classes/RIR/Document.html#M000012">count_words (RIR::Document)</a><br /> | |
| 21 | 21 | |
| 22 | - <a href="classes/Rir/Document.html#M000013">entropy (Rir::Document)</a><br /> | |
| 22 | + <a href="classes/RIR/Document.html#M000013">entropy (RIR::Document)</a><br /> | |
| 23 | 23 | |
| 24 | 24 | <a href="classes/String.html#M000009">extract_xmltags_values (String)</a><br /> |
| 25 | 25 | |
| 26 | - <a href="classes/Rir/Document.html#M000010">format_words (Rir::Document)</a><br /> | |
| 26 | + <a href="classes/RIR/Document.html#M000010">format_words (RIR::Document)</a><br /> | |
| 27 | 27 | |
| 28 | - <a href="classes/Rir/WebDocument.html#M000015">get_content (Rir::WebDocument)</a><br /> | |
| 28 | + <a href="classes/RIR/WebDocument.html#M000015">get_content (RIR::WebDocument)</a><br /> | |
| 29 | 29 | |
| 30 | 30 | <a href="classes/String.html#M000001">is_stopword? (String)</a><br /> |
| 31 | 31 | |
| 32 | - <a href="classes/Rir/WebDocument.html#M000016">new (Rir::WebDocument)</a><br /> | |
| 32 | + <a href="classes/RIR/WebDocument.html#M000016">new (RIR::WebDocument)</a><br /> | |
| 33 | 33 | |
| 34 | - <a href="classes/Rir/Document.html#M000014">new (Rir::Document)</a><br /> | |
| 34 | + <a href="classes/RIR/Document.html#M000014">new (RIR::Document)</a><br /> | |
| 35 | 35 | |
| 36 | - <a href="classes/Rir/Document.html#M000011">ngrams (Rir::Document)</a><br /> | |
| 36 | + <a href="classes/RIR/Document.html#M000011">ngrams (RIR::Document)</a><br /> | |
| 37 | 37 | |
| 38 | 38 | <a href="classes/String.html#M000002">remove_special_characters (String)</a><br /> |
| 39 | 39 |
doc/index.html
| ... | ... | @@ -16,7 +16,7 @@ |
| 16 | 16 | <frame src="fr_class_index.html" name="Classes" /> |
| 17 | 17 | <frame src="fr_method_index.html" name="Methods" /> |
| 18 | 18 | </frameset> |
| 19 | - <frame src="files/README_markdown.html" name="docwin" /> | |
| 19 | + <frame src="files/lib/rir/string_rb.html" name="docwin" /> | |
| 20 | 20 | </frameset> |
| 21 | 21 | </html> |
lib/rir.rb
lib/rir/document.rb
| ... | ... | @@ -18,7 +18,7 @@ |
| 18 | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 19 | 19 | |
| 20 | 20 | # General module for many purposes related to Information Retrieval. |
| 21 | -module Rir | |
| 21 | +module RIR | |
| 22 | 22 | |
| 23 | 23 | # A Document is a bag of words and is constructed from a string. |
| 24 | 24 | class Document |
lib/rir/string.rb
| ... | ... | @@ -18,7 +18,7 @@ |
| 18 | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 19 | 19 | |
| 20 | 20 | # General module for many purposes related to Information Retrieval. |
| 21 | -module Rir | |
| 21 | +module RIR | |
| 22 | 22 | |
| 23 | 23 | # These are the default stopwords provided by Lemur. |
| 24 | 24 | Stoplist = [ |
| ... | ... | @@ -71,7 +71,7 @@ |
| 71 | 71 | |
| 72 | 72 | # Extension of the standard class String with useful functions. |
| 73 | 73 | class String |
| 74 | - include Rir | |
| 74 | + include RIR | |
| 75 | 75 | |
| 76 | 76 | # Returns +true+ if +self+ belongs to RIR::Stoplist, +false+ otherwise. |
| 77 | 77 | def is_stopword? |
| 78 | 78 | |
| ... | ... | @@ -146,11 +146,12 @@ |
| 146 | 146 | self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten |
| 147 | 147 | end |
| 148 | 148 | |
| 149 | - private | |
| 150 | 149 | def strip_with_pattern(pattern) |
| 151 | 150 | require 'cgi' |
| 152 | 151 | require 'kconv' |
| 153 | 152 | CGI::unescapeHTML(self.gsub(pattern,"")).toutf8 |
| 154 | 153 | end |
| 154 | + | |
| 155 | + private :strip_with_pattern | |
| 155 | 156 | end |
main.rb
| 1 | 1 | $LOAD_PATH.unshift File.expand_path(File.join(File.dirname(__FILE__), "lib")) |
| 2 | 2 | |
| 3 | 3 | require 'rir' |
| 4 | + | |
| 5 | +w = RIR::WikipediaPage.new("http://en.wikipedia.org/wiki/The_Dillinger_Escape_Plan") | |
| 6 | +p w.entropy("guitar") | |
| 7 | + | |
| 8 | +params = RIR::Indri::Parameters.new("path_vers_mon_index") | |
| 9 | +p params.rule | |
| 10 | +q = RIR::Indri::IndriQuery.new("pouet", "bla", params) | |
| 11 | +puts q |