From 4b35d222779a7b1edae4fd7084213693be077430 Mon Sep 17 00:00:00 2001 From: Romain Deveaud Date: Thu, 25 Nov 2010 17:12:05 +0100 Subject: [PATCH] resolving documentation troubleshootings --- doc/classes/RIR.html | 90 +++++++++++++++ doc/classes/RIR/Corpus.html | 20 ++-- doc/classes/RIR/Corpus.src/M000016.html | 15 --- doc/classes/RIR/Corpus.src/M000017.html | 15 --- doc/classes/RIR/Corpus.src/M000018.html | 15 --- doc/classes/RIR/Corpus.src/M000020.html | 15 +++ doc/classes/RIR/Corpus.src/M000021.html | 15 +++ doc/classes/RIR/Document.html | 60 +++++----- doc/classes/RIR/Document.src/M000008.html | 23 ---- doc/classes/RIR/Document.src/M000009.html | 26 ----- doc/classes/RIR/Document.src/M000010.html | 18 --- doc/classes/RIR/Document.src/M000011.html | 24 ---- doc/classes/RIR/Document.src/M000012.html | 15 --- doc/classes/RIR/Document.src/M000013.html | 16 --- doc/classes/RIR/Document.src/M000014.html | 16 --- doc/classes/RIR/Document.src/M000019.html | 23 ---- doc/classes/RIR/Document.src/M000020.html | 26 ----- doc/classes/RIR/Document.src/M000021.html | 18 --- doc/classes/RIR/Document.src/M000022.html | 21 ++-- doc/classes/RIR/Document.src/M000023.html | 20 +++- doc/classes/RIR/Document.src/M000024.html | 18 +++ doc/classes/RIR/Document.src/M000025.html | 24 ++++ doc/classes/RIR/Document.src/M000026.html | 15 +++ doc/classes/RIR/Document.src/M000027.html | 16 +++ doc/classes/RIR/Indri/IndriQuery.html | 20 ++-- doc/classes/RIR/Indri/IndriQuery.src/M000006.html | 22 ---- doc/classes/RIR/Indri/IndriQuery.src/M000007.html | 22 ---- doc/classes/RIR/Indri/IndriQuery.src/M000014.html | 22 ---- doc/classes/RIR/Indri/IndriQuery.src/M000015.html | 22 ---- doc/classes/RIR/Indri/IndriQuery.src/M000016.html | 22 ---- doc/classes/RIR/Indri/IndriQuery.src/M000018.html | 22 ++++ doc/classes/RIR/Indri/IndriQuery.src/M000019.html | 22 ++++ doc/classes/RIR/Indri/Parameters.html | 20 ++-- doc/classes/RIR/Indri/Parameters.src/M000004.html | 21 ---- doc/classes/RIR/Indri/Parameters.src/M000005.html | 29 ----- doc/classes/RIR/Indri/Parameters.src/M000012.html | 21 ---- doc/classes/RIR/Indri/Parameters.src/M000013.html | 29 ----- doc/classes/RIR/Indri/Parameters.src/M000014.html | 29 ----- doc/classes/RIR/Indri/Parameters.src/M000016.html | 21 ++++ doc/classes/RIR/Indri/Parameters.src/M000017.html | 29 +++++ doc/classes/RIR/TreeTagger/Chunk.html | 10 +- doc/classes/RIR/TreeTagger/Chunk.src/M000003.html | 16 --- doc/classes/RIR/TreeTagger/Chunk.src/M000015.html | 16 +++ doc/classes/RIR/TreeTagger/TaggerChunker.html | 20 ++-- .../RIR/TreeTagger/TaggerChunker.src/M000001.html | 39 ------- .../RIR/TreeTagger/TaggerChunker.src/M000002.html | 15 --- .../RIR/TreeTagger/TaggerChunker.src/M000013.html | 39 +++++++ .../RIR/TreeTagger/TaggerChunker.src/M000014.html | 15 +++ doc/classes/RIR/WebDocument.html | 20 ++-- doc/classes/RIR/WebDocument.src/M000014.html | 16 --- doc/classes/RIR/WebDocument.src/M000015.html | 16 --- doc/classes/RIR/WebDocument.src/M000016.html | 16 --- doc/classes/RIR/WebDocument.src/M000024.html | 16 --- doc/classes/RIR/WebDocument.src/M000025.html | 16 --- doc/classes/RIR/WebDocument.src/M000028.html | 16 +++ doc/classes/RIR/WebDocument.src/M000029.html | 16 +++ doc/classes/RIR/WikipediaPage.html | 30 ++--- doc/classes/RIR/WikipediaPage.src/M000016.html | 19 ---- doc/classes/RIR/WikipediaPage.src/M000017.html | 19 ---- doc/classes/RIR/WikipediaPage.src/M000018.html | 17 --- doc/classes/RIR/WikipediaPage.src/M000026.html | 17 --- doc/classes/RIR/WikipediaPage.src/M000027.html | 17 --- doc/classes/RIR/WikipediaPage.src/M000028.html | 21 ---- doc/classes/RIR/WikipediaPage.src/M000030.html | 19 ++++ doc/classes/RIR/WikipediaPage.src/M000031.html | 19 ++++ doc/classes/RIR/WikipediaPage.src/M000032.html | 17 +++ doc/classes/String.html | 110 +++++++++---------- doc/classes/String.src/M000001.html | 15 --- doc/classes/String.src/M000002.html | 8 +- doc/classes/String.src/M000003.html | 8 +- doc/classes/String.src/M000004.html | 8 +- doc/classes/String.src/M000005.html | 8 +- doc/classes/String.src/M000006.html | 8 +- doc/classes/String.src/M000007.html | 9 +- doc/classes/String.src/M000008.html | 9 +- doc/classes/String.src/M000009.html | 8 +- doc/classes/String.src/M000010.html | 8 +- doc/classes/String.src/M000011.html | 8 +- doc/classes/String.src/M000012.html | 2 +- doc/created.rid | 2 +- doc/files/README_markdown.html | 121 --------------------- doc/files/lib/rir/corpus_rb.html | 46 -------- doc/files/lib/rir/ttagger_rb.html | 46 ++++++++ doc/files/main_rb.html | 100 ----------------- doc/fr_class_index.html | 6 + doc/fr_file_index.html | 6 + doc/fr_method_index.html | 64 ++++++++--- 87 files changed, 771 insertions(+), 1263 deletions(-) delete mode 100644 doc/classes/RIR/Corpus.src/M000016.html delete mode 100644 doc/classes/RIR/Corpus.src/M000017.html delete mode 100644 doc/classes/RIR/Corpus.src/M000018.html create mode 100644 doc/classes/RIR/Corpus.src/M000020.html create mode 100644 doc/classes/RIR/Corpus.src/M000021.html delete mode 100644 doc/classes/RIR/Document.src/M000008.html delete mode 100644 doc/classes/RIR/Document.src/M000009.html delete mode 100644 doc/classes/RIR/Document.src/M000010.html delete mode 100644 doc/classes/RIR/Document.src/M000011.html delete mode 100644 doc/classes/RIR/Document.src/M000012.html delete mode 100644 doc/classes/RIR/Document.src/M000013.html delete mode 100644 doc/classes/RIR/Document.src/M000014.html delete mode 100644 doc/classes/RIR/Document.src/M000019.html delete mode 100644 doc/classes/RIR/Document.src/M000020.html delete mode 100644 doc/classes/RIR/Document.src/M000021.html create mode 100644 doc/classes/RIR/Document.src/M000024.html create mode 100644 doc/classes/RIR/Document.src/M000025.html create mode 100644 doc/classes/RIR/Document.src/M000026.html create mode 100644 doc/classes/RIR/Document.src/M000027.html delete mode 100644 doc/classes/RIR/Indri/IndriQuery.src/M000006.html delete mode 100644 doc/classes/RIR/Indri/IndriQuery.src/M000007.html delete mode 100644 doc/classes/RIR/Indri/IndriQuery.src/M000014.html delete mode 100644 doc/classes/RIR/Indri/IndriQuery.src/M000015.html delete mode 100644 doc/classes/RIR/Indri/IndriQuery.src/M000016.html create mode 100644 doc/classes/RIR/Indri/IndriQuery.src/M000018.html create mode 100644 doc/classes/RIR/Indri/IndriQuery.src/M000019.html delete mode 100644 doc/classes/RIR/Indri/Parameters.src/M000004.html delete mode 100644 doc/classes/RIR/Indri/Parameters.src/M000005.html delete mode 100644 doc/classes/RIR/Indri/Parameters.src/M000012.html delete mode 100644 doc/classes/RIR/Indri/Parameters.src/M000013.html delete mode 100644 doc/classes/RIR/Indri/Parameters.src/M000014.html create mode 100644 doc/classes/RIR/Indri/Parameters.src/M000016.html create mode 100644 doc/classes/RIR/Indri/Parameters.src/M000017.html delete mode 100644 doc/classes/RIR/TreeTagger/Chunk.src/M000003.html create mode 100644 doc/classes/RIR/TreeTagger/Chunk.src/M000015.html delete mode 100644 doc/classes/RIR/TreeTagger/TaggerChunker.src/M000001.html delete mode 100644 doc/classes/RIR/TreeTagger/TaggerChunker.src/M000002.html create mode 100644 doc/classes/RIR/TreeTagger/TaggerChunker.src/M000013.html create mode 100644 doc/classes/RIR/TreeTagger/TaggerChunker.src/M000014.html delete mode 100644 doc/classes/RIR/WebDocument.src/M000014.html delete mode 100644 doc/classes/RIR/WebDocument.src/M000015.html delete mode 100644 doc/classes/RIR/WebDocument.src/M000016.html delete mode 100644 doc/classes/RIR/WebDocument.src/M000024.html delete mode 100644 doc/classes/RIR/WebDocument.src/M000025.html create mode 100644 doc/classes/RIR/WebDocument.src/M000028.html create mode 100644 doc/classes/RIR/WebDocument.src/M000029.html delete mode 100644 doc/classes/RIR/WikipediaPage.src/M000016.html delete mode 100644 doc/classes/RIR/WikipediaPage.src/M000017.html delete mode 100644 doc/classes/RIR/WikipediaPage.src/M000018.html delete mode 100644 doc/classes/RIR/WikipediaPage.src/M000026.html delete mode 100644 doc/classes/RIR/WikipediaPage.src/M000027.html delete mode 100644 doc/classes/RIR/WikipediaPage.src/M000028.html create mode 100644 doc/classes/RIR/WikipediaPage.src/M000030.html create mode 100644 doc/classes/RIR/WikipediaPage.src/M000031.html create mode 100644 doc/classes/RIR/WikipediaPage.src/M000032.html delete mode 100644 doc/classes/String.src/M000001.html delete mode 100644 doc/files/README_markdown.html delete mode 100644 doc/files/main_rb.html diff --git a/doc/classes/RIR.html b/doc/classes/RIR.html index 77d50d4..0149a10 100644 --- a/doc/classes/RIR.html +++ b/doc/classes/RIR.html @@ -63,6 +63,16 @@
+ + + lib/rir/corpus.rb + + + + +
+ + lib/rir/query.rb @@ -73,6 +83,16 @@
+
+ + lib/rir/string.rb + + + + +
+ + lib/rir/document.rb @@ -142,6 +162,52 @@ with this program. If not, see <www.gnu.org/licenses/>.


+This file is a part of an Information Retrieval oriented Ruby library +

+

+Copyright (C) 2010-2011 Romain Deveaud +

+

+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +

+

+This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +

+

+You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>. +

+

+This file is a part of an Information Retrieval oriented Ruby library +

+

+Copyright (C) 2010-2011 Romain Deveaud +

+

+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +

+

+This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +

+

+You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>. +

+

General module for many purposes related to Information Retrieval.

@@ -161,6 +227,7 @@ General module for many purposes related to Information Retrieval. Module RIR::Indri
Module RIR::TreeTagger
+Class RIR::Corpus
Class RIR::Document
Class RIR::Query
Class RIR::WebDocument
@@ -168,6 +235,29 @@ Class RIR::WikipediaPage
+
+

Constants

+ +
+ + + + + + + + + + + + +
Stoplist=[ "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", "provide", "quite", "rather", "really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", "yours", "yourself", "yourselves" ]  +These are the default stopwords provided by Lemur. + +
+
+
+ diff --git a/doc/classes/RIR/Corpus.html b/doc/classes/RIR/Corpus.html index 316ee35..2fdf78b 100644 --- a/doc/classes/RIR/Corpus.html +++ b/doc/classes/RIR/Corpus.html @@ -91,9 +91,9 @@
- files   + files   - new   + new  
@@ -132,13 +132,13 @@

Public Class methods

-
- +
+
- + new(path) @@ -155,13 +155,13 @@

Public Instance methods

-
- +
+
- + files() diff --git a/doc/classes/RIR/Corpus.src/M000016.html b/doc/classes/RIR/Corpus.src/M000016.html deleted file mode 100644 index cb605ef..0000000 --- a/doc/classes/RIR/Corpus.src/M000016.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - new (RIR::Corpus) - - - - -
# File lib/rir/corpus.rb, line 25
-    def initialize(path)
-      @path = path.chomp "/"
-    end
- - diff --git a/doc/classes/RIR/Corpus.src/M000017.html b/doc/classes/RIR/Corpus.src/M000017.html deleted file mode 100644 index a4eb5fa..0000000 --- a/doc/classes/RIR/Corpus.src/M000017.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - files (RIR::Corpus) - - - - -
# File lib/rir/corpus.rb, line 35
-    def files
-      Dir["#{@path}/**/*.*"]
-    end
- - diff --git a/doc/classes/RIR/Corpus.src/M000018.html b/doc/classes/RIR/Corpus.src/M000018.html deleted file mode 100644 index 2ebdcfd..0000000 --- a/doc/classes/RIR/Corpus.src/M000018.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - files (RIR::Corpus) - - - - -
# File lib/rir/corpus.rb, line 36
-    def files
-      Dir["#{@path}/**/*.*"]
-    end
- - diff --git a/doc/classes/RIR/Corpus.src/M000020.html b/doc/classes/RIR/Corpus.src/M000020.html new file mode 100644 index 0000000..cb605ef --- /dev/null +++ b/doc/classes/RIR/Corpus.src/M000020.html @@ -0,0 +1,15 @@ + + + + new (RIR::Corpus) + + + + +
# File lib/rir/corpus.rb, line 25
+    def initialize(path)
+      @path = path.chomp "/"
+    end
+ + diff --git a/doc/classes/RIR/Corpus.src/M000021.html b/doc/classes/RIR/Corpus.src/M000021.html new file mode 100644 index 0000000..a4eb5fa --- /dev/null +++ b/doc/classes/RIR/Corpus.src/M000021.html @@ -0,0 +1,15 @@ + + + + files (RIR::Corpus) + + + + +
# File lib/rir/corpus.rb, line 35
+    def files
+      Dir["#{@path}/**/*.*"]
+    end
+ + diff --git a/doc/classes/RIR/Document.html b/doc/classes/RIR/Document.html index 00f66b4..fdefaa1 100644 --- a/doc/classes/RIR/Document.html +++ b/doc/classes/RIR/Document.html @@ -99,17 +99,17 @@ from a string.
- count_words   + count_words   - entropy   + entropy   - format_words   + format_words   - new   + new   - ngrams   + ngrams   - tf   + tf  
@@ -156,13 +156,13 @@ from a string.

Public Class methods

-
- +
+
- + new(content) @@ -179,13 +179,13 @@ from a string.

Public Instance methods

-
- +
+ -
- +
+ -
- +
+ -
- +
+
- + tf(s) @@ -297,13 +297,13 @@ Computes the term frequency of a given word s.

Protected Instance methods

-
- +
+
- + format_words() diff --git a/doc/classes/RIR/Document.src/M000008.html b/doc/classes/RIR/Document.src/M000008.html deleted file mode 100644 index 72c51f5..0000000 --- a/doc/classes/RIR/Document.src/M000008.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - format_words (RIR::Document) - - - - -
# File lib/rir/document.rb, line 31
-    def format_words
-      wo = []
-
-      @doc_content.split.each do |w|
-        w.split(/\W/).each do |sw| 
-          wo.push(sw.downcase) if sw =~ /[a-zA-Z]/ 
-        end
-      end
-      
-      wo
-    end
- - diff --git a/doc/classes/RIR/Document.src/M000009.html b/doc/classes/RIR/Document.src/M000009.html deleted file mode 100644 index 6257629..0000000 --- a/doc/classes/RIR/Document.src/M000009.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - ngrams (RIR::Document) - - - - -
# File lib/rir/document.rb, line 46
-    def ngrams(n)
-      window       = []
-      ngrams_array = []
-
-      @words.each do |w|
-        window.push(w)
-        if window.size == n
-          ngrams_array.push window.join(" ")
-          window.delete_at(0)
-        end
-      end
-
-      ngrams_array.uniq
-    end
- - diff --git a/doc/classes/RIR/Document.src/M000010.html b/doc/classes/RIR/Document.src/M000010.html deleted file mode 100644 index e8ddeec..0000000 --- a/doc/classes/RIR/Document.src/M000010.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - count_words (RIR::Document) - - - - -
# File lib/rir/document.rb, line 64
-    def count_words
-      counts = Hash.new { |h,k| h[k] = 0 }
-      @words.each { |w| counts[w] += 1 }
-
-      counts
-    end
- - diff --git a/doc/classes/RIR/Document.src/M000011.html b/doc/classes/RIR/Document.src/M000011.html deleted file mode 100644 index 50f6db7..0000000 --- a/doc/classes/RIR/Document.src/M000011.html +++ /dev/null @@ -1,24 +0,0 @@ - - - - entropy (RIR::Document) - - - - -
# File lib/rir/document.rb, line 78
-    def entropy(s)
-      en = 0.0
-      counts = self.count_words
-
-      s.split.each do |w|
-        p_wi = counts[w].to_f/@words.count.to_f
-        en += p_wi*Math.log2(p_wi)
-      end
-
-      en *= -1
-      en
-    end
- - diff --git a/doc/classes/RIR/Document.src/M000012.html b/doc/classes/RIR/Document.src/M000012.html deleted file mode 100644 index eb4436b..0000000 --- a/doc/classes/RIR/Document.src/M000012.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - tf (RIR::Document) - - - - -
# File lib/rir/document.rb, line 94
-    def tf(s)
-      self.count_words[s].to_f/@words.size.to_f
-    end
- - diff --git a/doc/classes/RIR/Document.src/M000013.html b/doc/classes/RIR/Document.src/M000013.html deleted file mode 100644 index 1ef96d5..0000000 --- a/doc/classes/RIR/Document.src/M000013.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - new (RIR::Document) - - - - -
# File lib/rir/document.rb, line 99
-    def initialize(content)
-      @doc_content = content
-      @words = format_words
-    end
- - diff --git a/doc/classes/RIR/Document.src/M000014.html b/doc/classes/RIR/Document.src/M000014.html deleted file mode 100644 index b882fcd..0000000 --- a/doc/classes/RIR/Document.src/M000014.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - new (RIR::Document) - - - - -
# File lib/rir/document.rb, line 92
-    def initialize(content)
-      @doc_content = content
-      @words = format_words
-    end
- - diff --git a/doc/classes/RIR/Document.src/M000019.html b/doc/classes/RIR/Document.src/M000019.html deleted file mode 100644 index 72c51f5..0000000 --- a/doc/classes/RIR/Document.src/M000019.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - format_words (RIR::Document) - - - - -
# File lib/rir/document.rb, line 31
-    def format_words
-      wo = []
-
-      @doc_content.split.each do |w|
-        w.split(/\W/).each do |sw| 
-          wo.push(sw.downcase) if sw =~ /[a-zA-Z]/ 
-        end
-      end
-      
-      wo
-    end
- - diff --git a/doc/classes/RIR/Document.src/M000020.html b/doc/classes/RIR/Document.src/M000020.html deleted file mode 100644 index 6257629..0000000 --- a/doc/classes/RIR/Document.src/M000020.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - ngrams (RIR::Document) - - - - -
# File lib/rir/document.rb, line 46
-    def ngrams(n)
-      window       = []
-      ngrams_array = []
-
-      @words.each do |w|
-        window.push(w)
-        if window.size == n
-          ngrams_array.push window.join(" ")
-          window.delete_at(0)
-        end
-      end
-
-      ngrams_array.uniq
-    end
- - diff --git a/doc/classes/RIR/Document.src/M000021.html b/doc/classes/RIR/Document.src/M000021.html deleted file mode 100644 index e8ddeec..0000000 --- a/doc/classes/RIR/Document.src/M000021.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - count_words (RIR::Document) - - - - -
# File lib/rir/document.rb, line 64
-    def count_words
-      counts = Hash.new { |h,k| h[k] = 0 }
-      @words.each { |w| counts[w] += 1 }
-
-      counts
-    end
- - diff --git a/doc/classes/RIR/Document.src/M000022.html b/doc/classes/RIR/Document.src/M000022.html index 5694971..72c51f5 100644 --- a/doc/classes/RIR/Document.src/M000022.html +++ b/doc/classes/RIR/Document.src/M000022.html @@ -2,23 +2,22 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - entropy (RIR::Document) + format_words (RIR::Document) -
# File lib/rir/document.rb, line 77
-    def entropy(s)
-      en = 0.0
-      counts = self.count_words
+  
# File lib/rir/document.rb, line 31
+    def format_words
+      wo = []
 
-      s.split.each do |w|
-        p_wi = counts[w].to_f/@words.count.to_f
-        en += p_wi*Math.log2(p_wi)
+      @doc_content.split.each do |w|
+        w.split(/\W/).each do |sw| 
+          wo.push(sw.downcase) if sw =~ /[a-zA-Z]/ 
+        end
       end
-
-      en *= -1
-      en
+      
+      wo
     end
diff --git a/doc/classes/RIR/Document.src/M000023.html b/doc/classes/RIR/Document.src/M000023.html index b882fcd..6257629 100644 --- a/doc/classes/RIR/Document.src/M000023.html +++ b/doc/classes/RIR/Document.src/M000023.html @@ -2,15 +2,25 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - new (RIR::Document) + ngrams (RIR::Document) -
# File lib/rir/document.rb, line 92
-    def initialize(content)
-      @doc_content = content
-      @words = format_words
+  
# File lib/rir/document.rb, line 46
+    def ngrams(n)
+      window       = []
+      ngrams_array = []
+
+      @words.each do |w|
+        window.push(w)
+        if window.size == n
+          ngrams_array.push window.join(" ")
+          window.delete_at(0)
+        end
+      end
+
+      ngrams_array.uniq
     end
diff --git a/doc/classes/RIR/Document.src/M000024.html b/doc/classes/RIR/Document.src/M000024.html new file mode 100644 index 0000000..e8ddeec --- /dev/null +++ b/doc/classes/RIR/Document.src/M000024.html @@ -0,0 +1,18 @@ + + + + count_words (RIR::Document) + + + + +
# File lib/rir/document.rb, line 64
+    def count_words
+      counts = Hash.new { |h,k| h[k] = 0 }
+      @words.each { |w| counts[w] += 1 }
+
+      counts
+    end
+ + diff --git a/doc/classes/RIR/Document.src/M000025.html b/doc/classes/RIR/Document.src/M000025.html new file mode 100644 index 0000000..50f6db7 --- /dev/null +++ b/doc/classes/RIR/Document.src/M000025.html @@ -0,0 +1,24 @@ + + + + entropy (RIR::Document) + + + + +
# File lib/rir/document.rb, line 78
+    def entropy(s)
+      en = 0.0
+      counts = self.count_words
+
+      s.split.each do |w|
+        p_wi = counts[w].to_f/@words.count.to_f
+        en += p_wi*Math.log2(p_wi)
+      end
+
+      en *= -1
+      en
+    end
+ + diff --git a/doc/classes/RIR/Document.src/M000026.html b/doc/classes/RIR/Document.src/M000026.html new file mode 100644 index 0000000..eb4436b --- /dev/null +++ b/doc/classes/RIR/Document.src/M000026.html @@ -0,0 +1,15 @@ + + + + tf (RIR::Document) + + + + +
# File lib/rir/document.rb, line 94
+    def tf(s)
+      self.count_words[s].to_f/@words.size.to_f
+    end
+ + diff --git a/doc/classes/RIR/Document.src/M000027.html b/doc/classes/RIR/Document.src/M000027.html new file mode 100644 index 0000000..1ef96d5 --- /dev/null +++ b/doc/classes/RIR/Document.src/M000027.html @@ -0,0 +1,16 @@ + + + + new (RIR::Document) + + + + +
# File lib/rir/document.rb, line 99
+    def initialize(content)
+      @doc_content = content
+      @words = format_words
+    end
+ + diff --git a/doc/classes/RIR/Indri/IndriQuery.html b/doc/classes/RIR/Indri/IndriQuery.html index 922525d..02ba68c 100644 --- a/doc/classes/RIR/Indri/IndriQuery.html +++ b/doc/classes/RIR/Indri/IndriQuery.html @@ -95,9 +95,9 @@
- new   + new   - to_s   + to_s  
@@ -160,13 +160,13 @@

Public Class methods

-
- +
+
- + new(id,query,params) @@ -183,13 +183,13 @@

Public Instance methods

-
- +
+
- + to_s() diff --git a/doc/classes/RIR/Indri/IndriQuery.src/M000006.html b/doc/classes/RIR/Indri/IndriQuery.src/M000006.html deleted file mode 100644 index c72d135..0000000 --- a/doc/classes/RIR/Indri/IndriQuery.src/M000006.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - new (RIR::Indri::IndriQuery) - - - - -
# File lib/rir/query.rb, line 62
-      def initialize(id,query,params)
-        @params = params
-        # Here we set the default retrieval model as Language Modeling
-        # with a Dirichlet smoothing at 2500.
-        # TODO: maybe a Rule class...
-        @params.rule  = 'method:dirichlet,mu:2500' if @params.rule.nil?
-
-        @id     = id
-        @query  = query
-      end
- - diff --git a/doc/classes/RIR/Indri/IndriQuery.src/M000007.html b/doc/classes/RIR/Indri/IndriQuery.src/M000007.html deleted file mode 100644 index e237f9a..0000000 --- a/doc/classes/RIR/Indri/IndriQuery.src/M000007.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - to_s (RIR::Indri::IndriQuery) - - - - -
# File lib/rir/query.rb, line 73
-      def to_s
-        h = @params.to_s
-        h += "<query>\n"
-        h += "<number>#{@id}</number>\n"
-        h += "<text>#{@query}</text>\n"
-        h += "</query>\n"
-        h += "</parameters>"
-
-        h
-      end
- - diff --git a/doc/classes/RIR/Indri/IndriQuery.src/M000014.html b/doc/classes/RIR/Indri/IndriQuery.src/M000014.html deleted file mode 100644 index c72d135..0000000 --- a/doc/classes/RIR/Indri/IndriQuery.src/M000014.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - new (RIR::Indri::IndriQuery) - - - - -
# File lib/rir/query.rb, line 62
-      def initialize(id,query,params)
-        @params = params
-        # Here we set the default retrieval model as Language Modeling
-        # with a Dirichlet smoothing at 2500.
-        # TODO: maybe a Rule class...
-        @params.rule  = 'method:dirichlet,mu:2500' if @params.rule.nil?
-
-        @id     = id
-        @query  = query
-      end
- - diff --git a/doc/classes/RIR/Indri/IndriQuery.src/M000015.html b/doc/classes/RIR/Indri/IndriQuery.src/M000015.html deleted file mode 100644 index e237f9a..0000000 --- a/doc/classes/RIR/Indri/IndriQuery.src/M000015.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - to_s (RIR::Indri::IndriQuery) - - - - -
# File lib/rir/query.rb, line 73
-      def to_s
-        h = @params.to_s
-        h += "<query>\n"
-        h += "<number>#{@id}</number>\n"
-        h += "<text>#{@query}</text>\n"
-        h += "</query>\n"
-        h += "</parameters>"
-
-        h
-      end
- - diff --git a/doc/classes/RIR/Indri/IndriQuery.src/M000016.html b/doc/classes/RIR/Indri/IndriQuery.src/M000016.html deleted file mode 100644 index 515e431..0000000 --- a/doc/classes/RIR/Indri/IndriQuery.src/M000016.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - to_s (RIR::Indri::IndriQuery) - - - - -
# File lib/rir/query.rb, line 74
-      def to_s
-        h = @params.to_s
-        h += "<query>\n"
-        h += "<number>#{@id}</number>\n"
-        h += "<text>#{@query}</text>\n"
-        h += "</query>\n"
-        h += "</parameters>"
-
-        h
-      end
- - diff --git a/doc/classes/RIR/Indri/IndriQuery.src/M000018.html b/doc/classes/RIR/Indri/IndriQuery.src/M000018.html new file mode 100644 index 0000000..c72d135 --- /dev/null +++ b/doc/classes/RIR/Indri/IndriQuery.src/M000018.html @@ -0,0 +1,22 @@ + + + + new (RIR::Indri::IndriQuery) + + + + +
# File lib/rir/query.rb, line 62
+      def initialize(id,query,params)
+        @params = params
+        # Here we set the default retrieval model as Language Modeling
+        # with a Dirichlet smoothing at 2500.
+        # TODO: maybe a Rule class...
+        @params.rule  = 'method:dirichlet,mu:2500' if @params.rule.nil?
+
+        @id     = id
+        @query  = query
+      end
+ + diff --git a/doc/classes/RIR/Indri/IndriQuery.src/M000019.html b/doc/classes/RIR/Indri/IndriQuery.src/M000019.html new file mode 100644 index 0000000..e237f9a --- /dev/null +++ b/doc/classes/RIR/Indri/IndriQuery.src/M000019.html @@ -0,0 +1,22 @@ + + + + to_s (RIR::Indri::IndriQuery) + + + + +
# File lib/rir/query.rb, line 73
+      def to_s
+        h = @params.to_s
+        h += "<query>\n"
+        h += "<number>#{@id}</number>\n"
+        h += "<text>#{@query}</text>\n"
+        h += "</query>\n"
+        h += "</parameters>"
+
+        h
+      end
+ + diff --git a/doc/classes/RIR/Indri/Parameters.html b/doc/classes/RIR/Indri/Parameters.html index 28e11cb..79d6734 100644 --- a/doc/classes/RIR/Indri/Parameters.html +++ b/doc/classes/RIR/Indri/Parameters.html @@ -91,9 +91,9 @@
- new   + new   - to_s   + to_s  
@@ -196,13 +196,13 @@

Public Class methods

-
- +
+
- + new(corpus,mem="1g",count="1000",offset="1",run_id="default",print_query=false,print_docs=false) @@ -219,13 +219,13 @@

Public Instance methods

-
- +
+
- + to_s() diff --git a/doc/classes/RIR/Indri/Parameters.src/M000004.html b/doc/classes/RIR/Indri/Parameters.src/M000004.html deleted file mode 100644 index a5d26e0..0000000 --- a/doc/classes/RIR/Indri/Parameters.src/M000004.html +++ /dev/null @@ -1,21 +0,0 @@ - - - - new (RIR::Indri::Parameters) - - - - -
# File lib/rir/query.rb, line 30
-      def initialize(corpus,mem="1g",count="1000",offset="1",run_id="default",print_query=false,print_docs=false)
-        @index_path  = corpus
-        @memory      = mem
-        @count       = count
-        @offset      = offset
-        @run_id      = run_id
-        @print_query = print_query ? "true" : "false"
-        @print_docs  = print_docs  ? "true" : "false"
-      end
- - diff --git a/doc/classes/RIR/Indri/Parameters.src/M000005.html b/doc/classes/RIR/Indri/Parameters.src/M000005.html deleted file mode 100644 index 3a235ca..0000000 --- a/doc/classes/RIR/Indri/Parameters.src/M000005.html +++ /dev/null @@ -1,29 +0,0 @@ - - - - to_s (RIR::Indri::Parameters) - - - - -
# File lib/rir/query.rb, line 40
-      def to_s
-        h = "<parameters>\n"
-        h += "<memory>#{@memory}</memory>\n"
-        h += "<index>#{@index_path}</index>\n"
-        h += "<count>#{@count}</count>\n"
-        unless @baseline.nil?
-          h += "<baseline>#{@baseline}</baseline>\n" 
-        else
-          h += "<rule>#{@rule}</rule>\n"
-        end
-        h += "<queryOffset>#{@offset}</queryOffset>\n"
-        h += "<runID>#{@run_id}</runID>\n"
-        h += "<printQuery>#{@print_query}</printQuery>\n"
-        h += "<printDocuments>#{@print_docs}</printDocuments>\n"
-
-        h
-      end
- - diff --git a/doc/classes/RIR/Indri/Parameters.src/M000012.html b/doc/classes/RIR/Indri/Parameters.src/M000012.html deleted file mode 100644 index 23acfd5..0000000 --- a/doc/classes/RIR/Indri/Parameters.src/M000012.html +++ /dev/null @@ -1,21 +0,0 @@ - - - - new (RIR::Indri::Parameters) - - - - -
# File lib/rir/query.rb, line 30
-      def initialize(corpus,mem="1g",count="1000",offset="1",run_id="default",print_query=false,print_docs=false)
-        @corpus      = corpus
-        @memory      = mem
-        @count       = count
-        @offset      = offset
-        @run_id      = run_id
-        @print_query = print_query ? "true" : "false"
-        @print_docs  = print_docs  ? "true" : "false"
-      end
- - diff --git a/doc/classes/RIR/Indri/Parameters.src/M000013.html b/doc/classes/RIR/Indri/Parameters.src/M000013.html deleted file mode 100644 index bc4b8a7..0000000 --- a/doc/classes/RIR/Indri/Parameters.src/M000013.html +++ /dev/null @@ -1,29 +0,0 @@ - - - - to_s (RIR::Indri::Parameters) - - - - -
# File lib/rir/query.rb, line 40
-      def to_s
-        h = "<parameters>\n"
-        h += "<memory>#{@memory}</memory>\n"
-        h += "<index>#{@corpus}</index>\n"
-        h += "<count>#{@count}</count>\n"
-        unless @baseline.nil?
-          h += "<baseline>#{@baseline}</baseline>\n" 
-        else
-          h += "<rule>#{@rule}</rule>\n"
-        end
-        h += "<queryOffset>#{@offset}</queryOffset>\n"
-        h += "<runID>#{@run_id}</runID>\n"
-        h += "<printQuery>#{@print_query}</printQuery>\n"
-        h += "<printDocuments>#{@print_docs}</printDocuments>\n"
-
-        h
-      end
- - diff --git a/doc/classes/RIR/Indri/Parameters.src/M000014.html b/doc/classes/RIR/Indri/Parameters.src/M000014.html deleted file mode 100644 index 3529737..0000000 --- a/doc/classes/RIR/Indri/Parameters.src/M000014.html +++ /dev/null @@ -1,29 +0,0 @@ - - - - to_s (RIR::Indri::Parameters) - - - - -
# File lib/rir/query.rb, line 41
-      def to_s
-        h = "<parameters>\n"
-        h += "<memory>#{@memory}</memory>\n"
-        h += "<index>#{@corpus}</index>\n"
-        h += "<count>#{@count}</count>\n"
-        unless @baseline.nil?
-          h += "<baseline>#{@baseline}</baseline>\n" 
-        else
-          h += "<rule>#{@rule}</rule>\n"
-        end
-        h += "<queryOffset>#{@offset}</queryOffset>\n"
-        h += "<runID>#{@run_id}</runID>\n"
-        h += "<printQuery>#{@print_query}</printQuery>\n"
-        h += "<printDocuments>#{@print_docs}</printDocuments>\n"
-
-        h
-      end
- - diff --git a/doc/classes/RIR/Indri/Parameters.src/M000016.html b/doc/classes/RIR/Indri/Parameters.src/M000016.html new file mode 100644 index 0000000..a5d26e0 --- /dev/null +++ b/doc/classes/RIR/Indri/Parameters.src/M000016.html @@ -0,0 +1,21 @@ + + + + new (RIR::Indri::Parameters) + + + + +
# File lib/rir/query.rb, line 30
+      def initialize(corpus,mem="1g",count="1000",offset="1",run_id="default",print_query=false,print_docs=false)
+        @index_path  = corpus
+        @memory      = mem
+        @count       = count
+        @offset      = offset
+        @run_id      = run_id
+        @print_query = print_query ? "true" : "false"
+        @print_docs  = print_docs  ? "true" : "false"
+      end
+ + diff --git a/doc/classes/RIR/Indri/Parameters.src/M000017.html b/doc/classes/RIR/Indri/Parameters.src/M000017.html new file mode 100644 index 0000000..3a235ca --- /dev/null +++ b/doc/classes/RIR/Indri/Parameters.src/M000017.html @@ -0,0 +1,29 @@ + + + + to_s (RIR::Indri::Parameters) + + + + +
# File lib/rir/query.rb, line 40
+      def to_s
+        h = "<parameters>\n"
+        h += "<memory>#{@memory}</memory>\n"
+        h += "<index>#{@index_path}</index>\n"
+        h += "<count>#{@count}</count>\n"
+        unless @baseline.nil?
+          h += "<baseline>#{@baseline}</baseline>\n" 
+        else
+          h += "<rule>#{@rule}</rule>\n"
+        end
+        h += "<queryOffset>#{@offset}</queryOffset>\n"
+        h += "<runID>#{@run_id}</runID>\n"
+        h += "<printQuery>#{@print_query}</printQuery>\n"
+        h += "<printDocuments>#{@print_docs}</printDocuments>\n"
+
+        h
+      end
+ + diff --git a/doc/classes/RIR/TreeTagger/Chunk.html b/doc/classes/RIR/TreeTagger/Chunk.html index 95f4e8c..20dca98 100644 --- a/doc/classes/RIR/TreeTagger/Chunk.html +++ b/doc/classes/RIR/TreeTagger/Chunk.html @@ -99,7 +99,7 @@ href="TaggerChunker.html">TaggerChunker
file.
- new   + new  
@@ -146,13 +146,13 @@ href="TaggerChunker.html">TaggerChunker file.

Public Class methods

-
- +
+ @@ -147,13 +147,13 @@ This class handles generic parsing of tagger-chunker outputs.

Public Class methods

-
- +
+ -
- +
+
- + parse(chunk_lines) diff --git a/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000001.html b/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000001.html deleted file mode 100644 index 3bdb228..0000000 --- a/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000001.html +++ /dev/null @@ -1,39 +0,0 @@ - - - - parse (RIR::TreeTagger::TaggerChunker) - - - - -
# File lib/rir/ttagger.rb, line 33
-      def self.parse chunk_lines
-        open = false
-        tag  = nil
-
-        chunks = []
-        words  = []
-
-        chunk_lines.each do |l|
-          l.chomp!
-          if l =~ /^<\w+>$/
-            open = true
-            tag  = l
-          elsif l =~ /^<\/\w+>$/
-            if !words.empty? && open && l == tag.sub(/</, '</')
-              open = false
-              chunks.push Chunk.new(words.join(" "), tag) 
-              words.clear
-            else
-              next
-            end
-          else
-            words.push(l.split.first)
-          end
-        end
-
-        chunks
-      end
- - diff --git a/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000002.html b/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000002.html deleted file mode 100644 index c33487c..0000000 --- a/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000002.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - new (RIR::TreeTagger::TaggerChunker) - - - - -
# File lib/rir/ttagger.rb, line 65
-      def initialize chunk_file
-        @chunks = TaggerChunker.parse File.open(chunk_file).readlines
-      end
- - diff --git a/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000013.html b/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000013.html new file mode 100644 index 0000000..3bdb228 --- /dev/null +++ b/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000013.html @@ -0,0 +1,39 @@ + + + + parse (RIR::TreeTagger::TaggerChunker) + + + + +
# File lib/rir/ttagger.rb, line 33
+      def self.parse chunk_lines
+        open = false
+        tag  = nil
+
+        chunks = []
+        words  = []
+
+        chunk_lines.each do |l|
+          l.chomp!
+          if l =~ /^<\w+>$/
+            open = true
+            tag  = l
+          elsif l =~ /^<\/\w+>$/
+            if !words.empty? && open && l == tag.sub(/</, '</')
+              open = false
+              chunks.push Chunk.new(words.join(" "), tag) 
+              words.clear
+            else
+              next
+            end
+          else
+            words.push(l.split.first)
+          end
+        end
+
+        chunks
+      end
+ + diff --git a/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000014.html b/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000014.html new file mode 100644 index 0000000..c33487c --- /dev/null +++ b/doc/classes/RIR/TreeTagger/TaggerChunker.src/M000014.html @@ -0,0 +1,15 @@ + + + + new (RIR::TreeTagger::TaggerChunker) + + + + +
# File lib/rir/ttagger.rb, line 65
+      def initialize chunk_file
+        @chunks = TaggerChunker.parse File.open(chunk_file).readlines
+      end
+ + diff --git a/doc/classes/RIR/WebDocument.html b/doc/classes/RIR/WebDocument.html index f2e6c6a..4f034b9 100644 --- a/doc/classes/RIR/WebDocument.html +++ b/doc/classes/RIR/WebDocument.html @@ -103,9 +103,9 @@ href="Document.html">Document
with a url.
- get_content   + get_content   - new   + new  
@@ -144,13 +144,13 @@ href="Document.html">Document with a url.

Public Class methods

-
- +
+ -
- +
+
- + new(url) diff --git a/doc/classes/RIR/WebDocument.src/M000014.html b/doc/classes/RIR/WebDocument.src/M000014.html deleted file mode 100644 index e3931e7..0000000 --- a/doc/classes/RIR/WebDocument.src/M000014.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - get_content (RIR::WebDocument) - - - - -
# File lib/rir/document.rb, line 112
-    def self.get_content(url)
-      require 'net/http'
-      Net::HTTP.get(URI.parse(url))
-    end
- - diff --git a/doc/classes/RIR/WebDocument.src/M000015.html b/doc/classes/RIR/WebDocument.src/M000015.html deleted file mode 100644 index dd6da25..0000000 --- a/doc/classes/RIR/WebDocument.src/M000015.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - new (RIR::WebDocument) - - - - -
# File lib/rir/document.rb, line 119
-    def initialize(url)
-      @url = url
-      super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags
-    end
- - diff --git a/doc/classes/RIR/WebDocument.src/M000016.html b/doc/classes/RIR/WebDocument.src/M000016.html deleted file mode 100644 index d7a5169..0000000 --- a/doc/classes/RIR/WebDocument.src/M000016.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - new (RIR::WebDocument) - - - - -
# File lib/rir/document.rb, line 112
-    def initialize(url)
-      @url = url
-      super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags
-    end
- - diff --git a/doc/classes/RIR/WebDocument.src/M000024.html b/doc/classes/RIR/WebDocument.src/M000024.html deleted file mode 100644 index 54776b5..0000000 --- a/doc/classes/RIR/WebDocument.src/M000024.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - get_content (RIR::WebDocument) - - - - -
# File lib/rir/document.rb, line 105
-    def self.get_content(url)
-      require 'net/http'
-      Net::HTTP.get(URI.parse(url))
-    end
- - diff --git a/doc/classes/RIR/WebDocument.src/M000025.html b/doc/classes/RIR/WebDocument.src/M000025.html deleted file mode 100644 index d7a5169..0000000 --- a/doc/classes/RIR/WebDocument.src/M000025.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - new (RIR::WebDocument) - - - - -
# File lib/rir/document.rb, line 112
-    def initialize(url)
-      @url = url
-      super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags
-    end
- - diff --git a/doc/classes/RIR/WebDocument.src/M000028.html b/doc/classes/RIR/WebDocument.src/M000028.html new file mode 100644 index 0000000..e3931e7 --- /dev/null +++ b/doc/classes/RIR/WebDocument.src/M000028.html @@ -0,0 +1,16 @@ + + + + get_content (RIR::WebDocument) + + + + +
# File lib/rir/document.rb, line 112
+    def self.get_content(url)
+      require 'net/http'
+      Net::HTTP.get(URI.parse(url))
+    end
+ + diff --git a/doc/classes/RIR/WebDocument.src/M000029.html b/doc/classes/RIR/WebDocument.src/M000029.html new file mode 100644 index 0000000..dd6da25 --- /dev/null +++ b/doc/classes/RIR/WebDocument.src/M000029.html @@ -0,0 +1,16 @@ + + + + new (RIR::WebDocument) + + + + +
# File lib/rir/document.rb, line 119
+    def initialize(url)
+      @url = url
+      super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags
+    end
+ + diff --git a/doc/classes/RIR/WikipediaPage.html b/doc/classes/RIR/WikipediaPage.html index 02b496f..645a791 100644 --- a/doc/classes/RIR/WikipediaPage.html +++ b/doc/classes/RIR/WikipediaPage.html @@ -103,11 +103,11 @@ href="WebDocument.html">WebDocument
.
@@ -128,13 +128,13 @@ href="WebDocument.html">WebDocument.

Public Class methods

-
- +
+ -
- +
+ -
- +
+
- + search_wikipedia_titles(name) diff --git a/doc/classes/RIR/WikipediaPage.src/M000016.html b/doc/classes/RIR/WikipediaPage.src/M000016.html deleted file mode 100644 index eb3518e..0000000 --- a/doc/classes/RIR/WikipediaPage.src/M000016.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - search_wikipedia_titles (RIR::WikipediaPage) - - - - -
# File lib/rir/document.rb, line 132
-    def self.search_wikipedia_titles(name)
-      raise ArgumentError, "Bad encoding", name unless name.isutf8
-
-      res = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.escape name}&format=xml" ).toutf8).elements['api/query/search']
-
-      res.collect { |e| e.attributes['title'] } unless res.nil?
-    end
- - diff --git a/doc/classes/RIR/WikipediaPage.src/M000017.html b/doc/classes/RIR/WikipediaPage.src/M000017.html deleted file mode 100644 index 0b6b98b..0000000 --- a/doc/classes/RIR/WikipediaPage.src/M000017.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - get_url (RIR::WikipediaPage) - - - - -
# File lib/rir/document.rb, line 140
-    def self.get_url(name)
-      raise ArgumentError, "Bad encoding", name unless name.isutf8
-
-      atts = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&titles=#{URI.escape name}&inprop=url&prop=info&format=xml" ).toutf8).elements['api/query/pages/page'].attributes
-
-      atts['fullurl'] if atts['missing'].nil?
-    end
- - diff --git a/doc/classes/RIR/WikipediaPage.src/M000018.html b/doc/classes/RIR/WikipediaPage.src/M000018.html deleted file mode 100644 index d93d8db..0000000 --- a/doc/classes/RIR/WikipediaPage.src/M000018.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - search_homepage (RIR::WikipediaPage) - - - - -
# File lib/rir/document.rb, line 148
-    def self.search_homepage(name)
-      title = WikipediaPage.search_wikipedia_titles name
-
-      WikipediaPage.new(WikipediaPage.get_url title[0]) unless title.nil? || title.empty?
-    end
- - diff --git a/doc/classes/RIR/WikipediaPage.src/M000026.html b/doc/classes/RIR/WikipediaPage.src/M000026.html deleted file mode 100644 index 3000535..0000000 --- a/doc/classes/RIR/WikipediaPage.src/M000026.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - search_wikipedia_titles (RIR::WikipediaPage) - - - - -
# File lib/rir/document.rb, line 125
-    def self.search_wikipedia_titles(name)
-      res = REXML::Document.new(Net::HTTP.get(URI.parse("http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.escape name}&format=xml")).toutf8).elements['api/query/search']
-
-      res.collect { |e| e.attributes['title'] } unless res.nil?
-    end
- - diff --git a/doc/classes/RIR/WikipediaPage.src/M000027.html b/doc/classes/RIR/WikipediaPage.src/M000027.html deleted file mode 100644 index 3023cc7..0000000 --- a/doc/classes/RIR/WikipediaPage.src/M000027.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - get_url (RIR::WikipediaPage) - - - - -
# File lib/rir/document.rb, line 131
-    def self.get_url(name)
-      atts = REXML::Document.new(Net::HTTP.get(URI.parse("http://en.wikipedia.org/w/api.php?action=query&titles=#{URI.escape name}&inprop=url&prop=info&format=xml")).toutf8).elements['api/query/pages/page'].attributes
-
-      atts['fullurl'] if atts['missing'].nil?
-    end
- - diff --git a/doc/classes/RIR/WikipediaPage.src/M000028.html b/doc/classes/RIR/WikipediaPage.src/M000028.html deleted file mode 100644 index b5289f7..0000000 --- a/doc/classes/RIR/WikipediaPage.src/M000028.html +++ /dev/null @@ -1,21 +0,0 @@ - - - - search_homepage (RIR::WikipediaPage) - - - - -
# File lib/rir/document.rb, line 137
-    def self.search_homepage(name)
-      title = WikipediaPage.search_wikipedia_titles name
-
-      begin
-        WikipediaPage.new(WikipediaPage.get_url title[0]) unless title.nil? || title.empty?
-      rescue
-        puts title[0]
-      end
-    end
- - diff --git a/doc/classes/RIR/WikipediaPage.src/M000030.html b/doc/classes/RIR/WikipediaPage.src/M000030.html new file mode 100644 index 0000000..eb3518e --- /dev/null +++ b/doc/classes/RIR/WikipediaPage.src/M000030.html @@ -0,0 +1,19 @@ + + + + search_wikipedia_titles (RIR::WikipediaPage) + + + + +
# File lib/rir/document.rb, line 132
+    def self.search_wikipedia_titles(name)
+      raise ArgumentError, "Bad encoding", name unless name.isutf8
+
+      res = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.escape name}&format=xml" ).toutf8).elements['api/query/search']
+
+      res.collect { |e| e.attributes['title'] } unless res.nil?
+    end
+ + diff --git a/doc/classes/RIR/WikipediaPage.src/M000031.html b/doc/classes/RIR/WikipediaPage.src/M000031.html new file mode 100644 index 0000000..0b6b98b --- /dev/null +++ b/doc/classes/RIR/WikipediaPage.src/M000031.html @@ -0,0 +1,19 @@ + + + + get_url (RIR::WikipediaPage) + + + + +
# File lib/rir/document.rb, line 140
+    def self.get_url(name)
+      raise ArgumentError, "Bad encoding", name unless name.isutf8
+
+      atts = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&titles=#{URI.escape name}&inprop=url&prop=info&format=xml" ).toutf8).elements['api/query/pages/page'].attributes
+
+      atts['fullurl'] if atts['missing'].nil?
+    end
+ + diff --git a/doc/classes/RIR/WikipediaPage.src/M000032.html b/doc/classes/RIR/WikipediaPage.src/M000032.html new file mode 100644 index 0000000..d93d8db --- /dev/null +++ b/doc/classes/RIR/WikipediaPage.src/M000032.html @@ -0,0 +1,17 @@ + + + + search_homepage (RIR::WikipediaPage) + + + + +
# File lib/rir/document.rb, line 148
+    def self.search_homepage(name)
+      title = WikipediaPage.search_wikipedia_titles name
+
+      WikipediaPage.new(WikipediaPage.get_url title[0]) unless title.nil? || title.empty?
+    end
+ + diff --git a/doc/classes/String.html b/doc/classes/String.html index 076d643..f7f1a75 100644 --- a/doc/classes/String.html +++ b/doc/classes/String.html @@ -99,27 +99,27 @@ useful function.
@@ -150,13 +150,13 @@ useful function.

Public Instance methods

-
- +
+ -
- +
+ -
- +
+ -
- +
+ -
- +
+ -
- +
+ -
- +
+ -
- +
+ -
- +
+ -
- +
+ -
- +
+
- + strip_xml_tags!() diff --git a/doc/classes/String.src/M000001.html b/doc/classes/String.src/M000001.html deleted file mode 100644 index 603a7ac..0000000 --- a/doc/classes/String.src/M000001.html +++ /dev/null @@ -1,15 +0,0 @@ - - - - is_stopword? (String) - - - - -
# File lib/rir/string.rb, line 76
-  def is_stopword?
-    Stoplist.include?(self.downcase)
-  end
- - diff --git a/doc/classes/String.src/M000002.html b/doc/classes/String.src/M000002.html index c21c139..603a7ac 100644 --- a/doc/classes/String.src/M000002.html +++ b/doc/classes/String.src/M000002.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - remove_special_characters (String) + is_stopword? (String) -
# File lib/rir/string.rb, line 82
-  def remove_special_characters
-    self.split.collect { |w| w.gsub(/\W/,' ').split.collect { |w| w.gsub(/\W/,' ').strip.sub(/\A.\z/, '')}.join(' ').strip.sub(/\A.\z/, '')}.join(' ')
+  
# File lib/rir/string.rb, line 76
+  def is_stopword?
+    Stoplist.include?(self.downcase)
   end
diff --git a/doc/classes/String.src/M000003.html b/doc/classes/String.src/M000003.html index 01c1839..c21c139 100644 --- a/doc/classes/String.src/M000003.html +++ b/doc/classes/String.src/M000003.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - strip_xml_tags! (String) + remove_special_characters (String) -
# File lib/rir/string.rb, line 91
-  def strip_xml_tags!
-    replace strip_with_pattern /<\/?[^>]*>/
+  
# File lib/rir/string.rb, line 82
+  def remove_special_characters
+    self.split.collect { |w| w.gsub(/\W/,' ').split.collect { |w| w.gsub(/\W/,' ').strip.sub(/\A.\z/, '')}.join(' ').strip.sub(/\A.\z/, '')}.join(' ')
   end
diff --git a/doc/classes/String.src/M000004.html b/doc/classes/String.src/M000004.html index 2d020b7..01c1839 100644 --- a/doc/classes/String.src/M000004.html +++ b/doc/classes/String.src/M000004.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - strip_xml_tags (String) + strip_xml_tags! (String) -
# File lib/rir/string.rb, line 100
-  def strip_xml_tags
-    dup.strip_xml_tags!
+  
# File lib/rir/string.rb, line 91
+  def strip_xml_tags!
+    replace strip_with_pattern /<\/?[^>]*>/
   end
diff --git a/doc/classes/String.src/M000005.html b/doc/classes/String.src/M000005.html index 1f77395..2d020b7 100644 --- a/doc/classes/String.src/M000005.html +++ b/doc/classes/String.src/M000005.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - strip_javascripts! (String) + strip_xml_tags (String) -
# File lib/rir/string.rb, line 114
-  def strip_javascripts!
-    replace strip_with_pattern /<script type="text\/javascript">(.+?)<\/script>/m 
+  
# File lib/rir/string.rb, line 100
+  def strip_xml_tags
+    dup.strip_xml_tags!
   end
diff --git a/doc/classes/String.src/M000006.html b/doc/classes/String.src/M000006.html index 8a73177..1f77395 100644 --- a/doc/classes/String.src/M000006.html +++ b/doc/classes/String.src/M000006.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - strip_javascripts (String) + strip_javascripts! (String) -
# File lib/rir/string.rb, line 127
-  def strip_javascripts
-    dup.strip_javascripts!
+  
# File lib/rir/string.rb, line 114
+  def strip_javascripts!
+    replace strip_with_pattern /<script type="text\/javascript">(.+?)<\/script>/m 
   end
diff --git a/doc/classes/String.src/M000007.html b/doc/classes/String.src/M000007.html index 49c5a94..8a73177 100644 --- a/doc/classes/String.src/M000007.html +++ b/doc/classes/String.src/M000007.html @@ -2,15 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - strip_stylesheets! (String) + strip_javascripts (String) -
# File lib/rir/string.rb, line 131
-  def strip_stylesheets!
-  # TODO: rewamp. dunno what is it.
-    replace strip_with_pattern /<style type="text\/css">(.+?)<\/style>/m 
+  
# File lib/rir/string.rb, line 127
+  def strip_javascripts
+    dup.strip_javascripts!
   end
diff --git a/doc/classes/String.src/M000008.html b/doc/classes/String.src/M000008.html index a10b5bd..49c5a94 100644 --- a/doc/classes/String.src/M000008.html +++ b/doc/classes/String.src/M000008.html @@ -2,14 +2,15 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - strip_stylesheets (String) + strip_stylesheets! (String) -
# File lib/rir/string.rb, line 136
-  def strip_stylesheets
-    dup.strip_stylesheets!
+  
# File lib/rir/string.rb, line 131
+  def strip_stylesheets!
+  # TODO: rewamp. dunno what is it.
+    replace strip_with_pattern /<style type="text\/css">(.+?)<\/style>/m 
   end
diff --git a/doc/classes/String.src/M000009.html b/doc/classes/String.src/M000009.html index 37f6f1f..a10b5bd 100644 --- a/doc/classes/String.src/M000009.html +++ b/doc/classes/String.src/M000009.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - strip_punctuation! (String) + strip_stylesheets (String) -
# File lib/rir/string.rb, line 145
-  def strip_punctuation!
-    replace strip_with_pattern /[^a-zA-Z0-9\-\s]/
+  
# File lib/rir/string.rb, line 136
+  def strip_stylesheets
+    dup.strip_stylesheets!
   end
diff --git a/doc/classes/String.src/M000010.html b/doc/classes/String.src/M000010.html index 36b9164..37f6f1f 100644 --- a/doc/classes/String.src/M000010.html +++ b/doc/classes/String.src/M000010.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - strip_punctuation (String) + strip_punctuation! (String) -
# File lib/rir/string.rb, line 153
-  def strip_punctuation
-    dup.strip_punctuation!
+  
# File lib/rir/string.rb, line 145
+  def strip_punctuation!
+    replace strip_with_pattern /[^a-zA-Z0-9\-\s]/
   end
diff --git a/doc/classes/String.src/M000011.html b/doc/classes/String.src/M000011.html index c920941..36b9164 100644 --- a/doc/classes/String.src/M000011.html +++ b/doc/classes/String.src/M000011.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> - extract_xmltags_values (String) + strip_punctuation (String) -
# File lib/rir/string.rb, line 161
-  def extract_xmltags_values(tag_name)
-    self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten
+  
# File lib/rir/string.rb, line 153
+  def strip_punctuation
+    dup.strip_punctuation!
   end
diff --git a/doc/classes/String.src/M000012.html b/doc/classes/String.src/M000012.html index 427128d..c920941 100644 --- a/doc/classes/String.src/M000012.html +++ b/doc/classes/String.src/M000012.html @@ -7,7 +7,7 @@ -
# File lib/rir/string.rb, line 162
+  
# File lib/rir/string.rb, line 161
   def extract_xmltags_values(tag_name)
     self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten
   end
diff --git a/doc/created.rid b/doc/created.rid index 5367b38..6e8bb1d 100644 --- a/doc/created.rid +++ b/doc/created.rid @@ -1 +1 @@ -Thu, 25 Nov 2010 17:01:52 +0100 +Thu, 25 Nov 2010 17:10:04 +0100 diff --git a/doc/files/README_markdown.html b/doc/files/README_markdown.html deleted file mode 100644 index a52aaa0..0000000 --- a/doc/files/README_markdown.html +++ /dev/null @@ -1,121 +0,0 @@ - - - - File: README.markdown [RDoc Documentation] - - - - - - - - - -
-

README.markdown

- - - - - - - - - -
Path:README.markdown - -
Last Update:2010-11-05 14:46:27 +0100
-
- - -
- -
- -
-

-# Ruby Information Retrieval (rIR) -

-

-Copyright (C) 2010-2011 Romain Deveaud -

-

-License -

-
=
-

-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -

-

-This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -

-

-You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -

- -
- -
- - -
- - - -
- - - - - - - - - -
- -
- - - diff --git a/doc/files/lib/rir/corpus_rb.html b/doc/files/lib/rir/corpus_rb.html index 2833220..95ef563 100644 --- a/doc/files/lib/rir/corpus_rb.html +++ b/doc/files/lib/rir/corpus_rb.html @@ -87,52 +87,6 @@ You should have received a copy of the GNU General Public License along with this program. If not, see <www.gnu.org/licenses/>.

-

-This file is a part of an Information Retrieval oriented Ruby library -

-

-Copyright (C) 2010-2011 Romain Deveaud -

-

-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -

-

-This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -

-

-You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -

-

-This file is a part of an Information Retrieval oriented Ruby library -

-

-Copyright (C) 2010-2011 Romain Deveaud -

-

-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -

-

-This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -

-

-You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -

diff --git a/doc/files/lib/rir/ttagger_rb.html b/doc/files/lib/rir/ttagger_rb.html index 67b5fa6..7358f13 100644 --- a/doc/files/lib/rir/ttagger_rb.html +++ b/doc/files/lib/rir/ttagger_rb.html @@ -111,6 +111,52 @@ with this program. If not, see <www.gnu.org/licenses/>.


+This file is a part of an Information Retrieval oriented Ruby library +

+

+Copyright (C) 2010-2011 Romain Deveaud +

+

+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +

+

+This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +

+

+You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>. +

+

+This file is a part of an Information Retrieval oriented Ruby library +

+

+Copyright (C) 2010-2011 Romain Deveaud +

+

+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +

+

+This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +

+

+You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>. +

+

General module for many purposes related to Information Retrieval.

diff --git a/doc/files/main_rb.html b/doc/files/main_rb.html deleted file mode 100644 index 192bac7..0000000 --- a/doc/files/main_rb.html +++ /dev/null @@ -1,100 +0,0 @@ - - - - File: main.rb [RDoc Documentation] - - - - - - - - - -
-

main.rb

- - - - - - - - - -
Path:main.rb - -
Last Update:2010-11-05 15:05:38 +0100
-
- - -
- -
- -
-

Required files

- -
- - rir   - -
-
- -
- - -
- - - -
- - - - - - - - - -
- - - - - diff --git a/doc/fr_class_index.html b/doc/fr_class_index.html index 36ac9d8..0773723 100644 --- a/doc/fr_class_index.html +++ b/doc/fr_class_index.html @@ -19,6 +19,8 @@ RIR
+ RIR::Corpus
+ RIR::Document
RIR::Indri
@@ -45,6 +47,10 @@ RIR::WikipediaPage
+ Regexp
+ + String
+
diff --git a/doc/fr_file_index.html b/doc/fr_file_index.html index 6d7fbed..87c39ae 100644 --- a/doc/fr_file_index.html +++ b/doc/fr_file_index.html @@ -19,10 +19,16 @@ lib/rir.rb
+ lib/rir/corpus.rb
+ lib/rir/document.rb
lib/rir/query.rb
+ lib/rir/regexp.rb
+ + lib/rir/string.rb
+ lib/rir/ttagger.rb
diff --git a/doc/fr_method_index.html b/doc/fr_method_index.html index 368ae37..4b02dfc 100644 --- a/doc/fr_method_index.html +++ b/doc/fr_method_index.html @@ -17,41 +17,69 @@

Methods

- count_words (RIR::Document)
+ count_words (RIR::Document)
- entropy (RIR::Document)
+ entropy (RIR::Document)
- format_words (RIR::Document)
+ extract_xmltags_values (String)
- get_content (RIR::WebDocument)
+ files (RIR::Corpus)
- get_url (RIR::WikipediaPage)
+ format_words (RIR::Document)
- new (RIR::WebDocument)
+ get_content (RIR::WebDocument)
- new (RIR::Indri::IndriQuery)
+ get_url (RIR::WikipediaPage)
- new (RIR::Indri::Parameters)
+ is_stopword? (String)
- new (RIR::Document)
+ negated (Regexp)
- new (RIR::TreeTagger::TaggerChunker)
+ new (RIR::Document)
- new (RIR::TreeTagger::Chunk)
+ new (RIR::WebDocument)
- ngrams (RIR::Document)
+ new (RIR::Indri::IndriQuery)
- parse (RIR::TreeTagger::TaggerChunker)
+ new (RIR::Corpus)
- search_homepage (RIR::WikipediaPage)
+ new (RIR::TreeTagger::TaggerChunker)
- search_wikipedia_titles (RIR::WikipediaPage)
+ new (RIR::Indri::Parameters)
- tf (RIR::Document)
+ new (RIR::TreeTagger::Chunk)
- to_s (RIR::Indri::Parameters)
+ ngrams (RIR::Document)
- to_s (RIR::Indri::IndriQuery)
+ parse (RIR::TreeTagger::TaggerChunker)
+ + remove_special_characters (String)
+ + search_homepage (RIR::WikipediaPage)
+ + search_wikipedia_titles (RIR::WikipediaPage)
+ + strip_javascripts (String)
+ + strip_javascripts! (String)
+ + strip_punctuation (String)
+ + strip_punctuation! (String)
+ + strip_stylesheets (String)
+ + strip_stylesheets! (String)
+ + strip_xml_tags (String)
+ + strip_xml_tags! (String)
+ + tf (RIR::Document)
+ + to_s (RIR::Indri::IndriQuery)
+ + to_s (RIR::Indri::Parameters)
-- 1.8.2.3