diff --git a/doc/classes/RIR/Corpus.html b/doc/classes/RIR/Corpus.html new file mode 100644 index 0000000..316ee35 --- /dev/null +++ b/doc/classes/RIR/Corpus.html @@ -0,0 +1,200 @@ + + +
+Class | +RIR::Corpus | +
In: | +
+
+
+
+
+ lib/rir/corpus.rb
+
+
+
+
+ + + |
+
Parent: | ++ + Object + + | +
path | + +[RW] | + ++ |
+Recursively returns the paths of all files in self.path. WARNING: this +function may take a long time if there are many files in subdirectories. +
++ c = Corpus.new "my/path" + c.files # => ["my/path/README.txt", "my/path/lib/code.rb"] ++ +
# File lib/rir/corpus.rb, line 25 + def initialize(path) + @path = path.chomp "/" + end+ + diff --git a/doc/classes/RIR/Corpus.src/M000017.html b/doc/classes/RIR/Corpus.src/M000017.html new file mode 100644 index 0000000..a4eb5fa --- /dev/null +++ b/doc/classes/RIR/Corpus.src/M000017.html @@ -0,0 +1,15 @@ + + + +
# File lib/rir/corpus.rb, line 35 + def files + Dir["#{@path}/**/*.*"] + end+ + diff --git a/doc/classes/RIR/Corpus.src/M000018.html b/doc/classes/RIR/Corpus.src/M000018.html new file mode 100644 index 0000000..2ebdcfd --- /dev/null +++ b/doc/classes/RIR/Corpus.src/M000018.html @@ -0,0 +1,15 @@ + + + +
# File lib/rir/corpus.rb, line 36 + def files + Dir["#{@path}/**/*.*"] + end+ + diff --git a/doc/classes/RIR/Document.src/M000019.html b/doc/classes/RIR/Document.src/M000019.html new file mode 100644 index 0000000..72c51f5 --- /dev/null +++ b/doc/classes/RIR/Document.src/M000019.html @@ -0,0 +1,23 @@ + + + +
# File lib/rir/document.rb, line 31 + def format_words + wo = [] + + @doc_content.split.each do |w| + w.split(/\W/).each do |sw| + wo.push(sw.downcase) if sw =~ /[a-zA-Z]/ + end + end + + wo + end+ + diff --git a/doc/classes/RIR/Document.src/M000020.html b/doc/classes/RIR/Document.src/M000020.html new file mode 100644 index 0000000..6257629 --- /dev/null +++ b/doc/classes/RIR/Document.src/M000020.html @@ -0,0 +1,26 @@ + + + +
# File lib/rir/document.rb, line 46 + def ngrams(n) + window = [] + ngrams_array = [] + + @words.each do |w| + window.push(w) + if window.size == n + ngrams_array.push window.join(" ") + window.delete_at(0) + end + end + + ngrams_array.uniq + end+ + diff --git a/doc/classes/RIR/Document.src/M000021.html b/doc/classes/RIR/Document.src/M000021.html new file mode 100644 index 0000000..e8ddeec --- /dev/null +++ b/doc/classes/RIR/Document.src/M000021.html @@ -0,0 +1,18 @@ + + + +
# File lib/rir/document.rb, line 64 + def count_words + counts = Hash.new { |h,k| h[k] = 0 } + @words.each { |w| counts[w] += 1 } + + counts + end+ + diff --git a/doc/classes/RIR/Document.src/M000022.html b/doc/classes/RIR/Document.src/M000022.html new file mode 100644 index 0000000..5694971 --- /dev/null +++ b/doc/classes/RIR/Document.src/M000022.html @@ -0,0 +1,24 @@ + + + +
# File lib/rir/document.rb, line 77 + def entropy(s) + en = 0.0 + counts = self.count_words + + s.split.each do |w| + p_wi = counts[w].to_f/@words.count.to_f + en += p_wi*Math.log2(p_wi) + end + + en *= -1 + en + end+ + diff --git a/doc/classes/RIR/Document.src/M000023.html b/doc/classes/RIR/Document.src/M000023.html new file mode 100644 index 0000000..b882fcd --- /dev/null +++ b/doc/classes/RIR/Document.src/M000023.html @@ -0,0 +1,16 @@ + + + +
# File lib/rir/document.rb, line 92 + def initialize(content) + @doc_content = content + @words = format_words + end+ + diff --git a/doc/classes/RIR/Indri.html b/doc/classes/RIR/Indri.html new file mode 100644 index 0000000..34a1058 --- /dev/null +++ b/doc/classes/RIR/Indri.html @@ -0,0 +1,109 @@ + + + +
Module | +RIR::Indri | +
In: | +
+
+
+
+
+ lib/rir/query.rb
+
+
+
+
+ + + |
+
Class | +RIR::Indri::IndriQuery | +
In: | +
+
+
+
+
+ lib/rir/query.rb
+
+
+
+
+ + + |
+
Parent: | ++ + + + RIR::Query + + + + | +
id | + +[RW] | + ++ |
params | + +[RW] | + ++ |
query | + +[RW] | + ++ |
rule | + +[RW] | + ++ |
# File lib/rir/query.rb, line 62 + def initialize(id,query,params) + @params = params + # Here we set the default retrieval model as Language Modeling + # with a Dirichlet smoothing at 2500. + # TODO: maybe a Rule class... + @params.rule = 'method:dirichlet,mu:2500' if @params.rule.nil? + + @id = id + @query = query + end+ + diff --git a/doc/classes/RIR/Indri/IndriQuery.src/M000015.html b/doc/classes/RIR/Indri/IndriQuery.src/M000015.html new file mode 100644 index 0000000..e237f9a --- /dev/null +++ b/doc/classes/RIR/Indri/IndriQuery.src/M000015.html @@ -0,0 +1,22 @@ + + + +
# File lib/rir/query.rb, line 73 + def to_s + h = @params.to_s + h += "<query>\n" + h += "<number>#{@id}</number>\n" + h += "<text>#{@query}</text>\n" + h += "</query>\n" + h += "</parameters>" + + h + end+ + diff --git a/doc/classes/RIR/Indri/IndriQuery.src/M000016.html b/doc/classes/RIR/Indri/IndriQuery.src/M000016.html new file mode 100644 index 0000000..515e431 --- /dev/null +++ b/doc/classes/RIR/Indri/IndriQuery.src/M000016.html @@ -0,0 +1,22 @@ + + + +
# File lib/rir/query.rb, line 74 + def to_s + h = @params.to_s + h += "<query>\n" + h += "<number>#{@id}</number>\n" + h += "<text>#{@query}</text>\n" + h += "</query>\n" + h += "</parameters>" + + h + end+ + diff --git a/doc/classes/RIR/Indri/Parameters.html b/doc/classes/RIR/Indri/Parameters.html new file mode 100644 index 0000000..686469e --- /dev/null +++ b/doc/classes/RIR/Indri/Parameters.html @@ -0,0 +1,255 @@ + + + +
Class | +RIR::Indri::Parameters | +
In: | +
+
+
+
+
+ lib/rir/query.rb
+
+
+
+
+ + + |
+
Parent: | ++ + Object + + | +
baseline | + +[RW] | + ++ |
corpus | + +[RW] | + ++ |
count | + +[RW] | + ++ |
memory | + +[RW] | + ++ |
offset | + +[RW] | + ++ |
print_docs | + +[RW] | + ++ |
print_query | + +[RW] | + ++ |
rule | + +[RW] | + ++ |
run_id | + +[RW] | + ++ |
# File lib/rir/query.rb, line 30 + def initialize(corpus,mem="1g",count="1000",offset="1",run_id="default",print_query=false,print_docs=false) + @corpus = corpus + @memory = mem + @count = count + @offset = offset + @run_id = run_id + @print_query = print_query ? "true" : "false" + @print_docs = print_docs ? "true" : "false" + end+ + diff --git a/doc/classes/RIR/Indri/Parameters.src/M000013.html b/doc/classes/RIR/Indri/Parameters.src/M000013.html new file mode 100644 index 0000000..bc4b8a7 --- /dev/null +++ b/doc/classes/RIR/Indri/Parameters.src/M000013.html @@ -0,0 +1,29 @@ + + + +
# File lib/rir/query.rb, line 40 + def to_s + h = "<parameters>\n" + h += "<memory>#{@memory}</memory>\n" + h += "<index>#{@corpus}</index>\n" + h += "<count>#{@count}</count>\n" + unless @baseline.nil? + h += "<baseline>#{@baseline}</baseline>\n" + else + h += "<rule>#{@rule}</rule>\n" + end + h += "<queryOffset>#{@offset}</queryOffset>\n" + h += "<runID>#{@run_id}</runID>\n" + h += "<printQuery>#{@print_query}</printQuery>\n" + h += "<printDocuments>#{@print_docs}</printDocuments>\n" + + h + end+ + diff --git a/doc/classes/RIR/Indri/Parameters.src/M000014.html b/doc/classes/RIR/Indri/Parameters.src/M000014.html new file mode 100644 index 0000000..3529737 --- /dev/null +++ b/doc/classes/RIR/Indri/Parameters.src/M000014.html @@ -0,0 +1,29 @@ + + + +
# File lib/rir/query.rb, line 41 + def to_s + h = "<parameters>\n" + h += "<memory>#{@memory}</memory>\n" + h += "<index>#{@corpus}</index>\n" + h += "<count>#{@count}</count>\n" + unless @baseline.nil? + h += "<baseline>#{@baseline}</baseline>\n" + else + h += "<rule>#{@rule}</rule>\n" + end + h += "<queryOffset>#{@offset}</queryOffset>\n" + h += "<runID>#{@run_id}</runID>\n" + h += "<printQuery>#{@print_query}</printQuery>\n" + h += "<printDocuments>#{@print_docs}</printDocuments>\n" + + h + end+ + diff --git a/doc/classes/RIR/Query.html b/doc/classes/RIR/Query.html new file mode 100644 index 0000000..ba41f8d --- /dev/null +++ b/doc/classes/RIR/Query.html @@ -0,0 +1,110 @@ + + + +
Class | +RIR::Query | +
In: | +
+
+
+
+
+ lib/rir/query.rb
+
+
+
+
+ + + |
+
Parent: | ++ + Object + + | +
# File lib/rir/document.rb, line 105 + def self.get_content(url) + require 'net/http' + Net::HTTP.get(URI.parse(url)) + end+ + diff --git a/doc/classes/RIR/WebDocument.src/M000025.html b/doc/classes/RIR/WebDocument.src/M000025.html new file mode 100644 index 0000000..d7a5169 --- /dev/null +++ b/doc/classes/RIR/WebDocument.src/M000025.html @@ -0,0 +1,16 @@ + + + +
# File lib/rir/document.rb, line 112 + def initialize(url) + @url = url + super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags + end+ + diff --git a/doc/classes/RIR/WikipediaPage.src/M000026.html b/doc/classes/RIR/WikipediaPage.src/M000026.html new file mode 100644 index 0000000..3000535 --- /dev/null +++ b/doc/classes/RIR/WikipediaPage.src/M000026.html @@ -0,0 +1,17 @@ + + + +
# File lib/rir/document.rb, line 125 + def self.search_wikipedia_titles(name) + res = REXML::Document.new(Net::HTTP.get(URI.parse("http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.escape name}&format=xml")).toutf8).elements['api/query/search'] + + res.collect { |e| e.attributes['title'] } unless res.nil? + end+ + diff --git a/doc/classes/RIR/WikipediaPage.src/M000027.html b/doc/classes/RIR/WikipediaPage.src/M000027.html new file mode 100644 index 0000000..3023cc7 --- /dev/null +++ b/doc/classes/RIR/WikipediaPage.src/M000027.html @@ -0,0 +1,17 @@ + + + +
# File lib/rir/document.rb, line 131 + def self.get_url(name) + atts = REXML::Document.new(Net::HTTP.get(URI.parse("http://en.wikipedia.org/w/api.php?action=query&titles=#{URI.escape name}&inprop=url&prop=info&format=xml")).toutf8).elements['api/query/pages/page'].attributes + + atts['fullurl'] if atts['missing'].nil? + end+ + diff --git a/doc/classes/RIR/WikipediaPage.src/M000028.html b/doc/classes/RIR/WikipediaPage.src/M000028.html new file mode 100644 index 0000000..b5289f7 --- /dev/null +++ b/doc/classes/RIR/WikipediaPage.src/M000028.html @@ -0,0 +1,21 @@ + + + +
# File lib/rir/document.rb, line 137 + def self.search_homepage(name) + title = WikipediaPage.search_wikipedia_titles name + + begin + WikipediaPage.new(WikipediaPage.get_url title[0]) unless title.nil? || title.empty? + rescue + puts title[0] + end + end+ + diff --git a/doc/classes/Regexp.html b/doc/classes/Regexp.html new file mode 100644 index 0000000..d23a050 --- /dev/null +++ b/doc/classes/Regexp.html @@ -0,0 +1,175 @@ + + + +
Class | +Regexp | +
In: | +
+
+
+
+
+ lib/rir/regexp.rb
+
+
+
+
+ + + |
+
Parent: | ++ + Object + + | +
+This file is a part of an Information Retrieval oriented Ruby library +
+
+Copyright (C) 2010-2011 Romain Deveaud
+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +
++This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +
++You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>. +
+ +# File lib/rir/regexp.rb, line 22 + def negated + /^((?!#{self}).)*$/ + end+ + diff --git a/doc/classes/String.src/M000010.html b/doc/classes/String.src/M000010.html new file mode 100644 index 0000000..36b9164 --- /dev/null +++ b/doc/classes/String.src/M000010.html @@ -0,0 +1,15 @@ + + + +
# File lib/rir/string.rb, line 153 + def strip_punctuation + dup.strip_punctuation! + end+ + diff --git a/doc/classes/String.src/M000011.html b/doc/classes/String.src/M000011.html new file mode 100644 index 0000000..c920941 --- /dev/null +++ b/doc/classes/String.src/M000011.html @@ -0,0 +1,15 @@ + + + +
# File lib/rir/string.rb, line 161 + def extract_xmltags_values(tag_name) + self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten + end+ + diff --git a/doc/classes/String.src/M000012.html b/doc/classes/String.src/M000012.html new file mode 100644 index 0000000..427128d --- /dev/null +++ b/doc/classes/String.src/M000012.html @@ -0,0 +1,15 @@ + + + +
# File lib/rir/string.rb, line 162 + def extract_xmltags_values(tag_name) + self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten + end+ + diff --git a/doc/files/lib/rir/corpus_rb.html b/doc/files/lib/rir/corpus_rb.html new file mode 100644 index 0000000..2833220 --- /dev/null +++ b/doc/files/lib/rir/corpus_rb.html @@ -0,0 +1,163 @@ + + + +
Path: | +lib/rir/corpus.rb + + | +
Last Update: | +2010-11-23 18:20:24 +0100 | +
+This file is a part of an Information Retrieval oriented Ruby library +
+
+Copyright (C) 2010-2011 Romain Deveaud
+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +
++This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +
++You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>. +
++This file is a part of an Information Retrieval oriented Ruby library +
+
+Copyright (C) 2010-2011 Romain Deveaud
+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +
++This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +
++You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>. +
++This file is a part of an Information Retrieval oriented Ruby library +
+
+Copyright (C) 2010-2011 Romain Deveaud
+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +
++This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +
++You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>. +
+ +Path: | +lib/rir/query.rb + + | +
Last Update: | +2010-11-23 18:20:30 +0100 | +
+This file is a part of an Information Retrieval oriented Ruby library +
+
+Copyright (C) 2010-2011 Romain Deveaud
+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +
++This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +
++You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>. +
+ +Path: | +lib/rir/regexp.rb + + | +
Last Update: | +2010-11-19 11:27:06 +0100 | +
+This file is a part of an Information Retrieval oriented Ruby library +
+
+Copyright (C) 2010-2011 Romain Deveaud
+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +
++This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +
++You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>. +
+ +