diff --git a/doc/classes/Corpus.html b/doc/classes/Corpus.html new file mode 100644 index 0000000..cacc4da --- /dev/null +++ b/doc/classes/Corpus.html @@ -0,0 +1,200 @@ + + +
+Class | +Corpus | +
In: | +
+
+
+
+
+ lib/rir/corpus.rb
+
+
+
+
+ + + |
+
Parent: | ++ + Object + + | +
path | + +[RW] | + ++ |
+Recursively outputs all files in self.path. WARNING ! This +function may take a lot of time if many files are in subdirectories. +
++ c = Corpus.new "my/path" + c.files # => ["README.txt", "lib/code.rb"] ++ +
# File lib/rir/corpus.rb, line 25 + def initialize(path) + @path = path.chomp "/" + end+ + diff --git a/doc/classes/Corpus.src/M000002.html b/doc/classes/Corpus.src/M000002.html new file mode 100644 index 0000000..0a9287f --- /dev/null +++ b/doc/classes/Corpus.src/M000002.html @@ -0,0 +1,15 @@ + + + +
# File lib/rir/corpus.rb, line 35 + def files + Dir["#{@path}/**/*.*"] + end+ + diff --git a/doc/classes/Indri.html b/doc/classes/Indri.html new file mode 100644 index 0000000..c875dd7 --- /dev/null +++ b/doc/classes/Indri.html @@ -0,0 +1,109 @@ + + + +
Module | +Indri | +
In: | +
+
+
+
+
+ lib/rir/query.rb
+
+
+
+
+ + + |
+
Class | +Indri::IndriQuery | +
In: | +
+
+
+
+
+ lib/rir/query.rb
+
+
+
+
+ + + |
+
Parent: | ++ + + + Query + + + + | +
id | + +[RW] | + ++ |
params | + +[RW] | + ++ |
query | + +[RW] | + ++ |
rule | + +[RW] | + ++ |
# File lib/rir/query.rb, line 62 + def initialize(id,query,params) + @params = params + # Here we set the default retrieval model as Language Modeling + # with a Dirichlet smoothing at 2500. + # TODO: maybe a Rule class... + @params.rule = 'method:dirichlet,mu:2500' if @params.rule.nil? + + @id = id + @query = query + end+ + diff --git a/doc/classes/Indri/IndriQuery.src/M000021.html b/doc/classes/Indri/IndriQuery.src/M000021.html new file mode 100644 index 0000000..728320c --- /dev/null +++ b/doc/classes/Indri/IndriQuery.src/M000021.html @@ -0,0 +1,22 @@ + + + +
# File lib/rir/query.rb, line 73 + def to_s + h = @params.to_s + h += "<query>\n" + h += "<number>#{@id}</number>\n" + h += "<text>#{@query}</text>\n" + h += "</query>\n" + h += "</parameters>" + + h + end+ + diff --git a/doc/classes/Indri/Parameters.html b/doc/classes/Indri/Parameters.html new file mode 100644 index 0000000..e70ca27 --- /dev/null +++ b/doc/classes/Indri/Parameters.html @@ -0,0 +1,255 @@ + + + +
Class | +Indri::Parameters | +
In: | +
+
+
+
+
+ lib/rir/query.rb
+
+
+
+
+ + + |
+
Parent: | ++ + Object + + | +
baseline | + +[RW] | + ++ |
count | + +[RW] | + ++ |
index_path | + +[RW] | + ++ |
memory | + +[RW] | + ++ |
offset | + +[RW] | + ++ |
print_docs | + +[RW] | + ++ |
print_query | + +[RW] | + ++ |
rule | + +[RW] | + ++ |
run_id | + +[RW] | + ++ |
# File lib/rir/query.rb, line 30 + def initialize(corpus,mem="1g",count="1000",offset="1",run_id="default",print_query=false,print_docs=false) + @index_path = corpus + @memory = mem + @count = count + @offset = offset + @run_id = run_id + @print_query = print_query ? "true" : "false" + @print_docs = print_docs ? "true" : "false" + end+ + diff --git a/doc/classes/Indri/Parameters.src/M000019.html b/doc/classes/Indri/Parameters.src/M000019.html new file mode 100644 index 0000000..da5c34c --- /dev/null +++ b/doc/classes/Indri/Parameters.src/M000019.html @@ -0,0 +1,29 @@ + + + +
# File lib/rir/query.rb, line 40 + def to_s + h = "<parameters>\n" + h += "<memory>#{@memory}</memory>\n" + h += "<index>#{@index_path}</index>\n" + h += "<count>#{@count}</count>\n" + unless @baseline.nil? + h += "<baseline>#{@baseline}</baseline>\n" + else + h += "<rule>#{@rule}</rule>\n" + end + h += "<queryOffset>#{@offset}</queryOffset>\n" + h += "<runID>#{@run_id}</runID>\n" + h += "<printQuery>#{@print_query}</printQuery>\n" + h += "<printDocuments>#{@print_docs}</printDocuments>\n" + + h + end+ + diff --git a/doc/classes/Query.html b/doc/classes/Query.html new file mode 100644 index 0000000..c29e471 --- /dev/null +++ b/doc/classes/Query.html @@ -0,0 +1,110 @@ + + + +
Class | +Query | +
In: | +
+
+
+
+
+ lib/rir/query.rb
+
+
+
+
+ + + |
+
Parent: | ++ + Object + + | +
-This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
--This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
--This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
--This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
-+
General module for many purposes related to Information Retrieval.
@@ -225,11 +103,7 @@ General module for many purposes related to Information Retrieval.# File lib/rir/document.rb, line 31 +# File lib/rir/document.rb, line 34 def format_words wo = [] diff --git a/doc/classes/RIR/Document.src/M000023.html b/doc/classes/RIR/Document.src/M000023.html index 6257629..3416fef 100644 --- a/doc/classes/RIR/Document.src/M000023.html +++ b/doc/classes/RIR/Document.src/M000023.html @@ -7,7 +7,7 @@ -# File lib/rir/document.rb, line 46 +# File lib/rir/document.rb, line 49 def ngrams(n) window = [] ngrams_array = [] diff --git a/doc/classes/RIR/Document.src/M000024.html b/doc/classes/RIR/Document.src/M000024.html index e8ddeec..bb859fd 100644 --- a/doc/classes/RIR/Document.src/M000024.html +++ b/doc/classes/RIR/Document.src/M000024.html @@ -7,7 +7,7 @@ -# File lib/rir/document.rb, line 64 +# File lib/rir/document.rb, line 67 def count_words counts = Hash.new { |h,k| h[k] = 0 } @words.each { |w| counts[w] += 1 } diff --git a/doc/classes/RIR/Document.src/M000025.html b/doc/classes/RIR/Document.src/M000025.html index 50f6db7..9ccf905 100644 --- a/doc/classes/RIR/Document.src/M000025.html +++ b/doc/classes/RIR/Document.src/M000025.html @@ -7,7 +7,7 @@ -# File lib/rir/document.rb, line 78 +# File lib/rir/document.rb, line 81 def entropy(s) en = 0.0 counts = self.count_words diff --git a/doc/classes/RIR/Document.src/M000026.html b/doc/classes/RIR/Document.src/M000026.html index eb4436b..0b57bd6 100644 --- a/doc/classes/RIR/Document.src/M000026.html +++ b/doc/classes/RIR/Document.src/M000026.html @@ -7,7 +7,7 @@ -# File lib/rir/document.rb, line 94 +# File lib/rir/document.rb, line 97 def tf(s) self.count_words[s].to_f/@words.size.to_f enddiff --git a/doc/classes/RIR/Document.src/M000027.html b/doc/classes/RIR/Document.src/M000027.html index 1ef96d5..6b8e2c2 100644 --- a/doc/classes/RIR/Document.src/M000027.html +++ b/doc/classes/RIR/Document.src/M000027.html @@ -7,7 +7,7 @@ -# File lib/rir/document.rb, line 99 +# File lib/rir/document.rb, line 102 def initialize(content) @doc_content = content @words = format_words diff --git a/doc/classes/RIR/WebDocument.src/M000028.html b/doc/classes/RIR/WebDocument.src/M000028.html index e3931e7..cf2b1a5 100644 --- a/doc/classes/RIR/WebDocument.src/M000028.html +++ b/doc/classes/RIR/WebDocument.src/M000028.html @@ -7,7 +7,7 @@ -# File lib/rir/document.rb, line 112 +# File lib/rir/document.rb, line 115 def self.get_content(url) require 'net/http' Net::HTTP.get(URI.parse(url)) diff --git a/doc/classes/RIR/WebDocument.src/M000029.html b/doc/classes/RIR/WebDocument.src/M000029.html index dd6da25..6750287 100644 --- a/doc/classes/RIR/WebDocument.src/M000029.html +++ b/doc/classes/RIR/WebDocument.src/M000029.html @@ -7,7 +7,7 @@ -# File lib/rir/document.rb, line 119 +# File lib/rir/document.rb, line 122 def initialize(url) @url = url super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags diff --git a/doc/classes/RIR/WikipediaPage.src/M000030.html b/doc/classes/RIR/WikipediaPage.src/M000030.html index eb3518e..3318c27 100644 --- a/doc/classes/RIR/WikipediaPage.src/M000030.html +++ b/doc/classes/RIR/WikipediaPage.src/M000030.html @@ -7,7 +7,7 @@ -# File lib/rir/document.rb, line 132 +# File lib/rir/document.rb, line 135 def self.search_wikipedia_titles(name) raise ArgumentError, "Bad encoding", name unless name.isutf8 diff --git a/doc/classes/RIR/WikipediaPage.src/M000031.html b/doc/classes/RIR/WikipediaPage.src/M000031.html index 0b6b98b..01ebe85 100644 --- a/doc/classes/RIR/WikipediaPage.src/M000031.html +++ b/doc/classes/RIR/WikipediaPage.src/M000031.html @@ -7,7 +7,7 @@ -# File lib/rir/document.rb, line 140 +# File lib/rir/document.rb, line 143 def self.get_url(name) raise ArgumentError, "Bad encoding", name unless name.isutf8 diff --git a/doc/classes/RIR/WikipediaPage.src/M000032.html b/doc/classes/RIR/WikipediaPage.src/M000032.html index d93d8db..41f155c 100644 --- a/doc/classes/RIR/WikipediaPage.src/M000032.html +++ b/doc/classes/RIR/WikipediaPage.src/M000032.html @@ -7,7 +7,7 @@ -# File lib/rir/document.rb, line 148 +# File lib/rir/document.rb, line 151 def self.search_homepage(name) title = WikipediaPage.search_wikipedia_titles name diff --git a/doc/classes/Regexp.html b/doc/classes/Regexp.html index d23a050..03160e6 100644 --- a/doc/classes/Regexp.html +++ b/doc/classes/Regexp.html @@ -83,33 +83,6 @@-@@ -118,7 +91,7 @@ href="http://www.gnu.org/licenses/">www.gnu.org/licenses/>.---This file is a part of an Information Retrieval oriented Ruby library -
--Copyright (C) 2010-2011 Romain Deveaud
-- -This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
- -
# File lib/rir/regexp.rb, line 24 + def negated + /^((?!#{self}).)*$/ + end+ + diff --git a/doc/classes/String.html b/doc/classes/String.html index f7f1a75..1df0e56 100644 --- a/doc/classes/String.html +++ b/doc/classes/String.html @@ -99,27 +99,27 @@ useful function.
# File lib/rir/string.rb, line 91 - def strip_xml_tags! - replace strip_with_pattern /<\/?[^>]*>/ +# File lib/rir/string.rb, line 78 + def is_stopword? + Stoplist.include?(self.downcase) enddiff --git a/doc/classes/String.src/M000005.html b/doc/classes/String.src/M000005.html index 2d020b7..9073156 100644 --- a/doc/classes/String.src/M000005.html +++ b/doc/classes/String.src/M000005.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -strip_xml_tags (String) +remove_special_characters (String) -# File lib/rir/string.rb, line 100 - def strip_xml_tags - dup.strip_xml_tags! +# File lib/rir/string.rb, line 84 + def remove_special_characters + self.split.collect { |w| w.gsub(/\W/,' ').split.collect { |w| w.gsub(/\W/,' ').strip.sub(/\A.\z/, '')}.join(' ').strip.sub(/\A.\z/, '')}.join(' ') enddiff --git a/doc/classes/String.src/M000006.html b/doc/classes/String.src/M000006.html index 1f77395..0d97963 100644 --- a/doc/classes/String.src/M000006.html +++ b/doc/classes/String.src/M000006.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -strip_javascripts! (String) +strip_xml_tags! (String) -# File lib/rir/string.rb, line 114 - def strip_javascripts! - replace strip_with_pattern /<script type="text\/javascript">(.+?)<\/script>/m +# File lib/rir/string.rb, line 93 + def strip_xml_tags! + replace strip_with_pattern /<\/?[^>]*>/ enddiff --git a/doc/classes/String.src/M000007.html b/doc/classes/String.src/M000007.html index 8a73177..00efa8f 100644 --- a/doc/classes/String.src/M000007.html +++ b/doc/classes/String.src/M000007.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -strip_javascripts (String) +strip_xml_tags (String) -# File lib/rir/string.rb, line 127 - def strip_javascripts - dup.strip_javascripts! +# File lib/rir/string.rb, line 102 + def strip_xml_tags + dup.strip_xml_tags! enddiff --git a/doc/classes/String.src/M000008.html b/doc/classes/String.src/M000008.html index 49c5a94..93970bf 100644 --- a/doc/classes/String.src/M000008.html +++ b/doc/classes/String.src/M000008.html @@ -2,15 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -strip_stylesheets! (String) +strip_javascripts! (String) -# File lib/rir/string.rb, line 131 - def strip_stylesheets! - # TODO: rewamp. dunno what is it. - replace strip_with_pattern /<style type="text\/css">(.+?)<\/style>/m +# File lib/rir/string.rb, line 116 + def strip_javascripts! + replace strip_with_pattern /<script type="text\/javascript">(.+?)<\/script>/m enddiff --git a/doc/classes/String.src/M000009.html b/doc/classes/String.src/M000009.html index a10b5bd..b143c5a 100644 --- a/doc/classes/String.src/M000009.html +++ b/doc/classes/String.src/M000009.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -strip_stylesheets (String) +strip_javascripts (String) -# File lib/rir/string.rb, line 136 - def strip_stylesheets - dup.strip_stylesheets! +# File lib/rir/string.rb, line 129 + def strip_javascripts + dup.strip_javascripts! enddiff --git a/doc/classes/String.src/M000010.html b/doc/classes/String.src/M000010.html index 37f6f1f..f26b6c6 100644 --- a/doc/classes/String.src/M000010.html +++ b/doc/classes/String.src/M000010.html @@ -2,14 +2,15 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -strip_punctuation! (String) +strip_stylesheets! (String) -# File lib/rir/string.rb, line 145 - def strip_punctuation! - replace strip_with_pattern /[^a-zA-Z0-9\-\s]/ +# File lib/rir/string.rb, line 133 + def strip_stylesheets! + # TODO: rewamp. dunno what is it. + replace strip_with_pattern /<style type="text\/css">(.+?)<\/style>/m enddiff --git a/doc/classes/String.src/M000011.html b/doc/classes/String.src/M000011.html index 36b9164..00ac846 100644 --- a/doc/classes/String.src/M000011.html +++ b/doc/classes/String.src/M000011.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -strip_punctuation (String) +strip_stylesheets (String) -# File lib/rir/string.rb, line 153 - def strip_punctuation - dup.strip_punctuation! +# File lib/rir/string.rb, line 138 + def strip_stylesheets + dup.strip_stylesheets! enddiff --git a/doc/classes/String.src/M000012.html b/doc/classes/String.src/M000012.html index c920941..a6e3495 100644 --- a/doc/classes/String.src/M000012.html +++ b/doc/classes/String.src/M000012.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -extract_xmltags_values (String) +strip_punctuation! (String) -# File lib/rir/string.rb, line 161 - def extract_xmltags_values(tag_name) - self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten +# File lib/rir/string.rb, line 147 + def strip_punctuation! + replace strip_with_pattern /[^a-zA-Z0-9\-\s]/ enddiff --git a/doc/classes/String.src/M000013.html b/doc/classes/String.src/M000013.html new file mode 100644 index 0000000..aa192ed --- /dev/null +++ b/doc/classes/String.src/M000013.html @@ -0,0 +1,15 @@ + + + +strip_punctuation (String) + + + + +# File lib/rir/string.rb, line 155 + def strip_punctuation + dup.strip_punctuation! + end+ + diff --git a/doc/classes/String.src/M000014.html b/doc/classes/String.src/M000014.html new file mode 100644 index 0000000..d6e6648 --- /dev/null +++ b/doc/classes/String.src/M000014.html @@ -0,0 +1,15 @@ + + + +extract_xmltags_values (String) + + + + +# File lib/rir/string.rb, line 163 + def extract_xmltags_values(tag_name) + self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten + end+ + diff --git a/doc/classes/TreeTagger.html b/doc/classes/TreeTagger.html new file mode 100644 index 0000000..7ca1358 --- /dev/null +++ b/doc/classes/TreeTagger.html @@ -0,0 +1,123 @@ + + + +Module: TreeTagger [RDoc Documentation] + + + + + + + + + +++ + ++
++ +Module +TreeTagger ++ + + +In: ++ + + + + lib/rir/ttagger.rb + + + + + +
+ ++ ++ + + ++ ++ + +++ ++TreeTagger-related stuff module. +
++See www.ims.uni-stuttgart.de/projekte/corplex/TreeTagger/DecisionTreeTagger.html +
+ ++ ++ +++ + + + + + + + + +Classes and Modules
+ + Class TreeTagger::Chunk
+Class TreeTagger::TaggerChunker
+Class TreeTagger::TaggerChunkerEnglish
+Class TreeTagger::TaggerChunkerFrench
+Class TreeTagger::TaggerChunkerGerman
+ ++ ++ + + diff --git a/doc/classes/TreeTagger/Chunk.html b/doc/classes/TreeTagger/Chunk.html new file mode 100644 index 0000000..4aec6b0 --- /dev/null +++ b/doc/classes/TreeTagger/Chunk.html @@ -0,0 +1,195 @@ + + + +Class: TreeTagger::Chunk [RDoc Documentation] + + + + + + + + + +++ + ++
++ +Class +TreeTagger::Chunk ++ + + +In: ++ + + + + lib/rir/ttagger.rb + + + + + +
+ ++ + +Parent: ++ + Object + + ++ ++ + + ++ ++ + +++ ++Represents a Chunk extracted when parsing a TaggerChunker file. +
+ +++ +Methods
+ ++ + new + +++ + + ++ +++ + + + +Attributes
+ ++++ +
++ + +tag + +[R] + ++ + + +words + +[R] + ++ + ++ + + + +Public Class methods
+ + ++ + ++ + + ++ + + + new(str,tag) + + + ++ ++ +++Creates a Chunk. +
++
+ +- str are whitespace-separated terms. + +
+- tag see : ftp.ims.uni-stuttgart.de/pub/corpora/chunker-tagset-english.txt + +
++ ++ + + diff --git a/doc/classes/TreeTagger/Chunk.src/M000017.html b/doc/classes/TreeTagger/Chunk.src/M000017.html new file mode 100644 index 0000000..739251a --- /dev/null +++ b/doc/classes/TreeTagger/Chunk.src/M000017.html @@ -0,0 +1,16 @@ + + + +new (TreeTagger::Chunk) + + + + +# File lib/rir/ttagger.rb, line 89 + def initialize str,tag + @words = str.split + @tag = tag[1..-2] + end+ + diff --git a/doc/classes/TreeTagger/TaggerChunker.html b/doc/classes/TreeTagger/TaggerChunker.html new file mode 100644 index 0000000..2e7f693 --- /dev/null +++ b/doc/classes/TreeTagger/TaggerChunker.html @@ -0,0 +1,216 @@ + + + +Class: TreeTagger::TaggerChunker [RDoc Documentation] + + + + + + + + + +++ + ++
++ +Class +TreeTagger::TaggerChunker ++ + + +In: ++ + + + + lib/rir/ttagger.rb + + + + + +
+ ++ + +Parent: ++ + Object + + ++ ++ + + ++ ++ + + + +++ ++This class handles generic parsing of tagger-chunker outputs. +
+ ++ + + ++ +++ + + + +Attributes
+ ++++ +
++ + +chunks + +[R] + ++ + + +file + +[R] + ++ + ++ + + + +Public Class methods
+ + ++ + ++ + ++ + + + new(chunk_file) + + + ++ ++ +++Initializes parsing. chunk_file is the output of +tagger-chunker- and must be a valid path to the file. +
++ TaggerChunker.new("ttout/2010020") #=> #<RIR::TreeTagger::TaggerChunker:0x92fd088 @chunks=[#<RIR::TreeTagger::Chunk:0x8ec5a10 @words=["robert", "schumann"], @tag="NC">, ...] ...> ++ ++ + ++ + + ++ + + + parse(chunk_lines) + + + ++ ++ +++Parses a tagger-chunker output and returns an Array of Chunk. +
+ ++ ++ + + diff --git a/doc/classes/TreeTagger/TaggerChunker.src/M000015.html b/doc/classes/TreeTagger/TaggerChunker.src/M000015.html new file mode 100644 index 0000000..a3a3546 --- /dev/null +++ b/doc/classes/TreeTagger/TaggerChunker.src/M000015.html @@ -0,0 +1,39 @@ + + + +parse (TreeTagger::TaggerChunker) + + + + +# File lib/rir/ttagger.rb, line 34 + def self.parse chunk_lines + open = false + tag = nil + + chunks = [] + words = [] + + chunk_lines.each do |l| + l.chomp! + if l =~ /^<\w+>$/ + open = true + tag = l + elsif l =~ /^<\/\w+>$/ + if !words.empty? && open && l == tag.sub(/</, '</') + open = false + chunks.push Chunk.new(words.join(" "), tag) + words.clear + else + next + end + else + words.push(l.split.first) + end + end + + chunks + end+ + diff --git a/doc/classes/TreeTagger/TaggerChunker.src/M000016.html b/doc/classes/TreeTagger/TaggerChunker.src/M000016.html new file mode 100644 index 0000000..6652c5c --- /dev/null +++ b/doc/classes/TreeTagger/TaggerChunker.src/M000016.html @@ -0,0 +1,15 @@ + + + +new (TreeTagger::TaggerChunker) + + + + +# File lib/rir/ttagger.rb, line 66 + def initialize chunk_file + @chunks = TaggerChunker.parse File.open(chunk_file).readlines + end+ + diff --git a/doc/classes/TreeTagger/TaggerChunkerEnglish.html b/doc/classes/TreeTagger/TaggerChunkerEnglish.html new file mode 100644 index 0000000..45936ae --- /dev/null +++ b/doc/classes/TreeTagger/TaggerChunkerEnglish.html @@ -0,0 +1,114 @@ + + + +Class: TreeTagger::TaggerChunkerEnglish [RDoc Documentation] + + + + + + + + + +++ + ++
++ +Class +TreeTagger::TaggerChunkerEnglish ++ + + +In: ++ + + + + lib/rir/ttagger.rb + + + + + +
+ ++ + +Parent: ++ + + + TreeTagger::TaggerChunker + + + + ++ ++ + + ++ ++ + ++ + + + + + + + + ++ ++ ++ + + diff --git a/doc/classes/TreeTagger/TaggerChunkerFrench.html b/doc/classes/TreeTagger/TaggerChunkerFrench.html new file mode 100644 index 0000000..8309193 --- /dev/null +++ b/doc/classes/TreeTagger/TaggerChunkerFrench.html @@ -0,0 +1,114 @@ + + + +Class: TreeTagger::TaggerChunkerFrench [RDoc Documentation] + + + + + + + + + +++ + ++
++ +Class +TreeTagger::TaggerChunkerFrench ++ + + +In: ++ + + + + lib/rir/ttagger.rb + + + + + +
+ ++ + +Parent: ++ + + + TreeTagger::TaggerChunker + + + + ++ ++ + + ++ ++ + ++ + + + + + + + + ++ ++ ++ + + diff --git a/doc/classes/TreeTagger/TaggerChunkerGerman.html b/doc/classes/TreeTagger/TaggerChunkerGerman.html new file mode 100644 index 0000000..e9a998a --- /dev/null +++ b/doc/classes/TreeTagger/TaggerChunkerGerman.html @@ -0,0 +1,114 @@ + + + +Class: TreeTagger::TaggerChunkerGerman [RDoc Documentation] + + + + + + + + + +++ + ++
++ +Class +TreeTagger::TaggerChunkerGerman ++ + + +In: ++ + + + + lib/rir/ttagger.rb + + + + + +
+ ++ + +Parent: ++ + + + TreeTagger::TaggerChunker + + + + ++ ++ + + ++ ++ + ++ + + + + + + + + ++ ++ ++ + + diff --git a/doc/created.rid b/doc/created.rid index 6e8bb1d..896d22c 100644 --- a/doc/created.rid +++ b/doc/created.rid @@ -1 +1 @@ -Thu, 25 Nov 2010 17:10:04 +0100 +Thu, 25 Nov 2010 17:21:51 +0100 diff --git a/doc/files/lib/rir/corpus_rb.html b/doc/files/lib/rir/corpus_rb.html index 95ef563..a211e38 100644 --- a/doc/files/lib/rir/corpus_rb.html +++ b/doc/files/lib/rir/corpus_rb.html @@ -53,7 +53,7 @@Last Update: -2010-11-23 18:20:24 +0100 +2010-11-25 17:20:52 +0100
-This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
- --This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
- --This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
- --This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
- --This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. +
+General module for many purposes related to Information Retrieval.
-This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
--This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
--This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
--This file is a part of an Information Retrieval oriented Ruby library -
-
-Copyright (C) 2010-2011 Romain Deveaud
-This program is free software: you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the Free -Software Foundation, either version 3 of the License, or (at your option) -any later version. -
--This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -more details. -
--You should have received a copy of the GNU General Public License along -with this program. If not, see <www.gnu.org/licenses/>. -
--General module for many purposes related to Information Retrieval. -
- -