From 35f45ab54d40489d2fe5d4fc5d39c40290635dea Mon Sep 17 00:00:00 2001 From: Romain Deveaud Date: Fri, 5 Nov 2010 15:09:15 +0100 Subject: [PATCH] changing the main module name, generating RDoc --- doc/classes/RIR.html | 153 +++++++++++++ doc/classes/RIR/Document.html | 312 +++++++++++++++++++++++++++ doc/classes/RIR/Document.src/M000010.html | 23 ++ doc/classes/RIR/Document.src/M000011.html | 26 +++ doc/classes/RIR/Document.src/M000012.html | 18 ++ doc/classes/RIR/Document.src/M000013.html | 24 +++ doc/classes/RIR/Document.src/M000014.html | 16 ++ doc/classes/RIR/WebDocument.html | 209 ++++++++++++++++++ doc/classes/RIR/WebDocument.src/M000015.html | 16 ++ doc/classes/RIR/WebDocument.src/M000016.html | 16 ++ doc/classes/RIR/WikipediaPage.html | 122 +++++++++++ doc/classes/Rir.html | 153 ------------- doc/classes/Rir/Document.html | 312 --------------------------- doc/classes/Rir/Document.src/M000010.html | 23 -- doc/classes/Rir/Document.src/M000011.html | 26 --- doc/classes/Rir/Document.src/M000012.html | 18 -- doc/classes/Rir/Document.src/M000013.html | 24 --- doc/classes/Rir/Document.src/M000014.html | 16 -- doc/classes/Rir/WebDocument.html | 209 ------------------ doc/classes/Rir/WebDocument.src/M000015.html | 16 -- doc/classes/Rir/WebDocument.src/M000016.html | 16 -- doc/classes/Rir/WikipediaPage.html | 122 ----------- 22 files changed, 935 insertions(+), 935 deletions(-) create mode 100644 doc/classes/RIR.html create mode 100644 doc/classes/RIR/Document.html create mode 100644 doc/classes/RIR/Document.src/M000010.html create mode 100644 doc/classes/RIR/Document.src/M000011.html create mode 100644 doc/classes/RIR/Document.src/M000012.html create mode 100644 doc/classes/RIR/Document.src/M000013.html create mode 100644 doc/classes/RIR/Document.src/M000014.html create mode 100644 doc/classes/RIR/WebDocument.html create mode 100644 doc/classes/RIR/WebDocument.src/M000015.html create mode 100644 doc/classes/RIR/WebDocument.src/M000016.html create mode 100644 
doc/classes/RIR/WikipediaPage.html delete mode 100644 doc/classes/Rir.html delete mode 100644 doc/classes/Rir/Document.html delete mode 100644 doc/classes/Rir/Document.src/M000010.html delete mode 100644 doc/classes/Rir/Document.src/M000011.html delete mode 100644 doc/classes/Rir/Document.src/M000012.html delete mode 100644 doc/classes/Rir/Document.src/M000013.html delete mode 100644 doc/classes/Rir/Document.src/M000014.html delete mode 100644 doc/classes/Rir/WebDocument.html delete mode 100644 doc/classes/Rir/WebDocument.src/M000015.html delete mode 100644 doc/classes/Rir/WebDocument.src/M000016.html delete mode 100644 doc/classes/Rir/WikipediaPage.html diff --git a/doc/classes/RIR.html b/doc/classes/RIR.html new file mode 100644 index 0000000..77f7788 --- /dev/null +++ b/doc/classes/RIR.html @@ -0,0 +1,153 @@ + + + + Module: RIR [RDoc Documentation] + + + + + + + + + +
+ + + + + + + + + + + +
ModuleRIR
In: + + + + + lib/rir/string.rb + + + + +
+ + + + + lib/rir/document.rb + + + + +
+ +
+
+ + +
+ +
+ +
+

+General module for many purposes related to Information Retrieval. +

+

+General module for many purposes related to Information Retrieval. +

+ +
+ +
+ + +
+ + + +
+ +
+

Classes and Modules

+ + Class RIR::Document
+Class RIR::WebDocument
+Class RIR::WikipediaPage
+ +
+ +
+

Constants

+ +
+ + + + + + + + + + + + +
Stoplist=[ "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", "provide", "quite", "rather", 
"really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", "yours", "yourself", "yourselves" ]  +These are the default stopwords provided by Lemur. + +
+
+
+ + + + + + + + + +
+ +
+

[Validate]

+
+ + + diff --git a/doc/classes/RIR/Document.html b/doc/classes/RIR/Document.html new file mode 100644 index 0000000..e6eb41c --- /dev/null +++ b/doc/classes/RIR/Document.html @@ -0,0 +1,312 @@ + + + + Class: RIR::Document [RDoc Documentation] + + + + + + + + + +
+ + + + + + + + + + + + + + + + +
ClassRIR::Document
In: + + + + + lib/rir/document.rb + + + + +
+ +
Parent: + + Object + +
+
+ + +
+ +
+ +
+

+A Document is a bag of words and is constructed +from a string. +

+ +
+ +
+ + +
+

Methods

+ +
+ + count_words   + + entropy   + + format_words   + + new   + + ngrams   + +
+
+ +
+ + + +
+ + + +
+

Attributes

+ +
+ + + + + + + + + + + + + + + + + + +
doc_content [R] 
words [R] 
+
+
+ + + + +
+ +

Public Class methods

+ + +
+ + + + +
+ +
+
+ + +

Public Instance methods

+ + +
+ + + + +
+ +

+Returns a Hash containing the words and their associated counts in the +current Document. +

+
+  count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... }
+
+ +
+
+ + +
+ + + + +
+ +

+Computes the entropy of a given string s inside the document. +

+

+If the string parameter is composed of many words (i.e. tokens separated by +whitespace(s)), it is considered as an ngram. +

+
+  entropy("guitar") #=> 0.00389919463243839
+
+ +
+
+ + +
+ + + + +
+ +

+Returns an Array containing the n-grams (words) from the current +Document. +

+
+  ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...]
+
+ +
+
+ + +

Protected Instance methods

+ + +
+ + + + +
+ +

+Any non-word characters are removed from the words (see perldoc.perl.org/perlre.html +and the \W special escape). +

+

+Protected function, only meant to be called at initialization. +

+ +
+
+ + + +
+ + + + +
+ +
+

[Validate]

+
+ + + diff --git a/doc/classes/RIR/Document.src/M000010.html b/doc/classes/RIR/Document.src/M000010.html new file mode 100644 index 0000000..5056008 --- /dev/null +++ b/doc/classes/RIR/Document.src/M000010.html @@ -0,0 +1,23 @@ + + + + format_words (RIR::Document) + + + + +
# File lib/rir/document.rb, line 31
+    def format_words
+      wo = []
+
+      @doc_content.split.each do |w|
+        w.split(/\W/).each do |sw| 
+          wo.push(sw) if sw =~ /[a-zA-Z]/ 
+        end
+      end
+      
+      wo
+    end
+ + diff --git a/doc/classes/RIR/Document.src/M000011.html b/doc/classes/RIR/Document.src/M000011.html new file mode 100644 index 0000000..6257629 --- /dev/null +++ b/doc/classes/RIR/Document.src/M000011.html @@ -0,0 +1,26 @@ + + + + ngrams (RIR::Document) + + + + +
# File lib/rir/document.rb, line 46
+    def ngrams(n)
+      window       = []
+      ngrams_array = []
+
+      @words.each do |w|
+        window.push(w)
+        if window.size == n
+          ngrams_array.push window.join(" ")
+          window.delete_at(0)
+        end
+      end
+
+      ngrams_array.uniq
+    end
+ + diff --git a/doc/classes/RIR/Document.src/M000012.html b/doc/classes/RIR/Document.src/M000012.html new file mode 100644 index 0000000..978234a --- /dev/null +++ b/doc/classes/RIR/Document.src/M000012.html @@ -0,0 +1,18 @@ + + + + count_words (RIR::Document) + + + + +
# File lib/rir/document.rb, line 64
+    def count_words
+      counts = Hash.new { |h,k| h[k] = 0 }
+      @words.each { |w| counts[w.downcase] += 1 }
+
+      counts
+    end
+ + diff --git a/doc/classes/RIR/Document.src/M000013.html b/doc/classes/RIR/Document.src/M000013.html new file mode 100644 index 0000000..5694971 --- /dev/null +++ b/doc/classes/RIR/Document.src/M000013.html @@ -0,0 +1,24 @@ + + + + entropy (RIR::Document) + + + + +
# File lib/rir/document.rb, line 77
+    def entropy(s)
+      en = 0.0
+      counts = self.count_words
+
+      s.split.each do |w|
+        p_wi = counts[w].to_f/@words.count.to_f
+        en += p_wi*Math.log2(p_wi)
+      end
+
+      en *= -1
+      en
+    end
+ + diff --git a/doc/classes/RIR/Document.src/M000014.html b/doc/classes/RIR/Document.src/M000014.html new file mode 100644 index 0000000..b882fcd --- /dev/null +++ b/doc/classes/RIR/Document.src/M000014.html @@ -0,0 +1,16 @@ + + + + new (RIR::Document) + + + + +
# File lib/rir/document.rb, line 92
+    def initialize(content)
+      @doc_content = content
+      @words = format_words
+    end
+ + diff --git a/doc/classes/RIR/WebDocument.html b/doc/classes/RIR/WebDocument.html new file mode 100644 index 0000000..4ca5a23 --- /dev/null +++ b/doc/classes/RIR/WebDocument.html @@ -0,0 +1,209 @@ + + + + Class: RIR::WebDocument [RDoc Documentation] + + + + + + + + + +
+ + + + + + + + + + + + + + + + +
ClassRIR::WebDocument
In: + + + + + lib/rir/document.rb + + + + +
+ +
Parent: + + + + RIR::Document + + + +
+
+ + +
+ +
+ +
+

+A WebDocument is a Document with a url. +

+ +
+ +
+ + +
+

Methods

+ +
+ + get_content   + + new   + +
+
+ +
+ + + +
+ + + +
+

Attributes

+ +
+ + + + + + + + + + +
url [R] 
+
+
+ + + + +
+ +

Public Class methods

+ + +
+ + + + +
+ +

+Returns the HTML text from the page of a given url. +

+ +
+
+ + +
+ + + + +
+ +

+WebDocument constructor, the content of the +Document is the HTML page without the tags. +

+ +
+
+ + + +
+ + + + +
+ +
+

[Validate]

+
+ + + diff --git a/doc/classes/RIR/WebDocument.src/M000015.html b/doc/classes/RIR/WebDocument.src/M000015.html new file mode 100644 index 0000000..54776b5 --- /dev/null +++ b/doc/classes/RIR/WebDocument.src/M000015.html @@ -0,0 +1,16 @@ + + + + get_content (RIR::WebDocument) + + + + +
# File lib/rir/document.rb, line 105
+    def self.get_content(url)
+      require 'net/http'
+      Net::HTTP.get(URI.parse(url))
+    end
+ + diff --git a/doc/classes/RIR/WebDocument.src/M000016.html b/doc/classes/RIR/WebDocument.src/M000016.html new file mode 100644 index 0000000..d7a5169 --- /dev/null +++ b/doc/classes/RIR/WebDocument.src/M000016.html @@ -0,0 +1,16 @@ + + + + new (RIR::WebDocument) + + + + +
# File lib/rir/document.rb, line 112
+    def initialize(url)
+      @url = url
+      super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags
+    end
+ + diff --git a/doc/classes/RIR/WikipediaPage.html b/doc/classes/RIR/WikipediaPage.html new file mode 100644 index 0000000..65f343e --- /dev/null +++ b/doc/classes/RIR/WikipediaPage.html @@ -0,0 +1,122 @@ + + + + Class: RIR::WikipediaPage [RDoc Documentation] + + + + + + + + + +
+ + + + + + + + + + + + + + + + +
ClassRIR::WikipediaPage
In: + + + + + lib/rir/document.rb + + + + +
+ +
Parent: + + + + RIR::WebDocument + + + +
+
+ + +
+ +
+ +
+

+A WikipediaPage is a WebDocument. +

+ +
+ +
+ + +
+ + + +
+ + + + + + + + + +
+ +
+

[Validate]

+
+ + + diff --git a/doc/classes/Rir.html b/doc/classes/Rir.html deleted file mode 100644 index f3b2275..0000000 --- a/doc/classes/Rir.html +++ /dev/null @@ -1,153 +0,0 @@ - - - - Module: Rir [RDoc Documentation] - - - - - - - - - -
- - - - - - - - - - - -
ModuleRir
In: - - - - - lib/rir/string.rb - - - - -
- - - - - lib/rir/document.rb - - - - -
- -
-
- - -
- -
- -
-

-General module for many purposes related to Information Retrieval. -

-

-General module for many purposes related to Information Retrieval. -

- -
- -
- - -
- - - -
- -
-

Classes and Modules

- - Class Rir::Document
-Class Rir::WebDocument
-Class Rir::WikipediaPage
- -
- -
-

Constants

- -
- - - - - - - - - - - - -
Stoplist=[ "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", "provide", "quite", "rather", 
"really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", "yours", "yourself", "yourselves" ]  -These are the default stopwords provided by Lemur. - -
-
-
- - - - - - - - - -
- -
-

[Validate]

-
- - - diff --git a/doc/classes/Rir/Document.html b/doc/classes/Rir/Document.html deleted file mode 100644 index 9f6f91d..0000000 --- a/doc/classes/Rir/Document.html +++ /dev/null @@ -1,312 +0,0 @@ - - - - Class: Rir::Document [RDoc Documentation] - - - - - - - - - -
- - - - - - - - - - - - - - - - -
ClassRir::Document
In: - - - - - lib/rir/document.rb - - - - -
- -
Parent: - - Object - -
-
- - -
- -
- -
-

-A Document is a bag of words and is constructed -from a string. -

- -
- -
- - -
-

Methods

- -
- - count_words   - - entropy   - - format_words   - - new   - - ngrams   - -
-
- -
- - - -
- - - -
-

Attributes

- -
- - - - - - - - - - - - - - - - - - -
doc_content [R] 
words [R] 
-
-
- - - - -
- -

Public Class methods

- - -
- - - - -
- -
-
- - -

Public Instance methods

- - -
- - - - -
- -

-Returns a Hash containing the words and their associated counts in the -current Document. -

-
-  count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... }
-
- -
-
- - -
- - - - -
- -

-Computes the entropy of a given string s inside the document. -

-

-If the string parameter is composed of many words (i.e. tokens separated by -whitespace(s)), it is considered as an ngram. -

-
-  entropy("guitar") #=> 0.00389919463243839
-
- -
-
- - -
- - - - -
- -

-Returns an Array containing the n-grams (words) from the current -Document. -

-
-  ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...]
-
- -
-
- - -

Protected Instance methods

- - -
- - - - -
- -

-Any non-word characters are removed from the words (see perldoc.perl.org/perlre.html -and the W special escape). -

-

-Protected function, only meant to by called at the initialization. -

- -
-
- - - -
- - - - -
- -
-

[Validate]

-
- - - diff --git a/doc/classes/Rir/Document.src/M000010.html b/doc/classes/Rir/Document.src/M000010.html deleted file mode 100644 index a030e0e..0000000 --- a/doc/classes/Rir/Document.src/M000010.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - format_words (Rir::Document) - - - - -
# File lib/rir/document.rb, line 31
-    def format_words
-      wo = []
-
-      @doc_content.split.each do |w|
-        w.split(/\W/).each do |sw| 
-          wo.push(sw) if sw =~ /[a-zA-Z]/ 
-        end
-      end
-      
-      wo
-    end
- - diff --git a/doc/classes/Rir/Document.src/M000011.html b/doc/classes/Rir/Document.src/M000011.html deleted file mode 100644 index e12fd03..0000000 --- a/doc/classes/Rir/Document.src/M000011.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - ngrams (Rir::Document) - - - - -
# File lib/rir/document.rb, line 46
-    def ngrams(n)
-      window       = []
-      ngrams_array = []
-
-      @words.each do |w|
-        window.push(w)
-        if window.size == n
-          ngrams_array.push window.join(" ")
-          window.delete_at(0)
-        end
-      end
-
-      ngrams_array.uniq
-    end
- - diff --git a/doc/classes/Rir/Document.src/M000012.html b/doc/classes/Rir/Document.src/M000012.html deleted file mode 100644 index 9f23ad9..0000000 --- a/doc/classes/Rir/Document.src/M000012.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - count_words (Rir::Document) - - - - -
# File lib/rir/document.rb, line 64
-    def count_words
-      counts = Hash.new { |h,k| h[k] = 0 }
-      @words.each { |w| counts[w.downcase] += 1 }
-
-      counts
-    end
- - diff --git a/doc/classes/Rir/Document.src/M000013.html b/doc/classes/Rir/Document.src/M000013.html deleted file mode 100644 index 986eab3..0000000 --- a/doc/classes/Rir/Document.src/M000013.html +++ /dev/null @@ -1,24 +0,0 @@ - - - - entropy (Rir::Document) - - - - -
# File lib/rir/document.rb, line 77
-    def entropy(s)
-      en = 0.0
-      counts = self.count_words
-
-      s.split.each do |w|
-        p_wi = counts[w].to_f/@words.count.to_f
-        en += p_wi*Math.log2(p_wi)
-      end
-
-      en *= -1
-      en
-    end
- - diff --git a/doc/classes/Rir/Document.src/M000014.html b/doc/classes/Rir/Document.src/M000014.html deleted file mode 100644 index 8c644b0..0000000 --- a/doc/classes/Rir/Document.src/M000014.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - new (Rir::Document) - - - - -
# File lib/rir/document.rb, line 92
-    def initialize(content)
-      @doc_content = content
-      @words = format_words
-    end
- - diff --git a/doc/classes/Rir/WebDocument.html b/doc/classes/Rir/WebDocument.html deleted file mode 100644 index 35e437f..0000000 --- a/doc/classes/Rir/WebDocument.html +++ /dev/null @@ -1,209 +0,0 @@ - - - - Class: Rir::WebDocument [RDoc Documentation] - - - - - - - - - -
- - - - - - - - - - - - - - - - -
ClassRir::WebDocument
In: - - - - - lib/rir/document.rb - - - - -
- -
Parent: - - - - Rir::Document - - - -
-
- - -
- -
- -
-

-A WebDocument is a Document with a url. -

- -
- -
- - -
-

Methods

- -
- - get_content   - - new   - -
-
- -
- - - -
- - - -
-

Attributes

- -
- - - - - - - - - - -
url [R] 
-
-
- - - - -
- -

Public Class methods

- - -
- - - - -
- -

-Returns the HTML text from the page of a given url. -

- -
-
- - -
- - - - -
- -

-WebDocument constructor, the content of the -Document is the HTML page without the tags. -

- -
-
- - - -
- - - - -
- -
-

[Validate]

-
- - - diff --git a/doc/classes/Rir/WebDocument.src/M000015.html b/doc/classes/Rir/WebDocument.src/M000015.html deleted file mode 100644 index 31f8332..0000000 --- a/doc/classes/Rir/WebDocument.src/M000015.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - get_content (Rir::WebDocument) - - - - -
# File lib/rir/document.rb, line 105
-    def self.get_content(url)
-      require 'net/http'
-      Net::HTTP.get(URI.parse(url))
-    end
- - diff --git a/doc/classes/Rir/WebDocument.src/M000016.html b/doc/classes/Rir/WebDocument.src/M000016.html deleted file mode 100644 index 1186ae7..0000000 --- a/doc/classes/Rir/WebDocument.src/M000016.html +++ /dev/null @@ -1,16 +0,0 @@ - - - - new (Rir::WebDocument) - - - - -
# File lib/rir/document.rb, line 112
-    def initialize(url)
-      @url = url
-      super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags
-    end
- - diff --git a/doc/classes/Rir/WikipediaPage.html b/doc/classes/Rir/WikipediaPage.html deleted file mode 100644 index 0824f45..0000000 --- a/doc/classes/Rir/WikipediaPage.html +++ /dev/null @@ -1,122 +0,0 @@ - - - - Class: Rir::WikipediaPage [RDoc Documentation] - - - - - - - - - -
- - - - - - - - - - - - - - - - -
ClassRir::WikipediaPage
In: - - - - - lib/rir/document.rb - - - - -
- -
Parent: - - - - Rir::WebDocument - - - -
-
- - -
- -
- -
-

-A WikipediaPage is a WebDocument. -

- -
- -
- - -
- - - -
- - - - - - - - - -
- -
-

[Validate]

-
- - - -- 1.8.2.3