diff --git a/doc/classes/RIR.html b/doc/classes/RIR.html index 77f7788..e909f57 100644 --- a/doc/classes/RIR.html +++ b/doc/classes/RIR.html @@ -53,9 +53,9 @@
-General module for many purposes related to Information Retrieval. +This file is a part of an Information Retrieval oriented Ruby library +
+
+Copyright (C) 2010-2011 Romain Deveaud
+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +
++This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +
++You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>.
-General module for many purposes related to Information Retrieval. +This file is a part of an Information Retrieval oriented Ruby library +
+
+Copyright (C) 2010-2011 Romain Deveaud
+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +
++This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +
++You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>. +
++This file is a part of an Information Retrieval oriented Ruby library +
+
+Copyright (C) 2010-2011 Romain Deveaud
+This program is free software: you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation, either version 3 of the License, or (at your option) +any later version. +
++This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. +
++You should have received a copy of the GNU General Public License along +with this program. If not, see <www.gnu.org/licenses/>.
+Removes punctuation from self. +
++ s = "hello, world. how are you?!" + s.strip_punctuation # => "hello world how are you" ++ +
+Removes punctuation from self. +
++ s = "hello, world. how are you?!" + s.strip_punctuation! + s # => "hello world how are you" ++ +
# File lib/rir/string.rb, line 77 +# File lib/rir/string.rb, line 76 def is_stopword? Stoplist.include?(self.downcase) enddiff --git a/doc/classes/String.src/M000002.html b/doc/classes/String.src/M000002.html index 1d3aa25..c21c139 100644 --- a/doc/classes/String.src/M000002.html +++ b/doc/classes/String.src/M000002.html @@ -7,7 +7,7 @@ -# File lib/rir/string.rb, line 83 +# File lib/rir/string.rb, line 82 def remove_special_characters self.split.collect { |w| w.gsub(/\W/,' ').split.collect { |w| w.gsub(/\W/,' ').strip.sub(/\A.\z/, '')}.join(' ').strip.sub(/\A.\z/, '')}.join(' ') enddiff --git a/doc/classes/String.src/M000003.html b/doc/classes/String.src/M000003.html index 21c6728..01c1839 100644 --- a/doc/classes/String.src/M000003.html +++ b/doc/classes/String.src/M000003.html @@ -7,7 +7,7 @@ -# File lib/rir/string.rb, line 92 +# File lib/rir/string.rb, line 91 def strip_xml_tags! replace strip_with_pattern /<\/?[^>]*>/ enddiff --git a/doc/classes/String.src/M000004.html b/doc/classes/String.src/M000004.html index a913161..2d020b7 100644 --- a/doc/classes/String.src/M000004.html +++ b/doc/classes/String.src/M000004.html @@ -7,7 +7,7 @@ -# File lib/rir/string.rb, line 101 +# File lib/rir/string.rb, line 100 def strip_xml_tags dup.strip_xml_tags! enddiff --git a/doc/classes/String.src/M000005.html b/doc/classes/String.src/M000005.html index 188323f..1f77395 100644 --- a/doc/classes/String.src/M000005.html +++ b/doc/classes/String.src/M000005.html @@ -7,7 +7,7 @@ -# File lib/rir/string.rb, line 115 +# File lib/rir/string.rb, line 114 def strip_javascripts! replace strip_with_pattern /<script type="text\/javascript">(.+?)<\/script>/m enddiff --git a/doc/classes/String.src/M000006.html b/doc/classes/String.src/M000006.html index ad91df4..8a73177 100644 --- a/doc/classes/String.src/M000006.html +++ b/doc/classes/String.src/M000006.html @@ -7,7 +7,7 @@ -# File lib/rir/string.rb, line 128 +# File lib/rir/string.rb, line 127 def strip_javascripts dup.strip_javascripts! enddiff --git a/doc/classes/String.src/M000007.html b/doc/classes/String.src/M000007.html index 448264e..49c5a94 100644 --- a/doc/classes/String.src/M000007.html +++ b/doc/classes/String.src/M000007.html @@ -7,7 +7,7 @@ -# File lib/rir/string.rb, line 132 +# File lib/rir/string.rb, line 131 def strip_stylesheets! # TODO: rewamp. dunno what is it. replace strip_with_pattern /<style type="text\/css">(.+?)<\/style>/m diff --git a/doc/classes/String.src/M000008.html b/doc/classes/String.src/M000008.html index 8a44d27..a10b5bd 100644 --- a/doc/classes/String.src/M000008.html +++ b/doc/classes/String.src/M000008.html @@ -7,7 +7,7 @@ -# File lib/rir/string.rb, line 137 +# File lib/rir/string.rb, line 136 def strip_stylesheets dup.strip_stylesheets! enddiff --git a/doc/classes/String.src/M000009.html b/doc/classes/String.src/M000009.html index 2203bd0..37f6f1f 100644 --- a/doc/classes/String.src/M000009.html +++ b/doc/classes/String.src/M000009.html @@ -2,14 +2,14 @@ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -extract_xmltags_values (String) +strip_punctuation! (String) # File lib/rir/string.rb, line 145 - def extract_xmltags_values(tag_name) - self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten + def strip_punctuation! + replace strip_with_pattern /[^a-zA-Z0-9\-\s]/ enddiff --git a/doc/created.rid b/doc/created.rid index 0b10800..5d2582c 100644 --- a/doc/created.rid +++ b/doc/created.rid @@ -1 +1 @@ -Fri, 05 Nov 2010 15:06:41 +0100 +Tue, 23 Nov 2010 18:20:46 +0100 diff --git a/doc/files/lib/rir/document_rb.html b/doc/files/lib/rir/document_rb.html index 5dc4860..767c904 100644 --- a/doc/files/lib/rir/document_rb.html +++ b/doc/files/lib/rir/document_rb.html @@ -53,7 +53,7 @@Last Update: -2010-11-05 15:06:24 +0100 +2010-11-23 18:14:13 +0100