Commit fd4cb285a4975c9a0b6624e93a42eb9fa812fee4
1 parent
b843bae6b0
Exists in
master
doc changes + document and string improvements
Showing 14 changed files with 63 additions and 26 deletions Inline Diff
- doc/classes/String.html
- doc/created.rid
- doc/files/README_markdown.html
- doc/files/lib/rir/document_rb.html
- doc/files/lib/rir/string_rb.html
- doc/files/main_rb.html
- doc/fr_class_index.html
- doc/fr_file_index.html
- doc/fr_method_index.html
- doc/index.html
- lib/rir.rb
- lib/rir/document.rb
- lib/rir/string.rb
- main.rb
doc/classes/String.html
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
| 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
| 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 4 | <head> | 4 | <head> |
| 5 | <title>Class: String [RDoc Documentation]</title> | 5 | <title>Class: String [RDoc Documentation]</title> |
| 6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | 6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | 7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> |
| 8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | 8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> |
| 9 | <script type="text/javascript"> | 9 | <script type="text/javascript"> |
| 10 | // <![CDATA[ | 10 | // <![CDATA[ |
| 11 | 11 | ||
| 12 | function popupCode( url ) { | 12 | function popupCode( url ) { |
| 13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | 13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") |
| 14 | } | 14 | } |
| 15 | 15 | ||
| 16 | function toggleCode( id ) { | 16 | function toggleCode( id ) { |
| 17 | if ( document.getElementById ) | 17 | if ( document.getElementById ) |
| 18 | elem = document.getElementById( id ); | 18 | elem = document.getElementById( id ); |
| 19 | else if ( document.all ) | 19 | else if ( document.all ) |
| 20 | elem = eval( "document.all." + id ); | 20 | elem = eval( "document.all." + id ); |
| 21 | else | 21 | else |
| 22 | return false; | 22 | return false; |
| 23 | 23 | ||
| 24 | elemStyle = elem.style; | 24 | elemStyle = elem.style; |
| 25 | 25 | ||
| 26 | if ( elemStyle.display != "block" ) { | 26 | if ( elemStyle.display != "block" ) { |
| 27 | elemStyle.display = "block" | 27 | elemStyle.display = "block" |
| 28 | } else { | 28 | } else { |
| 29 | elemStyle.display = "none" | 29 | elemStyle.display = "none" |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | return true; | 32 | return true; |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | // Make codeblocks hidden by default | 35 | // Make codeblocks hidden by default |
| 36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | 36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) |
| 37 | 37 | ||
| 38 | // ]]> | 38 | // ]]> |
| 39 | </script> | 39 | </script> |
| 40 | 40 | ||
| 41 | </head> | 41 | </head> |
| 42 | <body> | 42 | <body> |
| 43 | 43 | ||
| 44 | 44 | ||
| 45 | <div id="classHeader"> | 45 | <div id="classHeader"> |
| 46 | <table class="header-table"> | 46 | <table class="header-table"> |
| 47 | <tr class="top-aligned-row"> | 47 | <tr class="top-aligned-row"> |
| 48 | <td><strong>Class</strong></td> | 48 | <td><strong>Class</strong></td> |
| 49 | <td class="class-name-in-header">String</td> | 49 | <td class="class-name-in-header">String</td> |
| 50 | </tr> | 50 | </tr> |
| 51 | <tr class="top-aligned-row"> | 51 | <tr class="top-aligned-row"> |
| 52 | <td><strong>In:</strong></td> | 52 | <td><strong>In:</strong></td> |
| 53 | <td> | 53 | <td> |
| 54 | 54 | ||
| 55 | 55 | ||
| 56 | <a href="../files/lib/rir/string_rb.html"> | 56 | <a href="../files/lib/rir/string_rb.html"> |
| 57 | 57 | ||
| 58 | lib/rir/string.rb | 58 | lib/rir/string.rb |
| 59 | 59 | ||
| 60 | </a> | 60 | </a> |
| 61 | 61 | ||
| 62 | 62 | ||
| 63 | <br /> | 63 | <br /> |
| 64 | 64 | ||
| 65 | </td> | 65 | </td> |
| 66 | </tr> | 66 | </tr> |
| 67 | 67 | ||
| 68 | 68 | ||
| 69 | <tr class="top-aligned-row"> | 69 | <tr class="top-aligned-row"> |
| 70 | <td><strong>Parent:</strong></td> | 70 | <td><strong>Parent:</strong></td> |
| 71 | <td> | 71 | <td> |
| 72 | 72 | ||
| 73 | Object | 73 | Object |
| 74 | 74 | ||
| 75 | </td> | 75 | </td> |
| 76 | </tr> | 76 | </tr> |
| 77 | 77 | ||
| 78 | </table> | 78 | </table> |
| 79 | </div> | 79 | </div> |
| 80 | <!-- banner header --> | 80 | <!-- banner header --> |
| 81 | 81 | ||
| 82 | <div id="bodyContent"> | 82 | <div id="bodyContent"> |
| 83 | 83 | ||
| 84 | <div id="contextContent"> | 84 | <div id="contextContent"> |
| 85 | 85 | ||
| 86 | <div id="description"> | 86 | <div id="description"> |
| 87 | <p> | 87 | <p> |
| 88 | Extension of the standard class <a href="String.html">String</a> with | 88 | Extension of the standard class <a href="String.html">String</a> with |
| 89 | useful functions. | 89 | useful functions. |
| 90 | </p> | 90 | </p> |
| 91 | 91 | ||
| 92 | </div> | 92 | </div> |
| 93 | 93 | ||
| 94 | </div> | 94 | </div> |
| 95 | 95 | ||
| 96 | 96 | ||
| 97 | <div id="method-list"> | 97 | <div id="method-list"> |
| 98 | <h3 class="section-bar">Methods</h3> | 98 | <h3 class="section-bar">Methods</h3> |
| 99 | 99 | ||
| 100 | <div class="name-list"> | 100 | <div class="name-list"> |
| 101 | 101 | ||
| 102 | <a href="#M000009">extract_xmltags_values</a> | 102 | <a href="#M000009">extract_xmltags_values</a> |
| 103 | 103 | ||
| 104 | <a href="#M000001">is_stopword?</a> | 104 | <a href="#M000001">is_stopword?</a> |
| 105 | 105 | ||
| 106 | <a href="#M000002">remove_special_characters</a> | 106 | <a href="#M000002">remove_special_characters</a> |
| 107 | 107 | ||
| 108 | <a href="#M000006">strip_javascripts</a> | 108 | <a href="#M000006">strip_javascripts</a> |
| 109 | 109 | ||
| 110 | <a href="#M000005">strip_javascripts!</a> | 110 | <a href="#M000005">strip_javascripts!</a> |
| 111 | 111 | ||
| 112 | <a href="#M000008">strip_stylesheets</a> | 112 | <a href="#M000008">strip_stylesheets</a> |
| 113 | 113 | ||
| 114 | <a href="#M000007">strip_stylesheets!</a> | 114 | <a href="#M000007">strip_stylesheets!</a> |
| 115 | 115 | ||
| 116 | <a href="#M000004">strip_xml_tags</a> | 116 | <a href="#M000004">strip_xml_tags</a> |
| 117 | 117 | ||
| 118 | <a href="#M000003">strip_xml_tags!</a> | 118 | <a href="#M000003">strip_xml_tags!</a> |
| 119 | 119 | ||
| 120 | </div> | 120 | </div> |
| 121 | </div> | 121 | </div> |
| 122 | 122 | ||
| 123 | </div> | 123 | </div> |
| 124 | 124 | ||
| 125 | <!-- if includes --> | 125 | <!-- if includes --> |
| 126 | 126 | ||
| 127 | <div id="includes"> | 127 | <div id="includes"> |
| 128 | <h3 class="section-bar">Included Modules</h3> | 128 | <h3 class="section-bar">Included Modules</h3> |
| 129 | 129 | ||
| 130 | <div id="includes-list"> | 130 | <div id="includes-list"> |
| 131 | 131 | ||
| 132 | <span class="include-name"><a href="Rir.html">Rir</a></span> | 132 | <span class="include-name"><a href="RIR.html">RIR</a></span> |
| 133 | 133 | ||
| 134 | </div> | 134 | </div> |
| 135 | </div> | 135 | </div> |
| 136 | 136 | ||
| 137 | <div id="section"> | 137 | <div id="section"> |
| 138 | 138 | ||
| 139 | 139 | ||
| 140 | 140 | ||
| 141 | 141 | ||
| 142 | <!-- if method_list --> | 142 | <!-- if method_list --> |
| 143 | 143 | ||
| 144 | <div id="methods"> | 144 | <div id="methods"> |
| 145 | 145 | ||
| 146 | <h3 class="section-bar">Public Instance methods</h3> | 146 | <h3 class="section-bar">Public Instance methods</h3> |
| 147 | 147 | ||
| 148 | 148 | ||
| 149 | <div id="method-M000009" class="method-detail"> | 149 | <div id="method-M000009" class="method-detail"> |
| 150 | <a name="M000009"></a> | 150 | <a name="M000009"></a> |
| 151 | 151 | ||
| 152 | <div class="method-heading"> | 152 | <div class="method-heading"> |
| 153 | 153 | ||
| 154 | <a href="String.src/M000009.html" target="Code" class="method-signature" | 154 | <a href="String.src/M000009.html" target="Code" class="method-signature" |
| 155 | onclick="popupCode('String.src/M000009.html');return false;"> | 155 | onclick="popupCode('String.src/M000009.html');return false;"> |
| 156 | 156 | ||
| 157 | <span class="method-name">extract_xmltags_values</span><span class="method-args">(tag_name)</span> | 157 | <span class="method-name">extract_xmltags_values</span><span class="method-args">(tag_name)</span> |
| 158 | 158 | ||
| 159 | </a> | 159 | </a> |
| 160 | 160 | ||
| 161 | </div> | 161 | </div> |
| 162 | 162 | ||
| 163 | <div class="method-description"> | 163 | <div class="method-description"> |
| 164 | 164 | ||
| 165 | <p> | 165 | <p> |
| 166 | Returns the text values inside all occurrences of an XML tag in <tt>self</tt> | 166 | Returns the text values inside all occurrences of an XML tag in <tt>self</tt> |
| 167 | </p> | 167 | </p> |
| 168 | <pre> | 168 | <pre> |
| 169 | s = "four-piece in <a href='#'>Indianapolis</a>, <a href='#'>Indiana</a> at the Murat Theatre" | 169 | s = "four-piece in <a href='#'>Indianapolis</a>, <a href='#'>Indiana</a> at the Murat Theatre" |
| 170 | s.extract_xmltags_values 'a' #=> ["Indianapolis", "Indiana"] | 170 | s.extract_xmltags_values 'a' #=> ["Indianapolis", "Indiana"] |
| 171 | </pre> | 171 | </pre> |
| 172 | 172 | ||
| 173 | </div> | 173 | </div> |
| 174 | </div> | 174 | </div> |
| 175 | 175 | ||
| 176 | 176 | ||
| 177 | <div id="method-M000001" class="method-detail"> | 177 | <div id="method-M000001" class="method-detail"> |
| 178 | <a name="M000001"></a> | 178 | <a name="M000001"></a> |
| 179 | 179 | ||
| 180 | <div class="method-heading"> | 180 | <div class="method-heading"> |
| 181 | 181 | ||
| 182 | <a href="String.src/M000001.html" target="Code" class="method-signature" | 182 | <a href="String.src/M000001.html" target="Code" class="method-signature" |
| 183 | onclick="popupCode('String.src/M000001.html');return false;"> | 183 | onclick="popupCode('String.src/M000001.html');return false;"> |
| 184 | 184 | ||
| 185 | <span class="method-name">is_stopword?</span><span class="method-args">()</span> | 185 | <span class="method-name">is_stopword?</span><span class="method-args">()</span> |
| 186 | 186 | ||
| 187 | </a> | 187 | </a> |
| 188 | 188 | ||
| 189 | </div> | 189 | </div> |
| 190 | 190 | ||
| 191 | <div class="method-description"> | 191 | <div class="method-description"> |
| 192 | 192 | ||
| 193 | <p> | 193 | <p> |
| 194 | Returns <tt>true</tt> if <tt>self</tt> belongs to Rir::Stoplist, | 194 | Returns <tt>true</tt> if <tt>self</tt> belongs to Rir::Stoplist, |
| 195 | <tt>false</tt> otherwise. | 195 | <tt>false</tt> otherwise. |
| 196 | </p> | 196 | </p> |
| 197 | 197 | ||
| 198 | </div> | 198 | </div> |
| 199 | </div> | 199 | </div> |
| 200 | 200 | ||
| 201 | 201 | ||
| 202 | <div id="method-M000002" class="method-detail"> | 202 | <div id="method-M000002" class="method-detail"> |
| 203 | <a name="M000002"></a> | 203 | <a name="M000002"></a> |
| 204 | 204 | ||
| 205 | <div class="method-heading"> | 205 | <div class="method-heading"> |
| 206 | 206 | ||
| 207 | <a href="String.src/M000002.html" target="Code" class="method-signature" | 207 | <a href="String.src/M000002.html" target="Code" class="method-signature" |
| 208 | onclick="popupCode('String.src/M000002.html');return false;"> | 208 | onclick="popupCode('String.src/M000002.html');return false;"> |
| 209 | 209 | ||
| 210 | <span class="method-name">remove_special_characters</span><span class="method-args">()</span> | 210 | <span class="method-name">remove_special_characters</span><span class="method-args">()</span> |
| 211 | 211 | ||
| 212 | </a> | 212 | </a> |
| 213 | 213 | ||
| 214 | </div> | 214 | </div> |
| 215 | 215 | ||
| 216 | <div class="method-description"> | 216 | <div class="method-description"> |
| 217 | 217 | ||
| 218 | <p> | 218 | <p> |
| 219 | Do not use. TODO: revamp. Find out why this function is here. | 219 | Do not use. TODO: revamp. Find out why this function is here. |
| 220 | </p> | 220 | </p> |
| 221 | 221 | ||
| 222 | </div> | 222 | </div> |
| 223 | </div> | 223 | </div> |
| 224 | 224 | ||
| 225 | 225 | ||
| 226 | <div id="method-M000006" class="method-detail"> | 226 | <div id="method-M000006" class="method-detail"> |
| 227 | <a name="M000006"></a> | 227 | <a name="M000006"></a> |
| 228 | 228 | ||
| 229 | <div class="method-heading"> | 229 | <div class="method-heading"> |
| 230 | 230 | ||
| 231 | <a href="String.src/M000006.html" target="Code" class="method-signature" | 231 | <a href="String.src/M000006.html" target="Code" class="method-signature" |
| 232 | onclick="popupCode('String.src/M000006.html');return false;"> | 232 | onclick="popupCode('String.src/M000006.html');return false;"> |
| 233 | 233 | ||
| 234 | <span class="method-name">strip_javascripts</span><span class="method-args">()</span> | 234 | <span class="method-name">strip_javascripts</span><span class="method-args">()</span> |
| 235 | 235 | ||
| 236 | </a> | 236 | </a> |
| 237 | 237 | ||
| 238 | </div> | 238 | </div> |
| 239 | 239 | ||
| 240 | <div class="method-description"> | 240 | <div class="method-description"> |
| 241 | 241 | ||
| 242 | <p> | 242 | <p> |
| 243 | Removes all Javascript sources from <tt>self</tt>. | 243 | Removes all Javascript sources from <tt>self</tt>. |
| 244 | </p> | 244 | </p> |
| 245 | <pre> | 245 | <pre> |
| 246 | s = "<script type='text/javascript'> | 246 | s = "<script type='text/javascript'> |
| 247 | var skin='vector', | 247 | var skin='vector', |
| 248 | stylepath='http://bits.wikimedia.org/skins-1.5' | 248 | stylepath='http://bits.wikimedia.org/skins-1.5' |
| 249 | </script> | 249 | </script> |
| 250 | 250 | ||
| 251 | test" | 251 | test" |
| 252 | s.strip_javascripts #=> "test" | 252 | s.strip_javascripts #=> "test" |
| 253 | </pre> | 253 | </pre> |
| 254 | 254 | ||
| 255 | </div> | 255 | </div> |
| 256 | </div> | 256 | </div> |
| 257 | 257 | ||
| 258 | 258 | ||
| 259 | <div id="method-M000005" class="method-detail"> | 259 | <div id="method-M000005" class="method-detail"> |
| 260 | <a name="M000005"></a> | 260 | <a name="M000005"></a> |
| 261 | 261 | ||
| 262 | <div class="method-heading"> | 262 | <div class="method-heading"> |
| 263 | 263 | ||
| 264 | <a href="String.src/M000005.html" target="Code" class="method-signature" | 264 | <a href="String.src/M000005.html" target="Code" class="method-signature" |
| 265 | onclick="popupCode('String.src/M000005.html');return false;"> | 265 | onclick="popupCode('String.src/M000005.html');return false;"> |
| 266 | 266 | ||
| 267 | <span class="method-name">strip_javascripts!</span><span class="method-args">()</span> | 267 | <span class="method-name">strip_javascripts!</span><span class="method-args">()</span> |
| 268 | 268 | ||
| 269 | </a> | 269 | </a> |
| 270 | 270 | ||
| 271 | </div> | 271 | </div> |
| 272 | 272 | ||
| 273 | <div class="method-description"> | 273 | <div class="method-description"> |
| 274 | 274 | ||
| 275 | <p> | 275 | <p> |
| 276 | Removes all Javascript sources from <tt>self</tt>. | 276 | Removes all Javascript sources from <tt>self</tt>. |
| 277 | </p> | 277 | </p> |
| 278 | <pre> | 278 | <pre> |
| 279 | s = "<script type='text/javascript'> | 279 | s = "<script type='text/javascript'> |
| 280 | var skin='vector', | 280 | var skin='vector', |
| 281 | stylepath='http://bits.wikimedia.org/skins-1.5' | 281 | stylepath='http://bits.wikimedia.org/skins-1.5' |
| 282 | </script> | 282 | </script> |
| 283 | 283 | ||
| 284 | test" | 284 | test" |
| 285 | s.strip_javascripts! | 285 | s.strip_javascripts! |
| 286 | s #=> "test" | 286 | s #=> "test" |
| 287 | </pre> | 287 | </pre> |
| 288 | 288 | ||
| 289 | </div> | 289 | </div> |
| 290 | </div> | 290 | </div> |
| 291 | 291 | ||
| 292 | 292 | ||
| 293 | <div id="method-M000008" class="method-detail"> | 293 | <div id="method-M000008" class="method-detail"> |
| 294 | <a name="M000008"></a> | 294 | <a name="M000008"></a> |
| 295 | 295 | ||
| 296 | <div class="method-heading"> | 296 | <div class="method-heading"> |
| 297 | 297 | ||
| 298 | <a href="String.src/M000008.html" target="Code" class="method-signature" | 298 | <a href="String.src/M000008.html" target="Code" class="method-signature" |
| 299 | onclick="popupCode('String.src/M000008.html');return false;"> | 299 | onclick="popupCode('String.src/M000008.html');return false;"> |
| 300 | 300 | ||
| 301 | <span class="method-name">strip_stylesheets</span><span class="method-args">()</span> | 301 | <span class="method-name">strip_stylesheets</span><span class="method-args">()</span> |
| 302 | 302 | ||
| 303 | </a> | 303 | </a> |
| 304 | 304 | ||
| 305 | </div> | 305 | </div> |
| 306 | 306 | ||
| 307 | <div class="method-description"> | 307 | <div class="method-description"> |
| 308 | 308 | ||
| 309 | </div> | 309 | </div> |
| 310 | </div> | 310 | </div> |
| 311 | 311 | ||
| 312 | 312 | ||
| 313 | <div id="method-M000007" class="method-detail"> | 313 | <div id="method-M000007" class="method-detail"> |
| 314 | <a name="M000007"></a> | 314 | <a name="M000007"></a> |
| 315 | 315 | ||
| 316 | <div class="method-heading"> | 316 | <div class="method-heading"> |
| 317 | 317 | ||
| 318 | <a href="String.src/M000007.html" target="Code" class="method-signature" | 318 | <a href="String.src/M000007.html" target="Code" class="method-signature" |
| 319 | onclick="popupCode('String.src/M000007.html');return false;"> | 319 | onclick="popupCode('String.src/M000007.html');return false;"> |
| 320 | 320 | ||
| 321 | <span class="method-name">strip_stylesheets!</span><span class="method-args">()</span> | 321 | <span class="method-name">strip_stylesheets!</span><span class="method-args">()</span> |
| 322 | 322 | ||
| 323 | </a> | 323 | </a> |
| 324 | 324 | ||
| 325 | </div> | 325 | </div> |
| 326 | 326 | ||
| 327 | <div class="method-description"> | 327 | <div class="method-description"> |
| 328 | 328 | ||
| 329 | </div> | 329 | </div> |
| 330 | </div> | 330 | </div> |
| 331 | 331 | ||
| 332 | 332 | ||
| 333 | <div id="method-M000004" class="method-detail"> | 333 | <div id="method-M000004" class="method-detail"> |
| 334 | <a name="M000004"></a> | 334 | <a name="M000004"></a> |
| 335 | 335 | ||
| 336 | <div class="method-heading"> | 336 | <div class="method-heading"> |
| 337 | 337 | ||
| 338 | <a href="String.src/M000004.html" target="Code" class="method-signature" | 338 | <a href="String.src/M000004.html" target="Code" class="method-signature" |
| 339 | onclick="popupCode('String.src/M000004.html');return false;"> | 339 | onclick="popupCode('String.src/M000004.html');return false;"> |
| 340 | 340 | ||
| 341 | <span class="method-name">strip_xml_tags</span><span class="method-args">()</span> | 341 | <span class="method-name">strip_xml_tags</span><span class="method-args">()</span> |
| 342 | 342 | ||
| 343 | </a> | 343 | </a> |
| 344 | 344 | ||
| 345 | </div> | 345 | </div> |
| 346 | 346 | ||
| 347 | <div class="method-description"> | 347 | <div class="method-description"> |
| 348 | 348 | ||
| 349 | <p> | 349 | <p> |
| 350 | Removes all XML-like tags from <tt>self</tt>. | 350 | Removes all XML-like tags from <tt>self</tt>. |
| 351 | </p> | 351 | </p> |
| 352 | <pre> | 352 | <pre> |
| 353 | s = "<html><body>test</body></html>" | 353 | s = "<html><body>test</body></html>" |
| 354 | s.strip_xml_tags #=> "test" | 354 | s.strip_xml_tags #=> "test" |
| 355 | s #=> "<html><body>test</body></html>" | 355 | s #=> "<html><body>test</body></html>" |
| 356 | </pre> | 356 | </pre> |
| 357 | 357 | ||
| 358 | </div> | 358 | </div> |
| 359 | </div> | 359 | </div> |
| 360 | 360 | ||
| 361 | 361 | ||
| 362 | <div id="method-M000003" class="method-detail"> | 362 | <div id="method-M000003" class="method-detail"> |
| 363 | <a name="M000003"></a> | 363 | <a name="M000003"></a> |
| 364 | 364 | ||
| 365 | <div class="method-heading"> | 365 | <div class="method-heading"> |
| 366 | 366 | ||
| 367 | <a href="String.src/M000003.html" target="Code" class="method-signature" | 367 | <a href="String.src/M000003.html" target="Code" class="method-signature" |
| 368 | onclick="popupCode('String.src/M000003.html');return false;"> | 368 | onclick="popupCode('String.src/M000003.html');return false;"> |
| 369 | 369 | ||
| 370 | <span class="method-name">strip_xml_tags!</span><span class="method-args">()</span> | 370 | <span class="method-name">strip_xml_tags!</span><span class="method-args">()</span> |
| 371 | 371 | ||
| 372 | </a> | 372 | </a> |
| 373 | 373 | ||
| 374 | </div> | 374 | </div> |
| 375 | 375 | ||
| 376 | <div class="method-description"> | 376 | <div class="method-description"> |
| 377 | 377 | ||
| 378 | <p> | 378 | <p> |
| 379 | Removes all XML-like tags from <tt>self</tt>. | 379 | Removes all XML-like tags from <tt>self</tt>. |
| 380 | </p> | 380 | </p> |
| 381 | <pre> | 381 | <pre> |
| 382 | s = "<html><body>test</body></html>" | 382 | s = "<html><body>test</body></html>" |
| 383 | s.strip_xml_tags! | 383 | s.strip_xml_tags! |
| 384 | s #=> "test" | 384 | s #=> "test" |
| 385 | </pre> | 385 | </pre> |
| 386 | 386 | ||
| 387 | </div> | 387 | </div> |
| 388 | </div> | 388 | </div> |
| 389 | 389 | ||
| 390 | 390 | ||
| 391 | 391 | ||
| 392 | </div> | 392 | </div> |
| 393 | 393 | ||
| 394 | 394 | ||
| 395 | 395 | ||
| 396 | 396 | ||
| 397 | </div> | 397 | </div> |
| 398 | 398 | ||
| 399 | <div id="validator-badges"> | 399 | <div id="validator-badges"> |
| 400 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | 400 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> |
| 401 | </div> | 401 | </div> |
| 402 | 402 | ||
| 403 | </body> | 403 | </body> |
| 404 | </html> | 404 | </html> |
| 405 | 405 |
doc/created.rid
| 1 | Fri, 05 Nov 2010 14:41:10 +0100 | 1 | Fri, 05 Nov 2010 15:06:41 +0100 |
| 2 | 2 |
doc/files/README_markdown.html
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
| 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
| 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 4 | <head> | 4 | <head> |
| 5 | <title>File: README.markdown [RDoc Documentation]</title> | 5 | <title>File: README.markdown [RDoc Documentation]</title> |
| 6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | 6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | 7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> |
| 8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | 8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> |
| 9 | <script type="text/javascript"> | 9 | <script type="text/javascript"> |
| 10 | // <![CDATA[ | 10 | // <![CDATA[ |
| 11 | 11 | ||
| 12 | function popupCode( url ) { | 12 | function popupCode( url ) { |
| 13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | 13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") |
| 14 | } | 14 | } |
| 15 | 15 | ||
| 16 | function toggleCode( id ) { | 16 | function toggleCode( id ) { |
| 17 | if ( document.getElementById ) | 17 | if ( document.getElementById ) |
| 18 | elem = document.getElementById( id ); | 18 | elem = document.getElementById( id ); |
| 19 | else if ( document.all ) | 19 | else if ( document.all ) |
| 20 | elem = eval( "document.all." + id ); | 20 | elem = eval( "document.all." + id ); |
| 21 | else | 21 | else |
| 22 | return false; | 22 | return false; |
| 23 | 23 | ||
| 24 | elemStyle = elem.style; | 24 | elemStyle = elem.style; |
| 25 | 25 | ||
| 26 | if ( elemStyle.display != "block" ) { | 26 | if ( elemStyle.display != "block" ) { |
| 27 | elemStyle.display = "block" | 27 | elemStyle.display = "block" |
| 28 | } else { | 28 | } else { |
| 29 | elemStyle.display = "none" | 29 | elemStyle.display = "none" |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | return true; | 32 | return true; |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | // Make codeblocks hidden by default | 35 | // Make codeblocks hidden by default |
| 36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | 36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) |
| 37 | 37 | ||
| 38 | // ]]> | 38 | // ]]> |
| 39 | </script> | 39 | </script> |
| 40 | 40 | ||
| 41 | </head> | 41 | </head> |
| 42 | <body> | 42 | <body> |
| 43 | 43 | ||
| 44 | 44 | ||
| 45 | <div id="fileHeader"> | 45 | <div id="fileHeader"> |
| 46 | <h1>README.markdown</h1> | 46 | <h1>README.markdown</h1> |
| 47 | <table class="header-table"> | 47 | <table class="header-table"> |
| 48 | <tr class="top-aligned-row"> | 48 | <tr class="top-aligned-row"> |
| 49 | <td><strong>Path:</strong></td> | 49 | <td><strong>Path:</strong></td> |
| 50 | <td>README.markdown | 50 | <td>README.markdown |
| 51 | 51 | ||
| 52 | </td> | 52 | </td> |
| 53 | </tr> | 53 | </tr> |
| 54 | <tr class="top-aligned-row"> | 54 | <tr class="top-aligned-row"> |
| 55 | <td><strong>Last Update:</strong></td> | 55 | <td><strong>Last Update:</strong></td> |
| 56 | <td>2010-11-05 14:40:41 +0100</td> | 56 | <td>2010-11-05 14:46:27 +0100</td> |
| 57 | </tr> | 57 | </tr> |
| 58 | </table> | 58 | </table> |
| 59 | </div> | 59 | </div> |
| 60 | <!-- banner header --> | 60 | <!-- banner header --> |
| 61 | 61 | ||
| 62 | <div id="bodyContent"> | 62 | <div id="bodyContent"> |
| 63 | 63 | ||
| 64 | <div id="contextContent"> | 64 | <div id="contextContent"> |
| 65 | |||
| 66 | <div id="description"> | ||
| 67 | <p> | ||
| 68 | # Ruby Information Retrieval (rIR) | ||
| 69 | </p> | ||
| 70 | <p> | ||
| 71 | Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | ||
| 72 | </p> | ||
| 73 | <p> | ||
| 74 | License | ||
| 75 | </p> | ||
| 76 | <h6>=</h6> | ||
| 77 | <p> | ||
| 78 | This program is free software: you can redistribute it and/or modify it | ||
| 79 | under the terms of the GNU General Public License as published by the Free | ||
| 80 | Software Foundation, either version 3 of the License, or (at your option) | ||
| 81 | any later version. | ||
| 82 | </p> | ||
| 83 | <p> | ||
| 84 | This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 85 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 86 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 87 | more details. | ||
| 88 | </p> | ||
| 89 | <p> | ||
| 90 | You should have received a copy of the GNU General Public License along | ||
| 91 | with this program. If not, see <<a | ||
| 92 | href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. | ||
| 93 | </p> | ||
| 94 | |||
| 95 | </div> | ||
| 65 | 96 | ||
| 66 | </div> | 97 | </div> |
| 67 | 98 | ||
| 68 | 99 | ||
| 69 | </div> | 100 | </div> |
| 70 | 101 | ||
| 71 | <!-- if includes --> | 102 | <!-- if includes --> |
| 72 | 103 | ||
| 73 | <div id="section"> | 104 | <div id="section"> |
| 74 | 105 | ||
| 75 | 106 | ||
| 76 | 107 | ||
| 77 | 108 | ||
| 78 | <!-- if method_list --> | 109 | <!-- if method_list --> |
| 79 | 110 | ||
| 80 | 111 | ||
| 81 | 112 | ||
| 82 | 113 | ||
| 83 | </div> | 114 | </div> |
| 84 | 115 | ||
| 85 | <div id="validator-badges"> | 116 | <div id="validator-badges"> |
| 86 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | 117 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> |
| 87 | </div> | 118 | </div> |
| 88 | 119 | ||
| 89 | </body> | 120 | </body> |
| 90 | </html> | 121 | </html> |
| 91 | 122 |
doc/files/lib/rir/document_rb.html
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
| 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
| 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 4 | <head> | 4 | <head> |
| 5 | <title>File: document.rb [RDoc Documentation]</title> | 5 | <title>File: document.rb [RDoc Documentation]</title> |
| 6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | 6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | 7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> |
| 8 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | 8 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> |
| 9 | <script type="text/javascript"> | 9 | <script type="text/javascript"> |
| 10 | // <![CDATA[ | 10 | // <![CDATA[ |
| 11 | 11 | ||
| 12 | function popupCode( url ) { | 12 | function popupCode( url ) { |
| 13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | 13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") |
| 14 | } | 14 | } |
| 15 | 15 | ||
| 16 | function toggleCode( id ) { | 16 | function toggleCode( id ) { |
| 17 | if ( document.getElementById ) | 17 | if ( document.getElementById ) |
| 18 | elem = document.getElementById( id ); | 18 | elem = document.getElementById( id ); |
| 19 | else if ( document.all ) | 19 | else if ( document.all ) |
| 20 | elem = eval( "document.all." + id ); | 20 | elem = eval( "document.all." + id ); |
| 21 | else | 21 | else |
| 22 | return false; | 22 | return false; |
| 23 | 23 | ||
| 24 | elemStyle = elem.style; | 24 | elemStyle = elem.style; |
| 25 | 25 | ||
| 26 | if ( elemStyle.display != "block" ) { | 26 | if ( elemStyle.display != "block" ) { |
| 27 | elemStyle.display = "block" | 27 | elemStyle.display = "block" |
| 28 | } else { | 28 | } else { |
| 29 | elemStyle.display = "none" | 29 | elemStyle.display = "none" |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | return true; | 32 | return true; |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | // Make codeblocks hidden by default | 35 | // Make codeblocks hidden by default |
| 36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | 36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) |
| 37 | 37 | ||
| 38 | // ]]> | 38 | // ]]> |
| 39 | </script> | 39 | </script> |
| 40 | 40 | ||
| 41 | </head> | 41 | </head> |
| 42 | <body> | 42 | <body> |
| 43 | 43 | ||
| 44 | 44 | ||
| 45 | <div id="fileHeader"> | 45 | <div id="fileHeader"> |
| 46 | <h1>document.rb</h1> | 46 | <h1>document.rb</h1> |
| 47 | <table class="header-table"> | 47 | <table class="header-table"> |
| 48 | <tr class="top-aligned-row"> | 48 | <tr class="top-aligned-row"> |
| 49 | <td><strong>Path:</strong></td> | 49 | <td><strong>Path:</strong></td> |
| 50 | <td>lib/rir/document.rb | 50 | <td>lib/rir/document.rb |
| 51 | 51 | ||
| 52 | </td> | 52 | </td> |
| 53 | </tr> | 53 | </tr> |
| 54 | <tr class="top-aligned-row"> | 54 | <tr class="top-aligned-row"> |
| 55 | <td><strong>Last Update:</strong></td> | 55 | <td><strong>Last Update:</strong></td> |
| 56 | <td>2010-11-05 14:39:35 +0100</td> | 56 | <td>2010-11-05 15:06:24 +0100</td> |
| 57 | </tr> | 57 | </tr> |
| 58 | </table> | 58 | </table> |
| 59 | </div> | 59 | </div> |
| 60 | <!-- banner header --> | 60 | <!-- banner header --> |
| 61 | 61 | ||
| 62 | <div id="bodyContent"> | 62 | <div id="bodyContent"> |
| 63 | 63 | ||
| 64 | <div id="contextContent"> | 64 | <div id="contextContent"> |
| 65 | 65 | ||
| 66 | <div id="description"> | 66 | <div id="description"> |
| 67 | <p> | 67 | <p> |
| 68 | This file is a part of an Information Retrieval oriented Ruby library | 68 | This file is a part of an Information Retrieval oriented Ruby library |
| 69 | </p> | 69 | </p> |
| 70 | <p> | 70 | <p> |
| 71 | Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | 71 | Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> |
| 72 | </p> | 72 | </p> |
| 73 | <p> | 73 | <p> |
| 74 | This program is free software: you can redistribute it and/or modify it | 74 | This program is free software: you can redistribute it and/or modify it |
| 75 | under the terms of the GNU General Public License as published by the Free | 75 | under the terms of the GNU General Public License as published by the Free |
| 76 | Software Foundation, either version 3 of the License, or (at your option) | 76 | Software Foundation, either version 3 of the License, or (at your option) |
| 77 | any later version. | 77 | any later version. |
| 78 | </p> | 78 | </p> |
| 79 | <p> | 79 | <p> |
| 80 | This program is distributed in the hope that it will be useful, but WITHOUT | 80 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 81 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | 81 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 82 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | 82 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| 83 | more details. | 83 | more details. |
| 84 | </p> | 84 | </p> |
| 85 | <p> | 85 | <p> |
| 86 | You should have received a copy of the GNU General Public License along | 86 | You should have received a copy of the GNU General Public License along |
| 87 | with this program. If not, see <<a | 87 | with this program. If not, see <<a |
| 88 | href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. | 88 | href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. |
| 89 | </p> | 89 | </p> |
| 90 | 90 | ||
| 91 | </div> | 91 | </div> |
| 92 | 92 | ||
| 93 | <div id="requires-list"> | 93 | <div id="requires-list"> |
| 94 | <h3 class="section-bar">Required files</h3> | 94 | <h3 class="section-bar">Required files</h3> |
| 95 | 95 | ||
| 96 | <div class="name-list"> | 96 | <div class="name-list"> |
| 97 | 97 | ||
| 98 | net/http | 98 | net/http |
| 99 | 99 | ||
| 100 | </div> | 100 | </div> |
| 101 | </div> | 101 | </div> |
| 102 | 102 | ||
| 103 | </div> | 103 | </div> |
| 104 | 104 | ||
| 105 | 105 | ||
| 106 | </div> | 106 | </div> |
| 107 | 107 | ||
| 108 | <!-- if includes --> | 108 | <!-- if includes --> |
| 109 | 109 | ||
| 110 | <div id="section"> | 110 | <div id="section"> |
| 111 | 111 | ||
| 112 | 112 | ||
| 113 | 113 | ||
| 114 | 114 | ||
| 115 | <!-- if method_list --> | 115 | <!-- if method_list --> |
| 116 | 116 | ||
| 117 | 117 | ||
| 118 | 118 | ||
| 119 | 119 | ||
| 120 | </div> | 120 | </div> |
| 121 | 121 | ||
| 122 | <div id="validator-badges"> | 122 | <div id="validator-badges"> |
| 123 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | 123 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> |
| 124 | </div> | 124 | </div> |
| 125 | 125 | ||
| 126 | </body> | 126 | </body> |
| 127 | </html> | 127 | </html> |
| 128 | 128 |
doc/files/lib/rir/string_rb.html
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
| 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
| 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 4 | <head> | 4 | <head> |
| 5 | <title>File: string.rb [RDoc Documentation]</title> | 5 | <title>File: string.rb [RDoc Documentation]</title> |
| 6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | 6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | 7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> |
| 8 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> | 8 | <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" /> |
| 9 | <script type="text/javascript"> | 9 | <script type="text/javascript"> |
| 10 | // <![CDATA[ | 10 | // <![CDATA[ |
| 11 | 11 | ||
| 12 | function popupCode( url ) { | 12 | function popupCode( url ) { |
| 13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | 13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") |
| 14 | } | 14 | } |
| 15 | 15 | ||
| 16 | function toggleCode( id ) { | 16 | function toggleCode( id ) { |
| 17 | if ( document.getElementById ) | 17 | if ( document.getElementById ) |
| 18 | elem = document.getElementById( id ); | 18 | elem = document.getElementById( id ); |
| 19 | else if ( document.all ) | 19 | else if ( document.all ) |
| 20 | elem = eval( "document.all." + id ); | 20 | elem = eval( "document.all." + id ); |
| 21 | else | 21 | else |
| 22 | return false; | 22 | return false; |
| 23 | 23 | ||
| 24 | elemStyle = elem.style; | 24 | elemStyle = elem.style; |
| 25 | 25 | ||
| 26 | if ( elemStyle.display != "block" ) { | 26 | if ( elemStyle.display != "block" ) { |
| 27 | elemStyle.display = "block" | 27 | elemStyle.display = "block" |
| 28 | } else { | 28 | } else { |
| 29 | elemStyle.display = "none" | 29 | elemStyle.display = "none" |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | return true; | 32 | return true; |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | // Make codeblocks hidden by default | 35 | // Make codeblocks hidden by default |
| 36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | 36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) |
| 37 | 37 | ||
| 38 | // ]]> | 38 | // ]]> |
| 39 | </script> | 39 | </script> |
| 40 | 40 | ||
| 41 | </head> | 41 | </head> |
| 42 | <body> | 42 | <body> |
| 43 | 43 | ||
| 44 | 44 | ||
| 45 | <div id="fileHeader"> | 45 | <div id="fileHeader"> |
| 46 | <h1>string.rb</h1> | 46 | <h1>string.rb</h1> |
| 47 | <table class="header-table"> | 47 | <table class="header-table"> |
| 48 | <tr class="top-aligned-row"> | 48 | <tr class="top-aligned-row"> |
| 49 | <td><strong>Path:</strong></td> | 49 | <td><strong>Path:</strong></td> |
| 50 | <td>lib/rir/string.rb | 50 | <td>lib/rir/string.rb |
| 51 | 51 | ||
| 52 | </td> | 52 | </td> |
| 53 | </tr> | 53 | </tr> |
| 54 | <tr class="top-aligned-row"> | 54 | <tr class="top-aligned-row"> |
| 55 | <td><strong>Last Update:</strong></td> | 55 | <td><strong>Last Update:</strong></td> |
| 56 | <td>2010-11-05 14:39:35 +0100</td> | 56 | <td>2010-11-05 15:06:35 +0100</td> |
| 57 | </tr> | 57 | </tr> |
| 58 | </table> | 58 | </table> |
| 59 | </div> | 59 | </div> |
| 60 | <!-- banner header --> | 60 | <!-- banner header --> |
| 61 | 61 | ||
| 62 | <div id="bodyContent"> | 62 | <div id="bodyContent"> |
| 63 | 63 | ||
| 64 | <div id="contextContent"> | 64 | <div id="contextContent"> |
| 65 | 65 | ||
| 66 | <div id="description"> | 66 | <div id="description"> |
| 67 | <p> | 67 | <p> |
| 68 | This file is a part of an Information Retrieval oriented Ruby library | 68 | This file is a part of an Information Retrieval oriented Ruby library |
| 69 | </p> | 69 | </p> |
| 70 | <p> | 70 | <p> |
| 71 | Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | 71 | Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> |
| 72 | </p> | 72 | </p> |
| 73 | <p> | 73 | <p> |
| 74 | This program is free software: you can redistribute it and/or modify it | 74 | This program is free software: you can redistribute it and/or modify it |
| 75 | under the terms of the GNU General Public License as published by the Free | 75 | under the terms of the GNU General Public License as published by the Free |
| 76 | Software Foundation, either version 3 of the License, or (at your option) | 76 | Software Foundation, either version 3 of the License, or (at your option) |
| 77 | any later version. | 77 | any later version. |
| 78 | </p> | 78 | </p> |
| 79 | <p> | 79 | <p> |
| 80 | This program is distributed in the hope that it will be useful, but WITHOUT | 80 | This program is distributed in the hope that it will be useful, but WITHOUT |
| 81 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | 81 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 82 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | 82 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| 83 | more details. | 83 | more details. |
| 84 | </p> | 84 | </p> |
| 85 | <p> | 85 | <p> |
| 86 | You should have received a copy of the GNU General Public License along | 86 | You should have received a copy of the GNU General Public License along |
| 87 | with this program. If not, see <<a | 87 | with this program. If not, see <<a |
| 88 | href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. | 88 | href="http://www.gnu.org/licenses/">www.gnu.org/licenses/</a>>. |
| 89 | </p> | 89 | </p> |
| 90 | 90 | ||
| 91 | </div> | 91 | </div> |
| 92 | 92 | ||
| 93 | <div id="requires-list"> | 93 | <div id="requires-list"> |
| 94 | <h3 class="section-bar">Required files</h3> | 94 | <h3 class="section-bar">Required files</h3> |
| 95 | 95 | ||
| 96 | <div class="name-list"> | 96 | <div class="name-list"> |
| 97 | 97 | ||
| 98 | cgi | 98 | cgi |
| 99 | 99 | ||
| 100 | kconv | 100 | kconv |
| 101 | 101 | ||
| 102 | </div> | 102 | </div> |
| 103 | </div> | 103 | </div> |
| 104 | 104 | ||
| 105 | </div> | 105 | </div> |
| 106 | 106 | ||
| 107 | 107 | ||
| 108 | </div> | 108 | </div> |
| 109 | 109 | ||
| 110 | <!-- if includes --> | 110 | <!-- if includes --> |
| 111 | 111 | ||
| 112 | <div id="section"> | 112 | <div id="section"> |
| 113 | 113 | ||
| 114 | 114 | ||
| 115 | 115 | ||
| 116 | 116 | ||
| 117 | <!-- if method_list --> | 117 | <!-- if method_list --> |
| 118 | 118 | ||
| 119 | 119 | ||
| 120 | 120 | ||
| 121 | 121 | ||
| 122 | </div> | 122 | </div> |
| 123 | 123 | ||
| 124 | <div id="validator-badges"> | 124 | <div id="validator-badges"> |
| 125 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | 125 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> |
| 126 | </div> | 126 | </div> |
| 127 | 127 | ||
| 128 | </body> | 128 | </body> |
| 129 | </html> | 129 | </html> |
| 130 | 130 |
doc/files/main_rb.html
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
| 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
| 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 4 | <head> | 4 | <head> |
| 5 | <title>File: main.rb [RDoc Documentation]</title> | 5 | <title>File: main.rb [RDoc Documentation]</title> |
| 6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | 6 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> | 7 | <meta http-equiv="Content-Script-Type" content="text/javascript" /> |
| 8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> | 8 | <link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" /> |
| 9 | <script type="text/javascript"> | 9 | <script type="text/javascript"> |
| 10 | // <![CDATA[ | 10 | // <![CDATA[ |
| 11 | 11 | ||
| 12 | function popupCode( url ) { | 12 | function popupCode( url ) { |
| 13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") | 13 | window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400") |
| 14 | } | 14 | } |
| 15 | 15 | ||
| 16 | function toggleCode( id ) { | 16 | function toggleCode( id ) { |
| 17 | if ( document.getElementById ) | 17 | if ( document.getElementById ) |
| 18 | elem = document.getElementById( id ); | 18 | elem = document.getElementById( id ); |
| 19 | else if ( document.all ) | 19 | else if ( document.all ) |
| 20 | elem = eval( "document.all." + id ); | 20 | elem = eval( "document.all." + id ); |
| 21 | else | 21 | else |
| 22 | return false; | 22 | return false; |
| 23 | 23 | ||
| 24 | elemStyle = elem.style; | 24 | elemStyle = elem.style; |
| 25 | 25 | ||
| 26 | if ( elemStyle.display != "block" ) { | 26 | if ( elemStyle.display != "block" ) { |
| 27 | elemStyle.display = "block" | 27 | elemStyle.display = "block" |
| 28 | } else { | 28 | } else { |
| 29 | elemStyle.display = "none" | 29 | elemStyle.display = "none" |
| 30 | } | 30 | } |
| 31 | 31 | ||
| 32 | return true; | 32 | return true; |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | // Make codeblocks hidden by default | 35 | // Make codeblocks hidden by default |
| 36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) | 36 | document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }<\/style>" ) |
| 37 | 37 | ||
| 38 | // ]]> | 38 | // ]]> |
| 39 | </script> | 39 | </script> |
| 40 | 40 | ||
| 41 | </head> | 41 | </head> |
| 42 | <body> | 42 | <body> |
| 43 | 43 | ||
| 44 | 44 | ||
| 45 | <div id="fileHeader"> | 45 | <div id="fileHeader"> |
| 46 | <h1>main.rb</h1> | 46 | <h1>main.rb</h1> |
| 47 | <table class="header-table"> | 47 | <table class="header-table"> |
| 48 | <tr class="top-aligned-row"> | 48 | <tr class="top-aligned-row"> |
| 49 | <td><strong>Path:</strong></td> | 49 | <td><strong>Path:</strong></td> |
| 50 | <td>main.rb | 50 | <td>main.rb |
| 51 | 51 | ||
| 52 | </td> | 52 | </td> |
| 53 | </tr> | 53 | </tr> |
| 54 | <tr class="top-aligned-row"> | 54 | <tr class="top-aligned-row"> |
| 55 | <td><strong>Last Update:</strong></td> | 55 | <td><strong>Last Update:</strong></td> |
| 56 | <td>2010-11-05 14:40:11 +0100</td> | 56 | <td>2010-11-05 15:05:38 +0100</td> |
| 57 | </tr> | 57 | </tr> |
| 58 | </table> | 58 | </table> |
| 59 | </div> | 59 | </div> |
| 60 | <!-- banner header --> | 60 | <!-- banner header --> |
| 61 | 61 | ||
| 62 | <div id="bodyContent"> | 62 | <div id="bodyContent"> |
| 63 | 63 | ||
| 64 | <div id="contextContent"> | 64 | <div id="contextContent"> |
| 65 | 65 | ||
| 66 | <div id="requires-list"> | 66 | <div id="requires-list"> |
| 67 | <h3 class="section-bar">Required files</h3> | 67 | <h3 class="section-bar">Required files</h3> |
| 68 | 68 | ||
| 69 | <div class="name-list"> | 69 | <div class="name-list"> |
| 70 | 70 | ||
| 71 | rir | 71 | rir |
| 72 | 72 | ||
| 73 | </div> | 73 | </div> |
| 74 | </div> | 74 | </div> |
| 75 | 75 | ||
| 76 | </div> | 76 | </div> |
| 77 | 77 | ||
| 78 | 78 | ||
| 79 | </div> | 79 | </div> |
| 80 | 80 | ||
| 81 | <!-- if includes --> | 81 | <!-- if includes --> |
| 82 | 82 | ||
| 83 | <div id="section"> | 83 | <div id="section"> |
| 84 | 84 | ||
| 85 | 85 | ||
| 86 | 86 | ||
| 87 | 87 | ||
| 88 | <!-- if method_list --> | 88 | <!-- if method_list --> |
| 89 | 89 | ||
| 90 | 90 | ||
| 91 | 91 | ||
| 92 | 92 | ||
| 93 | </div> | 93 | </div> |
| 94 | 94 | ||
| 95 | <div id="validator-badges"> | 95 | <div id="validator-badges"> |
| 96 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> | 96 | <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p> |
| 97 | </div> | 97 | </div> |
| 98 | 98 | ||
| 99 | </body> | 99 | </body> |
| 100 | </html> | 100 | </html> |
| 101 | 101 |
doc/fr_class_index.html
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
| 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
| 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 4 | <!-- | 4 | <!-- |
| 5 | 5 | ||
| 6 | Classes [RDoc Documentation] | 6 | Classes [RDoc Documentation] |
| 7 | 7 | ||
| 8 | --> | 8 | --> |
| 9 | <head> | 9 | <head> |
| 10 | <title>Classes [RDoc Documentation]</title> | 10 | <title>Classes [RDoc Documentation]</title> |
| 11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | 11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 12 | <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> | 12 | <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> |
| 13 | <base target="docwin" /> | 13 | <base target="docwin" /> |
| 14 | </head> | 14 | </head> |
| 15 | <body> | 15 | <body> |
| 16 | <div class="index"> | 16 | <div class="index"> |
| 17 | <h1 class="section-bar">Classes</h1> | 17 | <h1 class="section-bar">Classes</h1> |
| 18 | <div id="index-entries"> | 18 | <div id="index-entries"> |
| 19 | 19 | ||
| 20 | <a href="classes/Rir.html">Rir</a><br /> | 20 | <a href="classes/RIR.html">RIR</a><br /> |
| 21 | 21 | ||
| 22 | <a href="classes/Rir/Document.html">Rir::Document</a><br /> | 22 | <a href="classes/RIR/Document.html">RIR::Document</a><br /> |
| 23 | 23 | ||
| 24 | <a href="classes/Rir/WebDocument.html">Rir::WebDocument</a><br /> | 24 | <a href="classes/RIR/WebDocument.html">RIR::WebDocument</a><br /> |
| 25 | 25 | ||
| 26 | <a href="classes/Rir/WikipediaPage.html">Rir::WikipediaPage</a><br /> | 26 | <a href="classes/RIR/WikipediaPage.html">RIR::WikipediaPage</a><br /> |
| 27 | 27 | ||
| 28 | <a href="classes/String.html">String</a><br /> | 28 | <a href="classes/String.html">String</a><br /> |
| 29 | 29 | ||
| 30 | </div> | 30 | </div> |
| 31 | </div> | 31 | </div> |
| 32 | </body> | 32 | </body> |
| 33 | </html> | 33 | </html> |
| 34 | 34 |
doc/fr_file_index.html
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
| 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
| 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 4 | <!-- | 4 | <!-- |
| 5 | 5 | ||
| 6 | Files [RDoc Documentation] | 6 | Files [RDoc Documentation] |
| 7 | 7 | ||
| 8 | --> | 8 | --> |
| 9 | <head> | 9 | <head> |
| 10 | <title>Files [RDoc Documentation]</title> | 10 | <title>Files [RDoc Documentation]</title> |
| 11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | 11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 12 | <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> | 12 | <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> |
| 13 | <base target="docwin" /> | 13 | <base target="docwin" /> |
| 14 | </head> | 14 | </head> |
| 15 | <body> | 15 | <body> |
| 16 | <div class="index"> | 16 | <div class="index"> |
| 17 | <h1 class="section-bar">Files</h1> | 17 | <h1 class="section-bar">Files</h1> |
| 18 | <div id="index-entries"> | 18 | <div id="index-entries"> |
| 19 | 19 | ||
| 20 | <a href="files/README_markdown.html">README.markdown</a><br /> | ||
| 21 | |||
| 22 | <a href="files/lib/rir_rb.html">lib/rir.rb</a><br /> | ||
| 23 | |||
| 24 | <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br /> | 20 | <a href="files/lib/rir/document_rb.html">lib/rir/document.rb</a><br /> |
| 25 | 21 | ||
| 26 | <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br /> | 22 | <a href="files/lib/rir/string_rb.html">lib/rir/string.rb</a><br /> |
| 27 | 23 | ||
| 28 | <a href="files/main_rb.html">main.rb</a><br /> | 24 | <a href="files/main_rb.html">main.rb</a><br /> |
| 29 | 25 | ||
| 30 | </div> | 26 | </div> |
| 31 | </div> | 27 | </div> |
| 32 | </body> | 28 | </body> |
| 33 | </html> | 29 | </html> |
| 34 | 30 |
doc/fr_method_index.html
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" |
| 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> | 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> |
| 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 4 | <!-- | 4 | <!-- |
| 5 | 5 | ||
| 6 | Methods [RDoc Documentation] | 6 | Methods [RDoc Documentation] |
| 7 | 7 | ||
| 8 | --> | 8 | --> |
| 9 | <head> | 9 | <head> |
| 10 | <title>Methods [RDoc Documentation]</title> | 10 | <title>Methods [RDoc Documentation]</title> |
| 11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | 11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 12 | <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> | 12 | <link rel="stylesheet" href="rdoc-style.css" type="text/css" /> |
| 13 | <base target="docwin" /> | 13 | <base target="docwin" /> |
| 14 | </head> | 14 | </head> |
| 15 | <body> | 15 | <body> |
| 16 | <div class="index"> | 16 | <div class="index"> |
| 17 | <h1 class="section-bar">Methods</h1> | 17 | <h1 class="section-bar">Methods</h1> |
| 18 | <div id="index-entries"> | 18 | <div id="index-entries"> |
| 19 | 19 | ||
| 20 | <a href="classes/Rir/Document.html#M000012">count_words (Rir::Document)</a><br /> | 20 | <a href="classes/RIR/Document.html#M000012">count_words (RIR::Document)</a><br /> |
| 21 | 21 | ||
| 22 | <a href="classes/Rir/Document.html#M000013">entropy (Rir::Document)</a><br /> | 22 | <a href="classes/RIR/Document.html#M000013">entropy (RIR::Document)</a><br /> |
| 23 | 23 | ||
| 24 | <a href="classes/String.html#M000009">extract_xmltags_values (String)</a><br /> | 24 | <a href="classes/String.html#M000009">extract_xmltags_values (String)</a><br /> |
| 25 | 25 | ||
| 26 | <a href="classes/Rir/Document.html#M000010">format_words (Rir::Document)</a><br /> | 26 | <a href="classes/RIR/Document.html#M000010">format_words (RIR::Document)</a><br /> |
| 27 | 27 | ||
| 28 | <a href="classes/Rir/WebDocument.html#M000015">get_content (Rir::WebDocument)</a><br /> | 28 | <a href="classes/RIR/WebDocument.html#M000015">get_content (RIR::WebDocument)</a><br /> |
| 29 | 29 | ||
| 30 | <a href="classes/String.html#M000001">is_stopword? (String)</a><br /> | 30 | <a href="classes/String.html#M000001">is_stopword? (String)</a><br /> |
| 31 | 31 | ||
| 32 | <a href="classes/Rir/WebDocument.html#M000016">new (Rir::WebDocument)</a><br /> | 32 | <a href="classes/RIR/WebDocument.html#M000016">new (RIR::WebDocument)</a><br /> |
| 33 | 33 | ||
| 34 | <a href="classes/Rir/Document.html#M000014">new (Rir::Document)</a><br /> | 34 | <a href="classes/RIR/Document.html#M000014">new (RIR::Document)</a><br /> |
| 35 | 35 | ||
| 36 | <a href="classes/Rir/Document.html#M000011">ngrams (Rir::Document)</a><br /> | 36 | <a href="classes/RIR/Document.html#M000011">ngrams (RIR::Document)</a><br /> |
| 37 | 37 | ||
| 38 | <a href="classes/String.html#M000002">remove_special_characters (String)</a><br /> | 38 | <a href="classes/String.html#M000002">remove_special_characters (String)</a><br /> |
| 39 | 39 | ||
| 40 | <a href="classes/String.html#M000006">strip_javascripts (String)</a><br /> | 40 | <a href="classes/String.html#M000006">strip_javascripts (String)</a><br /> |
| 41 | 41 | ||
| 42 | <a href="classes/String.html#M000005">strip_javascripts! (String)</a><br /> | 42 | <a href="classes/String.html#M000005">strip_javascripts! (String)</a><br /> |
| 43 | 43 | ||
| 44 | <a href="classes/String.html#M000008">strip_stylesheets (String)</a><br /> | 44 | <a href="classes/String.html#M000008">strip_stylesheets (String)</a><br /> |
| 45 | 45 | ||
| 46 | <a href="classes/String.html#M000007">strip_stylesheets! (String)</a><br /> | 46 | <a href="classes/String.html#M000007">strip_stylesheets! (String)</a><br /> |
| 47 | 47 | ||
| 48 | <a href="classes/String.html#M000004">strip_xml_tags (String)</a><br /> | 48 | <a href="classes/String.html#M000004">strip_xml_tags (String)</a><br /> |
| 49 | 49 | ||
| 50 | <a href="classes/String.html#M000003">strip_xml_tags! (String)</a><br /> | 50 | <a href="classes/String.html#M000003">strip_xml_tags! (String)</a><br /> |
| 51 | 51 | ||
| 52 | </div> | 52 | </div> |
| 53 | </div> | 53 | </div> |
| 54 | </body> | 54 | </body> |
| 55 | </html> | 55 | </html> |
| 56 | 56 |
doc/index.html
| 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" | 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" |
| 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> | 2 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd"> |
| 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> | 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 4 | <!-- | 4 | <!-- |
| 5 | 5 | ||
| 6 | RDoc Documentation | 6 | RDoc Documentation |
| 7 | 7 | ||
| 8 | --> | 8 | --> |
| 9 | <head> | 9 | <head> |
| 10 | <title>RDoc Documentation</title> | 10 | <title>RDoc Documentation</title> |
| 11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | 11 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 12 | </head> | 12 | </head> |
| 13 | <frameset rows="20%, 80%"> | 13 | <frameset rows="20%, 80%"> |
| 14 | <frameset cols="25%,35%,45%"> | 14 | <frameset cols="25%,35%,45%"> |
| 15 | <frame src="fr_file_index.html" title="Files" name="Files" /> | 15 | <frame src="fr_file_index.html" title="Files" name="Files" /> |
| 16 | <frame src="fr_class_index.html" name="Classes" /> | 16 | <frame src="fr_class_index.html" name="Classes" /> |
| 17 | <frame src="fr_method_index.html" name="Methods" /> | 17 | <frame src="fr_method_index.html" name="Methods" /> |
| 18 | </frameset> | 18 | </frameset> |
| 19 | <frame src="files/README_markdown.html" name="docwin" /> | 19 | <frame src="files/lib/rir/string_rb.html" name="docwin" /> |
| 20 | </frameset> | 20 | </frameset> |
| 21 | </html> | 21 | </html> |
| 22 | 22 |
lib/rir.rb
| 1 | #!/usr/bin/env ruby | 1 | #!/usr/bin/env ruby |
| 2 | 2 | ||
| 3 | require 'rir/document' | 3 | require 'rir/document' |
| 4 | require 'rir/string' | 4 | require 'rir/string' |
| 5 | require 'rir/query' | ||
| 5 | 6 |
lib/rir/document.rb
| 1 | #!/usr/bin/env ruby | 1 | #!/usr/bin/env ruby |
| 2 | 2 | ||
| 3 | # This file is a part of an Information Retrieval oriented Ruby library | 3 | # This file is a part of an Information Retrieval oriented Ruby library |
| 4 | # | 4 | # |
| 5 | # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | 5 | # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> |
| 6 | # | 6 | # |
| 7 | # This program is free software: you can redistribute it and/or modify | 7 | # This program is free software: you can redistribute it and/or modify |
| 8 | # it under the terms of the GNU General Public License as published by | 8 | # it under the terms of the GNU General Public License as published by |
| 9 | # the Free Software Foundation, either version 3 of the License, or | 9 | # the Free Software Foundation, either version 3 of the License, or |
| 10 | # (at your option) any later version. | 10 | # (at your option) any later version. |
| 11 | # | 11 | # |
| 12 | # This program is distributed in the hope that it will be useful, | 12 | # This program is distributed in the hope that it will be useful, |
| 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | # GNU General Public License for more details. | 15 | # GNU General Public License for more details. |
| 16 | # | 16 | # |
| 17 | # You should have received a copy of the GNU General Public License | 17 | # You should have received a copy of the GNU General Public License |
| 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 19 | 19 | ||
| 20 | # General module for many purposes related to Information Retrieval. | 20 | # General module for many purposes related to Information Retrieval. |
| 21 | module Rir | 21 | module RIR |
| 22 | 22 | ||
| 23 | # A Document is a bag of words and is constructed from a string. | 23 | # A Document is a bag of words and is constructed from a string. |
| 24 | class Document | 24 | class Document |
| 25 | attr_reader :words, :doc_content | 25 | attr_reader :words, :doc_content |
| 26 | 26 | ||
| 27 | # Any non-word characters are removed from the words (see http://perldoc.perl.org/perlre.html | 27 | # Any non-word characters are removed from the words (see http://perldoc.perl.org/perlre.html |
| 28 | # and the \\W special escape). | 28 | # and the \\W special escape). |
| 29 | # | 29 | # |
| 30 | # Protected function, only meant to by called at the initialization. | 30 | # Protected function, only meant to by called at the initialization. |
| 31 | def format_words | 31 | def format_words |
| 32 | wo = [] | 32 | wo = [] |
| 33 | 33 | ||
| 34 | @doc_content.split.each do |w| | 34 | @doc_content.split.each do |w| |
| 35 | w.split(/\W/).each do |sw| | 35 | w.split(/\W/).each do |sw| |
| 36 | wo.push(sw) if sw =~ /[a-zA-Z]/ | 36 | wo.push(sw) if sw =~ /[a-zA-Z]/ |
| 37 | end | 37 | end |
| 38 | end | 38 | end |
| 39 | 39 | ||
| 40 | wo | 40 | wo |
| 41 | end | 41 | end |
| 42 | 42 | ||
| 43 | # Returns an Array containing the +n+-grams (words) from the current Document. | 43 | # Returns an Array containing the +n+-grams (words) from the current Document. |
| 44 | # | 44 | # |
| 45 | # ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...] | 45 | # ngrams(2) #=> ["the free", "free encyclopedia", "encyclopedia var", "var skin", ...] |
| 46 | def ngrams(n) | 46 | def ngrams(n) |
| 47 | window = [] | 47 | window = [] |
| 48 | ngrams_array = [] | 48 | ngrams_array = [] |
| 49 | 49 | ||
| 50 | @words.each do |w| | 50 | @words.each do |w| |
| 51 | window.push(w) | 51 | window.push(w) |
| 52 | if window.size == n | 52 | if window.size == n |
| 53 | ngrams_array.push window.join(" ") | 53 | ngrams_array.push window.join(" ") |
| 54 | window.delete_at(0) | 54 | window.delete_at(0) |
| 55 | end | 55 | end |
| 56 | end | 56 | end |
| 57 | 57 | ||
| 58 | ngrams_array.uniq | 58 | ngrams_array.uniq |
| 59 | end | 59 | end |
| 60 | 60 | ||
| 61 | # Returns a Hash containing the words and their associated counts in the current Document. | 61 | # Returns a Hash containing the words and their associated counts in the current Document. |
| 62 | # | 62 | # |
| 63 | # count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } | 63 | # count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } |
| 64 | def count_words | 64 | def count_words |
| 65 | counts = Hash.new { |h,k| h[k] = 0 } | 65 | counts = Hash.new { |h,k| h[k] = 0 } |
| 66 | @words.each { |w| counts[w.downcase] += 1 } | 66 | @words.each { |w| counts[w.downcase] += 1 } |
| 67 | 67 | ||
| 68 | counts | 68 | counts |
| 69 | end | 69 | end |
| 70 | 70 | ||
| 71 | # Computes the entropy of a given string +s+ inside the document. | 71 | # Computes the entropy of a given string +s+ inside the document. |
| 72 | # | 72 | # |
| 73 | # If the string parameter is composed of many words (i.e. tokens separated | 73 | # If the string parameter is composed of many words (i.e. tokens separated |
| 74 | # by whitespace(s)), it is considered as an ngram. | 74 | # by whitespace(s)), it is considered as an ngram. |
| 75 | # | 75 | # |
| 76 | # entropy("guitar") #=> 0.00389919463243839 | 76 | # entropy("guitar") #=> 0.00389919463243839 |
| 77 | def entropy(s) | 77 | def entropy(s) |
| 78 | en = 0.0 | 78 | en = 0.0 |
| 79 | counts = self.count_words | 79 | counts = self.count_words |
| 80 | 80 | ||
| 81 | s.split.each do |w| | 81 | s.split.each do |w| |
| 82 | p_wi = counts[w].to_f/@words.count.to_f | 82 | p_wi = counts[w].to_f/@words.count.to_f |
| 83 | en += p_wi*Math.log2(p_wi) | 83 | en += p_wi*Math.log2(p_wi) |
| 84 | end | 84 | end |
| 85 | 85 | ||
| 86 | en *= -1 | 86 | en *= -1 |
| 87 | en | 87 | en |
| 88 | end | 88 | end |
| 89 | 89 | ||
| 90 | 90 | ||
| 91 | 91 | ||
| 92 | def initialize(content) | 92 | def initialize(content) |
| 93 | @doc_content = content | 93 | @doc_content = content |
| 94 | @words = format_words | 94 | @words = format_words |
| 95 | end | 95 | end |
| 96 | 96 | ||
| 97 | protected :format_words | 97 | protected :format_words |
| 98 | end | 98 | end |
| 99 | 99 | ||
| 100 | # A WebDocument is a Document with a +url+. | 100 | # A WebDocument is a Document with a +url+. |
| 101 | class WebDocument < Document | 101 | class WebDocument < Document |
| 102 | attr_reader :url | 102 | attr_reader :url |
| 103 | 103 | ||
| 104 | # Returns the HTML text from the page of a given +url+. | 104 | # Returns the HTML text from the page of a given +url+. |
| 105 | def self.get_content(url) | 105 | def self.get_content(url) |
| 106 | require 'net/http' | 106 | require 'net/http' |
| 107 | Net::HTTP.get(URI.parse(url)) | 107 | Net::HTTP.get(URI.parse(url)) |
| 108 | end | 108 | end |
| 109 | 109 | ||
| 110 | # WebDocument constructor, the content of the Document is the HTML page | 110 | # WebDocument constructor, the content of the Document is the HTML page |
| 111 | # without the tags. | 111 | # without the tags. |
| 112 | def initialize(url) | 112 | def initialize(url) |
| 113 | @url = url | 113 | @url = url |
| 114 | super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags | 114 | super WebDocument.get_content(url).strip_javascripts.strip_stylesheets.strip_xml_tags |
| 115 | end | 115 | end |
| 116 | end | 116 | end |
| 117 | 117 | ||
| 118 | # A WikipediaPage is a WebDocument. | 118 | # A WikipediaPage is a WebDocument. |
| 119 | class WikipediaPage < WebDocument | 119 | class WikipediaPage < WebDocument |
| 120 | end | 120 | end |
| 121 | end | 121 | end |
| 122 | 122 |
lib/rir/string.rb
| 1 | #!/usr/bin/env ruby | 1 | #!/usr/bin/env ruby |
| 2 | 2 | ||
| 3 | # This file is a part of an Information Retrieval oriented Ruby library | 3 | # This file is a part of an Information Retrieval oriented Ruby library |
| 4 | # | 4 | # |
| 5 | # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> | 5 | # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com> |
| 6 | # | 6 | # |
| 7 | # This program is free software: you can redistribute it and/or modify | 7 | # This program is free software: you can redistribute it and/or modify |
| 8 | # it under the terms of the GNU General Public License as published by | 8 | # it under the terms of the GNU General Public License as published by |
| 9 | # the Free Software Foundation, either version 3 of the License, or | 9 | # the Free Software Foundation, either version 3 of the License, or |
| 10 | # (at your option) any later version. | 10 | # (at your option) any later version. |
| 11 | # | 11 | # |
| 12 | # This program is distributed in the hope that it will be useful, | 12 | # This program is distributed in the hope that it will be useful, |
| 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | # GNU General Public License for more details. | 15 | # GNU General Public License for more details. |
| 16 | # | 16 | # |
| 17 | # You should have received a copy of the GNU General Public License | 17 | # You should have received a copy of the GNU General Public License |
| 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 19 | 19 | ||
| 20 | # General module for many purposes related to Information Retrieval. | 20 | # General module for many purposes related to Information Retrieval. |
| 21 | module Rir | 21 | module RIR |
| 22 | 22 | ||
| 23 | # These are the default stopwords provided by Lemur. | 23 | # These are the default stopwords provided by Lemur. |
| 24 | Stoplist = [ | 24 | Stoplist = [ |
| 25 | "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", | 25 | "a", "anything", "anyway", "anywhere", "apart", "are", "around", "as", "at", "av", |
| 26 | "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", | 26 | "be", "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", |
| 27 | "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", | 27 | "behind", "being", "below", "beside", "besides", "between", "beyond", "both", "but", "by", |
| 28 | "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", | 28 | "can", "cannot", "canst", "certain", "cf", "choose", "contrariwise", "cos", "could", "cu", |
| 29 | "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", | 29 | "day", "do", "does", "doesn't", "doing", "dost", "doth", "double", "down", "dual", "during", |
| 30 | "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", | 30 | "each", "either", "else", "elsewhere", "enough", "et", "etc", "even", "ever", "every", |
| 31 | "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", | 31 | "everybody", "everyone", "everything", "everywhere", "except", "excepted", "excepting", |
| 32 | "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", | 32 | "exception", "exclude", "excluding", "exclusive", "far", "farther", "farthest", "few", "ff", |
| 33 | "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", | 33 | "first", "for", "formerly", "forth", "forward", "from", "front", "further", "furthermore", |
| 34 | "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", | 34 | "furthest", "get", "go", "had", "halves", "hardly", "has", "hast", "hath", "have", "he", |
| 35 | "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", | 35 | "hence", "henceforth", "her", "here", "hereabouts", "hereafter", "hereby", "herein", "hereto", |
| 36 | "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", | 36 | "hereupon", "hers", "herself", "him", "himself", "hindmost", "his", "hither", "hitherto", |
| 37 | "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", | 37 | "how", "however", "howsoever", "i", "ie", "if", "in", "inasmuch", "inc", "include", |
| 38 | "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", | 38 | "included", "including", "indeed", "indoors", "inside", "insomuch", "instead", "into", |
| 39 | "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", | 39 | "inward", "inwards", "is", "it", "its", "itself", "just", "kind", "kg", "km", "last", |
| 40 | "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", | 40 | "latter", "latterly", "less", "lest", "let", "like", "little", "ltd", "many", "may", "maybe", |
| 41 | "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", | 41 | "me", "meantime", "meanwhile", "might", "moreover", "most", "mostly", "more", "mr", "mrs", |
| 42 | "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", | 42 | "ms", "much", "must", "my", "myself", "namely", "need", "neither", "never", "nevertheless", |
| 43 | "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", | 43 | "next", "no", "nobody", "none", "nonetheless", "noone", "nope", "nor", "not", "nothing", |
| 44 | "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", | 44 | "notwithstanding", "now", "nowadays", "nowhere", "of", "off", "often", "ok", "on", "once", |
| 45 | "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", | 45 | "one", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", |
| 46 | "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", "provide", "quite", | 46 | "ourselves", "out", "outside", "over", "own", "per", "perhaps", "plenty", "provide", "quite", |
| 47 | "rather", "really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", | 47 | "rather", "really", "round", "said", "sake", "same", "sang", "save", "saw", "see", "seeing", |
| 48 | "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", | 48 | "seem", "seemed", "seeming", "seems", "seen", "seldom", "selves", "sent", "several", "shalt", |
| 49 | "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", | 49 | "she", "should", "shown", "sideways", "since", "slept", "slew", "slung", "slunk", "smote", |
| 50 | "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", | 50 | "so", "some", "somebody", "somehow", "someone", "something", "sometime", "sometimes", |
| 51 | "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", | 51 | "somewhat", "somewhere", "spake", "spat", "spoke", "spoken", "sprang", "sprung", "stave", |
| 52 | "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", | 52 | "staves", "still", "such", "supposing", "than", "that", "the", "thee", "their", "them", |
| 53 | "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", | 53 | "themselves", "then", "thence", "thenceforth", "there", "thereabout", "thereabouts", |
| 54 | "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", | 54 | "thereafter", "thereby", "therefore", "therein", "thereof", "thereon", "thereto", "thereupon", |
| 55 | "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", | 55 | "these", "they", "this", "those", "thou", "though", "thrice", "through", "throughout", "thru", |
| 56 | "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", | 56 | "thus", "thy", "thyself", "till", "to", "together", "too", "toward", "towards", "ugh", |
| 57 | "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", | 57 | "unable", "under", "underneath", "unless", "unlike", "until", "up", "upon", "upward", |
| 58 | "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", | 58 | "upwards", "us", "use", "used", "using", "very", "via", "vs", "want", "was", "we", "week", |
| 59 | "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", | 59 | "well", "were", "what", "whatever", "whatsoever", "when", "whence", "whenever", "whensoever", |
| 60 | "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", | 60 | "where", "whereabouts", "whereafter", "whereas", "whereat", "whereby", "wherefore", |
| 61 | "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", | 61 | "wherefrom", "wherein", "whereinto", "whereof", "whereon", "wheresoever", "whereto", |
| 62 | "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", | 62 | "whereunto", "whereupon", "wherever", "wherewith", "whether", "whew", "which", "whichever", |
| 63 | "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", | 63 | "whichsoever", "while", "whilst", "whither", "who", "whoa", "whoever", "whole", "whom", |
| 64 | "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", | 64 | "whomever", "whomsoever", "whose", "whosoever", "why", "will", "wilt", "with", "within", |
| 65 | "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", | 65 | "without", "worse", "worst", "would", "wow", "ye", "yet", "year", "yippee", "you", "your", |
| 66 | "yours", "yourself", "yourselves" | 66 | "yours", "yourself", "yourselves" |
| 67 | ] | 67 | ] |
| 68 | 68 | ||
| 69 | 69 | ||
| 70 | end | 70 | end |
| 71 | 71 | ||
| 72 | # Extension of the standard class String with useful functions. | 72 | # Extension of the standard class String with useful functions. |
| 73 | class String | 73 | class String |
| 74 | include Rir | 74 | include RIR |
| 75 | 75 | ||
| 76 | # Returns +true+ if +self+ belongs to Rir::Stoplist, +false+ otherwise. | 76 | # Returns +true+ if +self+ belongs to RIR::Stoplist, +false+ otherwise. |
| 77 | def is_stopword? | 77 | def is_stopword? |
| 78 | Stoplist.include?(self.downcase) | 78 | Stoplist.include?(self.downcase) |
| 79 | end | 79 | end |
| 80 | 80 | ||
| 81 | # Do not use. | 81 | # Do not use. |
| 82 | # TODO: revamp. Find out why this function is here. | 82 | # TODO: revamp. Find out why this function is here. |
| 83 | def remove_special_characters | 83 | def remove_special_characters |
| 84 | self.split.collect { |w| w.gsub(/\W/,' ').split.collect { |w| w.gsub(/\W/,' ').strip.sub(/\A.\z/, '')}.join(' ').strip.sub(/\A.\z/, '')}.join(' ') | 84 | self.split.collect { |w| w.gsub(/\W/,' ').split.collect { |w| w.gsub(/\W/,' ').strip.sub(/\A.\z/, '')}.join(' ').strip.sub(/\A.\z/, '')}.join(' ') |
| 85 | end | 85 | end |
| 86 | 86 | ||
| 87 | # Removes all XML-like tags from +self+. | 87 | # Removes all XML-like tags from +self+. |
| 88 | # | 88 | # |
| 89 | # s = "<html><body>test</body></html>" | 89 | # s = "<html><body>test</body></html>" |
| 90 | # s.strip_xml_tags! | 90 | # s.strip_xml_tags! |
| 91 | # s #=> "test" | 91 | # s #=> "test" |
| 92 | def strip_xml_tags! | 92 | def strip_xml_tags! |
| 93 | replace strip_with_pattern /<\/?[^>]*>/ | 93 | replace strip_with_pattern /<\/?[^>]*>/ |
| 94 | end | 94 | end |
| 95 | 95 | ||
| 96 | # Returns a copy of +self+ with all XML-like tags removed. | 96 | # Returns a copy of +self+ with all XML-like tags removed. |
| 97 | # | 97 | # |
| 98 | # s = "<html><body>test</body></html>" | 98 | # s = "<html><body>test</body></html>" |
| 99 | # s.strip_xml_tags #=> "test" | 99 | # s.strip_xml_tags #=> "test" |
| 100 | # s #=> "<html><body>test</body></html>" | 100 | # s #=> "<html><body>test</body></html>" |
| 101 | def strip_xml_tags | 101 | def strip_xml_tags |
| 102 | dup.strip_xml_tags! | 102 | dup.strip_xml_tags! |
| 103 | end | 103 | end |
| 104 | 104 | ||
| 105 | # Removes all Javascript sources from +self+. | 105 | # Removes all Javascript sources from +self+. |
| 106 | # | 106 | # |
| 107 | # s = "<script type='text/javascript'> | 107 | # s = "<script type='text/javascript'> |
| 108 | # var skin='vector', | 108 | # var skin='vector', |
| 109 | # stylepath='http://bits.wikimedia.org/skins-1.5' | 109 | # stylepath='http://bits.wikimedia.org/skins-1.5' |
| 110 | # </script> | 110 | # </script> |
| 111 | # | 111 | # |
| 112 | # test" | 112 | # test" |
| 113 | # s.strip_javascripts! | 113 | # s.strip_javascripts! |
| 114 | # s #=> "test" | 114 | # s #=> "test" |
| 115 | def strip_javascripts! | 115 | def strip_javascripts! |
| 116 | replace strip_with_pattern /<script type="text\/javascript">(.+?)<\/script>/m | 116 | replace strip_with_pattern /<script type="text\/javascript">(.+?)<\/script>/m |
| 117 | end | 117 | end |
| 118 | 118 | ||
| 119 | # Returns a copy of +self+ with all Javascript sources removed. | 119 | # Returns a copy of +self+ with all Javascript sources removed. |
| 120 | # | 120 | # |
| 121 | # s = "<script type='text/javascript'> | 121 | # s = "<script type='text/javascript'> |
| 122 | # var skin='vector', | 122 | # var skin='vector', |
| 123 | # stylepath='http://bits.wikimedia.org/skins-1.5' | 123 | # stylepath='http://bits.wikimedia.org/skins-1.5' |
| 124 | # </script> | 124 | # </script> |
| 125 | # | 125 | # |
| 126 | # test" | 126 | # test" |
| 127 | # s.strip_javascripts #=> "test" | 127 | # s.strip_javascripts #=> "test" |
| 128 | def strip_javascripts | 128 | def strip_javascripts |
| 129 | dup.strip_javascripts! | 129 | dup.strip_javascripts! |
| 130 | end | 130 | end |
| 131 | 131 | ||
| 132 | def strip_stylesheets! | 132 | def strip_stylesheets! |
| 133 | # TODO: revamp. It is unclear what this is for. | 133 | # TODO: revamp. It is unclear what this is for. |
| 134 | replace strip_with_pattern /<style type="text\/css">(.+?)<\/style>/m | 134 | replace strip_with_pattern /<style type="text\/css">(.+?)<\/style>/m |
| 135 | end | 135 | end |
| 136 | 136 | ||
| 137 | def strip_stylesheets | 137 | def strip_stylesheets |
| 138 | dup.strip_stylesheets! | 138 | dup.strip_stylesheets! |
| 139 | end | 139 | end |
| 140 | 140 | ||
| 141 | # Returns the text values inside all occurrences of an XML tag in +self+ | 141 | # Returns the text values inside all occurrences of an XML tag in +self+ |
| 142 | # | 142 | # |
| 143 | # s = "four-piece in <a href='#'>Indianapolis</a>, <a href='#'>Indiana</a> at the Murat Theatre" | 143 | # s = "four-piece in <a href='#'>Indianapolis</a>, <a href='#'>Indiana</a> at the Murat Theatre" |
| 144 | # s.extract_xmltags_values 'a' #=> ["Indianapolis", "Indiana"] | 144 | # s.extract_xmltags_values 'a' #=> ["Indianapolis", "Indiana"] |
| 145 | def extract_xmltags_values(tag_name) | 145 | def extract_xmltags_values(tag_name) |
| 146 | self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten | 146 | self.scan(/<#{tag_name}.*?>(.+?)<\/#{tag_name}>/).flatten |
| 147 | end | 147 | end |
| 148 | 148 | ||
| 149 | private | ||
| 150 | def strip_with_pattern(pattern) | 149 | def strip_with_pattern(pattern) |
| 151 | require 'cgi' | 150 | require 'cgi' |
| 152 | require 'kconv' | 151 | require 'kconv' |
| 153 | CGI::unescapeHTML(self.gsub(pattern,"")).toutf8 | 152 | CGI::unescapeHTML(self.gsub(pattern,"")).toutf8 |
| 154 | end | 153 | end |
| 154 | |||
| 155 | private :strip_with_pattern | ||
| 155 | end | 156 | end |
main.rb
| 1 | $LOAD_PATH.unshift File.expand_path(File.join(File.dirname(__FILE__), "lib")) | 1 | $LOAD_PATH.unshift File.expand_path(File.join(File.dirname(__FILE__), "lib")) |
| 2 | 2 | ||
| 3 | require 'rir' | 3 | require 'rir' |
| 4 | |||
| 5 | w = RIR::WikipediaPage.new("http://en.wikipedia.org/wiki/The_Dillinger_Escape_Plan") | ||
| 6 | p w.entropy("guitar") | ||
| 7 | |||
| 8 | params = RIR::Indri::Parameters.new("path_vers_mon_index") | ||
| 9 | p params.rule | ||
| 10 | q = RIR::Indri::IndriQuery.new("pouet", "bla", params) | ||
| 11 | puts q | ||
| 4 | 12 |