Commit 3e81fa06a9b8fbedc6ca161cb26b8a1884c93d36
1 parent
145387519e
Exists in
master
Add an entropy computation example. Words in an RIR::Document are now lowercased.
Showing 3 changed files with 12 additions and 9 deletions Side-by-side Diff
examples/entropy.rb
| 1 | +require 'rir' | |
| 2 | + | |
| 3 | +# Concatenates all lines from one file, without \n | |
| 4 | +readme = File.open('README.markdown').readlines.collect { |l| l.chomp }.join(" ") | |
| 5 | + | |
| 6 | +# Creates the document with a string | |
| 7 | +doc = RIR::Document.new readme | |
| 8 | + | |
| 9 | +# Outputs all the unique words of the document with their entropy scores | |
| 10 | +p doc.words.collect { |w| "#{w} => #{doc.entropy w}" } |
lib/rir/document.rb
| ... | ... | @@ -33,7 +33,7 @@ |
| 33 | 33 | |
| 34 | 34 | @doc_content.split.each do |w| |
| 35 | 35 | w.split(/\W/).each do |sw| |
| 36 | - wo.push(sw) if sw =~ /[a-zA-Z]/ | |
| 36 | + wo.push(sw.downcase) if sw =~ /[a-zA-Z]/ | |
| 37 | 37 | end |
| 38 | 38 | end |
| 39 | 39 | |
| ... | ... | @@ -63,7 +63,7 @@ |
| 63 | 63 | # count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } |
| 64 | 64 | def count_words |
| 65 | 65 | counts = Hash.new { |h,k| h[k] = 0 } |
| 66 | - @words.each { |w| counts[w.downcase] += 1 } | |
| 66 | + @words.each { |w| counts[w] += 1 } | |
| 67 | 67 | |
| 68 | 68 | counts |
| 69 | 69 | end |
main.rb
| ... | ... | @@ -4,11 +4,4 @@ |
| 4 | 4 | |
| 5 | 5 | w = RIR::WikipediaPage.new("http://en.wikipedia.org/wiki/The_Dillinger_Escape_Plan") |
| 6 | 6 | p w.entropy("guitar") |
| 7 | - | |
| 8 | -params = RIR::Indri::Parameters.new("path_vers_mon_index") | |
| 9 | -q = RIR::Indri::IndriQuery.new("pouet", "bla", params) | |
| 10 | -puts q | |
| 11 | - | |
| 12 | -c = RIR::Corpus.new "/home/romain/INEX/BookTrack/corpus/" | |
| 13 | -puts c.files.size |