Commit 3e81fa06a9b8fbedc6ca161cb26b8a1884c93d36
1 parent
145387519e
Exists in
master
Add an entropy computation example. Words in an RIR::Document are now lowercased.
Showing 3 changed files with 12 additions and 9 deletions Side-by-side Diff
examples/entropy.rb
1 | +require 'rir' | |
2 | + | |
3 | +# Concatenates all lines from one file, without \n | |
4 | +readme = File.open('README.markdown').readlines.collect { |l| l.chomp }.join(" ") | |
5 | + | |
6 | +# Creates the document with a string | |
7 | +doc = RIR::Document.new readme | |
8 | + | |
9 | +# Outputs all the unique words of the document with their entropy scores | |
10 | +p doc.words.collect { |w| "#{w} => #{doc.entropy w}" } |
lib/rir/document.rb
... | ... | @@ -33,7 +33,7 @@ |
33 | 33 | |
34 | 34 | @doc_content.split.each do |w| |
35 | 35 | w.split(/\W/).each do |sw| |
36 | - wo.push(sw) if sw =~ /[a-zA-Z]/ | |
36 | + wo.push(sw.downcase) if sw =~ /[a-zA-Z]/ | |
37 | 37 | end |
38 | 38 | end |
39 | 39 | |
... | ... | @@ -63,7 +63,7 @@ |
63 | 63 | # count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... } |
64 | 64 | def count_words |
65 | 65 | counts = Hash.new { |h,k| h[k] = 0 } |
66 | - @words.each { |w| counts[w.downcase] += 1 } | |
66 | + @words.each { |w| counts[w] += 1 } | |
67 | 67 | |
68 | 68 | counts |
69 | 69 | end |
main.rb
... | ... | @@ -4,11 +4,4 @@ |
4 | 4 | |
5 | 5 | w = RIR::WikipediaPage.new("http://en.wikipedia.org/wiki/The_Dillinger_Escape_Plan") |
6 | 6 | p w.entropy("guitar") |
7 | - | |
8 | -params = RIR::Indri::Parameters.new("path_vers_mon_index") | |
9 | -q = RIR::Indri::IndriQuery.new("pouet", "bla", params) | |
10 | -puts q | |
11 | - | |
12 | -c = RIR::Corpus.new "/home/romain/INEX/BookTrack/corpus/" | |
13 | -puts c.files.size |