Commit 3e81fa06a9b8fbedc6ca161cb26b8a1884c93d36

Authored by Romain Deveaud
1 parent 145387519e
Exists in master

Adds an entropy computation example. Words in an RIR::Document are now lowercased.

Showing 3 changed files with 12 additions and 9 deletions Side-by-side Diff

  1 +require 'rir'
  2 +
  3 +# Joins all lines of the file into a single string, replacing each trailing newline with a space
  4 +readme = File.open('README.markdown').readlines.collect { |l| l.chomp }.join(" ")
  5 +
  6 +# Creates the document with a string
  7 +doc = RIR::Document.new readme
  8 +
  9 +# Outputs all the unique words of the document with their entropy scores
  10 +p doc.words.collect { |w| "#{w} => #{doc.entropy w}" }
... ... @@ -33,7 +33,7 @@
33 33  
34 34 @doc_content.split.each do |w|
35 35 w.split(/\W/).each do |sw|
36   - wo.push(sw) if sw =~ /[a-zA-Z]/
  36 + wo.push(sw.downcase) if sw =~ /[a-zA-Z]/
37 37 end
38 38 end
39 39  
... ... @@ -63,7 +63,7 @@
63 63 # count_words #=> { "guitar"=>1, "bass"=>3, "album"=>20, ... }
64 64 def count_words
65 65 counts = Hash.new { |h,k| h[k] = 0 }
66   - @words.each { |w| counts[w.downcase] += 1 }
  66 + @words.each { |w| counts[w] += 1 }
67 67  
68 68 counts
69 69 end
... ... @@ -4,11 +4,4 @@
4 4  
5 5 w = RIR::WikipediaPage.new("http://en.wikipedia.org/wiki/The_Dillinger_Escape_Plan")
6 6 p w.entropy("guitar")
7   -
8   -params = RIR::Indri::Parameters.new("path_vers_mon_index")
9   -q = RIR::Indri::IndriQuery.new("pouet", "bla", params)
10   -puts q
11   -
12   -c = RIR::Corpus.new "/home/romain/INEX/BookTrack/corpus/"
13   -puts c.files.size