diff --git a/README.markdown b/README.markdown
index 8aa5416..5a601e4 100644
--- a/README.markdown
+++ b/README.markdown
@@ -1,6 +1,12 @@
 # mirimiri
 
-Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
+The various tools of this project were developed for research purposes during 
+my Ph.D. and heavily rely on the use of Indri (<http://lemurproject.org/indri.php>).
+Setting up Ruby is not as painful as it used to be since RVM (<https://rvm.io/>), 
+visit at least these two websites before trying to use `mirimiri`.
+
+
+Copyright (C) 2010-2013 Romain Deveaud <romain.deveaud@gmail.com>
 
 > The Fijian monkey-faced bat (Mirimiri acrodonta), also called the Fiji 
 > Flying Fox, is an Old World fruit bat endemic to Fiji. It was discovered 
diff --git a/lib/mirimiri/document.rb b/lib/mirimiri/document.rb
index 8a7aa31..b738e0c 100644
--- a/lib/mirimiri/document.rb
+++ b/lib/mirimiri/document.rb
@@ -25,7 +25,7 @@ module Mirimiri
 
   # A Document is a bag of words and is constructed from a string.
   class Document
-    attr_reader :words, :doc_content, :count_words
+    attr_reader :words, :doc_content, :xcount
 
     # Any non-word characters are removed from the words (see http://perldoc.perl.org/perlre.html
     # and the \\W special escape).
@@ -36,7 +36,7 @@ module Mirimiri
 
       @doc_content.split.each do |w|
         w.split(/\W/).each do |sw| 
-          wo.push(sw.downcase) if sw =~ /[a-zA-Z]/ 
+          wo.push(sw.downcase) if sw =~ /[[:alpha:]]/ 
         end
       end
       
@@ -80,7 +80,7 @@ module Mirimiri
       en = 0.0
 
       s.split.each do |w|
-        p_wi = @count_words[w].to_f/@words.count.to_f
+        p_wi = @xcount[w].to_f/@words.count.to_f
         en += p_wi*Math.log2(p_wi)
       end
 
@@ -101,7 +101,7 @@ module Mirimiri
       size = s.split.size
       
       if size == 1
-        p_wi = @count_words[s].to_f/@words.count.to_f
+        p_wi = @xcount[s].to_f/@words.count.to_f
         en += p_wi*Math.log(p_wi)
       elsif size > 1
         ng_size = ngrams(size)
@@ -117,14 +117,28 @@ module Mirimiri
     #
     #   tf("guitar") #=> 0.000380372765310004
     def tf(s)
-      @count_words[s].to_f/@words.size.to_f
+      @xcount[s].to_f/@words.size.to_f
     end
 
+    # Computes the KL divergence between the language model of the +self+
+    # and the language model of +doc+. 
+    #
+    # KL is not symmetric, see http://en.wikipedia.org/wiki/Kullback-Leibler_divergence
+    # for more information.
+    #
+    #   d1.kl(d2) #=> 0.2971808085725761
+    def kl(doc)
+      raise ArgumentError, 'Argument is not a Mirimiri::Document' unless doc.is_a? Mirimiri::Document 
+     
+      vocab = self.words & doc.words
+
+      vocab.inject(0.0) { |res,w| res + self.tf(w)*Math.log(self.tf(w)/doc.tf(w)) }
+    end
 
     def initialize(content="")
       @doc_content = content
       @words = format_words
-      @count_words = count_words
+      @xcount = count_words
       @ngrams = {}
     end
 
@@ -149,7 +163,7 @@ module Mirimiri
 
       @url = url
       content = only_tags.nil? ? WebDocument.get_content(url) : WebDocument.get_content(url).extract_xmltags_values(only_tags).join("")
-      super Sanitize.clean(content.unaccent.toutf8.force_encoding("UTF-8"), :remove_contents => ['script'])
+      super Sanitize.clean(content, :remove_contents => ['script','style'])
     end
   end
 
@@ -161,9 +175,9 @@ module Mirimiri
 
 
     def self.search_wikipedia_titles(name)
-      raise ArgumentError, "Bad encoding", name unless name.isutf8
+#      raise ArgumentError, "Bad encoding", name unless name.isutf8
 
-      res = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.escape name}&format=xml" ).unaccent.toutf8).elements['api/query/search']
+      res = REXML::Document.new(Net::HTTP.get( URI.parse "http://en.wikipedia.org/w/api.php?action=query&list=search&srsearch=#{URI.escape name}&srlimit=20&format=xml" ).force_encoding("ISO-8859-1").encode("UTF-8")).elements['api/query/search']
 
      res.collect { |e| e.attributes['title'] } unless res.nil?
     end
diff --git a/lib/mirimiri/index.rb b/lib/mirimiri/index.rb
index 0a89694..1caeb09 100644
--- a/lib/mirimiri/index.rb
+++ b/lib/mirimiri/index.rb
@@ -32,7 +32,7 @@ module Indri
     end
 
     def runquery indriquery
-      raise ArgumentError, 'Argument is not an IndriQuery' unless indriquery.is_a? Indri::IndriQuery
+      raise ArgumentError, 'Argument is not an Indri::IndriQuery' unless indriquery.is_a? Indri::IndriQuery
   
       query = "IndriRunQuery -query=\"#{indriquery.query}\" -index=#{@path}"
 
diff --git a/lib/mirimiri/query.rb b/lib/mirimiri/query.rb
index 4f038b0..7f9f02d 100644
--- a/lib/mirimiri/query.rb
+++ b/lib/mirimiri/query.rb
@@ -20,6 +20,9 @@
 #++
 
 class Query
+  attr_accessor :query
+
+
 end
 
 module Indri
@@ -27,7 +30,7 @@ module Indri
   class Parameters
     attr_accessor :index_path, :memory, :count, :offset, :run_id, :print_query, :print_docs, :rule, :baseline
 
-    def initialize(corpus,count="1000",mem="1g",threads="1",offset="1",run_id="default",print_query=false,print_docs=false)
+    def initialize(corpus,count="1000",mem="1g",threads="1",offset="1",run_id="default",print_passages=false,print_query=false,print_docs=false)
       @index_path  = corpus
       @memory      = mem
       @count       = count
@@ -36,11 +39,15 @@ module Indri
       @run_id      = run_id
       @print_query = print_query ? "true" : "false"
       @print_docs  = print_docs  ? "true" : "false"
+      @print_passages  = print_passages  ? "true" : "false"
+      @indexes     = [corpus]
     end
 
     def to_s
       h = "<memory>#{@memory}</memory>\n"
-      h += "<index>#{@index_path}</index>\n"
+      @indexes.each do |i|
+        h += "<index>#{i}</index>\n"
+      end
       h += "<count>#{@count}</count>\n"
       h += "<threads>#{@threads}</threads>\n"
       unless @baseline.nil?
@@ -51,11 +58,16 @@ module Indri
       h += "<trecFormat>true</trecFormat>\n"
       h += "<queryOffset>#{@offset}</queryOffset>\n"
       h += "<runID>#{@run_id}</runID>\n"
+      h += "<printPassages>#{@print_passages}</printPassages>\n"
       h += "<printQuery>#{@print_query}</printQuery>\n"
       h += "<printDocuments>#{@print_docs}</printDocuments>\n"
 
       h
     end
+
+    def add_index path
+      @indexes << path
+    end
   end
 
   class IndriQueryOld < Query
@@ -92,6 +104,10 @@ module Indri
       raise ArgumentError, 'Argument 2 must be a String' unless (args.is_a?(String) || args.nil?)
       @args = args 
     end
+
+    def clarity index_path,terms=10,documents=5
+      `clarity -index=#{index_path} -documents=#{documents} -terms=#{terms} -smoothing=\"method:#{@sm_method},#{@sm_param}:#{@sm_value}\" -query=\"#{query}\"`.split("=").last.strip
+    end
   end
 
   class IndriQueries
diff --git a/lib/mirimiri/result.rb b/lib/mirimiri/result.rb
new file mode 100644
index 0000000..71d0776
--- /dev/null
+++ b/lib/mirimiri/result.rb
@@ -0,0 +1,62 @@
+#!/usr/bin/env ruby
+
+#--
+# This file is a part of the mirimiri library
+#
+# Copyright (C) 2010-2012 Romain Deveaud <romain.deveaud@gmail.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#++
+
+module Mirimiri
+
+  # This class represents one line of a TREC-formatted retrieval
+  # result. Typical output of Indri or Terrier.
+  class TrecResult
+    attr_accessor :topic, :doc, :rank, :score, :run
+
+    def initialize arg
+      t = arg.split 
+      @topic = t[0]
+      @doc   = t[2]
+      @rank  = t[3]
+      @score = t[4]
+      @run   = t[5]
+    end
+  end
+
+  # This class represents the output of trec_eval, when
+  # -q option is given.
+  class TrecEval
+    attr_accessor :metric, :run, :queries
+
+    def initialize arg
+      @queries = {}
+
+      arg.each_line do |line|
+        t = line.split
+        @metric = t[0] if @metric.nil?
+        @queries[t[1]] = t[2].to_f if t[1].is_integer?
+      end
+    end
+  end
+
+  # An array of TrecResult, or a run.
+  class TrecResults < Array
+
+    def initialize args
+      super args.collect { |res| TrecResult.new res }
+    end
+  end
+end
diff --git a/lib/mirimiri/string.rb b/lib/mirimiri/string.rb
index b94fa28..212823d 100644
--- a/lib/mirimiri/string.rb
+++ b/lib/mirimiri/string.rb
@@ -67,7 +67,8 @@ module Mirimiri
 "whoever","whole","whom","whomever","whomsoever","whose","whosoever","why","will",
 "wilt","with","within","without","worse","worst","would","wow","ye","yet","year",
 "yippee","you","your","yours","yourself","yourselves",
-  "edit", "new", "page", "article", "http", "www", "com", "org", "wikipedia", "en","html"
+  "edit", "new", "page", "article", "http", "www", "com", "org", "wikipedia", "en","html",
+  "amp","nbsp","quot"
   ]
 
   Transmap = { 
@@ -158,6 +159,7 @@ class String
 
   def unaccent
     # force_encoding is needed with ruby1.9
+#    Transmap.inject(self) { |str, (utf8, asc)| str.gsub(utf8, asc) }
     Transmap.inject(self.force_encoding("ASCII-8BIT")) { |str, (utf8, asc)| str.gsub(utf8, asc) }
   end
 
@@ -166,7 +168,15 @@ class String
     self.split.all? { |e| Stoplist.include?(e.downcase) }
   end
 
-  def sequential_dependence_model t=0.85,o=0.10,u=0.05,field=nil
+  def is_integer?
+    !self.empty? && self =~ /\A\d+\Z/ 
+  end
+
+  def numeric?
+    Float(self) != nil rescue false
+  end
+
+  def sequential_dependence_model field=nil,t=0.85,o=0.10,u=0.05
     d = Mirimiri::Document.new self
 
     if field.nil?
@@ -288,7 +298,13 @@ module Indri
   class IndriPrintedDocuments < String
 
     def extract_docs
-      self.split(/\d+ Q0 .+ \d+ -\d+.\d+ .+/).delete_if{ |x| x.empty? }  
+      self.split(/\d+ Q0 .+ \d+ -\d+.\d+ .+/).delete_if{ |x| x.empty? } 
+    end
+
+    def extract_docs_score # returns [text chunks between result lines, captured scores, captured numeric .xml names]
+      scores = scan(/\d+ Q0 .+ \d+ (-\d+.\d+) .+/).flatten
+      names  = scan(/\d+ Q0 (.+) \d+ -\d+.\d+ .+/).map { |groups| groups.first.scan(/(\d+).xml/).first }
+      [split(/\d+ Q0 .+ \d+ -\d+.\d+ .+/).reject { |chunk| chunk.empty? }, scores, names]
     end
   end
 end
diff --git a/main.rb b/main.rb
index 170caa3..114056a 100644
--- a/main.rb
+++ b/main.rb
@@ -3,10 +3,26 @@ $LOAD_PATH.unshift File.expand_path(File.join(File.dirname(__FILE__), "lib"))
 require 'mirimiri'
 require "benchmark"
 
+# Fetch the text content of two Wikipedia pages using their URLs
 w = Mirimiri::WikipediaPage.new("http://en.wikipedia.org/wiki/The_Dillinger_Escape_Plan")
+u = Mirimiri::WikipediaPage.new("http://en.wikipedia.org/wiki/Pantera")
+
+# Compute the entropy of a word sequence, using `w` as context
 p w.entropy("dillinger escape plan")
 p w.tf("guitar")
 
+# Compute the KL-Divergence between the two pages
+p w.kl u
+
+
+# Mirimiri also comprises Indri-related classes
+
+# Building an Indri query
 query = Indri::IndriQuery.new({:query => "dillinger escape plan".sequential_dependence_model, :count => 10}, "-trecFormat=true -printDocuments=true")
+
+# Initializing the index on which the query will be executed
+# Must have been previously built using `IndriBuildIndex`
 index = Indri::IndriIndex.new "/mnt/disk1/ClueWeb09_English_1noSpam"
+
+# Run the query on the index and fetch the text of the documents
 s = Indri::IndriPrintedDocuments.new(index.runquery(query).force_encoding("ISO-8859-1").encode("UTF-8"))