Blame view

lib/mirimiri/query.rb 4.13 KB
cd7432252   Romain Deveaud   adding missing files
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
  #!/usr/bin/env ruby
  
  #--
  # This file is a part of the mirimiri library
  #
  # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
  #
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU General Public License as published by
  # the Free Software Foundation, either version 3 of the License, or
  # (at your option) any later version.
  #
  # This program is distributed in the hope that it will be useful,
  # but WITHOUT ANY WARRANTY; without even the implied warranty of
  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  # GNU General Public License for more details.
  #
  # You should have received a copy of the GNU General Public License
  # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  #++
  
  # Base class for queries: a plain container exposing the query text.
  class Query
    # The query text; nil until assigned.
    attr_accessor :query
  end
  
  module Indri
  
    # Holds run settings and serialises them as a sequence of XML-like
    # elements, one per line (see #to_s).
    class Parameters
      attr_accessor :index_path, :memory, :count, :offset, :run_id, :print_query, :print_docs, :rule, :baseline

      # corpus         - path of the first index; stored as index_path and as
      #                  the first entry of the index list.
      # count          - value emitted in <count>.
      # mem            - value emitted in <memory>.
      # threads        - value emitted in <threads>.
      # offset         - value emitted in <queryOffset>.
      # run_id         - value emitted in <runID>.
      # print_passages - truthy/falsy, stored as the string "true"/"false".
      # print_query    - truthy/falsy, stored as the string "true"/"false".
      # print_docs     - truthy/falsy, stored as the string "true"/"false".
      def initialize(corpus,count="1000",mem="1g",threads="1",offset="1",run_id="default",print_passages=false,print_query=false,print_docs=false)
        @index_path     = corpus
        @memory         = mem
        @count          = count
        @threads        = threads
        @offset         = offset
        @run_id         = run_id
        # The flags are kept as strings because they are interpolated verbatim
        # into the serialised output.
        @print_query    = print_query    ? "true" : "false"
        @print_docs     = print_docs     ? "true" : "false"
        @print_passages = print_passages ? "true" : "false"
        @indexes        = [corpus]
      end

      # Returns the settings as a String of elements, each terminated by a
      # newline. One <index> element is emitted per registered index. When a
      # baseline is set it is emitted instead of the rule.
      def to_s
        lines = ["<memory>#{@memory}</memory>"]
        @indexes.each { |path| lines << "<index>#{path}</index>" }
        lines << "<count>#{@count}</count>"
        lines << "<threads>#{@threads}</threads>"
        lines << if @baseline.nil?
                   "<rule>#{@rule}</rule>"
                 else
                   "<baseline>#{@baseline}</baseline>"
                 end
        lines << "<trecFormat>true</trecFormat>"
        lines << "<queryOffset>#{@offset}</queryOffset>"
        lines << "<runID>#{@run_id}</runID>"
        lines << "<printPassages>#{@print_passages}</printPassages>"
        lines << "<printQuery>#{@print_query}</printQuery>"
        lines << "<printDocuments>#{@print_docs}</printDocuments>"

        lines.map { |line| "#{line}\n" }.join
      end

      # Registers an additional index path; returns the updated index list.
      def add_index(path)
        @indexes << path
      end
    end
cd7432252   Romain Deveaud   adding missing files
81

aa386f553   Romain Deveaud   changes in query,...
82
    # Legacy single-query representation: an identifier plus the query text.
    class IndriQueryOld < Query
      attr_accessor :id, :query, :rule

      # id    - query identifier, emitted in <number>.
      # query - query text, emitted in <text>.
      def initialize(id,query)
        @id    = id
        @query = query
      end

      # Returns the query as a <query> element whose lines are each terminated
      # by a newline.
      def to_s
        "<query>\n" \
        "<number>#{@id}</number>\n" \
        "<text>#{@query}</text>\n" \
        "</query>\n"
      end

      # Runs the external IndriRunQuery program for this query and returns its
      # standard output as a String.
      #
      # params - object responding to index_path and count.
      #
      # The command is given as an argument array so that no shell parses the
      # query text: quotes or shell metacharacters in the query can neither
      # break the command line nor inject extra commands.
      def exec(params)
        command = [
          'IndriRunQuery',
          "-query=#{@query}",
          "-index=#{params.index_path}",
          "-count=#{params.count}",
          '-rule=method:dirichlet,mu:2500',
          '-trecFormat'
        ]
        IO.popen(command, &:read)
      end
    end
aa386f553   Romain Deveaud   changes in query,...
107
108
109
110
    # Query whose attributes are supplied through a Hash, with an optional
    # extra argument string.
    class IndriQuery < Query
      attr_accessor :query, :count, :sm_method, :sm_param, :sm_value, :args

      # atts - Hash; each non-nil value is stored in the instance variable
      #        named after its key (e.g. :query => "...", :sm_method => "...").
      # args - optional String kept as-is in #args.
      #
      # Raises ArgumentError when atts is not a Hash or args is neither a
      # String nor nil.
      def initialize(atts={},args=nil)
        raise ArgumentError, 'Argument 1 must be a Hash' unless atts.is_a? Hash
        raise ArgumentError, 'Argument 2 must be a String' unless args.nil? || args.is_a?(String)

        atts.each do |name, value|
          instance_variable_set("@#{name}", value) unless value.nil?
        end
        @args = args
      end

      # Runs the external `clarity` program for this query and returns the
      # stripped text following the last "=" of its standard output.
      #
      # index_path - index passed as -index.
      # terms      - value passed as -terms (default 10).
      # documents  - value passed as -documents (default 5).
      #
      # The command is given as an argument array so that no shell parses the
      # query or smoothing values: embedded quotes, "$" or backticks are passed
      # through literally instead of being expanded or executed.
      def clarity(index_path,terms=10,documents=5)
        command = [
          'clarity',
          "-index=#{index_path}",
          "-documents=#{documents}",
          "-terms=#{terms}",
          "-smoothing=method:#{@sm_method},#{@sm_param}:#{@sm_value}",
          "-query=#{query}"
        ]
        IO.popen(command, &:read).split("=").last.strip
      end
    end
845768f8a   Romain Deveaud   creating a group ...
123
124
    # A set of numbered queries together with the parameters shared by all of
    # them, serialisable as a single <parameters> document.
    class IndriQueries
      attr_accessor :params, :queries

      # params - object responding to rule, rule= and to_s. Its rule is
      #          filled in with a default when it is nil.
      def initialize(params)
        @params  = params
        @queries = {}

        # Here we set the default retrieval model as Language Modeling
        # with a Dirichlet smoothing at 2500.
        # TODO: maybe a Rule class...
        @params.rule = 'method:dirichlet,mu:2500' if @params.rule.nil?
      end

      # Stores query under the integer value of id (id.to_i), replacing any
      # query already stored under that number. Returns query.
      def push(id,query)
        @queries[id.to_i] = query
      end

      # Returns the <parameters> document: the serialised params followed by
      # one <query> element per stored query, ordered by ascending number.
      # The closing tag is not followed by a newline.
      def to_s
        doc = "<parameters>\n".dup
        doc << @params.to_s
        @queries.sort_by { |number, _text| number }.each do |number, text|
          doc << "<query>\n" \
                 "<number>#{number}</number>\n" \
                 "<text>#{text}</text>\n" \
                 "</query>\n"
        end
        doc << "</parameters>"
      end
    end
  
  end