Commit b843bae6b0589e55e85ce55756b97ae3cbd7d6d4

Authored by Romain Deveaud
1 parent 35f45ab54d
Exists in master

new files + tests

Showing 4 changed files with 157 additions and 0 deletions Inline Diff

File was created 1 require 'rake'
2 require 'rake/testtask'
3
# Defines the `test` task: runs every file matching test/**/*_test.rb with
# lib, lib/rir and test appended to the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'lib/rir', 'test')
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

# A bare `rake` invocation runs the test task.
task default: :test
11
File was created 1 #!/usr/bin/env ruby
2
3 # This file is a part of an Information Retrieval oriented Ruby library
4 #
5 # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 # General module for many purposes related to Information Retrieval.
module RIR

  # A document collection rooted at a directory on disk.
  class Corpus
    # Root directory of the corpus.
    attr_accessor :path

    # path - String path of the directory holding the corpus.
    def initialize(path)
      @path = path
    end

    # Lists the entries of the corpus, searching recursively below +path+.
    #
    # Only entries whose name contains a dot are matched (glob "*.*").
    # Returned paths are prefixed with +path+, so they can be opened
    # regardless of the current working directory.
    #
    # Returns an Array of String paths (empty when nothing matches).
    def files
      # Anchor the pattern at @path; a bare "**/*.*" would scan the current
      # working directory and ignore the corpus location altogether.
      Dir.glob(File.join(@path, '**', '*.*'))
    end
  end

end
36
File was created 1 #!/usr/bin/env ruby
2
3 # This file is a part of an Information Retrieval oriented Ruby library
4 #
5 # Copyright (C) 2010-2011 Romain Deveaud <romain.deveaud@gmail.com>
6 #
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
20 # General module for many purposes related to Information Retrieval.
module RIR

  # Common ancestor of the query classes of the library.
  class Query
  end

  module Indri

    # Retrieval settings serialized as the opening part of an XML
    # <parameters> document (see #to_s).
    class Parameters
      attr_accessor :corpus, :memory, :count, :offset, :run_id, :print_query, :print_docs, :rule, :baseline

      # corpus      - value written to the <index> element.
      # mem         - value written to <memory> (default "1g").
      # count       - value written to <count> (default "1000").
      # offset      - value written to <queryOffset> (default "1").
      # run_id      - value written to <runID> (default "default").
      # print_query - truthy/falsy flag, stored as the String "true"/"false".
      # print_docs  - truthy/falsy flag, stored as the String "true"/"false".
      #
      # +rule+ and +baseline+ start out as nil and are set through their
      # accessors.
      def initialize(corpus, mem = "1g", count = "1000", offset = "1", run_id = "default", print_query = false, print_docs = false)
        @corpus      = corpus
        @memory      = mem
        @count       = count
        @offset      = offset
        @run_id      = run_id
        @print_query = print_query ? "true" : "false"
        @print_docs  = print_docs ? "true" : "false"
        # Declared explicitly so both optional settings always exist.
        @rule        = nil
        @baseline    = nil
      end

      # Serializes the settings, one XML element per line, each line
      # terminated by a newline.
      #
      # The <parameters> element is deliberately left open: IndriQuery#to_s
      # appends the <query> element and the closing tag.
      #
      # Returns a String.
      def to_s
        # A baseline, when set, is written instead of the rule.
        model = if @baseline.nil?
                  "<rule>#{@rule}</rule>"
                else
                  "<baseline>#{@baseline}</baseline>"
                end

        [
          "<parameters>",
          "<memory>#{@memory}</memory>",
          "<index>#{@corpus}</index>",
          "<count>#{@count}</count>",
          model,
          "<queryOffset>#{@offset}</queryOffset>",
          "<runID>#{@run_id}</runID>",
          "<printQuery>#{@print_query}</printQuery>",
          "<printDocuments>#{@print_docs}</printDocuments>"
        ].map { |line| "#{line}\n" }.join
      end
    end

    # A single numbered query bound to a set of Parameters.
    class IndriQuery < Query
      # Default retrieval model: Language Modeling with a Dirichlet
      # smoothing at 2500.
      DEFAULT_RULE = 'method:dirichlet,mu:2500'.freeze

      # NOTE: the serialized rule is the one held by +params+; the +rule+
      # accessor of the query itself is not read by #to_s.
      attr_accessor :id, :query, :params, :rule

      # id     - value written to the <number> element.
      # query  - value written to the <text> element.
      # params - Parameters instance (or any object responding to +rule+,
      #          +rule=+ and +to_s+). It is modified in place: its rule is
      #          set to DEFAULT_RULE when it has none.
      def initialize(id, query, params)
        @id     = id
        @query  = query
        @rule   = nil
        @params = params
        # TODO: maybe a Rule class...
        # dup hands out a fresh mutable String, as a string literal would.
        @params.rule = DEFAULT_RULE.dup if @params.rule.nil?
      end

      # Serializes the parameters followed by the <query> element and the
      # closing </parameters> tag (no trailing newline).
      #
      # Returns a String.
      def to_s
        query_lines = [
          "<query>",
          "<number>#{@id}</number>",
          "<text>#{@query}</text>",
          "</query>",
          "</parameters>"
        ]

        @params.to_s + query_lines.join("\n")
      end
    end

  end
end
89
File was created 1 #!/usr/bin/env ruby
2
3 require 'test/unit'
4
5 require 'string'
6
7 class TestString < Test::Unit::TestCase
8
9 def test_extract_xml
10 s = "four-piece in <a>Indianapolis</a>, <a>Indiana</a> at the Murat Theatre"
11 assert_equal(["Indianapolis", "Indiana"],s.extract_xmltags_values('a'))
12 end
13
14 def test_stopword
15 assert_equal(true, "is".is_stopword?)
16 assert_equal(true, "seen".is_stopword?)
17 assert_equal(false, "totally".is_stopword?)
18 assert_equal(false, "Paris".is_stopword?)
19 end
20
21 def test_strip_xml
22 assert_equal("testme", "<test>testme</test>".strip_xml_tags)
23 end
24 end
25