# verif_corpus.rb 1.92 KB
#!/usr/bin/env ruby

require "rubygems"

# Prints the training examples of +task+ whose predicted label differs from
# the gold label.
#
# Predictions are read from every file matching
# "results_train/cnn_<task>*.txt"; each line is "<id>\t<scores>" where the
# scores are separated by spaces. Scores are summed per id and per column
# over all files, and the label at the position of the highest sum is the
# prediction. Gold labels and tweet texts are read from
# "db/<task>-train.csv" (tab-separated columns: id, tweet, label).
#
# One line is printed per mismatch: "<id> <predicted> <gold> -- <tweet>".
#
# task - "task1", "task2" or "task3". For any other value the input files
#        are still read but nothing is printed.
#
# Raises Errno::ENOENT when "db/<task>-train.csv" does not exist.
def launch(task)
    labels_by_task = {
        "task1" => ["negative", "positive", "objective", "mixed"],
        "task2" => ["figurative", "nonfigurative"],
        "task3" => ["negative", "positive", "objective", "mixed"]
    }
    labels = labels_by_task[task]

    # One accumulator slot per label. A fixed width of 4 would leave two
    # 0.0 padding slots for task2, and those would win the argmax whenever
    # every real score is negative.
    width = labels ? labels.size : 0

    sums = {}
    # Sorted so that the report order does not depend on directory order.
    Dir.glob("results_train/cnn_#{task}*.txt").sort.each do |path|
        # File.foreach closes the file even if the block raises.
        File.foreach(path) do |line|
            id, scores = line.chomp.split("\t")
            # Skip blank lines and lines that have no score column.
            next if id.nil? || scores.nil?

            acc = (sums[id] ||= Array.new(width, 0.0))
            scores.split(" ").first(width).each_with_index do |score, i|
                acc[i] += score.to_f
            end
        end
    end

    gold = {}
    tweet = {}
    File.foreach("db/#{task}-train.csv") do |line|
        id, text, label = line.chomp.split("\t")
        gold[id] = label
        tweet[id] = text
    end

    # Unknown task: there is no label set to compare against.
    return if labels.nil?

    sums.each do |id, acc|
        predicted = labels[acc.index(acc.max)]
        next if gold[id] == predicted

        puts "#{id} #{predicted} #{gold[id]} -- #{tweet[id]}"
    end
end


# Prints the command-line usage followed by the author contact line.
#
# The usage line names the script as it was invoked and shows the single
# required argument, the task to check.
def errarg
    puts "Usage : #{$PROGRAM_NAME} <task1|task2|task3>"
    puts "Mickael Rouvier <mickael.rouvier@univ-avignon.fr>"
end


# Script entry point: with exactly one command-line argument, run the check
# for that task; with any other argument count, show the usage message.
case ARGV.size
when 1
    launch(ARGV[0])
else
    errarg
end