results_test.rb 7.97 KB
#!/usr/bin/env ruby

require "rubygems"
require "text-table"



# Per-class confusion counts for a single-label classifier, together with
# the precision / recall / F-score aggregates derived from them.
#
# Labels can be any object usable as a Hash key. Every ratio is returned as
# a Float and is 0.0 whenever its denominator would be zero, so no metric
# ever yields NaN or raises ZeroDivisionError.
class ConfusionMatrix
    def initialize
        # label => {"tp" => n, "tn" => n, "fp" => n, "fn" => n}
        @h = {}
        # Number of (prediction, truth) pairs stored so far.
        @total = 0
    end

    # Yields every label seen so far (as a prediction or as a truth), in
    # first-seen order. Returns an Enumerator when called without a block.
    def keys(&block)
        return @h.each_key unless block_given?
        @h.each_key(&block)
    end

    # Records one observation.
    #
    # actual - the label predicted by the system.
    # truth  - the reference label.
    #
    # Returns the number of observations stored so far.
    def store(actual, truth)
        @h[actual] ||= new_counts
        @h[truth] ||= new_counts
        if actual == truth
            @h[actual]["tp"] += 1
        else
            @h[actual]["fp"] += 1
            @h[truth]["fn"] += 1
            # NOTE(review): this bumps "tn" for the missed class, which is not
            # the usual definition of a true negative. No metric in this class
            # reads "tn", so the counter is kept as it was -- confirm intent.
            @h[truth]["tn"] += 1
        end
        @total += 1
    end

    # tp / (tp + fn) for label +name+; 0.0 if the label is unknown or was
    # never the reference label.
    def recall(name)
        ratio(name, "fn")
    end

    # tp / (tp + fp) for label +name+; 0.0 if the label is unknown or was
    # never predicted.
    def precision(name)
        ratio(name, "fp")
    end

    # Harmonic mean of precision and recall for label +name+; 0.0 when both
    # are zero (the unguarded formula would divide by zero there).
    def fscore(name)
        prec = precision(name)
        rec = recall(name)
        return 0.0 if prec + rec == 0
        (2 * prec * rec) / (prec + rec)
    end

    # Macro-averaged F-score over every label seen so far.
    def score_deft_2017
        macro_fscore
    end

    # Mean of the F-scores of the "positive" and "negative" labels.
    def score_semeval_2016
        (fscore("positive") + fscore("negative")) / 2
    end

    # Mean of the recalls of the "positive", "negative" and "neutral" labels.
    def score_semeval_2017
        (recall("positive") + recall("negative") + recall("neutral")) / 3
    end

    # Fraction of stored observations whose prediction matched the truth;
    # 0.0 when nothing has been stored.
    def accuracy
        return 0.0 if @total == 0
        correct = @h.values.inject(0) { |sum, counts| sum + counts["tp"] }
        correct.to_f / @total
    end

    # Unweighted mean of the per-label F-scores (0.0 for an empty matrix).
    def macro_fscore
        macro_average { |key| fscore(key) }
    end

    # Unweighted mean of the per-label precisions (0.0 for an empty matrix).
    def macro_precision
        macro_average { |key| precision(key) }
    end

    # Unweighted mean of the per-label recalls (0.0 for an empty matrix).
    def macro_recall
        macro_average { |key| recall(key) }
    end

    private

    # Fresh counter set for a label seen for the first time.
    def new_counts
        {"tp" => 0, "tn" => 0, "fp" => 0, "fn" => 0}
    end

    # tp / (tp + counts[miss_key]) as a Float, or 0.0 when the label is
    # unknown or the denominator is zero.
    def ratio(name, miss_key)
        counts = @h[name]
        return 0.0 if counts.nil?
        denominator = counts["tp"] + counts[miss_key]
        return 0.0 if denominator == 0
        counts["tp"].to_f / denominator
    end

    # Mean, over all labels, of the value the block returns for each label.
    def macro_average
        return 0.0 if @h.empty?
        @h.each_key.inject(0.0) { |sum, key| sum + yield(key) } / @h.size
    end
end

# Scores the pooled predictions of several result files against the gold
# labels, using ConfusionMatrix#score_deft_2017.
#
# golds   - paths of tab-separated gold files, one "<id>\t<label>" per line.
#           Labels are mapped to class indices through the table below;
#           labels absent from the table map to nil.
# results - paths of tab-separated prediction files, one "<id>\t<scores>"
#           per line, where <scores> is a space-separated list of numbers.
#           The index of the largest number is taken as the predicted class.
#           Paths that do not exist are skipped.
#
# Returns the score rounded to 4 decimals.
def get_scores(golds, results)
    convert = {"negative" => 0, "positive" => 1, "objective" => 2, "mixed" => 3, "figurative" => 0, "nonfigurative" => 1}
    score = ConfusionMatrix.new

    # id => gold class index; later gold files overwrite earlier ones.
    h = {}
    golds.each do |gold|
        # File.foreach closes the file even if the block raises.
        File.foreach(gold) do |line|
            id, label = line.chomp.split("\t")
            h[id] = convert[label]
        end
    end

    results.each do |result|
        next unless File.exist?(result)

        File.foreach(result) do |line|
            id, raw_scores = line.chomp.split("\t")
            # Blank or truncated lines carry no score column; skip them
            # instead of failing on nil.
            next if raw_scores.nil?

            s = raw_scores.split(" ").map { |x| x.to_f }
            # Ids missing from the gold files are stored with a nil truth.
            score.store(s.index(s.max), h[id])
        end
    end

    score.score_deft_2017.round(4)
end



# Scores a single result file against a single gold file.
#
# gold    - path of the gold file.
# results - path of the prediction file.
#
# Returns "-" when the prediction file does not exist; otherwise whatever
# get_scores returns for this one (gold, results) pair. The scoring logic
# lives in get_scores so both entry points stay consistent.
def get_score(gold, results)
    return "-" unless File.exist?(results)

    get_scores([gold], [results])
end

# Arithmetic mean of the numeric entries of +ar+.
#
# Entries equal to the "-" placeholder are ignored. Returns 0.0 when no
# numeric entry remains.
def moyenne(ar)
    numbers = ar.reject { |x| x == "-" }
    return 0.0 if numbers.empty?

    # Plain left-to-right accumulation starting from 0.0.
    total = numbers.inject(0.0) { |acc, x| acc + x }
    total / numbers.size.to_f
end


# Finds the combination of +kbest+ prediction files whose summed scores give
# the highest ConfusionMatrix#score_deft_2017 against the gold file.
#
# gold  - path of a tab-separated gold file, one "<id>\t<label>" per line.
# best  - array of paths of tab-separated prediction files, one
#         "<id>\t<scores>" per line, <scores> being space-separated numbers.
# kbest - number of files per combination.
#
# Prints the winning combination with +p+ and returns its score rounded to
# 4 decimals. Returns -1 (and prints nil) when no combination was scored.
def get_score_best_system(gold, best, kbest)
    # path => [[id, [score, ...]], ...]; each file is parsed once up front
    # so combinations do not re-read it.
    file_memory = {}
    best.each do |file|
        rows = (file_memory[file] ||= [])

        # File.foreach closes the file even if the block raises.
        File.foreach(file) do |line|
            id, raw_scores = line.chomp.split("\t")
            # Blank or truncated lines carry no score column; skip them.
            next if raw_scores.nil?

            rows.push([id, raw_scores.split(" ").map { |x| x.to_f }])
        end
    end

    convert = {"negative" => 0, "positive" => 1, "objective" => 2, "mixed" => 3, "figurative" => 0, "nonfigurative" => 1}

    # id => gold class index (nil for labels absent from the table).
    h = {}
    File.foreach(gold) do |line|
        id, label = line.chomp.split("\t")
        h[id] = convert[label]
    end

    best_score = -1
    best_name = nil

    best.combination(kbest).each do |x|
        score = ConfusionMatrix.new

        # id => per-class sums of the scores of every file in the combination.
        s = {}
        x.each do |file|
            file_memory[file].each do |id, scores|
                # Starts with 4 zeroed slots and grows when a score vector
                # is longer, instead of failing on a missing slot.
                totals = (s[id] ||= Array.new(4, 0))
                scores.each_with_index do |y, i|
                    totals[i] = (totals[i] || 0) + y
                end
            end
        end

        s.each do |id, totals|
            score.store(totals.index(totals.max), h[id])
        end

        current = score.score_deft_2017.round(4)
        # Strict comparison: the first combination reaching a score wins ties.
        if best_score < current
            best_score = current
            best_name = x
        end
    end

    p best_name
    best_score
end



# Builds and prints the results table.
#
# For every task, one row is emitted per (embedding, size, hidden)
# configuration: the get_score value of each k-fold, the pooled get_scores
# value over all folds, and the mean of the per-fold values. A final
# "Best system" row per task holds, for each fold, the
# get_score_best_system value over that fold's result files larger than
# 100 bytes.
def launch
    tasks = %w[task1 task2 task3]
    embeddings = %w[word2vecf wang2vec word2vec distant]
    sizes = %w[100 200 300]
    hiddens = %w[123 345 567]
    kfolds = %w[0 1 2 3]

    table = Text::Table.new
    table.head = ["System", "Kfold:0", "KFold:1", "KFold:2", "KFold:3", "Total \#1", "Total \#2"]

    output_dir = "results_test"

    tasks.each do |task|
        gold_path = "data/#{task}_testGold.tokenize"
        # kfold => result files of this task that exist and exceed 100 bytes.
        best_task = {}

        embeddings.each do |embedding|
            sizes.each do |size|
                hiddens.each do |hidden|
                    row = ["#{task} #{embedding} #{size} #{hidden}"]
                    golds = []
                    results = []

                    kfolds.each do |kfold|
                        result_path = "#{output_dir}/cnn_#{task}_#{kfold}_#{embedding}_size#{size}_#{hidden}.txt"

                        row << get_score(gold_path, result_path)
                        golds << gold_path
                        results << result_path

                        next unless File.exist?(result_path) && File.size(result_path) > 100

                        (best_task[kfold] ||= []) << result_path
                    end

                    # "Total #1": all folds pooled into one confusion matrix.
                    row << get_scores(golds, results)
                    # "Total #2": mean of the per-fold cells only
                    # (the label and the pooled score are sliced off).
                    row << moyenne(row[1..-2])

                    table.rows << row
                end
            end
        end

        1.upto(1) do |top_best|
            row = ["Best system #{top_best}"]

            kfolds.each do |kfold|
                candidates = best_task[kfold]
                if candidates.nil?
                    row << "-"
                else
                    row << get_score_best_system(gold_path, candidates, top_best)
                end
            end

            # No pooled score for this row; 0.0 fills the "Total #1" column.
            row << 0.0
            row << moyenne(row[1..-2])
            p row
            table.rows << row
        end
    end

    puts table.to_s
end


# Prints the usage line followed by the author contact line.
def errarg
    usage_lines = [
        "Usage : ./programme.rb",
        "Mickael Rouvier <mickael.rouvier@univ-avignon.fr>"
    ]
    # puts writes each array element on its own line.
    puts(usage_lines)
end


# Script entry point: the script takes no arguments. Run the evaluation when
# none are given, otherwise print the usage message.
ARGV.empty? ? launch : errarg