tokenize_file_gold.rb 709 Bytes
#!/usr/bin/env ruby

require "rubygems"
require "tokenizer"

# Tokenizes the second tab-separated column of every line in +file+ and
# prints the reordered record to standard output.
#
# Each line is stripped of its line ending and split on tabs. The second
# field is passed to +tokenize+, and the record is written as
# "<field 1>\t<field 3>\t<tokenized field 2>" -- note that the second and
# third columns are swapped on output and any further columns are dropped.
#
# Processing is best-effort: a line whose handling raises a StandardError is
# echoed to standard error and skipped, so one bad record does not abort the
# whole run.
#
# @param file [String] path of the tab-separated input file
# @return [nil]
def launch(file)
    # Block form guarantees the handle is closed even if reading raises.
    File.open(file) do |input|
        input.each_line do |raw|
            # Keep the untouched record so a failure can report it verbatim.
            record = raw.chomp
            begin
                fields = record.split("\t")
                # Disabled legacy step: strip surrounding quotes and decode \uXXXX escapes of the third field.
                #l = line[2].gsub(/^"/, "").gsub(/"$/, "").gsub('\\""""', '"').gsub(/\\u[\da-f]{4}/i) { |m| [m[-4..-1].to_i(16)].pack('U')  }
                tokens = tokenize(fields[1])
                puts "#{fields[0]}\t#{fields[2]}\t#{tokens}"
            rescue StandardError
                # Print the original line, not the split Array (which `puts`
                # would spread over several lines).
                $stderr.puts record
            end
        end
    end
    nil
end


# Prints a two-line usage message to standard output: the expected command
# line, followed by the author's contact line.
#
# @return [nil]
def errarg
    # Show the real invocation name and the single file argument the script
    # expects, rather than a hard-coded placeholder name with no argument.
    puts "Usage : #{$PROGRAM_NAME} <file>"
    puts "Mickael Rouvier <mickael.rouvier@univ-avignon.fr>"
end


# Script entry point: with exactly one command-line argument, process that
# file; with any other argument count, print the usage message instead.
case ARGV.length
when 1
    launch(ARGV.first)
else
    errarg
end