Blame view
bin/tokenize_file_gold.rb
709 Bytes
362b552ee upload system |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
#!/usr/bin/env ruby
require "rubygems"
require "tokenizer"

# Reads a tab-separated file line by line and, for every line, prints
# column 0, column 2 and the tokenized form of column 1 (in that order),
# joined by tabs, on stdout.
#
# A line whose processing raises a StandardError is echoed to stderr and
# skipped; processing continues with the next line.
#
# @param file [String] path of the tab-separated input file
# @return [nil]
def launch(file)
  # File.foreach closes the handle itself, even if an exception escapes the loop.
  File.foreach(file) do |line|
    begin
      line.chomp!
      # Keep the fields in their own local so `line` still holds the raw text
      # if we need to report it from the rescue clause below.
      fields = line.split("\t")
      # Disabled legacy unescaping of column 2, kept for reference:
      #l = line[2].gsub(/^"/, "").gsub(/"$/, "").gsub('\\""""', '"').gsub(/\\u[\da-f]{4}/i) { |m| [m[-4..-1].to_i(16)].pack('U') }
      # `tokenize` is not defined in this file; presumably it comes from the
      # "tokenizer" require above -- TODO confirm.
      tok = tokenize(fields[1])
      puts "#{fields[0]}\t#{fields[2]}\t#{tok}"
    rescue
      # Report the offending input line as a single line of text.
      $stderr.puts line
    end
  end
end

# Prints the usage banner and the author contact on stdout.
#
# @return [nil]
def errarg
  puts "Usage : ./programme.rb"
  puts "Mickael Rouvier <mickael.rouvier@univ-avignon.fr>"
end

# Entry point: exactly one command-line argument (the input file) is expected;
# any other argument count prints the usage banner instead.
if ARGV.size == 1
  launch(ARGV[0])
else
  errarg
end