tokenize_file_gold.rb
709 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env ruby
require "rubygems"
require "tokenizer"
# Tokenizes the second column of a tab-separated file and prints the result.
#
# Every line of +file+ is stripped of its line terminator and split on tabs.
# The second field is passed to +tokenize+, and the record is written to
# standard output as "<field 0>\t<field 2>\t<tokenized field 1>" (the last
# two columns are swapped relative to the input).
#
# A record whose processing raises a StandardError (e.g. if +tokenize+
# rejects its argument) is not fatal: the record is echoed to standard error
# and processing continues with the next line.
#
# @param file [String] path of the tab-separated input file
# @return [nil]
def launch(file)
  # File.foreach owns the handle and closes it even if the loop is left early,
  # so no descriptor leaks when something outside the inner rescue raises.
  File.foreach(file) do |raw|
    record = raw.chomp
    begin
      fields = record.split("\t")
      tokens = tokenize(fields[1])
      puts "#{fields[0]}\t#{fields[2]}\t#{tokens}"
    rescue StandardError
      # Report the record exactly as it was read (still tab-separated), so
      # rejected lines can be collected from stderr and fed back in later.
      $stderr.puts record
    end
  end
end
# Prints a usage reminder followed by the author's contact line to standard
# output.
#
# @return [nil]
def errarg
  # Show the script under the name it was invoked with, together with the
  # single input-file argument the script requires.
  puts "Usage : #{$PROGRAM_NAME} <file>"
  puts "Mickael Rouvier <mickael.rouvier@univ-avignon.fr>"
end
# Script entry point: with exactly one command-line argument, process that
# file; with any other argument count, print the usage text instead.
case ARGV.size
when 1 then launch(ARGV.first)
else errarg
end