Blame view

bin/tokenize_file_gold.rb 709 Bytes
362b552ee   Rouvier Mickael   upload system
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
  #!/usr/bin/env ruby
  
  require "rubygems"
  require "tokenizer"
  
  # Reads a tab-separated file line by line, tokenizes the second column and
  # prints each row on standard output as:
  #
  #   <field 0>\t<field 2>\t<tokenized field 1>
  #
  # `tokenize` is not defined in this file; presumably it is provided by the
  # "tokenizer" library required at the top of the script (TODO confirm).
  #
  # A line whose processing raises a StandardError is skipped and echoed,
  # exactly as read (minus its line terminator), on standard error.
  #
  # @param file [String] path of the tab-separated file to process
  # @return [nil]
  def launch(file)
      # File.foreach opens the file and closes it itself, even when the
      # iteration is interrupted by an exception.
      File.foreach(file) do |raw|
          begin
              raw.chomp!
              fields = raw.split("\t")
              # Disabled un-escaping step kept from the original for reference:
              #l = fields[2].gsub(/^"/, "").gsub(/"$/, "").gsub('\\""""', '"').gsub(/\\u[\da-f]{4}/i) { |m| [m[-4..-1].to_i(16)].pack('U')  }
              tokens = tokenize(fields[1])
              puts "#{fields[0]}\t#{fields[2]}\t#{tokens}"
          rescue StandardError
              # Report the untouched input line; printing the split Array
              # would spread its fields over several lines.
              $stderr.puts raw
          end
      end
  end
  
  
  # Prints the command-line usage followed by the author contact line on
  # standard output. The usage line shows the name the script was invoked
  # with and the single file argument it expects.
  #
  # @return [nil]
  def errarg
      puts "Usage : #{$PROGRAM_NAME} <file>"
      puts "Mickael Rouvier <mickael.rouvier@univ-avignon.fr>"
  end
  
  
  # Script entry point: with exactly one command-line argument, process it
  # as the input file path; with any other argument count, print the usage.
  case ARGV.length
  when 1 then launch(ARGV.first)
  else errarg
  end