Blame view

egs/sprakbanken/s5/local/dict/score_prons.pl 1.63 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
  #!/usr/bin/env perl
  
  # This program takes candidate prons from "get_candidate_prons.pl" or
  # "limit_candidate_prons.pl", and a reference dictionary covering those words,
  # and outputs the same format but with scoring information added (so we go from
  # 6 to 7 fields).  The scoring information says, for each generated pron,
  # whether it is a match, a partial match, or no match against the reference
  # pronunciations listed for that same word.  A partial match means it's
  # correct except for stress (the prons agree once all digits are removed).
  
  # The input is a 6-tuple on each line, like:
  # word;pron;base-word;base-pron;rule-name;de-stress
  #
  # The output is the same except with one more field, the score,
  # which may be "right", "wrong", "partial".
  
  # Command line: the reference dictionary is required; a candidate-prons file
  # is optional (the scoring loop further down reads it, or stdin, via <>).
  if (@ARGV != 1 && @ARGV != 2) {
    die "Usage: score_prons.pl reference_dict [candidate_prons] > scored_candidate_prons";
  }

  # Take the dictionary off @ARGV so that only the optional candidate file is
  # left for the later <> read.
  my $dict = shift @ARGV;

  # Three-argument open with a lexical handle: the file name is never parsed
  # for mode characters, and $! says why the open failed.
  open(my $dict_fh, "<", $dict) or die "Opening dictionary $dict: $!";

  # Fill two lookup tables keyed by "word;pron":
  #   %word_and_pron          - pronunciations exactly as listed
  #   %word_and_pron_nostress - the same with every digit removed
  # Both stay package globals because the scoring loop below looks them up.
  while (my $entry = <$dict_fh>) {
    # split(" ", ...) already discards leading/trailing whitespace, including
    # the line terminator, so no chop is needed.  The previous unconditional
    # chop removed the last character of the final phone whenever the file did
    # not end in a newline.
    my ($word, @phones) = split(" ", $entry);
    next unless defined $word;  # blank line: nothing to record

    my $pron = join(" ", @phones);
    my $pron_nostress = $pron;
    $pron_nostress =~ s:\d::g;  # drop the stress digits

    $word_and_pron{$word.";".$pron} = 1;
    $word_and_pron_nostress{$word.";".$pron_nostress} = 1;
  }
  close($dict_fh);
  
  # Score one candidate line against the reference tables.
  #
  # Argument: one input line of the form
  #   word;pron;base-word;base-pron;rule-name;de-stress
  # with or without a trailing newline.
  # Returns:  the same line with ";<score>" appended and terminated by exactly
  #           one newline, where <score> is
  #   "right"   - (word, pron) is in %word_and_pron,
  #   "partial" - it only matches once digits are removed from the pron
  #               (looked up in %word_and_pron_nostress),
  #   "wrong"   - neither lookup succeeds.
  sub score_line {
    my ($line) = @_;

    # chomp, not chop: chop would eat the last real character of a final line
    # that has no newline.
    chomp $line;

    # Only the first two fields are needed for the lookup; the rest of the
    # line is passed through unchanged.
    my ($word, $pron) = split(/;/, $line);
    $word = "" unless defined $word;
    $pron = "" unless defined $pron;

    my $pron_nostress = $pron;
    $pron_nostress =~ s:\d::g;

    my $score;
    if (exists $word_and_pron{$word.";".$pron}) {
      $score = "right";
    } elsif (exists $word_and_pron_nostress{$word.";".$pron_nostress}) {
      $score = "partial";
    } else {
      $score = "wrong";
    }

    # Terminate with a bare "\n"; the earlier multi-line string literal also
    # emitted the source indentation at the start of the next output line.
    return $line.";".$score."\n";
  }

  # Read candidates from the file left on @ARGV, or from stdin if none.
  while (my $line = <>) {
    print score_line($line);
  }