score_rules.pl
1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env perl
# This program takes the output of count_rules.pl, which is tuples
# of the form
#
# rule;destress;right-count;partial-count;wrong-count
#
# and outputs lines of the form
#
# rule;destress;score
#
# where the score, between 0 and 1 (1 is better), is equal to:
#
# ((#correct) + $partial_score * (#partial)) / (#correct + #partial + #wrong + $ballast)
#
# where $partial_score (e.g. 0.8) is the score we assign to a "partial" match,
# and $ballast is a small number, e.g. 1, that is treated like "extra" wrong scores, to penalize
# rules with few observations.
#
# It outputs one scored line for every rule in the input; no rules are
# filtered out.
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# Defaults; both $ballast and $partial_score can be overridden on the
# command line with --ballast and --partial-score.
my $ballast          = 1;        # Pseudo-count of extra "wrong" observations.
my $partial_score    = 0.8;      # Credit given to each "partial" match.
my $destress_penalty = 1.0e-05;  # Give destressed rules a small
    # penalty vs. their no-destress counterparts, so if we
    # have to choose arbitrarily we won't destress (seems safer).

my $usage = "Usage: score_rules.pl [--ballast ballast-count] [--partial-score partial-score] [input from count_rules.pl]";

# score_rule($right, $partial, $wrong, $partial_weight, $extra_wrong)
#
# Returns (right + partial_weight * partial) / (right + partial + wrong + extra_wrong).
# A rule with no observations and no ballast has a zero denominator; it is
# scored 0 rather than dying with "Illegal division by zero".
sub score_rule {
    my ($right, $partial, $wrong, $partial_weight, $extra_wrong) = @_;

    my $denominator = $right + $partial + $wrong + $extra_wrong;
    return 0 if $denominator == 0;
    return ($right + $partial_weight * $partial) / $denominator;
}

# Consume leading options, in any order.  Anything that is not a known
# option is left in @ARGV and treated as the (optional) input file name.
while (@ARGV && ($ARGV[0] eq '--ballast' || $ARGV[0] eq '--partial-score')) {
    my $option = shift @ARGV;
    @ARGV or die $usage;    # The option is missing its value.
    my $value = shift @ARGV;

    if ($option eq '--ballast') {
        # A negative ballast could make the denominator zero or negative.
        (looks_like_number($value) && $value >= 0)
            or die "Invalid ballast: $value";
        $ballast = $value;
    }
    else {
        (looks_like_number($value) && $value >= 0.0 && $value <= 1.0)
            or die "Invalid partial_score: $value";
        $partial_score = $value;
    }
}
(@ARGV == 0 || @ARGV == 1) or die $usage;

# Read rule;destress;right-count;partial-count;wrong-count tuples from the
# named file (or stdin) and print rule;destress;score for each one.
while (my $line = <>) {
    $line =~ s/\r?\n\z//;    # Strip the line ending so the last count is clean.

    # A limit of -1 keeps trailing empty fields, so a missing final count is
    # caught by the numeric check below instead of changing the field count.
    my @fields = split /;/, $line, -1;
    @fields == 5 or die "Bad input line; $line\n";

    my ($rule, $destress, @counts) = @fields;
    for my $count (@counts) {
        (looks_like_number($count) && $count >= 0)
            or die "Bad input line; $line\n";
    }

    my $rule_score = score_rule(@counts, $partial_score, $ballast);
    $rule_score -= $destress_penalty if $destress eq 'yes';

    print join(';', $rule, $destress, sprintf('%.5f', $rule_score)) . "\n";
}