Blame view
egs/wsj/s5/local/dict/score_rules.pl
1.72 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
#!/usr/bin/env perl # This program takes the output of count_rules.pl, which is tuples # of the form # # rule;destress;right-count;partial-count;wrong-count # # and outputs lines of the form # # rule;de-stress;score # # where the score, between 0 and 1 (1 better), is # equal to: # # It forms a score between 0 and 1, of the form: # ((#correct) + $partial_score * (#partial)) / (#correct + #partial + #wrong + $ballast) # # where $partial_score (e.g. 0.8) is the score we assign to a "partial" match, # and $ballast is a small number, e.g. 1, that is treated like "extra" wrong scores, to penalize # rules with few observations. # # It outputs all rules that at are at least the $ballast = 1; $partial_score = 0.8; $destress_penalty = 1.0e-05; # Give destressed rules a small # penalty vs. their no-destress counterparts, so if we # have to choose arbitrarily we won't destress (seems safer)> for ($n = 1; $n <= 4; $n++) { if ($ARGV[0] eq "--ballast") { shift @ARGV; $ballast = shift @ARGV; } if ($ARGV[0] eq "--partial-score") { shift @ARGV; $partial_score = shift @ARGV; ($partial_score >= 0.0 && $partial_score <= 1.0) || die "Invalid partial_score: $partial_score"; } } (@ARGV == 0 || @ARGV == 1) || die "Usage: score_rules.pl [--ballast ballast-count] [--partial-score partial-score] [input from count_rules.pl]"; while(<>) { @A = split(";", $_); @A == 5 || die "Bad input line; $_"; ($rule,$destress,$right_count,$partial_count,$wrong_count) = @A; $rule_score = ($right_count + $partial_score*$partial_count) / ($right_count+$partial_count+$wrong_count+$ballast); if ($destress eq "yes") { $rule_score -= $destress_penalty; } print join(";", $rule, $destress, sprintf("%.5f", $rule_score)) . " "; } |