get_rule_hierarchy.pl
2.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env perl
#This reads in rules, of the form put out by get_rules.pl, e.g.:
# ERT,,ER0 T,
# MENT,ING,M AH0 N T,IH0 NG
# S,TON,Z,T AH0 N
# ,ER,IH0 NG,IH0 NG ER0
# ,'S,M AH0 N,M AH0 N Z
#TIONS,TIVE,SH AH0 N Z,T IH0 V
# and it works out a hierarchy that says which rules are sub-cases
# of which rules: it outputs on each line a pair separated by ";", where
# each member of the pair is a rule, first one is the specialization, the
# second one being more general.
# E.g.:
# RED,RE,D,/ED,E,D,
# RED,RE,D,/D,,D,
# GING,GE,IH0 NG,/ING,I,IH0 NG,
# TOR,TING,T ER0,T IH0 NG/OR,OR,T ER0,T ER0
# ERED,ER,D,/RED,R,D,
# ERED,ER,D,/ED,,D,
while(<>) {
chop;
$rule = $_;
$isrule{$rule} = 1;
push @rules, $rule;
}
foreach my $rule (@rules) {
# Truncate the letters and phones in the rule, while we
# can, to get more general rules; if the more general rule
# exists, put out the pair.
@A = split(",", $rule);
@suffixa = split("", $A[0]);
@suffixb = split("", $A[1]);
@psuffixa = split(" ", $A[2]);
@psuffixb = split(" ", $A[3]);
for ($common_suffix_len = 0; $common_suffix_len < @suffixa && $common_suffix_len < @suffixb;) {
if ($suffixa[$common_suffix_len] eq $suffixb[$common_suffix_len]) {
$common_suffix_len++;
} else {
last;
}
}
for ($common_psuffix_len = 0; $common_psuffix_len < @psuffixa && $common_psuffix_len < @psuffixb;) {
if ($psuffixa[$common_psuffix_len] eq $psuffixb[$common_psuffix_len]) {
$common_psuffix_len++;
} else {
last;
}
}
# Get all combinations of pairs of integers <= (common_suffix_len, common_psuffix_len),
# except (0,0), and print out this rule together with the corresponding rule (if it exists).
for ($m = 0; $m <= $common_suffix_len; $m++) {
$sa = join("", @suffixa[$m...$#suffixa]); # @x[a..b] is array slice notation.
$sb = join("", @suffixb[$m...$#suffixb]);
for ($n = 0; $n <= $common_psuffix_len; $n++) {
if (!($m == 0 && $n == 0)) {
$psa = join(" ", @psuffixa[$n...$#psuffixa]);
$psb = join(" ", @psuffixb[$n...$#psuffixb]);
$more_general_rule = join(",", ($sa, $sb, $psa, $psb));
if (defined $isrule{$more_general_rule}) {
print $rule . ";" . $more_general_rule . "\n";
}
}
}
}
}