Blame view

egs/wsj/s5/local/dict/get_rule_hierarchy.pl 2.24 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
  #!/usr/bin/env perl
  
  #This reads in rules, of the form put out by get_rules.pl, e.g.:
  # ERT,,ER0 T,
  # MENT,ING,M AH0 N T,IH0 NG
  # S,TON,Z,T AH0 N
  # ,ER,IH0 NG,IH0 NG ER0
  # ,'S,M AH0 N,M AH0 N Z
  #TIONS,TIVE,SH AH0 N Z,T IH0 V
  
  # and it works out a hierarchy that says which rules are sub-cases
  # of which rules: it outputs on each line a pair separated by ";", where
  # each member of the pair is a rule, the first one being the specialization
  # and the second one being more general.
  # E.g.:
  # RED,RE,D,;ED,E,D,
  # RED,RE,D,;D,,D,
  # GING,GE,IH0 NG,;ING,E,IH0 NG,
  # TOR,TING,T ER0,T IH0 NG;OR,ING,T ER0,T IH0 NG
  # ERED,ER,D,;RED,R,D,
  # ERED,ER,D,;ED,,D,
  
  
  
  
  use strict;
  use warnings;

  # _common_prefix_len(\@left, \@right)
  #
  # Returns the number of leading elements that the two arrays have in common
  # (compared as strings).  Returns 0 if either array is empty.
  sub _common_prefix_len {
    my ($left, $right) = @_;
    my $len = 0;
    while ($len < @$left && $len < @$right && $left->[$len] eq $right->[$len]) {
      $len++;
    }
    return $len;
  }

  # _more_general_rules($rule, \%is_rule)
  #
  # $rule is one comma-separated rule with four fields: two letter strings
  # followed by two space-separated phone sequences.  Candidate generalizations
  # are formed by dropping the same number ($m) of leading letters from both
  # letter fields and the same number ($n) of leading phones from both phone
  # fields, for every ($m, $n) up to the lengths of the shared leading
  # letters / phones, excluding (0, 0), which would be the rule itself.
  #
  # Returns the list of candidates that are keys of %is_rule, in the order
  # $m outer / $n inner.
  sub _more_general_rules {
    my ($rule, $is_rule) = @_;

    # A limit of -1 keeps trailing empty fields, so a rule such as
    # "ERT,,ER0 T," still yields four fields; pad short (malformed) lines so
    # that no field is undefined.
    my @fields = split(/,/, $rule, -1);
    push @fields, '' while @fields < 4;

    my @letters_a = split("", $fields[0]);
    my @letters_b = split("", $fields[1]);
    my @phones_a  = split(" ", $fields[2]);
    my @phones_b  = split(" ", $fields[3]);

    my $max_letters = _common_prefix_len(\@letters_a, \@letters_b);
    my $max_phones  = _common_prefix_len(\@phones_a, \@phones_b);

    my @found;
    for my $m (0 .. $max_letters) {
      # An array slice whose start is past the last index is simply empty.
      my $sa = join("", @letters_a[$m .. $#letters_a]);
      my $sb = join("", @letters_b[$m .. $#letters_b]);
      for my $n (0 .. $max_phones) {
        next if $m == 0 && $n == 0;
        my $psa = join(" ", @phones_a[$n .. $#phones_a]);
        my $psb = join(" ", @phones_b[$n .. $#phones_b]);
        my $general = join(",", $sa, $sb, $psa, $psb);
        push @found, $general if exists $is_rule->{$general};
      }
    }
    return @found;
  }

  # rule_hierarchy_lines(@lines)
  #
  # @lines are the raw input lines, one rule per line, with or without a
  # trailing newline.  Returns the output lines, each of the form
  # "<rule>;<more general rule>\n", for every input rule (in input order,
  # duplicates included) and every more general rule that is itself present
  # in the input.
  sub rule_hierarchy_lines {
    my @rules = @_;    # copy, so the caller's strings are not modified
    # chomp rather than chop: chop would eat the last real character of a
    # final line that has no trailing newline.
    chomp @rules;
    my %is_rule = map { $_ => 1 } @rules;

    my @output;
    foreach my $rule (@rules) {
      foreach my $general (_more_general_rules($rule, \%is_rule)) {
        # "\n" is written as an escape; a literal line break inside the
        # string would drag source indentation into the output.
        push @output, $rule . ";" . $general . "\n";
      }
    }
    return @output;
  }

  # Script entry point: read all rules from the files named on the command
  # line (or from standard input) and print the hierarchy.  The "unless caller"
  # guard skips this when the file is loaded via require/do rather than run
  # directly, so loading it does not consume input.
  print rule_hierarchy_lines(<>) unless caller;