Blame view

egs/wsj/s5/utils/pinyin_map.pl 3.78 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
  #!/usr/bin/env perl
  use warnings; #sed replacement for -w perl parameter
  
  # Require exactly one command-line argument: the pinyin-to-phone map file.
  # On misuse the usage text goes to STDERR and the script exits with a
  # non-zero status, so the message does not pollute the lexicon written to
  # STDOUT and a calling script can detect the failure.
  if (@ARGV != 1) {
    print STDERR "\nUsage: $0 pinyin2phone\n";
    exit 1;
  }
  
  # Load the pinyin-to-phone map named by the first argument into %py2ph.
  # Each non-blank line is split on whitespace: the first field is the key
  # and the remaining fields are stored as an array ref of phones.
  my %py2ph;
  {
    my $map_file = $ARGV[0];
    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode or a pipe.
    open(my $maps, '<', $map_file)
      or die("Could not open pinyin map file $map_file: $!");
    # Read line by line rather than slurping the whole file into a list.
    while (my $line = <$maps>) {
      my ($py, @phones) = split(" ", $line);
      next unless defined $py;    # blank line: nothing to map
      $py2ph{$py} = [@phones];
    }
    close($maps);
  }
  
  #foreach $word ( keys %py2ph ) {
       #foreach $i ( 0 .. $#{ $py2ph{$word} } ) {
       #    print " $word = $py2ph{$word}[$i]";
       #}
       #print " $#{ $py2ph{$word} }";
       #print "\n";
  #}
  
  # Convert the lexicon read from STDIN and write the result to STDOUT.
  #
  # Each input line is split on whitespace: the first field is the word and
  # every following field is a syllable.  A syllable is split into an optional
  # initial, a final and a tone digit; the initial and the final are looked up
  # in %py2ph, and the tone is appended to every phone of the final (phones of
  # the initial carry no tone).  The output line is the word followed by all
  # resulting phones, separated by single spaces.
  #
  # Dies if an initial or a final has no entry in %py2ph.
  my @entry;

  # An initial is one of the digraphs CH/SH/ZH or a single consonant letter.
  # The digraphs come first so that e.g. "ZHANG1" splits as ZH + ANG1 rather
  # than Z + HANG1.  At least one character must remain after the initial.
  my $initial_re = qr/^(CH|SH|ZH|[BCDFGHJKLMNPQRSTWXYZ])([A-Z0-9]+)$/;

  while (my $line = <STDIN>) {
    my ($word, @syllables) = split(" ", $line);
    if (!defined $word) {
      # Blank input line: keep the one-output-line-per-input-line behaviour
      # without interpolating an undefined word.
      print "\n";
      next;
    }
    @entry = ($word);

    foreach my $syllable (@syllables) {
      # Without a recognised initial the whole syllable is treated as the
      # final.
      my ($initial, $final) = (undef, $syllable);
      if ($syllable =~ $initial_re) { ($initial, $final) = ($1, $2); }

      # Strip the tone digit that follows the letters of the final.  When
      # there is no such digit the tone stays empty, so the phones are
      # emitted unchanged instead of having the final itself appended.
      my $tone = "";
      if ($final =~ s/([A-Z]+)([0-9])/$1/) { $tone = $2; }

      if (defined $initial) {
        if (!(exists $py2ph{$initial}) or !(exists $py2ph{$final})) {
          die "$0: no entry find for ", $syllable, " ", $initial, " ", $final;
        }
        push(@entry, @{$py2ph{$initial}});
      }
      elsif (!(exists $py2ph{$final})) {
        die "$0: no entry find for ", $final;
      }

      # The map builds a new list, so the phones stored in %py2ph are not
      # modified.
      push(@entry, map { $_ . $tone } @{$py2ph{$final}});
    }

    print "@entry\n";
  }