Blame view
tools/scripts/PhonFormatter.pl
2.23 KB
e6be5137b reinitialized pro... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
#!/usr/bin/perl

use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;

#-------------------------------------------
# MAIN
#-------------------------------------------

# Run the filter only when this file is executed as a script; when it is
# loaded with require/do, caller() is true and only the subs are defined.
main() unless caller;

# main()
# Parses the command-line options, then filters STDIN to STDOUT line by line:
#   - blank lines are dropped;
#   - lines containing '?' are copied unchanged;
#   - with -addPause every line is copied, followed by the same line with
#     " pause" appended;
#   - otherwise the line is expanded by formatEntry().
sub main {
    # options variables
    my $help        = 0;
    my $withLiaison = 0;
    my $addPause    = 0;

    # Parse options and print usage if there is a syntax error,
    # or if usage was explicitly requested
    GetOptions(
        'help|?'   => \$help,
        'liaison'  => \$withLiaison,
        'addPause' => \$addPause,
    ) or pod2usage(2);
    pod2usage(1) if $help;

    while (my $entry = <STDIN>) {
        chomp $entry;
        next if $entry =~ /^\s*$/;

        if ($entry =~ /\?/) {
            print "$entry\n";
        }
        elsif ($addPause) {
            print "$entry\n";
            print "$entry pause\n";
        }
        else {
            my $formatted = formatEntry($entry, $withLiaison);
            print "$formatted\n" if defined $formatted;
        }
    }

    return;
}

# formatEntry($entry, $withLiaison)
# Expands one "word phonemes" entry into its output lines.
#
#   $entry       - one chomped input line; the first whitespace-separated
#                  field is the word, the second the phoneme string. Further
#                  fields are ignored.
#   $withLiaison - when true, also emit the variants in which the upper-case
#                  symbols are kept (see formatLink).
#
# Returns the lines joined with "\n" (no trailing newline), each of the form
# "word<TAB>phonemes". Returns undef when the entry has no usable second field.
#
# Every entry yields a plain variant and a variant ending in "pause". Upper-case
# letters in the phoneme string are removed from those two variants; with
# $withLiaison two more variants are added where each upper-case letter is
# replaced by formatLink's output, without and with a preceding "pause".
sub formatEntry {
    my ($entry, $withLiaison) = @_;

    my ($word, $phonemes) = split ' ', $entry;
    return unless $phonemes;

    # Surround every pair of lower-case letters with spaces so that each
    # two-letter code becomes its own token.
    $phonemes =~ s/([a-z]{2})/ $1 /g;

    my @variants;
    if ($phonemes =~ /[A-Z]/) {
        (my $withoutLink = $phonemes) =~ s/[A-Z]//g;
        push @variants, $withoutLink, $withoutLink . 'pause';

        if ($withLiaison) {
            (my $linked      = $phonemes) =~ s/([A-Z])/formatLink($1, 0)/ge;
            (my $linkedPause = $phonemes) =~ s/([A-Z])/formatLink($1, 1)/ge;
            push @variants, $linked, $linkedPause;
        }
    }
    else {
        push @variants, $phonemes, $phonemes . 'pause';
    }

    my @lines;
    for my $variant (@variants) {
        # The rewrites are applied to the phoneme column only, so a word that
        # happens to be spelled "pp", "ee", ... is left intact.
        $variant =~ s/\b(tt|kk|pp)\b/O$1 B$1/g;    # tt -> "Ott Btt", etc.
        $variant =~ s/\bee\b/eu/g;
        $variant =~ s/\s+/ /g;                     # collapse whitespace runs
        $variant =~ s/\A | \z//g;                  # trim the edges
        push @lines, "$word\t$variant";
    }

    return join "\n", @lines;
}

# formatLink($symbol, $isPaused)
# Turns a one-letter link symbol into its doubled lower-case form
# (e.g. "Z" -> "zz"); when $isPaused is true the result is prefixed with
# "pause " (e.g. "pause zz").
sub formatLink {
    my ($symbol, $isPaused) = @_;

    my $formattedSymbol = lc($symbol) x 2;

    return $isPaused ? "pause $formattedSymbol" : $formattedSymbol;
}

1;

__END__

=head1 NAME

PhonFormatter.pl - format a lia_phon output to a speeral compliant format

=head1 SYNOPSIS

PhonFormatter.pl [options]

 Options:
   -help|?      brief help message
   -liaison     also display liaison
   -addPause    dummy process : for each line of input phonetized lexicon
                add another line with pause (works well if pause is not
                present in the phonetized lexicon)

=head1 DESCRIPTION

Reads a phonetized lexicon on standard input, one "word phonemes" entry per
line, and writes the reformatted entries on standard output. Blank lines are
dropped and lines containing a question mark are copied unchanged.

=cut