PhonFormatter.pl
2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
#-------------------------------------------
# MAIN
#-------------------------------------------
# options variables
# Command-line switches; all of them default to off.
my $help = 0;
my $withLiaison = 0;
my $addPause = 0;

# Parse options. On a syntax error (unknown or malformed option) GetOptions
# returns false and the usage message is printed; usage is also printed
# when help was explicitly requested.
GetOptions('help|?'   => \$help,
           'liaison'  => \$withLiaison,
           'addPause' => \$addPause) or pod2usage(2);
pod2usage(1) if($help);

# Every non-blank line of STDIN is handled as one entry whose first two
# whitespace-separated fields are a word and its phonetic transcription.
ENTRY:
while (my $entry = <STDIN>) {
    chomp($entry);
    next ENTRY if $entry =~ /^\s*$/;

    # Entries containing '?' are echoed unchanged.
    if ($entry =~ /\?/) {
        print "$entry\n";
        next ENTRY;
    }

    # -addPause: echo the entry, then echo it again with " pause" appended.
    if ($addPause) {
        print "$entry\n";
        print "$entry pause\n";
        next ENTRY;
    }

    my ($word, $phonemes) = split(" ", $entry);
    next ENTRY unless $phonemes;    # no transcription field: nothing to emit

    # Isolate every pair of lowercase letters as its own token.
    $phonemes =~ s/([a-z]{2})/ $1 /g;

    # Build the transcription variants to print for this word. "pause" is
    # always added as a separate, space-delimited token so it can never be
    # glued to the preceding symbol; surplus spaces are squeezed out below.
    my @variants;
    if ($phonemes =~ /[A-Z]/) {
        # Uppercase letters are treated as link (liaison) markers: first
        # emit the transcription with the markers dropped ...
        (my $withoutLink = $phonemes) =~ s/[A-Z]//g;
        push @variants, $withoutLink, "$withoutLink pause";

        # ... and, with -liaison, also with each marker expanded by
        # formatLink(), without and with a pause in front of it.
        if ($withLiaison) {
            (my $linked = $phonemes)
                =~ s/([A-Z])/' ' . formatLink($1, 0) . ' '/ge;
            (my $linkedPause = $phonemes)
                =~ s/([A-Z])/' ' . formatLink($1, 1) . ' '/ge;
            push @variants, $linked, $linkedPause;
        }
    }
    else {
        push @variants, $phonemes, "$phonemes pause";
    }

    for my $variant (@variants) {
        # Normalise spacing: single spaces between tokens, none at the ends.
        $variant =~ s/ +/ /g;
        $variant =~ s/^ //;
        $variant =~ s/ $//;

        # Symbol rewrites, applied to the transcription only so that the
        # word column is never altered: tt/kk/pp become an "O.. B.." pair
        # and ee becomes eu.
        $variant =~ s/\b(tt|kk|pp)\b/O$1 B$1/g;
        $variant =~ s/\bee\b/eu/g;

        print "$word\t$variant\n";
    }
}
# Expand a link symbol into its output form.
#   $symbol   - the symbol to expand; it is lowercased and doubled
#               (e.g. "Z" becomes "zz").
#   $isPaused - when true, the result is prefixed with "pause ".
# Returns the expanded string.
sub formatLink
{
    my ($symbol, $isPaused) = @_;
    my $doubled = lc($symbol) x 2;
    return $isPaused ? "pause $doubled" : $doubled;
}
__END__
=head1 NAME

PhonFormatter.pl - format a lia_phon output to a speeral compliant format

=head1 SYNOPSIS

PhonFormatter.pl [options]

 Options:
   -help|?    brief help message
   -liaison   also display liaison
   -addPause  dummy process: for each line of the input phonetized lexicon, add another line with pause (works well if pause is not present in the phonetized lexicon)