Blame view

tools/scripts/PhonFormatter.pl 2.23 KB
e6be5137b   Jean-François Rey   reinitialized pro...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
  #!/usr/bin/perl
  
  use strict;
  use warnings;
  use Getopt::Long;
  use Pod::Usage;
  
  #-------------------------------------------
  # MAIN
  #-------------------------------------------
  
  # Command-line switches; each one stays 0 unless its option is given.
  my $help = 0;         # -help / -?   : print usage and exit
  my $withLiaison = 0;  # -liaison     : also print the variants that keep the liaison phonemes
  my $addPause = 0;     # -addPause    : print every input line twice, the copy followed by " pause"
  
  # Parse options. GetOptions returns false when the command line contains a
  # syntax error (for instance an unknown option); in that case print usage
  # and exit with status 2 instead of carrying on with a half-parsed
  # command line.
  GetOptions('help|?' => \$help,
  	   'liaison' => \$withLiaison,
  	   'addPause' => \$addPause) or pod2usage(2);
  
  # Usage was explicitly requested.
  pod2usage(1) if($help);
  
  # Read the phonetized lexicon on STDIN, one entry per line, and print the
  # reformatted pronunciation variants on STDOUT.
  ENTRY: while(my $entry = <STDIN>){
  	chomp($entry);
  
  	# Blank lines produce no output.
  	next ENTRY if($entry =~ /^\s*$/);
  
  	# Entries containing a question mark are echoed unchanged.
  	if($entry =~ /\?/){
  		print "$entry\n";
  		next ENTRY;
  	}
  
  	# -addPause: echo the entry, then the same entry followed by " pause".
  	if($addPause){
  		print "$entry\n";
  		print "$entry pause\n";
  		next ENTRY;
  	}
  
  	# First field is the word, second its phonetization; any further fields
  	# are ignored, and entries without a true second field are dropped.
  	my ($word, $phonemes) = split(" ", $entry);
  	next ENTRY unless($phonemes);
  
  	# Surround every pair of lowercase letters (one phoneme code) with spaces.
  	$phonemes =~ s/([a-z]{2})/ $1 /g;
  
  	my @variants;
  	if($phonemes =~ /[A-Z]/){
  		# The phonetization carries uppercase link symbols: always emit the
  		# forms with the symbols removed, without and with a final pause.
  		(my $unlinked = $phonemes) =~ s/[A-Z]//g;
  		push(@variants, $unlinked, $unlinked."pause");
  		if($withLiaison){
  			# With -liaison, also emit the forms where each link symbol is
  			# turned into a phoneme, alone and preceded by a pause.
  			(my $linked = $phonemes) =~ s/([A-Z])/formatLink($1, 0)/ge;
  			(my $linkedAfterPause = $phonemes) =~ s/([A-Z])/formatLink($1, 1)/ge;
  			push(@variants, $linked, $linkedAfterPause);
  		}
  	} else{
  		# No link symbol: the phonetization without and with a final pause.
  		push(@variants, $phonemes, $phonemes."pause");
  	}
  
  	# One output line per variant, each prefixed with the word and a tab.
  	my $line = join("\n", map("$word\t$_", @variants));
  
  	# Clean-up passes; their order matters.
  	$line =~ s/\t /\t/g;                               # no space right after the tab
  	$line =~ s/ $//g;                                  # no space at the very end
  	$line =~ s/(\b)(tt|kk|pp)(\b)/$1O$2 B$2$3/g;       # e.g. "tt" becomes "Ott Btt"
  	$line =~ s/(\b)ee(\b)/$1eu$2/g;                    # "ee" is written "eu"
  	$line =~ s/(\s)+/$1/g;                             # whitespace runs shrink to their last character
  	print "$line\n";
  }
  
  # Build the phoneme code for a link symbol: the symbol lowercased and
  # written twice (e.g. "Z" gives "zz").
  #
  #   $symbol   - the symbol to convert (the script passes the single
  #               uppercase letter matched in a phonetization)
  #   $isPaused - when true, the code is returned preceded by "pause "
  #
  # Returns the resulting string.
  sub formatLink
  {
  	my ($symbol, $isPaused) = @_;
  	my $phoneme = lc($symbol) x 2;
  	return $isPaused ? "pause $phoneme" : $phoneme;
  }
  __END__
  
  =head1 NAME
  
  PhonFormatter.pl - format lia_phon output into a speeral-compliant format
  
  =head1 SYNOPSIS
  
  PhonFormatter.pl [options] 
  
  Options:
  	-help|?		brief help message
  
  	-liaison	also output the pronunciation variants that keep the liaison phonemes
  
  	-addPause	dummy process: for each line of the input phonetized lexicon, also print a copy of that line followed by "pause" (works well if pause is not already present in the phonetized lexicon)