Blame view
tools/scripts/ApplyCorrectionRules.pl
1.79 KB
e6be5137b reinitialized pro... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
#!/usr/bin/perl -w
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;

#-------------------------------------------
# MAIN
#-------------------------------------------
# Filter: loads correction rules from the file given as first positional
# argument, then copies STDIN to STDOUT, rewriting every space-delimited
# occurrence of a rule's sought text with its replacement.

# options variables
my $help = 0;
my $tagg = 0;          # accepted on the command line, not used below
my $correctionFile;    # accepted on the command line, not used below

# useful variables
my $sep = "#";         # separates sought text from replacement in a rule line
my %tokenRegexHash;    # first word of sought text => [ [sought, replacement], ... ]

# 'correction' is deliberately kept as a plain flag: the rules file is always
# taken from the first positional argument, so "-correction rules.tab" keeps
# working because the file name stays in @ARGV.
GetOptions('help|?'     => \$help,
           'tagg'       => \$tagg,
           'correction' => \$correctionFile);

pod2usage(1) if ($help);

# Test for presence rather than truth so that a rules file named "0" is accepted.
pod2usage({-msg     => "BAD USAGE - you must specify a correction file",
           -exitval => 1,
           -verbose => 0,
           -output  => \*STDERR})
    if (!defined $ARGV[0] || $ARGV[0] eq '');

#-------------------------------------------
# Load the rules
#-------------------------------------------
# Three-argument open: the file name can never be read as an open mode.
open(my $rulesFile, '<', $ARGV[0])
    or die("Cannot open : $ARGV[0] : $!\n");

while (my $ruleLine = <$rulesFile>) {
    chomp($ruleLine);

    # Only the first two fields are used; any further fields are ignored.
    my ($seeked, $correction) = split(/\Q$sep\E/, $ruleLine);

    # Blank lines and lines with nothing before the separator carry no rule.
    next if (!defined $seeked || $seeked eq '');

    # A missing replacement means "replace with nothing".
    $correction = '' if (!defined $correction);

    # Rules are indexed by the first word of the sought text, so that only
    # the rules whose first word occurs in a sentence are tried on it.
    my ($word) = grep { length } split(/ /, $seeked);
    next if (!defined $word);

    push(@{ $tokenRegexHash{$word} }, [$seeked, $correction]);
}
close($rulesFile);

#-------------------------------------------
# Correct the corpus
#-------------------------------------------
while (my $line = <STDIN>) {
    $line =~ tr/_/ /;
    my $sentence = $line;

    # Candidate rules are selected from the tokens of the line as read, in
    # order of first appearance, which makes the application order stable.
    my %alreadyTried;
    for my $token (split(/ /, $line)) {
        next if ($alreadyTried{$token}++);
        next if (!exists $tokenRegexHash{$token});

        for my $rule (@{ $tokenRegexHash{$token} }) {
            my ($seeked, $correction) = @{$rule};

            # The sought text is used as a regular expression, as written in
            # the rules file. The trailing space is only looked at, not
            # consumed, so that it can also serve as the leading space of
            # the next occurrence ("a a" gets both tokens corrected).
            $sentence =~ s/ (?:$seeked)(?= )/ $correction/g;
        }
    }

    # Normalise spacing: collapse runs of spaces, trim both ends.
    $sentence =~ s/ +/ /g;
    $sentence =~ s/^ //g;
    $sentence =~ s/ $//g;
    print $sentence;
}

__END__

=head1 NAME

ApplyCorrectionRules.pl - apply correction rules to a corpus read on STDIN

=head1 SYNOPSIS

cat corpus | ApplyCorrectionRules.pl [options] <correction.tab>

 Options :
   -help|?    display this help

=head1 DESCRIPTION

Each line of the correction file holds the text to look for and its
replacement, separated by C<#>. The text to look for is used as a regular
expression and may span several space-separated words.

Each line read on STDIN has its underscores turned into spaces; every
occurrence of a sought text that stands between two spaces is then
replaced, runs of spaces are collapsed, and the line is printed on STDOUT.

=cut