Blame view
tools/scripts/Sentencer.pl
608 Bytes
e6be5137b reinitialized pro... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
#!/usr/bin/perl
use strict;
use warnings;

# Sentencer: reads text from STDIN and writes it to STDOUT as one continuous
# stream (no newlines are emitted) in which sentences are delimited by
# "<s>" and "</s>" markers.
#
# - Blank (whitespace-only) input lines are skipped.
# - The first non-blank line is prefixed with "<s> ".
# - Every "." together with any whitespace that follows it is replaced by
#   " </s> <s> ".  Note that *every* period is treated as a sentence
#   boundary; no attempt is made to recognise abbreviations or decimals.
# - The last non-blank line is closed properly: a dangling " <s> " is
#   removed, or a missing " </s>" is appended.
#
# Each processed line is held back until the next non-blank line (or end of
# input) is seen, because only then is it known whether it is the final line
# and needs the closing treatment.

# Rewrite one non-blank input line (already chomped, with a trailing space
# appended) into its marked-up form.
#   $text     - the line to rewrite
#   $is_first - true if this is the first non-blank line of the input, in
#               which case the opening "<s> " marker is prepended
# Returns the rewritten line.
sub mark_sentences {
    my ($text, $is_first) = @_;

    $text = "<s> " . $text if $is_first;

    # A period plus trailing whitespace ends one sentence and opens the next.
    $text =~ s/\.\s*/ <\/s> <s> /g;

    # Collapse directly repeated " </s>" markers into one.
    $text =~ s/( <\/s>)+/ <\/s>/g;

    # Squeeze runs of spaces introduced by the substitutions above.
    $text =~ s/ +/ /g;

    return $text;
}

# Finish the last marked-up line of the stream.
#   $text - the final line as returned by mark_sentences()
# Returns the line with a balanced ending followed by a single space.
sub close_last_sentence {
    my ($text) = @_;

    if ($text =~ / <s> $/) {
        # The line ended with a period, so a new sentence was opened that
        # will never receive content: drop the dangling opener.
        $text =~ s/ <s> $//;
    }
    else {
        # The line did not end with a period: close the open sentence.
        $text .= " </s>";
    }

    $text =~ s/ +/ /g;
    return $text . " ";
}

# Convert everything readable from $in and print the result to $out.
#   $in  - input filehandle, read line by line
#   $out - output filehandle
sub sentencer {
    my ($in, $out) = @_;

    my $pending;          # processed line not yet printed
    my $is_first = 1;     # true until the first non-blank line is seen

    while (my $line = <$in>) {
        chomp $line;
        $line .= " ";
        next if $line =~ /^\s*$/;

        # Flush the held-back line only now that another non-blank line
        # follows it.  Flushing on blank lines as well would cause the final
        # line to be printed twice when the input ends with blank lines.
        print {$out} $pending if defined $pending;

        $pending  = mark_sentences($line, $is_first);
        $is_first = 0;
    }

    print {$out} close_last_sentence($pending) if defined $pending;
    return;
}

# Run as a filter when executed as a script; do nothing when loaded by
# another file (e.g. a test) so the subs above can be called directly.
sentencer(\*STDIN, \*STDOUT) unless caller;