Blame view

tools/scripts/Sentencer.pl 608 Bytes
e6be5137b   Jean-François Rey   reinitialized pro...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
  #!/usr/bin/perl
  
  use strict;
  use warnings;
  
  # Sentence segmenter.
  #
  # Reads plain text on STDIN and writes it to STDOUT with sentence
  # markers: the text is opened with "<s> ", every period is replaced by
  # " </s>" + newline + "<s> ", and the very last sentence is closed with
  # " </s>" followed by a newline.  Input line breaks do not end a
  # sentence: consecutive non-blank lines are joined with a single space.
  # Blank (whitespace-only) lines are ignored.

  # Insert sentence markers into one non-blank input line.
  #   $text     - the line, already chomped, with one trailing space added
  #   $is_first - true for the first non-blank line of the input, which
  #               must open the very first sentence
  # Returns the marked-up text.  It ends either with a space (sentence
  # still open) or with "\n<s> " (a new sentence was just opened).
  sub mark_sentences {
      my ($text, $is_first) = @_;

      $text = "<s> " . $text if $is_first;

      # A period (and any whitespace after it) ends the current sentence
      # and opens the next one on a new output line.
      $text =~ s/\.\s*/ <\/s>\n<s> /g;

      # Tidy up: no repeated closers, no empty output lines, single spaces.
      $text =~ s/( <\/s>)+/ <\/s>/g;
      $text =~ s/\n+/\n/g;
      $text =~ s/ +/ /g;

      return $text;
  }

  # Finalize the last chunk of output.
  #   $chunk - the value returned by mark_sentences() for the last
  #            non-blank input line
  # Returns the chunk terminated by exactly one newline, with the dangling
  # "<s> " opener removed or the still-open sentence closed.
  sub close_last_chunk {
      my ($chunk) = @_;

      # If the text ended on a period there is an opener with nothing
      # after it: drop it.  Otherwise the last sentence is still open.
      unless ($chunk =~ s/\n<s> \z//) {
          $chunk .= " </s>";
      }
      $chunk =~ s/ +/ /g;

      return $chunk . "\n";
  }

  # Segment everything readable from $in and write the result to $out.
  #   $in  - input filehandle
  #   $out - output filehandle
  # Each processed chunk is held back until the next non-blank line shows
  # up, because only the final chunk needs the close_last_chunk() fix-up.
  # Blank lines never flush the held chunk, so trailing blank lines can
  # no longer cause the last chunk to be emitted twice.
  sub sentencize {
      my ($in, $out) = @_;

      my $pending  = "";    # processed chunk not yet written
      my $is_first = 1;     # no non-blank line seen so far

      while (my $line = <$in>) {
          chomp $line;
          next if $line =~ /^\s*$/;

          print {$out} $pending;
          $pending  = mark_sentences($line . " ", $is_first);
          $is_first = 0;
      }

      print {$out} close_last_chunk($pending) if length $pending;
      return;
  }

  # Run as a filter when executed directly; stay silent when loaded from
  # another program (e.g. a test harness).
  sentencize(\*STDIN, \*STDOUT) unless caller;