Blame view

tools/sctk-2.4.10/bin/rttm2ctm.pl 1.59 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
  #!/usr/bin/perl -w
  #
  # rttm2ctm.pl - write the lexical tokens of an RTTM-style input as CTM-style
  # records.
  #
  # Usage: rttm2ctm.pl [-i INPUT] [-o OUTPUT]
  #
  #   -i INPUT    file to read;  "-" (the default) means standard input
  #   -o OUTPUT   file to write; "-" (the default) means standard output
  #
  # Processing rules:
  #   * lines containing ";;" anywhere are skipped;
  #   * every other line must match the record pattern below, otherwise the
  #     script dies and reports the line number and the offending line;
  #   * only records of type LEXEME with subtype "lex" are emitted, as
  #       <file> <chan> <beg> <dur> <token> <conf>

  use strict;
  use warnings;
  use Data::Dumper;
  use Getopt::Std;

  my $in  = "-";
  my $out = "-";

  getopts('i:o:');
  if (defined($main::opt_o)) { $out = $main::opt_o; }
  if (defined($main::opt_i)) { $in  = $main::opt_i; }

  # Three-argument open takes the file name literally (no mode characters or
  # pipes are interpreted), so "-" has to be mapped to the standard streams by
  # hand.  "or" (not "||") is required so that die is tied to open's result
  # rather than to the file-name string.
  my ($in_fh, $out_fh);
  if ($in eq "-") {
      $in_fh = \*STDIN;
  }
  else {
      open($in_fh, '<', $in) or die "Failed to open $in: $!\n";
  }
  if ($out eq "-") {
      $out_fh = \*STDOUT;
  }
  else {
      open($out_fh, '>', $out) or die "Failed to open $out: $!\n";
  }

  # The patterns do not depend on the input line, so compile them once.
  my $word_re = qr/[\]\[\S\%\{\'\<\>.-]+/;
  my $text_re = qr/$word_re|\($word_re\)|<NA>/;
  my $num_re  = qr/\d*\.?\d+|<NA>/;

  # Nine whitespace-separated fields.  The pattern is deliberately left
  # unanchored, exactly as before, so leading or trailing text is tolerated.
  my $record_re = qr{
      (SPKR-INFO|SEGMENT|LEXEME|NON-LEX|CB|SU|EDIT|FILLER|IP|NOSCORE
       |SPEAKER|NORTMETADATA|NON-SPEECH|A/P)    # 1: record type
      \s+ (\S+)                                 # 2: file
      \s+ (\d+)                                 # 3: channel
      \s+ ($num_re)                             # 4: begin time
      \s+ ($num_re)                             # 5: duration
      \s+ ($text_re)                            # 6: token
      \s+ ([\w&-]+|<NA>)                        # 7: subtype
      \s+ (\S+|<NA>)                            # 8: speaker
      \s+ ($num_re)                             # 9: confidence
  }x;

  while (my $line = <$in_fh>) {
      next if $line =~ /;;/;

      # A successful match in list context returns the nine captures; an
      # empty list means the line did not match.
      my @fields = $line =~ $record_re;
      die "malformed line $.\n--> $line\n" unless @fields;

      my ($type, $file, $chan, $beg, $dur, $token, $stype, $spkr, $conf)
          = @fields;

      if ($type eq "LEXEME" && $stype eq "lex") {
          print {$out_fh} "$file $chan $beg $dur $token $conf\n";
      }
  }

  close($in_fh);
  # Buffered write errors only surface at close time, so check it.
  close($out_fh) or die "Failed to close $out: $!\n";