Blame view

tools/sctk-2.4.10/src/rttmSort/rttmSort.pl 3.19 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  #!/usr/bin/perl -w
  
  use strict;
  use Data::Dumper;
  
  # Priority of each record type when several records share the same file,
  # channel and begin time: a lower number is printed first.  The rank of a
  # type is simply its position in the list below.
  my %sort_order = do {
      my @types_by_priority = (
          "SPKRINFO",
          "NOSCORE",
          "NORTMETADATA",
          "SEGMENT",
          "SPEAKER",
          "SU",
          "A/P",
          "CB",
          "IP",
          "EDIT",
          "FILLER",
          "NON-SPEECH",
          "NON-LEX",
          "LEXEME",
          "SUboundary",
      );
      my %rank_of;
      @rank_of{@types_by_priority} = (0 .. $#types_by_priority);
      %rank_of;
  };

  # SPKR-INFO records, keyed by "<file> <speaker>".
  my %spkrinfo;

  # All other records: file -> channel -> begin time -> input line number
  # -> hash of the record's fields.
  my %stm_file;
   # Read every input line (files named on the command line, or STDIN) and
   # file each record into %spkrinfo or %stm_file.
   #
   # The pattern pieces never change, so they are built and compiled once
   # instead of on every iteration.  A token is any run of non-blank
   # characters, optionally wrapped in parentheses, or the literal <NA>.
   my $wrdExp = '[^\s]+';
   my $txtExp = "$wrdExp|\\($wrdExp\\)|<NA>";

   # Capture groups, in order: type, file, channel, begin time, end time,
   # token, subtype, speaker, confidence.  The pattern is not anchored, so
   # anything after the ninth field is ignored.
   my $record_re = qr/(SPKR-INFO|SEGMENT|LEXEME|NON-LEX|CB|SU|EDIT|FILLER|IP|NOSCORE|SPEAKER|NORTMETADATA|NON-SPEECH|A\/P)\s+(\S+)\s+(\d+)\s+(\d*\.?\d+|<NA>)\s+(\d*\.?\d+|<NA>)\s+($txtExp)\s+([\w&-]+|<NA>)\s+(\S+|<NA>)\s+(\d*\.?\d+|<NA>)/;

   while (my $line = <>) {
       # Any line containing ";;" anywhere is treated as a comment and skipped.
       next if $line =~ /;;/;

       # List assignment in scalar context yields the number of captures,
       # which is 0 when the line does not match the record pattern.
       my ($type, $file, $chan, $beg_time, $end_time,
           $token, $subtype, $speaker, $conf) = $line =~ $record_re
           or die "malformed line $.\n--> $line\n";

       if ($type eq "SPKR-INFO") {
           my $spkr_key = $file . " " . $speaker;
           # print "--$file  $speaker--\n";
           die "Error: spkrinfo exists for '$file $speaker'"
               if exists $spkrinfo{$spkr_key};

           # For SPKR-INFO records the subtype column is stored as the gender.
           $spkrinfo{$spkr_key} = {
               file   => $file,
               chan   => $chan,
               gender => $subtype,
               spkr   => $speaker,
               conf   => $conf,
               line   => $. . " $line",
           };
       }
       else {
           # $. (the running input line number) makes every record's slot
           # unique, so records sharing a begin time never overwrite each other.
           $stm_file{$file}{$chan}{$beg_time}{$.} = {
               type     => $type,
               beg_time => $beg_time,
               end_time => $end_time,
               token    => $token,
               subtype  => $subtype,
               speaker  => $speaker,
               conf     => $conf,
               line     => $. . " $line",
           };
       }
   }
  
  # Sort comparator (reads the sort variables $a and $b): orders values
  # numerically, with the placeholder "<NA>" placed before every number.
  # Two "<NA>" values compare equal.
  sub cmp_float {
      my $a_is_na = ($a eq "<NA>") ? 1 : 0;
      my $b_is_na = ($b eq "<NA>") ? 1 : 0;

      if ($a_is_na || $b_is_na) {
          # both NA -> 0, only $b NA -> 1, only $a NA -> -1
          return $b_is_na - $a_is_na;
      }

      return $a <=> $b;
  }
  
  
  # Emit one SPKR-INFO line per stored speaker, in string order of the
  # "<file> <speaker>" key.  The three columns between the channel and the
  # gender are always written as <NA>.
  foreach my $spkr (sort keys %spkrinfo) {
      my $info = $spkrinfo{$spkr};
      my @columns = (
          "SPKR-INFO",
          $info->{file},
          $info->{chan},
          ("<NA>") x 3,
          $info->{gender},
          $info->{spkr},
          $info->{conf},
      );
      print join(" ", @columns) . "\n";
  }
  
  #print Dumper(\%stm_file);
  # Emit every non-SPKR-INFO record, ordered by file name (string order),
  # channel (string order), begin time (cmp_float order) and then by the
  # record type's priority in %sort_order.
  foreach my $key_filename (sort keys %stm_file) {
      my $by_channel = $stm_file{$key_filename};

      foreach my $key_channel (sort keys %{$by_channel}) {
          my $by_begtime = $by_channel->{$key_channel};

          foreach my $key_begtime (sort cmp_float keys %{$by_begtime}) {
              # Records at this begin time, keyed by their input line number.
              my $records = $by_begtime->{$key_begtime};

              # Hash keys come back in no particular order, so records that
              # tie on type priority are further ordered by input line number
              # to keep the output deterministic and in input order.
              my @line_numbers = sort {
                  $sort_order{ $records->{$a}{type} } <=> $sort_order{ $records->{$b}{type} }
                      or $a <=> $b
              } keys %{$records};

              foreach my $line (@line_numbers) {
                  my $rec = $records->{$line};
                  my @columns = (
                      $rec->{type},
                      $key_filename,
                      $key_channel,
                      @{$rec}{qw(beg_time end_time token subtype speaker conf)},
                  );
                  print join(" ", @columns) . "\n";
              }
          }
      }
  }