Yannick Estève / ONTRAC-Kaldi

Blame view

tools/sctk-2.4.10/bin/stm2rttm.pl 4.62 KB
  #!/usr/bin/perl -w
  
  # Converts an STM file into a corresponding RTTM file
  # Authors: Chris Laprun, Audrey Tong, Jon Fiscus
  #
  # v4:
  #       - Added a check to make sure speakers do not have segments that overlap with themselves
  # v2: 
  # 	- Added constants for better legibility
  #	- Added smoothing capability
  #	- Now takes an evaluation code as parameter to support multiple evaluations
  
  use Getopt::Long;
  
  # Help text listing the evaluation codes accepted by -e; it is appended to
  # the usage and error messages.
  my $SUPPORTED = "Supported evaluations are:
  \t- Rich Transcription 02 (code: rt02)
  \t- Rich Transcription 05s (code: rt05s)
  \t- Rich Transcription 04 Spring (code: rt04s)
  ";
  
  # Parse the command line.  The option value is bound directly to a lexical
  # instead of going through the implicit $opt_e package global, and the
  # return value of GetOptions is checked so that a malformed command line
  # (unknown option, option without its argument) stops the script instead
  # of being silently ignored.
  my $evaluation;
  GetOptions ("e|evaluation=s" => \$evaluation)
      or die "Usage: stm2rttm.pl -e [rt02|rt04s|rt05s]
  Version: 0.1
  
  Error: Invalid command line options!
  " . $SUPPORTED;
  
  # The evaluation code is mandatory: it selects the smoothing time and the
  # position of the sex category further down.
  if (! defined($evaluation)) {
    die "Usage: stm2rttm.pl -e [rt02|rt04s|rt05s]
  Version: 0.1
  
  Error: You must specify an evaluation code with the -e option!
  " . $SUPPORTED;
  }
  # STM entries collected from the input
  my %STM = ();
  
  # Indices of the fields inside one STM entry:
  # file id, channel number, speaker name, utterance start time,
  # utterance end time, categories block, utterance text
  my ($FILE, $CHANNEL, $SPEAKER, $START_TIME, $END_TIME, $CATEGORIES, $TEXT) = (0 .. 6);
  
  # Settings per evaluation code, in this order:
  #   index of the sex category in the categories block,
  #   number of seconds between utterances for smoothing,
  #   whether the sex category is read at all
  my %EVAL_CONFIG = (
      "rt04s" => [  1, 0,   1 ],
      "rt02"  => [  2, 0.5, 1 ],
      "rt05s" => [ -1, 0.3, 0 ],
  );
  
  exists $EVAL_CONFIG{$evaluation} or die "Unknown target evaluation code!
  " . $SUPPORTED;
  
  my ($SEX_INDEX, $SMOOTHING_TIME);
  ($SEX_INDEX, $SMOOTHING_TIME, $WITH_SEX) = @{ $EVAL_CONFIG{$evaluation} };
  
  # Get the STM data from the input read by <>
  while (<>){
      # Ignore blank lines, ';;' comment lines and inter-segment-gap records
      next if ($_ =~ /^\s*$/);
      next if ($_ =~ /(^;;|inter_segment_gap|intersegment_gap)/i);
  
      # Splitting on ' ' (rather than /\s+/) discards leading whitespace, so an
      # indented STM line does not produce an empty first field that shifts
      # every column by one.  The limit of 7 keeps the utterance text in a
      # single field.
      my @d = split(' ', $_, 7);
      if (! defined ($d[$FILE]) || ! defined ($d[$CHANNEL]) || ! defined ($d[$SPEAKER]) ) {
          die "No file, channel, or speaker defined";
      }
      # The times are used numerically for sorting, smoothing and output, so
      # reject a record that lacks them instead of carrying undef/"" along.
      foreach my $idx ($START_TIME, $END_TIME) {
          if (! defined ($d[$idx]) || $d[$idx] eq "") {
              die "No start or end time defined";
          }
      }
  
      # Store a copy of the entry under speaker / file / channel
      push (@{ $STM{$d[$SPEAKER]}{$d[$FILE]}{$d[$CHANNEL]} }, [ @d ]);
  
      if (! $WITH_SEX){
          # This evaluation does not read the sex category
          $spkrInfo{$d[$SPEAKER]} = "unknown";
          next;
      }
  
      # The categories block is comma separated; the sex sits at $SEX_INDEX
      # and may carry the closing '>' of the block.
      my @info = defined ($d[$CATEGORIES]) ? split (",", $d[$CATEGORIES]) : ();
      my $sex = $info[$SEX_INDEX];
      die "No sex category defined for speaker $d[$SPEAKER]" if (! defined ($sex));
      $sex =~ s/>//;
  
      if ($sex eq "male") { $sex = "adult_male"; }
      elsif ($sex eq "female") { $sex = "adult_female"; }
      elsif ($sex =~ /unk/) { $sex = "unknown"; }
      else { die "Unknown sex $sex"; }
  
      # A speaker must keep the same sex across all of its entries; entries
      # whose speaker name matches excluded_region are exempt from the check.
      if (defined($spkrInfo{$d[$SPEAKER]})){
          die "Error: ambiguous spkr info $d[$SPEAKER]=>$sex but had $spkrInfo{$d[$SPEAKER]}" if ($spkrInfo{$d[$SPEAKER]} ne $sex && $d[$SPEAKER] !~ /excluded_region/i);
      } else {
          $spkrInfo{$d[$SPEAKER]} = $sex;
      }
  }
  
  
  
  # Sort STM entries for smoothing: every speaker/file/channel list is put
  # in ascending order of utterance start time.
  foreach my $per_file (values %STM) {
      foreach my $per_chan (values %{ $per_file }) {
          foreach my $entries (values %{ $per_chan }) {
              @{ $entries } = sort { $a->[$START_TIME] <=> $b->[$START_TIME] } @{ $entries };
          }
      }
  }
  
  # Perform smoothing on every speaker/file/channel segment list
  foreach my $spkr (keys %STM) {
      for my $file (keys %{ $STM{$spkr} }){
          for my $chan (keys %{ $STM{$spkr}{$file} }){
              smooth_segments ($STM{$spkr}{$file}{$chan}, $SMOOTHING_TIME,
                               $START_TIME, $END_TIME, $TEXT);
          }
      }
  }
  
  # Merge, in place, consecutive segments of one speaker that are separated
  # by at most $max_gap seconds.
  #
  # Arguments:
  #   $segs      - reference to the list of segments (array refs), already
  #                sorted by start time; the list is replaced by the
  #                smoothed list
  #   $max_gap   - largest gap, in seconds, that is still bridged
  #   $start_idx - index of the start time inside a segment
  #   $end_idx   - index of the end time inside a segment
  #   $text_idx  - index of the utterance text inside a segment
  #
  # A merged segment keeps the start time of its first member, takes the end
  # time of its last member and the texts joined with a single space.
  # Dies if a segment starts before the (possibly merged) previous one ends.
  #
  # The smoothed list is built separately instead of splicing the list that
  # is being indexed: the previous splice-and-adjust-index code moved the
  # index forward after a removal and therefore skipped the following
  # segments, leaving them unmerged and unchecked.
  sub smooth_segments {
      my ($segs, $max_gap, $start_idx, $end_idx, $text_idx) = @_;
      my @smoothed = ();
  
      foreach my $seg (@{$segs}) {
          my $prev_seg = @smoothed ? $smoothed[-1] : undef;
          if (! defined ($prev_seg)) {
              # First segment: nothing to compare against yet
              push (@smoothed, $seg);
              next;
          }
          if ($seg->[$start_idx] < $prev_seg->[$end_idx]) {
              die "Error: segments from the same speaker overlap
  ".
                  join(" ",@{$prev_seg})."
  ".
                  join(" ",@{$seg});
          }
          if ($seg->[$start_idx] - $prev_seg->[$end_idx] <= $max_gap) {
              # Close enough: extend the previous segment over this one
              $prev_seg->[$end_idx] = $seg->[$end_idx];
              $prev_seg->[$text_idx] = $prev_seg->[$text_idx] . " " . $seg->[$text_idx];
          } else {
              push (@smoothed, $seg);
          }
      }
  
      @{$segs} = @smoothed;
      return;
  }
  
  # Output speaker info metadata: one SPKR-INFO record per speaker, file and
  # channel.  Speakers whose name matches excluded_region get no record.
  foreach my $spkr (keys %STM) {
      next if ($spkr =~ /excluded_region/i);
      foreach my $file (keys %{ $STM{$spkr} }) {
          foreach my $chan (keys %{ $STM{$spkr}{$file} }) {
              print "SPKR-INFO $file $chan <NA> <NA> <NA> $spkrInfo{$spkr} $spkr <NA>
";
          }
      }
  }
  
  # Output speaker turns: one record per segment, giving its start time and
  # its duration formatted with three decimals.  Segments of a speaker whose
  # name matches excluded_region are written as NOSCORE, all others as SPEAKER.
  foreach my $spkr (keys %STM) {
      my $scored = ($spkr !~ /excluded_region/i);
      foreach my $file (keys %{ $STM{$spkr} }) {
          foreach my $chan (keys %{ $STM{$spkr}{$file} }) {
              foreach my $seg (@{ $STM{$spkr}{$file}{$chan} }) {
                  my $beg = $seg->[$START_TIME];
                  my $dur = sprintf("%.3f", $seg->[$END_TIME] - $beg);
                  if ($scored) {
                      print "SPEAKER $file $chan $beg $dur <NA> <NA> $spkr <NA>
";
                  } else {
                      print "NOSCORE $file $chan $beg $dur <NA> <NA> <NA> <NA>
";
                  }
              }
          }
      }
  }
  
  # Sort comparator: orders two STM entries ($a and $b, as set by sort) by
  # ascending numeric utterance start time.
  sub numerically {
      my ($left_start, $right_start) = ($a->[$START_TIME], $b->[$START_TIME]);
      return $left_start <=> $right_start;
  }