# ScoreCtm2trigg.pl 2.15 KB
#!/usr/bin/perl

# Command-line handling.
# When exactly one argument is supplied it is taken as the path of the
# "acoustically found words" file and vocabulary injection is switched on
# ($addVoc = 1). With any other argument count both variables stay undefined.
my ($addVoc, $acoustFoundFile);

if (scalar(@ARGV) == 1) {
   ($addVoc, $acoustFoundFile) = (1, $ARGV[0]);
}

# Strictures are enabled from this point on: everything below only uses
# declared lexical variables.
use strict;
use warnings;

# Low-confidence zones collected while reading the input; each entry is a
# reference to a two-element array [zoneBegin, zoneEnd].
my @nonconfZones;
# 1 while we are inside a run of low-confidence words, 0 otherwise.
my $stopconf = 0;
# End time of the last word that was printed (0 before the first word).
my $previousEnd = 0;
# Boundaries of the low-confidence zone currently being built.
my $begNonconf;
my $endNonconf;

#
# Read CTM-like lines from STDIN and print one "score begin end word" line
# per accepted input line.
#
# NOTE(review): fields are split on single spaces only, so lines that use
# repeated spaces or tabs do not yield exactly 6 fields and are skipped
# silently -- confirm that the input is normalised upstream.
#
while (my $line = <STDIN>) {
	chomp($line);
	my @splittedLine = split(/ /, $line);
	#
	# Expected format : segmentId channel begin duration word confidence
	# (the channel field is not used)
	#
	next unless @splittedLine == 6;
	my ($segmentId, undef, $begin, $duration, $word, $confidence) = @splittedLine;

	#
	# Extract the segment start time: the text after the first '#' in the
	# segment id, up to the first ':', divided by 100 (presumably
	# centiseconds -> seconds; TODO confirm). Falls back to 0 when that
	# part is missing or empty.
	#
	my $segBegin = 0;
	my (undef, $timeSpec) = split(/#/, $segmentId);
	if (defined $timeSpec) {
		my ($rawStart) = split(/:/, $timeSpec);
		if (defined $rawStart && length $rawStart) {
			$segBegin = $rawStart / 100;
		}
	}

	#
	# Word begin / end times, made relative to the segment start.
	# (Original author's note: subtracting the segment start time skews
	# the boost.)
	#
	my $wordBegin = sprintf("%.02f", $begin - $segBegin);
	my $wordEnd   = sprintf("%.02f", $wordBegin + $duration);

	#
	# Score to apply as boost: the confidence itself for sentence markers
	# and words with confidence > 0.6, otherwise the negative value
	# -(1 - confidence).
	#
	my $score;
	if ($word =~ /<s>|<\/s>/ || $confidence > 0.6) {
		if ($stopconf == 1) {
			# A confident word closes the current low-confidence zone.
			$endNonconf = $wordBegin;
			push(@nonconfZones, [$begNonconf, $endNonconf]);
			$stopconf = 0;
		}
		$score = sprintf("%0.2f", 1 * $confidence);
	} else {
		if ($stopconf == 0) {
			# First low-confidence word: the zone starts where the
			# previous word ended.
			$begNonconf = $previousEnd;
			$stopconf = 1;
		}
		$score = sprintf("%0.2f", -(1 - $confidence));
	}
	print "$score $wordBegin $wordEnd $word\n";

	$previousEnd = $wordEnd;
}

# The input ended inside a low-confidence zone: close it at the end of the
# last word.
if ($stopconf == 1) {
	$endNonconf = $previousEnd;
	push(@nonconfZones, [$begNonconf, $endNonconf]);
	$stopconf = 0;
}

#
# Optional vocabulary injection.
# When $addVoc is set, read the tab-separated file $acoustFoundFile, collect
# the distinct words of its first column, and print every one of them with a
# fixed score of 0.7 over each collected low-confidence zone.
# Dies if the file cannot be opened.
#
if($addVoc) {
	my %toAdd;
	# Three-argument open: the file name comes from the command line and
	# must never be interpreted as an open mode or a pipe.
	open(my $file, '<', $acoustFoundFile)
		or die("Cannot open $acoustFoundFile: $!");
	while(my $line = <$file>){
		chomp($line);
		# Expected line: MATCHING=presse	WLAT_POS=35	SCORE=8.19956e-22;
		my @splittedLine = split(/\t/, $line);
		next unless @splittedLine == 3;
		# Only the first column is used; strip everything up to and
		# including the last '=' to keep the bare word.
		my $word = $splittedLine[0];
		$word =~ s/^.+=//g;
		$toAdd{$word}++;
	}
	close($file);

	# Sort the words so the output is reproducible from run to run (plain
	# hash key order is randomised by perl).
	my @words = sort keys %toAdd;
	foreach my $zone (@nonconfZones) {
		foreach my $word (@words) {
			print "0.7 $$zone[0] $$zone[1] $word\n";
		}
	}
}