Blame view

tools/scripts/ScoreCtm2trigg.pl 2.15 KB
e6be5137b   Jean-François Rey   reinitialized pro...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
  #!/usr/bin/perl
  #
  # ScoreCtm2trigg.pl - convert a CTM word stream into scored word lines.
  #
  # Usage: ScoreCtm2trigg.pl [acoustFoundFile] < input.ctm > output
  #
  # Input (STDIN), one word per line, fields separated by a single space:
  #     segmentId channel begin duration word confidence
  # Lines that do not split into exactly six fields are ignored.
  #
  # Output (STDOUT), one line per accepted input line:
  #     score wordBegin wordEnd word
  #   * wordBegin is "begin" minus the segment start, the segment start being
  #     the number between '#' and ':' in segmentId divided by 100
  #     (presumably hundredths of a second - TODO confirm);
  #   * wordEnd is wordBegin + duration;
  #   * score is the confidence for confident words (confidence > 0.6, or
  #     the sentence markers <s> and </s>) and -(1 - confidence) otherwise.
  #
  # A run of consecutive non-confident words forms a "non-confident zone"
  # that starts at the end of the previous word and stops at the beginning
  # of the next confident word (or at the end of the last word read).
  #
  # When exactly one command-line argument is given it names a tab-separated
  # file with three columns per line, e.g.
  #     MATCHING=presse<TAB>WLAT_POS=35<TAB>SCORE=8.19956e-22;
  # For every non-confident zone, each distinct word of that file is then
  # printed as "0.7 zoneBegin zoneEnd word".

  use strict;
  use warnings;

  # A word is confident when its confidence is strictly above this value.
  use constant CONFIDENCE_THRESHOLD => 0.6;
  # Score printed for every word injected into a non-confident zone.
  use constant ADDED_WORD_SCORE     => 0.7;

  # Vocabulary injection is enabled only by the exact one-argument form;
  # any other argument count is silently ignored, as before.
  my $acoustFoundFile = @ARGV == 1 ? $ARGV[0] : undef;

  my $nonconfZones = score_ctm(\*STDIN, \*STDOUT);

  if (defined $acoustFoundFile) {
      # Three-argument open: the file name comes from the command line and
      # must never be interpreted as a mode or a pipe.
      open(my $vocabFh, '<', $acoustFoundFile)
          or die("Cannot open $acoustFoundFile: $!");
      emit_vocabulary($vocabFh, $nonconfZones, \*STDOUT);
      close($vocabFh);
  }

  # segment_start($segment_id)
  #
  # Returns the number found between '#' and the first ':' of the segment
  # id, divided by 100. Returns 0 when the id carries no such number.
  sub segment_start {
      my ($segment_id) = @_;

      my (undef, $time_span) = split(/#/, $segment_id);
      return 0 unless defined $time_span;

      my ($start) = split(/:/, $time_span);
      return 0 unless defined $start && length $start;

      return $start / 100;
  }

  # score_word($word, $confidence)
  #
  # Returns the list (score, is_confident). The score is formatted with two
  # decimals: the confidence itself for a confident word, -(1 - confidence)
  # otherwise. Sentence markers <s> and </s> are always confident.
  sub score_word {
      my ($word, $confidence) = @_;

      if ($word =~ /<s>|<\/s>/ || $confidence > CONFIDENCE_THRESHOLD) {
          return (sprintf('%0.2f', 1 * $confidence), 1);
      }
      return (sprintf('%0.2f', -(1 - $confidence)), 0);
  }

  # score_ctm($in, $out)
  #
  # Reads CTM lines from the filehandle $in, prints one
  # "score wordBegin wordEnd word" line per accepted input line to $out and
  # returns a reference to the list of non-confident zones, each zone being
  # a two-element array reference [begin, end].
  sub score_ctm {
      my ($in, $out) = @_;

      my @zones;
      my $zone_begin;          # defined only while inside a non-confident zone
      my $previous_end = 0;    # end time of the last word printed

      while (my $line = <$in>) {
          chomp $line;

          # Expected format: segmentId channel begin duration word confidence
          my @fields = split(/ /, $line);
          next unless @fields == 6;
          my ($segment_id, undef, $begin, $duration, $word, $confidence) = @fields;

          # Word times are made relative to the start of their segment.
          my $word_begin = sprintf('%.2f', $begin - segment_start($segment_id));
          my $word_end   = sprintf('%.2f', $word_begin + $duration);

          my ($score, $is_confident) = score_word($word, $confidence);
          if ($is_confident) {
              if (defined $zone_begin) {
                  # End of a non-confident zone.
                  push(@zones, [ $zone_begin, $word_begin ]);
                  undef $zone_begin;
              }
          }
          elsif (!defined $zone_begin) {
              # Start of a non-confident zone.
              # NOTE(review): $previous_end may belong to another segment,
              # whose times are relative to a different start - confirm
              # this is intended.
              $zone_begin = $previous_end;
          }

          print {$out} "$score $word_begin $word_end $word\n";
          $previous_end = $word_end;
      }

      # A non-confident zone still open at end of input stops at the last word.
      push(@zones, [ $zone_begin, $previous_end ]) if defined $zone_begin;

      return \@zones;
  }

  # emit_vocabulary($vocab_fh, $zones, $out)
  #
  # Reads three-column tab-separated lines from $vocab_fh, keeps the text
  # after the last '=' of the first column as a word, and prints
  # "0.7 zoneBegin zoneEnd word" to $out for every zone of $zones and every
  # distinct word. Words are emitted in sorted order so that the output is
  # reproducible from run to run.
  sub emit_vocabulary {
      my ($vocab_fh, $zones, $out) = @_;

      my %seen;
      while (my $line = <$vocab_fh>) {
          chomp $line;
          # e.g. MATCHING=presse	WLAT_POS=35	SCORE=8.19956e-22;
          my @columns = split(/\t/, $line);
          next unless @columns == 3;

          my $word = $columns[0];
          $word =~ s/^.+=//;
          $seen{$word} = 1;
      }
      my @words = sort keys %seen;

      foreach my $zone (@{$zones}) {
          foreach my $word (@words) {
              print {$out} join(' ', ADDED_WORD_SCORE, $zone->[0], $zone->[1], $word), "\n";
          }
      }
      return;
  }