ScoreCtm2trigg.pl
2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/perl
#
# Reads CTM-like lines on STDIN and prints one "score begin end word" line per
# accepted input line. Runs of consecutive low-confidence words are remembered
# as "non-confident zones". When exactly one command-line argument is given, it
# names a tab-separated file of extra words; each of those words is then
# printed once per non-confident zone with a fixed score.
#
# Usage: ScoreCtm2trigg.pl [acoustic_found_file] < input.ctm
#
use strict;
use warnings;

# A word is trusted when its confidence is strictly greater than this value.
use constant CONFIDENCE_THRESHOLD => 0.6;

# Score printed for every extra word injected into a non-confident zone.
use constant ADDED_WORD_SCORE => '0.7';

# Returns the start time encoded in a segment id of the form "name#begin:...".
# The "begin" field is divided by 100 (presumably hundredths of a second
# converted to seconds -- confirm with the producer of the CTM). Returns 0
# when the id carries no such field.
sub segment_begin {
    my ($segment_id) = @_;

    my @name_parts = split /#/, $segment_id;
    return 0 if !defined $name_parts[1];

    my @time_parts = split /:/, $name_parts[1];
    return 0 if !defined $time_parts[0];

    return $time_parts[0] / 100;
}

# Parses one input line.
# Expected format: segmentId channel begin duration word confidence
# (fields separated by exactly one space). Returns the empty list for any line
# that does not have exactly six fields; otherwise returns
# (word, confidence, word_begin, word_end) with both times formatted "%.02f".
sub parse_ctm_line {
    my ($line) = @_;

    my @fields = split / /, $line;
    return if @fields != 6;

    # The channel (second field) is not used.
    my ($segment_id, $begin, $duration, $word, $confidence) = @fields[0, 2 .. 5];

    # Word times are expressed relative to the start of the segment.
    # NOTE(review): the original French comment remarked that subtracting the
    # segment start "distorts the boost" -- confirm that relative times are
    # what the consumer of this output expects.
    my $word_begin = sprintf '%.02f', $begin - segment_begin($segment_id);
    my $word_end   = sprintf '%.02f', $word_begin + $duration;

    return ($word, $confidence, $word_begin, $word_end);
}

# True when the word is a sentence marker (<s> or </s>) or its confidence is
# above CONFIDENCE_THRESHOLD.
sub is_confident {
    my ($word, $confidence) = @_;

    return 1 if $word =~ m{<s>|</s>};
    return $confidence > CONFIDENCE_THRESHOLD ? 1 : 0;
}

# Score to print for a word: the confidence itself for a trusted word,
# -(1 - confidence) otherwise, formatted with two decimals.
sub boost_score {
    my ($confidence, $confident) = @_;

    my $raw = $confident ? $confidence : -(1 - $confidence);
    return sprintf '%0.2f', $raw;
}

# Reads CTM lines from $in, prints "score begin end word" lines to $out and
# returns the list of non-confident zones, each an array ref [begin, end].
# A zone starts at the end time of the previous printed word (0 if none) and
# stops at the begin time of the next trusted word, or at the end of the last
# printed word when the input finishes inside a zone.
sub score_ctm {
    my ($in, $out) = @_;

    my @zones;
    my $in_zone      = 0;
    my $zone_begin;
    my $previous_end = 0;

    while (my $line = <$in>) {
        chomp $line;

        my @entry = parse_ctm_line($line);
        next if !@entry;
        my ($word, $confidence, $word_begin, $word_end) = @entry;

        my $confident = is_confident($word, $confidence);
        if ($confident) {
            if ($in_zone) {
                # End of a non-confident zone.
                push @zones, [$zone_begin, $word_begin];
                $in_zone = 0;
            }
        }
        elsif (!$in_zone) {
            # Start of a non-confident zone.
            $zone_begin = $previous_end;
            $in_zone    = 1;
        }

        my $score = boost_score($confidence, $confident);
        print {$out} "$score $word_begin $word_end $word\n";
        $previous_end = $word_end;
    }

    # Input ended while still inside a non-confident zone.
    push @zones, [$zone_begin, $previous_end] if $in_zone;

    return @zones;
}

# Reads the extra-vocabulary file: tab-separated lines with exactly three
# fields, e.g. "MATCHING=presse<TAB>WLAT_POS=35<TAB>SCORE=8.19956e-22;".
# Only the first field is used, with everything up to its last "=" removed.
# Returns the distinct words, sorted so that the output order is reproducible
# (the hash order used previously changes from run to run).
# Dies if the file cannot be opened or closed.
sub read_vocabulary {
    my ($path) = @_;

    # Three-argument open: the path comes from the command line and must never
    # be interpreted as a mode or a pipe.
    open my $fh, '<', $path or die "Cannot open $path: $!";

    my %seen;
    while (my $line = <$fh>) {
        chomp $line;

        my @fields = split /\t/, $line;
        next if @fields != 3;

        my $word = $fields[0];
        $word =~ s/^.+=//;
        $seen{$word}++;
    }

    close $fh or die "Cannot close $path: $!";

    return sort keys %seen;
}

# Entry point: scores STDIN to STDOUT, then, when exactly one argument was
# given, prints every vocabulary word once per non-confident zone.
sub main {
    my @args = @_;

    my @zones = score_ctm(\*STDIN, \*STDOUT);
    return if @args != 1;

    my @words = read_vocabulary($args[0]);
    for my $zone (@zones) {
        for my $word (@words) {
            print join(' ', ADDED_WORD_SCORE, $zone->[0], $zone->[1], $word), "\n";
        }
    }

    return;
}

# Run only when executed as a script, so the subs can be loaded on their own.
main(@ARGV) unless caller;