Blame view
tools/scripts/ScoreCtm2trigg.pl
2.15 KB
e6be5137b reinitialized pro... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
#!/usr/bin/perl
#
# ScoreCtm2trigg.pl - turn a CTM word stream into scored word records.
#
# Usage: ScoreCtm2trigg.pl [acoustFoundFile] < input.ctm
#
# STDIN  : lines of exactly six single-space separated fields
#              segmentId channel begin duration word confidence
#          Lines with any other field count are ignored.
# STDOUT : one record "score wordBegin wordEnd word " per accepted line.
#          - score is the confidence (two decimals) when the word contains
#            <s> or </s>, or when the confidence is above 0.6;
#          - otherwise score is -(1 - confidence) and the word belongs to a
#            "non-confident zone".
#          A zone runs from the end of the previous word to the beginning of
#          the next confident word (or to the end of the last word when the
#          input stops inside a zone).
#
# When exactly one argument is given it names a file of tab-separated,
# three-field lines (e.g. "MATCHING=presse<TAB>WLAT_POS=35<TAB>SCORE=...").
# Every distinct word found in the first field is then emitted once per
# non-confident zone with a fixed score of 0.7.
#
# NOTE(review): the records below end with a space and no "\n", and the
# "0.7 ..." record contains a physical line break, exactly as the strings
# appear in the reviewed copy of this file. Confirm against the intended
# output format before changing them.

use strict;
use warnings;

my $addVoc;
my $acoustFoundFile;
if (@ARGV == 1) {
    $addVoc          = 1;
    $acoustFoundFile = $ARGV[0];
}

my @nonconfZones;       # list of [begin, end] array refs
my $stopconf    = 0;    # 1 while inside a non-confident zone
my $previousEnd = 0;    # end time of the last word printed
my $begNonconf;
my $endNonconf;

while (my $line = <STDIN>) {
    chomp($line);
    my @fields = split(/ /, $line);

    # Expected format: segmentId channel begin duration word confidence
    next unless @fields == 6;
    my ($segmentId, undef, $begin, $duration, $word, $confidence) = @fields;

    # Segment start time: the text between the first '#' and the following
    # ':' in the segment id, divided by 100 (presumably hundredths of a
    # second - confirm). An id without that part yields 0, as before.
    my @splittedName = split(/#/, $segmentId);
    my @splittedTime = defined $splittedName[1]
                     ? split(/:/, $splittedName[1])
                     : ();
    my $segBegin = defined $splittedTime[0] && length $splittedTime[0]
                 ? $splittedTime[0] / 100
                 : 0;

    # Word begin/end relative to the segment start (the original note says
    # this subtraction matters for the boost). The end is computed from the
    # already rounded begin value.
    my $wordBegin = sprintf("%.02f", $begin - $segBegin);
    my $wordEnd   = sprintf("%.02f", $wordBegin + $duration);

    # Score to apply as boost.
    my $score;
    if ($word =~ /<s>|<\/s>/ || $confidence > 0.6) {
        if ($stopconf == 1) {
            # End of a non-confident zone.
            $endNonconf = $wordBegin;
            push(@nonconfZones, [$begNonconf, $endNonconf]);
            $stopconf = 0;
        }
        $score = sprintf("%0.2f", 1 * $confidence);
    }
    else {
        if ($stopconf == 0) {
            # Start of a non-confident zone.
            $begNonconf = $previousEnd;
            $stopconf   = 1;
        }
        $score = sprintf("%0.2f", -(1 - $confidence));
    }

    print "$score $wordBegin $wordEnd $word ";
    $previousEnd = $wordEnd;
}

# Input ended inside a non-confident zone: close it at the last word end.
if ($stopconf == 1) {
    $endNonconf = $previousEnd;
    push(@nonconfZones, [$begNonconf, $endNonconf]);
    $stopconf = 0;
}

if ($addVoc) {
    my %toAdd;

    # Three-arg open so the file name can never be read as a mode or pipe.
    open(my $file, '<', $acoustFoundFile)
        or die("Cannot open $acoustFoundFile: $!");
    while (my $entry = <$file>) {
        chomp($entry);

        # e.g. MATCHING=presse WLAT_POS=35 SCORE=8.19956e-22 (tab separated)
        my @splittedLine = split(/\t/, $entry);
        next unless @splittedLine == 3;

        # Keep only what follows the last '=' of the first field.
        my $word = $splittedLine[0];
        $word =~ s/^.+=//g;
        $toAdd{$word}++;
    }
    close($file);

    # Sorted so the output no longer depends on hash ordering.
    my @wordsToAdd = sort keys(%toAdd);

    foreach my $tabRef (@nonconfZones) {
        foreach my $word (@wordsToAdd) {
            print "0.7 $$tabRef[0] 
$$tabRef[1] $word ";
        }
    }
}