AnalyserErreursAvecPRF_EtTaggerLesFichiersRES.pl 4.33 KB
#!/usr/bin/perl -w
#
use strict;
use warnings;


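#Sample utterance block from the <prf> input file (the File:, REF:, HYP:, H_T1:, H_T2: and Eval: lines are the ones this script parses):
#Speaker sentences   9:  philippe_reltien   utt# 3 of 13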
#id: (philippe_reltien-003)
#Labels: <o,f2,male>
#File: 19991103_0700_0800_inter
#Channel: 1
#Scores: (#C #S #D #I) 6 3 2 1
#	Ref times: t1= 673.89 t2= 676.56
#	REF:  il      *       a       UN      TREUIL  QUI     PEUT    REMONTER  quarante tonnes  de      débris
#	HYP:  il      Y       a       **      ******  TROIS   ÉQUIPES REMONTÉES quarante tonnes  de      débris
#	H_T1: 674.14  674.23  674.28                  674.44  674.71  675.08    675.41   675.78  675.99  676.10
#	H_T2: 674.23  674.28  674.44                  674.71  675.08  675.41    675.78   675.99  676.10  676.50
#	CONF: 0.0000  0.0000  0.0000                  0.0000  0.0000  0.0000    0.0000   0.0000  0.0000  0.0000
#	Eval:         I               D       D       S       S       S



# Both command-line arguments are required. When fewer than two are given,
# print the usage text on standard output and stop with exit status 0.
unless (@ARGV >= 2)
{
	print "<prf> <OK|ERR : print good or erroneous words>\n",
	      "out : les mots errones\n";
	exit 0;
}


# Open the PRF file named by the first argument for reading. The explicit
# '<' mode (three-argument open) prevents a file name that starts with
# '>', '|' or similar from being interpreted as an open mode, and $! gives
# the system's reason when the open fails.
open (PRF, '<', $ARGV[0]) or die "can not open $ARGV[0]: $!\n";

# Selection mode taken from the second argument: "ERR" selects erroneous
# words; any other value selects correct words.
my $Type=$ARGV[1];

# Most recent REF, HYP, H_T1, H_T2 and Eval lines, each stored as an array
# of single characters so that words can be matched by column position.
my @Ref;
my @Hyp;
my @Time1;
my @Time2;
my @Eval;

# Value of the most recent "File:" line; printed as the first output field.
my $FileName;

# NOTE(review): $Start and $Duree are not read by any active code visible in
# this file; they are kept in case another part of the file relies on them.
my $Start;
my $Duree;

# Comparator for sort(): orders values numerically, smallest first.
# Reads sort's package variables $a and $b.
sub par_num
{
	$a <=> $b;
}


# Find the starting position of every word in a line that has been split
# into single characters.
#
# Parameters:
#   $Ref - reference to an array of single characters
#   $Res - reference to the array receiving the result; slot k is set to the
#          0-based index of the first character of the k-th word (slots are
#          written from index 0, existing extra slots are left untouched)
#
# A word is a maximal run of characters other than ' '; only the plain space
# character separates words. The return value is not meaningful.
sub TrouverIndex
{
	my ($Ref, $Res) = @_;

	# True while the previous character was a space (or at line start),
	# i.e. the next non-space character begins a new word.
	my $Vide = 1;

	my $NbMots = 0;

	for my $i (0 .. $#$Ref)
	{
		# Read through the reference that was passed in, not through a
		# file-scoped array that merely shares its name.
		if ($Ref->[$i] eq ' ')
		{
			$Vide = 1;
		}
		elsif ($Vide)
		{
			$Vide = 0;
			$Res->[$NbMots++] = $i;
		}
	}

	return;
}


# Rebuild the word that starts at a given position of a character array.
#
# Parameters:
#   $Ref   - reference to an array of single characters
#   $Index - position of the first character to take
#
# Returns the characters from $Index up to, but not including, the next ' '
# or the end of the array, joined into one string. The result is the empty
# string when $Index is past the end or points at a space.
sub ReconstituerMot
{
	my ($Ref, $Index) = @_;

	my $Mot = '';
	my $Pos = $Index;

	while ($Pos < @$Ref && $Ref->[$Pos] ne ' ')
	{
		$Mot .= $Ref->[$Pos];
		$Pos++;
	}

	return $Mot;
}



# Print one output record for the word whose first character sits at column
# $Index of the aligned PRF lines.
#
# Parameters:
#   $Fichier - file name printed as the first field
#   $Index   - 0-based column at which the word starts
#   $Ref, $Hyp, $T1, $T2 - references to the REF, HYP, H_T1 and H_T2 lines,
#                          each split into single characters
#
# Output line: "<file> <start> <duration> <hyp word> <ref word>". Start and
# end come from the H_T1/H_T2 values multiplied by 100 and rounded to the
# nearest integer; duration is end minus start.
sub ImprimerMot
{
	my ($Fichier, $Index, $Ref, $Hyp, $T1, $T2) = @_;

	my $MotRef = ReconstituerMot($Ref, $Index);
	my $MotHyp = ReconstituerMot($Hyp, $Index);

	my $Start = int(ReconstituerMot($T1, $Index) * 100 + .5);
	my $End   = int(ReconstituerMot($T2, $Index) * 100 + .5);
	my $Dur   = $End - $Start;

	# Lower-case ASCII letters and the bytes \xc0-\xdd (presumably Latin-1
	# accented capitals -- confirm the input encoding). tr/// takes plain
	# character lists: writing square brackets around them would add '['
	# and ']' to the lists and, because the lists would then differ in
	# length, map ']' onto \xfe.
	$MotHyp =~ tr/A-Z\xc0-\xdd/a-z\xe0-\xfd/;
	$MotRef =~ tr/A-Z\xc0-\xdd/a-z\xe0-\xfd/;

	print "$Fichier $Start $Dur $MotHyp $MotRef\n";
}


# Main loop: read the PRF file one line at a time (no need to hold the whole
# file in memory), remember the latest File/REF/HYP/H_T1/H_T2 lines, and emit
# the selected words each time an Eval line is reached.
while (my $Ligne = <PRF>)
{
	if ($Ligne=~/File: +(.*)/)
	{
		$FileName=$1;
	}

	# The aligned lines are kept as arrays of characters: words on the
	# REF, HYP, H_T1, H_T2 and Eval lines are matched by column position.
	if ($Ligne=~/REF:  (.*)/)
	{
		@Ref=split //,$1;
	}

	if ($Ligne=~/HYP:  (.*)/)
	{
		@Hyp=split //,$1;
	}

	if ($Ligne=~/H_T1: (.*)/)
	{
		@Time1=split //, $1;
	}

	if ($Ligne=~/H_T2: (.*)/)
	{
		@Time2=split //, $1;
	}

	if ($Ligne=~/Eval: (.*)/)
	{
		@Eval=split //, $1;

		# Columns at which the words of the REF line start.
		my @Decoupage;
		TrouverIndex(\@Ref, \@Decoupage);

		foreach my $Index (@Decoupage)
		{
			# The Eval line can be shorter than the REF line. A column
			# past its end carries no S/I/D marker, so it is treated
			# like a blank.
			my $Marque = $Index < @Eval ? $Eval[$Index] : ' ';

			my $Selection;

			if ($Type eq "ERR")
			{
				# Erroneous words: substitutions and insertions.
				$Selection = ($Marque eq 'S' || $Marque eq 'I');
			}
			else
			{
				# Correct words: no substitution, insertion or
				# deletion marker in this column.
				$Selection = ($Marque ne 'S' && $Marque ne 'I' && $Marque ne 'D');
			}

			if ($Selection)
			{
				ImprimerMot($FileName, $Index, \@Ref, \@Hyp, \@Time1, \@Time2);
			}
		}
	}
}

close(PRF);