scoredCtmAndTaggedLem2All.pl
1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/perl
#
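# Concatenate a scored CTM stream (read from STDIN) with a tagger/lemmatizer
# ("taglem") file given as the only command-line argument.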
#
use strict;
use warnings;
# ---------------------------------------------------------------------------
# Merge a scored CTM stream with a tagger/lemmatizer ("taglem") file.
#
# Input:
#   STDIN    - scored CTM lines, split on single spaces. Field 0 is used as
#              the basename, field 2 as the time, field 4 as the word (several
#              words may be joined with "_") and field 5 as the confidence.
#   $ARGV[0] - taglem file, one "word tag lemma" entry per line, split on
#              single spaces. Entries whose word contains a marker of the
#              form #<digits>-<digits># are skipped.
#
# Output (STDOUT):
#   "basename time word tag lemma conf" for every CTM word that equals the
#   next taglem word, and "ERROR <taglem word> <ctm word>" when it does not.
#   CTM lines whose word field contains an <xxx> / </xxx> tag are ignored and
#   consume no taglem entry.
# ---------------------------------------------------------------------------

# Return the next non-marker entry of the taglem file as a
# (word, tag, lemma) list, or an empty list once the file is exhausted.
# Missing fields come back as empty strings so callers never see undef.
sub next_taglem_entry {
    my ($fh) = @_;

    while (defined(my $line = <$fh>)) {
        # Strip the newline before splitting so that it can never end up
        # inside the word or tag field of a short line.
        chomp $line;
        my ($word, $tag, $lemma) = split(/ /, $line);
        for my $field ($word, $tag, $lemma) {
            $field = '' unless defined $field;
        }

        # Position markers are not words: skip them.
        next if $word =~ /#[0-9]+-[0-9]+#/;

        return ($word, $tag, $lemma);
    }

    return;
}

# Exactly one argument is expected: the path of the taglem file.
if (@ARGV != 1) {
    die "BAD USAGE : cat <scored_ctm file> | $0 <taggerlem file>\n";
}
my $tagLemFile = $ARGV[0];

# Three-argument open with a lexical handle: the file name is never
# interpreted as a mode or a command.
open(my $taglem_fh, '<', $tagLemFile)
    or die "Cannot open '$tagLemFile': $!\n";

# Set once the taglem file has run out, so the problem is reported only once.
my $taglem_exhausted = 0;

CTM_LINE:
while (defined(my $line = <STDIN>)) {
    chomp $line;
    my @fields = split(/ /, $line);

    # Blank or truncated lines carry no word field: nothing to merge.
    next CTM_LINE if @fields < 5;

    my ($basename, $time, $words) = @fields[0, 2, 4];
    my $conf = defined $fields[5] ? $fields[5] : '';

    # Skip markup tokens such as <s>, </s> or <tag:name>.
    next CTM_LINE if $words =~ m/\<\/?[a-zA-Z:]+\>/;

    # A CTM token may bundle several words joined by "_"; each of them is
    # paired with its own taglem entry.
    for my $word (split(/_/, $words)) {
        my ($tword, $ttag, $tlem) = next_taglem_entry($taglem_fh);

        if (!defined $tword) {
            # No taglem entry left: the remaining CTM words cannot be merged.
            # Keep draining STDIN (no further output) but say so once.
            warn "$0: '$tagLemFile' ended before the CTM stream; "
               . "remaining words are dropped\n"
                unless $taglem_exhausted++;
            next CTM_LINE;
        }

        if ($tword eq $word) {
            print "$basename $time $tword $ttag $tlem $conf\n";
        }
        else {
            print "ERROR $tword $word\n";
        }
    }
}

close($taglem_fh);