Blame view
tools/scripts/scoredCtmAndTaggedLem2All.pl
1.17 KB
e6be5137b
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
#!/usr/bin/perl
#
# Concat scored ctm and taglem file
#
# Usage:
#     cat <scored_ctm file> | this_script <taggerlem file>
#
# Scored CTM lines are read from STDIN and split on single spaces. Fields
# 1, 3, 5 and 6 are used as basename, time, word group and confidence.
# A word group is a set of words joined with "_"; each word is paired, in
# order, with the next usable entry of the tagger/lemma file (space-separated
# "word tag lemma" lines).
#
# Output, one record per line on STDOUT:
#     <basename> <time> <word> <tag> <lemma> <confidence>
# or, when the two inputs disagree on the word:
#     ERROR <taglem word> <ctm word>
#
use strict;
use warnings;

# Return the next (word, tag, lemma) triple read from the handle $fh, skipping
# every entry whose word field contains a "#<digits>-<digits>#" marker.
# Missing fields come back as empty strings. Returns the empty list once the
# handle is exhausted.
sub next_taglem_entry {
    my ($fh) = @_;

    while ( defined( my $entry = <$fh> ) ) {
        # Chomp the whole line so no field keeps the line terminator,
        # whatever the number of fields on the line.
        chomp $entry;

        # Limit -1 keeps trailing empty fields instead of dropping them.
        my ( $word, $tag, $lemma ) = split / /, $entry, -1;
        for my $field ( $word, $tag, $lemma ) {
            $field = '' unless defined $field;
        }

        next if $word =~ /#[0-9]+-[0-9]+#/;
        return ( $word, $tag, $lemma );
    }

    return;
}

if ( @ARGV != 1 ) {
    die "BAD USAGE : cat <scored_ctm file> | $0 <taggerlem file>\n";
}
my $tagLemFile = $ARGV[0];

# Three-argument open: the file name comes from the command line and must
# never be interpreted as an open mode or a pipe.
open( my $taglem_fh, '<', $tagLemFile )
    or die "Cannot open '$tagLemFile': $!";

# Set once the tagger/lemma file has run out, so the warning is issued once.
my $taglem_exhausted = 0;

CTM_LINE:
while ( defined( my $line = <STDIN> ) ) {
    chomp $line;

    my @fields = split / /, $line;
    my ( $basename, $time, $words, $conf ) = @fields[ 0, 2, 4, 5 ];

    # A line without a word column yields no output and consumes no
    # tagger/lemma entry.
    next CTM_LINE unless defined $words;

    # Skip markup tokens such as <s> or </s>; they have no tagger/lemma
    # counterpart. This also covers word groups starting with <s> / </s>.
    next CTM_LINE if $words =~ m/\<\/?[a-zA-Z:]+\>/;

    $conf = '' unless defined $conf;

    WORD:
    for my $word ( split /_/, $words ) {
        my ( $tword, $ttag, $tlem ) = next_taglem_entry($taglem_fh);

        if ( !defined $tword ) {
            # Nothing left to align against: give up on this word group.
            warn "Tagger/lemma file '$tagLemFile' ended before the CTM "
                . "input; remaining words are dropped\n"
                unless $taglem_exhausted++;
            last WORD;
        }

        if ( $tword eq $word ) {
            print "$basename $time $tword $ttag $tlem $conf\n";
        }
        else {
            print "ERROR $tword $word\n";
        }
    }
}

close $taglem_fh;