rttmSort.pl
3.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/perl -w
use strict;
use Data::Dumper;
# Rank of each record type, keyed by type name: the position of the name in
# the list below is its rank, so a type listed earlier gets a smaller number.
my %sort_order;
{
    my @types_by_rank = qw(
        SPKRINFO
        NOSCORE
        NORTMETADATA
        SEGMENT
        SPEAKER
        SU
        A/P
        CB
        IP
        EDIT
        FILLER
        NON-SPEECH
        NON-LEX
        LEXEME
        SUboundary
    );
    # Hash-slice assignment pairs every name with its index (0 .. 14).
    @sort_order{@types_by_rank} = (0 .. $#types_by_rank);
}
# "<file> <speaker>" => hash of the fields of that speaker's SPKR-INFO line.
my %spkrinfo;
# file => channel => begin time => input line number => hash of record fields.
my %stm_file;

# Pattern for one nine-field record, compiled once instead of being rebuilt
# for every input line.  It is deliberately left unanchored, exactly as the
# inline pattern it replaces was: text before the type keyword or after the
# ninth field does not prevent a match.
my $rttm_record_re = qr{
    (SPKR-INFO|SEGMENT|LEXEME|NON-LEX|CB|SU|EDIT|FILLER|IP|NOSCORE
        |SPEAKER|NORTMETADATA|NON-SPEECH|A/P)  \s+   # 1: record type
    (\S+)                                      \s+   # 2: file
    (\d+)                                      \s+   # 3: channel
    (\d*\.?\d+|<NA>)                           \s+   # 4: begin time
    (\d*\.?\d+|<NA>)                           \s+   # 5: end-time field
    ([^\s]+|\([^\s]+\)|<NA>)                   \s+   # 6: token
    ([\w&-]+|<NA>)                             \s+   # 7: subtype
    (\S+|<NA>)                                 \s+   # 8: speaker
    (\d*\.?\d+|<NA>)                                 # 9: confidence
}x;

# Split one input line into its nine record fields.
#   $line - the raw input line (a trailing newline is fine).
# Returns (type, file, channel, begin, end, token, subtype, speaker, conf)
# in list context, or an empty list when the line holds no such record.
sub _parse_rttm_line {
    my ($line) = @_;
    return $line =~ $rttm_record_re;
}

# Read every line from the files named on the command line (or STDIN) and
# file it under %spkrinfo or %stm_file.  Dies on the first line that is
# neither a comment nor a well-formed record, and on a repeated SPKR-INFO
# entry for the same file/speaker pair.
while (<>) {
    # Comment marker: any line containing ";;" anywhere is skipped.
    next if /;;/;

    my @fields = _parse_rttm_line($_);
    die "malformed line $.\n--> $_\n" unless @fields;

    my ($type, $file, $chan, $beg, $end, $token, $subtype, $speaker, $conf)
        = @fields;

    # Kept with the record: the input line number followed by the raw text.
    my $numbered_line = $. . " $_";

    if ($type eq "SPKR-INFO") {
        my $spkr_key = $file . " " . $speaker;
        die "Error: spkrinfo exists for '$file $speaker'"
            if exists $spkrinfo{$spkr_key};
        # For SPKR-INFO lines the seventh field is stored as the gender.
        $spkrinfo{$spkr_key} = {
            file   => $file,
            chan   => $chan,
            gender => $subtype,
            spkr   => $speaker,
            conf   => $conf,
            line   => $numbered_line,
        };
    }
    else {
        # The input line number ($.) is the innermost key, so records that
        # share file, channel and begin time are all kept.
        $stm_file{$file}{$chan}{$beg}{$.} = {
            type     => $type,
            beg_time => $beg,
            end_time => $end,
            token    => $token,
            subtype  => $subtype,
            speaker  => $speaker,
            conf     => $conf,
            line     => $numbered_line,
        };
    }
}
# Sort comparator over the package variables $a and $b (use it as
# "sort cmp_float LIST").  The literal string "<NA>" orders before every
# other value and equal to itself; all other pairs are compared numerically.
# Returns -1, 0 or 1.
sub cmp_float {
    my $lhs_is_na = ($a eq "<NA>");
    my $rhs_is_na = ($b eq "<NA>");

    # Ordinary case: neither side is the placeholder.
    return $a <=> $b unless $lhs_is_na || $rhs_is_na;

    # At least one side is "<NA>": both -> 0, only $b -> 1, only $a -> -1.
    return ($rhs_is_na ? 1 : 0) - ($lhs_is_na ? 1 : 0);
}
# Print one SPKR-INFO line per %spkrinfo entry, in string order of the hash
# keys.  The three fields after the channel are always written as <NA>.
for my $spkr_key (sort keys %spkrinfo) {
    my $info = $spkrinfo{$spkr_key};
    my @columns = (
        "SPKR-INFO",
        $info->{file},
        $info->{chan},
        "<NA>", "<NA>", "<NA>",
        $info->{gender},
        $info->{spkr},
        $info->{conf},
    );
    print join(" ", @columns) . "\n";
}
#print Dumper(\%stm_file);
# Print every record held in %stm_file, one per line, as
#   type file channel beg_time end_time token subtype speaker conf
# ordered by file name, then channel, then begin time, then record type
# (rank taken from %sort_order), then input line number.
#
# Every level has an explicit tie-break, so the output no longer depends on
# hash iteration order, which Perl does not keep stable between runs.
foreach my $key_filename (sort keys %stm_file) {
    my $by_channel = $stm_file{$key_filename};

    # Channel keys are digit strings (the record pattern captures \d+), so
    # compare them as numbers: channel 2 comes before channel 10.  The string
    # comparison separates spellings such as "1" and "01".
    foreach my $key_channel (sort { $a <=> $b || $a cmp $b } keys %$by_channel) {
        my $by_begtime = $by_channel->{$key_channel};

        # cmp_float() reads the same $a/$b that sort sets for this block.
        # Keys it calls equal (e.g. "1.0" and "1.00") fall back to string
        # order.
        foreach my $key_begtime (sort { cmp_float() || $a cmp $b } keys %$by_begtime) {
            my $records = $by_begtime->{$key_begtime};

            # Keys here are input line numbers; records of the same type keep
            # the order they had in the input.
            my @line_numbers = sort {
                $sort_order{ $records->{$a}{type} } <=> $sort_order{ $records->{$b}{type} }
                    || $a <=> $b
            } keys %$records;

            foreach my $line (@line_numbers) {
                my $rec = $records->{$line};
                print join(" ",
                    $rec->{type},
                    $key_filename,
                    $key_channel,
                    @{$rec}{qw(beg_time end_time token subtype speaker conf)},
                ) . "\n";
            }
        }
    }
}