stm2rttm.pl
4.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/perl -w
# Converts an STM file into a corresponding RTTM file
# Authors: Chris Laprun, Audrey Tong, Jon Fiscus
#
# v4:
# - Added a check to make sure speakers do not have segments that overlap with themselves
# v2:
# - Added constants for better legibility
# - Added smoothing capability
# - Now takes an evaluation code as parameter to support multiple evaluations
use Getopt::Long;
# Human-readable list of the evaluation codes accepted by the -e option.
my $SUPPORTED = "Supported evaluations are:\n\t- Rich Transcription 02 (code: rt02)\n\t- Rich Transcription 05s (code: rt05s)\n\t- Rich Transcription 04 Spring (code: rt04s)\n";

# Common prefix of every command-line error message.
my $USAGE = "Usage: stm2rttm.pl -e [rt02|rt04s|rt05s]\nVersion: 0.1\n\n";

# Parse the command line.  The option value is stored straight into
# $evaluation instead of going through the implicit $opt_e package global.
my $evaluation;
my $ret = GetOptions("e|evaluation=s" => \$evaluation);

# GetOptions returns false (after printing its own diagnostic) when it meets
# an unknown or malformed option; stop instead of running with a command line
# the user did not intend.
if (!$ret) {
    die $USAGE . "Error: Invalid command line options!\n" . $SUPPORTED;
}

# The evaluation code is mandatory.
if (!defined($evaluation)) {
    die $USAGE . "Error: You must specify an evaluation code with the -e option!\n" . $SUPPORTED;
}
# All STM entries, stored as $STM{speaker}{file}{channel} => list of records.
my %STM = ();

# Positions of the individual fields inside one STM record.
my $FILE       = 0;    # file id
my $CHANNEL    = 1;    # channel number
my $SPEAKER    = 2;    # speaker name
my $START_TIME = 3;    # utterance start time
my $END_TIME   = 4;    # utterance end time
my $CATEGORIES = 5;    # categories block
my $TEXT       = 6;    # utterance text

# Evaluation-dependent settings, initialised to placeholder values.
my $SEX_INDEX      = -1;      # index of the sex category in the categories block
my $SMOOTHING_TIME = -1.0;    # number of seconds between utterances for smoothing

# evaluation code => [ sex category index, smoothing time, use-sex flag ]
my %EVAL_SETTINGS = (
    rt04s => [  1, 0,   1 ],
    rt02  => [  2, 0.5, 1 ],
    rt05s => [ -1, 0.3, 0 ],
);

if (!exists $EVAL_SETTINGS{$evaluation}) {
    die "Unknown target evaluation code!\n" . $SUPPORTED;
}

# $WITH_SEX is a package global (it is not declared with "my").
($SEX_INDEX, $SMOOTHING_TIME, $WITH_SEX) = @{ $EVAL_SETTINGS{$evaluation} };
# Get the STM data from the files named on the command line, or from standard
# input when none are given.
#
# Every record is split into at most seven whitespace-separated fields (see
# the $FILE .. $TEXT indices) and appended to $STM{speaker}{file}{channel}.
# The speaker's sex is stored in %spkrInfo: it is read from the categories
# block when $WITH_SEX is set, and is "unknown" otherwise.
#
# Dies on records with a missing file/channel/speaker, missing or non-numeric
# times, a missing or unrecognised sex category, or a sex that contradicts an
# earlier record of the same (non excluded_region) speaker.
use Scalar::Util qw(looks_like_number);

while (my $line = <>) {
    # Skip blank lines, ";;" comments and inter-segment-gap records.
    next if $line =~ /^\s*$/;
    next if $line =~ /(^;;|inter_segment_gap|intersegment_gap)/i;

    my @d = split(/\s+/, $line, 7);
    if (!defined($d[$FILE]) || !defined($d[$CHANNEL]) || !defined($d[$SPEAKER])) {
        die "No file, channel, or speaker defined";
    }

    # The times are sorted and compared numerically later on.  split() with a
    # limit keeps trailing empty fields, so a short record can yield "" here;
    # reject such values instead of silently treating them as 0.
    for my $time_idx ($START_TIME, $END_TIME) {
        if (!defined($d[$time_idx]) || !looks_like_number($d[$time_idx])) {
            die "Error: missing or non-numeric start/end time at input line $.: $line";
        }
    }

    my $spkr = $d[$SPEAKER];
    push(@{ $STM{$spkr}{ $d[$FILE] }{ $d[$CHANNEL] } }, [@d]);

    # Evaluations that do not use the sex category never look at the
    # categories block at all.
    if (!$WITH_SEX) {
        $spkrInfo{$spkr} = "unknown";
        next;
    }

    my @info = defined($d[$CATEGORIES]) ? split(",", $d[$CATEGORIES]) : ();
    my $sex  = $info[$SEX_INDEX];
    if (!defined($sex)) {
        die "Error: no sex category for speaker $spkr at input line $.: $line";
    }

    # The sex may be the last category, in which case it carries the closing ">".
    $sex =~ s/>//;
    if    ($sex eq "male")   { $sex = "adult_male"; }
    elsif ($sex eq "female") { $sex = "adult_female"; }
    elsif ($sex =~ /unk/)    { $sex = "unknown"; }
    else                     { die "Unknown sex $sex"; }

    if (defined($spkrInfo{$spkr})) {
        # A speaker must keep the same sex throughout; excluded regions are exempt.
        if ($spkrInfo{$spkr} ne $sex && $spkr !~ /excluded_region/i) {
            die "Error: ambiguous spkr info ${spkr}=>$sex but had $spkrInfo{$spkr}";
        }
    }
    else {
        $spkrInfo{$spkr} = $sex;
    }
}
# Order every speaker/file/channel segment list by start time so that the
# smoothing pass can walk neighbouring segments.
for my $files_of_spkr (values %STM) {
    for my $chans_of_file (values %{$files_of_spkr}) {
        for my $segments (values %{$chans_of_file}) {
            # $segments is the array reference stored in %STM, so the sorted
            # list replaces the stored one in place.
            @{$segments} = sort numerically @{$segments};
        }
    }
}
# Perform smoothing: within each speaker/file/channel list, merge consecutive
# segments whose gap is at most $SMOOTHING_TIME seconds.
for my $files_of_spkr (values %STM) {
    for my $chans_of_file (values %{$files_of_spkr}) {
        for my $segments (values %{$chans_of_file}) {
            _smooth_segments($segments, $START_TIME, $END_TIME, $TEXT, $SMOOTHING_TIME);
        }
    }
}

# Merge neighbouring segments of one start-time-sorted segment list in place.
#
# Parameters:
#   $segments  - reference to the list of segments (each an array reference)
#   $start_idx - index of the start time within a segment
#   $end_idx   - index of the end time within a segment
#   $text_idx  - index of the text within a segment
#   $max_gap   - a segment starting no more than this many seconds after the
#                end of the previous (possibly already merged) segment is
#                merged into it
#
# A merged segment keeps the fields of its first member, takes the end time
# of its last member and the space-joined texts of all members.
#
# Dies if a segment starts before the previous segment ends.
#
# The list is rebuilt rather than spliced while it is being indexed: removing
# an element and then advancing the index skips the following segments, which
# leaves chains of mergeable segments unmerged and unchecked for overlap.
sub _smooth_segments {
    my ($segments, $start_idx, $end_idx, $text_idx, $max_gap) = @_;

    my @smoothed;
    for my $seg (@{$segments}) {
        my $prev = @smoothed ? $smoothed[-1] : undef;
        if (!defined($prev)) {
            push(@smoothed, $seg);
            next;
        }

        if ($seg->[$start_idx] < $prev->[$end_idx]) {
            die "Error: segments from the same speaker overlap\n ".
                join(" ", @{$prev})."\n ".
                join(" ", @{$seg});
        }

        if ($seg->[$start_idx] - $prev->[$end_idx] <= $max_gap) {
            # Close enough: extend the previous segment over this one.
            $prev->[$end_idx] = $seg->[$end_idx];
            # Records without text leave the field undefined; skip those
            # instead of concatenating undef.
            $prev->[$text_idx] = join(" ", grep { defined } $prev->[$text_idx], $seg->[$text_idx]);
        }
        else {
            push(@smoothed, $seg);
        }
    }

    @{$segments} = @smoothed;
    return;
}
# Output speaker info metadata: one SPKR-INFO line per speaker/file/channel
# combination, skipping excluded_region pseudo-speakers.
#
# The hash keys are sorted so that the output order is the same on every run;
# the order returned by keys() is otherwise unspecified.
for my $spkr (sort keys %STM) {
    next if $spkr =~ /excluded_region/i;
    for my $file (sort keys %{ $STM{$spkr} }) {
        for my $chan (sort keys %{ $STM{$spkr}{$file} }) {
            print "SPKR-INFO $file $chan <NA> <NA> <NA> $spkrInfo{$spkr} $spkr <NA>\n";
        }
    }
}
# Output speaker turns: one SPEAKER line per segment, or a NOSCORE line for
# segments that belong to an excluded_region pseudo-speaker.
#
# The hash keys are sorted so that the output order is the same on every run;
# the order returned by keys() is otherwise unspecified.
for my $spkr (sort keys %STM) {
    my $is_excluded = ($spkr =~ /excluded_region/i);
    for my $file (sort keys %{ $STM{$spkr} }) {
        for my $chan (sort keys %{ $STM{$spkr}{$file} }) {
            for my $seg (@{ $STM{$spkr}{$file}{$chan} }) {
                my $beg = $seg->[$START_TIME];
                my $end = $seg->[$END_TIME];
                # The output carries a start time and a duration, not an end time.
                my $dur = sprintf("%.3f", $end - $beg);
                if ($is_excluded) {
                    print "NOSCORE $file $chan $beg $dur <NA> <NA> <NA> <NA>\n";
                }
                else {
                    print "SPEAKER $file $chan $beg $dur <NA> <NA> $spkr <NA>\n";
                }
            }
        }
    }
}
# Comparator for sort(): orders two STM entries (the array references that
# sort places in $a and $b) by ascending numeric start time.
sub numerically {
    my $left_start  = $a->[$START_TIME];
    my $right_start = $b->[$START_TIME];
    return $left_start <=> $right_start;
}