stm2rttm.pl 4.62 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151


#!/usr/bin/perl -w

# Converts an STM file into a corresponding RTTM file
# Authors: Chris Laprun, Audrey Tong, Jon Fiscus
#
# v4:
#       - Added a check to make sure speakers do not have segments that overlap with themselves
# v2: 
# 	- Added constants for better legibility
#	- Added smoothing capability
#	- Now takes an evaluation code as parameter to support multiple evaluations

use Getopt::Long;

my $SUPPORTED = "Supported evaluations are:\n\t- Rich Transcription 02 (code: rt02)\n\t- Rich Transcription 05s (code: rt05s)\n\t- Rich Transcription 04 Spring (code: rt04s)\n";
my $ret    = GetOptions ("e|evaluation=s");
my $evaluation;
if (defined($opt_e)){
    $evaluation  = $opt_e;
} else {
  die "Usage: stm2rttm.pl -e [rt02|rt04s|rt05s]\nVersion: 0.1\n\nError: You must specify an evaluation code with the -e option!\n" . $SUPPORTED;
}
my %STM = (); # STM entries
my $FILE = 0; # index of the file id in the STM entry
my $CHANNEL = 1; # index of the channel number in the STM entry
my $SPEAKER = 2; # index of the speaker name in the STM entry
my $START_TIME = 3; # index of the utterance start time in the STM entry
my $END_TIME = 4; # index of the utterance end time in the STM entry
my $CATEGORIES = 5; # index of the categories block in the STM entry
my $TEXT = 6; # index of the utterance text in the STM entry

my $SEX_INDEX = -1; # index of the sex category in the categories block
my $SMOOTHING_TIME = -1.0; # number of seconds between utterances for smoothing

if ($evaluation eq "rt04s"){
	$SEX_INDEX = 1;
	$SMOOTHING_TIME = 0;
	$WITH_SEX = 1;
} elsif ($evaluation eq "rt02") {
	$SEX_INDEX = 2;
	$SMOOTHING_TIME = 0.5;
	$WITH_SEX = 1;
} elsif ($evaluation eq "rt05s") {
	$SMOOTHING_TIME = 0.3;
	$WITH_SEX = 0;
} else {
    die "Unknown target evaluation code!\n" . $SUPPORTED;
}

# Get the STM data from standard input
while (<>){
	next if ($_ =~ /^\s*$/);
	next if ($_ =~ /(^;;|inter_segment_gap|intersegment_gap)/i);
	my @d = split(/\s+/, $_, 7);
	if (! defined ($d[$FILE]) || ! defined ($d[$CHANNEL]) || ! defined ($d[$SPEAKER]) ) {
		die "No file, channel, or speaker defined";
	}
	
	push (@{ $STM{$d[$SPEAKER]}{$d[$FILE]}{$d[$CHANNEL]} }, [ @d ]);
	@info = split (",", $d[$CATEGORIES]);
	if ($WITH_SEX){
	    ($sex = $info[$SEX_INDEX]) =~ s/>//;
	
	    if ($sex eq "male") { $sex = "adult_male"; }
	    elsif ($sex eq "female") { $sex = "adult_female"; }
	    elsif ($sex =~ /unk/) { $sex = "unknown"; }
	    else { die "Unknown sex $sex"; }
	    
	    if (defined($spkrInfo{$d[$SPEAKER]})){
		die "Error: ambiguous spkr info $d[$SPEAKER]=>$sex but had $spkrInfo{$d[$SPEAKER]}" if ($spkrInfo{$d[$SPEAKER]} ne $sex && $d[$SPEAKER] !~ /excluded_region/i);
	    } else {
		$spkrInfo{$d[$SPEAKER]} = $sex;
	    }
	} else {
		$spkrInfo{$d[$SPEAKER]} = "unknown";
	}
    }


# Sort STM entries for smoothing
foreach $spkr (keys %STM) {
	for $file (keys %{ $STM{$spkr} }){
		for $chan (keys %{ $STM{$spkr}{$file} }){
			@ { $STM{$spkr}{$file}{$chan} } = sort numerically (@ { $STM{$spkr}{$file}{$chan} });
		}
	}
}

# Perform smoothing
foreach $spkr (keys %STM) {
	for $file (keys %{ $STM{$spkr} }){
		for $chan (keys %{ $STM{$spkr}{$file} }){
	    	$first = 1;
	    	for ($i=0; $i<@{ $STM{$spkr}{$file}{$chan} }; $i++){
				$seg = $STM{$spkr}{$file}{$chan}[$i];
				if ($first) {
				    $prev_seg = $seg;
			    	$first = 0;
				} else {
				    if ($seg->[$START_TIME] < $prev_seg->[$END_TIME]) {
					die "Error: segments from the same speaker overlap\n  ".
					    join(" ",@{$prev_seg})."\n  ".
					    join(" ",@{$seg});
				    }
				    if ($seg->[$START_TIME] - $prev_seg->[$END_TIME] <= $SMOOTHING_TIME) {
						$prev_seg->[$END_TIME] = $seg->[$END_TIME];
						$prev_seg->[$TEXT] = $prev_seg->[$TEXT] . " " . $seg->[$TEXT];
						splice (@{ $STM{$spkr}{$file}{$chan} }, $i, 1);
						$i++;
				    } else {
						$prev_seg = $seg;
			    	}
				}
	    	}
		}
	}
}

# Output speaker info metadata
foreach $spkr(keys %STM){
	for $file (keys %{ $STM{$spkr} }){
		for $chan (keys %{ $STM{$spkr}{$file} }){
	    	if ($spkr !~ /excluded_region/i) {
				print "SPKR-INFO $file $chan <NA> <NA> <NA> $spkrInfo{$spkr} $spkr <NA>\n";
	    	}
		}
	}
}

# Output speaker turns
foreach $spkr(keys %STM){
	for $file (keys %{ $STM{$spkr} }){
		for $chan (keys %{ $STM{$spkr}{$file} }){
	    	for $seg (@{ $STM{$spkr}{$file}{$chan} }){
				$beg = $seg->[$START_TIME];
				$end = $seg->[$END_TIME];
				$dur = sprintf("%.3f", $end - $beg);
				if ($spkr =~ /excluded_region/i) {
		    		print "NOSCORE $file $chan $beg $dur <NA> <NA> <NA> <NA>\n";
				} else {
				    print "SPEAKER $file $chan $beg $dur <NA> <NA> $spkr <NA>\n";
				}
			}
		}
	}
}

sub numerically {
	$a->[$START_TIME] <=> $b->[$START_TIME];
}