Blame view
tools/sctk-2.4.10/bin/rttmSort.pl
3.19 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
#!/usr/bin/perl -w
#
# rttmSort.pl - read RTTM records from the files named on the command line
# (or from STDIN when none are given), sort them and print them to STDOUT.
#
# Output layout:
#   1. one SPKR-INFO record per "file speaker" pair, ordered by that pair;
#   2. every other record, ordered by file name, channel (numerically),
#      begin time ("<NA>" first), record type (see %sort_order) and finally
#      input line number, so that ties keep their input order.
#
# Any line containing ";;" is skipped.  Any other line that does not match
# the record pattern aborts the run with a "malformed line" error.

use strict;
use warnings;
use Data::Dumper;    # not used by the normal flow; kept for ad-hoc debugging

# Rank of each record type among records that share file, channel and begin
# time.  SPKR-INFO records never go through this table (they are printed
# separately), so the "SPKRINFO" entry is effectively unused.
my %sort_order = (
    "SPKRINFO"     => 0,
    "NOSCORE"      => 1,
    "NORTMETADATA" => 2,
    "SEGMENT"      => 3,
    "SPEAKER"      => 4,
    "SU"           => 5,
    "A/P"          => 6,
    "CB"           => 7,
    "IP"           => 8,
    "EDIT"         => 9,
    "FILLER"       => 10,
    "NON-SPEECH"   => 11,
    "NON-LEX"      => 12,
    "LEXEME"       => 13,
    "SUboundary"   => 14,
);

# Names of the nine captured fields, in capture order.
my @FIELD_NAMES = qw(type file chan beg_time end_time token subtype speaker conf);

# The record pattern is compiled once instead of being rebuilt per line.
# NOTE(review): the pattern is intentionally left unanchored, exactly as
# before, so text preceding the record type on a line is tolerated.
my $NUM_OR_NA = qr/\d*\.?\d+|<NA>/;
my $RECORD_RE = qr{
    ( SPKR-INFO | SEGMENT | LEXEME | NON-LEX | CB | SU | EDIT | FILLER | IP
    | NOSCORE | SPEAKER | NORTMETADATA | NON-SPEECH | A/P )   # 1: type
    \s+ (\S+)                                                 # 2: file
    \s+ (\d+)                                                 # 3: chan
    \s+ ($NUM_OR_NA)                                          # 4: beg_time
    \s+ ($NUM_OR_NA)                                          # 5: end_time
    \s+ (\S+|\(\S+\)|<NA>)                                    # 6: token
    \s+ ([\w&-]+|<NA>)                                        # 7: subtype
    \s+ (\S+|<NA>)                                            # 8: speaker
    \s+ ($NUM_OR_NA)                                          # 9: conf
}x;

# parse_rttm_line($line)
# Returns a hash reference holding the fields named in @FIELD_NAMES, or
# nothing (undef in scalar context) when the line is not a record.
sub parse_rttm_line {
    my ($line) = @_;

    my @fields = ($line =~ $RECORD_RE);
    return unless @fields;

    my %record;
    @record{@FIELD_NAMES} = @fields;
    return \%record;
}

# _cmp_time($x, $y)
# Three-way comparison of two time fields: "<NA>" sorts before any number,
# two "<NA>" values are equal, numbers compare numerically.
sub _cmp_time {
    my ($x, $y) = @_;

    return 0  if ($x eq "<NA>" && $y eq "<NA>");
    return 1  if ($y eq "<NA>");
    return -1 if ($x eq "<NA>");
    return $x <=> $y;
}

# Sort-block form of _cmp_time (compares the package variables $a and $b).
# Kept under its original name for anything that still refers to it.
sub cmp_float {
    return _cmp_time($a, $b);
}

# _cmp_records($x, $y)
# Three-way comparison of two parsed records.  The line number is the last
# key so that otherwise-equal records come out in input order instead of in
# hash order, which Perl randomises from run to run.
sub _cmp_records {
    my ($x, $y) = @_;

    my $order = ($x->{file} cmp $y->{file})
        || ($x->{chan} <=> $y->{chan})    # chan matched \d+, so compare as a number
        || _cmp_time($x->{beg_time}, $y->{beg_time})
        || ($sort_order{$x->{type}} <=> $sort_order{$y->{type}})
        || ($x->{line_no} <=> $y->{line_no});
    return $order;
}

# sort_records(@records)
# Returns the given non-SPKR-INFO records in output order.  Each record must
# carry a numeric {line_no} in addition to the parsed fields.
sub sort_records {
    my @sorted = sort { _cmp_records($a, $b) } @_;
    return @sorted;
}

# format_record($record)
# Returns the newline-terminated output line for a non-SPKR-INFO record.
sub format_record {
    my ($record) = @_;
    return join(' ', map { $record->{$_} } @FIELD_NAMES) . "\n";
}

# format_spkrinfo($record)
# Returns the newline-terminated SPKR-INFO output line.  The begin time, end
# time and token columns are always written as "<NA>"; the subtype column
# carries what the original code stored as the speaker's gender.
sub format_spkrinfo {
    my ($record) = @_;
    return join(' ',
        'SPKR-INFO', $record->{file}, $record->{chan},
        '<NA>', '<NA>', '<NA>',
        $record->{subtype}, $record->{speaker}, $record->{conf}) . "\n";
}

# main()
# Reads all input, then prints the SPKR-INFO records followed by the sorted
# remaining records.  Dies on a malformed line or on a second SPKR-INFO
# record for the same "file speaker" pair.
sub main {
    my %spkrinfo;    # "file speaker" => parsed SPKR-INFO record
    my @records;     # every other parsed record, in input order

    while (my $line = <>) {
        # NOTE(review): this skips every line that contains ";;" anywhere,
        # not only lines that start with it - confirm that is intended.
        next if ($line =~ /;;/);

        my $record = parse_rttm_line($line);
        die "malformed line $.\n--> $line\n" unless $record;

        if ($record->{type} eq "SPKR-INFO") {
            my $key = $record->{file} . " " . $record->{speaker};
            die "Error: spkrinfo exists for '$key'" if (exists($spkrinfo{$key}));
            $spkrinfo{$key} = $record;
        }
        else {
            # $. keeps counting across all files read through <>, so it is
            # unique per input line and safe to use as a tie-breaker.
            $record->{line_no} = $.;
            push @records, $record;
        }
    }

    foreach my $key (sort keys %spkrinfo) {
        print format_spkrinfo($spkrinfo{$key});
    }

    foreach my $record (sort_records(@records)) {
        print format_record($record);
    }

    return;
}

main();