Blame view
1.59 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
#!/usr/bin/perl -w
#
# Filter a whitespace-separated, nine-field metadata record file down to its
# lexical word records.
#
# Usage:  <script> [-i INPUT] [-o OUTPUT]
#   -i INPUT    file to read;  "-" (the default) means standard input
#   -o OUTPUT   file to write; "-" (the default) means standard output
#
# For every input line that does not contain ";;":
#   * the line must match the record pattern below (type, file, channel,
#     begin, duration, token, subtype, speaker, confidence); otherwise the
#     script dies with "malformed line <n> --> <line>";
#   * if the record type is "LEXEME" and its subtype is "lex", the fields
#     "file chan beg dur token conf" are written as one output line.
# All other well-formed records are silently dropped.
#
# NOTE(review): the record types look like NIST RTTM and the output like a
# CTM word list -- confirm against the tool chain this script belongs to.

use strict;
use warnings;
use Data::Dumper;
use Getopt::Std;

# Command-line options. An unknown option only triggers Getopt::Std's own
# warning; processing continues, as it always has.
my %opt;
getopts('i:o:', \%opt);
my $in  = defined $opt{i} ? $opt{i} : "-";
my $out = defined $opt{o} ? $opt{o} : "-";

# A token is a run of non-blank characters, optionally wrapped in
# parentheses, or the literal placeholder <NA>. Built once, not per line.
my $wrdExp = '[\]\[\S\%\{' . "\\'" . '\<\>.-]+';
my $txtExp = "$wrdExp|\\($wrdExp\\)|<NA>";

# Nine capture groups: type, file, channel, begin, duration, token,
# subtype, speaker, confidence. The pattern is deliberately left unanchored,
# exactly as before.
my $record_re = qr/(SPKR-INFO|SEGMENT|LEXEME|NON-LEX|CB|SU|EDIT|FILLER|IP|NOSCORE|SPEAKER|NORTMETADATA|NON-SPEECH|A\/P)\s+(\S+)\s+(\d+)\s+(\d*\.?\d+|<NA>)\s+(\d*\.?\d+|<NA>)\s+($txtExp)\s+([\w&-]+|<NA>)\s+(\S+|<NA>)\s+(\d*\.?\d+|<NA>)/;

# Open the streams. The low-precedence "or" is essential here: with "||"
# the die attaches to the file name (always true) and an open failure is
# never reported. Three-argument open treats the name strictly as a file
# name, so "-" has to be mapped to the standard streams by hand.
my $in_fh;
if ($in eq "-") {
    $in_fh = \*STDIN;
}
else {
    open($in_fh, '<', $in) or die "Failed to open $in: $!";
}

my $out_fh;
if ($out eq "-") {
    $out_fh = \*STDOUT;
}
else {
    open($out_fh, '>', $out) or die "Failed to open $out: $!";
}

# NOTE(review): the copy of this script that was reviewed had its line
# structure collapsed; the "\n" terminators on the output record and on the
# "malformed line" message are reconstructed -- confirm against the
# original file.
while (my $line = <$in_fh>) {
    # Lines containing the ";;" marker are skipped entirely.
    next if $line =~ /;;/;

    # A failed match yields an empty list, which makes the assignment false.
    my @field = $line =~ $record_re
        or die "malformed line $. --> $line\n";

    my ($type, $file, $chan, $beg, $dur, $token, $stype, undef, $conf) = @field;

    # Only lexical word records are emitted.
    if ($type eq "LEXEME" && $stype eq "lex") {
        print {$out_fh} "$file $chan $beg $dur $token $conf\n"
            or die "Failed to write to $out: $!";
    }
}

close($in_fh);
# Buffered write errors surface at close time, so check it.
close($out_fh) or die "Failed to close $out: $!";