Blame view
egs/chime1/s5/local/create_chime1_trans.pl
2.41 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
#!/usr/bin/env perl
#
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
#
# Create transcriptions for the CHIME/GRID corpus from a list of
# file names (used as UTTERANCE-ID, e.g. s1_bgab3n).
# It outputs one line per utterance: UTTERANCE-ID <TAB> TRANSCRIPTION, e.g.
#   s1_bgab3n  BIN GREEN AT B THREE NOW
#
# Usage: create_chime1_trans.pl train.flist

use strict;
use warnings;

# Word emitted for any position whose character is missing or not in the
# lookup table for that position.
my $UNKNOWN = "!UNKNOWN";

# One entry per character of the code that follows the first "_" in an
# utterance id. A hash maps the character at that position to its word;
# undef marks the position whose character is emitted upper-cased as-is.
my @WORD_FOR_POSITION = (
    { 'b' => 'BIN',  'l' => 'LAY',   'p' => 'PLACE', 's' => 'SET'   },
    { 'b' => 'BLUE', 'g' => 'GREEN', 'r' => 'RED',   'w' => 'WHITE' },
    { 'a' => 'AT',   'b' => 'BY',    'i' => 'IN',    'w' => 'WITH'  },
    undef,
    {
        'z' => 'ZERO', '1' => 'ONE',  '2' => 'TWO', '3' => 'THREE',
        '4' => 'FOUR', '5' => 'FIVE', '6' => 'SIX', '7' => 'SEVEN',
        '8' => 'EIGHT', '9' => 'NINE',
    },
    { 'a' => 'AGAIN', 'n' => 'NOW', 'p' => 'PLEASE', 's' => 'SOON' },
);

# Build the space-separated transcription for one utterance id.
#
#   $utt_id - utterance id such as "s1_bgab3n"; only the part between the
#             first and second "_" is decoded.
#
# Returns a string with exactly one word per code position. A character
# that is missing (short or malformed id) or unmapped yields "!UNKNOWN" as
# its own space-separated word.
sub transcription_for {
    my ($utt_id) = @_;

    my @parts = split /_/, $utt_id;
    my $code  = defined $parts[1] ? $parts[1] : '';
    my @chars = split //, $code;

    my @words;
    for my $pos (0 .. $#WORD_FOR_POSITION) {
        my $char  = $chars[$pos];
        my $table = $WORD_FOR_POSITION[$pos];

        my $word;
        if (!defined $char) {
            $word = $UNKNOWN;
        }
        elsif (!defined $table) {
            $word = uc $char;
        }
        elsif (exists $table->{$char}) {
            $word = $table->{$char};
        }
        else {
            $word = $UNKNOWN;
        }
        push @words, $word;
    }

    return join ' ', @words;
}

# Read the file list and print one "UTTERANCE-ID<TAB>TRANSCRIPTION" line
# per entry to the currently selected output handle (STDOUT by default).
#
#   $in_list - path of a text file holding one file name per line.
#
# Dies with a usage message when no path is given, and with the system
# error when the list cannot be opened.
sub main {
    my ($in_list) = @_;

    die "Usage: create_chime1_trans.pl train.flist\n"
        unless defined $in_list;

    # Three-argument open: the file name can never be parsed as a mode or
    # a pipe, whatever characters it contains.
    open my $info, '<', $in_list or die "could not open $in_list: $!";

    while (my $line = <$info>) {
        chomp $line;
        $line =~ s/\.[^.]+$//;    # Remove extension just in case
        next if $line eq '';      # A blank entry has no utterance id

        # Silence markers around the transcription are not emitted; an
        # earlier variant printed "<SIL> $trans <SIL>" instead.
        print $line . "\t" . transcription_for($line) . "\n";
    }

    close $info;
    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# called from other code without side effects.
main(@ARGV) unless caller;