Blame view

egs/chime1/s5/local/create_chime1_trans.pl 2.41 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
  #!/usr/bin/env perl
  #
  # Copyright 2015  University of Sheffield (Author: Ning Ma)
  # Apache 2.0.
  #
  # Create transcriptions for the CHIME/GRID corpus from a list of
  # file names (used as UTTERANCE-ID, e.g. s1_bgab3n)
  # It outputs lines containing UTTERANCE-ID TRANSCRIPTIONS, e.g. 
  #   s1_bgab3n BIN GREEN AT B THREE NOW
  #
  # Usage: create_chime1_trans.pl train.flist
  
  use strict;
  use warnings;
  
  # Silence label for the beginning/end of an utterance.  It is only needed by
  # the alternative output format that is left commented out in main().
  my $sil = "<SIL>";

  # Word emitted for a position whose code character is missing or not valid.
  my $unknown = "!UNKNOWN";

  # One lookup table per character position of the 6-character sentence code
  # (e.g. "bgab3n").  An undef entry marks the position that holds a letter,
  # which is not looked up but simply written out in upper case.
  my @slot_words = (
    { "b" => "BIN",   "l" => "LAY",   "p" => "PLACE",  "s" => "SET"   },  # verb
    { "b" => "BLUE",  "g" => "GREEN", "r" => "RED",    "w" => "WHITE" },  # colour
    { "a" => "AT",    "b" => "BY",    "i" => "IN",     "w" => "WITH"  },  # preposition
    undef,                                                                # letter
    { "z" => "ZERO",  "1" => "ONE",   "2" => "TWO",    "3" => "THREE",
      "4" => "FOUR",  "5" => "FIVE",  "6" => "SIX",    "7" => "SEVEN",
      "8" => "EIGHT", "9" => "NINE" },                                    # digit
    { "a" => "AGAIN", "n" => "NOW",   "p" => "PLEASE", "s" => "SOON"  },  # adverb
  );

  # Convert a sentence code such as "bgab3n" into its word sequence
  # ("BIN GREEN AT B THREE NOW").
  #
  #   $code  - the code string; may be undef or shorter than six characters.
  #
  # Returns the six words joined by single spaces.  A position whose character
  # is missing or not in the lookup table yields "!UNKNOWN" as a separate word.
  # Characters beyond the sixth are ignored.
  sub _code_to_words {
    my ($code) = @_;
    my @chars = split //, (defined $code ? $code : "");
    my @words;

    for my $pos (0 .. $#slot_words) {
      my $char  = defined $chars[$pos] ? $chars[$pos] : "";
      my $table = $slot_words[$pos];

      if (defined $table) {
        push @words, exists $table->{$char} ? $table->{$char} : $unknown;
      }
      else {
        # Letter position: spelled as the character itself, upper-cased.
        push @words, $char ne "" ? uc($char) : $unknown;
      }
    }

    # Joining with a space keeps every word (including "!UNKNOWN") a separate
    # whitespace-delimited token.
    return join(" ", @words);
  }

  # Read the file list named by the first argument and print one
  # "UTTERANCE-ID<TAB>TRANSCRIPTION" line per entry to standard output.
  #
  #   $in_list - path of the file list; one file name per line.
  #
  # Dies with a usage message if no list is given, or with the system error if
  # the list cannot be opened or closed.
  sub main {
    my ($in_list) = @_;
    defined $in_list
      or die "Usage: $0 train.flist\n";

    # Three-argument open: the file name is never interpreted as a mode.
    open my $info, '<', $in_list or die "could not open $in_list: $!";

    while (my $line = <$info>) {
      chomp($line);
      next if $line =~ /^\s*$/;   # nothing to transcribe on a blank line
      $line =~ s/\.[^.]+$//;      # Remove extension just in case

      # The sentence code is the second "_"-separated field (s1_bgab3n -> bgab3n).
      my @tokens = split("_", $line);
      my $trans = _code_to_words($tokens[1]);

      #print "$line\t$sil $trans $sil\n";
      print "$line\t$trans\n";
    }

    close $info or die "could not close $in_list: $!";
    return;
  }

  # Run only when executed as a script, so the file can also be loaded (e.g. by
  # a test) without touching @ARGV or standard output.
  main(@ARGV) unless caller;

  1;