create_chime1_trans.pl
2.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#!/usr/bin/env perl
#
# Copyright 2015 University of Sheffield (Author: Ning Ma)
# Apache 2.0.
#
# Create transcriptions for the CHIME/GRID corpus from a list of
# file names (used as UTTERANCE-ID, e.g. s1_bgab3n)
# It outputs lines containing UTTERANCE-ID TRANSCRIPTIONS, e.g.
# s1_bgab3n BIN GREEN AT B THREE NOW
#
# Usage: create_chime1_trans.pl train.flist
use strict;
use warnings;
# Define silence label at begin/end of an utternace
my $sil = "<SIL>";
my $in_list = $ARGV[0];
open my $info, $in_list or die "could not open $in_list: $!";
while (my $line = <$info>) {
chomp($line);
$line =~ s/\.[^.]+$//; # Remove extension just in case
my @tokens = split("_", $line);
my @chars = split("", $tokens[1]);
my $trans;
if ($chars[0] eq "b") { $trans = "BIN"}
elsif ($chars[0] eq "l") { $trans = "LAY" }
elsif ($chars[0] eq "p") { $trans = "PLACE" }
elsif ($chars[0] eq "s") { $trans = "SET" }
else { $trans = "!UNKNOWN"}
if ($chars[1] eq "b") { $trans = $trans . " BLUE" }
elsif ($chars[1] eq "g") { $trans = $trans . " GREEN" }
elsif ($chars[1] eq "r") { $trans = $trans . " RED" }
elsif ($chars[1] eq "w") { $trans = $trans . " WHITE" }
else { $trans = $trans . "!UNKNOWN"}
if ($chars[2] eq "a") { $trans = $trans . " AT" }
elsif ($chars[2] eq "b") { $trans = $trans . " BY" }
elsif ($chars[2] eq "i") { $trans = $trans . " IN" }
elsif ($chars[2] eq "w") { $trans = $trans . " WITH" }
else { $trans = $trans . "!UNKNOWN"}
$trans = $trans . " " . uc($chars[3]);
if ($chars[4] eq "z") { $trans = $trans . " ZERO" }
elsif ($chars[4] eq "1") { $trans = $trans . " ONE" }
elsif ($chars[4] eq "2") { $trans = $trans . " TWO" }
elsif ($chars[4] eq "3") { $trans = $trans . " THREE" }
elsif ($chars[4] eq "4") { $trans = $trans . " FOUR" }
elsif ($chars[4] eq "5") { $trans = $trans . " FIVE" }
elsif ($chars[4] eq "6") { $trans = $trans . " SIX" }
elsif ($chars[4] eq "7") { $trans = $trans . " SEVEN" }
elsif ($chars[4] eq "8") { $trans = $trans . " EIGHT" }
elsif ($chars[4] eq "9") { $trans = $trans . " NINE" }
else { $trans = $trans . "!UNKNOWN"}
if ($chars[5] eq "a") { $trans = $trans . " AGAIN" }
elsif ($chars[5] eq "n") { $trans = $trans . " NOW" }
elsif ($chars[5] eq "p") { $trans = $trans . " PLEASE" }
elsif ($chars[5] eq "s") { $trans = $trans . " SOON" }
else { $trans = $trans . "!UNKNOWN"}
#print "$line $sil $trans $sil\n";
print "$line\t$trans\n";
}