Blame view

egs/hkust/s5/local/hkust_normalize.pl 823 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
  #!/usr/bin/env perl
  use warnings; #sed replacement for -w perl parameter
  # Copyright Chao Weng 
  
  # normalizations for HKUST transcripts
  # see the docs/trans-guidelines.pdf for details
  
  # Filter: reads transcript lines of the form "<utt-id> <token> <token> ..."
  # from STDIN and writes the normalized line to STDOUT.  Every field,
  # including the last one, is followed by a single space, and each output
  # line ends with a newline.
  use strict;    # lexically scoped: applies from here to the end of the file

  # Transcript noise markers and the tag each one is replaced with.
  my %noise_tag_for = (
    '{breath}'   => '[VOCALIZED-NOISE]',
    '{cough}'    => '[VOCALIZED-NOISE]',
    '{sneeze}'   => '[VOCALIZED-NOISE]',
    '{lipsmack}' => '[VOCALIZED-NOISE]',
    '{laugh}'    => '[LAUGHTER]',
    '<noise>'    => '[NOISE]',
  );

  # Normalize one whitespace-free transcript token and return the result.
  # Noise markers are mapped to their tag and returned as-is; any other token
  # has punctuation and marker characters stripped and is upper-cased when it
  # contains an ASCII letter.  The result may be the empty string (e.g. for a
  # token that consists only of punctuation).
  sub normalize_token {
    my ($token) = @_;

    return $noise_tag_for{$token} if exists $noise_tag_for{$token};

    # Drop the first run of . , ? + - characters.
    # NOTE(review): only the first run is removed ("a-b-c" becomes "ab-c");
    # this is kept as-is -- confirm whether a global strip was intended.
    $token =~ s/[.,?+-]+//;

    # Drop the first '~' that directly precedes an upper-case ASCII letter.
    $token =~ s/~([A-Z])/$1/;

    # Drop the first '%' that directly precedes a non-space character.
    $token =~ s/%(\S)/$1/;

    # Upper-case tokens containing ASCII letters; other tokens are untouched.
    $token = uc($token) if $token =~ /[a-zA-Z]/;

    return $token;
  }

  while (my $line = <STDIN>) {
    # split(" ", ...) discards leading whitespace and the trailing newline.
    my ($utt_id, @tokens) = split(" ", $line);

    # A blank input line has no fields; emit an empty id rather than
    # interpolating undef (which would warn under "use warnings").
    $utt_id = '' unless defined $utt_id;

    print "$utt_id ";
    for my $token (@tokens) {
      print normalize_token($token), " ";
    }
    # Use an escape sequence here: a literal line break inside the quotes
    # would also emit the source indentation at the start of the next line.
    print "\n";
  }