Blame view

egs/heroico/s5/local/heroico_answers_make_lists.pl 2.57 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
  #!/usr/bin/env perl
  
  # Copyright 2017 John Morgan
  # Apache 2.0.
  
  # heroico_answers_make_lists.pl - make acoustic model training lists
  
  use strict;
  use warnings;
  use Carp;
  
  use File::Spec;
  use File::Copy;
  use File::Basename;
  
  # Root of the scratch area used by this script.
  my $tmpdir = "data/local/tmp/heroico";

  # Make sure the output directory exists before any output file is opened.
  # The list form of system() bypasses the shell, and the exit status is
  # checked so that a failure is reported here rather than surfacing later
  # when an output file cannot be opened.
  system( 'mkdir', '-p', "$tmpdir/answers" ) == 0
    or croak "mkdir -p $tmpdir/answers failed (status $?)";

  # input wav file list
  my $w = "$tmpdir/wav_list.txt";

  # output temporary wav.scp file
  my $o = "$tmpdir/answers/wav.scp";

  # output temporary utt2spk file
  my $u = "$tmpdir/answers/utt2spk";

  # output temporary text file
  my $t = "$tmpdir/answers/text";
  
  # Map from utterance id ("<speaker>_<number>", both parts zero-padded)
  # to the prompt text read from the input.
  my %prompts = ();

  # Left-pad a number with zeroes to four digits.  Values of 1000 or more
  # are already wide enough and are returned unchanged instead of being
  # replaced by an empty string (which made distinct ids collide).
  my $pad_prompt_number = sub {
    my ($n) = @_;
    return $n < 10   ? '000' . $n
         : $n < 100  ? '00' . $n
         : $n < 1000 ? '0' . $n
         :             $n;
  };

  # Store prompts in the hash.  Each input line (read from the files named
  # on the command line, or from STDIN) is split at the first tab into a
  # path and a prompt; the last directory of the path is taken as the
  # speaker number and the final path component as the utterance number.
  LINEA: while ( my $line = <> ) {
    chomp $line;

    # ignore blank lines
    next LINEA if $line !~ /\S/;

    my ( $num, $sent ) = split /\t/, $line, 2;
    if ( !defined $sent ) {
      warn "warning: no tab-separated prompt in line: $line\n";
      next LINEA;
    }

    # the volume part of the path is not needed
    my ( undef, $directories, $file ) = File::Spec->splitpath($num);
    my @dirs = split /\//, $directories;
    if ( !@dirs || $file eq '' ) {
      warn "warning: cannot find speaker and utterance number in: $num\n";
      next LINEA;
    }

    # pad the speaker number with zeroes
    my $spk = $pad_prompt_number->( $dirs[-1] );
    # pad the filename with zeroes
    my $fn = $pad_prompt_number->($file);

    # the utterance name
    my $utt = $spk . '_' . $fn;
    $prompts{$utt} = $sent;
  }
  
  # Open the wav list for reading and the three output lists for writing.
  # The outputs are never read back, so plain truncating write mode is used.
  open my $W, '<', $w or croak "problem with $w $!";
  open my $O, '>', $o or croak "problem with $o $!";
  open my $U, '>', $u or croak "problem with $u $!";
  open my $T, '>', $t or croak "problem with $t $!";

  # Left-pad a number with zeroes to four digits.  Values of 1000 or more
  # are already wide enough and are returned unchanged instead of being
  # replaced by an empty string.
  my $pad_wav_number = sub {
    my ($n) = @_;
    return $n < 10   ? '000' . $n
         : $n < 100  ? '00' . $n
         : $n < 1000 ? '0' . $n
         :             $n;
  };

  # For every wav path whose prompt is known, write one line to each of the
  # text, wav.scp and utt2spk files.
  LINE: while ( my $line = <$W> ) {
    chomp $line;

    # keep only paths that mention Answers and do not mention Recordings
    next LINE unless ( $line =~ /Answers/ );
    next LINE if ( $line =~ /Recordings/ );

    # the last directory of the path is used as the speaker number
    my ( undef, $directories ) = File::Spec->splitpath($line);
    my @dirs = split /\//, $directories;
    if ( !@dirs ) {
      warn "warning: no speaker directory in $line\n";
      next LINE;
    }

    # pad the speaker number and the file name with zeroes
    my $spk = $pad_wav_number->( $dirs[-1] );
    my $rec = $pad_wav_number->( basename( $line, ".wav" ) );
    my $rec_id = $spk . '_' . $rec;

    # $rec_id is always defined (it is a concatenation), so the only case
    # to handle besides a known prompt is a missing one: report and skip.
    if ( !exists $prompts{$rec_id} ) {
      warn  "warning: problem\t$rec_id";
      next LINE;
    }

    # Each record ends with "\n" only, so that no stray whitespace ends up
    # at the start of the following line of the output files.
    print $T "$rec_id $prompts{$rec_id}\n";
    print $O "$rec_id sox -r 22050 -e signed -b 16 $line -r 16000 -t wav - |\n";
    print $U "$rec_id $spk\n";
  }

  # Buffered write errors only show up at close time, so check them.
  close $T or croak "problem closing $t $!";
  close $O or croak "problem closing $o $!";
  close $U or croak "problem closing $u $!";
  close $W;