Blame view

egs/wsj/s5/local/append_utterances.sh 2.8 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
  #!/bin/bash
  
  # Copyright 2014  Guoguo Chen
  # Apache 2.0
  
  # Begin configuration section.
  # Amount of silence inserted after every utterance. The value is handed
  # to "sox ... trim 0.0 $pad_silence" when the silence file is generated.
  pad_silence=0.5
  # End configuration section.

  # Log the full command line (this goes to stdout on purpose).
  echo "$0 $*"

  # path.sh is optional; parse_options.sh is mandatory and must be reachable
  # through PATH.
  # NOTE(review): parse_options.sh presumably maps --pad-silence onto the
  # pad_silence variable above -- confirm against that script.
  [ -f ./path.sh ] && . ./path.sh
  . parse_options.sh || exit 1;

  # Exactly two positional arguments are required.
  if [ $# -ne 2 ]; then
    {
      echo "Usage: $0 [options] <input-dir> <output-dir>"
      echo "Options:"
      echo "    --pad-silence           # silence to be padded between utterances"
    } >&2
    exit 1;
  fi

  input_dir=$1
  output_dir=$2

  # All of these files must be present in the input directory. The path is
  # quoted so that a directory name containing whitespace is tested correctly.
  for f in spk2gender spk2utt text utt2spk wav.scp; do
    if [ ! -f "$input_dir/$f" ]; then
      echo "$0: no such file $input_dir/$f" >&2
      exit 1;
    fi
  done
  
  # Locate the two external tools and abort when either one is missing.
  # The previous checks lacked "echo": the message string was executed as a
  # command, that command failed, and "exit 1" was therefore never reached.
  if ! sox=$(command -v sox); then
    echo "sox: command not found." >&2
    exit 1;
  fi

  # sph2pipe is taken from the tools directory below $KALDI_ROOT.
  # NOTE(review): KALDI_ROOT is presumably exported by path.sh -- confirm.
  sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
  if [ ! -x "$sph2pipe" ]; then
    echo "sph2pipe: command not found." >&2
    exit 1;
  fi
  
  # Prepare the output directory; spk2gender is copied over unchanged.
  mkdir -p "$output_dir" || exit 1
  cp -f "$input_dir/spk2gender" "$output_dir/spk2gender" || exit 1

  # Create a real silence file (16 kHz, 16 bit, $pad_silence long) instead of
  # relying on sox's own padding. With a file on disk sox can write the correct
  # length into the wav header while piping; otherwise it would have to count
  # all samples first and update the header afterwards, which does not work
  # properly in a pipe.
  mkdir -p "$output_dir/.tmp" || exit 1
  if ! "$sox" -n -r 16000 -b 16 "$output_dir/.tmp/sil.wav" \
      trim 0.0 "$pad_silence"; then
    # Every generated wav.scp entry refers to this file, so stop right here.
    echo "$0: failed to create $output_dir/.tmp/sil.wav" >&2
    exit 1
  fi
  
  # Merge all utterances of each speaker into a single long utterance whose
  # id is the speaker id.
  #   stdin      : spk2utt of the input directory
  #   arguments  : <text-in> <wav.scp-in> <text-out> <wav.scp-out>
  #                <sox> <sph2pipe> <silence-wav>
  #   text out   : "<spk> <transcript 1> <transcript 2> ..."
  #   wav.scp out: "<spk> <sox> "| <sph2pipe> -f wav <file>" <sil> ... -t wav - |"
  # Line ends inside the Perl strings are written as \n escapes; a literal
  # line break there would also copy the indentation into the output files.
  perl -e '
    my ($text_in, $wav_in, $text_out, $wav_out, $sox, $sph2pipe, $sil_wav) = @ARGV;
    open(TI, "<", $text_in) || die "Error: fail to open $text_in\n";
    open(TO, ">", $text_out) || die "Error: fail to open $text_out\n";
    open(WI, "<", $wav_in) || die "Error: fail to open $wav_in\n";
    open(WO, ">", $wav_out) || die "Error: fail to open $wav_out\n";

    my (%spk2utt, %text, %wav);

    # spk2utt: <spk> <utt1> <utt2> ...
    while (<STDIN>) {
      chomp;
      my @col = split;  # "my" gives a fresh array per line; its reference is stored.
      @col >= 2 || die "Error: bad line $_\n";
      my $spk = shift @col;
      $spk2utt{$spk} = \@col;
    }

    # text: <utt> <word1> <word2> ...
    while (<TI>) {
      chomp;
      my @col = split;
      @col >= 2 || die "Error: bad line $_\n";
      my $utt = shift @col;
      $text{$utt} = join(" ", @col);
    }

    # wav.scp: the audio file is read from the fifth field, so at least five
    # fields are required (expected form: <utt> <sph2pipe> -f wav <file> |).
    while (<WI>) {
      chomp;
      my @col = split;
      @col >= 5 || die "Error: bad line $_\n";
      $wav{$col[0]} = $col[4];
    }
    close(TI);
    close(WI);

    # Sorted speaker order keeps the output reproducible between runs.
    foreach my $spk (sort keys %spk2utt) {
      my $text_line = "";
      my $wav_line = " $sox";
      foreach my $utt (@{$spk2utt{$spk}}) {
        # Without an audio file the generated command could never run.
        defined($wav{$utt}) || die "Error: no wav.scp entry for utterance $utt\n";
        # A missing transcript contributes an empty string, as before.
        $text_line .= " " . (defined($text{$utt}) ? $text{$utt} : "");
        $wav_line .= " \"| $sph2pipe -f wav $wav{$utt}\"";  # speech
        $wav_line .= " $sil_wav";                           # silence
      }
      print TO $spk . $text_line . "\n";
      print WO $spk . $wav_line . " -t wav - |\n";
    }

    # Closing explicitly surfaces write errors such as a full disk.
    close(TO) || die "Error: fail to close $text_out\n";
    close(WO) || die "Error: fail to close $wav_out\n";
  ' "$input_dir/text" "$input_dir/wav.scp" "$output_dir/text" \
    "$output_dir/wav.scp" "$sox" "$sph2pipe" "$output_dir/.tmp/sil.wav" \
    < "$input_dir/spk2utt" || exit 1
  
  # Write the new speaker maps: each input speaker id is emitted twice per
  # line, so every speaker owns one utterance named after the speaker.
  awk '{print $1" "$1;}' "$input_dir/spk2utt" > "$output_dir/spk2utt" || exit 1
  utils/spk2utt_to_utt2spk.pl "$output_dir/spk2utt" \
    > "$output_dir/utt2spk" || exit 1

  # NOTE(review): fix_data_dir.sh presumably sorts and cleans up the new
  # directory -- confirm. Its failure is propagated instead of being hidden
  # behind the final "exit 0".
  utils/fix_data_dir.sh "$output_dir" || exit 1

  exit 0;