Blame view

egs/wsj/s5/utils/data/get_utt2dur.sh 5.07 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
  #!/bin/bash
  
  # Copyright 2016  Johns Hopkins University (author: Daniel Povey)
  # Apache 2.0
  
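  # This script operates on a data directory, such as data/train/, and adds the
  # utt2dur file if it does not already exist (or if its number of lines differs
  # from that of utt2spk).  The file 'utt2dur' maps from utterance to the
  # duration of the utterance in seconds.  The durations are taken from the
  # first of the following sources that is available:
  #   1. the 'segments' file;
  #   2. the 'frame_shift' and 'utt2num_frames' files;
  #   3. the wav.scp file (it first tries interrogating the sphere-file headers,
  #      and if this fails, it runs wav-to-duration on the wave files);
  #   4. the feats.scp file (using feat-to-len and the frame shift).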
  
  # Defaults.  They are assigned before utils/parse_options.sh is sourced so
  # that command-line options can override them.
  # NOTE(review): assumes parse_options.sh maps --cmd and --nj onto 'cmd' and
  # 'nj' the same way it does for --frame-shift -- confirm.
  frame_shift=0.01   # seconds per frame; only used by the feats.scp fallback
  cmd=run.pl         # launcher for the parallel wav-to-duration jobs
  nj=4               # number of parallel wav-to-duration jobs

  . utils/parse_options.sh
  . ./path.sh

  # Exactly one positional argument (the data directory) is required.
  if [ $# != 1 ]; then
    echo "Usage: $0 [options] <datadir>"
    echo "e.g.:"
    echo " $0 data/train"
    echo " Options:"
    echo " --frame-shift      # frame shift in seconds. Only relevant when we are"
    echo "                    # getting duration from feats.scp, and only if the "
    echo "                    # file frame_shift does not exist (default: 0.01). "
    echo " --cmd              # command used to launch the wav-to-duration jobs"
    echo "                    # (default: run.pl)."
    echo " --nj               # number of parallel wav-to-duration jobs (default: 4)."
    exit 1
  fi

  export LC_ALL=C

  data=$1

  # Nothing to do if utt2dur is non-empty and already has one line per line of
  # utt2spk.  If utt2spk cannot be read the numeric test fails and we simply
  # fall through and recompute.
  if [ -s "$data/utt2dur" ] && \
    [ "$(wc -l < "$data/utt2spk")" -eq "$(wc -l < "$data/utt2dur")" ]; then
    echo "$0: $data/utt2dur already exists with the expected length.  We won't recompute it."
    exit 0;
  fi
  
  # Compute $data/utt2dur from the first available source, tried in this order:
  # segments, frame_shift + utt2num_frames, wav.scp, feats.scp.
  if [ -s "$data/segments" ]; then
    echo "$0: working out $data/utt2dur from $data/segments"
    # Duration is field 4 minus field 3 of each segments line (presumably the
    # segment end and start times in seconds -- confirm against the data format).
    awk '{len=$4-$3; print $1, len;}' < "$data/segments" > "$data/utt2dur"
  elif [[ -s "$data/frame_shift" && -f "$data/utt2num_frames" ]]; then
    echo "$0: computing $data/utt2dur from $data/{frame_shift,utt2num_frames}."
    frame_shift=$(cat "$data/frame_shift") || exit 1
    # The 1.5 correction is the typical value of (frame_length-frame_shift)/frame_shift.
    awk -v fs="$frame_shift" '{ $2=($2+1.5)*fs; print }' < "$data/utt2num_frames" > "$data/utt2dur"
  elif [ -f "$data/wav.scp" ]; then
    echo "$0: segments file does not exist so getting durations from wave files"

    # Fast path: if the wav.scp contains only lines of the form
    # utt1  /foo/bar/sph2pipe -f wav /baz/foo.sph |
    # the duration is computed from sample_count / sample_rate in the sphere
    # header.  The perl script exits non-zero at the first line that does not
    # have this form, or if a header cannot be opened or parsed; in that case
    # we fall back to wav-to-duration below, which overwrites utt2dur.
    if perl < "$data/wav.scp" -e '
       while (<>) { s/\|\s*$/ |/;  # make sure final | is preceded by space.
               @A = split; if (!($#A == 5 && $A[1] =~ m/sph2pipe$/ &&
                                 $A[2] eq "-f" && $A[3] eq "wav" && $A[5] eq "|")) { exit(1); }
               $utt = $A[0]; $sphere_file = $A[4];

               if (!open(F, "<", $sphere_file)) { die "Error opening sphere file $sphere_file"; }
               $sample_rate = -1;  $sample_count = -1;
               # Look at no more than the first 31 lines of the file.
               for ($n = 0; $n <= 30; $n++) {
                  $line = <F>;
                  last unless defined($line);  # reached end of file
                  if ($line =~ m/sample_rate -i (\d+)/) { $sample_rate = $1; }
                  if ($line =~ m/sample_count -i (\d+)/) { $sample_count = $1; }
                  # Stop at the end of the header ("last" is the perl loop exit).
                  if ($line =~ m/end_head/) { last; }
               }
               close(F);
               if ($sample_rate == -1 || $sample_count == -1) {
                 die "could not parse sphere header from $sphere_file";
               }
               $duration = $sample_count * 1.0 / $sample_rate;
               print "$utt $duration\n";
       } ' > "$data/utt2dur"; then
      echo "$0: successfully obtained utterance lengths from sphere-file headers"
    else
      echo "$0: could not get utterance lengths from sphere-file headers, using wav-to-duration"
      if ! command -v wav-to-duration >/dev/null; then
        echo  "$0: wav-to-duration is not on your path"
        exit 1;
      fi

      # With sox speed perturbation in the pipeline the whole file is read
      # rather than only its header.
      read_entire_file=false
      if grep -q 'sox.*speed' "$data/wav.scp"; then
        read_entire_file=true
        echo "$0: reading from the entire wav file to fix the problem caused by sox commands with speed perturbation. It is going to be slow."
        echo "... It is much faster if you call get_utt2dur.sh *before* doing the speed perturbation via e.g. perturb_data_dir_speed.sh or "
        echo "... perturb_data_dir_speed_3way.sh."
      fi

      # Never use more jobs than there are utterances.
      num_utts=$(wc -l < "$data/utt2spk")
      if [ "$nj" -gt "$num_utts" ]; then
        # Arithmetic expansion strips any padding that wc put around the count.
        nj=$((num_utts))
      fi

      utils/data/split_data.sh --per-utt "$data" "$nj"
      sdata=$data/split${nj}utt

      # $cmd is deliberately left unquoted: it may carry its own options
      # (a launcher name followed by arguments) and must be word-split.
      $cmd "JOB=1:$nj" "$data/log/get_durations.JOB.log" \
        wav-to-duration --read-entire-file=$read_entire_file \
        "scp:$sdata/JOB/wav.scp" "ark,t:$sdata/JOB/utt2dur" || \
          { echo "$0: there was a problem getting the durations"; exit 1; }

      # Concatenate the per-job outputs in job order.
      for ((n = 1; n <= nj; n++)); do
        cat "$sdata/$n/utt2dur"
      done > "$data/utt2dur"
    fi
  elif [ -f "$data/feats.scp" ]; then
    echo "$0: wave file does not exist so getting durations from feats files"
    # A frame_shift file in the data directory overrides the --frame-shift value.
    if [[ -s "$data/frame_shift" ]]; then
      frame_shift=$(cat "$data/frame_shift") || exit 1
      echo "$0: using frame_shift=$frame_shift from file $data/frame_shift"
    fi
    # The 1.5 correction is the typical value of (frame_length-frame_shift)/frame_shift.
    feat-to-len "scp:$data/feats.scp" ark,t:- |
      awk -v frame_shift="$frame_shift" '{print $1, ($2+1.5)*frame_shift}' > "$data/utt2dur"
  else
    echo "$0: Expected $data/wav.scp, $data/segments or $data/feats.scp to exist"
    exit 1
  fi
  
  # Sanity check: utt2dur should have one line per line of utt2spk.  A mismatch
  # is only a warning, unless fewer than half of the utterances got a duration,
  # in which case the script fails.
  len1=$(wc -l < "$data/utt2spk")
  len2=$(wc -l < "$data/utt2dur")
  if [ "$len1" != "$len2" ]; then
    echo "$0: warning: length of utt2dur does not equal that of utt2spk, $len2 != $len1"
    # $(( )) replaces the obsolete $[ ] arithmetic syntax.
    if [ "$len1" -gt "$((len2 * 2))" ]; then
      echo "$0: less than half of utterances got a duration: failing."
      exit 1
    fi
  fi

  echo "$0: computed $data/utt2dur"

  exit 0