get_utt2dur.sh
5.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/bin/bash
# Copyright 2016 Johns Hopkins University (author: Daniel Povey)
# Apache 2.0
# This script operates on a data directory, such as in data/train/, and adds the
# utt2dur file if it does not already exist. The file 'utt2dur' maps from
# utterance to the duration of the utterance in seconds. This script works it
# out from the 'segments' file, or, if not present, from the wav.scp file (it
# first tries interrogating the headers, and if this fails, it reads the wave
# files in entirely.)
frame_shift=0.01
cmd=run.pl
nj=4
. utils/parse_options.sh
. ./path.sh
if [ $# != 1 ]; then
echo "Usage: $0 [options] <datadir>"
echo "e.g.:"
echo " $0 data/train"
echo " Options:"
echo " --frame-shift # frame shift in seconds. Only relevant when we are"
echo " # getting duration from feats.scp, and only if the "
echo " # file frame_shift does not exist (default: 0.01). "
exit 1
fi
export LC_ALL=C
data=$1
if [ -s $data/utt2dur ] && \
[ $(wc -l < $data/utt2spk) -eq $(wc -l < $data/utt2dur) ]; then
echo "$0: $data/utt2dur already exists with the expected length. We won't recompute it."
exit 0;
fi
if [ -s $data/segments ]; then
echo "$0: working out $data/utt2dur from $data/segments"
awk '{len=$4-$3; print $1, len;}' < $data/segments > $data/utt2dur
elif [[ -s $data/frame_shift && -f $data/utt2num_frames ]]; then
echo "$0: computing $data/utt2dur from $data/{frame_shift,utt2num_frames}."
frame_shift=$(cat $data/frame_shift) || exit 1
# The 1.5 correction is the typical value of (frame_length-frame_shift)/frame_shift.
awk -v fs=$frame_shift '{ $2=($2+1.5)*fs; print }' <$data/utt2num_frames >$data/utt2dur
elif [ -f $data/wav.scp ]; then
echo "$0: segments file does not exist so getting durations from wave files"
# if the wav.scp contains only lines of the form
# utt1 /foo/bar/sph2pipe -f wav /baz/foo.sph |
if perl <$data/wav.scp -e '
while (<>) { s/\|\s*$/ |/; # make sure final | is preceded by space.
@A = split; if (!($#A == 5 && $A[1] =~ m/sph2pipe$/ &&
$A[2] eq "-f" && $A[3] eq "wav" && $A[5] eq "|")) { exit(1); }
$utt = $A[0]; $sphere_file = $A[4];
if (!open(F, "<$sphere_file")) { die "Error opening sphere file $sphere_file"; }
$sample_rate = -1; $sample_count = -1;
for ($n = 0; $n <= 30; $n++) {
$line = <F>;
if ($line =~ m/sample_rate -i (\d+)/) { $sample_rate = $1; }
if ($line =~ m/sample_count -i (\d+)/) { $sample_count = $1; }
if ($line =~ m/end_head/) { break; }
}
close(F);
if ($sample_rate == -1 || $sample_count == -1) {
die "could not parse sphere header from $sphere_file";
}
$duration = $sample_count * 1.0 / $sample_rate;
print "$utt $duration\n";
} ' > $data/utt2dur; then
echo "$0: successfully obtained utterance lengths from sphere-file headers"
else
echo "$0: could not get utterance lengths from sphere-file headers, using wav-to-duration"
if ! command -v wav-to-duration >/dev/null; then
echo "$0: wav-to-duration is not on your path"
exit 1;
fi
read_entire_file=false
if grep -q 'sox.*speed' $data/wav.scp; then
read_entire_file=true
echo "$0: reading from the entire wav file to fix the problem caused by sox commands with speed perturbation. It is going to be slow."
echo "... It is much faster if you call get_utt2dur.sh *before* doing the speed perturbation via e.g. perturb_data_dir_speed.sh or "
echo "... perturb_data_dir_speed_3way.sh."
fi
num_utts=$(wc -l <$data/utt2spk)
if [ $nj -gt $num_utts ]; then
nj=$num_utts
fi
utils/data/split_data.sh --per-utt $data $nj
sdata=$data/split${nj}utt
$cmd JOB=1:$nj $data/log/get_durations.JOB.log \
wav-to-duration --read-entire-file=$read_entire_file \
scp:$sdata/JOB/wav.scp ark,t:$sdata/JOB/utt2dur || \
{ echo "$0: there was a problem getting the durations"; exit 1; }
for n in `seq $nj`; do
cat $sdata/$n/utt2dur
done > $data/utt2dur
fi
elif [ -f $data/feats.scp ]; then
echo "$0: wave file does not exist so getting durations from feats files"
if [[ -s $data/frame_shift ]]; then
frame_shift=$(cat $data/frame_shift) || exit 1
echo "$0: using frame_shift=$frame_shift from file $data/frame_shift"
fi
# The 1.5 correction is the typical value of (frame_length-frame_shift)/frame_shift.
feat-to-len scp:$data/feats.scp ark,t:- |
awk -v frame_shift=$frame_shift '{print $1, ($2+1.5)*frame_shift}' >$data/utt2dur
else
echo "$0: Expected $data/wav.scp, $data/segments or $data/feats.scp to exist"
exit 1
fi
len1=$(wc -l < $data/utt2spk)
len2=$(wc -l < $data/utt2dur)
if [ "$len1" != "$len2" ]; then
echo "$0: warning: length of utt2dur does not equal that of utt2spk, $len2 != $len1"
if [ $len1 -gt $[$len2*2] ]; then
echo "$0: less than half of utterances got a duration: failing."
exit 1
fi
fi
echo "$0: computed $data/utt2dur"
exit 0