Blame view
egs/wsj/s5/utils/data/get_utt2dur.sh
5.07 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
#!/bin/bash

# Copyright 2016  Johns Hopkins University (author: Daniel Povey)
# Apache 2.0

# This script operates on a data directory, such as in data/train/, and adds the
# utt2dur file if it does not already exist.  The file 'utt2dur' maps from
# utterance to the duration of the utterance in seconds.
#
# The duration source is chosen in this order:
#   1. <datadir>/segments (duration = end time - start time);
#   2. <datadir>/frame_shift together with <datadir>/utt2num_frames;
#   3. <datadir>/wav.scp: first by reading sphere-file headers directly (only
#      when every line has the form "utt sph2pipe -f wav file.sph |"), and if
#      that fails, by running wav-to-duration in parallel;
#   4. <datadir>/feats.scp, via feat-to-len.
#
# Exit status: 0 on success (or if utt2dur already exists with as many lines as
# utt2spk), 1 on a usage error, when no usable input exists, when
# wav-to-duration is unavailable or fails, or when fewer than half of the
# utterances in utt2spk received a duration.

# Defaults.  NOTE(review): these are presumably overridable on the command line
# (--frame-shift, --cmd, --nj) through utils/parse_options.sh -- confirm there.
frame_shift=0.01  # seconds; only used for feats.scp when <datadir>/frame_shift is absent.
cmd=run.pl        # job launcher used for the wav-to-duration fallback.
nj=4              # number of parallel jobs for the wav-to-duration fallback.

. utils/parse_options.sh
. ./path.sh

if [ $# != 1 ]; then
  echo "Usage: $0 [options] <datadir>"
  echo "e.g.:"
  echo " $0 data/train"
  echo " Options:"
  echo " --frame-shift # frame shift in seconds. Only relevant when we are"
  echo " # getting duration from feats.scp, and only if the "
  echo " # file frame_shift does not exist (default: 0.01). "
  exit 1
fi

export LC_ALL=C

data=$1

# Nothing to do if utt2dur is already present and has one line per utterance.
if [ -s "$data/utt2dur" ] && \
   [ "$(wc -l <"$data/utt2spk")" -eq "$(wc -l <"$data/utt2dur")" ]; then
  echo "$0: $data/utt2dur already exists with the expected length. We won't recompute it."
  exit 0
fi

if [ -s "$data/segments" ]; then
  echo "$0: working out $data/utt2dur from $data/segments"
  # Fields 3 and 4 of each segments line are the start and end times.
  awk '{len=$4-$3; print $1, len;}' <"$data/segments" >"$data/utt2dur"
elif [[ -s "$data/frame_shift" && -f "$data/utt2num_frames" ]]; then
  echo "$0: computing $data/utt2dur from $data/{frame_shift,utt2num_frames}."
  frame_shift=$(cat "$data/frame_shift") || exit 1
  # The 1.5 correction is the typical value of (frame_length-frame_shift)/frame_shift.
  awk -v fs="$frame_shift" '{ $2=($2+1.5)*fs; print }' <"$data/utt2num_frames" >"$data/utt2dur"
elif [ -f "$data/wav.scp" ]; then
  echo "$0: segments file does not exist so getting durations from wave files"

  # If the wav.scp contains only lines of the form
  #   utt1 /foo/bar/sph2pipe -f wav /baz/foo.sph |
  # the duration can be read straight from the sphere header.  The Perl program
  # exits with non-zero status as soon as it meets a line of any other form (or
  # a header it cannot parse), in which case we fall back to wav-to-duration.
  # (The Perl text is single-quoted for the shell, so it must not contain
  # apostrophes.)
  if perl -e '
    while (<>) {
      s/\|\s*$/ |/;  # make sure final | is preceded by space.
      @A = split;
      if (!($#A == 5 && $A[1] =~ m/sph2pipe$/ &&
            $A[2] eq "-f" && $A[3] eq "wav" && $A[5] eq "|")) { exit(1); }
      $utt = $A[0];
      $sphere_file = $A[4];
      # Three-argument open: the file name is never interpreted as a mode.
      if (!open(F, "<", $sphere_file)) { die "Error opening sphere file $sphere_file"; }
      $sample_rate = -1;
      $sample_count = -1;
      # Scan at most the first 31 header lines.
      for ($n = 0; $n <= 30; $n++) {
        $line = <F>;
        last unless defined($line);  # file ended before end_head was seen.
        if ($line =~ m/sample_rate -i (\d+)/) { $sample_rate = $1; }
        if ($line =~ m/sample_count -i (\d+)/) { $sample_count = $1; }
        if ($line =~ m/end_head/) { last; }  # stop before the audio data.
      }
      close(F);
      if ($sample_rate == -1 || $sample_count == -1) {
        die "could not parse sphere header from $sphere_file";
      }
      $duration = $sample_count * 1.0 / $sample_rate;
      print "$utt $duration\n";  # one "<utt> <seconds>" record per line.
    } ' <"$data/wav.scp" >"$data/utt2dur"; then
    echo "$0: successfully obtained utterance lengths from sphere-file headers"
  else
    echo "$0: could not get utterance lengths from sphere-file headers, using wav-to-duration"
    if ! command -v wav-to-duration >/dev/null; then
      echo "$0: wav-to-duration is not on your path"
      exit 1
    fi

    # Headers do not reflect the length after a sox speed change, so in that
    # case the whole file has to be read.
    read_entire_file=false
    if grep -q 'sox.*speed' "$data/wav.scp"; then
      read_entire_file=true
      echo "$0: reading from the entire wav file to fix the problem caused by sox commands with speed perturbation. It is going to be slow."
      echo "... It is much faster if you call get_utt2dur.sh *before* doing the speed perturbation via e.g. perturb_data_dir_speed.sh or "
      echo "... perturb_data_dir_speed_3way.sh."
    fi

    num_utts=$(wc -l <"$data/utt2spk")
    # Some wc implementations pad the count with blanks; keep only the number
    # so that it is safe to use in the split directory name below.
    num_utts=${num_utts//[[:space:]]/}
    # Never use more jobs than there are utterances.
    if [ "$nj" -gt "$num_utts" ]; then
      nj=$num_utts
    fi

    utils/data/split_data.sh --per-utt "$data" "$nj"
    sdata=$data/split${nj}utt

    # $cmd is deliberately left unquoted: it may carry options in addition to
    # the launcher name, and must be word-split.
    $cmd JOB=1:"$nj" "$data/log/get_durations.JOB.log" \
      wav-to-duration --read-entire-file="$read_entire_file" \
      "scp:$sdata/JOB/wav.scp" "ark,t:$sdata/JOB/utt2dur" || \
      { echo "$0: there was a problem getting the durations"; exit 1; }

    # Concatenate the per-job outputs in job order.
    for ((n = 1; n <= nj; n++)); do
      cat "$sdata/$n/utt2dur"
    done >"$data/utt2dur"
  fi
elif [ -f "$data/feats.scp" ]; then
  echo "$0: wave file does not exist so getting durations from feats files"
  if [[ -s "$data/frame_shift" ]]; then
    frame_shift=$(cat "$data/frame_shift") || exit 1
    echo "$0: using frame_shift=$frame_shift from file $data/frame_shift"
  fi
  # The 1.5 correction is the typical value of (frame_length-frame_shift)/frame_shift.
  feat-to-len "scp:$data/feats.scp" ark,t:- |
    awk -v frame_shift="$frame_shift" '{print $1, ($2+1.5)*frame_shift}' >"$data/utt2dur"
else
  echo "$0: Expected $data/wav.scp, $data/segments or $data/feats.scp to exist"
  exit 1
fi

# Sanity check: warn when utt2dur and utt2spk differ in length, and fail when
# fewer than half of the utterances got a duration.
len1=$(wc -l <"$data/utt2spk")
len2=$(wc -l <"$data/utt2dur")
if [ "$len1" != "$len2" ]; then
  echo "$0: warning: length of utt2dur does not equal that of utt2spk, $len2 != $len1"
  if [ "$len1" -gt "$((len2 * 2))" ]; then
    echo "$0: less than half of utterances got a duration: failing."
    exit 1
  fi
fi

echo "$0: computed $data/utt2dur"

exit 0