Blame view

egs/wsj/s5/utils/data/get_utt2num_frames.sh 1.11 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  #! /bin/bash
  
  # Copyright 2016  Vimal Manohar
  # Apache 2.0.
  
  # Default settings. They are assigned before utils/parse_options.sh is
  # sourced; presumably that script lets command-line options override them
  # (confirm against parse_options.sh, which is not part of this file).
  #
  # cmd: launcher prefix used further down to run feat-to-len as JOB=1:$nj.
  # nj:  number of per-utterance splits / parallel jobs.
  cmd=run.pl
  nj=4
  
  # Only used when feats.scp is absent: the frame count is then estimated as
  # int((duration - frame_overlap) / frame_shift) from utt2dur.
  # NOTE(review): both values look like seconds (10 ms shift) -- confirm.
  frame_shift=0.01
  frame_overlap=0.015
  
  # Order matters: options are parsed first, then the environment set-up in
  # ./path.sh is sourced (presumably it puts tools such as feat-to-len on
  # PATH -- verify in the recipe's path.sh).
  . utils/parse_options.sh
  . ./path.sh
  
  # Exactly one positional argument (the data directory) is expected.
  # Anything else prints the description and usage line, then exits with 1.
  [ "$#" -eq 1 ] || {
    printf '%s\n' \
      "This script writes a file utt2num_frames with the " \
      "number of frames in each utterance as measured based on the " \
      "duration of the utterances (in utt2dur) and the specified " \
      "frame_shift and frame_overlap." \
      "Usage: $0 <data>"
    exit 1
  }
  
  # The single positional argument: the data directory to operate on.
  data=$1
  
  # Skip all work when a non-empty utt2num_frames is already there. An empty
  # file fails the -s test and is recomputed. The path is quoted so that a
  # directory name containing whitespace or glob characters is tested as one
  # word instead of breaking the [ ... ] expression.
  if [ -s "$data/utt2num_frames" ]; then
    echo "$0: $data/utt2num_frames already present!"
    exit 0
  fi
  
  # Fallback when no features have been extracted: estimate the number of
  # frames of each utterance from its duration in utt2dur as
  #   int((duration - frame_overlap) / frame_shift)
  # and write "<utt-id> <num-frames>" lines to utt2num_frames.
  if [ ! -f "$data/feats.scp" ]; then
    # Stop here if utt2dur could not be produced; otherwise awk would run on a
    # missing file and the script would still report success.
    utils/data/get_utt2dur.sh "$data" || exit 1
  
    # A partial output must not survive a failure: a non-empty leftover would
    # be taken for a finished result by the "already present" check next run.
    if ! awk -v fs="$frame_shift" -v fovlp="$frame_overlap" \
      '{print $1" "int( ($2 - fovlp) / fs)}' \
      "$data/utt2dur" > "$data/utt2num_frames"; then
      echo "$0: failed to compute $data/utt2num_frames from $data/utt2dur" >&2
      rm -f "$data/utt2num_frames"
      exit 1
    fi
    exit 0
  fi
  
  # Features exist: split the data directory per utterance into $nj parts and
  # let feat-to-len write one "<utt-id> <num-frames>" text archive per part.
  utils/split_data.sh --per-utt "$data" "$nj" || exit 1
  
  # $cmd is deliberately left unquoted: it may carry launcher options and has
  # to undergo word splitting.
  $cmd JOB=1:$nj "$data/log/get_utt2num_frames.JOB.log" \
    feat-to-len "scp:$data/split${nj}utt/JOB/feats.scp" \
    "ark,t:$data/split${nj}utt/JOB/utt2num_frames" || exit 1
  
  # Merge the per-split results in split order. A missing or unreadable part
  # aborts the script and removes the truncated output, so that a later run
  # does not mistake it for a complete, already present utt2num_frames.
  for n in $(seq "$nj"); do
    cat "$data/split${nj}utt/$n/utt2num_frames" || {
      echo "$0: failed to read $data/split${nj}utt/$n/utt2num_frames" >&2
      rm -f "$data/utt2num_frames"
      exit 1
    }
  done > "$data/utt2num_frames"
  
  echo "$0: Computed and wrote $data/utt2num_frames"