Blame view
egs/wsj/s5/utils/data/get_segments_for_data.sh
791 Bytes
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
#!/bin/bash

# This script operates on a data directory, such as data/train/, and writes
# a new 'segments' file to stdout.  The file 'segments' maps from utterance
# to time offsets into a recording, with the format:
#   <utterance-id> <recording-id> <segment-begin> <segment-end>
# This script assumes utterance and recording ids are the same (i.e., that
# wav.scp is indexed by utterance), and uses durations from 'utt2dur',
# created if necessary by get_utt2dur.sh.
#
# Usage:   get_segments_for_data.sh <datadir>
# Output:  one line per entry of <datadir>/utt2dur, written to stdout.
# Returns: 0 on success; 1 on bad usage, if utt2dur could not be created,
#          or if utt2dur could not be read/converted.
#
# Because stdout carries the segments data (it is normally redirected into
# <datadir>/segments), all diagnostics are sent to stderr.

. ./path.sh

if [ $# != 1 ]; then
  # Usage goes to stderr so that a wrong invocation never ends up inside a
  # redirected segments file.
  echo "Usage: $0 [options] <datadir>" >&2
  echo "e.g.:" >&2
  echo " $0 data/train > data/train/segments" >&2
  exit 1
fi

data=$1

# Create utt2dur if it is missing or empty.  The helper's stdout is sent to
# stderr so it cannot pollute the segments written by this script.
if [ ! -s "$data/utt2dur" ]; then
  utils/data/get_utt2dur.sh "$data" 1>&2 || exit 1
fi

# <utt-id> <utt-id> 0 <utt-dur>
# The file is fed on stdin rather than passed as an operand so that awk can
# never mistake a path of the form 'name=value...' for a variable assignment.
# A failure here is propagated instead of being masked by a final 'exit 0'.
awk '{ print $1, $1, 0, $2 }' < "$data/utt2dur" || exit 1

exit 0