post_process_sad_to_segments.sh
1.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
# Copyright 2015-17 Vimal Manohar
# Apache 2.0.
# This script post-processes the output of steps/segmentation/decode_sad.sh,
# which is in the form of frame-level alignments, into a 'segments' file.
# The alignments must be speech activity detection marks i.e. 1 for silence
# and 2 for speech.
# Strict mode: stop on a failing command, on a failing pipeline stage, and on
# any reference to an unset variable.
set -euo pipefail

# Environment setup; path.sh is looked up in the current working directory.
. ./path.sh

# Option defaults. utils/parse_options.sh (sourced at the end of this section)
# presumably lets "--<name> <value>" command-line arguments override them
# -- confirm against that script.
stage=-10   # the segmentation step further down runs only when stage <= 0
cmd=run.pl  # launcher used for the per-job segmentation commands
# NOTE: whatever value nj has here is replaced further down by the contents
# of <vad-dir>/num_jobs.
nj=18

# The values below are in seconds
# (each is forwarded to steps/segmentation/internal/sad_to_segments.py as the
# option of the same name).
segment_padding=0.2
frame_shift=0.01
merge_consecutive_max_dur=0
min_segment_dur=0

. utils/parse_options.sh
# Exactly three positional arguments are required. Anything else prints the
# usage message (on stdout) and exits with status 1.
if [ $# -ne 3 ]; then
  echo "This script post-processes the output of steps/segmentation/decode_sad.sh, "
  echo "which is in the form of frame-level alignments, into kaldi segments. "
  echo "The alignments must be speech activity detection marks i.e. 1 for silence "
  echo "and 2 for speech."
  echo "Usage: $0 <data-dir> <vad-dir> <segmentation-dir>"
  # The example must list all three arguments, otherwise copying it verbatim
  # lands the user right back in this usage message.
  echo " e.g.: $0 data/dev_aspire_whole exp/vad_dev_aspire exp/segmentation_dev_aspire"
  exit 1
fi
# Positional arguments.
data_dir=$1
vad_dir=$2 # Alignment directory containing frame-level SAD labels
dir=$3     # Output directory: logs, per-job segments and the final segments file

mkdir -p "$dir"

# Fail early when the expected inputs are missing. Only the first alignment
# archive is checked here; the remaining ali.<n>.gz files are not verified.
for f in "$vad_dir/ali.1.gz" "$vad_dir/num_jobs"; do
  if [ ! -f "$f" ]; then
    echo "$0: Could not find file $f"
    exit 1
  fi
done

# Use the job count recorded in the alignment directory; this replaces any
# value nj had before this point.
nj=$(cat "$vad_dir/num_jobs") || exit 1

# Prepare the data directory. Presumably these create the per-job splits and
# the utt2dur file read by the segmentation step -- confirm against the
# helper scripts.
utils/split_data.sh "$data_dir" "$nj"
utils/data/get_utt2dur.sh "$data_dir"
# Stage 0: turn the frame-level SAD alignments of each job into a per-job
# segments file, $dir/segments.JOB, logging to $dir/log/segmentation.JOB.log.
# JOB is presumably replaced by $cmd with each job index in 1..nj (run.pl
# convention) -- confirm against the launcher in use.
if [ "$stage" -le 0 ]; then
  # $cmd is deliberately left unquoted so that a value carrying extra options
  # is still split into separate words. The escaped pipe (\|) is passed to
  # $cmd as a literal argument instead of being interpreted by this shell.
  $cmd JOB=1:"$nj" "$dir/log/segmentation.JOB.log" \
    copy-int-vector "ark:gunzip -c $vad_dir/ali.JOB.gz |" ark,t:- \| \
    steps/segmentation/internal/sad_to_segments.py \
      "--frame-shift=$frame_shift" "--segment-padding=$segment_padding" \
      "--min-segment-dur=$min_segment_dur" \
      "--merge-consecutive-max-dur=$merge_consecutive_max_dur" \
      "--utt2dur=$data_dir/utt2dur" - "$dir/segments.JOB"
fi
# Record the number of jobs alongside the outputs.
echo "$nj" > "$dir/num_jobs"

# Concatenate the per-job segments files, in job order, into $dir/segments.
# A missing segments.<n> makes cat fail; the script then aborts if errexit
# (set -e) is in effect.
for n in $(seq "$nj"); do
  cat "$dir/segments.$n"
done > "$dir/segments"