ali_to_targets.sh
3.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/bin/bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
# This script converts alignments into targets for training neural network
# for speech activity detection. The mapping from phones to speech / silence / garbage
# is defined by the options --silence-phones and --garbage-phones.
# This is similar to the script steps/segmentation/lats_to_targets.sh which
# converts lattices to targets. See that script for details about the
# targets matrix.
set -o pipefail
silence_phones=
garbage_phones=
max_phone_duration=0.5
cmd=run.pl
[ -f ./path.sh ] && . ./path.sh
. utils/parse_options.sh
if [ $# -ne 4 ]; then
cat <<EOF
This script converts alignments into targets for training neural network
for speech activity detection. The mapping from phones to speech / silence / garbage
is defined by the options --silence-phones and --garbage-phones.
This is similar to the script steps/segmentation/lats_to_targets.sh which
converts lattices to targets. See that script for details about the
targets matrix.
Usage: steps/segmentation/ali_to_targets.sh <data-dir> <lang> <ali-dir> <targets-dir>"
e.g.: steps/segmentation/ali_to_targets.sh \
--silence-phones data/lang/phones/optional_silence.txt \
--garbage-phones data/lang/phones/silence.txt \
--max-phone-duration 0.5 \
data/train_split10s data/lang \
exp/segmentation1a/tri3b_train_split10s_ali \
exp/segmentation1a/tri3b_train_split10s_targets
EOF
exit 1
fi
data=$1
lang=$2
ali_dir=$3
dir=$4
if [ -f $ali_dir/final.mdl ]; then
srcdir=$ali_dir
else
srcdir=$ali_dir/..
fi
for f in $data/utt2spk $ali_dir/ali.1.gz $srcdir/final.mdl; do
if [ ! -f $f ]; then
echo "$0: Could not find file $f"
exit 1
fi
done
mkdir -p $dir
if [ -z "$garbage_phones" ]; then
oov_phone=$(steps/segmentation/internal/get_oov_phone.py $lang) || exit 1
echo $oov_phone | utils/int2sym.pl $lang/phones.txt > $dir/garbage_phones.txt || exit 1
else
cp $garbage_phones $dir/garbage_phones.txt || exit 1
fi
if [ -z "$silence_phones" ]; then
cat $lang/silence_phones.txt | \
utils/filter_scp.pl --exclude $dir/garbage_phones.txt > \
$dir/silence_phones.txt
else
cp $silence_phones $dir/silence_phones.txt
fi
nj=$(cat $ali_dir/num_jobs) || exit 1
$cmd JOB=1:$nj $dir/log/get_arc_info.JOB.log \
ali-to-phones --ctm-output --frame-shift=1 \
$srcdir/final.mdl "ark:gunzip -c $ali_dir/ali.JOB.gz |" - \| \
utils/int2sym.pl -f 5 $lang/phones.txt \| \
awk '{print $1" "int($3)" "int($4)" 1.0 "$5}' \> \
$dir/arc_info_sym.JOB.txt || exit 1
# make $dir an absolute pathname.
dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $dir ${PWD}`
frame_subsampling_factor=1
if [ -f $srcdir/frame_subsampling_factor ]; then
frame_subsampling_factor=$(cat $srcdir/frames_subsampling_factor)
echo $frame_subsampling_factor > $dir/frame_subsampling_factor
fi
frame_shift=$(utils/data/get_frame_shift.sh $data) || exit 1
max_phone_len=$(perl -e "print int($max_phone_duration / $frame_shift)")
$cmd JOB=1:$nj $dir/log/get_targets.JOB.log \
steps/segmentation/internal/arc_info_to_targets.py \
--silence-phones=$dir/silence_phones.txt \
--garbage-phones=$dir/garbage_phones.txt \
--max-phone-length=$max_phone_len \
$dir/arc_info_sym.JOB.txt - \| \
copy-feats ark,t:- \
ark,scp:$dir/targets.JOB.ark,$dir/targets.JOB.scp || exit 1
for n in $(seq $nj); do
cat $dir/targets.$n.scp
done > $dir/targets.scp
steps/segmentation/validate_targets_dir.sh $dir $data || exit 1
echo "$0: Done creating targets in $dir/targets.scp"