merge_targets_dirs.sh
3.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/bin/bash
# Copyright 2017 Vimal Manohar
# Apache 2.0
# This script merges targets dirs created from multiple sources (systems) into
# single targets matrices. See steps/segmentation/lats_to_targets.sh for
# details about the format of the targets.
# This script merges targets from multiple sources using the weights supplied
# by the --weights option. The option --remove-mismatch-frames can also be
# used to remove frames where different sources have mismatched labels.
# e.g. We can check if the labels from supervision-constrained lattices
# and those from decoding match.
# --- Option defaults ---
cmd=run.pl   # command used to launch the merge jobs
nj=4         # number of jobs / data splits

# Comma-separated list of weights, one for each target source being combined.
# The number of weights must match the number of source target directories.
weights=

# When true, mismatched frames are removed by setting their targets to 0 if:
#   a) none of the sources has a column with value > 0.5, or
#   b) two sources have columns with value > 0.5 at different indexes,
#      e.g. silence prob > 0.5 in the targets from alignment while
#      speech prob > 0.5 in the targets from decoding.
remove_mismatch_frames=true

# Source ./path.sh when it is present, then hand the command line over to
# utils/parse_options.sh.
if [ -f ./path.sh ]; then
  . ./path.sh
fi
. utils/parse_options.sh
# At least three positional arguments are required: <data>, one or more
# <targets-N> directories, and <merged-targets>.  Otherwise print the usage
# message and exit with status 1.
# Note: the here-document delimiter is unquoted, so the trailing backslashes
# in the example act as line continuations and the example is printed on a
# single line.
if [ $# -lt 3 ]; then
  cat <<EOF
This script merges targets dirs created from multiple sources (systems) into
single targets matrices.
See top of the script for more details.
Usage: steps/segmentation/merge_targets_dirs.sh <data> <targets-1> <targets-2> ... <merged-targets>
e.g.: steps/segmentation/merge_targets_dirs.sh --weights 1.0,0.5 \
data/train_whole \
exp/segmentation1a/tri3b_train_whole_sup_targets_sub3 \
exp/segmentation1a/tri3b_train_whole_targets_sub3 \
exp/segmentation1a/tri3b_train_whole_combined_targets_sub3
EOF
  exit 1
fi
# Splits the positional arguments
#   <data> <targets-1> [<targets-2> ...] <merged-targets>
# into the globals used by the rest of the script:
#   data         - the first argument
#   dir          - the last argument (output directory)
#   targets_dirs - array holding the arguments in between
#   num_sources  - number of entries in targets_dirs
parse_positional_args() {
  data=$1
  dir=${@: -1}   # last argument to the script
  shift          # drop <data>
  # "$@" is quoted so that arguments containing whitespace or glob characters
  # stay single, literal array elements.
  targets_dirs=( "$@" )
  # 'pop' the last element, which is the output directory.  The subscript is
  # quoted so it is never subject to pathname expansion; the guard avoids a
  # bad-subscript error on an empty array.
  if [ ${#targets_dirs[@]} -gt 0 ]; then
    unset "targets_dirs[${#targets_dirs[@]}-1]"
  fi
  num_sources=${#targets_dirs[@]}   # number of targets to combine
}

parse_positional_args "$@"
shift   # as before, <data> is also consumed from the script's own arguments
# Split the data directory per utterance into $nj pieces; abort if that fails
# instead of continuing with a missing or partial split.
utils/data/split_data.sh --per-utt "$data" "$nj" || exit 1
sdata=${data}/split${nj}utt   # read later as $sdata/JOB/utt2spk

# The frame subsampling factor of the first targets directory is the reference
# value; it stays at 1 when that directory has no frame_subsampling_factor
# file.
frame_subsampling_factor=1
if [ -f "${targets_dirs[0]}/frame_subsampling_factor" ]; then
  frame_subsampling_factor=$(cat "${targets_dirs[0]}/frame_subsampling_factor") || exit 1
fi

# Directory that receives the per-job filtered input scp files.
mkdir -p "$dir/split${nj}" || exit 1
# Prints the frame subsampling factor stored in targets directory $1, or 1 when
# the directory has no frame_subsampling_factor file.  Returns non-zero if the
# file exists but cannot be read.
read_frame_subsampling_factor() {
  if [ -f "$1/frame_subsampling_factor" ]; then
    cat "$1/frame_subsampling_factor"
  else
    echo 1
  fi
}

# For every source targets directory: check that its frame subsampling factor
# agrees with the reference one, split its targets.scp into per-job scp files,
# and remember the matching rspecifier for the merge step.
targets_rspecifiers=()   # start from a clean array, whatever the environment holds
target_id=1
for t in "${targets_dirs[@]}"; do
  this_frame_subsampling_factor=$(read_frame_subsampling_factor "$t") || exit 1
  # `! [ a -eq b ]` is used rather than `[ a -ne b ]`: when a value is empty or
  # not an integer the test itself fails, and that must count as a mismatch
  # instead of being silently accepted.
  if ! [ "$this_frame_subsampling_factor" -eq "$frame_subsampling_factor" ]; then
    echo "$0: Mismatch in frame_subsampling_factor in $t and ${targets_dirs[0]}; $this_frame_subsampling_factor vs $frame_subsampling_factor"
    exit 1
  fi
  # NOTE(review): the exit status of filter_scps.pl is not checked (same as
  # before); confirm its exit-code contract before making it fatal.
  utils/filter_scps.pl "JOB=1:$nj" "$sdata/JOB/utt2spk" \
    "$t/targets.scp" "$dir/split${nj}/in_targets.$target_id.JOB.scp"
  targets_rspecifiers+=("scp:$dir/split${nj}/in_targets.$target_id.JOB.scp")
  target_id=$((target_id+1))
done
# Prints path $1 unchanged when it is absolute (starts with "/"), otherwise
# prefixed with the current working directory.
make_absolute_path() {
  case "$1" in
    /*) printf '%s\n' "$1" ;;
    *)  printf '%s\n' "$PWD/$1" ;;
  esac
}

# convert $dir to an absolute pathname.
fdir=$(make_absolute_path "$dir")
# Run the merge through $cmd with the job range JOB=1:$nj, logging to
# $dir/log/merge_targets.JOB.log.  The pipes are escaped (\|) so that this
# shell passes them on as plain arguments: the whole pipeline is handed to
# $cmd instead of being executed here.  The pipeline given to $cmd is:
#   1. paste-feats is given every per-source rspecifier collected in
#      targets_rspecifiers and writes a text archive (ark,t) to stdout;
#   2. steps/segmentation/internal/merge_targets.py receives --weights and
#      --remove-mismatch-frames and the arguments "- -" (presumably stdin and
#      stdout -- confirm against that script);
#   3. copy-feats reads the text archive from stdin and writes
#      $fdir/targets.JOB.ark together with $fdir/targets.JOB.scp.
# NOTE(review): "JOB" is presumably replaced with the job index by $cmd --
# confirm against run.pl.
# A non-zero exit status from $cmd aborts the script.
$cmd JOB=1:$nj $dir/log/merge_targets.JOB.log \
paste-feats "${targets_rspecifiers[@]}" ark,t:- \| \
steps/segmentation/internal/merge_targets.py --weights="$weights" \
--remove-mismatch-frames=$remove_mismatch_frames - - \| \
copy-feats ark,t:- ark,scp:$fdir/targets.JOB.ark,$fdir/targets.JOB.scp || exit 1
# Concatenate the per-job scp files, in job order, into a single targets.scp.
# A missing or unreadable per-job file aborts the script instead of silently
# leaving a truncated targets.scp behind.
for n in $(seq "$nj"); do
  cat "$dir/targets.$n.scp" || exit 1
done > "$dir/targets.scp"
# cleanup: the pattern needs two dots, so it matches the per-job
# targets.N.scp files but not the combined targets.scp.
rm "$dir"/targets.*.scp
# Record the frame subsampling factor in the output directory only when it
# differs from 1.
if [ "$frame_subsampling_factor" -ne 1 ]; then
  echo "$frame_subsampling_factor" > "$dir/frame_subsampling_factor" || exit 1
fi

# Validate the merged targets directory against the data directory; abort on
# failure.
steps/segmentation/validate_targets_dir.sh "$dir" "$data" || exit 1

echo "$0: Merged target directories to $dir"
exit 0