shift_and_combine_feats.sh
2.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/bin/bash
# Copyright 2017 Hossein Hadian
# Apache 2.0
write_utt2orig= # if provided, this script will write
# a mapping of shifted utterance ids
# to the original ones into the file
# specified by this option
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. utils/parse_options.sh
if [ $# != 3 ]; then
echo "Usage: $0 <frame-subsampling-factor> <srcdir> <destdir>"
echo "e.g.: $0 3 data/train data/train_fs3"
echo "For use in perturbing data for discriminative training and alignment of"
echo "frame-subsampled systems, this script uses utils/data/shift_feats.sh"
echo "and utils/data/combine_data.sh to shift the features"
echo "<frame-subsampling-factor> different ways and combine them."
echo "E.g. if <frame-subsampling-factor> is 3, this script will combine"
echo "the data frame-shifted by -1, 0 and 1 (c.f. shift-feats)."
exit 1
fi
frame_subsampling_factor=$1
srcdir=$2
destdir=$3
if [ ! -f $srcdir/feats.scp ]; then
echo "$0: expected $srcdir/feats.scp to exist"
exit 1
fi
if [ -f $destdir/feats.scp ]; then
echo "$0: $destdir/feats.scp already exists: refusing to run this (please delete $destdir/feats.scp if you want this to run)"
exit 1
fi
if [ ! -z $write_utt2orig ]; then
awk '{print $1 " " $1}' $srcdir/feats.scp >$write_utt2orig
fi
tmp_shift_destdirs=()
for frame_shift in `seq $[-(frame_subsampling_factor/2)] $[-(frame_subsampling_factor/2) + frame_subsampling_factor - 1]`; do
if [ "$frame_shift" == 0 ]; then continue; fi
utils/data/shift_feats.sh $frame_shift $srcdir ${destdir}_fs$frame_shift || exit 1
tmp_shift_destdirs+=("${destdir}_fs$frame_shift")
if [ ! -z $write_utt2orig ]; then
awk -v prefix="fs$frame_shift-" '{printf("%s%s %s\n", prefix, $1, $1);}' $srcdir/feats.scp >>$write_utt2orig
fi
done
utils/data/combine_data.sh $destdir $srcdir ${tmp_shift_destdirs[@]} || exit 1
rm -r ${tmp_shift_destdirs[@]}
utils/validate_data_dir.sh $destdir
src_nf=`cat $srcdir/feats.scp | wc -l`
dest_nf=`cat $destdir/feats.scp | wc -l`
if [ $[src_nf*frame_subsampling_factor] -ne $dest_nf ]; then
echo "There was a problem. Expected number of feature lines in destination dir to be $[src_nf*frame_subsampling_factor];"
exit 1;
fi
echo "$0: Successfully generated $frame_subsampling_factor-way shifted version of data in $srcdir, in $destdir"