Blame view
Scripts/utils/reduce_data_dir.sh
1.89 KB
ec85f8892 first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
#!/bin/bash
# koried, 10/29/2012
#
# Reduce a data set based on a list of turn-ids.
#
# Usage: reduce_data_dir.sh srcdir turnlist destdir
#   srcdir   - source data directory; must contain utt2spk
#   turnlist - id list handed to utils/filter_scp.pl to select entries
#              from srcdir/utt2spk
#   destdir  - output directory (created if missing); must differ from srcdir
#
# Calls utils/filter_scp.pl and utils/utt2spk_to_spk2utt.pl by relative path,
# so it has to be run from the directory that contains utils/.
# NOTE(review): the exact matching rules of filter_scp.pl are not visible
# here; presumably it keeps input lines whose key appears in the id list.

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [ $# -ne 3 ]; then
  die "usage: $0 srcdir turnlist destdir"
fi

srcdir=$1
reclist=$2
destdir=$3

[ -f "$srcdir/utt2spk" ] || die "$0: no such file $srcdir/utt2spk"
# -r rather than -f so that a pipe / process substitution is still accepted.
[ -r "$reclist" ] || die "$0: cannot read turn list $reclist"

# Filter every optional table found in $srcdir down to the entries that
# survive in $destdir/utt2spk and $destdir/spk2utt.
# Globals:   srcdir, destdir (read)
# Assumes:   $destdir/utt2spk and $destdir/spk2utt already exist
# Outputs:   writes the filtered tables into $destdir and prints a
#            one-line summary of the utterance counts to stdout
do_filtering() {
  local name n_src n_dest

  # Tables filtered against the reduced utt2spk.
  for name in feats.scp wav.scp text; do
    if [ -f "$srcdir/$name" ]; then
      utils/filter_scp.pl "$destdir/utt2spk" <"$srcdir/$name" >"$destdir/$name"
    fi
  done

  # Tables filtered against the reduced spk2utt.
  for name in spk2gender cmvn.scp; do
    if [ -f "$srcdir/$name" ]; then
      utils/filter_scp.pl "$destdir/spk2utt" <"$srcdir/$name" >"$destdir/$name"
    fi
  done

  if [ -f "$srcdir/segments" ]; then
    utils/filter_scp.pl "$destdir/utt2spk" <"$srcdir/segments" >"$destdir/segments"

    # Column 2 of the filtered segments file gives the recordings still in
    # use; the list is kept in a scratch file that is removed below.
    awk '{print $2;}' "$destdir/segments" | sort -u >"$destdir/reco"

    # With a segments file present, wav.scp is re-filtered by recording and
    # deliberately overwrites the utt2spk-filtered wav.scp written above.
    # reco2file_and_channel is filtered the same way.
    # The STM file is filtered for proper sclite scoring (this will also
    # remove the comment lines).
    for name in wav.scp reco2file_and_channel stm; do
      if [ -f "$srcdir/$name" ]; then
        utils/filter_scp.pl "$destdir/reco" <"$srcdir/$name" >"$destdir/$name"
      fi
    done

    rm -- "$destdir/reco"
  fi

  n_src=$(wc -l <"$srcdir/utt2spk")
  n_dest=$(wc -l <"$destdir/utt2spk")
  echo "Reduced #utt from $n_src to $n_dest"
}

mkdir -p "$destdir" || die "$0: cannot create directory $destdir"

# Writing into the source directory would truncate utt2spk (and every other
# table) before it is read, so refuse to run in place.
if [ "$srcdir" -ef "$destdir" ]; then
  die "$0: srcdir and destdir must be different directories"
fi

# Filter the utt2spk based on the set of recordings, then rebuild spk2utt
# from the reduced utt2spk. Everything in do_filtering depends on these two.
utils/filter_scp.pl "$reclist" <"$srcdir/utt2spk" >"$destdir/utt2spk" \
  || die "$0: failed to filter $srcdir/utt2spk"
utils/utt2spk_to_spk2utt.pl <"$destdir/utt2spk" >"$destdir/spk2utt" \
  || die "$0: failed to create $destdir/spk2utt"

do_filtering