Blame view
egs/voxforge/s5/local/voxforge_select.sh
809 Bytes
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
#!/bin/bash

# Copyright 2012 Vassil Panayotov
# Apache 2.0

# Selects parts of VoxForge corpus based on certain criteria
# and makes symbolic links to the respective recordings
#
# Usage: voxforge_select.sh [--dialect <regex>] <src-dir> <dst-dir>
#   <src-dir>  directory tree that is searched for '*etc/readme*' files
#   <dst-dir>  directory that receives the symbolic links; it is REMOVED and
#              re-created on every run
#   --dialect  extended regex (matched case-insensitively) that must follow
#              the text "pronunciation dialect" in a readme file
#
# NOTE: strict mode (set -e / -u / -o pipefail) is deliberately not enabled:
# the sourced option parser is not visible here, and grep legitimately
# returns non-zero when a batch of readme files contains no match.

# regex to select speakers based on pronunciation dialect
dialect='(American)|(British)'

# e.g. accept a "dialect" parameter
. utils/parse_options.sh

echo "=== Starting VoxForge subset selection(accent: $dialect) ..."

if [ $# -ne 2 ]; then
  echo "Usage: $0 [--dialect <regex>] <src-dir> <dst-dir>"
  exit 1
fi

SRCDIR=$1
DSTDIR=$2

# Validate the source BEFORE wiping the destination, so that a mistyped
# source path does not destroy a previously built selection.
if [ ! -d "$SRCDIR" ]; then
  echo "$0: source directory '$SRCDIR' does not exist" >&2
  exit 1
fi

# Make the source path absolute: a relative symlink target would be resolved
# relative to the destination directory and would therefore dangle.
SRCDIR=$(CDPATH='' cd -- "$SRCDIR" && pwd) || exit 1

# ${DSTDIR:?} aborts instead of running rm on an empty path.
rm -rf -- "${DSTDIR:?}"
mkdir -p -- "$DSTDIR" || exit 1

# List every readme whose "pronunciation dialect" line matches the requested
# regex, then link the speaker directory (two levels above the readme, i.e.
# <speaker>/etc/readme -> <speaker>) into the destination directory.
# The user regex is wrapped in (...) so that a top-level alternation such as
# '(American)|(British)' stays attached to the "pronunciation dialect" prefix
# instead of matching "British" anywhere in the file.
find "$SRCDIR" -ipath '*etc/readme*' \
  -exec grep -E -iHl -- "pronunciation dialect.*(${dialect})" {} + |
  while IFS= read -r f; do
    d=$(dirname -- "$(dirname -- "$f")")
    ln -s -- "$d" "$DSTDIR"
  done

echo "*** VoxForge subset selection finished!"