voxforge_select.sh 809 Bytes
#!/bin/bash

# Copyright 2012 Vassil Panayotov
# Apache 2.0

# Selects parts of VoxForge corpus based on certain criteria
# and makes symbolic links to the respective recordings 

# regex to select speakers based on pronunciation dialect
dialect='(American)|(British)'

# e.g. accept a "dialect" parameter
. utils/parse_options.sh

echo "=== Starting VoxForge subset selection(accent: $dialect) ..."

if [ $# -ne 2 ]; then
  echo "Usage: $0 [--dialect <regex>] <src-dir> <dst-dir>";
  exit 1;
fi

SRCDIR=$1
DSTDIR=$2

rm -rf ${DSTDIR} 1>/dev/null 2>&1
mkdir -p ${DSTDIR}

find $SRCDIR -iwholename '*etc/readme*' \
 -exec egrep -iHl 'pronunciation dialect.*'${dialect} {} \; |\
while read f; do 
  d=`dirname $f`; 
  d=`dirname $d`; 
  ln -s $d ${DSTDIR}; 
done

echo "*** VoxForge subset selection finished!"