Blame view

egs/voxforge/s5/local/voxforge_select.sh 809 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
  #!/bin/bash
  
  # Copyright 2012 Vassil Panayotov
  # Apache 2.0
  
  # Selects parts of VoxForge corpus based on certain criteria
  # and makes symbolic links to the respective recordings 
  
  # Default speaker filter: a regular expression describing the accepted
  # pronunciation dialects.
  dialect='(American)|(British)'
  
  # Source the shared option parser so that the default above can be replaced
  # from the command line (the usage text below advertises --dialect). The
  # helper lives outside this file; its exact behaviour is not shown here.
  . utils/parse_options.sh
  
  echo "=== Starting VoxForge subset selection(accent: $dialect) ..."
  
  # Exactly two positional arguments are required: <src-dir> and <dst-dir>.
  # Anything else prints the usage line and terminates with status 1.
  [ $# -eq 2 ] || {
    echo "Usage: $0 [--dialect <regex>] <src-dir> <dst-dir>";
    exit 1;
  }
  
  # Positional arguments: the directory to search and the directory that will
  # hold the symbolic links.
  SRCDIR=$1
  DSTDIR=$2
  
  # DSTDIR is deleted recursively below, so refuse an empty value outright
  # (":?" prints the message and aborts the script).
  : "${DSTDIR:?destination directory must not be empty}"
  
  # Start from a clean destination. The expansion is quoted so that a path
  # containing whitespace or glob characters is removed as ONE path instead of
  # being split into several unrelated rm targets. Output stays suppressed, as
  # before: a missing directory is not an error here.
  rm -rf -- "${DSTDIR}" 1>/dev/null 2>&1
  
  # Without the destination directory every later "ln -s" would fail or land in
  # the wrong place, so stop immediately if it cannot be created.
  mkdir -p -- "${DSTDIR}" || exit 1
  
  #######################################
  # Symlink every speaker directory whose README declares a matching dialect.
  #
  # A speaker directory is the grandparent of a file whose path matches
  # '*etc/readme*' (case-insensitive), i.e. <speaker>/etc/README.
  #
  # Arguments:
  #   $1 - directory tree to search
  #   $2 - existing directory that receives the symlinks
  #   $3 - extended regex tested against the text that follows
  #        "pronunciation dialect" on a single README line
  # Returns:
  #   status of the read loop: 0 if nothing matched, otherwise the status of
  #   the last "ln -s"
  #######################################
  link_speakers_by_dialect() {
    local src=$1 dst=$2 pattern=$3
    local readme spk
  
    # The user regex is wrapped in a group: alternation binds loosest, so
    # without the parentheses "prefix.*(American)|(British)" would also accept
    # any README that merely contains "British" somewhere.
    find "$src" -ipath '*etc/readme*' \
      -exec grep -E -il -- "pronunciation dialect.*(${pattern})" {} \; |
    while IFS= read -r readme; do
      # <speaker>/etc/README -> <speaker>
      spk=$(dirname "$(dirname "$readme")")
  
      # A relative symlink target is resolved relative to the directory holding
      # the link, not the current directory, so a relative <src-dir> would
      # yield dangling links. Absolute paths are passed through untouched.
      case $spk in
        /*) ;;
        *) spk=$PWD/$spk ;;
      esac
  
      ln -s -- "$spk" "$dst"
    done
  }
  
  link_speakers_by_dialect "$SRCDIR" "$DSTDIR" "$dialect"
  
  echo "*** VoxForge subset selection finished!"