#!/bin/bash

# qcri_lexicon_download.sh
#
# Copyright 2018 John Morgan
# Apache 2.0.
#
# Download the bzip2-compressed lexicon at the given URL and write its
# decompressed contents, minus the first three lines, to qcri.txt.
#
# Usage: qcri_lexicon_download.sh <lexicon-url>
#
# Files written (both in the directory the script is run from):
#   qcri.txt.bz2  the archive exactly as downloaded
#   qcri.txt      the decompressed archive without its first three lines
#
# Exit status: 0 on success, 1 on a usage, download or extraction error.

# Abort on unhandled errors and unset variables; pipefail makes a bzcat
# failure visible even though tail is the last stage of the pipeline.
set -euo pipefail

if [[ -z "${1:-}" ]]; then
  echo "Usage: $0 <lexicon-url>" >&2
  exit 1
fi

# configuration variables
lex=$1
# NOTE(review): tmpdir is not referenced anywhere in this script; it is kept
# only in case something sources this file -- confirm and remove.
# shellcheck disable=SC2034
tmpdir=data/local/tmp
# where to put the downloaded speech corpus
downloaddir=$(pwd)
# Where to put the uncompressed file
datadir=$(pwd)
# end of configuration variable settings

archive=$downloaddir/qcri.txt.bz2
lexicon=$datadir/qcri.txt

# Work on ".part" files and rename them only after the producing command
# succeeded, so an interrupted run never leaves a truncated file that a
# later run would mistake for a finished one.
partial_archive=$archive.part
partial_lexicon=$lexicon.part
trap 'rm -f "$partial_archive" "$partial_lexicon"' EXIT

# download the corpus
fresh_download=false
if [[ -f "$archive" ]]; then
  echo "$0: The corpus $lex was already downloaded."
else
  # wget -O creates its output file even when the download fails, hence the
  # temporary name: the real archive name only ever holds a complete file.
  if ! wget -O "$partial_archive" "$lex"; then
    echo "$0: failed to download $lex" >&2
    exit 1
  fi
  mv "$partial_archive" "$archive"
  fresh_download=true
fi

# Extract after every fresh download, and also when the archive is present
# but the lexicon is missing or empty (e.g. a previous extraction failed).
if "$fresh_download" || [[ ! -s "$lexicon" ]]; then
  # tail -n +4 starts output at line 4, i.e. drops the first three lines
  # (presumably a header -- verify against the upstream file).
  if ! bzcat "$archive" | tail -n +4 > "$partial_lexicon"; then
    echo "$0: failed to extract $archive" >&2
    exit 1
  fi
  mv "$partial_lexicon" "$lexicon"
fi