subs_download.sh 506 Bytes
#!/bin/bash

# Copyright 2018 John Morgan
# Apache 2.0.

# Begin configuration
# Source of the corpus archive: the first command-line argument, later
# handed to wget as the URL to fetch.
subs_src="$1"
# Relative scratch path; not referenced anywhere else in the visible script.
tmpdir="data/local/tmp"
# Both the download location and the extraction location are the directory
# the script is invoked from.
download_dir="$(pwd)"
datadir="$download_dir"
# End configuration

# Download the subs corpus archive to $download_dir/subs.txt.gz unless a
# file of that name is already present.
#
# wget -O creates (and truncates) its output file before any data arrives,
# so a failed or interrupted transfer would leave behind a file that the
# existence check above mistakes for a finished download on the next run.
# To avoid that, fetch into a ".part" name and rename only on success.
if [ ! -f "$download_dir/subs.txt.gz" ]; then
  # A URL is only required when something actually has to be fetched.
  if [ -z "$subs_src" ]; then
    echo "Usage: $0 <subs-corpus-url>" >&2
    exit 1
  fi
  if ! wget -O "$download_dir/subs.txt.gz.part" "$subs_src"; then
    echo "$0: failed to download $subs_src" >&2
    rm -f "$download_dir/subs.txt.gz.part"
    exit 1
  fi
  mv "$download_dir/subs.txt.gz.part" "$download_dir/subs.txt.gz" || exit 1
else
  echo "$0: The corpus $subs_src was already downloaded."
fi

# Decompress the corpus archive into $datadir/subs.txt unless that file
# already exists.
#
# The archive is read from $download_dir, which is where the download step
# writes it. The output goes to a ".part" name first and is renamed only
# after zcat succeeds: the output redirection creates the file before any
# data is written, so a failed extraction would otherwise leave a truncated
# subs.txt that later runs report as already extracted.
if [ ! -f "$datadir/subs.txt" ]; then
  if ! zcat < "$download_dir/subs.txt.gz" > "$datadir/subs.txt.part"; then
    echo "$0: failed to extract $download_dir/subs.txt.gz" >&2
    rm -f "$datadir/subs.txt.part"
    exit 1
  fi
  mv "$datadir/subs.txt.part" "$datadir/subs.txt" || exit 1
else
  echo "$0: subs file already extracted."
fi