Blame view

egs/tunisian_msa/s5/local/qcri_lexicon_download.sh 522 Bytes
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
  #!/bin/bash 
  
  # Copyright 2018 John Morgan
  # Apache 2.0.
  
  # ---- settings ----
  # first positional argument: the address handed to wget further down
  lex="$1"
  # scratch directory path; nothing in the visible part of this script reads it
  tmpdir="data/local/tmp"
  # the compressed download is stored in the directory the script is run from
  downloaddir="$(pwd)"
  # the uncompressed file is written to that very same directory
  datadir="$downloaddir"
  # ---- end of settings ----
  
  # Download the compressed file from $lex into $downloaddir and unpack it to
  # $datadir/qcri.txt, dropping the first three lines of the decompressed text.
  #
  # Exits with status 1 (message on stderr) when no URL was supplied or when the
  # download or the extraction fails.
  qcri_archive="$downloaddir/qcri.txt.bz2"
  qcri_lexicon="$datadir/qcri.txt"
  qcri_extract=false

  # -s rather than -f: a zero-byte archive left by an earlier failed transfer
  # must not count as "already downloaded".
  if [ ! -s "$qcri_archive" ]; then
    if [ -z "$lex" ]; then
      echo "$0: no download URL given (expected as first argument)." >&2
      exit 1
    fi
    # "wget -O" creates its output file even when the transfer fails, so fetch
    # into a temporary name and only rename it once wget reports success.
    if ! wget -O "$qcri_archive.part" "$lex"; then
      rm -f -- "$qcri_archive.part"
      echo "$0: failed to download $lex" >&2
      exit 1
    fi
    mv -f -- "$qcri_archive.part" "$qcri_archive" || exit 1
    qcri_extract=true
  else
    echo "$0: The corpus $lex was already downloaded."
    # Re-create the text file if a previous run never got as far as unpacking.
    [ -f "$qcri_lexicon" ] || qcri_extract=true
  fi

  if "$qcri_extract"; then
    # pipefail (scoped to the subshell) makes a bzcat failure visible; without
    # it the pipeline status would only reflect tail.
    if ! (
      set -o pipefail
      bzcat "$qcri_archive" | tail -n +4 > "$qcri_lexicon.part"
    ); then
      rm -f -- "$qcri_lexicon.part"
      echo "$0: failed to uncompress $qcri_archive" >&2
      exit 1
    fi
    mv -f -- "$qcri_lexicon.part" "$qcri_lexicon" || exit 1
  fi