Blame view

egs/tedlium/s5_r3/local/download_data.sh 1.25 KB
8dcb6dfcb   Yannick Estève   first commit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
  #!/bin/bash
  
  # Copyright  2014  Nickolay V. Shmyrev
  #            2014  Brno University of Technology (Author: Karel Vesely)
  #            2016  Johns Hopkins University (author: Daniel Povey)
  # Apache 2.0
  
  # Create the corpus directory (a no-op when it already exists) and enter it.
  # Both steps abort the script on failure: every later command uses paths
  # relative to 'db', so continuing from the wrong directory would link,
  # download and extract the corpus in the wrong place.
  mkdir -p db || exit 1

  cd db || exit 1  ### Note: the rest of this script is executed from the directory 'db'.
  
  # TED-LIUM database:
  # When the fully-qualified hostname ends in .clsp.jhu.edu, symlink the copy
  # under /export/corpora5; otherwise download the archive from openslr.org
  # and unpack it.  Either way nothing is done if TEDLIUM_release-3 already
  # exists in the current directory.
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] ; then
    if [ ! -e TEDLIUM_release-3 ]; then
      # With no explicit link name, ln creates 'TEDLIUM_release-3' here.
      ln -sf /export/corpora5/TEDLIUM_release-3 || exit 1
    fi
    echo "$0: linking the TEDLIUM data from /export/corpora5/TEDLIUM_release-3"
  else
    if [ ! -e TEDLIUM_release-3 ]; then
      echo "$0: downloading TEDLIUM_release-3 data (it won't re-download if it was already downloaded.)"
      # the following command won't re-get it if it's already there
      # because of the --continue switch.
      wget --continue http://www.openslr.org/resources/51/TEDLIUM_release-3.tgz || exit 1

      echo "$0: extracting TEDLIUM_release-3 data"
      # Stop right away on a corrupt or truncated archive instead of carrying
      # on with a partially extracted corpus.
      if ! tar xf "TEDLIUM_release-3.tgz"; then
        echo "$0: error extracting TEDLIUM_release-3.tgz"
        exit 1
      fi
    else
      echo "$0: not downloading or un-tarring TEDLIUM_release-3 because it already exists."
    fi
  fi
  
  
  # Sanity check: the corpus must contain exactly this many .sph audio files.
  expected_num_sph=2351
  num_sph=$(find TEDLIUM_release-3/data -name '*.sph' | wc -l)
  # Some wc implementations (BSD/macOS) pad the count with leading blanks,
  # which would make a string comparison against the expected value fail even
  # when the count is right.  Arithmetic expansion normalises it to a bare
  # integer (and turns an empty result into 0).
  num_sph=$((num_sph))
  if [ "$num_sph" -ne "$expected_num_sph" ]; then
    echo "$0: expected to find $expected_num_sph .sph files in the directory db/TEDLIUM_release-3, found $num_sph"
    exit 1
  fi

  exit 0