Blame view
egs/mini_librispeech/s5/local/download_lm.sh
2.63 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
#!/bin/bash

# Copyright 2014 Vassil Panayotov
#           2017 Daniel Povey
# Apache 2.0

# Downloads the language-model resources (3-gram ARPA LMs, lexicon and
# vocabulary) from <base-url> into <download_dir>, checks each file against
# its known size, and symlinks the files into <local_dir>.

if [[ $# -ne 3 ]]; then
  echo "Usage: $0 <base-url> <download_dir> <local_dir>"
  echo "e.g.: $0 http://www.openslr.org/resources/11 ./corpus/ data/local/lm"
  exit 1
fi

base_url=$1
dst_dir=$2
local_dir=$3

# Given a filename, prints the expected file size in bytes, or an empty
# string if the file is not one of the known resources.
# The case arms below can be autogenerated by entering the data directory and running:
# for f in *; do echo "\"$f\") echo \"$(du -b $f | awk '{print $1}')\";;"; done
filesize() {
  case "$1" in
    "3-gram.arpa.gz") echo "759636181" ;;
    "3-gram.pruned.1e-7.arpa.gz") echo "34094057" ;;
    "3-gram.pruned.3e-7.arpa.gz") echo "13654242" ;;
    "librispeech-lexicon.txt") echo "5627653" ;;
    "librispeech-vocab.txt") echo "1737588" ;;
    *) echo "" ;;
  esac
}

# Prints the actual size in bytes of the file given as $1.
# 'du -b' is the GNU coreutils (Linux) form. Where du has no -b option
# (e.g. BSD/macOS) the pipeline fails under pipefail and we fall back to the
# BSD form 'stat -f %z'. The subshell keeps pipefail from leaking out.
actual_filesize() {
  local path=$1
  (set -o pipefail; du -b "$path" 2>/dev/null | awk '{print $1}') \
    || stat -f %z "$path"
}

# Downloads the file named $1 from $base_url into $dst_dir unless a copy of
# the expected size is already present.
# Globals:   base_url, dst_dir (read)
# Arguments: $1 - file name (must be known to filesize())
# Returns:   0 if the file is present with the expected size afterwards,
#            1 on bad usage, unknown file, download error or size mismatch.
check_and_download() {
  [[ $# -eq 1 ]] || { echo "check_and_download() expects exactly one argument!"; return 1; }
  local fname=$1
  local target="$dst_dir/$fname"
  local expect_size fsize

  echo "Downloading file '$fname' into '$dst_dir'..."
  expect_size="$(filesize "$fname")"
  [[ -n "$expect_size" ]] || { echo "Unknown file size for '$fname'"; return 1; }

  if [[ -s "$target" ]]; then
    fsize=$(actual_filesize "$target")
    if [[ "$fsize" -eq "$expect_size" ]]; then
      echo "'$fname' already exists and appears to be complete"
      return 0
    else
      echo "WARNING: '$fname' exists, but the size is wrong - re-downloading ..."
    fi
  fi

  # NOTE(review): --no-check-certificate disables TLS certificate validation;
  # the size check below is the only integrity check on the download. Kept
  # for compatibility - confirm whether it is still needed.
  wget --no-check-certificate -O "$target" "$base_url/$fname" || {
    echo "Error while trying to download $fname!"
    return 1
  }

  fsize=$(actual_filesize "$target")
  [[ "$fsize" -eq "$expect_size" ]] || { echo "$fname: file size mismatch!"; return 1; }
  return 0
}

mkdir -p "$dst_dir" "$local_dir" || exit 1

for f in 3-gram.arpa.gz 3-gram.pruned.1e-7.arpa.gz 3-gram.pruned.3e-7.arpa.gz \
  librispeech-vocab.txt librispeech-lexicon.txt; do
  check_and_download "$f" || exit 1
done

# The symlink targets must be absolute so the links resolve from $local_dir.
# 'cd && pwd -P' is used instead of 'readlink -f', which is not available on
# every platform this script's stat fallback is meant to support; CDPATH is
# cleared so a relative $dst_dir cannot be redirected elsewhere.
dst_dir=$(CDPATH= cd -- "$dst_dir" && pwd -P) || exit 1

ln -sf "$dst_dir/3-gram.pruned.1e-7.arpa.gz" "$local_dir/lm_tgmed.arpa.gz" || exit 1
ln -sf "$dst_dir/3-gram.pruned.3e-7.arpa.gz" "$local_dir/lm_tgsmall.arpa.gz" || exit 1
ln -sf "$dst_dir/3-gram.arpa.gz" "$local_dir/lm_tglarge.arpa.gz" || exit 1
ln -sf "$dst_dir/librispeech-lexicon.txt" "$local_dir/librispeech-lexicon.txt" || exit 1
ln -sf "$dst_dir/librispeech-vocab.txt" "$local_dir/librispeech-vocab.txt" || exit 1

exit 0