Blame view
egs/vystadial_en/s5/local/download_en_data.sh
1.29 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
#!/bin/bash
# Copyright Ondrej Platek Apache 2.0
#
# Download the data_voip_en archive into DATA_ROOT, unpack it, verify the
# number of extracted entries and create the symlinks test, train, dev and
# arpa-bigram inside DATA_ROOT pointing into the unpacked directory.
#
# Usage: download_en_data.sh DATA_ROOT
#
# Exits with status 1 if the argument is missing, the download fails, the
# archive cannot be extracted, or the extracted entry count does not match.

if [ $# -lt 1 ] || [ -z "$1" ] ; then
  # Without this guard an empty DATA_ROOT would make every path below start
  # at the filesystem root (e.g. /data_voip_en.tgz).
  echo "Usage: $0 DATA_ROOT" >&2
  exit 1
fi
DATA_ROOT=$1

url=https://lindat.mff.cuni.cz/repository/xmlui/bitstream/handle/11858/00-097C-0000-0023-4671-4/data_voip_en.tgz
# This might be faster:
#url=http://www.openslr.org/resources/6/data_voip_en.tgz
name=data_voip_en
# Number of entries `find` is expected to report under $DATA_ROOT/$name
# (the directory itself is counted too) after a complete extraction.
extract_file=205859

mkdir -p "$DATA_ROOT" || exit 1

archive="$DATA_ROOT/${name}.tgz"
target_dir="$DATA_ROOT/$name"

if [ ! -f "$archive" ] ; then
  # wget -O creates/truncates its output file immediately, so a failed
  # download would leave a broken archive that later runs mistake for a
  # finished one. Download under a temporary name and rename on success.
  if ! wget "$url" -O "${archive}.part" ; then
    rm -f -- "${archive}.part"
    exit 1
  fi
  mv -- "${archive}.part" "$archive" || exit 1
  echo "Data successfully downloaded"
fi

if [[ ! -d "$target_dir" && -e "$target_dir" ]] ; then
  echo "The $DATA_ROOT/$name is not a directory and we cannot extract the data!"
  exit 1;
fi

if [ ! -d "$target_dir" ] ; then
  mkdir "$target_dir" || exit 1
  x=0
  # tar -v prints one line per extracted entry; the loop turns that stream
  # into a single self-overwriting progress line.
  tar xfv "$archive" -C "$DATA_ROOT" | \
    while IFS= read -r _ ; do
      x=$((x+1))
      echo -en "$x extracted from $extract_file files.\r"
    done
  # The pipeline's own status is that of the loop; take tar's from PIPESTATUS.
  tar_status=${PIPESTATUS[0]}
  if [ "$tar_status" -ne 0 ] ; then
    echo "tar exited with status $tar_status while extracting $archive" >&2
    exit 1
  fi
fi

if [ -d "$target_dir" ] ; then
  echo "Checking if data extracted correctly"
  num_files=$(find "$target_dir" -name '*' | wc -l)
  # [[ -ne ]] evaluates arithmetically, so padding some wc implementations
  # put in front of the number does not matter.
  if [[ $num_files -ne $extract_file ]] ; then
    echo "Data extraction failed! Extracted $num_files instead of $extract_file"
    exit 1;
  fi
  echo "It seams that data are extracted correctly"
fi

# Create the symlinks relative to DATA_ROOT; abort rather than create them
# in whatever directory we happen to be in if DATA_ROOT cannot be entered.
pushd "$DATA_ROOT" || exit 1
for t in test train dev ; do
  # Skip links that already exist so the script can be re-run cleanly.
  if [ ! -e "$t" ] && [ ! -L "$t" ] ; then
    ln -s "$name/$t" "$t"
  fi
done
if [ ! -e arpa-bigram ] && [ ! -L arpa-bigram ] ; then
  ln -s "$name/arpa_bigram" arpa-bigram
fi
popd