Blame view
egs/bentham/v1/local/prepare_data.sh
2.01 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
#!/bin/bash
# Copyright 2018 Desh Raj (Johns Hopkins University)
# Apache 2.0

# This script downloads the Bentham handwriting database and prepares the training
# and test data (i.e text, images.scp, utt2spk and spk2utt) by calling create_splits.sh.
# In addition, it downloads data for all texts of Bentham for LM training purpose.
#
# Configuration variables (presumably overridable on the command line through
# utils/parse_options.sh -- confirm against that script):
#   download_dir     local working directory for extracted data
#   database_dir     directory where images.zip and gt.zip are stored
#   text_corpus_dir  directory holding the extra Bentham text corpus
#   stage            not referenced in this script

stage=0
download_dir=data/local/download/
database_dir=""
text_corpus_dir=""

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh || exit 1;

# Print an error message to stderr and abort the script.
die() {
  echo "$0: $*" >&2
  exit 1
}

# Download URL $1 to file $2 unless the file is already present.
# wget -O can leave a partial or empty file behind when it fails; remove it
# so that a later run does not mistake it for a completed download.
fetch() {
  local url=$1
  local dest=$2
  if [ -f "$dest" ]; then
    return 0
  fi
  if ! wget "$url" -O "$dest"; then
    rm -f -- "$dest"
    die "failed to download $url"
  fi
}

# Created after option parsing so that an overridden download_dir is honoured.
mkdir -p "$download_dir" || die "cannot create directory '$download_dir'"

BENTHAM_IMAGES_URL='http://transcriptorium.eu/~tsdata/BenthamR0/BenthamDatasetR0-Images.zip'
BENTHAM_GT_URL='http://transcriptorium.eu/~tsdata/BenthamR0/BenthamDatasetR0-GT.zip'
bentham_images="$database_dir/images.zip"
bentham_gt="$database_dir/gt.zip"
bentham_text="$download_dir/text"

# download and extract images and transcriptions
if [ -f "$bentham_images" ] && [ -f "$bentham_gt" ]; then
  echo "Not downloading since corpus already exists"
else
  # An empty database_dir would send the archives to the filesystem root.
  [ -n "$database_dir" ] || die "database_dir must be set to download the corpus"
  echo "Downloading images and transcriptions to $database_dir"
  mkdir -p "$database_dir" || die "cannot create directory '$database_dir'"
  # Each archive is fetched independently, so a missing gt.zip is recovered
  # even when images.zip is already there.
  fetch "$BENTHAM_IMAGES_URL" "$bentham_images"
  fetch "$BENTHAM_GT_URL" "$bentham_gt"
fi

if [ ! -d "$download_dir/gt" ]; then
  unzip "$bentham_gt" -d "$download_dir" || die "failed to extract '$bentham_gt'"
  mv "$download_dir/BenthamDatasetR0-GT" "$download_dir/gt" \
    || die "failed to rename extracted transcriptions to '$download_dir/gt'"
else
  echo "Local extracted corpus already exists"
fi

# Download extra Bentham text for LM training.
# The tests are quoted on purpose: with an empty value, an unquoted
# `[ -d $text_corpus_dir ]` becomes the one-argument test `[ -d ]`, which is
# always true and would silently skip the download.
if [ -z "$text_corpus_dir" ]; then
  # Without a corpus location we can only continue if the local copy exists.
  [ -d "$bentham_text" ] \
    || die "text_corpus_dir must be set since '$bentham_text' does not exist"
elif [ -d "$text_corpus_dir" ]; then
  echo "$0: Not downloading Bentham text corpus as it is already there."
else
  local/download_bentham_text.sh "$text_corpus_dir" \
    || die "failed to download Bentham text corpus to '$text_corpus_dir'"
fi

# Copy extra Bentham text to local
if [ -d "$bentham_text" ]; then
  echo "$0: Not copying as local Bentham already present."
else
  mkdir -p "$bentham_text" || die "cannot create directory '$bentham_text'"
  if ! cp "$text_corpus_dir"/Bentham-Text/* "$bentham_text"; then
    # Remove the directory created just above; otherwise the next run would
    # see it and skip the copy even though it is empty or incomplete.
    rm -rf -- "${bentham_text:?}"
    die "failed to copy Bentham text from '$text_corpus_dir/Bentham-Text'"
  fi
  echo "$0: Done copying extra Bentham text to local."
fi

# Creating train, val, and test splits for all directories
if [ -d data/train ]; then
  echo "Data splits and files already exist. Not creating again."
else
  echo "Creating train, val, and test splits and corresponding files.."
  local/create_splits.sh "$download_dir" "data/" || die "create_splits.sh failed"
fi