Blame view
egs/yomdle_zh/v1/local/create_download.sh
1.51 KB
8dcb6dfcb first commit |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
#!/bin/bash
# Copyright 2018 Chun-Chieh Chang

# The original format of the dataset given is GEDI and page images.
# This script is written to create line images from page images.
# It also creates csv files from the GEDI files.
# Finally, it downloads the CangJie table to $download_dir/cj5-cc.txt
# with its first 8 lines removed.
#
# Run it from the recipe directory: cmd.sh, path.sh, utils/ and local/ are
# all referenced relative to the current working directory.

# Defaults. They are assigned before utils/parse_options.sh is sourced,
# presumably so they can be overridden from the command line -- confirm
# against utils/parse_options.sh.
# NOTE(review): the corpus paths and directory names below say "Farsi"
# although this looks like the yomdle_zh recipe -- confirm they are intended.
database_slam=/export/corpora5/slam/SLAM/Farsi/transcribed
database_yomdle=/export/corpora5/slam/YOMDLE/final_farsi
cangjie_url=https://raw.githubusercontent.com/wanleung/libcangjie/master/tables/cj5-cc.txt
download_dir=download
slam_dir=$download_dir/slam_farsi
yomdle_dir=$download_dir/yomdle_farsi

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh || exit 1;

# NOTE(review): `language` is never assigned in this script; unless it is
# inherited from the environment, the two "Processing" messages print an
# empty language name -- confirm where it is supposed to come from.

# --- SLAM: GEDI annotations -> raw csv -> line images -----------------------
echo "$0: Processing SLAM ${language}"
echo "Date: $(date)."
mkdir -p "${slam_dir}"/{truth_csv,truth_csv_raw,truth_line_image} || exit 1

# Each stage consumes the output of the previous one, so stop at the first
# failure instead of running later stages on missing or partial data.
local/gedi2csv.py \
  --inputDir "${database_slam}" \
  --outputDir "${slam_dir}/truth_csv_raw" \
  --log "${slam_dir}/GEDI2CSV_enriched.log" || exit 1

local/create_line_image_from_page_image.py \
  "${database_slam}" \
  "${slam_dir}/truth_csv_raw" \
  "${slam_dir}" || exit 1

# --- YOMDLE: annotations -> raw csv -> line images --------------------------
echo "$0: Processing YOMDLE ${language}"
echo "Date: $(date)."
mkdir -p "${yomdle_dir}"/{truth_csv,truth_csv_raw,truth_line_image} || exit 1

local/yomdle2csv.py \
  --inputDir "${database_yomdle}" \
  --outputDir "${yomdle_dir}/truth_csv_raw/" \
  --log "${yomdle_dir}/YOMDLE2CSV.log" || exit 1

local/create_line_image_from_page_image.py \
  --im-format "jpg" \
  "${database_yomdle}/images" \
  "${yomdle_dir}/truth_csv_raw" \
  "${yomdle_dir}" || exit 1

# --- CangJie table ----------------------------------------------------------
echo "Downloading table for CangJie."
cangjie_table="${download_dir}/cj5-cc.txt"

# wget -O does not create missing directories, and slam_dir/yomdle_dir may
# have been pointed somewhere outside download_dir.
mkdir -p "${download_dir}" || exit 1

# Download to a scratch file and regenerate the table from it on every run.
# Downloading with `wget -P` and trimming in place is not rerunnable: the
# second download is saved under a numbered name (cj5-cc.txt.1) while the
# in-place trim removes 8 more lines from the already-trimmed table.
cangjie_raw=$(mktemp "${cangjie_table}.XXXXXX") || exit 1
if ! wget -O "${cangjie_raw}" "${cangjie_url}"; then
  rm -f -- "${cangjie_raw}"
  exit 1
fi

# Keep everything after line 8 (presumably the first 8 lines are a header
# that is not part of the table data -- confirm against the upstream file).
if ! perl -n -e 'print if $. > 8' "${cangjie_raw}" > "${cangjie_table}"; then
  rm -f -- "${cangjie_raw}"
  exit 1
fi
rm -f -- "${cangjie_raw}"