download_and_untar.sh
1.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
# Copyright 2018 Lucas Jo (Atlas Guide)
# 2018 Wonkyum Lee (Gridspace)
# Apache 2.0
# Exactly one positional argument (the download directory) is required;
# anything else prints the usage text and terminates with status 1.
[ "$#" -eq 1 ] || {
  echo "Usage: $0 <download_dir>"
  echo "e.g.: $0 ./db"
  exit 1
}
#######################################
# Report whether a command name can be resolved by the shell.
# Arguments: $1 - command name to look up
# Outputs:   nothing (stdout and stderr of the lookup are discarded)
# Returns:   the status of `command -v` (0 when the name is found)
#######################################
exists() {
  command -v "$1" &>/dev/null
}
# Directory given on the command line; the archive is downloaded and
# unpacked there.
dir="$1"
# Directory in which links to the language-model files are created.
local_lm_dir="data/local/lm"
# File under $dir whose presence marks the corpus as already extracted.
AUDIOINFO="AUDIO_INFO"
# NOTE(review): presumably the audio sub-directories of the corpus; not
# referenced in the visible part of this script - confirm before removing.
AUDIOLIST="train_data_01 test_data_01"
echo "Now download corpus ----------------------------------------------------"
# Fetch the corpus archive into $dir/db.tar.gz unless that file already exists.
if [ ! -f "$dir/db.tar.gz" ]; then
  # mkdir -p is a no-op when the directory is already there.
  mkdir -p "$dir" || exit 1
  # wget -O creates its output file even when the transfer fails. Download to
  # a temporary name and rename only on success, so that an interrupted run
  # does not leave a truncated db.tar.gz that the next run would skip over.
  if ! wget -O "$dir/db.tar.gz.part" http://www.openslr.org/resources/40/zeroth_korean.tar.gz; then
    echo "$0: failed to download the corpus archive" >&2
    rm -f "$dir/db.tar.gz.part"
    exit 1
  fi
  mv "$dir/db.tar.gz.part" "$dir/db.tar.gz" || exit 1
else
  echo " $dir/db.tar.gz already exist"
fi
echo "Now extract corpus ----------------------------------------------------"
# $dir/$AUDIOINFO serves as the "already extracted" marker (presumably it is
# unpacked from the archive - confirm against the corpus layout).
if [ ! -f "$dir/$AUDIOINFO" ]; then
  # Stop on a failed extraction (e.g. corrupt archive) instead of carrying on
  # with a partially unpacked corpus.
  if ! tar -zxvf "$dir/db.tar.gz" -C "$dir"; then
    echo "$0: failed to extract $dir/db.tar.gz" >&2
    exit 1
  fi
else
  echo " corpus already extracted"
fi
# Create the directory that will hold the links (no-op when it exists).
mkdir -p "$local_lm_dir" || exit 1

# Symlink targets must be absolute, because the links live in a different
# directory than the files they point to. Only prefix $PWD when the download
# directory was given as a relative path; prefixing an absolute path would
# produce a target such as "$PWD//abs/dir/file" that does not exist.
case "$dir" in
  /*) lm_src_dir=$dir ;;
  *) lm_src_dir=$PWD/$dir ;;
esac

echo "Check LMs files"
# Files expected in the download directory: language models, lexicon and the
# segmentation model (see the final message below).
LMList="\
zeroth.lm.fg.arpa.gz \
zeroth.lm.tg.arpa.gz \
zeroth.lm.tgmed.arpa.gz \
zeroth.lm.tgsmall.arpa.gz \
zeroth_lexicon \
zeroth_morfessor.seg"
# $LMList is intentionally unquoted: it is a whitespace-separated name list.
for file in $LMList; do
  if [ -f "$local_lm_dir/$file" ]; then
    echo "$file already exist"
  else
    echo "Linking $file"
    # -f replaces a dangling link left by an earlier run; [ -f ] follows
    # symlinks, so such a link is not reported as existing above and a plain
    # `ln -s` would fail on it with "File exists".
    ln -sf "$lm_src_dir/$file" "$local_lm_dir/$file" || exit 1
  fi
done
echo "all the files (lexicon, LM, segment model) are ready"