#!/bin/bash

# Copyright 2017 Johns Hopkins University (author: Hossein Hadian)
# Apache 2.0

# This script downloads and prepares the training and test data for both
# CIFAR-10 and CIFAR-100.

[ -f ./path.sh ] && . ./path.sh; # source the path.

dl_dir=data/download
cifar10=$dl_dir/cifar-10-batches-bin
cifar10_url=https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
cifar100=$dl_dir/cifar-100-binary
cifar100_url=https://www.cs.toronto.edu/~kriz/cifar-100-binary.tar.gz
mkdir -p $dl_dir
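
# Download and unpack the CIFAR-10 binary archive unless it has already been
# extracted under $dl_dir.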
if [ -d $cifar10 ]; then
  echo Not downloading CIFAR-10 as it is already there.
else
  if [ ! -f $dl_dir/cifar-10-binary.tar.gz ]; then
    echo Downloading CIFAR-10...
    wget -P $dl_dir $cifar10_url || exit 1;
  fi
  tar -xvzf $dl_dir/cifar-10-binary.tar.gz -C $dl_dir || exit 1;
  echo Done downloading and extracting CIFAR-10
fi

mkdir -p data/cifar10_{train,test}/data
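
# classes.txt maps each class name from batches.meta.txt to an integer label
# (0-9), one "name id" pair per line; grep '\S' drops any blank lines.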
seq 0 9 | paste -d' ' $cifar10/batches.meta.txt - | grep '\S' >data/cifar10_train/classes.txt
cp data/cifar10_{train,test}/classes.txt
echo 3 > data/cifar10_train/num_channels
echo 3 > data/cifar10_test/num_channels
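
# process_data.py writes the images as Kaldi-format feature matrices on stdout;
# copy-feats compresses them into images.ark under each set's data/ directory
# and writes the matching images.scp index.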
local/process_data.py --dataset train $cifar10 data/cifar10_train/ | \
  copy-feats --compress=true --compression-method=7 \
    ark:- ark,scp:data/cifar10_train/data/images.ark,data/cifar10_train/images.scp || exit 1

local/process_data.py --dataset test $cifar10 data/cifar10_test/ | \
  copy-feats --compress=true --compression-method=7 \
    ark:- ark,scp:data/cifar10_test/data/images.ark,data/cifar10_test/images.scp || exit 1
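
# Optional sanity check (a sketch, assuming the Kaldi binaries are on the PATH
# via path.sh): print the first archived image matrix in text form, e.g.
#   copy-feats scp:data/cifar10_train/images.scp ark,t:- 2>/dev/null | head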

### CIFAR-100
if [ -d $cifar100 ]; then
  echo Not downloading CIFAR-100 as it is already there.
else
  if [ ! -f $dl_dir/cifar-100-binary.tar.gz ]; then
    echo Downloading CIFAR-100...
    wget -P $dl_dir $cifar100_url || exit 1;
  fi
  tar -xvzf $dl_dir/cifar-100-binary.tar.gz -C $dl_dir || exit 1;
  echo Done downloading and extracting CIFAR-100
fi

mkdir -p data/cifar100_{train,test}/data
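
# Same label-map convention as above, using the 100 fine label names; the
# commented-out lines build an optional coarse (20-class) label map as well.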
seq 0 99 | paste -d' ' $cifar100/fine_label_names.txt - | grep '\S' >data/cifar100_train/classes.txt
# seq 0 19 | paste -d' ' $cifar100/coarse_label_names.txt - | grep '\S' >data/cifar100_train/coarse_classes.txt
cp data/cifar100_{train,test}/classes.txt
#cp data/cifar100_{train,test}/coarse_classes.txt
echo 3 > data/cifar100_train/num_channels
echo 3 > data/cifar100_test/num_channels
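
# Same feature-extraction pipeline as for CIFAR-10; --cifar-version CIFAR-100
# presumably tells process_data.py to read the CIFAR-100 binary layout.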
local/process_data.py --cifar-version CIFAR-100 --dataset train $cifar100 data/cifar100_train/ | \
  copy-feats --compress=true --compression-method=7 \
    ark:- ark,scp:data/cifar100_train/data/images.ark,data/cifar100_train/images.scp || exit 1

local/process_data.py --cifar-version CIFAR-100 --dataset test $cifar100 data/cifar100_test/ | \
  copy-feats --compress=true --compression-method=7 \
    ark:- ark,scp:data/cifar100_test/data/images.ark,data/cifar100_test/images.scp || exit 1