run_corpus.sh
883 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
#
# Builds the k-fold, train, dev and test corpora for task1..task3 by calling
# the Ruby scripts under bin/. Every path used (bin/, db/, data/) is relative,
# so the script has to be started from the directory that contains them.

# Abort on the first failing command or unset variable so that a failed
# step is not followed by later stages working on missing or empty files.
set -euo pipefail

# Append bin/ to Ruby's library search path. The ':' separator is only
# emitted when RUBYLIB already holds a value, which avoids a leading empty
# entry and keeps `set -u` from tripping when RUBYLIB is unset.
export RUBYLIB="${RUBYLIB:+${RUBYLIB}:}bin/"
echo "***** Create K-Fold *****"
# Split each task's training CSV with decoupe.rb.
# The following stages read data/<task>_train_<k>.txt and
# data/<task>_dev_<k>.txt for every task, so the second argument must follow
# the loop variable; it was previously hard-coded to data/task1 for all
# three tasks.
# NOTE(review): assumes decoupe.rb takes <input csv> <output prefix>
# <number of folds> -- confirm against bin/decoupe.rb.
for file in task1 task2 task3; do
  echo "[x] $file"
  ruby bin/decoupe.rb "db/${file}-train.csv" "data/${file}" 4
done
echo "***** CREATE TRAIN CORPUS *****"
# For folds 0 through 3 and each task, run tokenize_file.rb on the fold's
# training text file and capture its stdout in a .tokenize file that sits
# next to the input.
for fold in {0..3}; do
  for task in task1 task2 task3; do
    echo "[x] $task"
    src="data/${task}_train_${fold}.txt"
    # Same path as the input with the .txt suffix swapped for .tokenize.
    ruby bin/tokenize_file.rb "$src" > "${src%.txt}.tokenize"
  done
done
echo "***** CREATE DEV CORPUS *****"
# For folds 0 through 3 and each task, run tokenize_file.rb on the fold's
# dev text file and redirect its stdout to the matching .tokenize file.
for ((fold = 0; fold <= 3; fold++)); do
  for task in task1 task2 task3; do
    echo "[x] $task"
    stem="data/${task}_dev_${fold}"
    ruby bin/tokenize_file.rb "${stem}.txt" > "${stem}.tokenize"
  done
done
echo "***** CREATE TEST CORPUS *****"
# Run tokenize_file_gold.rb on each task's testGold CSV from db/ and store
# its stdout as data/<task>_test.tokenize.
for task in task1 task2 task3; do
  echo "[x] $task"
  gold_csv="db/${task}-testGold.csv"
  tokenized="data/${task}_test.tokenize"
  ruby bin/tokenize_file_gold.rb "$gold_csv" > "$tokenized"
done