run.sh
2.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#OUTDIR="exp/test/pvector-2"
#DATADIR="data"
#NEW_LSTDIR="${OUTDIR}/lst"
#VECTOR_FILES_BEGIN="${DATADIR}/pvectors_1rst/pvectors_teacher"
#VECTOR_FILES_END=".txt"
#VECTOR_FILE="" # Set this only when a single vector file is used for every fold
#VECTOR_FILES_ONE=false # Set to true when there is only one vector file
#KMIN=2
#KMAX=100
# -- LOAD CONFIG FILE
# Usage: run.sh CONFIG_FILE
# Exactly one positional argument is required: the path of a shell file that
# is sourced into this script to define its settings (see the commented-out
# example values above).
CONFIG_FILE="config.sh"
if [ $# -eq 1 ]
then
  CONFIG_FILE="$1"
else
  # Diagnostics belong on stderr. Exit statuses are limited to 0-255, so use
  # a plain 1 rather than the out-of-range -1.
  echo "Need to have one and only one argument" >&2
  exit 1
fi
# Quoted so a config path containing spaces or glob characters is not split.
source "$CONFIG_FILE"
# -- DEFAULT VALUES CONFIGURATION
# Fall back to "false" when VECTOR_FILES_ONE is unset or empty at this point.
: "${VECTOR_FILES_ONE:=false}"
# -- MAKE DIRECTORIES
# Create the output directory and the directory for the generated list files.
# `mkdir -p` already succeeds when the directory exists, so no separate -d
# check is needed. The paths are quoted so that values containing spaces or
# glob characters are passed as a single argument.
mkdir -p -- "${OUTDIR}"
mkdir -p -- "${NEW_LSTDIR}"
# -- BEGIN BY KFOLD
# For each of the folds 1..4:
#   1. derive the per-fold input/output paths,
#   2. run bin/replace_label.py on the train and validation lists with
#      --field "type", writing *_type.lst files into NEW_LSTDIR,
#   3. concatenate both results into a single metas file,
#   4. source run-clustering.sh in the current shell, so every variable
#      assigned below is visible to it (presumably it reads them -- confirm
#      against run-clustering.sh before renaming anything here).
for kfold in {1..4}
do
  # Some useful variables
  CHAR_INFO="${DATADIR}/character_information.csv"
  TRAIN_TYPE_LST="${NEW_LSTDIR}/train_${kfold}_type.lst"
  VAL_TYPE_LST="${NEW_LSTDIR}/val_${kfold}_type.lst"

  # Configuration for the run clustering file
  # With one vector file per fold, build its name from the configured prefix
  # and suffix; otherwise VECTOR_FILE keeps whatever value it already has.
  # The expansion is quoted and compared with the portable `=` so an empty or
  # multi-word value cannot break the test.
  if [ "${VECTOR_FILES_ONE}" = false ]
  then
    VECTOR_FILE="${VECTOR_FILES_BEGIN}_${kfold}${VECTOR_FILES_END}"
  fi
  TRAIN_LST="${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst"
  VAL_LST="${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst"
  EXP_DIR="${OUTDIR}/${kfold}"
  METAS_TYPE="${NEW_LSTDIR}/metas_${kfold}_type.lst" #*
  METAS_CHARACTER="${DATADIR}/masseffect.lst"

  # Per-fold experiment directory; quoted so paths with spaces stay intact.
  # `mkdir -p` is a no-op when the directory already exists.
  mkdir -p -- "${EXP_DIR}"

  # Extract character information
  echo "Extracting character information"
  python3 "bin/replace_label.py" \
    "${METAS_CHARACTER}" \
    "${CHAR_INFO}" \
    --field "type" \
    --lst "${TRAIN_LST}" \
    --outfile "${TRAIN_TYPE_LST}"
  python3 "bin/replace_label.py" \
    "${METAS_CHARACTER}" \
    "${CHAR_INFO}" \
    --field "type" \
    --lst "${VAL_LST}" \
    --outfile "${VAL_TYPE_LST}"

  # Train list first, then validation list, merged into one metas file.
  cat "${TRAIN_TYPE_LST}" "${VAL_TYPE_LST}" > "${METAS_TYPE}"

  source "run-clustering.sh"
done
# Regroup measures with respect to character classes
# OUTDIR is quoted in both calls so that a path containing spaces or glob
# characters reaches the Python script as a single argument.
echo "Regrouping measures with respect to character classes"
python3 "bin/regroup-measures.py" "${OUTDIR}"
# Regroup measures with respect to type classes
echo "Regrouping measures with respect to type classes"
python3 "bin/regroup-measures.py" "${OUTDIR}" --suffix "_type" --measurefile "measures_type.json"