Blame view
run.sh
3.12 KB
14d6e0cb3
|
1 |
|
fee5922c3
|
2 3 4 |
#OUTDIR="exp/test/pvector-2"
#DATADIR="data"
#NEW_LSTDIR="${OUTDIR}/lst"
b3371498c
|
5 |
|
fee5922c3
|
6 7 8 9 |
#VECTOR_FILES_BEGIN="${DATADIR}/pvectors_1rst/pvectors_teacher"
#VECTOR_FILES_END=".txt"
#VECTOR_FILE="" # To specify if there's only one vector file
#VECTOR_FILES_ONE=false # Whether there is only one vector file
b3371498c
|
10 |
|
fee5922c3
|
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
#KMIN=2
#KMAX=100

# -- LOAD CONFIG FILE
# Usage: run.sh CONFIG_FILE
# Exactly one argument is required: the path of a shell file that is sourced
# below. Variables used later in this script without a default here
# (OUTDIR, DATADIR, NEW_LSTDIR, ...) are presumably defined by that file.
CONFIG_FILE="config.sh"
if [ $# -eq 1 ]; then
  CONFIG_FILE="$1"
else
  # Diagnostics belong on stderr. Exit statuses must lie in 0-255, so a plain
  # failure status is used instead of the original -1.
  echo "Need to have one and only one argument" >&2
  exit 1
fi
# Quoted so a path containing spaces or glob characters is sourced as given.
source "$CONFIG_FILE"

# -- DEFAULTS VALUES CONFIGURATION
# When the config file does not set it, fall back to "false".
if [ -z "$VECTOR_FILES_ONE" ]; then
  VECTOR_FILES_ONE=false
fi
e63ab06fc
|
32 33 34 35 36 37 38 39 40 41 |
# Default locations of the character metadata list and the character
# information table, used when the variables are unset or empty at this point.
: "${METAS_CHARACTER:=${DATADIR}/masseffect.lst}"
: "${CHAR_INFO:=${DATADIR}/character_information.csv}"
fee5922c3
|
42 43 |
# -- MAKE DIRECTORIES |
14d6e0cb3
|
44 45 46 47 |
# Create the output directory. mkdir -p succeeds when the directory already
# exists, so no separate -d test is needed; the operand is quoted so a path
# containing spaces stays a single argument.
mkdir -p -- "$OUTDIR"
29644ae6c
|
48 49 50 51 |
# Create the directory that receives the generated list files. mkdir -p
# succeeds when it already exists; the operand is quoted so a path containing
# spaces stays a single argument.
mkdir -p -- "${NEW_LSTDIR}"
fee5922c3
|
52 |
|
95142dfdc
|
53 54 55 56 57 58 59 60 61 62 |
# -- KFOLD MIN and MAX
# First and last fold index handed to seq by the fold loop; they default to
# 1 and 4 when unset or empty at this point.
: "${MIN_KFOLD:=1}"
: "${MAX_KFOLD:=4}"
fee5922c3
|
63 |
# -- BEGIN BY KFOLD |
95142dfdc
|
64 |
# Copy the comma-separated file $1 to $2 with the second field of every record
# overwritten by its first field. Records whose first field is empty
# (including blank lines) are skipped.
# The original pattern-only awk program '$2=$1' used the assigned value as the
# print condition, so it also silently dropped records whose first field was
# numerically zero (e.g. "0"); those records are now kept.
_first_field_as_label() {
  awk -F, -v OFS=, '$1 != "" { $2 = $1; print }' "$1" > "$2"
}

for kfold in $(seq "${MIN_KFOLD}" "${MAX_KFOLD}"); do
  # Some useful variables: per-fold list files written below.
  TRAIN_TYPE_LST="${NEW_LSTDIR}/train_${kfold}_type.lst"
  VAL_TYPE_LST="${NEW_LSTDIR}/val_${kfold}_type.lst"
  TRAIN_LANG_LST="${NEW_LSTDIR}/train_${kfold}_lang.lst"
  VAL_LANG_LST="${NEW_LSTDIR}/val_${kfold}_lang.lst"

  # Configuration for the run clustering file.
  # When VECTOR_FILES_ONE is "false" the vector file name is derived from the
  # fold index; otherwise VECTOR_FILE keeps the value it already has.
  if [ "${VECTOR_FILES_ONE}" = false ]; then
    VECTOR_FILE="${VECTOR_FILES_BEGIN}_${kfold}${VECTOR_FILES_END}"
  fi

  TRAIN_LST="${MOTHER_LST_DIR}/lst/train_${kfold}.lst"
  VAL_LST="${MOTHER_LST_DIR}/lst/val_${kfold}.lst"
  EXP_DIR="${OUTDIR}/${kfold}"
  METAS_TYPE="${NEW_LSTDIR}/metas_${kfold}_type.lst"
  METAS_LANG="${NEW_LSTDIR}/metas_${kfold}_lang.lst"

  # mkdir -p succeeds when the directory already exists.
  mkdir -p -- "${EXP_DIR}"

  # EXTRACT TYPE INFORMATION
  echo "Extracting character information"
  echo "Replace in train"
  python3 "bin/replace_label.py" \
    "${METAS_CHARACTER}" \
    "${CHAR_INFO}" \
    --field "type" \
    --lst "${TRAIN_LST}" \
    --outfile "${TRAIN_TYPE_LST}"
  echo "Replace in val"
  python3 "bin/replace_label.py" \
    "${METAS_CHARACTER}" \
    "${CHAR_INFO}" \
    --field "type" \
    --lst "${VAL_LST}" \
    --outfile "${VAL_TYPE_LST}"
  echo "Merge them"
  cat "${TRAIN_TYPE_LST}" "${VAL_TYPE_LST}" > "${METAS_TYPE}"

  # EXTRACT LANGUAGE INFORMATION
  echo "Language info for train"
  _first_field_as_label "${TRAIN_LST}" "${TRAIN_LANG_LST}"

  echo "Language info for val"
  _first_field_as_label "${VAL_LST}" "${VAL_LANG_LST}"

  echo "Merge them"
  cat "${TRAIN_LANG_LST}" "${VAL_LANG_LST}" > "${METAS_LANG}"

  echo "Then Run Clustering"
  # Sourced, not executed: it runs in this shell and therefore sees the
  # variables assigned above (which of them it reads is not visible here).
  source "run-clustering.sh"
done
14d6e0cb3
|
120 |
|
fee5922c3
|
121 122 123 124 125 126 127 |
# Regroup measures with respect to character classes
echo "Regrouping measures with respect to character classes"
# OUTDIR is quoted so a path containing spaces is passed as one argument.
python3 "bin/regroup-measures.py" "${OUTDIR}"

# Regroup measures with respect to type classes
echo "Regrouping measures with respect to type classes"
python3 "bin/regroup-measures.py" "${OUTDIR}" --suffix "_type" --measurefile "measures_type.json"