#!/usr/bin/env bash
#
# run-measures.sh
#
# For now, this run only executes a few small commands that we want to test.
#
# For each of the 4 folds and each k in [kmin, kmax], the script:
#   1. builds "language" list files from the train/val lists of the fold,
#   2. extracts the cluster labels from the k-means model saved for that k,
#   3. computes clustering measures and plots the train/val count matrices,
#   4. removes the intermediate clustered_<k>.txt file.
#
# Bash is required (brace expansion and arithmetic for-loop).

OUTDIR="exp/kmeans_euclidian/teacher-pvector-1"
EXP_DIR="${OUTDIR}"
DATADIR="data"
NEW_LSTDIR="${OUTDIR}/lst"

kmin=2
kmax=100

# mkdir -p is idempotent and creates missing parents, so no existence test
# is needed beforehand.
mkdir -p -- "${OUTDIR}" "${NEW_LSTDIR}"

for kfold in {1..4}; do
  pvector_file="${DATADIR}/pvectors_1rst/pvectors_teacher_${kfold}.txt"
  VECTOR_FILE="${pvector_file}"
  # lst_dir and output_kfold are only referenced by the commented-out
  # "type" experiments below; they are kept so those can be re-enabled.
  lst_dir="${DATADIR}/pvectors_1rst/lst"
  output_kfold="${OUTDIR}/${kfold}"

  #python3 "bin/replace_label.py" \
  #  "${DATADIR}/masseffect.lst" \
  #  "${DATADIR}/character_information.csv" \
  #  --field "type" --lst "data/pvectors_1rst/lst/train_${kfold}.lst" \
  #  --outfile "${NEW_LSTDIR}/train_${kfold}_type.lst"

  #python3 "bin/replace_label.py" \
  #  "${DATADIR}/masseffect.lst" \
  #  "${DATADIR}/character_information.csv" \
  #  --field "type" --lst "data/pvectors_1rst/lst/val_${kfold}.lst" \
  #  --outfile "${NEW_LSTDIR}/val_${kfold}_type.lst"

  #cat "${NEW_LSTDIR}/train_${kfold}_type.lst" "${NEW_LSTDIR}/val_${kfold}_type.lst" > "${NEW_LSTDIR}/metas_${kfold}_type.lst"

  TRAIN_LST="${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst"
  VAL_LST="${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst"
  TRAIN_LANG_LST="${NEW_LSTDIR}/train_${kfold}_lang.lst"
  VAL_LANG_LST="${NEW_LSTDIR}/val_${kfold}_lang.lst"
  METAS_LANG="${NEW_LSTDIR}/metas_${kfold}_lang.lst"

  # EXTRACT LANGUAGE INFORMATION
  # Copies the first comma-separated field into the second one.
  # NOTE(review): the assignment is used as the awk pattern, so records whose
  # first field is empty or numerically zero are not printed - confirm that
  # this filtering is intended.
  awk '$2=$1' FS=, OFS=, "${TRAIN_LST}" > "${TRAIN_LANG_LST}"
  echo "TRAIN EXTRACT LANGUAGE INFO DONE"
  awk '$2=$1' FS=, OFS=, "${VAL_LST}" > "${VAL_LANG_LST}"
  echo "VAL EXTRACT LANGUAGE INFO DONE"
  cat "${TRAIN_LANG_LST}" "${VAL_LANG_LST}" > "${METAS_LANG}"
  echo "GLOBAL EXTRACT LANGUAGE INFO DONE"

  echo "Clustering - ${kfold}"
  for ((k = kmin; k <= kmax; k++)); do
    echo "Kmeans Measuring and ploting - ${k}"
    SUB_EXP_DIR="${EXP_DIR}/${kfold}/${k}"
    clustered_file="${SUB_EXP_DIR}/clustered_${k}.txt"

    # -- EXTRACT CLUSTERING LABELS
    # Every following step reads the file written here, so there is no point
    # in running them when the extraction fails: skip this k instead.
    if ! python3 bin/extract_kmeans.py "${SUB_EXP_DIR}/clustering_${k}.pkl" \
      "${VECTOR_FILE}" \
      --outfile "${clustered_file}"; then
      echo "extract_kmeans failed for fold ${kfold}, k=${k}; skipping" >&2
      continue
    fi

    # -- MEASURES AND PLOT WITH RESPECT TO LANG VAR
    # Measures
    python3 bin/measure_clustering.py "${clustered_file}" \
      "${METAS_LANG}" \
      "${TRAIN_LST}" \
      "${VAL_LST}" \
      --outfile "${SUB_EXP_DIR}/measures_lang.json"

    # This script plots the count matrix of the train set
    python3 bin/plot-count-matrix.py "${clustered_file}" \
      "${METAS_LANG}" \
      "${TRAIN_LST}" \
      --outfile "${SUB_EXP_DIR}/train_count_matrix_lang.pdf"

    # This script plots the count matrix of the validation set
    python3 bin/plot-count-matrix.py "${clustered_file}" \
      "${METAS_LANG}" \
      "${VAL_LST}" \
      --outfile "${SUB_EXP_DIR}/val_count_matrix_lang.pdf"

    # The clustered labels are only an intermediate product.
    rm -- "${clustered_file}"

    #python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" \
    #  "${NEW_LSTDIR}/metas_${kfold}_type.lst" "${lst_dir}/train_${kfold}.lst" \
    #  "${lst_dir}/val_${kfold}.lst" \
    #  --outfile "${output_kfold}/${k}/measures_type.json"

    # This script plots the count matrix of the train set
    #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
    #  ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/train_${kfold}.lst \
    #  --outfile ${output_kfold}/${k}/train_count_matrix_type.pdf

    # This script plots the count matrix of the validation set
    #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
    #  ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/val_${kfold}.lst \
    #  --outfile ${output_kfold}/${k}/val_count_matrix_type.pdf

    # This script plots the count matrix of the train set
    #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
    #  ${pvector_file} ${lst_dir}/train_${kfold}.lst \
    #  --outfile ${output_kfold}/${k}/train_count_matrix.pdf

    # This script plots the count matrix of the validation set
    #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
    #  ${pvector_file} ${lst_dir}/val_${kfold}.lst \
    #  --outfile ${output_kfold}/${k}/val_count_matrix.pdf
  done
done