#!/usr/bin/env bash
# run-measures.sh
# For now, this script only runs a few small commands
# that we want to test.

# Experiment layout. All paths are relative to the directory the script is
# launched from.
OUTDIR="exp/kmeans_euclidian/teacher-pvector-1"
EXP_DIR="${OUTDIR}"
DATADIR="data"
NEW_LSTDIR="${OUTDIR}/lst"

# Inclusive bounds of the k values iterated over by the measurement loop.
kmin=2
kmax=100

# `mkdir -p` succeeds silently when the directory already exists, so no
# explicit `-d` test is required beforehand.
mkdir -p -- "${OUTDIR}"
mkdir -p -- "${NEW_LSTDIR}"

# For each fold (kfold = 1..4):
#   1. build the "lang" metadata lists from the fold's train/val lists;
#   2. for every k in [kmin, kmax], extract the clustering labels from the
#      stored model, compute the measures and plot the train/val count
#      matrices, then delete the intermediate label file.
#
# Reads:  DATADIR, NEW_LSTDIR, OUTDIR, EXP_DIR, kmin, kmax.
# Writes: ${NEW_LSTDIR}/{train,val,metas}_<kfold>_lang.lst and, for each k,
#         measures_lang.json, train_count_matrix_lang.pdf and
#         val_count_matrix_lang.pdf under ${EXP_DIR}/<kfold>/<k>/.
for kfold in {1..4}; do
    pvector_file="${DATADIR}/pvectors_1rst/pvectors_teacher_${kfold}.txt"
    VECTOR_FILE="${pvector_file}"
    lst_dir="${DATADIR}/pvectors_1rst/lst"
    # Only referenced by the disabled "type" steps kept further down.
    output_kfold="${OUTDIR}/${kfold}"

    # -- DISABLED: relabel the train/val lists with the "type" field
    #python3 "bin/replace_label.py" \
    #    "${DATADIR}/masseffect.lst" \
    #    "${DATADIR}/character_information.csv" \
    #    --field "type" --lst "data/pvectors_1rst/lst/train_${kfold}.lst" \
    #    --outfile "${NEW_LSTDIR}/train_${kfold}_type.lst"

    #python3 "bin/replace_label.py" \
    #    "${DATADIR}/masseffect.lst" \
    #    "${DATADIR}/character_information.csv" \
    #    --field "type" --lst "data/pvectors_1rst/lst/val_${kfold}.lst" \
    #    --outfile "${NEW_LSTDIR}/val_${kfold}_type.lst"

    #cat "${NEW_LSTDIR}/train_${kfold}_type.lst" "${NEW_LSTDIR}/val_${kfold}_type.lst" > "${NEW_LSTDIR}/metas_${kfold}_type.lst"

    TRAIN_LST="${lst_dir}/train_${kfold}.lst"
    VAL_LST="${lst_dir}/val_${kfold}.lst"
    TRAIN_LANG_LST="${NEW_LSTDIR}/train_${kfold}_lang.lst"
    VAL_LANG_LST="${NEW_LSTDIR}/val_${kfold}_lang.lst"
    METAS_LANG="${NEW_LSTDIR}/metas_${kfold}_lang.lst"

    # -- EXTRACT LANGUAGE INFORMATION
    # Overwrite field 2 of each comma-separated row with field 1.
    # The test is a string comparison on purpose: using the bare assignment
    # `$2=$1` as the awk pattern would also discard rows whose first field
    # evaluates to numeric zero (e.g. "0"). Rows with an empty first field
    # (blank lines included) are still skipped.
    awk 'BEGIN { FS = OFS = "," } $1 != "" { $2 = $1; print }' \
        "${TRAIN_LST}" > "${TRAIN_LANG_LST}"
    echo "TRAIN EXTRACT LANGUAGE INFO DONE"
    awk 'BEGIN { FS = OFS = "," } $1 != "" { $2 = $1; print }' \
        "${VAL_LST}" > "${VAL_LANG_LST}"
    echo "VAL EXTRACT LANGUAGE INFO DONE"
    cat "${TRAIN_LANG_LST}" "${VAL_LANG_LST}" > "${METAS_LANG}"
    echo "GLOBAL EXTRACT LANGUAGE INFO DONE"

    echo "Clustering - ${kfold}"

    for ((k = kmin; k <= kmax; k++)); do
        echo "Kmeans Measuring and ploting - ${k}"

        SUB_EXP_DIR="${EXP_DIR}/${kfold}/${k}"
        # Intermediate label file; removed once the measures/plots are done.
        clustered_file="${SUB_EXP_DIR}/clustered_${k}.txt"

        # -- EXTRACT CLUSTERING LABELS
        python3 bin/extract_kmeans.py "${SUB_EXP_DIR}/clustering_${k}.pkl" \
            "${VECTOR_FILE}" \
            --outfile "${clustered_file}"

        # -- MEASURES AND PLOT WITH RESPECT TO LANG VAR
        # Measures
        python3 bin/measure_clustering.py "${clustered_file}" \
            "${METAS_LANG}" \
            "${TRAIN_LST}" \
            "${VAL_LST}" \
            --outfile "${SUB_EXP_DIR}/measures_lang.json"

        # Plot the count matrix of the train set
        python3 bin/plot-count-matrix.py "${clustered_file}" \
            "${METAS_LANG}" \
            "${TRAIN_LST}" \
            --outfile "${SUB_EXP_DIR}/train_count_matrix_lang.pdf"

        # Plot the count matrix of the validation set
        python3 bin/plot-count-matrix.py "${clustered_file}" \
            "${METAS_LANG}" \
            "${VAL_LST}" \
            --outfile "${SUB_EXP_DIR}/val_count_matrix_lang.pdf"

        rm -- "${clustered_file}"

        # -- DISABLED: same measures/plots with respect to the "type" labels
        #python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" \
        #    "${NEW_LSTDIR}/metas_${kfold}_type.lst" "${lst_dir}/train_${kfold}.lst" \
        #    "${lst_dir}/val_${kfold}.lst" \
        #    --outfile "${output_kfold}/${k}/measures_type.json"

        # Plot the count matrix of the train set
        #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
        #    ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/train_${kfold}.lst \
        #    --outfile ${output_kfold}/${k}/train_count_matrix_type.pdf

        # Plot the count matrix of the validation set
        #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
        #    ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/val_${kfold}.lst \
        #    --outfile ${output_kfold}/${k}/val_count_matrix_type.pdf

        # -- DISABLED: plots using the p-vector file as metadata
        # Plot the count matrix of the train set
        #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
        #    ${pvector_file} ${lst_dir}/train_${kfold}.lst \
        #    --outfile ${output_kfold}/${k}/train_count_matrix.pdf

        # Plot the count matrix of the validation set
        #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
        #    ${pvector_file} ${lst_dir}/val_${kfold}.lst \
        #    --outfile ${output_kfold}/${k}/val_count_matrix.pdf
    done
done