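# run-clustering.sh
#
# This script trains k-means clusterings for every k between KMIN and KMAX,
# then, for each k, extracts the cluster assignments, measures them against
# the metadata files and plots the train/validation count matrices.
#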


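# -- CONFIGURATION
# THIS SCRIPT NEEDS THESE VARIABLES (set them before running it)
# Vector file
#VECTOR_FILE=""
# Train list
#TRAIN_LST=""
# Val list
#VAL_LST=""
# Exp directory
#EXP_DIR=""
# Metas file with type values
#METAS_TYPE=""
# Metas file with character values
#METAS_CHARACTER=""
# Metas file with lang values
#METAS_LANG=""
# Smallest and largest number of clusters to try
#KMIN=""
#KMAX=""
# Label printed in the "Clustering - ..." log line
#kfold=""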


#echo "VECTOR FILE: $VECTOR_FILE"
#echo "TRAIN LIST: $TRAIN_LST"
#echo "VAL LIST: $VAL_LST"
#echo "EXP DIR: $EXP_DIR"
#echo "METAS TYPE: $METAS_TYPE"
#echo "METAS_CHARACTER: $METAS_CHARACTER"



# -- TRAIN KMEANS
# Run the k-means training script on the training list. The results are
# expected under ${EXP_DIR}: the evaluation loop further down reads
# ${EXP_DIR}/<k>/clustering_<k>.pkl for every k in KMIN..KMAX.
# kfold, KMIN and KMAX are not set in this file; they must be provided by the
# caller. kfold is only used to label the log line.
echo "Clustering - ${kfold}"
# Every expansion is quoted so that paths containing spaces or glob characters
# reach the Python script as single arguments.
python3 bin/cluster_kmeans.py "${VECTOR_FILE}" \
    "${TRAIN_LST}" \
    "${EXP_DIR}" --kmin "${KMIN}" --kmax "${KMAX}"



# Measure one clustering against one metadata file, then plot the count
# matrix of the train set and of the validation set for that same metadata.
# Globals:   TRAIN_LST, VAL_LST (read)
# Arguments: $1 - clustered file produced by extract_kmeans.py
#            $2 - metas file to compare the clusters with
#            $3 - directory receiving the output files
#            $4 - suffix appended to the output base names (may be empty)
# Outputs:   $3/measures$4.json, $3/train_count_matrix$4.pdf and
#            $3/val_count_matrix$4.pdf (written by the Python scripts)
measure_and_plot() {
    # Measures
    python3 bin/measure_clustering.py "${1}" \
        "${2}" \
        "${TRAIN_LST}" \
        "${VAL_LST}" \
        --outfile "${3}/measures${4}.json"

    # Plot count matrix for train
    python3 bin/plot-count-matrix.py "${1}" \
        "${2}" \
        "${TRAIN_LST}" \
        --outfile "${3}/train_count_matrix${4}.pdf"

    # Plot count matrix for val
    python3 bin/plot-count-matrix.py "${1}" \
        "${2}" \
        "${VAL_LST}" \
        --outfile "${3}/val_count_matrix${4}.pdf"
}

# Evaluate every clustering from KMIN to KMAX (step 1). Each k has its own
# sub-directory ${EXP_DIR}/<k> holding the model and all derived outputs.
for k in $(seq "${KMIN}" 1 "${KMAX}")
do
    SUB_EXP_DIR="${EXP_DIR}/${k}"

    # -- EXTRACT KMEANS VALUES
    # Feed the stored model and the vector file to extract_kmeans.py, which
    # writes clustered_<k>.txt; every later step reads that file.
    echo "Kmeans Measuring and extraction - ${k}"
    python3 bin/extract_kmeans.py "${SUB_EXP_DIR}/clustering_${k}.pkl" \
        "${VECTOR_FILE}" \
        --outfile "${SUB_EXP_DIR}/clustered_${k}.txt"

    # -- MEASURES AND PLOT WITH RESPECT TO CHARACTER VAR
    # The plots now receive METAS_CHARACTER, like the TYPE and LANG plots
    # receive their own metas file. They used to be given VECTOR_FILE, which
    # looked like a copy-paste slip.
    # NOTE(review): confirm against plot-count-matrix.py that its second
    # argument is a metas file.
    measure_and_plot "${SUB_EXP_DIR}/clustered_${k}.txt" \
        "${METAS_CHARACTER}" "${SUB_EXP_DIR}" ""

    # -- MEASURES AND PLOT WITH RESPECT TO TYPE VAR
    measure_and_plot "${SUB_EXP_DIR}/clustered_${k}.txt" \
        "${METAS_TYPE}" "${SUB_EXP_DIR}" "_type"

    # -- MEASURES AND PLOT WITH RESPECT TO LANG VAR
    measure_and_plot "${SUB_EXP_DIR}/clustered_${k}.txt" \
        "${METAS_LANG}" "${SUB_EXP_DIR}" "_lang"

done