# transform_exp_to_kd.sh
# -- DESCRIPTION --
#
# This script transforms experiment data into a shape that is
# usable mainly by the knowledge distillation scripts.
#
# First, it extracts the clustering labels,
# then it replaces the features with the given ones,
# and finally it generates a list file.
#
# Each pair of features file and list file will be usable
# by the knowledge distillation system.
# --------------------


# -- CONFIGURATION --
# Stop the script as soon as a command exits with a non-zero status
set -o errexit

# Input data: root directory and the vector file handed to
# bin/replace-features.py
DATADIR='data'
VECTOR_FILE_MASSEFFECT="$DATADIR/xvectors.txt"

# Per-fold features files are named
# <FEATURES_DIR>/<FEATURES_PREFIX>_<k><FEATURES_SUFFIX>
FEATURES_DIR="$DATADIR/pv_from_xv"
FEATURES_PREFIX='me_pv_teacher'
FEATURES_SUFFIX='.txt'

# Clustering experiments are read from <EXP_DIR>/<k>/<kmean>/
EXP_DIR='exp/kmeans_euclidian/pv_from_xv'

# Directory receiving the generated features and list files
OUTDIR='data/pv_from_xv/saved_clustered'

# First and last k-fold index to process (both included)
MIN_KFOLD=1
MAX_KFOLD=4

# First and last k-means value to process (both included)
MIN_KMEAN=2
MAX_KMEAN=100

# -- CREATE DIRECTORIES
# OUTPUT DIRECTORY
# `mkdir -p` creates missing parents and succeeds when the directory already
# exists, so no separate existence test is needed. The path is quoted so it
# reaches mkdir as a single word even if it contains spaces or glob
# characters.
mkdir -p -- "${OUTDIR}"


# -- FUNCTIONS --
# transform [KFOLD [KMEAN]]
#
# Builds the clustered features file and the matching list file for one
# (k-fold, k-means) pair.
#
# Arguments:
#   $1 - k-fold index; falls back to the global `k` when omitted
#   $2 - k-means value; falls back to the global `kmean` when omitted
# Globals read:
#   EXP_DIR, FEATURES_DIR, FEATURES_PREFIX, FEATURES_SUFFIX,
#   VECTOR_FILE_MASSEFFECT, OUTDIR (and k / kmean as fallbacks)
# Outputs:
#   Progress messages on stdout.
#   ${OUTDIR}/masseffect_clustered_<kfold>_<kmean>.txt
#   ${OUTDIR}/masseffect_clustered_<kfold>_<kmean>.lst
# Returns:
#   1, with a message on stderr, when the k-fold or k-means value is empty;
#   otherwise the status of the last command executed.
transform() {
    local fold="${1:-${k:-}}"
    local n_clusters="${2:-${kmean:-}}"

    # Without both values every path below would be malformed.
    if [[ -z "${fold}" || -z "${n_clusters}" ]]; then
        echo "transform: missing k-fold or k-means value" >&2
        return 1
    fi

    # Experiment sub-directory for this pair and the clustering model in it
    local sub_exp_dir="${EXP_DIR}/${fold}/${n_clusters}"
    local clustering="${sub_exp_dir}/clustering_${n_clusters}.pkl"

    # Features file of the current fold
    local initial_vector_file="${FEATURES_DIR}/${FEATURES_PREFIX}_${fold}${FEATURES_SUFFIX}"

    # Output files (same base name, different extension)
    local out_base="${OUTDIR}/masseffect_clustered_${fold}_${n_clusters}"
    local outfile_masseffect="${out_base}.txt"

    # Information of the current process
    echo "[KFOLD, KMEAN]: [${fold}, ${n_clusters}]"

    # Extracting clustering labels
    echo "Extracting clustering labels"
    python3 bin/extract_kmeans.py "${clustering}" \
        "${initial_vector_file}" \
        --outfile "${outfile_masseffect}"

    # Changing features
    # NOTE(review): this step runs `python` while the previous one runs
    # `python3` -- confirm which interpreter replace-features.py expects.
    # Presumably the script rewrites the output file in place, since the
    # list file below is read from it -- TODO confirm.
    echo "Changing features"
    python bin/replace-features.py "${VECTOR_FILE_MASSEFFECT}" "${outfile_masseffect}"

    # Extracting list file: first space-separated field of every line
    cut -d' ' -f1 "${outfile_masseffect}" > "${out_base}.lst"
}


# -- MAIN LOOPS
# Run transform for every (k-fold, k-means) pair of the configured ranges,
# bounds included. transform reads the loop variables `k` and `kmean` as
# globals, so these two names must be kept.
# Arithmetic loops are used instead of `$(seq ...)`: no external process is
# spawned, and a range whose lower bound exceeds its upper bound is simply
# empty regardless of the platform's seq implementation.
for ((k = MIN_KFOLD; k <= MAX_KFOLD; k++)); do
    for ((kmean = MIN_KMEAN; kmean <= MAX_KMEAN; kmean++)); do
        transform
    done
done