Blame view
utils/transform_exp_to_kd.sh
2 KB
e63ab06fc
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
#!/usr/bin/env bash
# -- DESCRIPTION --
#
# Reshape experiment outputs into a form that is usable mainly by the
# knowledge distillation scripts.
#
# For every (k-fold, k-means) pair this script:
#   1. extracts the clustering labels,
#   2. replaces the features using the given vector file,
#   3. generates a list file.
#
# Each resulting (features file, list file) pair is usable by the
# knowledge distillation system.
#
# Every path below is relative, so the script has to be started from the
# directory that contains `bin/`, `data/` and `exp/`.
# --------------------

# -- CONFIGURATION --

# Abort on a failing command, an unset variable or a failing pipeline stage.
set -euo pipefail

# KFOLD config (inclusive bounds)
readonly MIN_KFOLD=1
readonly MAX_KFOLD=4

# KMEAN config (inclusive bounds)
readonly MIN_KMEAN=2
readonly MAX_KMEAN=100

# Vector features file
readonly DATADIR="data"
readonly FEATURES_DIR="${DATADIR}/pv_from_xv"
readonly FEATURES_PREFIX="me_pv_teacher"
readonly FEATURES_SUFFIX=".txt"
readonly EXP_DIR="exp/kmeans_euclidian/pv_from_xv"
readonly VECTOR_FILE_MASSEFFECT="${DATADIR}/xvectors.txt"
readonly OUTDIR="data/pv_from_xv/saved_clustered"

# -- CREATE DIRECTORIES

# OUTPUT DIRECTORY
# `mkdir -p` is already a no-op when the directory exists, so no prior
# `-d` test is needed.
mkdir -p "${OUTDIR}"

# -- FUNCTIONS --

#######################################
# Build the clustered features file and its list file for one
# (k-fold, k-means) pair.
# Globals:
#   EXP_DIR, FEATURES_DIR, FEATURES_PREFIX, FEATURES_SUFFIX,
#   VECTOR_FILE_MASSEFFECT, OUTDIR (read)
#   k, kmean (read only as fallbacks when an argument is omitted)
# Arguments:
#   $1 - k-fold index; defaults to the global `k`
#   $2 - k-means cluster count; defaults to the global `kmean`
# Outputs:
#   Progress messages on stdout. Writes
#   ${OUTDIR}/masseffect_clustered_<k>_<kmean>.txt and the matching .lst
# Returns:
#   Under `set -e` any failing step aborts the whole script.
#######################################
transform() {
  # Explicit arguments are preferred; the fallbacks keep a bare
  # `transform` call (relying on the loop variables) working.
  local fold="${1:-${k}}"
  local n_clusters="${2:-${kmean}}"

  # Experiment sub-directory of this (fold, cluster count) pair
  local sub_exp_dir="${EXP_DIR}/${fold}/${n_clusters}"

  # Features file of the current fold
  local initial_vector_file="${FEATURES_DIR}/${FEATURES_PREFIX}_${fold}${FEATURES_SUFFIX}"

  # Clustering model file
  local clustering="${sub_exp_dir}/clustering_${n_clusters}.pkl"

  # Output files: both share the same stem
  local out_stem="${OUTDIR}/masseffect_clustered_${fold}_${n_clusters}"
  local outfile_masseffect="${out_stem}.txt"
  local listfile_masseffect="${out_stem}.lst"

  # Information about the current process
  printf '%s\n' "[KFOLD, KMEAN]: [${fold}, ${n_clusters}]"

  # Extracting clustering labels
  echo "Extracting clustering labels"
  python3 bin/extract_kmeans.py "${clustering}" \
    "${initial_vector_file}" \
    --outfile "${outfile_masseffect}"

  # Changing features
  # NOTE(review): this step runs `python` while the extraction step runs
  # `python3` - confirm which interpreter bin/replace-features.py expects.
  # NOTE(review): presumably the script rewrites the second file in place,
  # since the list file below is cut from that same path - confirm.
  echo "Changing features"
  python bin/replace-features.py "${VECTOR_FILE_MASSEFFECT}" "${outfile_masseffect}"

  # Extracting list file: first space-separated field of every line
  cut -d' ' -f1 "${outfile_masseffect}" > "${listfile_masseffect}"
}

# -- MAIN LOOPS
for ((k = MIN_KFOLD; k <= MAX_KFOLD; k++)); do
  for ((kmean = MIN_KMEAN; kmean <= MAX_KMEAN; kmean++)); do
    transform "${k}" "${kmean}"
  done
done