#!/usr/bin/env bash
# transform_exp_to_kd.sh
# -- DESCRIPTION --
#
# This script transforms data into a shape that is
# usable mainly by knowledge distillation scripts.
#
# First, it extracts the clustering labels,
# then replaces the features with the given ones,
# and finally generates a list file.
#
# The resulting pairs of features file and list file can be used
# by the knowledge distillation system.
# --------------------
# -- CONFIGURATION --
# Strict mode: abort on any failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Every setting below keeps its historical value as the default and may be
# overridden from the environment, for example:
#   MAX_KMEAN=10 OUTDIR=/tmp/clustered bash transform_exp_to_kd.sh

# KFOLD config: inclusive range of values the main loop assigns to `k`.
MIN_KFOLD="${MIN_KFOLD:-1}"
MAX_KFOLD="${MAX_KFOLD:-4}"

# KMEAN config: inclusive range of values the main loop assigns to `kmean`.
MIN_KMEAN="${MIN_KMEAN:-2}"
MAX_KMEAN="${MAX_KMEAN:-100}"

# Vector features files.
# Per-fold feature files are looked up as
#   ${FEATURES_DIR}/${FEATURES_PREFIX}_<k>${FEATURES_SUFFIX}
DATADIR="${DATADIR:-data}"
FEATURES_DIR="${FEATURES_DIR:-${DATADIR}/pv_from_xv}"
FEATURES_PREFIX="${FEATURES_PREFIX:-me_pv_teacher}"
FEATURES_SUFFIX="${FEATURES_SUFFIX:-.txt}"

# Root of the clustering experiments; models are looked up as
#   ${EXP_DIR}/<k>/<kmean>/clustering_<kmean>.pkl
EXP_DIR="${EXP_DIR:-exp/kmeans_euclidian/pv_from_xv}"

# Vector file handed to bin/replace-features.py as its first argument.
VECTOR_FILE_MASSEFFECT="${VECTOR_FILE_MASSEFFECT:-${DATADIR}/xvectors.txt}"

# Destination of the generated .txt / .lst pairs. Derived from FEATURES_DIR
# so that it follows an overridden DATADIR; the default still resolves to
# data/pv_from_xv/saved_clustered.
OUTDIR="${OUTDIR:-${FEATURES_DIR}/saved_clustered}"
# -- CREATE DIRECTORIES
# OUTPUT DIRECTORY
# `mkdir -p` succeeds when the directory already exists, so no prior
# existence test is needed. Quoting keeps a path containing spaces or glob
# characters as a single argument, and `--` protects a path that starts
# with a dash.
mkdir -p -- "${OUTDIR}"
# -- FUNCTIONS --
#######################################
# Produce the clustered features file and its list file for one
# (fold, kmean) pair.
#
# Steps, in order:
#   1. Run bin/extract_kmeans.py on the clustering model and the fold's
#      feature file, passing the output path through --outfile.
#   2. Run bin/replace-features.py with the vector file and that output
#      file (presumably it rewrites the output file in place - confirm
#      against the script).
#   3. Write the first space-separated field of every line of the output
#      file to the matching .lst file.
#
# Globals (read):
#   EXP_DIR, FEATURES_DIR, FEATURES_PREFIX, FEATURES_SUFFIX,
#   VECTOR_FILE_MASSEFFECT, OUTDIR
#   k, kmean - used only when the corresponding argument is omitted
# Arguments:
#   $1 - fold index (optional, defaults to the global k)
#   $2 - kmean value (optional, defaults to the global kmean)
# Outputs:
#   Progress messages on stdout, diagnostics on stderr.
#   ${OUTDIR}/masseffect_clustered_<fold>_<kmean>.txt (via the Python tools)
#   ${OUTDIR}/masseffect_clustered_<fold>_<kmean>.lst
# Returns:
#   1 when no fold/kmean value is available; otherwise the status of the
#   final `cut` (under `set -e` any failing step aborts the script first).
#######################################
transform() {
  # Positional arguments win; the loop globals remain a fallback so that a
  # bare `transform` call keeps working.
  local fold="${1:-${k:-}}"
  local size="${2:-${kmean:-}}"
  if [[ -z "${fold}" || -z "${size}" ]]; then
    echo "transform: fold and kmean must be given as arguments or through the globals k and kmean" >&2
    return 1
  fi

  # Define subdir variable
  local sub_exp_dir="${EXP_DIR}/${fold}/${size}"
  # Define features file variable
  local initial_vector_file="${FEATURES_DIR}/${FEATURES_PREFIX}_${fold}${FEATURES_SUFFIX}"
  # Define clustering model variable
  local clustering="${sub_exp_dir}/clustering_${size}.pkl"
  # Define output files
  local outfile_masseffect="${OUTDIR}/masseffect_clustered_${fold}_${size}.txt"
  local listfile_masseffect="${OUTDIR}/masseffect_clustered_${fold}_${size}.lst"

  # Information of the current process
  echo "[KFOLD, KMEAN]: [${fold}, ${size}]"

  # Extracting clustering labels
  echo "Extracting clustering labels"
  python3 bin/extract_kmeans.py "${clustering}" \
    "${initial_vector_file}" \
    --outfile "${outfile_masseffect}"

  # Changing features
  # Uses python3 like the extraction step: a bare `python` may be missing or
  # resolve to Python 2 depending on the host.
  echo "Changing features"
  python3 bin/replace-features.py "${VECTOR_FILE_MASSEFFECT}" "${outfile_masseffect}"

  # Extracting list file: keep only the first space-separated column.
  cut -d' ' -f1 -- "${outfile_masseffect}" > "${listfile_masseffect}"
}
# -- MAIN LOOPS
# Walk every (k, kmean) combination, folds outermost. Both counters are
# ordinary global variables because transform picks them up from the
# enclosing scope rather than receiving them as arguments.
for ((k = MIN_KFOLD; k <= MAX_KFOLD; k++)); do
  for ((kmean = MIN_KMEAN; kmean <= MAX_KMEAN; kmean++)); do
    transform
  done
done