Blame view

utils/transform_exp_to_kd.sh 2 KB
e63ab06fc   Mathias Quillot   New organisation ...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
  
  # -- DESCRIPTION --
  #
  # This script reshapes experiment data into a form that is
  # usable mainly by the knowledge distillation scripts.
  #
  # First, it extracts the clustering labels,
  # then it replaces the features with the given ones,
  # and finally it generates a list file.
  #
  # Each pair of features file and list file can then be used
  # by the knowledge distillation system.
  # --------------------
  
  
  # -- CONFIGURATION --
  # Strict mode: stop at the first failing command (-e), treat the use of an
  # unset variable as an error (-u) so that a mistyped name cannot silently
  # expand to an empty path, and make a pipeline fail when any of its stages
  # fails (pipefail).
  set -euo pipefail

  # KFOLD config: first and last value of the k loop (both included)
  MIN_KFOLD=1
  MAX_KFOLD=4

  # KMEAN config: first and last value of the kmean loop (both included)
  MIN_KMEAN=2
  MAX_KMEAN=100

  # Vector features files: one file per k, named
  # ${FEATURES_DIR}/${FEATURES_PREFIX}_<k>${FEATURES_SUFFIX}
  DATADIR="data"
  FEATURES_DIR="${DATADIR}/pv_from_xv"
  FEATURES_PREFIX="me_pv_teacher"
  FEATURES_SUFFIX=".txt"

  # Root of the clustering models, laid out as ${EXP_DIR}/<k>/<kmean>/
  EXP_DIR="exp/kmeans_euclidian/pv_from_xv"
  # File handed to bin/replace-features.py as its first argument
  VECTOR_FILE_MASSEFFECT="${DATADIR}/xvectors.txt"
  # Where the clustered .txt files and their .lst lists are written.
  # Derived from FEATURES_DIR so the path is spelled only once.
  OUTDIR="${FEATURES_DIR}/saved_clustered"
  
  # -- CREATE DIRECTORIES
  # OUTPUT DIRECTORY
  # `mkdir -p` already succeeds when the directory exists, so no prior
  # existence test is needed. The expansion is quoted so that a path holding
  # spaces or glob characters is passed as one single argument.
  mkdir -p "${OUTDIR}"
  
  
  # -- FUNCTIONS --
  #######################################
  # Produce the clustered features file and its list file for one
  # (k, kmean) pair.
  #
  # Steps, in order:
  #   1. run bin/extract_kmeans.py with the clustering model, the features
  #      file of the fold and `--outfile <output file>`;
  #   2. run bin/replace-features.py with the x-vector file and that same
  #      output file (presumably it rewrites the output file in place;
  #      confirm against the helper script);
  #   3. write the first space-separated field of every line of the output
  #      file into the matching .lst file.
  #
  # Globals (read):
  #   EXP_DIR, FEATURES_DIR, FEATURES_PREFIX, FEATURES_SUFFIX,
  #   VECTOR_FILE_MASSEFFECT, OUTDIR
  #   k, kmean - only used when the matching argument is omitted
  # Arguments (both optional):
  #   $1 - value used as k      (default: the global k)
  #   $2 - value used as kmean  (default: the global kmean)
  # Outputs:
  #   progress messages on stdout
  # Returns:
  #   the status of the first failing step, otherwise the status of `cut`
  #######################################
  function transform() {
    # Fall back to the loop variables so that a bare `transform` call
    # keeps working exactly as before.
    local fold="${1:-${k}}"
    local n_clusters="${2:-${kmean}}"

    # Directory of the experiment for this pair, and the model stored in it
    local sub_exp_dir="${EXP_DIR}/${fold}/${n_clusters}"
    local clustering="${sub_exp_dir}/clustering_${n_clusters}.pkl"

    # Features file of the fold
    local initial_vector_file="${FEATURES_DIR}/${FEATURES_PREFIX}_${fold}${FEATURES_SUFFIX}"

    # Output files
    local outfile_masseffect="${OUTDIR}/masseffect_clustered_${fold}_${n_clusters}.txt"
    local listfile_masseffect="${OUTDIR}/masseffect_clustered_${fold}_${n_clusters}.lst"

    # Information of the current process
    echo "[KFOLD, KMEAN]: [${fold}, ${n_clusters}]"

    # Extracting clustering labels
    echo "Extracting clustering labels"
    python3 bin/extract_kmeans.py "${clustering}" \
      "${initial_vector_file}" \
      --outfile "${outfile_masseffect}" || return

    # Changing features.
    # Run with python3 like the step above so that both helpers use the same
    # interpreter; every path is quoted so it stays a single argument.
    echo "Changing features"
    python3 bin/replace-features.py "${VECTOR_FILE_MASSEFFECT}" \
      "${outfile_masseffect}" || return

    # Extracting list file: keep only the first field of each line
    cut -d' ' -f1 "${outfile_masseffect}" > "${listfile_masseffect}"
  }
  
  
  # -- MAIN LOOPS
  # Outer loop: every k from MIN_KFOLD to MAX_KFOLD (both included).
  # Inner loop: every kmean from MIN_KMEAN to MAX_KMEAN (both included).
  # transform is called without arguments and picks up the current k and
  # kmean loop variables.
  for (( k = MIN_KFOLD; k <= MAX_KFOLD; k++ )); do
    for (( kmean = MIN_KMEAN; kmean <= MAX_KMEAN; kmean++ )); do
      transform
    done
  done