Blame view

run-measures.sh 4.45 KB
42b4edb5a   Mathias Quillot   This file aims to...
1
2
3
  # For the moment, this run only executes
  # a few small commands that we want to
  # test.
95142dfdc   Mathias Quillot   maj. No comment
4
5
  # Root directory of this experiment's outputs; EXP_DIR is a second name
  # for the same location.
  OUTDIR='exp/kmeans_euclidian/teacher-pvector-1'
  EXP_DIR="$OUTDIR"
42b4edb5a   Mathias Quillot   This file aims to...
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
  # Input data root, and the directory (below OUTDIR) that receives the
  # list files generated by this script.
  DATADIR='data'
  NEW_LSTDIR="$OUTDIR/lst"

  # Inclusive range of k values swept by the `seq` loop further down.
  kmin=2; kmax=100
  
  # Create the output tree up front. `mkdir -p` succeeds silently for
  # directories that already exist, so no separate existence test is needed.
  # Abort when creation fails: every later step writes below these
  # directories, so continuing would only produce a cascade of errors.
  if ! mkdir -p -- "$OUTDIR" "$NEW_LSTDIR"; then
      echo "Cannot create output directories: ${OUTDIR} ${NEW_LSTDIR}" >&2
      exit 1
  fi
  
  # Outer loop over kfold = 1..4; everything up to the final `done` runs
  # once per value.
  for kfold in {1..4}
  do
      # Vector file used for the current kfold value.
      pvector_file="${DATADIR}/pvectors_1rst/pvectors_teacher_${kfold}.txt"
95142dfdc   Mathias Quillot   maj. No comment
25
      # Second name for the fold's vector file; this is the one passed to
      # the label-extraction step.
      VECTOR_FILE="${pvector_file}"
42b4edb5a   Mathias Quillot   This file aims to...
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
      # Per-fold helper paths. In the visible part of the script they are
      # only referenced by commands that are currently commented out.
      lst_dir="$DATADIR/pvectors_1rst/lst"
      output_kfold="$OUTDIR/$kfold"
      
      #python3 "bin/replace_label.py" \
      #    "${DATADIR}/masseffect.lst" \
      #    "${DATADIR}/character_information.csv" \
      #    --field "type" --lst "data/pvectors_1rst/lst/train_${kfold}.lst" \
      #    --outfile "${NEW_LSTDIR}/train_${kfold}_type.lst"
  
      #python3 "bin/replace_label.py" \
      #    "${DATADIR}/masseffect.lst" \
      #    "${DATADIR}/character_information.csv" \
      #    --field "type" --lst "data/pvectors_1rst/lst/val_${kfold}.lst" \
      #    --outfile "${NEW_LSTDIR}/val_${kfold}_type.lst"
     
151e596e3   Mathias Quillot   Some modification...
41
      #cat "${NEW_LSTDIR}/train_${kfold}_type.lst" "${NEW_LSTDIR}/val_${kfold}_type.lst" > "${NEW_LSTDIR}/metas_${kfold}_type.lst"
95142dfdc   Mathias Quillot   maj. No comment
42
43
44
45
46
47
48
49
50
51
52
53
54
55
      # Per-fold list files: the existing train/validation lists (inputs)
      # and the language-labelled lists written just below (outputs).
      TRAIN_LST="${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst"
      VAL_LST="${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst"
      TRAIN_LANG_LST="${NEW_LSTDIR}/train_${kfold}_lang.lst"
      VAL_LANG_LST="${NEW_LSTDIR}/val_${kfold}_lang.lst"
      METAS_LANG="${NEW_LSTDIR}/metas_${kfold}_lang.lst"

      # EXTRACT LANGUAGE INFORMATION
      # For each comma-separated record, overwrite the second field with a
      # copy of the first one. The explicit `$1 != ""` guard skips records
      # with an empty first field but keeps records whose first field is
      # numerically zero (e.g. "0"), which a bare `$2=$1` pattern would
      # silently drop.
      awk -F, -v OFS=, '$1 != "" { $2 = $1; print }' "${TRAIN_LST}" > "${TRAIN_LANG_LST}"
      echo "TRAIN EXTRACT LANGUAGE INFO DONE"
      awk -F, -v OFS=, '$1 != "" { $2 = $1; print }' "${VAL_LST}" > "${VAL_LANG_LST}"
      echo "VAL EXTRACT LANGUAGE INFO DONE"
      # Train followed by validation, concatenated into one metadata file.
      cat "${TRAIN_LANG_LST}" "${VAL_LANG_LST}" > "${METAS_LANG}"
      echo "GLOBAL EXTRACT LANGUAGE INFO DONE"
    
42b4edb5a   Mathias Quillot   This file aims to...
56

42b4edb5a   Mathias Quillot   This file aims to...
57
58
59
60
61
      # Progress message for the current kfold value.
      echo "Clustering - ${kfold}"
  
      # Inner loop: k runs from kmin to kmax (inclusive) in steps of 1.
      for k in $(seq ${kmin} 1 ${kmax})
      do
          # Progress message for the current k.
          echo "Kmeans Measuring and ploting - ${k}"
95142dfdc   Mathias Quillot   maj. No comment
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
  
  	SUB_EXP_DIR="${EXP_DIR}/${kfold}/${k}"
          # Temporary per-k file holding the extracted cluster labels; it is
          # read by every step below and deleted once they are done.
          clustered_file="${SUB_EXP_DIR}/clustered_${k}.txt"

          # -- EXTRACT CLUSTERING LABELS
          # If extraction fails there is nothing valid to measure: remove any
          # partial output and move on to the next k instead of running the
          # measure/plot steps on a missing or truncated file.
          if ! python3 bin/extract_kmeans.py "${SUB_EXP_DIR}/clustering_${k}.pkl" \
              "${VECTOR_FILE}" \
              --outfile "${clustered_file}"
          then
              echo "extract_kmeans.py failed for kfold=${kfold} k=${k}; skipping" >&2
              rm -f -- "${clustered_file}"
              continue
          fi

          # -- MEASURES AND PLOT WITH RESPECT TO LANG VAR
          # Measures
          python3 bin/measure_clustering.py "${clustered_file}" \
              "${METAS_LANG}" \
              "${TRAIN_LST}" \
              "${VAL_LST}" \
              --outfile "${SUB_EXP_DIR}/measures_lang.json"

          # Plot the count matrix of the train set
          python3 bin/plot-count-matrix.py "${clustered_file}" \
              "${METAS_LANG}" \
              "${TRAIN_LST}" \
              --outfile "${SUB_EXP_DIR}/train_count_matrix_lang.pdf"

          # Plot the count matrix of the validation set
          python3 bin/plot-count-matrix.py "${clustered_file}" \
              "${METAS_LANG}" \
              "${VAL_LST}" \
              --outfile "${SUB_EXP_DIR}/val_count_matrix_lang.pdf"

          # The label file is only an intermediate product; drop it.
          rm -- "${clustered_file}"
151e596e3   Mathias Quillot   Some modification...
91
92
93
94
          #python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" \
          #    "${NEW_LSTDIR}/metas_${kfold}_type.lst" "${lst_dir}/train_${kfold}.lst" \
          #    "${lst_dir}/val_${kfold}.lst" \
          #    --outfile "${output_kfold}/${k}/measures_type.json"
42b4edb5a   Mathias Quillot   This file aims to...
95
96
          
          # This script plots the count matrix of the train set
95142dfdc   Mathias Quillot   maj. No comment
97
98
99
          #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
          #    ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/train_${kfold}.lst \
          #    --outfile ${output_kfold}/${k}/train_count_matrix_type.pdf
42b4edb5a   Mathias Quillot   This file aims to...
100
101
          
          # This script plots the count matrix of the validation set
95142dfdc   Mathias Quillot   maj. No comment
102
103
104
          #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
          #    ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/val_${kfold}.lst \
          #    --outfile ${output_kfold}/${k}/val_count_matrix_type.pdf
42b4edb5a   Mathias Quillot   This file aims to...
105
106
          
          # This script plots the count matrix of the train set
95142dfdc   Mathias Quillot   maj. No comment
107
108
109
          #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
          #    ${pvector_file} ${lst_dir}/train_${kfold}.lst \
          #    --outfile ${output_kfold}/${k}/train_count_matrix.pdf
42b4edb5a   Mathias Quillot   This file aims to...
110
111
          
          # This script plots the count matrix of the validation set
95142dfdc   Mathias Quillot   maj. No comment
112
113
114
          #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
          #    ${pvector_file} ${lst_dir}/val_${kfold}.lst \
          #    --outfile ${output_kfold}/${k}/val_count_matrix.pdf
42b4edb5a   Mathias Quillot   This file aims to...
115
116
      done
  done