Blame view

run.sh 3.82 KB
14d6e0cb3   Mathias Quillot   Basic run.sh for ...
1
2
3
  # For the moment, this script only runs
  # a few small commands that we want
  # to test.
b3371498c   Mathias Quillot   Run script update...
4
5
  # Root directory for everything this experiment writes, and the directory
  # the input data is read from.
  OUTDIR='exp/kmeans_teacher_1/pvector-1'
  DATADIR='data'
29644ae6c   Mathias Quillot   New receip to bui...
6
  # Directory that receives the "*_type.lst" files produced by this script.
  NEW_LSTDIR="$OUTDIR/lst"
b3371498c   Mathias Quillot   Run script update...
7
8
9
  
  # Inclusive bounds of the k sweep: passed to cluster_kmeans.py as
  # --kmin/--kmax and iterated over by the inner loop.
  kmin=2 kmax=100
14d6e0cb3   Mathias Quillot   Basic run.sh for ...
10
11
12
13
  # Make sure the experiment output directory exists. `mkdir -p` succeeds
  # silently when the directory is already there, so no separate `-d` test is
  # needed. The path is quoted so it is never word-split or glob-expanded.
  mkdir -p -- "${OUTDIR}"
29644ae6c   Mathias Quillot   New receip to bui...
14
15
16
17
18
19
  # Make sure the directory for the generated list files exists. `mkdir -p`
  # is a no-op on an existing directory, and the quoted path is safe against
  # word-splitting and globbing.
  mkdir -p -- "${NEW_LSTDIR}"
  
  # Outer loop over folds. Only fold 4 is processed here; the trailing
  # "#..4}" is a comment, presumably the disabled remainder of a brace
  # range such as {1..4} (TODO confirm).
  for kfold in 4 #..4}
b3371498c   Mathias Quillot   Run script update...
20
21
22
23
24
25
26
27
28
29
  do
      #echo "kfold = ${kfold}"
      # Per-fold paths: the pvector file for this fold, the directory holding
      # the train_/val_ list files, and the directory receiving this fold's
      # results.
      pvector_file="${DATADIR}/pvectors_1rst/pvectors_teacher_${kfold}.txt"
      lst_dir="${DATADIR}/pvectors_1rst/lst"
      output_kfold="${OUTDIR}/${kfold}"

      # `mkdir -p` is already idempotent, so the directory is created
      # unconditionally; quoting protects the path from word-splitting.
      mkdir -p -- "${output_kfold}"
29644ae6c   Mathias Quillot   New receip to bui...
30
31
32
33
34
35
36
37
38
          
      
      # Extract character information: run replace_label.py with --field "type"
      # on this fold's training list and write the result to NEW_LSTDIR.
      # The list path is built from lst_dir (instead of a hard-coded
      # "data/pvectors_1rst/lst") so that it keeps following DATADIR.
      echo "Extracting character information"
      python3 "bin/replace_label.py" \
          "${DATADIR}/masseffect.lst" \
          "${DATADIR}/character_information.csv" \
          --field "type" --lst "${lst_dir}/train_${kfold}.lst" \
          --outfile "${NEW_LSTDIR}/train_${kfold}_type.lst"
b3371498c   Mathias Quillot   Run script update...
39
      
29644ae6c   Mathias Quillot   New receip to bui...
40
41
42
43
44
45
46
47
      # Same replace_label.py call for this fold's validation list. The list
      # path is built from lst_dir rather than a hard-coded "data/..." prefix
      # so that it keeps following DATADIR.
      python3 "bin/replace_label.py" \
          "${DATADIR}/masseffect.lst" \
          "${DATADIR}/character_information.csv" \
          --field "type" --lst "${lst_dir}/val_${kfold}.lst" \
          --outfile "${NEW_LSTDIR}/val_${kfold}_type.lst"

      # Concatenate the train and val "type" lists into one metas file, which
      # the type-based measures and plots read later.
      cat "${NEW_LSTDIR}/train_${kfold}_type.lst" \
          "${NEW_LSTDIR}/val_${kfold}_type.lst" \
          > "${NEW_LSTDIR}/metas_${kfold}_type.lst"
  
      # -- TRAIN KMEANS 
b3371498c   Mathias Quillot   Run script update...
48
49
50
51
52
53
54
      # Run cluster_kmeans.py once per fold over the whole [kmin, kmax] range,
      # using the training list; results go under output_kfold.
      # The numeric bounds are quoted like every other expansion so they can
      # never be word-split or glob-expanded.
      echo "Clustering - ${kfold}"
      python3 bin/cluster_kmeans.py "${pvector_file}" \
          "${lst_dir}/train_${kfold}.lst" \
          "${output_kfold}" --kmin "${kmin}" --kmax "${kmax}"
  
      # Inner loop: visit every k from kmin to kmax inclusive, in steps of 1.
      for k in $(seq ${kmin} 1 ${kmax})
      do
29644ae6c   Mathias Quillot   New receip to bui...
55
          # -- EXTRACT KMEANS VALUES
b3371498c   Mathias Quillot   Run script update...
56
          # Progress message for the current value of k.
          printf '%s\n' "Kmeans Measuring and extraction - ${k}"
b3371498c   Mathias Quillot   Run script update...
57
58
59
          # Feed the pickled clustering for this k and the fold's pvector file
          # to extract_kmeans.py; its output is the clustered_<k>.txt file that
          # the measure and plot steps read.
          python3 bin/extract_kmeans.py \
              "${output_kfold}/${k}/clustering_${k}.pkl" \
              "${pvector_file}" \
              --outfile "${output_kfold}/${k}/clustered_${k}.txt"
29644ae6c   Mathias Quillot   New receip to bui...
60
61
62
63
          
          
          # -- MEASURES AND PLOT WITH RESPECT TO CHARACTER VAR
          # Measures
b3371498c   Mathias Quillot   Run script update...
64
          # Measure the clustering for this k against the pvector file, using
          # the fold's train and val lists; results are written to measures.json.
          python3 bin/measure_clustering.py \
              "${output_kfold}/${k}/clustered_${k}.txt" \
              "${pvector_file}" \
              "${lst_dir}/train_${kfold}.lst" \
              "${lst_dir}/val_${kfold}.lst" \
              --outfile "${output_kfold}/${k}/measures.json"
29644ae6c   Mathias Quillot   New receip to bui...
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
          
          # Plot count matrix for train.
          # Every path is quoted so that a directory name containing spaces or
          # glob characters is passed to the script as a single argument.
          python3 bin/plot-count-matrix.py \
              "${output_kfold}/${k}/clustered_${k}.txt" \
              "${pvector_file}" \
              "${lst_dir}/train_${kfold}.lst" \
              --outfile "${output_kfold}/${k}/train_count_matrix.pdf"

          # Plot count matrix for val (same inputs, validation list).
          python3 bin/plot-count-matrix.py \
              "${output_kfold}/${k}/clustered_${k}.txt" \
              "${pvector_file}" \
              "${lst_dir}/val_${kfold}.lst" \
              --outfile "${output_kfold}/${k}/val_count_matrix.pdf"
  
          # Regroup measures with respect to character var.
          # The experiment root comes from OUTDIR instead of a second
          # hard-coded copy of the same path, so the two cannot drift apart.
          # NOTE(review): this runs on the whole experiment directory at every
          # k iteration — confirm whether once per fold (or per run) would do.
          python3 bin/regroup-measures.py "${OUTDIR}/"
  
          # -- MEASURES AND PLOT WITH RESPECT TO TYPE VAR
          # Same measurement step as for the character variable, but the second
          # argument is the fold's metas "type" list and the result goes to
          # measures_type.json.
          python3 bin/measure_clustering.py \
              "${output_kfold}/${k}/clustered_${k}.txt" \
              "${NEW_LSTDIR}/metas_${kfold}_type.lst" \
              "${lst_dir}/train_${kfold}.lst" \
              "${lst_dir}/val_${kfold}.lst" \
              --outfile "${output_kfold}/${k}/measures_type.json"
          
          # Plot the count matrix of the train set against the "type" labels.
          # Every path is quoted so that names containing spaces or glob
          # characters reach the script as single arguments.
          python3 bin/plot-count-matrix.py \
              "${output_kfold}/${k}/clustered_${k}.txt" \
              "${NEW_LSTDIR}/metas_${kfold}_type.lst" \
              "${lst_dir}/train_${kfold}.lst" \
              --outfile "${output_kfold}/${k}/train_count_matrix_type.pdf"

          # Plot the count matrix of the validation set against the "type" labels.
          python3 bin/plot-count-matrix.py \
              "${output_kfold}/${k}/clustered_${k}.txt" \
              "${NEW_LSTDIR}/metas_${kfold}_type.lst" \
              "${lst_dir}/val_${kfold}.lst" \
              --outfile "${output_kfold}/${k}/val_count_matrix_type.pdf"
  
          # Regroup measures with respect to type var.
          # The --measurefile string is closed here: left unterminated, the
          # open double quote swallows the rest of the script and breaks parsing.
          # NOTE(review): the value is completed as "measures_type.json", the
          # file name written by the type measurement step above — confirm
          # against the original file, along with any arguments that followed.
          # The experiment root comes from OUTDIR rather than a hard-coded copy.
          python3 bin/regroup-measures.py "${OUTDIR}/" \
              --suffix "_type" --measurefile "measures_type.json"
b3371498c   Mathias Quillot   Run script update...
95
96
      done
  done
14d6e0cb3   Mathias Quillot   Basic run.sh for ...
97