New way to execute the run file. Now you can run the clustering just for one model…

…, or use the run file to launch it for each fold. You can configure it with the configuration files in config/.

New way to execute the run file. Now you can run the clustering just for one model…
…, or use the run file to launch it for each fold. You can configure it with the configuration files in config/.
Mathias Quillot
1 parent 151e596e35
Showing 5 changed files with 198 additions and 76 deletions Side-by-side Diff
config/ivector_config.sh
config/pvector_config.sh
config/xvector_config.sh
run-clustering.sh
run.sh
+OUTDIR="exp/kmeans_euclidian/ivectors"
+DATADIR="data"
+NEW_LSTDIR="${OUTDIR}/lst"
+
+VECTOR_FILE="data/ivectors.txt" # Vector file used for every fold when VECTOR_FILES_ONE=true
+VECTOR_FILES_ONE=true # true: a single vector file (VECTOR_FILE) instead of one file per fold
+
+KMIN=2
+KMAX=100
+OUTDIR="exp/kmeans_euclidian/teacher-pvector-1"
+DATADIR="data"
+NEW_LSTDIR="${OUTDIR}/lst"
+
+VECTOR_FILES_BEGIN="${DATADIR}/pvectors_1rst/pvectors_teacher" # run.sh appends _<fold> and VECTOR_FILES_END to this prefix
+VECTOR_FILES_END=".txt"
+VECTOR_FILE="" # Left empty here: run.sh is meant to fill it in for each fold
+VECTOR_FILES_ONE=false # false: one vector file per fold, named from VECTOR_FILES_BEGIN/END
+
+KMIN=2
+KMAX=100
+OUTDIR="exp/kmeans_euclidian/xvectors"
+DATADIR="data"
+NEW_LSTDIR="${OUTDIR}/lst"
+
+VECTOR_FILE="data/xvectors.txt" # Vector file used for every fold when VECTOR_FILES_ONE=true
+VECTOR_FILES_ONE=true # true: a single vector file (VECTOR_FILE) instead of one file per fold
+
+KMIN=2
+KMAX=100
+#
+# Runs k-means clustering on one vector file, then extracts, measures and
+# plots the result for every k from KMIN to KMAX. run.sh sources it per fold.
+#
+
+# -- CONFIGURATION
+# THIS SCRIPT NEEDS THESE VARIABLES (set by the caller beforehand)
+# Vector file
+#VECTOR_FILE=""
+# Train list
+#TRAIN_LST=""
+# Val list
+#VAL_LST=""
+# Exp directory
+#EXP_DIR=""
+# Metas file with type values
+#METAS_TYPE=""
+# Metas file with character values
+#METAS_CHARACTER=""
+# Also read: KMIN and KMAX (range of k) and kfold (only echoed in the log)
+
+#echo "VECTOR FILE: $VECTOR_FILE"
+#echo "TRAIN LIST: $TRAIN_LST"
+#echo "VAL LIST: $VAL_LST"
+#echo "EXP DIR: $EXP_DIR"
+#echo "METAS TYPE: $METAS_TYPE"
+#echo "METAS_CHARACTER: $METAS_CHARACTER"
+
+
+
+# -- TRAIN KMEANS
+echo "Clustering - ${kfold}"
+python3 bin/cluster_kmeans.py "${VECTOR_FILE}" \
+    "${TRAIN_LST}" \
+    "${EXP_DIR}" --kmin "${KMIN}" --kmax "${KMAX}"
+
+# Each k is processed in ${EXP_DIR}/${k}; clustering_${k}.pkl is expected there
+# (presumably written by cluster_kmeans.py above - confirm).
+for k in $(seq "${KMIN}" 1 "${KMAX}")
+do
+    SUB_EXP_DIR="${EXP_DIR}/${k}"
+
+    # -- EXTRACT KMEANS VALUES
+    echo "Kmeans Measuring and extraction - ${k}"
+    python3 bin/extract_kmeans.py "${SUB_EXP_DIR}/clustering_${k}.pkl" \
+        "${VECTOR_FILE}" \
+        --outfile "${SUB_EXP_DIR}/clustered_${k}.txt"
+    # -- MEASURES AND PLOT WITH RESPECT TO CHARACTER VAR
+    # NOTE(review): plots below get VECTOR_FILE, not METAS_CHARACTER - confirm.
+    python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${METAS_CHARACTER}" \
+        "${TRAIN_LST}" \
+        "${VAL_LST}" \
+        --outfile "${SUB_EXP_DIR}/measures.json"
+
+    # Plot count matrix for train
+    python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${VECTOR_FILE}" \
+        "${TRAIN_LST}" \
+        --outfile "${SUB_EXP_DIR}/train_count_matrix.pdf"
+
+    # Plot count matrix for val
+    python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${VECTOR_FILE}" \
+        "${VAL_LST}" \
+        --outfile "${SUB_EXP_DIR}/val_count_matrix.pdf"
+
+    # -- MEASURES AND PLOT WITH RESPECT TO TYPE VAR
+    # Measures
+    python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${METAS_TYPE}" \
+        "${TRAIN_LST}" \
+        "${VAL_LST}" \
+        --outfile "${SUB_EXP_DIR}/measures_type.json"
+
+    # Plot the count matrix of the train set
+    python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${METAS_TYPE}" \
+        "${TRAIN_LST}" \
+        --outfile "${SUB_EXP_DIR}/train_count_matrix_type.pdf"
+
+    # Plot the count matrix of the validation set
+    python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${METAS_TYPE}" \
+        "${VAL_LST}" \
+        --outfile "${SUB_EXP_DIR}/val_count_matrix_type.pdf"
+
+done
-# Pour le moment, le run ne fait qu'executer
-# quelques petites commandes que l'on souhaite
-# tester.
  
-OUTDIR="exp/kmeans_teacher_1/pvector-1"
-DATADIR="data"
-NEW_LSTDIR="${OUTDIR}/lst"
+#OUTDIR="exp/test/pvector-2"
+#DATADIR="data"
+#NEW_LSTDIR="${OUTDIR}/lst"
  
-kmin=2
-kmax=100
+#VECTOR_FILES_BEGIN="${DATADIR}/pvectors_1rst/pvectors_teacher"
+#VECTOR_FILES_END=".txt"
+#VECTOR_FILE="" # To specify if there's only one
+#VECTOR_FILES_ONE=false # Specify there's only one file
  
+#KMIN=2
+#KMAX=100
+
+# -- LOAD CONFIG FILE
+# The configuration file is the single, mandatory argument of this script.
+CONFIG_FILE="config.sh"
+
+if [ $# -eq 1 ]
+then
+    CONFIG_FILE="$1"
+else
+    echo "Need to have one and only one argument" >&2
+    exit 1
+fi
+
+# Quoted so that a path containing spaces works.
+source "${CONFIG_FILE}"
+
+# -- DEFAULT VALUES CONFIGURATION
+# An unset or empty VECTOR_FILES_ONE means: one vector file per fold.
+: "${VECTOR_FILES_ONE:=false}"
+
+
+
+# -- MAKE DIRECTORIES
 if [ ! -d "$OUTDIR" ];
 then
     mkdir -p $OUTDIR
  
  
  
  
  
  
  
  
  
  
  
@@ -19,82 +43,61 @@
     mkdir -p ${NEW_LSTDIR}
 fi
  
-for kfold in 4 #..4}
+
+# -- BEGIN BY KFOLD
+for kfold in {1..4}
 do
-    #echo "kfold = ${kfold}"
-    pvector_file="${DATADIR}/pvectors_1rst/pvectors_teacher_${kfold}.txt"
-    lst_dir="${DATADIR}/pvectors_1rst/lst"
-    output_kfold="${OUTDIR}/${kfold}"
+    # Some useful variables
+    CHAR_INFO="${DATADIR}/character_information.csv"
+    TRAIN_TYPE_LST="${NEW_LSTDIR}/train_${kfold}_type.lst"
+    VAL_TYPE_LST="${NEW_LSTDIR}/val_${kfold}_type.lst"
  
-    if [ ! -d "${output_kfold}" ];
+    # Build the per-fold vector file name unless a single file is configured
+    if [ "${VECTOR_FILES_ONE}" != "true" ]
     then
-        mkdir -p ${output_kfold}
+        VECTOR_FILE="${VECTOR_FILES_BEGIN}_${kfold}${VECTOR_FILES_END}"
     fi
-        
  
+    # Inputs handed to run-clustering.sh (sourced at the end of this iteration).
+    # NOTE(review): the fold lists are read from pvectors_1rst/lst whatever
+    # vector type the config selects - confirm that this is intended.
+    TRAIN_LST="${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst"
+    VAL_LST="${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst"
+    EXP_DIR="${OUTDIR}/${kfold}"
+    METAS_TYPE="${NEW_LSTDIR}/metas_${kfold}_type.lst" # train + val type labels, written by the cat below
+    METAS_CHARACTER="${DATADIR}/masseffect.lst"
+
+    # mkdir -p is a no-op when the directory already exists.
+    mkdir -p "${EXP_DIR}"
+
+
+
     # Extract character information
     echo "Extracting character information"
     python3 "bin/replace_label.py" \
-        "${DATADIR}/masseffect.lst" \
-        "${DATADIR}/character_information.csv" \
-        --field "type" --lst "data/pvectors_1rst/lst/train_${kfold}.lst" \
-        --outfile "${NEW_LSTDIR}/train_${kfold}_type.lst"
-    
+        "${METAS_CHARACTER}" \
+        "${CHAR_INFO}" \
+        --field "type" \
+        --lst "${TRAIN_LST}" \
+        --outfile "${TRAIN_TYPE_LST}"
+
     python3 "bin/replace_label.py" \
-        "${DATADIR}/masseffect.lst" \
-        "${DATADIR}/character_information.csv" \
-        --field "type" --lst "data/pvectors_1rst/lst/val_${kfold}.lst" \
-        --outfile "${NEW_LSTDIR}/val_${kfold}_type.lst"
-    cat "${NEW_LSTDIR}/train_${kfold}_type.lst" "${NEW_LSTDIR}/val_${kfold}_type.lst" > "${NEW_LSTDIR}/metas_${kfold}_type.lst"
+        "${METAS_CHARACTER}" \
+        "${CHAR_INFO}" \
+        --field "type" \
+        --lst "${VAL_LST}" \
+        --outfile "${VAL_TYPE_LST}"
  
-    # -- TRAIN KMEANS 
-    echo "Clustering - ${kfold}"
-    python3 bin/cluster_kmeans.py "${pvector_file}" \
-        "${lst_dir}/train_${kfold}.lst" \
-        "${output_kfold}" --kmin ${kmin} --kmax ${kmax}
+    cat "${TRAIN_TYPE_LST}" "${VAL_TYPE_LST}" > "${METAS_TYPE}"
  
-    for k in $(seq ${kmin} 1 ${kmax})
-    do
-        # -- EXTRACT KMEANS VALUES
-        echo "Kmeans Measuring and extraction - ${k}"
-        python3 bin/extract_kmeans.py "${output_kfold}/${k}/clustering_${k}.pkl" \
-            "${pvector_file}" \
-            --outfile "${output_kfold}/${k}/clustered_${k}.txt"
-        
-        
-        # -- MEASURES AND PLOT WITH RESPECT TO CHARACTER VAR
-        # Measures
-        python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" "${pvector_file}" "${lst_dir}/train_${kfold}.lst" "${lst_dir}/val_${kfold}.lst" --outfile "${output_kfold}/${k}/measures.json"
-        
-        # Plot count matrix for train
-        python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
-        ${pvector_file} ${lst_dir}/train_${kfold}.lst \
-        --outfile ${output_kfold}/${k}/train_count_matrix.pdf
-        
-        # Plot count matrix for val
-        python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
-            ${pvector_file} ${lst_dir}/val_${kfold}.lst \
-            --outfile ${output_kfold}/${k}/val_count_matrix.pdf
+    source "run-clustering.sh"
+done
  
-        # Regroup measures with respect to character var
-        python3 bin/regroup-measures.py exp/kmeans_teacher_1/pvector-1/
+# Regroup measures with respect to character classes (OUTDIR quoted against word splitting)
+echo "Regrouping measures with respect to character classes"
+python3 "bin/regroup-measures.py" "${OUTDIR}"
  
-        # -- MEASURES AND PLOT WITH RESPECT TO TYPE VAR
-        # Measures
-        python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" "${NEW_LSTDIR}/metas_${kfold}_type.lst" "${lst_dir}/train_${kfold}.lst" "${lst_dir}/val_${kfold}.lst" --outfile "${output_kfold}/${k}/measures_type.json"
-        
-        # This script plot the count matrix of the train set
-        python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
-        ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/train_${kfold}.lst \
-        --outfile ${output_kfold}/${k}/train_count_matrix_type.pdf
-        
-        # This script plot the count matrix of the validation set
-        python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
-        ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/val_${kfold}.lst \
-        --outfile ${output_kfold}/${k}/val_count_matrix_type.pdf
-
-        # Regroup measures with respect to type var 
-        python3 bin/regroup-measures.py exp/kmeans_teacher_1/pvector-1/ --suffix "_type" --measurefile "measures_type.j
-    done
-done
+# Regroup measures with respect to type classes (OUTDIR quoted against word splitting)
+echo "Regrouping measures with respect to type classes"
+python3 "bin/regroup-measures.py" "${OUTDIR}" --suffix "_type" --measurefile "measures_type.json"
	1	+OUTDIR="exp/kmeans_euclidian/ivectors"
	2	+DATADIR="data"
	3	+NEW_LSTDIR="${OUTDIR}/lst"
	4	+
	5	+VECTOR_FILE="data/ivectors.txt" # To specify if there's only one
	6	+VECTOR_FILES_ONE=true # Specify there's only one file
	7	+
	8	+KMIN=2
	9	+KMAX=100
	1	+OUTDIR="exp/kmeans_euclidian/teacher-pvector-1"
	2	+DATADIR="data"
	3	+NEW_LSTDIR="${OUTDIR}/lst"
	4	+
	5	+VECTOR_FILES_BEGIN="${DATADIR}/pvectors_1rst/pvectors_teacher"
	6	+VECTOR_FILES_END=".txt"
	7	+VECTOR_FILE="" # To specify if there's only one
	8	+VECTOR_FILES_ONE=false # Specify there's only one file
	9	+
	10	+KMIN=2
	11	+KMAX=100
	1	+OUTDIR="exp/kmeans_euclidian/xvectors"
	2	+DATADIR="data"
	3	+NEW_LSTDIR="${OUTDIR}/lst"
	4	+
	5	+VECTOR_FILE="data/xvectors.txt" # To specify if there's only one
	6	+VECTOR_FILES_ONE=true # Specify there's only one file
	7	+
	8	+KMIN=2
	9	+KMAX=100
	1	+#
	2	+# This script aims to compute clustering
	3	+#
	4	+
	5	+
	6	+# -- CONFIGURATION
	7	+# THIS SCRIPT NEEDS THESE VARIABLES
	8	+# Vector file
	9	+#VECTOR_FILE=""
	10	+# Train list
	11	+#TRAIN_LST==""
	12	+# Val list
	13	+#VAL_LST=""
	14	+# Exp directory
	15	+#EXP_DIR=""
	16	+# Metas file with type values
	17	+#METAS_TYPE=""
	18	+# Metas file with character values
	19	+#METAS_CHARACTER=""
	20	+
	21	+
	22	+#echo "VECTOR FILE: $VECTOR_FILE"
	23	+#echo "TRAIN LIST: $TRAIN_LST"
	24	+#echo "VAL LIST: $VAL_LST"
	25	+#echo "EXP DIR: $EXP_DIR"
	26	+#echo "METAS TYPE: $METAS_TYPE"
	27	+#echo "METAS_CHARACTER: $METAS_CHARACTER"
	28	+
	29	+
	30	+
	31	+# -- TRAIN KMEANS
	32	+echo "Clustering - ${kfold}"
	33	+python3 bin/cluster_kmeans.py "${VECTOR_FILE}" \
	34	+ "${TRAIN_LST}" \
	35	+ "${EXP_DIR}" --kmin ${KMIN} --kmax ${KMAX}
	36	+
	37	+
	38	+
	39	+for k in $(seq ${KMIN} 1 ${KMAX})
	40	+do
	41	+ SUB_EXP_DIR="${EXP_DIR}/${k}"
	42	+
	43	+ # -- EXTRACT KMEANS VALUES
	44	+ echo "Kmeans Measuring and extraction - ${k}"
	45	+ python3 bin/extract_kmeans.py "${SUB_EXP_DIR}/clustering_${k}.pkl" \
	46	+ "${VECTOR_FILE}" \
	47	+ --outfile "${SUB_EXP_DIR}/clustered_${k}.txt"
	48	+ # -- MEASURES AND PLOT WITH RESPECT TO CHARACTER VAR
	49	+ # Measures
	50	+ python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	51	+ "${METAS_CHARACTER}" \
	52	+ "${TRAIN_LST}" \
	53	+ "${VAL_LST}" \
	54	+ --outfile "${SUB_EXP_DIR}/measures.json"
	55	+
	56	+ # Plot count matrix for train
	57	+ python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	58	+ ${VECTOR_FILE} \
	59	+ ${TRAIN_LST} \
	60	+ --outfile "${SUB_EXP_DIR}/train_count_matrix.pdf"
	61	+
	62	+ # Plot count matrix for val
	63	+ python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	64	+ ${VECTOR_FILE} \
	65	+ ${VAL_LST} \
	66	+ --outfile "${SUB_EXP_DIR}/val_count_matrix.pdf"
	67	+
	68	+ # -- MEASURES AND PLOT WITH RESPECT TO TYPE VAR
	69	+ # Measures
	70	+ python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	71	+ "${METAS_TYPE}" \
	72	+ "${TRAIN_LST}" \
	73	+ "${VAL_LST}" \
	74	+ --outfile "${SUB_EXP_DIR}/measures_type.json"
	75	+
	76	+ # This script plot the count matrix of the train set
	77	+ python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	78	+ "${METAS_TYPE}" \
	79	+ "${TRAIN_LST}" \
	80	+ --outfile "${SUB_EXP_DIR}/train_count_matrix_type.pdf"
	81	+
	82	+ # This script plot the count matrix of the validation set
	83	+ python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	84	+ "${METAS_TYPE}" \
	85	+ "${VAL_LST}" \
	86	+ --outfile "${SUB_EXP_DIR}/val_count_matrix_type.pdf"
	87	+
	88	+done
1		-# Pour le moment, le run ne fait qu'executer
2		-# quelques petites commandes que l'on souhaite
3		-# tester.
4	1
5		-OUTDIR="exp/kmeans_teacher_1/pvector-1"
6		-DATADIR="data"
7		-NEW_LSTDIR="${OUTDIR}/lst"
	2	+#OUTDIR="exp/test/pvector-2"
	3	+#DATADIR="data"
	4	+#NEW_LSTDIR="${OUTDIR}/lst"
8	5
9		-kmin=2
10		-kmax=100
	6	+#VECTOR_FILES_BEGIN="${DATADIR}/pvectors_1rst/pvectors_teacher"
	7	+#VECTOR_FILES_END=".txt"
	8	+#VECTOR_FILE="" # To specify if there's only one
	9	+#VECTOR_FILES_ONE=false # Specify there's only one file
11	10
	11	+#KMIN=2
	12	+#KMAX=100
	13	+
	14	+# -- LOAD CONFIG FILE
	15	+# The configuration file is the single, mandatory argument of this script.
	16	+CONFIG_FILE="config.sh"
	17	+
	18	+if [ $# -eq 1 ]
	19	+then
	20	+ CONFIG_FILE="$1"
	21	+else
	22	+ echo "Need to have one and only one argument" >&2
	23	+ exit 1
	24	+fi
	25	+
	26	+# Quoted so that a path containing spaces works.
	27	+source "${CONFIG_FILE}"
	28	+
	29	+# -- DEFAULT VALUES CONFIGURATION
	30	+# An unset or empty VECTOR_FILES_ONE means: one vector file per fold.
	31	+: "${VECTOR_FILES_ONE:=false}"
	32	+
	33	+
	34	+
	35	+# -- MAKE DIRECTORIES
12	36	if [ ! -d "$OUTDIR" ];
13	37	then
14	38	mkdir -p $OUTDIR
15	39
16	40
17	41
18	42
19	43
20	44
21	45
22	46
23	47
24	48
25	49
...	...	@@ -19,82 +43,61 @@
19	43	mkdir -p ${NEW_LSTDIR}
20	44	fi
21	45
22		-for kfold in 4 #..4}
	46	+
	47	+# -- BEGIN BY KFOLD
	48	+for kfold in {1..4}
23	49	do
24		- #echo "kfold = ${kfold}"
25		- pvector_file="${DATADIR}/pvectors_1rst/pvectors_teacher_${kfold}.txt"
26		- lst_dir="${DATADIR}/pvectors_1rst/lst"
27		- output_kfold="${OUTDIR}/${kfold}"
	50	+ # Some usefull variable
	51	+ CHAR_INFO="${DATADIR}/character_information.csv"
	52	+ TRAIN_TYPE_LST="${NEW_LSTDIR}/train_${kfold}_type.lst"
	53	+ VAL_TYPE_LST="${NEW_LSTDIR}/val_${kfold}_type.lst"
28	54
29		- if [ ! -d "${output_kfold}" ];
	55	+ # Build the per-fold vector file name unless a single file is configured
	56	+ if [ "${VECTOR_FILES_ONE}" != "true" ]
30	57	then
31		- mkdir -p ${output_kfold}
	58	+ VECTOR_FILE="${VECTOR_FILES_BEGIN}_${kfold}${VECTOR_FILES_END}"
32	59	fi
33		-
34	60
	61	+ TRAIN_LST="${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst"
	62	+ VAL_LST="${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst"
	63	+ EXP_DIR="${OUTDIR}/${kfold}"
	64	+ METAS_TYPE="${NEW_LSTDIR}/metas_${kfold}_type.lst" #*
	65	+ METAS_CHARACTER="${DATADIR}/masseffect.lst"
	66	+
	67	+
	68	+
	69	+ if [ ! -d "${EXP_DIR}" ];
	70	+ then
	71	+ mkdir -p ${EXP_DIR}
	72	+ fi
	73	+
	74	+
35	75	# Extract character information
36	76	echo "Extracting character information"
37	77	python3 "bin/replace_label.py" \
38		- "${DATADIR}/masseffect.lst" \
39		- "${DATADIR}/character_information.csv" \
40		- --field "type" --lst "data/pvectors_1rst/lst/train_${kfold}.lst" \
41		- --outfile "${NEW_LSTDIR}/train_${kfold}_type.lst"
42		-
	78	+ "${METAS_CHARACTER}" \
	79	+ "${CHAR_INFO}" \
	80	+ --field "type" \
	81	+ --lst "${TRAIN_LST}" \
	82	+ --outfile "${TRAIN_TYPE_LST}"
	83	+
43	84	python3 "bin/replace_label.py" \
44		- "${DATADIR}/masseffect.lst" \
45		- "${DATADIR}/character_information.csv" \
46		- --field "type" --lst "data/pvectors_1rst/lst/val_${kfold}.lst" \
47		- --outfile "${NEW_LSTDIR}/val_${kfold}_type.lst"
48		- cat "${NEW_LSTDIR}/train_${kfold}_type.lst" "${NEW_LSTDIR}/val_${kfold}_type.lst" > "${NEW_LSTDIR}/metas_${kfold}_type.lst"
	85	+ "${METAS_CHARACTER}" \
	86	+ "${CHAR_INFO}" \
	87	+ --field "type" \
	88	+ --lst "${VAL_LST}" \
	89	+ --outfile "${VAL_TYPE_LST}"
49	90
50		- # -- TRAIN KMEANS
51		- echo "Clustering - ${kfold}"
52		- python3 bin/cluster_kmeans.py "${pvector_file}" \
53		- "${lst_dir}/train_${kfold}.lst" \
54		- "${output_kfold}" --kmin ${kmin} --kmax ${kmax}
	91	+ cat "${TRAIN_TYPE_LST}" "${VAL_TYPE_LST}" > "${METAS_TYPE}"
55	92
56		- for k in $(seq ${kmin} 1 ${kmax})
57		- do
58		- # -- EXTRACT KMEANS VALUES
59		- echo "Kmeans Measuring and extraction - ${k}"
60		- python3 bin/extract_kmeans.py "${output_kfold}/${k}/clustering_${k}.pkl" \
61		- "${pvector_file}" \
62		- --outfile "${output_kfold}/${k}/clustered_${k}.txt"
63		-
64		-
65		- # -- MEASURES AND PLOT WITH RESPECT TO CHARACTER VAR
66		- # Measures
67		- python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" "${pvector_file}" "${lst_dir}/train_${kfold}.lst" "${lst_dir}/val_${kfold}.lst" --outfile "${output_kfold}/${k}/measures.json"
68		-
69		- # Plot count matrix for train
70		- python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
71		- ${pvector_file} ${lst_dir}/train_${kfold}.lst \
72		- --outfile ${output_kfold}/${k}/train_count_matrix.pdf
73		-
74		- # Plot count matrix for val
75		- python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
76		- ${pvector_file} ${lst_dir}/val_${kfold}.lst \
77		- --outfile ${output_kfold}/${k}/val_count_matrix.pdf
	93	+ source "run-clustering.sh"
	94	+done
78	95
79		- # Regroup measures with respect to character var
80		- python3 bin/regroup-measures.py exp/kmeans_teacher_1/pvector-1/
	96	+# Regroup measures with respect to character classes
	97	+echo "Regrouping measures with respect to character classes"
	98	+python3 "bin/regroup-measures.py" ${OUTDIR}
81	99
82		- # -- MEASURES AND PLOT WITH RESPECT TO TYPE VAR
83		- # Measures
84		- python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" "${NEW_LSTDIR}/metas_${kfold}_type.lst" "${lst_dir}/train_${kfold}.lst" "${lst_dir}/val_${kfold}.lst" --outfile "${output_kfold}/${k}/measures_type.json"
85		-
86		- # This script plot the count matrix of the train set
87		- python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
88		- ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/train_${kfold}.lst \
89		- --outfile ${output_kfold}/${k}/train_count_matrix_type.pdf
90		-
91		- # This script plot the count matrix of the validation set
92		- python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
93		- ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/val_${kfold}.lst \
94		- --outfile ${output_kfold}/${k}/val_count_matrix_type.pdf
95		-
96		- # Regroup measures with respect to type var
97		- python3 bin/regroup-measures.py exp/kmeans_teacher_1/pvector-1/ --suffix "_type" --measurefile "measures_type.j
98		- done
99		-done
	100	+# Regroup measures with respect to type classes
	101	+echo "Regrouping measures with respect to type classes"
	102	+python3 "bin/regroup-measures.py" ${OUTDIR} --suffix "_type" --measurefile "measures_type.json"