maj. No comment

Mathias Quillot
1 parent 0ab563604a
Showing 7 changed files with 135 additions and 29 deletions Side-by-side Diff
bin/replace_label_lst.py
config/pv_from_xv_config.sh
config/pvector_config.sh
extract-labels.sh
run-clustering.sh
run-measures.sh
run.sh
+
+import argparse
+
+parser = argparse.ArgumentParser(description="extract label from lst file, move a label in fact")
+
+# Framework configuration
+OUTDIR="exp/kmeans_euclidian/pv_from_xv"
+DATADIR="data"
+NEW_LSTDIR="${OUTDIR}/lst"
+
+VECTOR_FILES_BEGIN="${DATADIR}/pv_from_xv/me_pv_teacher"
+VECTOR_FILES_END=".txt"
+VECTOR_FILE="" # To specify if there's only one
+VECTOR_FILES_ONE=false # Specify there's only one file
+
+KMIN=2
+KMAX=100
+
 OUTDIR="exp/kmeans_euclidian/teacher-pvector-1"
 DATADIR="data"
 NEW_LSTDIR="${OUTDIR}/lst"
@@ -2,20 +2,20 @@
  
 # Number of set
 k=4
+kmean=88
  
+
 # Vector features file
-VECTOR_FILE_MASSEFFECT="data/pvectors_1rst/pvectors_teacher_${k}.txt"
+VECTOR_FILE_MASSEFFECT="data/xvectors.txt"
  
-# Number of clusters
-kmean=6
  
 # Dirs
-EXP_DIR="exp/kmeans_euclidian/teacher-pvector-1/${k}/${kmean}"
+EXP_DIR="exp/kmeans_euclidian/xvectors/${k}/${kmean}"
 CLUSTERING="${EXP_DIR}/clustering_${kmean}.pkl"
  
  
 # Output dirs
-OUTFILE_MASSEFFECT="data/pvectors_1rst/saved_clustered/masseffect_clustered_${k}_${kmean}.txt"
+OUTFILE_MASSEFFECT="data/xvectors/saved_clustered/masseffect_clustered_xvectors_${k}_${kmean}.txt"
  
 python3 bin/extract_kmeans.py "${CLUSTERING}" \
         "${VECTOR_FILE_MASSEFFECT}" \
@@ -84,6 +84,27 @@
         "${METAS_TYPE}" \
         "${VAL_LST}" \
         --outfile "${SUB_EXP_DIR}/val_count_matrix_type.pdf"
-
+   
+ 
+    # -- MEASURES AND PLOT WITH RESPECT TO LANG VAR
+    # Measures
+    python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${METAS_LANG}" \
+        "${TRAIN_LST}" \
+        "${VAL_LST}" \
+        --outfile "${SUB_EXP_DIR}/measures_lang.json"
+        
+    # This script plots the count matrix of the train set
+    python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${METAS_LANG}" \
+        "${TRAIN_LST}" \
+        --outfile "${SUB_EXP_DIR}/train_count_matrix_lang.pdf"
+        
+    # This script plots the count matrix of the validation set
+    python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${METAS_LANG}" \
+        "${VAL_LST}" \
+        --outfile "${SUB_EXP_DIR}/val_count_matrix_lang.pdf"
+    
 done
@@ -2,7 +2,8 @@
 # quelques petites commandes que l'on souhaite
 # tester.
  
-OUTDIR="exp/kmeans_teacher_1/pvector-1"
+OUTDIR="exp/kmeans_euclidian/teacher-pvector-1"
+EXP_DIR=${OUTDIR}
 DATADIR="data"
 NEW_LSTDIR="${OUTDIR}/lst"
  
@@ -22,6 +23,7 @@
 for kfold in {1..4}
 do
     pvector_file="${DATADIR}/pvectors_1rst/pvectors_teacher_${kfold}.txt"
+    VECTOR_FILE=$pvector_file
     lst_dir="${DATADIR}/pvectors_1rst/lst"
     output_kfold="${OUTDIR}/${kfold}"
  
  
  
  
  
  
  
@@ -38,41 +40,79 @@
     #    --outfile "${NEW_LSTDIR}/val_${kfold}_type.lst"
  
     #cat "${NEW_LSTDIR}/train_${kfold}_type.lst" "${NEW_LSTDIR}/val_${kfold}_type.lst" > "${NEW_LSTDIR}/metas_${kfold}_type.lst"
+    TRAIN_LST=${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst
+    VAL_LST=${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst
+    TRAIN_LANG_LST=${NEW_LSTDIR}/train_${kfold}_lang.lst
+    VAL_LANG_LST=${NEW_LSTDIR}/val_${kfold}_lang.lst
+    METAS_LANG=${NEW_LSTDIR}/metas_${kfold}_lang.lst
+     
+    # EXTRACT LANGUAGE INFORMATION
+    awk '$2=$1' FS=, OFS=, ${TRAIN_LST} > ${TRAIN_LANG_LST}
+    echo "VAL EXTRACT LANGUAGE INFO DONE"
+    awk '$2=$1' FS=, OFS=, ${VAL_LST} > ${VAL_LANG_LST}
+    echo "TRAIN EXTRACT LANGUAGE INFO DONE"
+    cat "${TRAIN_LANG_LST}" "${VAL_LANG_LST}" > "${METAS_LANG}"
+    echo "GLOBAL EXTRACT LANGUAGE INFO DONE"
+  
  
- 
     echo "Clustering - ${kfold}"
  
     for k in $(seq ${kmin} 1 ${kmax})
     do
         echo "Kmeans Measuring and ploting - ${k}"
-	
-	    # This script compute measures from clustering   
-        #python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" "${pvector_file}" "${lst_dir}/train_${kfold}.lst" "${lst_dir}/val_${kfold}.lst" --outfile "${output_kfold}/${k}/measures.json"
-        
+
+	SUB_EXP_DIR="${EXP_DIR}/${kfold}/${k}"
+
+	# -- EXTRACT CLUSTERING LABELS
+	python3 bin/extract_kmeans.py "${SUB_EXP_DIR}/clustering_${k}.pkl" \
+        "${VECTOR_FILE}" \
+        --outfile "${SUB_EXP_DIR}/clustered_${k}.txt"
+
+	# -- MEASURES AND PLOT WITH RESPECT TO LANG VAR
+        # Measures
+        python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+            "${METAS_LANG}" \
+            "${TRAIN_LST}" \
+            "${VAL_LST}" \
+            --outfile "${SUB_EXP_DIR}/measures_lang.json"
+
+        # This script plots the count matrix of the train set
+        python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+            "${METAS_LANG}" \
+            "${TRAIN_LST}" \
+            --outfile "${SUB_EXP_DIR}/train_count_matrix_lang.pdf"
+
+        # This script plots the count matrix of the validation set
+        python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+            "${METAS_LANG}" \
+            "${VAL_LST}" \
+             --outfile "${SUB_EXP_DIR}/val_count_matrix_lang.pdf"
+
+	rm ${SUB_EXP_DIR}/clustered_${k}.txt
         #python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" \
         #    "${NEW_LSTDIR}/metas_${kfold}_type.lst" "${lst_dir}/train_${kfold}.lst" \
         #    "${lst_dir}/val_${kfold}.lst" \
         #    --outfile "${output_kfold}/${k}/measures_type.json"
  
         # This script plot the count matrix of the train set
-        python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
-            ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/train_${kfold}.lst \
-            --outfile ${output_kfold}/${k}/train_count_matrix_type.pdf
+        #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
+        #    ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/train_${kfold}.lst \
+        #    --outfile ${output_kfold}/${k}/train_count_matrix_type.pdf
  
         # This script plot the count matrix of the validation set
-        python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
-            ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/val_${kfold}.lst \
-            --outfile ${output_kfold}/${k}/val_count_matrix_type.pdf
+        #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
+        #    ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/val_${kfold}.lst \
+        #    --outfile ${output_kfold}/${k}/val_count_matrix_type.pdf
  
         # This script plot the count matrix of the train set
-        python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
-            ${pvector_file} ${lst_dir}/train_${kfold}.lst \
-            --outfile ${output_kfold}/${k}/train_count_matrix.pdf
+        #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
+        #    ${pvector_file} ${lst_dir}/train_${kfold}.lst \
+        #    --outfile ${output_kfold}/${k}/train_count_matrix.pdf
  
         # This script plot the count matrix of the validation set
-        python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
-            ${pvector_file} ${lst_dir}/val_${kfold}.lst \
-            --outfile ${output_kfold}/${k}/val_count_matrix.pdf
+        #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
+        #    ${pvector_file} ${lst_dir}/val_${kfold}.lst \
+        #    --outfile ${output_kfold}/${k}/val_count_matrix.pdf
     done
 done
@@ -44,13 +44,26 @@
 fi
  
  
+# -- KFOLD MIN and MAX
+if [ -z "$MIN_KFOLD" ]
+then
+    MIN_KFOLD=1
+fi
+
+if [ -z "$MAX_KFOLD" ]
+then
+    MAX_KFOLD=4
+fi
+
 # -- BEGIN BY KFOLD
-for kfold in {1..4}
+for kfold in $(seq ${MIN_KFOLD} ${MAX_KFOLD})
 do
     # Some useful variables
     CHAR_INFO="${DATADIR}/character_information.csv"
     TRAIN_TYPE_LST="${NEW_LSTDIR}/train_${kfold}_type.lst"
     VAL_TYPE_LST="${NEW_LSTDIR}/val_${kfold}_type.lst"
+    TRAIN_LANG_LST="${NEW_LSTDIR}/train_${kfold}_lang.lst"
+    VAL_LANG_LST="${NEW_LSTDIR}/val_${kfold}_lang.lst"
  
     # Configuration for the run clustering file
     if [ ${VECTOR_FILES_ONE} == false ]
  
@@ -61,9 +74,9 @@
     TRAIN_LST="${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst"
     VAL_LST="${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst"
     EXP_DIR="${OUTDIR}/${kfold}"
-    METAS_TYPE="${NEW_LSTDIR}/metas_${kfold}_type.lst" #*
+    METAS_TYPE="${NEW_LSTDIR}/metas_${kfold}_type.lst"
     METAS_CHARACTER="${DATADIR}/masseffect.lst"
-
+    METAS_LANG="${NEW_LSTDIR}/metas_${kfold}_lang.lst"
  
  
     if [ ! -d "${EXP_DIR}" ];
  
@@ -72,8 +85,9 @@
     fi
  
  
-    # Extract character information
+    # EXTRACT TYPE INFORMATION
     echo "Extracting character information"
+    echo "Replace in train"
     python3 "bin/replace_label.py" \
         "${METAS_CHARACTER}" \
         "${CHAR_INFO}" \
@@ -81,6 +95,7 @@
         --lst "${TRAIN_LST}" \
         --outfile "${TRAIN_TYPE_LST}"
  
+    echo "Replace in val"
     python3 "bin/replace_label.py" \
         "${METAS_CHARACTER}" \
         "${CHAR_INFO}" \
  
@@ -88,8 +103,19 @@
         --lst "${VAL_LST}" \
         --outfile "${VAL_TYPE_LST}"
  
+    echo "Merge them"
     cat "${TRAIN_TYPE_LST}" "${VAL_TYPE_LST}" > "${METAS_TYPE}"
-
+    
+    # EXTRACT LANGUAGE INFORMATION
+    echo "Language info for train"
+    awk '$2=$1' FS=, OFS=, ${TRAIN_LST} > ${TRAIN_LANG_LST}
+    echo "Language info for val"
+    awk '$2=$1' FS=, OFS=, ${VAL_LST} > ${VAL_LANG_LST}   
+    
+    echo "Merge them"
+    cat "${TRAIN_LANG_LST}" "${VAL_LANG_LST}" > "${METAS_LANG}"
+    
+    echo "Then Run Clustering"
     source "run-clustering.sh"
 done
	1	+
	2	+import argparse
	3	+
	4	+parser = argparse.ArgumentParser(description="extract label from lst file, move a label in fact")
	1	+
	2	+# Framework configuration
	3	+OUTDIR="exp/kmeans_euclidian/pv_from_xv"
	4	+DATADIR="data"
	5	+NEW_LSTDIR="${OUTDIR}/lst"
	6	+
	7	+VECTOR_FILES_BEGIN="${DATADIR}/pv_from_xv/me_pv_teacher"
	8	+VECTOR_FILES_END=".txt"
	9	+VECTOR_FILE="" # To specify if there's only one
	10	+VECTOR_FILES_ONE=false # Specify there's only one file
	11	+
	12	+KMIN=2
	13	+KMAX=100
	1	+
1	2	OUTDIR="exp/kmeans_euclidian/teacher-pvector-1"
2	3	DATADIR="data"
3	4	NEW_LSTDIR="${OUTDIR}/lst"
...	...	@@ -2,20 +2,20 @@
2	2
3	3	# Number of set
4	4	k=4
	5	+kmean=88
5	6
	7	+
6	8	# Vector features file
7		-VECTOR_FILE_MASSEFFECT="data/pvectors_1rst/pvectors_teacher_${k}.txt"
	9	+VECTOR_FILE_MASSEFFECT="data/xvectors.txt"
8	10
9		-# Number of clusters
10		-kmean=6
11	11
12	12	# Dirs
13		-EXP_DIR="exp/kmeans_euclidian/teacher-pvector-1/${k}/${kmean}"
	13	+EXP_DIR="exp/kmeans_euclidian/xvectors/${k}/${kmean}"
14	14	CLUSTERING="${EXP_DIR}/clustering_${kmean}.pkl"
15	15
16	16
17	17	# Output dirs
18		-OUTFILE_MASSEFFECT="data/pvectors_1rst/saved_clustered/masseffect_clustered_${k}_${kmean}.txt"
	18	+OUTFILE_MASSEFFECT="data/xvectors/saved_clustered/masseffect_clustered_xvectors_${k}_${kmean}.txt"
19	19
20	20	python3 bin/extract_kmeans.py "${CLUSTERING}" \
21	21	"${VECTOR_FILE_MASSEFFECT}" \
...	...	@@ -84,6 +84,27 @@
84	84	"${METAS_TYPE}" \
85	85	"${VAL_LST}" \
86	86	--outfile "${SUB_EXP_DIR}/val_count_matrix_type.pdf"
87		-
	87	+
	88	+
	89	+ # -- MEASURES AND PLOT WITH RESPECT TO LANG VAR
	90	+ # Measures
	91	+ python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	92	+ "${METAS_LANG}" \
	93	+ "${TRAIN_LST}" \
	94	+ "${VAL_LST}" \
	95	+ --outfile "${SUB_EXP_DIR}/measures_lang.json"
	96	+
	97	+ # This script plot the count matrix of the train set
	98	+ python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	99	+ "${METAS_LANG}" \
	100	+ "${TRAIN_LST}" \
	101	+ --outfile "${SUB_EXP_DIR}/train_count_matrix_lang.pdf"
	102	+
	103	+ # This script plot the count matrix of the validation set
	104	+ python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	105	+ "${METAS_LANG}" \
	106	+ "${VAL_LST}" \
	107	+ --outfile "${SUB_EXP_DIR}/val_count_matrix_lang.pdf"
	108	+
88	109	done
...	...	@@ -2,7 +2,8 @@
2	2	# quelques petites commandes que l'on souhaite
3	3	# tester.
4	4
5		-OUTDIR="exp/kmeans_teacher_1/pvector-1"
	5	+OUTDIR="exp/kmeans_euclidian/teacher-pvector-1"
	6	+EXP_DIR=${OUTDIR}
6	7	DATADIR="data"
7	8	NEW_LSTDIR="${OUTDIR}/lst"
8	9
...	...	@@ -22,6 +23,7 @@
22	23	for kfold in {1..4}
23	24	do
24	25	pvector_file="${DATADIR}/pvectors_1rst/pvectors_teacher_${kfold}.txt"
	26	+ VECTOR_FILE=$pvector_file
25	27	lst_dir="${DATADIR}/pvectors_1rst/lst"
26	28	output_kfold="${OUTDIR}/${kfold}"
27	29
28	30
29	31
30	32
31	33
32	34
33	35
...	...	@@ -38,41 +40,79 @@
38	40	# --outfile "${NEW_LSTDIR}/val_${kfold}_type.lst"
39	41
40	42	#cat "${NEW_LSTDIR}/train_${kfold}_type.lst" "${NEW_LSTDIR}/val_${kfold}_type.lst" > "${NEW_LSTDIR}/metas_${kfold}_type.lst"
	43	+ TRAIN_LST=${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst
	44	+ VAL_LST=${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst
	45	+ TRAIN_LANG_LST=${NEW_LSTDIR}/train_${kfold}_lang.lst
	46	+ VAL_LANG_LST=${NEW_LSTDIR}/val_${kfold}_lang.lst
	47	+ METAS_LANG=${NEW_LSTDIR}/metas_${kfold}_lang.lst
	48	+
	49	+ # EXTRACT LANGUAGE INFORMATION
	50	+ awk '$2=$1' FS=, OFS=, ${TRAIN_LST} > ${TRAIN_LANG_LST}
	51	+ echo "VAL EXTRACT LANGUAGE INFO DONE"
	52	+ awk '$2=$1' FS=, OFS=, ${VAL_LST} > ${VAL_LANG_LST}
	53	+ echo "TRAIN EXTRACT LANGUAGE INFO DONE"
	54	+ cat "${TRAIN_LANG_LST}" "${VAL_LANG_LST}" > "${METAS_LANG}"
	55	+ echo "GLOBAL EXTRACT LANGUAGE INFO DONE"
	56	+
41	57
42		-
43	58	echo "Clustering - ${kfold}"
44	59
45	60	for k in $(seq ${kmin} 1 ${kmax})
46	61	do
47	62	echo "Kmeans Measuring and ploting - ${k}"
48		-
49		- # This script compute measures from clustering
50		- #python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" "${pvector_file}" "${lst_dir}/train_${kfold}.lst" "${lst_dir}/val_${kfold}.lst" --outfile "${output_kfold}/${k}/measures.json"
51		-
	63	+
	64	+ SUB_EXP_DIR="${EXP_DIR}/${kfold}/${k}"
	65	+
	66	+ # -- EXTRACT CLUSTERING LABELS
	67	+ python3 bin/extract_kmeans.py "${SUB_EXP_DIR}/clustering_${k}.pkl" \
	68	+ "${VECTOR_FILE}" \
	69	+ --outfile "${SUB_EXP_DIR}/clustered_${k}.txt"
	70	+
	71	+ # -- MEASURES AND PLOT WITH RESPECT TO LANG VAR
	72	+ # Measures
	73	+ python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	74	+ "${METAS_LANG}" \
	75	+ "${TRAIN_LST}" \
	76	+ "${VAL_LST}" \
	77	+ --outfile "${SUB_EXP_DIR}/measures_lang.json"
	78	+
	79	+ # This script plot the count matrix of the train set
	80	+ python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	81	+ "${METAS_LANG}" \
	82	+ "${TRAIN_LST}" \
	83	+ --outfile "${SUB_EXP_DIR}/train_count_matrix_lang.pdf"
	84	+
	85	+ # This script plot the count matrix of the validation set
	86	+ python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
	87	+ "${METAS_LANG}" \
	88	+ "${VAL_LST}" \
	89	+ --outfile "${SUB_EXP_DIR}/val_count_matrix_lang.pdf"
	90	+
	91	+ rm ${SUB_EXP_DIR}/clustered_${k}.txt
52	92	#python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" \
53	93	# "${NEW_LSTDIR}/metas_${kfold}_type.lst" "${lst_dir}/train_${kfold}.lst" \
54	94	# "${lst_dir}/val_${kfold}.lst" \
55	95	# --outfile "${output_kfold}/${k}/measures_type.json"
56	96
57	97	# This script plot the count matrix of the train set
58		- python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
59		- ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/train_${kfold}.lst \
60		- --outfile ${output_kfold}/${k}/train_count_matrix_type.pdf
	98	+ #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
	99	+ # ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/train_${kfold}.lst \
	100	+ # --outfile ${output_kfold}/${k}/train_count_matrix_type.pdf
61	101
62	102	# This script plot the count matrix of the validation set
63		- python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
64		- ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/val_${kfold}.lst \
65		- --outfile ${output_kfold}/${k}/val_count_matrix_type.pdf
	103	+ #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
	104	+ # ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/val_${kfold}.lst \
	105	+ # --outfile ${output_kfold}/${k}/val_count_matrix_type.pdf
66	106
67	107	# This script plot the count matrix of the train set
68		- python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
69		- ${pvector_file} ${lst_dir}/train_${kfold}.lst \
70		- --outfile ${output_kfold}/${k}/train_count_matrix.pdf
	108	+ #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
	109	+ # ${pvector_file} ${lst_dir}/train_${kfold}.lst \
	110	+ # --outfile ${output_kfold}/${k}/train_count_matrix.pdf
71	111
72	112	# This script plot the count matrix of the validation set
73		- python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
74		- ${pvector_file} ${lst_dir}/val_${kfold}.lst \
75		- --outfile ${output_kfold}/${k}/val_count_matrix.pdf
	113	+ #python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
	114	+ # ${pvector_file} ${lst_dir}/val_${kfold}.lst \
	115	+ # --outfile ${output_kfold}/${k}/val_count_matrix.pdf
76	116	done
77	117	done
...	...	@@ -44,13 +44,26 @@
44	44	fi
45	45
46	46
	47	+# -- KFOLD MIN and MAX
	48	+if [ -z "$MIN_KFOLD" ]
	49	+then
	50	+ MIN_KFOLD=1
	51	+fi
	52	+
	53	+if [ -z "$MAX_KFOLD" ]
	54	+then
	55	+ MAX_KFOLD=4
	56	+fi
	57	+
47	58	# -- BEGIN BY KFOLD
48		-for kfold in {1..4}
	59	+for kfold in $(seq ${MIN_KFOLD} ${MAX_KFOLD})
49	60	do
50	61	# Some usefull variable
51	62	CHAR_INFO="${DATADIR}/character_information.csv"
52	63	TRAIN_TYPE_LST="${NEW_LSTDIR}/train_${kfold}_type.lst"
53	64	VAL_TYPE_LST="${NEW_LSTDIR}/val_${kfold}_type.lst"
	65	+ TRAIN_LANG_LST="${NEW_LSTDIR}/train_${kfold}_lang.lst"
	66	+ VAL_LANG_LST="${NEW_LSTDIR}/val_${kfold}_lang.lst"
54	67
55	68	# Configuration for the run clustering file
56	69	if [ ${VECTOR_FILES_ONE} == false ]
57	70
...	...	@@ -61,9 +74,9 @@
61	74	TRAIN_LST="${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst"
62	75	VAL_LST="${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst"
63	76	EXP_DIR="${OUTDIR}/${kfold}"
64		- METAS_TYPE="${NEW_LSTDIR}/metas_${kfold}_type.lst" #*
	77	+ METAS_TYPE="${NEW_LSTDIR}/metas_${kfold}_type.lst"
65	78	METAS_CHARACTER="${DATADIR}/masseffect.lst"
66		-
	79	+ METAS_LANG="${NEW_LSTDIR}/metas_${kfold}_lang.lst"
67	80
68	81
69	82	if [ ! -d "${EXP_DIR}" ];
70	83
...	...	@@ -72,8 +85,9 @@
72	85	fi
73	86
74	87
75		- # Extract character information
	88	+ # EXTRACT TYPE INFORMATION
76	89	echo "Extracting character information"
	90	+ echo "Replace in train"
77	91	python3 "bin/replace_label.py" \
78	92	"${METAS_CHARACTER}" \
79	93	"${CHAR_INFO}" \
...	...	@@ -81,6 +95,7 @@
81	95	--lst "${TRAIN_LST}" \
82	96	--outfile "${TRAIN_TYPE_LST}"
83	97
	98	+ echo "Replace in val"
84	99	python3 "bin/replace_label.py" \
85	100	"${METAS_CHARACTER}" \
86	101	"${CHAR_INFO}" \
87	102
...	...	@@ -88,8 +103,19 @@
88	103	--lst "${VAL_LST}" \
89	104	--outfile "${VAL_TYPE_LST}"
90	105
	106	+ echo "Merge them"
91	107	cat "${TRAIN_TYPE_LST}" "${VAL_TYPE_LST}" > "${METAS_TYPE}"
92		-
	108	+
	109	+ # EXTRACT LANGUAGE INFORMATION
	110	+ echo "Language info for train"
	111	+ awk '$2=$1' FS=, OFS=, ${TRAIN_LST} > ${TRAIN_LANG_LST}
	112	+ echo "Language info for val"
	113	+ awk '$2=$1' FS=, OFS=, ${VAL_LST} > ${VAL_LANG_LST}
	114	+
	115	+ echo "Merge them"
	116	+ cat "${TRAIN_LANG_LST}" "${VAL_LANG_LST}" > "${METAS_LANG}"
	117	+
	118	+ echo "Then Run Clustering"
93	119	source "run-clustering.sh"
94	120	done
95	121