Quillot Mathias / Clustering

Browse Code »

Commit 95142dfdc54218f17529b6757ed7f310811b9534

Authored by Mathias Quillot 2019-10-29 10:19:25 +0100

1 parent 0ab563604a

Exists in master

maj. No comment

Showing 7 changed files with 135 additions and 29 deletions Inline Diff

bin/replace_label_lst.py
config/pv_from_xv_config.sh
config/pvector_config.sh
extract-labels.sh
run-clustering.sh
run-measures.sh
run.sh

bin/replace_label_lst.py

Diff comments View file @ 95142df

File was created	1
	2	import argparse
	3
	4	parser = argparse.ArgumentParser(description="extract label from lst file, move a label in fact")
	5
	6

config/pv_from_xv_config.sh

Diff comments View file @ 95142df

File was created	1
	2	# Framework configuration
	3	OUTDIR="exp/kmeans_euclidian/pv_from_xv"
	4	DATADIR="data"
	5	NEW_LSTDIR="${OUTDIR}/lst"
	6
	7	VECTOR_FILES_BEGIN="${DATADIR}/pv_from_xv/me_pv_teacher"
	8	VECTOR_FILES_END=".txt"
	9	VECTOR_FILE="" # To specify if there's only one
	10	VECTOR_FILES_ONE=false # Specify there's only one file
	11
	12	KMIN=2
	13	KMAX=100
	14

config/pvector_config.sh

Diff comments View file @ 95142df

		1
1	OUTDIR="exp/kmeans_euclidian/teacher-pvector-1"	2	OUTDIR="exp/kmeans_euclidian/teacher-pvector-1"
2	DATADIR="data"	3	DATADIR="data"
3	NEW_LSTDIR="${OUTDIR}/lst"	4	NEW_LSTDIR="${OUTDIR}/lst"
4		5
5	VECTOR_FILES_BEGIN="${DATADIR}/pvectors_1rst/pvectors_teacher"	6	VECTOR_FILES_BEGIN="${DATADIR}/pvectors_1rst/pvectors_teacher"
6	VECTOR_FILES_END=".txt"	7	VECTOR_FILES_END=".txt"
7	VECTOR_FILE="" # To specify if there's only one	8	VECTOR_FILE="" # To specify if there's only one
8	VECTOR_FILES_ONE=false # Specify there's only one file	9	VECTOR_FILES_ONE=false # Specify there's only one file
9		10
10	KMIN=2	11	KMIN=2
11	KMAX=100	12	KMAX=100
12		13

extract-labels.sh

Diff comments View file @ 95142df

 # Number of set
 k=4
+kmean=88
 # Vector features file
-VECTOR_FILE_MASSEFFECT="data/pvectors_1rst/pvectors_teacher_${k}.txt"
+VECTOR_FILE_MASSEFFECT="data/xvectors.txt"
-# Number of clusters
-kmean=6
 # Dirs
-EXP_DIR="exp/kmeans_euclidian/teacher-pvector-1/${k}/${kmean}"
+EXP_DIR="exp/kmeans_euclidian/xvectors/${k}/${kmean}"
 CLUSTERING="${EXP_DIR}/clustering_${kmean}.pkl"
 # Output dirs
-OUTFILE_MASSEFFECT="data/pvectors_1rst/saved_clustered/masseffect_clustered_${k}_${kmean}.txt"
+OUTFILE_MASSEFFECT="data/xvectors/saved_clustered/masseffect_clustered_xvectors_${k}_${kmean}.txt"
 python3 bin/extract_kmeans.py "${CLUSTERING}" \
         "${VECTOR_FILE_MASSEFFECT}" \

run-clustering.sh

Diff comments View file @ 95142df

 #
 # This script aims to compute clustering
 #
 # -- CONFIGURATION
 # THIS SCRIPT NEEDS THESE VARIABLES
 # Vector file
 #VECTOR_FILE=""
 # Train list
 #TRAIN_LST=""
 # Val list
 #VAL_LST=""
 # Exp directory
 #EXP_DIR=""
 # Metas file with type values
 #METAS_TYPE=""
 # Metas file with character values
 #METAS_CHARACTER=""
 #echo "VECTOR FILE: $VECTOR_FILE"
 #echo "TRAIN LIST: $TRAIN_LST"
 #echo "VAL LIST: $VAL_LST"
 #echo "EXP DIR: $EXP_DIR"
 #echo "METAS TYPE: $METAS_TYPE"
 #echo "METAS_CHARACTER: $METAS_CHARACTER"
 # -- TRAIN KMEANS
 echo "Clustering - ${kfold}"
 python3 bin/cluster_kmeans.py "${VECTOR_FILE}" \
     "${TRAIN_LST}" \
     "${EXP_DIR}" --kmin ${KMIN} --kmax ${KMAX}
 for k in $(seq ${KMIN} 1 ${KMAX})
 do
     SUB_EXP_DIR="${EXP_DIR}/${k}"
     # -- EXTRACT KMEANS VALUES
     echo "Kmeans Measuring and extraction - ${k}"
     python3 bin/extract_kmeans.py "${SUB_EXP_DIR}/clustering_${k}.pkl" \
         "${VECTOR_FILE}" \
         --outfile "${SUB_EXP_DIR}/clustered_${k}.txt"
     # -- MEASURES AND PLOT WITH RESPECT TO CHARACTER VAR
     # Measures
     python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
         "${METAS_CHARACTER}" \
         "${TRAIN_LST}" \
         "${VAL_LST}" \
         --outfile "${SUB_EXP_DIR}/measures.json"
     # Plot count matrix for train
     python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
         ${VECTOR_FILE} \
         ${TRAIN_LST} \
         --outfile "${SUB_EXP_DIR}/train_count_matrix.pdf"
     # Plot count matrix for val
     python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
         ${VECTOR_FILE} \
         ${VAL_LST} \
         --outfile "${SUB_EXP_DIR}/val_count_matrix.pdf"
     # -- MEASURES AND PLOT WITH RESPECT TO TYPE VAR
     # Measures
     python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
         "${METAS_TYPE}" \
         "${TRAIN_LST}" \
         "${VAL_LST}" \
         --outfile "${SUB_EXP_DIR}/measures_type.json"
     # This script plots the count matrix of the train set
     python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
         "${METAS_TYPE}" \
         "${TRAIN_LST}" \
         --outfile "${SUB_EXP_DIR}/train_count_matrix_type.pdf"
     # This script plots the count matrix of the validation set
     python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
         "${METAS_TYPE}" \
         "${VAL_LST}" \
         --outfile "${SUB_EXP_DIR}/val_count_matrix_type.pdf"
+    # -- MEASURES AND PLOT WITH RESPECT TO LANG VAR
+    # Measures
+    python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${METAS_LANG}" \
+        "${TRAIN_LST}" \
+        "${VAL_LST}" \
+        --outfile "${SUB_EXP_DIR}/measures_lang.json"
+    # This script plots the count matrix of the train set
+    python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${METAS_LANG}" \
+        "${TRAIN_LST}" \
+        --outfile "${SUB_EXP_DIR}/train_count_matrix_lang.pdf"
+    # This script plots the count matrix of the validation set
+    python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
+        "${METAS_LANG}" \
+        "${VAL_LST}" \
+        --outfile "${SUB_EXP_DIR}/val_count_matrix_lang.pdf"
 done

run-measures.sh

Diff comments View file @ 95142df

1	# Pour le moment, le run ne fait qu'executer	1	# Pour le moment, le run ne fait qu'executer
2	# quelques petites commandes que l'on souhaite	2	# quelques petites commandes que l'on souhaite
3	# tester.	3	# tester.
4		4
5	OUTDIR="exp/kmeans_teacher_1/pvector-1"	5	OUTDIR="exp/kmeans_euclidian/teacher-pvector-1"
		6	EXP_DIR=${OUTDIR}
6	DATADIR="data"	7	DATADIR="data"
7	NEW_LSTDIR="${OUTDIR}/lst"	8	NEW_LSTDIR="${OUTDIR}/lst"
8		9
9	kmin=2	10	kmin=2
10	kmax=100	11	kmax=100
11		12
12	if [ ! -d "$OUTDIR" ];	13	if [ ! -d "$OUTDIR" ];
13	then	14	then
14	mkdir -p $OUTDIR	15	mkdir -p $OUTDIR
15	fi	16	fi
16		17
17	if [ ! -d "$NEW_LSTDIR" ];	18	if [ ! -d "$NEW_LSTDIR" ];
18	then	19	then
19	mkdir -p $NEW_LSTDIR	20	mkdir -p $NEW_LSTDIR
20	fi	21	fi
21		22
22	for kfold in {1..4}	23	for kfold in {1..4}
23	do	24	do
24	pvector_file="${DATADIR}/pvectors_1rst/pvectors_teacher_${kfold}.txt"	25	pvector_file="${DATADIR}/pvectors_1rst/pvectors_teacher_${kfold}.txt"
		26	VECTOR_FILE=$pvector_file
25	lst_dir="${DATADIR}/pvectors_1rst/lst"	27	lst_dir="${DATADIR}/pvectors_1rst/lst"
26	output_kfold="${OUTDIR}/${kfold}"	28	output_kfold="${OUTDIR}/${kfold}"
27		29
28	#python3 "bin/replace_label.py" \	30	#python3 "bin/replace_label.py" \
29	# "${DATADIR}/masseffect.lst" \	31	# "${DATADIR}/masseffect.lst" \
30	# "${DATADIR}/character_information.csv" \	32	# "${DATADIR}/character_information.csv" \
31	# --field "type" --lst "data/pvectors_1rst/lst/train_${kfold}.lst" \	33	# --field "type" --lst "data/pvectors_1rst/lst/train_${kfold}.lst" \
32	# --outfile "${NEW_LSTDIR}/train_${kfold}_type.lst"	34	# --outfile "${NEW_LSTDIR}/train_${kfold}_type.lst"
33		35
34	#python3 "bin/replace_label.py" \	36	#python3 "bin/replace_label.py" \
35	# "${DATADIR}/masseffect.lst" \	37	# "${DATADIR}/masseffect.lst" \
36	# "${DATADIR}/character_information.csv" \	38	# "${DATADIR}/character_information.csv" \
37	# --field "type" --lst "data/pvectors_1rst/lst/val_${kfold}.lst" \	39	# --field "type" --lst "data/pvectors_1rst/lst/val_${kfold}.lst" \
38	# --outfile "${NEW_LSTDIR}/val_${kfold}_type.lst"	40	# --outfile "${NEW_LSTDIR}/val_${kfold}_type.lst"
39		41
40	#cat "${NEW_LSTDIR}/train_${kfold}_type.lst" "${NEW_LSTDIR}/val_${kfold}_type.lst" > "${NEW_LSTDIR}/metas_${kfold}_type.lst"	42	#cat "${NEW_LSTDIR}/train_${kfold}_type.lst" "${NEW_LSTDIR}/val_${kfold}_type.lst" > "${NEW_LSTDIR}/metas_${kfold}_type.lst"
		43	TRAIN_LST=${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst
		44	VAL_LST=${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst
		45	TRAIN_LANG_LST=${NEW_LSTDIR}/train_${kfold}_lang.lst
		46	VAL_LANG_LST=${NEW_LSTDIR}/val_${kfold}_lang.lst
		47	METAS_LANG=${NEW_LSTDIR}/metas_${kfold}_lang.lst
		48
		49	# EXTRACT LANGUAGE INFORMATION
		50	awk '$2=$1' FS=, OFS=, ${TRAIN_LST} > ${TRAIN_LANG_LST}
		51	echo "VAL EXTRACT LANGUAGE INFO DONE"
		52	awk '$2=$1' FS=, OFS=, ${VAL_LST} > ${VAL_LANG_LST}
		53	echo "TRAIN EXTRACT LANGUAGE INFO DONE"
		54	cat "${TRAIN_LANG_LST}" "${VAL_LANG_LST}" > "${METAS_LANG}"
		55	echo "GLOBAL EXTRACT LANGUAGE INFO DONE"
		56
41		57
42
43	echo "Clustering - ${kfold}"	58	echo "Clustering - ${kfold}"
44		59
45	for k in $(seq ${kmin} 1 ${kmax})	60	for k in $(seq ${kmin} 1 ${kmax})
46	do	61	do
47	echo "Kmeans Measuring and ploting - ${k}"	62	echo "Kmeans Measuring and ploting - ${k}"
48		63
49	# This script compute measures from clustering	64	SUB_EXP_DIR="${EXP_DIR}/${kfold}/${k}"
50	#python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" "${pvector_file}" "${lst_dir}/train_${kfold}.lst" "${lst_dir}/val_${kfold}.lst" --outfile "${output_kfold}/${k}/measures.json"	65
51		66	# -- EXTRACT CLUSTERING LABELS
		67	python3 bin/extract_kmeans.py "${SUB_EXP_DIR}/clustering_${k}.pkl" \
		68	"${VECTOR_FILE}" \
		69	--outfile "${SUB_EXP_DIR}/clustered_${k}.txt"
		70
		71	# -- MEASURES AND PLOT WITH RESPECT TO LANG VAR
		72	# Measures
		73	python3 bin/measure_clustering.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
		74	"${METAS_LANG}" \
		75	"${TRAIN_LST}" \
		76	"${VAL_LST}" \
		77	--outfile "${SUB_EXP_DIR}/measures_lang.json"
		78
		79	# This script plots the count matrix of the train set
		80	python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
		81	"${METAS_LANG}" \
		82	"${TRAIN_LST}" \
		83	--outfile "${SUB_EXP_DIR}/train_count_matrix_lang.pdf"
		84
		85	# This script plots the count matrix of the validation set
		86	python3 bin/plot-count-matrix.py "${SUB_EXP_DIR}/clustered_${k}.txt" \
		87	"${METAS_LANG}" \
		88	"${VAL_LST}" \
		89	--outfile "${SUB_EXP_DIR}/val_count_matrix_lang.pdf"
		90
		91	rm ${SUB_EXP_DIR}/clustered_${k}.txt
52	#python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" \	92	#python3 bin/measure_clustering.py "${output_kfold}/${k}/clustered_${k}.txt" \
53	# "${NEW_LSTDIR}/metas_${kfold}_type.lst" "${lst_dir}/train_${kfold}.lst" \	93	# "${NEW_LSTDIR}/metas_${kfold}_type.lst" "${lst_dir}/train_${kfold}.lst" \
54	# "${lst_dir}/val_${kfold}.lst" \	94	# "${lst_dir}/val_${kfold}.lst" \
55	# --outfile "${output_kfold}/${k}/measures_type.json"	95	# --outfile "${output_kfold}/${k}/measures_type.json"
56		96
57	# This script plots the count matrix of the train set	97	# This script plots the count matrix of the train set
58	python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \	98	#python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
59	${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/train_${kfold}.lst \	99	# ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/train_${kfold}.lst \
60	--outfile ${output_kfold}/${k}/train_count_matrix_type.pdf	100	# --outfile ${output_kfold}/${k}/train_count_matrix_type.pdf
61		101
62	# This script plots the count matrix of the validation set	102	# This script plots the count matrix of the validation set
63	python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \	103	#python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
64	${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/val_${kfold}.lst \	104	# ${NEW_LSTDIR}/metas_${kfold}_type.lst ${lst_dir}/val_${kfold}.lst \
65	--outfile ${output_kfold}/${k}/val_count_matrix_type.pdf	105	# --outfile ${output_kfold}/${k}/val_count_matrix_type.pdf
66		106
67	# This script plots the count matrix of the train set	107	# This script plots the count matrix of the train set
68	python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \	108	#python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
69	${pvector_file} ${lst_dir}/train_${kfold}.lst \	109	# ${pvector_file} ${lst_dir}/train_${kfold}.lst \
70	--outfile ${output_kfold}/${k}/train_count_matrix.pdf	110	# --outfile ${output_kfold}/${k}/train_count_matrix.pdf
71		111
72	# This script plots the count matrix of the validation set	112	# This script plots the count matrix of the validation set
73	python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \	113	#python3 bin/plot-count-matrix.py ${output_kfold}/${k}/clustered_${k}.txt \
74	${pvector_file} ${lst_dir}/val_${kfold}.lst \	114	# ${pvector_file} ${lst_dir}/val_${kfold}.lst \
75	--outfile ${output_kfold}/${k}/val_count_matrix.pdf	115	# --outfile ${output_kfold}/${k}/val_count_matrix.pdf
76	done	116	done
77	done	117	done
78		118

run.sh

Diff comments View file @ 95142df

1		1
2	#OUTDIR="exp/test/pvector-2"	2	#OUTDIR="exp/test/pvector-2"
3	#DATADIR="data"	3	#DATADIR="data"
4	#NEW_LSTDIR="${OUTDIR}/lst"	4	#NEW_LSTDIR="${OUTDIR}/lst"
5		5
6	#VECTOR_FILES_BEGIN="${DATADIR}/pvectors_1rst/pvectors_teacher"	6	#VECTOR_FILES_BEGIN="${DATADIR}/pvectors_1rst/pvectors_teacher"
7	#VECTOR_FILES_END=".txt"	7	#VECTOR_FILES_END=".txt"
8	#VECTOR_FILE="" # To specify if there's only one	8	#VECTOR_FILE="" # To specify if there's only one
9	#VECTOR_FILES_ONE=false # Specify there's only one file	9	#VECTOR_FILES_ONE=false # Specify there's only one file
10		10
11	#KMIN=2	11	#KMIN=2
12	#KMAX=100	12	#KMAX=100
13		13
14	# -- LOAD CONFIG FILE	14	# -- LOAD CONFIG FILE
15	CONFIG_FILE="config.sh"	15	CONFIG_FILE="config.sh"
16		16
17	if [ $# -eq 1 ]	17	if [ $# -eq 1 ]
18	then	18	then
19	CONFIG_FILE="$1"	19	CONFIG_FILE="$1"
20	else	20	else
21	echo "Need to have one and only one argument"	21	echo "Need to have one and only one argument"
22	exit -1	22	exit -1
23	fi	23	fi
24		24
25	source $CONFIG_FILE	25	source $CONFIG_FILE
26		26
27	# -- DEFAULTS VALUES CONFIGURATION	27	# -- DEFAULTS VALUES CONFIGURATION
28	if [ -z "$VECTOR_FILES_ONE" ]	28	if [ -z "$VECTOR_FILES_ONE" ]
29	then	29	then
30	VECTOR_FILES_ONE=false	30	VECTOR_FILES_ONE=false
31	fi	31	fi
32		32
33		33
34		34
35	# -- MAKE DIRECTORIES	35	# -- MAKE DIRECTORIES
36	if [ ! -d "$OUTDIR" ];	36	if [ ! -d "$OUTDIR" ];
37	then	37	then
38	mkdir -p $OUTDIR	38	mkdir -p $OUTDIR
39	fi	39	fi
40		40
41	if [ ! -d "${NEW_LSTDIR}" ];	41	if [ ! -d "${NEW_LSTDIR}" ];
42	then	42	then
43	mkdir -p ${NEW_LSTDIR}	43	mkdir -p ${NEW_LSTDIR}
44	fi	44	fi
45		45
46		46
		47	# -- KFOLD MIN and MAX
		48	if [ -z "$MIN_KFOLD" ]
		49	then
		50	MIN_KFOLD=1
		51	fi
		52
		53	if [ -z "$MAX_KFOLD" ]
		54	then
		55	MAX_KFOLD=4
		56	fi
		57
47	# -- BEGIN BY KFOLD	58	# -- BEGIN BY KFOLD
48	for kfold in {1..4}	59	for kfold in $(seq ${MIN_KFOLD} ${MAX_KFOLD})
49	do	60	do
50	# Some useful variables	61	# Some useful variables
51	CHAR_INFO="${DATADIR}/character_information.csv"	62	CHAR_INFO="${DATADIR}/character_information.csv"
52	TRAIN_TYPE_LST="${NEW_LSTDIR}/train_${kfold}_type.lst"	63	TRAIN_TYPE_LST="${NEW_LSTDIR}/train_${kfold}_type.lst"
53	VAL_TYPE_LST="${NEW_LSTDIR}/val_${kfold}_type.lst"	64	VAL_TYPE_LST="${NEW_LSTDIR}/val_${kfold}_type.lst"
		65	TRAIN_LANG_LST="${NEW_LSTDIR}/train_${kfold}_lang.lst"
		66	VAL_LANG_LST="${NEW_LSTDIR}/val_${kfold}_lang.lst"
54		67
55	# Configuration for the run clustering file	68	# Configuration for the run clustering file
56	if [ ${VECTOR_FILES_ONE} == false ]	69	if [ ${VECTOR_FILES_ONE} == false ]
57	then	70	then
58	VECTOR_FILE="${VECTOR_FILES_BEGIN}_${kfold}${VECTOR_FILES_END}"	71	VECTOR_FILE="${VECTOR_FILES_BEGIN}_${kfold}${VECTOR_FILES_END}"
59	fi	72	fi
60		73
61	TRAIN_LST="${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst"	74	TRAIN_LST="${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst"
62	VAL_LST="${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst"	75	VAL_LST="${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst"
63	EXP_DIR="${OUTDIR}/${kfold}"	76	EXP_DIR="${OUTDIR}/${kfold}"
64	METAS_TYPE="${NEW_LSTDIR}/metas_${kfold}_type.lst" #*	77	METAS_TYPE="${NEW_LSTDIR}/metas_${kfold}_type.lst"
65	METAS_CHARACTER="${DATADIR}/masseffect.lst"	78	METAS_CHARACTER="${DATADIR}/masseffect.lst"
66		79	METAS_LANG="${NEW_LSTDIR}/metas_${kfold}_lang.lst"
67		80
68		81
69	if [ ! -d "${EXP_DIR}" ];	82	if [ ! -d "${EXP_DIR}" ];
70	then	83	then
71	mkdir -p ${EXP_DIR}	84	mkdir -p ${EXP_DIR}
72	fi	85	fi
73		86
74		87
75	# Extract character information	88	# EXTRACT TYPE INFORMATION
76	echo "Extracting character information"	89	echo "Extracting character information"
		90	echo "Replace in train"
77	python3 "bin/replace_label.py" \	91	python3 "bin/replace_label.py" \
78	"${METAS_CHARACTER}" \	92	"${METAS_CHARACTER}" \
79	"${CHAR_INFO}" \	93	"${CHAR_INFO}" \
80	--field "type" \	94	--field "type" \
81	--lst "${TRAIN_LST}" \	95	--lst "${TRAIN_LST}" \
82	--outfile "${TRAIN_TYPE_LST}"	96	--outfile "${TRAIN_TYPE_LST}"
83		97
		98	echo "Replace in val"
84	python3 "bin/replace_label.py" \	99	python3 "bin/replace_label.py" \
85	"${METAS_CHARACTER}" \	100	"${METAS_CHARACTER}" \
86	"${CHAR_INFO}" \	101	"${CHAR_INFO}" \
87	--field "type" \	102	--field "type" \
88	--lst "${VAL_LST}" \	103	--lst "${VAL_LST}" \
89	--outfile "${VAL_TYPE_LST}"	104	--outfile "${VAL_TYPE_LST}"
90		105
		106	echo "Merge them"
91	cat "${TRAIN_TYPE_LST}" "${VAL_TYPE_LST}" > "${METAS_TYPE}"	107	cat "${TRAIN_TYPE_LST}" "${VAL_TYPE_LST}" > "${METAS_TYPE}"
92		108
		109	# EXTRACT LANGUAGE INFORMATION
		110	echo "Language info for train"
		111	awk '$2=$1' FS=, OFS=, ${TRAIN_LST} > ${TRAIN_LANG_LST}
		112	echo "Language info for val"
		113	awk '$2=$1' FS=, OFS=, ${VAL_LST} > ${VAL_LANG_LST}
		114
		115	echo "Merge them"
		116	cat "${TRAIN_LANG_LST}" "${VAL_LANG_LST}" > "${METAS_LANG}"
		117
		118	echo "Then Run Clustering"
93	source "run-clustering.sh"	119	source "run-clustering.sh"
94	done	120	done
95		121
96	# Regroup measures with respect to character classes	122	# Regroup measures with respect to character classes
97	echo "Regrouping measures with respect to character classes"	123	echo "Regrouping measures with respect to character classes"
98	python3 "bin/regroup-measures.py" ${OUTDIR}	124	python3 "bin/regroup-measures.py" ${OUTDIR}
99		125
100	# Regroup measures with respect to type classes	126	# Regroup measures with respect to type classes
101	echo "Regrouping measures with respect to type classes"	127	echo "Regrouping measures with respect to type classes"
102	python3 "bin/regroup-measures.py" ${OUTDIR} --suffix "_type" --measurefile "measures_type.json"	128	python3 "bin/regroup-measures.py" ${OUTDIR} --suffix "_type" --measurefile "measures_type.json"
103		129
104		130
105		131