# run.sh
# For now, this script only runs a few small commands
# that we want to test.

# Locations used by the rest of the script:
#   OUTDIR     - root directory for everything this experiment writes
#   DATADIR    - root directory of the input data
#   NEW_LSTDIR - directory (under OUTDIR) for the relabelled list files
OUTDIR="exp/kmeans_teacher_1/pvector-1"
DATADIR="data"
NEW_LSTDIR="${OUTDIR}/lst"

# Inclusive bounds of the k values passed to the clustering steps.
kmin=2
kmax=100

# Make sure both output directories exist before anything writes into them.
[ -d "${OUTDIR}" ] || mkdir -p "${OUTDIR}"
[ -d "${NEW_LSTDIR}" ] || mkdir -p "${NEW_LSTDIR}"

# Run the whole k-means pipeline for each fold in the list below.
# Only fold 4 is listed; further fold ids can be appended to the word list.
# Relies on OUTDIR, DATADIR, NEW_LSTDIR, kmin and kmax being set beforehand.
for kfold in 4
do
    pvector_file="${DATADIR}/pvectors_1rst/pvectors_teacher_${kfold}.txt"
    lst_dir="${DATADIR}/pvectors_1rst/lst"
    output_kfold="${OUTDIR}/${kfold}"

    # Per-fold output directory; mkdir -p succeeds if it already exists.
    mkdir -p "${output_kfold}"

    # Extract character information: write "type"-labelled versions of the
    # train and val lists of this fold (train first, then val).
    echo "Extracting character information"
    for split in train val
    do
        python3 "bin/replace_label.py" \
            "${DATADIR}/masseffect.lst" \
            "${DATADIR}/character_information.csv" \
            --field "type" --lst "${lst_dir}/${split}_${kfold}.lst" \
            --outfile "${NEW_LSTDIR}/${split}_${kfold}_type.lst"
    done

    # Concatenate both relabelled lists into one metadata list for the fold.
    cat "${NEW_LSTDIR}/train_${kfold}_type.lst" \
        "${NEW_LSTDIR}/val_${kfold}_type.lst" \
        > "${NEW_LSTDIR}/metas_${kfold}_type.lst"

    # -- TRAIN KMEANS
    # The per-k steps below read ${output_kfold}/<k>/clustering_<k>.pkl;
    # presumably this step creates those files and directories for every k
    # in [kmin, kmax] -- confirm against bin/cluster_kmeans.py.
    echo "Clustering - ${kfold}"
    python3 bin/cluster_kmeans.py "${pvector_file}" \
        "${lst_dir}/train_${kfold}.lst" \
        "${output_kfold}" --kmin "${kmin}" --kmax "${kmax}"

    for k in $(seq "${kmin}" 1 "${kmax}")
    do
        k_dir="${output_kfold}/${k}"
        clustered_file="${k_dir}/clustered_${k}.txt"
        metas_file="${NEW_LSTDIR}/metas_${kfold}_type.lst"

        # -- EXTRACT KMEANS VALUES
        echo "Kmeans Measuring and extraction - ${k}"
        python3 bin/extract_kmeans.py "${k_dir}/clustering_${k}.pkl" \
            "${pvector_file}" \
            --outfile "${clustered_file}"

        # -- MEASURES AND PLOT WITH RESPECT TO CHARACTER VAR
        # Measures
        python3 bin/measure_clustering.py "${clustered_file}" \
            "${pvector_file}" \
            "${lst_dir}/train_${kfold}.lst" "${lst_dir}/val_${kfold}.lst" \
            --outfile "${k_dir}/measures.json"

        # Plot the count matrix for the train set, then for the val set.
        for split in train val
        do
            python3 bin/plot-count-matrix.py "${clustered_file}" \
                "${pvector_file}" "${lst_dir}/${split}_${kfold}.lst" \
                --outfile "${k_dir}/${split}_count_matrix.pdf"
        done

        # Regroup measures with respect to character var.
        # NOTE(review): this is given the experiment root rather than a
        # per-k path, yet runs once per k -- it may only need to run once
        # after the loop; confirm against bin/regroup-measures.py.
        python3 bin/regroup-measures.py "${OUTDIR}/"

        # -- MEASURES AND PLOT WITH RESPECT TO TYPE VAR
        # Same steps as above, but using the "type"-labelled metadata list
        # in place of the p-vector file.
        # Measures
        python3 bin/measure_clustering.py "${clustered_file}" \
            "${metas_file}" \
            "${lst_dir}/train_${kfold}.lst" "${lst_dir}/val_${kfold}.lst" \
            --outfile "${k_dir}/measures_type.json"

        # Plot the count matrix for the train set, then for the val set.
        for split in train val
        do
            python3 bin/plot-count-matrix.py "${clustered_file}" \
                "${metas_file}" "${lst_dir}/${split}_${kfold}.lst" \
                --outfile "${k_dir}/${split}_count_matrix_type.pdf"
        done

        # Regroup measures with respect to type var. The measure file name
        # must match the --outfile written by the type-level measures step.
        python3 bin/regroup-measures.py "${OUTDIR}/" \
            --suffix "_type" --measurefile "measures_type.json"
    done
done