# run.sh 3.06 KB
#OUTDIR="exp/test/pvector-2"
#DATADIR="data"
#NEW_LSTDIR="${OUTDIR}/lst"

#VECTOR_FILES_BEGIN="${DATADIR}/pvectors_1rst/pvectors_teacher"
#VECTOR_FILES_END=".txt"
#VECTOR_FILE="" # To specify if there's only one
#VECTOR_FILES_ONE=false # Specify there's only one file

#KMIN=2
#KMAX=100

# -- LOAD CONFIG FILE
# The path of the configuration file is the single mandatory argument of this
# script. The file is sourced, so every variable it assigns becomes visible to
# the rest of the script.
if [ $# -ne 1 ]
then
    # Usage errors go to stderr; the exit status must lie in 0-255.
    echo "Need to have one and only one argument" >&2
    exit 1
fi

CONFIG_FILE="$1"

# Quoted so that a path containing spaces or glob characters is sourced as-is.
# shellcheck source=/dev/null
source "$CONFIG_FILE"

# -- DEFAULT VALUES CONFIGURATION
# Fall back to "false" when VECTOR_FILES_ONE is unset or empty at this point.
: "${VECTOR_FILES_ONE:=false}"



# -- MAKE DIRECTORIES
# Create the output directory and the directory that receives the generated
# list files. `mkdir -p` succeeds silently when a directory already exists, so
# no prior `-d` test is needed. Paths are quoted so that spaces or glob
# characters are preserved, and `--` guards against a leading dash.
if ! mkdir -p -- "$OUTDIR" "${NEW_LSTDIR}"
then
    # Nothing below can work without these directories; stop early.
    echo "Cannot create directories: $OUTDIR ${NEW_LSTDIR}" >&2
    exit 1
fi


# -- KFOLD MIN and MAX
# Bounds of the fold range; each one falls back to its default (1 and 4) when
# it is unset or empty at this point.
: "${MIN_KFOLD:=1}"
: "${MAX_KFOLD:=4}"

# -- BEGIN BY KFOLD
# For every fold from MIN_KFOLD to MAX_KFOLD:
#   1. build the "type" label lists for the train and validation sets,
#   2. build the "lang" label lists for the train and validation sets,
#   3. merge each train/val pair into a single metas file,
#   4. source run-clustering.sh.
# The upper-case variables are plain globals: run-clustering.sh is sourced, so
# it runs in this shell and can see them (presumably it reads them -- verify
# against run-clustering.sh before renaming or removing any of them).
for kfold in $(seq "${MIN_KFOLD}" "${MAX_KFOLD}")
do
    # Some useful variables
    CHAR_INFO="${DATADIR}/character_information.csv"
    TRAIN_TYPE_LST="${NEW_LSTDIR}/train_${kfold}_type.lst"
    VAL_TYPE_LST="${NEW_LSTDIR}/val_${kfold}_type.lst"
    TRAIN_LANG_LST="${NEW_LSTDIR}/train_${kfold}_lang.lst"
    VAL_LANG_LST="${NEW_LSTDIR}/val_${kfold}_lang.lst"

    # Configuration for the run clustering file
    # Unless a single vector file was configured, select the per-fold file.
    if [ "${VECTOR_FILES_ONE}" = false ]
    then
        VECTOR_FILE="${VECTOR_FILES_BEGIN}_${kfold}${VECTOR_FILES_END}"
    fi

    TRAIN_LST="${DATADIR}/pvectors_1rst/lst/train_${kfold}.lst"
    VAL_LST="${DATADIR}/pvectors_1rst/lst/val_${kfold}.lst"
    EXP_DIR="${OUTDIR}/${kfold}"
    METAS_TYPE="${NEW_LSTDIR}/metas_${kfold}_type.lst"
    METAS_CHARACTER="${DATADIR}/masseffect.lst"
    METAS_LANG="${NEW_LSTDIR}/metas_${kfold}_lang.lst"

    # Per-fold experiment directory (mkdir -p is a no-op if it exists).
    mkdir -p -- "${EXP_DIR}"

    # EXTRACT TYPE INFORMATION
    echo "Extracting character information"
    echo "Replace in train"
    python3 "bin/replace_label.py" \
        "${METAS_CHARACTER}" \
        "${CHAR_INFO}" \
        --field "type" \
        --lst "${TRAIN_LST}" \
        --outfile "${TRAIN_TYPE_LST}"

    echo "Replace in val"
    python3 "bin/replace_label.py" \
        "${METAS_CHARACTER}" \
        "${CHAR_INFO}" \
        --field "type" \
        --lst "${VAL_LST}" \
        --outfile "${VAL_TYPE_LST}"

    echo "Merge them"
    cat "${TRAIN_TYPE_LST}" "${VAL_TYPE_LST}" > "${METAS_TYPE}"

    # EXTRACT LANGUAGE INFORMATION
    # Copy the first comma-separated field into the second one.
    # NOTE(review): `$2=$1` is used as an awk *pattern*, so a line is only
    # printed when the copied value is non-empty and not numerically zero;
    # other lines are dropped. Kept as-is -- confirm this is intended.
    echo "Language info for train"
    awk -F, -v OFS=, '$2=$1' "${TRAIN_LST}" > "${TRAIN_LANG_LST}"
    echo "Language info for val"
    awk -F, -v OFS=, '$2=$1' "${VAL_LST}" > "${VAL_LANG_LST}"

    echo "Merge them"
    cat "${TRAIN_LANG_LST}" "${VAL_LANG_LST}" > "${METAS_LANG}"

    echo "Then Run Clustering"
    source "run-clustering.sh"
done

# Regroup measures with respect to character classes
# OUTDIR is quoted so that a path with spaces reaches the script as a single
# argument.
echo "Regrouping measures with respect to character classes"
python3 "bin/regroup-measures.py" "${OUTDIR}"

# Regroup measures with respect to type classes
echo "Regrouping measures with respect to type classes"
python3 "bin/regroup-measures.py" "${OUTDIR}" --suffix "_type" --measurefile "measures_type.json"