From ee5cc2a7e7bf0f6a652ad365624150de89d9d9c3 Mon Sep 17 00:00:00 2001
From: Mathias Quillot
Date: Mon, 22 Jul 2019 12:14:03 +0200
Subject: [PATCH] Regroup all measures from an experiment. Can specify the measure file in each model you want to treat

---
 bin/regroup-measures.py | 116 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)
 create mode 100644 bin/regroup-measures.py

diff --git a/bin/regroup-measures.py b/bin/regroup-measures.py
new file mode 100644
index 0000000..187a558
--- /dev/null
+++ b/bin/regroup-measures.py
@@ -0,0 +1,116 @@
+'''
+Regroup the measures of an experiment and plot them.
+
+For every fold and every number of clusters k, the JSON measure file
+<expdir>/<fold>/<k>/<measurefile> is loaded. For each fold, one scatter
+plot (PDF) per subset and per measure, plus one for the disequilibrium,
+is then saved in <expdir>/res.
+
+Example:
+    python bin/regroup-measures.py exp/kmeans_teacher_1/pvector-1
+'''
+
+import numpy as np
+import matplotlib.pyplot as plt
+import argparse
+import os
+import json
+
+
+# -- CONFIG
+# Range of numbers of clusters (both bounds included).
+KMIN = 2
+KMAX = 100
+# Fold directories to regroup.
+KFOLDS = range(1, 5)
+SUBSETS = ["train", "val"]
+# Measures read for each subset of a measure file.
+METRICS = ["entropy", "vscore", "homogeneity", "completeness"]
+
+
+def plot_values_clusters(filepath, values, title, xlabel, ylabel, first_k=2):
+    '''
+    Save a scatter plot of one value per number of clusters.
+
+    filepath: path of the saved figure.
+    values: values to plot; values[i] is drawn at x = first_k + i.
+    title, xlabel, ylabel: texts displayed on the figure.
+    first_k: number of clusters of the first value. An x tick is drawn
+        every 5 values, starting from it.
+    '''
+    values = np.asarray(values)
+    x = np.arange(len(values)) + first_k
+    x_ticks = np.arange(len(values), step=5) + first_k
+    plt.scatter(x, values)
+    plt.xticks(x_ticks)
+    plt.title(title)
+    plt.xlabel(xlabel)
+    plt.ylabel(ylabel)
+    plt.savefig(filepath)
+    # Close the figure so that the next plot starts from an empty one.
+    plt.close()
+
+
+def init_measures():
+    '''Return an empty list of values for each measure of each subset.'''
+    return {subset: {metric: [] for metric in METRICS} for subset in SUBSETS}
+
+
+def regroup_fold(exp_dir, results_dir, kfold, measure_file, suffix):
+    '''Load the measures of one fold for every k and save its plots.'''
+    # Fresh accumulators: a plot only contains the values of its own fold.
+    measures = init_measures()
+    disequilibriums = []
+
+    for k in range(KMIN, KMAX + 1):
+        measures_file = os.path.join(exp_dir, str(kfold), str(k), measure_file)
+        with open(measures_file, 'r') as f:
+            meas_data = json.load(f)
+        disequilibriums.append(meas_data["disequilibrium"])
+        for subset in SUBSETS:
+            for metric in METRICS:
+                measures[subset][metric].append(meas_data[subset][metric])
+
+    tag = str(kfold) + str(suffix)
+    for subset in SUBSETS:
+        for metric in METRICS:
+            label = metric.capitalize()
+            plot_values_clusters(
+                os.path.join(results_dir, metric + "_" + str(subset) + "_" + tag + ".pdf"),
+                measures[subset][metric],
+                label + " " + str(subset) + " set " + str(kfold),
+                "N clusters",
+                label,
+                first_k=KMIN)
+    # The disequilibrium is stored once per measure file, not per subset.
+    plot_values_clusters(
+        os.path.join(results_dir, "disequilibrium_" + tag + ".pdf"),
+        disequilibriums,
+        "Disequilibrium set " + str(kfold),
+        "N clusters",
+        "Disequilibrium",
+        first_k=KMIN)
+
+
+def main():
+    '''Parse the command line and regroup the measures of every fold.'''
+    # -- PARSER
+    parser = argparse.ArgumentParser(description="")
+    parser.add_argument("expdir", type=str, help="Directory of experiment")
+    parser.add_argument("--measurefile", type=str, default="measures.json", help="Measure file it searchs in folders")
+    parser.add_argument("--suffix", type=str, default="", help="suffix of saved files")
+    args = parser.parse_args()
+
+    # -- CREATE FOLDER
+    results_dir = os.path.join(args.expdir, "res")
+    if not os.path.exists(results_dir):
+        os.makedirs(results_dir)
+
+    # -- BEGIN REGROUPMENT
+    for kfold in KFOLDS:
+        print(kfold)
+        regroup_fold(args.expdir, results_dir, kfold, args.measurefile, args.suffix)
+
+
+if __name__ == "__main__":
+    main()
-- 
1.8.2.3