Commit ee5cc2a7e7bf0f6a652ad365624150de89d9d9c3

Authored by Mathias Quillot
1 parent b7530e2693
Exists in master

Regroup all measures from an experiment. You can specify which measure file to read in each model you want to process.

Showing 1 changed file with 112 additions and 0 deletions Side-by-side Diff

bin/regroup-measures.py
  1 +'''
  2 +Regroup results into one file and a plot.
  3 +'''
  4 +
  5 +import numpy as np
  6 +import matplotlib.pyplot as plt
  7 +import argparse
  8 +import os
  9 +import json
  10 +
  11 +
def plot_values_clusters(filepath, values, title, xlabel, ylabel,
                         first_k=2, tick_step=5):
    """Scatter-plot a sequence of values and save the figure to a file.

    The i-th entry of ``values`` is drawn at x = ``first_k + i``.

    Args:
        filepath: Destination handed to ``savefig``.
        values: Sequence of y values (converted with ``np.asarray``).
        title: Title of the plot.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        first_k: x position of ``values[0]``. The default of 2 reproduces
            the previously hard-coded offset.
        tick_step: Spacing between x ticks. The default of 5 reproduces
            the previously hard-coded step.
    """
    y = np.asarray(values)
    x = np.arange(len(y)) + first_k
    x_ticks = np.arange(0, len(y), tick_step) + first_k

    # Draw on a dedicated figure rather than pyplot's implicit "current"
    # one, so a figure left open elsewhere can never receive these points.
    fig, ax = plt.subplots()
    try:
        ax.scatter(x, y)
        ax.set_xticks(x_ticks)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        fig.savefig(filepath)
    finally:
        # Release the figure even when saving fails; this function is
        # called many times in a loop and open figures accumulate.
        plt.close(fig)
  24 +
# -- PARSER
parser = argparse.ArgumentParser(description="")
parser.add_argument("expdir", type=str, help="Directory of experiment")
parser.add_argument("--measurefile", type=str, default="measures.json",
                    help="Measure file it searchs in folders")
parser.add_argument("--suffix", type=str, default="",
                    help="suffix of saved files")

args = parser.parse_args()
EXP_DIR = args.expdir
MEASURE_FILE = args.measurefile
SUFFIX = args.suffix

# Example value for expdir: "exp/kmeans_teacher_1/pvector-1"
# Every plot is written to the "res" sub-directory of the experiment.
RESULTS_DIR = os.path.join(EXP_DIR, "res")

# -- CONFIG
# Inclusive bounds of the per-k sub-directories read by the main loop.
kmin = 2
kmax = 100


# -- CREATE FOLDER
# Create first and inspect the failure afterwards: a separate
# "exists?" check can race with another run of this script (e.g. with a
# different --suffix) creating the same directory at the same time.
try:
    os.makedirs(RESULTS_DIR)
except OSError:
    # An already existing directory is fine; anything else (a regular
    # file in the way, missing permissions, ...) is a real error.
    if not os.path.isdir(RESULTS_DIR):
        raise

# -- BEGIN REGROUPMENT

# Keys looked up in each measure file and used in the output file names.
subsets = ["train", "val"]

# Accumulates the "disequilibrium" value of each measure file read.
disequilibriums = []
  53 +
def init_measures(subset_names=None,
                  metric_names=("entropy", "vscore",
                                "homogeneity", "completeness")):
    """Build an empty accumulator: one list per metric, per subset.

    Args:
        subset_names: Iterable of subset keys. When omitted, the
            module-level ``subsets`` list is used, which is what the
            original no-argument call did.
        metric_names: Iterable of metric keys created under each subset.
            The default is the four metrics the script collects.

    Returns:
        A dict of the form ``{subset: {metric: []}}``. Every list is a
        distinct object, so appending to one never affects another.
    """
    if subset_names is None:
        subset_names = subsets
    return {
        subset: {metric: [] for metric in metric_names}
        for subset in subset_names
    }
  64 +
# Empty per-subset accumulators for the first pass of the outer loop.
measures = init_measures()

# Sub-directories "1".."4" of the experiment are processed one after the
# other (named `kfold` here; presumably cross-validation folds).
for kfold in range(1, 5):
    print(kfold)

    # Read EXP_DIR/<kfold>/<k>/MEASURE_FILE for every k in [kmin, kmax]
    # and append its values, in order of increasing k.
    for k in range(kmin, kmax + 1):
        measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE)
        with open(measures_file, 'r') as f:
            meas_data = json.load(f)
        disequilibriums.append(meas_data["disequilibrium"])
        for subset in subsets:
            for metric in ("entropy", "vscore", "homogeneity", "completeness"):
                measures[subset][metric].append(meas_data[subset][metric])

    # One PDF per (subset, metric) pair for this kfold.
    for subset in subsets:
        for metric in ("entropy", "vscore", "homogeneity", "completeness"):
            # "entropy" -> "Entropy", "vscore" -> "Vscore", ...
            label = metric.capitalize()
            pdf_name = "{}_{}_{}{}.pdf".format(metric, subset, kfold, SUFFIX)
            plot_values_clusters(
                os.path.join(RESULTS_DIR, pdf_name),
                measures[subset][metric],
                "{} {} set {}".format(label, subset, kfold),
                "N clusters",
                label)

    # The disequilibrium is stored once per measure file, not per subset.
    plot_values_clusters(
        os.path.join(RESULTS_DIR,
                     "disequilibrium_{}{}.pdf".format(kfold, SUFFIX)),
        disequilibriums,
        "Disequilibrium set {}".format(kfold),
        "N clusters",
        "Disequilibrium")

    # Reset the accumulators so the next pass starts from empty lists.
    measures = init_measures()
    disequilibriums = []