Blame view
bin/regroup-measures.py
3.21 KB
ee5cc2a7e Regroup all measu... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
'''
Regroup per-fold measures and plot them against the number of clusters.

For every fold and every k in [kmin, kmax], the script reads the JSON file
<expdir>/<fold>/<k>/<measurefile> and collects, for the "train" and "val"
subsets, the entropy, vscore, homogeneity and completeness values, plus the
top-level "disequilibrium" value.  For each fold, one scatter plot per
measure and subset (and one for the disequilibrium) is saved as a PDF under
<expdir>/res.
'''

import argparse
import json
import os

import matplotlib.pyplot as plt
import numpy as np


def plot_values_clusters(filepath, values, title, xlabel, ylabel, first_k=2):
    '''
    Scatter-plot a sequence of values and save the figure to `filepath`.

    The i-th value is drawn at x = first_k + i, with one x tick every five
    points starting at first_k.

    filepath: destination passed to plt.savefig.
    values:   sequence of numbers, one per consecutive x position.
    title:    figure title.
    xlabel:   label of the x axis.
    ylabel:   label of the y axis.
    first_k:  x position of the first value (defaults to 2, the value that
              used to be hard-coded here).
    '''
    values = np.asarray(values)
    x = np.arange(len(values)) + first_k
    x_ticks = np.arange(len(values), step=5) + first_k
    plt.scatter(x, values)
    plt.xticks(x_ticks)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.savefig(filepath)
    # Close the implicit pyplot figure so the next call starts from a clean one.
    plt.close()


# -- PARSER
parser = argparse.ArgumentParser(description="")
parser.add_argument("expdir", type=str, help="Directory of experiment")
parser.add_argument("--measurefile", type=str, default="measures.json",
                    help="Measure file it searchs in folders")
parser.add_argument("--suffix", type=str, default="",
                    help="suffix of saved files")

args = parser.parse_args()

# Example: EXP_DIR = "exp/kmeans_teacher_1/pvector-1"
EXP_DIR = args.expdir
MEASURE_FILE = args.measurefile
SUFFIX = args.suffix
RESULTS_DIR = os.path.join(EXP_DIR, "res")

# -- CONFIG
kmin = 2
kmax = 100
kfolds = range(1, 5)  # fold sub-directories "1" to "4"

# -- CREATE FOLDER
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(RESULTS_DIR, exist_ok=True)

# -- BEGIN REGROUPMENT
subsets = ["train", "val"]
metrics = ["entropy", "vscore", "homogeneity", "completeness"]


def init_measures():
    ''' Return a fresh {subset: {metric: []}} accumulator. '''
    return {subset: {metric: [] for metric in metrics} for subset in subsets}


for kfold in kfolds:
    print(kfold)

    # Fresh accumulators for each fold so that plots never mix folds.
    measures = init_measures()
    disequilibriums = []

    for k in range(kmin, kmax + 1):
        measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE)
        with open(measures_file, 'r') as f:
            meas_data = json.load(f)

        disequilibriums.append(meas_data["disequilibrium"])
        for subset in subsets:
            for metric in metrics:
                measures[subset][metric].append(meas_data[subset][metric])

    for subset in subsets:
        for metric in metrics:
            # "entropy" -> "Entropy", used both in the title and as y label.
            label = metric.capitalize()
            plot_values_clusters(
                os.path.join(
                    RESULTS_DIR,
                    "{}_{}_{}{}.pdf".format(metric, subset, kfold, SUFFIX)),
                measures[subset][metric],
                "{} {} set {}".format(label, subset, kfold),
                "N clusters",
                label,
                first_k=kmin)

    # The disequilibrium is stored once per measure file, not per subset.
    plot_values_clusters(
        os.path.join(RESULTS_DIR,
                     "disequilibrium_{}{}.pdf".format(kfold, SUFFIX)),
        disequilibriums,
        "Disequilibrium set {}".format(kfold),
        "N clusters",
        "Disequilibrium",
        first_k=kmin)