Commit ee5cc2a7e7bf0f6a652ad365624150de89d9d9c3

Authored by Mathias Quillot
1 parent b7530e2693
Exists in master

Regroup all measures from an experiment. Can specify the measure file in each model you want to treat

Showing 1 changed file with 112 additions and 0 deletions Inline Diff

bin/regroup-measures.py
File was created 1 '''
2 Regroup the measures of an experiment and plot each of them against the number of clusters.
3 '''
4
5 import numpy as np
6 import matplotlib.pyplot as plt
7 import argparse
8 import os
9 import json
10
11
def plot_values_clusters(filepath, values, title, xlabel, ylabel, kmin=2, tick_step=5):
    """Save a scatter plot of one value per number of clusters.

    The i-th entry of ``values`` is drawn at ``x = kmin + i``.

    Args:
        filepath: Destination handed to ``Figure.savefig``.
        values: Sequence of values to plot, one per cluster count.
        title: Title of the plot.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        kmin: Cluster count associated with ``values[0]``. The default (2)
            is the offset that used to be hard-coded.
        tick_step: Spacing between two labelled x ticks. The default (5)
            is the spacing that used to be hard-coded.
    """
    values = np.asarray(values)
    x = np.arange(len(values)) + kmin
    x_ticks = np.arange(0, len(values), tick_step) + kmin

    # Draw on a dedicated figure instead of pyplot's implicit "current"
    # figure, so that an already open figure is never drawn on or closed.
    fig, ax = plt.subplots()
    try:
        ax.scatter(x, values)
        ax.set_xticks(x_ticks)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        fig.savefig(filepath)
    finally:
        # Release the figure even when saving fails; this function is
        # called many times in a row and open figures accumulate otherwise.
        plt.close(fig)
24
# -- PARSER
# Command line: the experiment directory is mandatory; the name of the
# measure file looked up in each model folder and the suffix appended to
# the saved plot names are optional.
parser = argparse.ArgumentParser(description="")
parser.add_argument("expdir", type=str, help="Directory of experiment")
parser.add_argument("--measurefile", type=str, default="measures.json", help="Measure file it searchs in folders")
parser.add_argument("--suffix", type=str, default="", help="suffix of saved files")

args = parser.parse_args()
EXP_DIR = args.expdir
MEASURE_FILE = args.measurefile
SUFFIX = args.suffix

# Example of experiment directory: "exp/kmeans_teacher_1/pvector-1"
# All plots are written to the "res" sub-folder of the experiment.
RESULTS_DIR = os.path.join(EXP_DIR, "res")

# -- CONFIG
# Inclusive range of cluster counts whose measure files are read.
kmin = 2
kmax = 100


# -- CREATE FOLDER
# exist_ok avoids the check-then-create race of testing os.path.exists first.
os.makedirs(RESULTS_DIR, exist_ok=True)

# -- BEGIN REGROUPMENT

# Keys of the per-subset sections read from every measure file.
subsets = ["train", "val"]

# Disequilibrium values collected for the fold currently being processed.
disequilibriums = []
53
def init_measures(subset_names=None,
                  metric_names=("entropy", "vscore", "homogeneity", "completeness")):
    """Build an empty accumulator of measures.

    Args:
        subset_names: Iterable of subset names. When omitted, the
            module-level ``subsets`` list is used, which keeps the historical
            zero-argument call working.
        metric_names: Names of the measures tracked for each subset.

    Returns:
        A dict mapping each subset name to a dict that maps each metric name
        to its own new empty list.
    """
    if subset_names is None:
        subset_names = subsets
    # A fresh list is created for every (subset, metric) pair so that
    # appending to one series never affects another.
    return {
        subset: {metric: [] for metric in metric_names}
        for subset in subset_names
    }
64
# Accumulator for the fold being processed; it is emptied again at the end
# of every fold.
measures = init_measures()

# (key read from the measure files, label used in plot titles and y axis)
metric_labels = (
    ("entropy", "Entropy"),
    ("vscore", "Vscore"),
    ("homogeneity", "Homogeneity"),
    ("completeness", "Completeness"),
)

for kfold in range(1, 5):
    print(kfold)
    fold_tag = str(kfold)

    # Read the measure file of every cluster count of this fold.
    for k in range(kmin, kmax + 1):
        measures_file = os.path.join(EXP_DIR, fold_tag, str(k), MEASURE_FILE)
        with open(measures_file, 'r') as f:
            meas_data = json.load(f)
        disequilibriums.append(meas_data["disequilibrium"])
        for subset in subsets:
            subset_data = meas_data[subset]
            for key, _label in metric_labels:
                measures[subset][key].append(subset_data[key])

    # One plot per (subset, metric) pair for this fold.
    for subset in subsets:
        subset_tag = str(subset)
        for key, label in metric_labels:
            plot_values_clusters(
                os.path.join(RESULTS_DIR, key + "_" + subset_tag + "_" + fold_tag + str(SUFFIX) + ".pdf"),
                measures[subset][key],
                label + " " + subset_tag + " set " + fold_tag,
                "N clusters",
                label)

    # Disequilibrium is read once per measure file (not per subset), so it
    # gets a single plot per fold.
    plot_values_clusters(
        os.path.join(RESULTS_DIR, "disequilibrium_" + fold_tag + str(SUFFIX) + ".pdf"),
        disequilibriums,
        "Disequilibrium set " + fold_tag,
        "N clusters",
        "Disequilibrium")

    # Start the next fold from empty accumulators.
    measures = init_measures()
    disequilibriums = []
113