Commit ee5cc2a7e7bf0f6a652ad365624150de89d9d9c3
1 parent
b7530e2693
Exists in
master
Regroup all measures from an experiment. The measure file to read for each model you want to process can be specified.
Showing 1 changed file with 112 additions and 0 deletions Side-by-side Diff
bin/regroup-measures.py
1 | +''' | |
2 | +Regroup results into one file and a plot. | |
3 | +''' | |
4 | + | |
5 | +import numpy as np | |
6 | +import matplotlib.pyplot as plt | |
7 | +import argparse | |
8 | +import os | |
9 | +import json | |
10 | + | |
11 | + | |
def plot_values_clusters(filepath, values, title, xlabel, ylabel,
                         first_k=2, tick_step=5):
    """Save a scatter plot with one point per consecutive cluster count.

    ``values[i]`` is drawn at x = ``first_k + i``.

    Args:
        filepath: Destination handed to ``savefig``.
        values: Sequence of values, one per consecutive cluster count.
        title: Title of the figure.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        first_k: Cluster count matching ``values[0]``. Defaults to 2, the
            offset that used to be hard-coded.
        tick_step: Spacing between labelled x ticks. Defaults to 5, the
            step that used to be hard-coded.
    """
    values = np.asarray(values)
    x = np.arange(len(values)) + first_k
    x_ticks = np.arange(0, len(values), tick_step) + first_k

    # Draw on a dedicated figure instead of pyplot's implicit "current"
    # one, so a figure left open by the caller is never touched.
    fig, ax = plt.subplots()
    try:
        ax.scatter(x, values)
        ax.set_xticks(x_ticks)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        fig.savefig(filepath)
    finally:
        # Always release the figure, even when saving fails; this function
        # is called many times per run.
        plt.close(fig)
24 | + | |
# -- PARSER
parser = argparse.ArgumentParser(description="")
parser.add_argument(
    "expdir",
    type=str,
    help="Directory of experiment",
)
parser.add_argument(
    "--measurefile",
    type=str,
    default="measures.json",
    help="Measure file it searchs in folders",
)
parser.add_argument(
    "--suffix",
    type=str,
    default="",
    help="suffix of saved files",
)

args = parser.parse_args()
EXP_DIR, MEASURE_FILE, SUFFIX = args.expdir, args.measurefile, args.suffix

# Example experiment directory: exp/kmeans_teacher_1/pvector-1
# All plots are written to a "res" folder inside the experiment directory.
RESULTS_DIR = os.path.join(EXP_DIR, "res")

# -- CONFIG
# Inclusive range of cluster counts read for every fold.
kmin, kmax = 2, 100

# -- CREATE FOLDER
if not os.path.exists(RESULTS_DIR):
    os.makedirs(RESULTS_DIR)

# -- BEGIN REGROUPMENT

# Top-level keys read from each measure file.
subsets = ["train", "val"]

# Disequilibrium values of the fold being processed, one per cluster count.
disequilibriums = []
53 | + | |
def init_measures(subset_names=None,
                  metric_names=("entropy", "vscore",
                                "homogeneity", "completeness")):
    """Build an empty ``{subset: {metric: []}}`` accumulator.

    Args:
        subset_names: Iterable of subset names used as top-level keys.
            When ``None`` (the default) the module-level ``subsets`` list
            is used, so a call without arguments behaves as before.
        metric_names: Iterable of metric names; each becomes a key mapped
            to its own new empty list.

    Returns:
        A dict of dicts of empty lists. Every list is a distinct object,
        so appending to one metric never affects another.
    """
    if subset_names is None:
        subset_names = subsets
    # Materialise once so a one-shot iterator can be reused per subset.
    metric_names = tuple(metric_names)
    return {
        subset: {metric: [] for metric in metric_names}
        for subset in subset_names
    }
64 | + | |
# Accumulators for the first fold; they are renewed at the end of each fold.
measures = init_measures()

# (key in the measure file, label used in plot titles and on the y axis)
metric_labels = (
    ("entropy", "Entropy"),
    ("vscore", "Vscore"),
    ("homogeneity", "Homogeneity"),
    ("completeness", "Completeness"),
)

for kfold in range(1, 5):
    print(kfold)
    fold_tag = str(kfold)

    # Gather the measures of every cluster count for this fold.
    for k in range(kmin, kmax + 1):
        json_path = os.path.join(EXP_DIR, fold_tag, str(k), MEASURE_FILE)
        with open(json_path, 'r') as handle:
            loaded = json.load(handle)
        disequilibriums.append(loaded["disequilibrium"])
        for subset in subsets:
            for key, _label in metric_labels:
                measures[subset][key].append(loaded[subset][key])

    # One plot per subset and per metric.
    for subset in subsets:
        subset_tag = str(subset)
        for key, label in metric_labels:
            pdf_name = key + "_" + subset_tag + "_" + fold_tag + str(SUFFIX) + ".pdf"
            plot_values_clusters(
                os.path.join(RESULTS_DIR, pdf_name),
                measures[subset][key],
                label + " " + subset_tag + " set " + fold_tag,
                "N clusters",
                label)

    # Disequilibrium is stored once per measure file, not per subset.
    plot_values_clusters(
        os.path.join(RESULTS_DIR, "disequilibrium_" + fold_tag + str(SUFFIX) + ".pdf"),
        disequilibriums,
        "Disequilibrium set " + fold_tag,
        "N clusters",
        "Disequilibrium")

    # Start the next fold with empty accumulators.
    measures = init_measures()
    disequilibriums = []