Commit ee5cc2a7e7bf0f6a652ad365624150de89d9d9c3
1 parent
b7530e2693
Exists in
master
Regroup all measures from an experiment. The measure file to read in each model folder can be specified.
Showing 1 changed file with 112 additions and 0 deletions Inline Diff
bin/regroup-measures.py
File was created | 1 | ''' | |
2 | Regroup results into one file and a plot. | ||
3 | ''' | ||
4 | |||
5 | import numpy as np | ||
6 | import matplotlib.pyplot as plt | ||
7 | import argparse | ||
8 | import os | ||
9 | import json | ||
10 | |||
11 | |||
def plot_values_clusters(filepath, values, title, xlabel, ylabel, first_k=2):
    '''
    Save a scatter plot of one measure against the number of clusters.

    The i-th entry of `values` is drawn at x = first_k + i, and an x tick is
    placed every 5 entries, starting at `first_k`.

    Args:
        filepath: Destination handed to `Figure.savefig`.
        values: Sequence of measure values, one per cluster count.
        title: Title of the figure.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        first_k: Cluster count that `values[0]` corresponds to. Defaults to 2,
            the offset that used to be hard-coded.
    '''
    values = np.asarray(values)
    x = np.arange(len(values)) + first_k
    x_ticks = np.arange(0, len(values), 5) + first_k

    # Draw on a dedicated figure instead of pyplot's implicit current figure,
    # so points from an earlier, unclosed figure can never leak into this plot.
    fig, ax = plt.subplots()
    try:
        ax.scatter(x, values)
        ax.set_xticks(x_ticks)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        fig.savefig(filepath)
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)
24 | |||
# -- PARSER
parser = argparse.ArgumentParser(description="")
parser.add_argument("expdir", type=str, help="Directory of experiment")
parser.add_argument("--measurefile", type=str, default="measures.json", help="Measure file it searchs in folders")
parser.add_argument("--suffix", type=str, default="", help="suffix of saved files")

args = parser.parse_args()
EXP_DIR = args.expdir
MEASURE_FILE = args.measurefile
SUFFIX = args.suffix

# Plots are written to a "res" folder inside the experiment directory
# (example experiment directory: "exp/kmeans_teacher_1/pvector-1").
RESULTS_DIR = os.path.join(EXP_DIR, "res")

# -- CONFIG
# Bounds (both included) of the cluster counts whose measure files are read.
kmin = 2
kmax = 100


# -- CREATE FOLDER
# exist_ok=True replaces the former "check, then create" sequence, which could
# fail if the folder appeared between the check and the creation.
os.makedirs(RESULTS_DIR, exist_ok=True)

# -- BEGIN REGROUPMENT

# Subset keys looked up in every measure file.
subsets = ["train", "val"]

# Disequilibrium values collected for the fold currently being processed.
disequilibriums = []
53 | |||
def init_measures():
    '''
    Build an empty measure store.

    Returns a dict with one entry per name in the module-level `subsets`;
    each entry maps "entropy", "vscore", "homogeneity" and "completeness"
    to its own new empty list.
    '''
    metric_names = ("entropy", "vscore", "homogeneity", "completeness")
    return {
        subset_name: {metric: [] for metric in metric_names}
        for subset_name in subsets
    }
64 | |||
measures = init_measures()

# Metric keys read from every measure file, in the order they are plotted.
metric_names = ("entropy", "vscore", "homogeneity", "completeness")

for kfold in range(1, 5):
    print(kfold)

    # Gather the measures of this fold, one measure file per cluster count.
    for k in range(kmin, kmax + 1):
        measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE)
        with open(measures_file, 'r') as f:
            meas_data = json.load(f)
        disequilibriums.append(meas_data["disequilibrium"])
        for subset in subsets:
            for metric in metric_names:
                measures[subset][metric].append(meas_data[subset][metric])

    # One PDF per (metric, subset) pair for this fold.
    for subset in subsets:
        for metric in metric_names:
            label = metric.capitalize()
            pdf_name = "{}_{}_{}{}.pdf".format(metric, subset, kfold, SUFFIX)
            plot_values_clusters(
                os.path.join(RESULTS_DIR, pdf_name),
                measures[subset][metric],
                "{} {} set {}".format(label, subset, kfold),
                "N clusters",
                label)

    # Disequilibrium is stored once per measure file, not per subset.
    plot_values_clusters(
        os.path.join(RESULTS_DIR, "disequilibrium_{}{}.pdf".format(kfold, SUFFIX)),
        disequilibriums,
        "Disequilibrium set {}".format(kfold),
        "N clusters",
        "Disequilibrium")

    # Start the next fold with empty accumulators.
    measures = init_measures()
    disequilibriums = []
113 |