Commit ce4a6b1b9e5427788566e32d972b5eec252050a4
1 parent
80c28a0a27
Exists in
master
Plot the 4 measures as subplots of a single figure, for k = 2 to 100
Showing 1 changed file with 112 additions and 71 deletions Side-by-side Diff
bin/regroup-measures.py
1 | 1 | ''' |
2 | 2 | Regroup results into one file and a plot. |
3 | +TODO: Mettre en valeur les valeurs maximales | |
4 | +TODO: Sauvegarder les valeurs quelques part pour qu'on puisse facilement les retrouver. | |
5 | + | |
3 | 6 | ''' |
4 | 7 | |
5 | 8 | import numpy as np |
6 | 9 | |
7 | 10 | |
8 | 11 | |
9 | 12 | |
... | ... | @@ -9,32 +12,46 @@ |
9 | 12 | import json |
10 | 13 | |
11 | 14 | |
12 | -def plot_values_clusters(filepath, values, title, xlabel, ylabel): | |
13 | - values = np.asarray(values) | |
14 | - x = np.arange(len(values)) + 2 | |
15 | - x_ticks = np.arange(len(values), step=5) + 2 | |
16 | - y = values | |
17 | - plt.scatter(x, y) | |
18 | - plt.xticks(x_ticks) | |
19 | - plt.title(title) | |
20 | - plt.xlabel(xlabel) | |
21 | - plt.ylabel(ylabel) | |
22 | - plt.savefig(filepath) | |
23 | - plt.close() | |
15 | +def plot_values_clusters(values, title, xlabel, ylabel): | |
16 | + values = np.asarray(values) | |
17 | + x = np.arange(len(values)) + 2 | |
18 | + x_ticks = np.arange(len(values), step=10) + 2 | |
19 | + y = values | |
20 | + plt.scatter(x, y, s=1) | |
21 | + plt.xticks(x_ticks) | |
22 | + plt.title(title) | |
23 | + plt.xlabel(xlabel) | |
24 | + plt.ylabel(ylabel) | |
24 | 25 | |
26 | + | |
27 | +def save_plot(filepath): | |
28 | + plt.savefig(filepath) | |
29 | + plt.close() | |
30 | + | |
31 | + | |
32 | +def save_results(outfile, measures, titles): | |
33 | + with open(outfile, "w") as f: | |
34 | + f.write(",".join(titles) + "\n") | |
35 | + n = len(measures[0]) | |
36 | + for i in range(n): | |
37 | + f.write(",".join([str(ms[i]) for ms in measures]) + "\n") | |
38 | + | |
39 | + | |
25 | 40 | # -- PARSER |
26 | 41 | parser = argparse.ArgumentParser(description="") |
27 | 42 | parser.add_argument("expdir", type=str, help="Directory of experiment") |
28 | -parser.add_argument("--measurefile", type=str, default="measures.json", help="Measure file it searchs in folders") | |
29 | -parser.add_argument("--suffix", type=str, default="", help="suffix of saved files") | |
43 | +parser.add_argument("--measurefile", type=str, default="measures.json", | |
44 | + help="Measure file it searchs in folders") | |
45 | +parser.add_argument("--suffix", type=str, default="", | |
46 | + help="suffix of saved files") | |
30 | 47 | |
31 | 48 | args = parser.parse_args() |
32 | 49 | EXP_DIR = args.expdir |
33 | -MEASURE_FILE=args.measurefile | |
50 | +MEASURE_FILE = args.measurefile | |
34 | 51 | SUFFIX = args.suffix |
35 | 52 | |
36 | -#EXP_DIR="exp/kmeans_teacher_1/pvector-1" | |
37 | -RESULTS_DIR=os.path.join(EXP_DIR, "res") | |
53 | +# EXP_DIR="exp/kmeans_teacher_1/pvector-1" | |
54 | +RESULTS_DIR = os.path.join(EXP_DIR, "res") | |
38 | 55 | |
39 | 56 | # -- CONFIG |
40 | 57 | kmin = 2 |
... | ... | @@ -43,7 +60,7 @@ |
43 | 60 | |
44 | 61 | # -- CREATE FOLDER |
45 | 62 | if not os.path.exists(RESULTS_DIR): |
46 | - os.makedirs(RESULTS_DIR) | |
63 | + os.makedirs(RESULTS_DIR) | |
47 | 64 | |
48 | 65 | # -- BEGIN REGROUPMENT |
49 | 66 | |
50 | 67 | |
51 | 68 | |
52 | 69 | |
53 | 70 | |
... | ... | @@ -51,63 +68,87 @@ |
51 | 68 | |
52 | 69 | disequilibriums = [] |
53 | 70 | |
71 | + | |
54 | 72 | def init_measures(): |
55 | - measures = {} | |
73 | + measures = {} | |
56 | 74 | |
57 | - for subset in subsets: | |
58 | - measures[subset] = {} | |
59 | - measures[subset]["entropy"] = [] | |
60 | - measures[subset]["vscore"] = [] | |
61 | - measures[subset]["homogeneity"] = [] | |
62 | - measures[subset]["completeness"] = [] | |
63 | - return measures | |
75 | + for subset in subsets: | |
76 | + measures[subset] = {} | |
77 | + measures[subset]["entropy"] = [] | |
78 | + measures[subset]["vscore"] = [] | |
79 | + measures[subset]["homogeneity"] = [] | |
80 | + measures[subset]["completeness"] = [] | |
81 | + return measures | |
64 | 82 | |
83 | + | |
65 | 84 | measures = init_measures() |
66 | 85 | |
67 | 86 | for kfold in range(1, 5): |
68 | - print(kfold) | |
69 | - for k in range(kmin, kmax+1): | |
70 | - measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE) | |
71 | - with open(measures_file, 'r') as f: | |
72 | - meas_data = json.load(f) | |
73 | - disequilibriums.append(meas_data["disequilibrium"]) | |
74 | - for subset in subsets: | |
75 | - measures[subset]["entropy"].append(meas_data[subset]["entropy"]) | |
76 | - measures[subset]["vscore"].append(meas_data[subset]["vscore"]) | |
77 | - measures[subset]["homogeneity"].append(meas_data[subset]["homogeneity"]) | |
78 | - measures[subset]["completeness"].append(meas_data[subset]["completeness"]) | |
79 | - for subset in subsets: | |
80 | - plot_values_clusters( | |
81 | - os.path.join(RESULTS_DIR, "entropy_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), | |
82 | - measures[subset]["entropy"], | |
83 | - "Entropy " + str(subset) + " set " + str(kfold), | |
84 | - "N clusters", | |
85 | - "Entropy") | |
86 | - plot_values_clusters( | |
87 | - os.path.join(RESULTS_DIR, "vscore_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), | |
88 | - measures[subset]["vscore"], | |
89 | - "Vscore " + str(subset) + " set " + str(kfold), | |
90 | - "N clusters", | |
91 | - "Vscore") | |
92 | - plot_values_clusters( | |
93 | - os.path.join(RESULTS_DIR, "homogeneity_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), | |
94 | - measures[subset]["homogeneity"], | |
95 | - "Homogeneity " + str(subset) + " set " + str(kfold), | |
96 | - "N clusters", | |
97 | - "Homogeneity") | |
98 | - plot_values_clusters( | |
99 | - os.path.join(RESULTS_DIR, "completeness_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), | |
100 | - measures[subset]["completeness"], | |
101 | - "Completeness " + str(subset) + " set " + str(kfold), | |
102 | - "N clusters", | |
103 | - "Completeness") | |
104 | - plot_values_clusters( | |
105 | - os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".pdf"), | |
106 | - disequilibriums, | |
107 | - "Disequilibrium set " + str(kfold), | |
108 | - "N clusters", | |
109 | - "Disequilibrium") | |
110 | - | |
111 | - measures = init_measures() | |
112 | - disequilibriums = [] | |
87 | + print("Regrouping on kfold: " + str(kfold)) | |
88 | + # -- REGROUP MEASURES INTO LISTS | |
89 | + for k in range(kmin, kmax+1): | |
90 | + measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE) | |
91 | + with open(measures_file, 'r') as f: | |
92 | + meas_data = json.load(f) | |
93 | + disequilibriums.append(meas_data["disequilibrium"]) | |
94 | + for subset in subsets: | |
95 | + measures[subset]["entropy"].append( | |
96 | + meas_data[subset]["entropy"]) | |
97 | + measures[subset]["vscore"].append( | |
98 | + meas_data[subset]["vscore"]) | |
99 | + measures[subset]["homogeneity"].append( | |
100 | + meas_data[subset]["homogeneity"]) | |
101 | + measures[subset]["completeness"].append( | |
102 | + meas_data[subset]["completeness"]) | |
103 | + | |
104 | + # -- PLOT AND SAVE MEASURES FOR A SPECIFIC SUBSET | |
105 | + for subset in subsets: | |
106 | + # Plot all measures | |
107 | + outf = "measures_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf" | |
108 | + | |
109 | + fig = plt.figure(1) | |
110 | + for i, measure in enumerate(measures[subset]): | |
111 | + | |
112 | + plt.subplot(220 + i + 1) | |
113 | + | |
114 | + plot_values_clusters( | |
115 | + measures[subset][measure], | |
116 | + measure.capitalize() + " " + str(subset) + " set " + str(kfold), | |
117 | + "N clusters", | |
118 | + measure.capitalize()) | |
119 | + plt.subplots_adjust(hspace=0.5, wspace=0.3) | |
120 | + save_plot(os.path.join(RESULTS_DIR, outf)) | |
121 | + | |
122 | + # Save all measures on a csv file | |
123 | + save_results( | |
124 | + os.path.join(RESULTS_DIR, "measures_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".csv"), | |
125 | + [ | |
126 | + measures[subset]["entropy"], | |
127 | + measures[subset]["homogeneity"], | |
128 | + measures[subset]["completeness"], | |
129 | + measures[subset]["vscore"] | |
130 | + ], | |
131 | + [ | |
132 | + "entropy", | |
133 | + "homogeneity", | |
134 | + "completeness", | |
135 | + "vscore" | |
136 | + ] | |
137 | + ) | |
138 | + | |
139 | + # PLOT AND SAVE FOR DISEQUILIBRIUM | |
140 | + plot_values_clusters( | |
141 | + disequilibriums, | |
142 | + "Disequilibrium set " + str(kfold), | |
143 | + "N clusters", | |
144 | + "Disequilibrium") | |
145 | + save_plot(os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".pdf")) | |
146 | + | |
147 | + save_results( | |
148 | + os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".csv"), | |
149 | + [disequilibriums], | |
150 | + ["disequilibrium"]) | |
151 | + | |
152 | + measures = init_measures() | |
153 | + disequilibriums = [] |