Commit ce4a6b1b9e5427788566e32d972b5eec252050a4
1 parent
80c28a0a27
Exists in
master
Plot all 4 measures as subplots of the same figure, for k = 2 to 100
Showing 1 changed file with 112 additions and 71 deletions Inline Diff
bin/regroup-measures.py
1 | ''' | 1 | ''' |
2 | Regroup results into one file and a plot. | 2 | Regroup results into one file and a plot. |
3 | TODO: Mettre en valeur les valeurs maximales | ||
4 | TODO: Sauvegarder les valeurs quelques part pour qu'on puisse facilement les retrouver. | ||
5 | |||
3 | ''' | 6 | ''' |
4 | 7 | ||
5 | import numpy as np | 8 | import numpy as np |
6 | import matplotlib.pyplot as plt | 9 | import matplotlib.pyplot as plt |
7 | import argparse | 10 | import argparse |
8 | import os | 11 | import os |
9 | import json | 12 | import json |
10 | 13 | ||
11 | 14 | ||
12 | def plot_values_clusters(filepath, values, title, xlabel, ylabel): | 15 | def plot_values_clusters(values, title, xlabel, ylabel): |
13 | values = np.asarray(values) | 16 | values = np.asarray(values) |
14 | x = np.arange(len(values)) + 2 | 17 | x = np.arange(len(values)) + 2 |
15 | x_ticks = np.arange(len(values), step=5) + 2 | 18 | x_ticks = np.arange(len(values), step=10) + 2 |
16 | y = values | 19 | y = values |
17 | plt.scatter(x, y) | 20 | plt.scatter(x, y, s=1) |
18 | plt.xticks(x_ticks) | 21 | plt.xticks(x_ticks) |
19 | plt.title(title) | 22 | plt.title(title) |
20 | plt.xlabel(xlabel) | 23 | plt.xlabel(xlabel) |
21 | plt.ylabel(ylabel) | 24 | plt.ylabel(ylabel) |
22 | plt.savefig(filepath) | ||
23 | plt.close() | ||
24 | 25 | ||
26 | |||
27 | def save_plot(filepath): | ||
28 | plt.savefig(filepath) | ||
29 | plt.close() | ||
30 | |||
31 | |||
32 | def save_results(outfile, measures, titles): | ||
33 | with open(outfile, "w") as f: | ||
34 | f.write(",".join(titles) + "\n") | ||
35 | n = len(measures[0]) | ||
36 | for i in range(n): | ||
37 | f.write(",".join([str(ms[i]) for ms in measures]) + "\n") | ||
38 | |||
39 | |||
25 | # -- PARSER | 40 | # -- PARSER |
26 | parser = argparse.ArgumentParser(description="") | 41 | parser = argparse.ArgumentParser(description="") |
27 | parser.add_argument("expdir", type=str, help="Directory of experiment") | 42 | parser.add_argument("expdir", type=str, help="Directory of experiment") |
28 | parser.add_argument("--measurefile", type=str, default="measures.json", help="Measure file it searchs in folders") | 43 | parser.add_argument("--measurefile", type=str, default="measures.json", |
29 | parser.add_argument("--suffix", type=str, default="", help="suffix of saved files") | 44 | help="Measure file it searchs in folders") |
45 | parser.add_argument("--suffix", type=str, default="", | ||
46 | help="suffix of saved files") | ||
30 | 47 | ||
31 | args = parser.parse_args() | 48 | args = parser.parse_args() |
32 | EXP_DIR = args.expdir | 49 | EXP_DIR = args.expdir |
33 | MEASURE_FILE=args.measurefile | 50 | MEASURE_FILE = args.measurefile |
34 | SUFFIX = args.suffix | 51 | SUFFIX = args.suffix |
35 | 52 | ||
36 | #EXP_DIR="exp/kmeans_teacher_1/pvector-1" | 53 | # EXP_DIR="exp/kmeans_teacher_1/pvector-1" |
37 | RESULTS_DIR=os.path.join(EXP_DIR, "res") | 54 | RESULTS_DIR = os.path.join(EXP_DIR, "res") |
38 | 55 | ||
39 | # -- CONFIG | 56 | # -- CONFIG |
40 | kmin = 2 | 57 | kmin = 2 |
41 | kmax = 100 | 58 | kmax = 100 |
42 | 59 | ||
43 | 60 | ||
44 | # -- CREATE FOLDER | 61 | # -- CREATE FOLDER |
45 | if not os.path.exists(RESULTS_DIR): | 62 | if not os.path.exists(RESULTS_DIR): |
46 | os.makedirs(RESULTS_DIR) | 63 | os.makedirs(RESULTS_DIR) |
47 | 64 | ||
48 | # -- BEGIN REGROUPMENT | 65 | # -- BEGIN REGROUPMENT |
49 | 66 | ||
50 | subsets = ["train", "val"] | 67 | subsets = ["train", "val"] |
51 | 68 | ||
52 | disequilibriums = [] | 69 | disequilibriums = [] |
53 | 70 | ||
71 | |||
54 | def init_measures(): | 72 | def init_measures(): |
55 | measures = {} | 73 | measures = {} |
56 | 74 | ||
57 | for subset in subsets: | 75 | for subset in subsets: |
58 | measures[subset] = {} | 76 | measures[subset] = {} |
59 | measures[subset]["entropy"] = [] | 77 | measures[subset]["entropy"] = [] |
60 | measures[subset]["vscore"] = [] | 78 | measures[subset]["vscore"] = [] |
61 | measures[subset]["homogeneity"] = [] | 79 | measures[subset]["homogeneity"] = [] |
62 | measures[subset]["completeness"] = [] | 80 | measures[subset]["completeness"] = [] |
63 | return measures | 81 | return measures |
64 | 82 | ||
83 | |||
65 | measures = init_measures() | 84 | measures = init_measures() |
66 | 85 | ||
67 | for kfold in range(1, 5): | 86 | for kfold in range(1, 5): |
68 | print(kfold) | 87 | print("Regrouping on kfold: " + str(kfold)) |
69 | for k in range(kmin, kmax+1): | 88 | # -- REGROUP MEASURES INTO LISTS |
70 | measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE) | 89 | for k in range(kmin, kmax+1): |
71 | with open(measures_file, 'r') as f: | 90 | measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE) |
72 | meas_data = json.load(f) | 91 | with open(measures_file, 'r') as f: |
73 | disequilibriums.append(meas_data["disequilibrium"]) | 92 | meas_data = json.load(f) |
74 | for subset in subsets: | 93 | disequilibriums.append(meas_data["disequilibrium"]) |
75 | measures[subset]["entropy"].append(meas_data[subset]["entropy"]) | 94 | for subset in subsets: |
76 | measures[subset]["vscore"].append(meas_data[subset]["vscore"]) | 95 | measures[subset]["entropy"].append( |
77 | measures[subset]["homogeneity"].append(meas_data[subset]["homogeneity"]) | 96 | meas_data[subset]["entropy"]) |
78 | measures[subset]["completeness"].append(meas_data[subset]["completeness"]) | 97 | measures[subset]["vscore"].append( |
79 | for subset in subsets: | 98 | meas_data[subset]["vscore"]) |
80 | plot_values_clusters( | 99 | measures[subset]["homogeneity"].append( |
81 | os.path.join(RESULTS_DIR, "entropy_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), | 100 | meas_data[subset]["homogeneity"]) |
82 | measures[subset]["entropy"], | 101 | measures[subset]["completeness"].append( |
83 | "Entropy " + str(subset) + " set " + str(kfold), | 102 | meas_data[subset]["completeness"]) |
84 | "N clusters", | 103 | |
85 | "Entropy") | 104 | # -- PLOT AND SAVE MEASURES FOR A SPECIFIC SUBSET |
86 | plot_values_clusters( | 105 | for subset in subsets: |
87 | os.path.join(RESULTS_DIR, "vscore_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), | 106 | # Plot all measures |
88 | measures[subset]["vscore"], | 107 | outf = "measures_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf" |
89 | "Vscore " + str(subset) + " set " + str(kfold), | 108 | |
90 | "N clusters", | 109 | fig = plt.figure(1) |
91 | "Vscore") | 110 | for i, measure in enumerate(measures[subset]): |
92 | plot_values_clusters( | 111 | |
93 | os.path.join(RESULTS_DIR, "homogeneity_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), | 112 | plt.subplot(220 + i + 1) |
94 | measures[subset]["homogeneity"], | 113 | |
95 | "Homogeneity " + str(subset) + " set " + str(kfold), | 114 | plot_values_clusters( |
96 | "N clusters", | 115 | measures[subset][measure], |
97 | "Homogeneity") | 116 | measure.capitalize() + " " + str(subset) + " set " + str(kfold), |
98 | plot_values_clusters( | 117 | "N clusters", |
99 | os.path.join(RESULTS_DIR, "completeness_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), | 118 | measure.capitalize()) |
100 | measures[subset]["completeness"], | 119 | plt.subplots_adjust(hspace=0.5, wspace=0.3) |
101 | "Completeness " + str(subset) + " set " + str(kfold), | 120 | save_plot(os.path.join(RESULTS_DIR, outf)) |
102 | "N clusters", | 121 | |
103 | "Completeness") | 122 | # Save all measures on a csv file |
104 | plot_values_clusters( | 123 | save_results( |
105 | os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".pdf"), | 124 | os.path.join(RESULTS_DIR, "measures_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".csv"), |
106 | disequilibriums, | 125 | [ |
107 | "Disequilibrium set " + str(kfold), | 126 | measures[subset]["entropy"], |
108 | "N clusters", | 127 | measures[subset]["homogeneity"], |
109 | "Disequilibrium") | 128 | measures[subset]["completeness"], |
110 | 129 | measures[subset]["vscore"] | |
111 | measures = init_measures() | 130 | ], |
112 | disequilibriums = [] | 131 | [ |
132 | "entropy", | ||
133 | "homogeneity", | ||
134 | "completeness", | ||
135 | "vscore" | ||
136 | ] | ||
137 | ) | ||
138 | |||
139 | # PLOT AND SAVE FOR DISEQUILIBRIUM | ||
140 | plot_values_clusters( | ||
141 | disequilibriums, | ||
142 | "Disequilibrium set " + str(kfold), | ||
143 | "N clusters", | ||
144 | "Disequilibrium") | ||
145 | save_plot(os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".pdf")) | ||
146 | |||
147 | save_results( | ||
148 | os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".csv"), | ||
149 | [disequilibriums], | ||
150 | ["disequilibrium"]) | ||
151 | |||
152 | measures = init_measures() |