Commit ce4a6b1b9e5427788566e32d972b5eec252050a4

Authored by Mathias Quillot
1 parent 80c28a0a27
Exists in master

Plot all 4 measures in the same figure for k = 2 to 100

Showing 1 changed file with 112 additions and 71 deletions Inline Diff

bin/regroup-measures.py
1 ''' 1 '''
2 Regroup results into one file and a plot. 2 Regroup results into one file and a plot.
3 TODO: Highlight the maximum values
4 TODO: Save the values somewhere so that they can be easily retrieved.
5
3 ''' 6 '''
4 7
5 import numpy as np 8 import numpy as np
6 import matplotlib.pyplot as plt 9 import matplotlib.pyplot as plt
7 import argparse 10 import argparse
8 import os 11 import os
9 import json 12 import json
10 13
11 14
12 def plot_values_clusters(filepath, values, title, xlabel, ylabel): 15 def plot_values_clusters(values, title, xlabel, ylabel):
13 values = np.asarray(values) 16 values = np.asarray(values)
14 x = np.arange(len(values)) + 2 17 x = np.arange(len(values)) + 2
15 x_ticks = np.arange(len(values), step=5) + 2 18 x_ticks = np.arange(len(values), step=10) + 2
16 y = values 19 y = values
17 plt.scatter(x, y) 20 plt.scatter(x, y, s=1)
18 plt.xticks(x_ticks) 21 plt.xticks(x_ticks)
19 plt.title(title) 22 plt.title(title)
20 plt.xlabel(xlabel) 23 plt.xlabel(xlabel)
21 plt.ylabel(ylabel) 24 plt.ylabel(ylabel)
22 plt.savefig(filepath)
23 plt.close()
24 25
26
27 def save_plot(filepath):
28 plt.savefig(filepath)
29 plt.close()
30
31
32 def save_results(outfile, measures, titles):
33 with open(outfile, "w") as f:
34 f.write(",".join(titles) + "\n")
35 n = len(measures[0])
36 for i in range(n):
37 f.write(",".join([str(ms[i]) for ms in measures]) + "\n")
38
39
25 # -- PARSER 40 # -- PARSER
26 parser = argparse.ArgumentParser(description="") 41 parser = argparse.ArgumentParser(description="")
27 parser.add_argument("expdir", type=str, help="Directory of experiment") 42 parser.add_argument("expdir", type=str, help="Directory of experiment")
28 parser.add_argument("--measurefile", type=str, default="measures.json", help="Measure file it searchs in folders") 43 parser.add_argument("--measurefile", type=str, default="measures.json",
29 parser.add_argument("--suffix", type=str, default="", help="suffix of saved files") 44 help="Measure file it searchs in folders")
45 parser.add_argument("--suffix", type=str, default="",
46 help="suffix of saved files")
30 47
31 args = parser.parse_args() 48 args = parser.parse_args()
32 EXP_DIR = args.expdir 49 EXP_DIR = args.expdir
33 MEASURE_FILE=args.measurefile 50 MEASURE_FILE = args.measurefile
34 SUFFIX = args.suffix 51 SUFFIX = args.suffix
35 52
36 #EXP_DIR="exp/kmeans_teacher_1/pvector-1" 53 # EXP_DIR="exp/kmeans_teacher_1/pvector-1"
37 RESULTS_DIR=os.path.join(EXP_DIR, "res") 54 RESULTS_DIR = os.path.join(EXP_DIR, "res")
38 55
39 # -- CONFIG 56 # -- CONFIG
40 kmin = 2 57 kmin = 2
41 kmax = 100 58 kmax = 100
42 59
43 60
44 # -- CREATE FOLDER 61 # -- CREATE FOLDER
45 if not os.path.exists(RESULTS_DIR): 62 if not os.path.exists(RESULTS_DIR):
46 os.makedirs(RESULTS_DIR) 63 os.makedirs(RESULTS_DIR)
47 64
48 # -- BEGIN REGROUPMENT 65 # -- BEGIN REGROUPMENT
49 66
50 subsets = ["train", "val"] 67 subsets = ["train", "val"]
51 68
52 disequilibriums = [] 69 disequilibriums = []
53 70
71
54 def init_measures(): 72 def init_measures():
55 measures = {} 73 measures = {}
56 74
57 for subset in subsets: 75 for subset in subsets:
58 measures[subset] = {} 76 measures[subset] = {}
59 measures[subset]["entropy"] = [] 77 measures[subset]["entropy"] = []
60 measures[subset]["vscore"] = [] 78 measures[subset]["vscore"] = []
61 measures[subset]["homogeneity"] = [] 79 measures[subset]["homogeneity"] = []
62 measures[subset]["completeness"] = [] 80 measures[subset]["completeness"] = []
63 return measures 81 return measures
64 82
83
65 measures = init_measures() 84 measures = init_measures()
66 85
67 for kfold in range(1, 5): 86 for kfold in range(1, 5):
68 print(kfold) 87 print("Regrouping on kfold: " + str(kfold))
69 for k in range(kmin, kmax+1): 88 # -- REGROUP MEASURES INTO LISTS
70 measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE) 89 for k in range(kmin, kmax+1):
71 with open(measures_file, 'r') as f: 90 measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE)
72 meas_data = json.load(f) 91 with open(measures_file, 'r') as f:
73 disequilibriums.append(meas_data["disequilibrium"]) 92 meas_data = json.load(f)
74 for subset in subsets: 93 disequilibriums.append(meas_data["disequilibrium"])
75 measures[subset]["entropy"].append(meas_data[subset]["entropy"]) 94 for subset in subsets:
76 measures[subset]["vscore"].append(meas_data[subset]["vscore"]) 95 measures[subset]["entropy"].append(
77 measures[subset]["homogeneity"].append(meas_data[subset]["homogeneity"]) 96 meas_data[subset]["entropy"])
78 measures[subset]["completeness"].append(meas_data[subset]["completeness"]) 97 measures[subset]["vscore"].append(
79 for subset in subsets: 98 meas_data[subset]["vscore"])
80 plot_values_clusters( 99 measures[subset]["homogeneity"].append(
81 os.path.join(RESULTS_DIR, "entropy_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), 100 meas_data[subset]["homogeneity"])
82 measures[subset]["entropy"], 101 measures[subset]["completeness"].append(
83 "Entropy " + str(subset) + " set " + str(kfold), 102 meas_data[subset]["completeness"])
84 "N clusters", 103
85 "Entropy") 104 # -- PLOT AND SAVE MEASURES FOR A SPECIFIC SUBSET
86 plot_values_clusters( 105 for subset in subsets:
87 os.path.join(RESULTS_DIR, "vscore_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), 106 # Plot all measures
88 measures[subset]["vscore"], 107 outf = "measures_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"
89 "Vscore " + str(subset) + " set " + str(kfold), 108
90 "N clusters", 109 fig = plt.figure(1)
91 "Vscore") 110 for i, measure in enumerate(measures[subset]):
92 plot_values_clusters( 111
93 os.path.join(RESULTS_DIR, "homogeneity_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), 112 plt.subplot(220 + i + 1)
94 measures[subset]["homogeneity"], 113
95 "Homogeneity " + str(subset) + " set " + str(kfold), 114 plot_values_clusters(
96 "N clusters", 115 measures[subset][measure],
97 "Homogeneity") 116 measure.capitalize() + " " + str(subset) + " set " + str(kfold),
98 plot_values_clusters( 117 "N clusters",
99 os.path.join(RESULTS_DIR, "completeness_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), 118 measure.capitalize())
100 measures[subset]["completeness"], 119 plt.subplots_adjust(hspace=0.5, wspace=0.3)
101 "Completeness " + str(subset) + " set " + str(kfold), 120 save_plot(os.path.join(RESULTS_DIR, outf))
102 "N clusters", 121
103 "Completeness") 122 # Save all measures on a csv file
104 plot_values_clusters( 123 save_results(
105 os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".pdf"), 124 os.path.join(RESULTS_DIR, "measures_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".csv"),
106 disequilibriums, 125 [
107 "Disequilibrium set " + str(kfold), 126 measures[subset]["entropy"],
108 "N clusters", 127 measures[subset]["homogeneity"],
109 "Disequilibrium") 128 measures[subset]["completeness"],
110 129 measures[subset]["vscore"]
111 measures = init_measures() 130 ],
112 disequilibriums = [] 131 [
132 "entropy",
133 "homogeneity",
134 "completeness",
135 "vscore"
136 ]
137 )
138
139 # PLOT AND SAVE FOR DISEQUILIBRIUM
140 plot_values_clusters(
141 disequilibriums,
142 "Disequilibrium set " + str(kfold),
143 "N clusters",
144 "Disequilibrium")
145 save_plot(os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".pdf"))
146
147 save_results(
148 os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".csv"),
149 [disequilibriums],
150 ["disequilibrium"])
151
152 measures = init_measures()