Commit ce4a6b1b9e5427788566e32d972b5eec252050a4

Authored by Mathias Quillot
1 parent 80c28a0a27
Exists in master

Plot all 4 measures in the same figure for k = 2 to 100

Showing 1 changed file with 112 additions and 71 deletions Side-by-side Diff

bin/regroup-measures.py
1 1 '''
2 2 Regroup results into one file and a plot.
  3 +TODO: Highlight the maximum values.
  4 +TODO: Save the values somewhere so that they can easily be retrieved.
  5 +
3 6 '''
4 7  
5 8 import numpy as np
6 9  
7 10  
8 11  
9 12  
... ... @@ -9,32 +12,46 @@
9 12 import json
10 13  
11 14  
12   -def plot_values_clusters(filepath, values, title, xlabel, ylabel):
13   - values = np.asarray(values)
14   - x = np.arange(len(values)) + 2
15   - x_ticks = np.arange(len(values), step=5) + 2
16   - y = values
17   - plt.scatter(x, y)
18   - plt.xticks(x_ticks)
19   - plt.title(title)
20   - plt.xlabel(xlabel)
21   - plt.ylabel(ylabel)
22   - plt.savefig(filepath)
23   - plt.close()
def plot_values_clusters(values, title, xlabel, ylabel, kmin=2, tick_step=10):
    """Scatter-plot a series of values against the number of clusters.

    The plot is drawn on the current pyplot axes. Nothing is saved or closed
    here, so several calls can fill the subplots of one figure before the
    caller writes it to disk.

    Args:
        values: Sequence of values; the i-th value is drawn at x = kmin + i.
        title: Title of the plot.
        xlabel: Label of the x axis.
        ylabel: Label of the y axis.
        kmin: Number of clusters associated with the first value
            (defaults to 2, the offset that was previously hard-coded).
        tick_step: Spacing between two consecutive x ticks (defaults to 10).
    """
    values = np.asarray(values)
    n_values = len(values)
    x = np.arange(n_values) + kmin
    x_ticks = np.arange(0, n_values, tick_step) + kmin
    # s=1 sets a small marker size for the scatter points.
    plt.scatter(x, values, s=1)
    plt.xticks(x_ticks)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
24 25  
  26 +
def save_plot(filepath):
    """Write the current pyplot figure to ``filepath``, then close it.

    Args:
        filepath: Destination path handed to ``plt.savefig``.
    """
    plt.savefig(filepath)
    # Close the figure so the next plot does not draw over this one.
    plt.close()
  30 +
  31 +
def save_results(outfile, measures, titles):
    """Write measures column-wise to a comma-separated text file.

    The first line holds the titles; each following line holds one value per
    column, converted with ``str``.

    Args:
        outfile: Path of the file to create or overwrite.
        measures: Sequence of columns (one sequence of values per title).
            The number of rows is taken from the first column. An empty
            sequence produces a file containing only the header line.
        titles: Column names written as the header line.

    Raises:
        IndexError: If a column is shorter than the first one. The output
            file is not touched in that case.
    """
    n_rows = len(measures[0]) if len(measures) else 0

    # Build every line before opening the file, so malformed input cannot
    # leave a truncated, half-written file behind.
    lines = [",".join(titles) + "\n"]
    lines.extend(
        ",".join(str(column[row]) for column in measures) + "\n"
        for row in range(n_rows)
    )

    with open(outfile, "w") as f:
        f.writelines(lines)
  38 +
  39 +
25 40 # -- PARSER
26 41 parser = argparse.ArgumentParser(description="")
27 42 parser.add_argument("expdir", type=str, help="Directory of experiment")
28   -parser.add_argument("--measurefile", type=str, default="measures.json", help="Measure file it searchs in folders")
29   -parser.add_argument("--suffix", type=str, default="", help="suffix of saved files")
  43 +parser.add_argument("--measurefile", type=str, default="measures.json",
  44 + help="Measure file it searchs in folders")
  45 +parser.add_argument("--suffix", type=str, default="",
  46 + help="suffix of saved files")
30 47  
31 48 args = parser.parse_args()
32 49 EXP_DIR = args.expdir
33   -MEASURE_FILE=args.measurefile
  50 +MEASURE_FILE = args.measurefile
34 51 SUFFIX = args.suffix
35 52  
36   -#EXP_DIR="exp/kmeans_teacher_1/pvector-1"
37   -RESULTS_DIR=os.path.join(EXP_DIR, "res")
  53 +# EXP_DIR="exp/kmeans_teacher_1/pvector-1"
  54 +RESULTS_DIR = os.path.join(EXP_DIR, "res")
38 55  
39 56 # -- CONFIG
40 57 kmin = 2
... ... @@ -43,7 +60,7 @@
43 60  
44 61 # -- CREATE FOLDER
45 62 if not os.path.exists(RESULTS_DIR):
46   - os.makedirs(RESULTS_DIR)
  63 + os.makedirs(RESULTS_DIR)
47 64  
48 65 # -- BEGIN REGROUPMENT
49 66  
50 67  
51 68  
52 69  
53 70  
... ... @@ -51,63 +68,87 @@
51 68  
52 69 disequilibriums = []
53 70  
  71 +
def init_measures():
    """Return a fresh, empty container of measures.

    The result maps every entry of the module-level ``subsets`` to a dict
    holding one empty list per measure name, in the order entropy, vscore,
    homogeneity, completeness.
    """
    measure_names = ("entropy", "vscore", "homogeneity", "completeness")
    return {
        subset: {name: [] for name in measure_names}
        for subset in subsets
    }
64 82  
  83 +
65 84 measures = init_measures()
66 85  
67 86 for kfold in range(1, 5):
68   - print(kfold)
69   - for k in range(kmin, kmax+1):
70   - measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE)
71   - with open(measures_file, 'r') as f:
72   - meas_data = json.load(f)
73   - disequilibriums.append(meas_data["disequilibrium"])
74   - for subset in subsets:
75   - measures[subset]["entropy"].append(meas_data[subset]["entropy"])
76   - measures[subset]["vscore"].append(meas_data[subset]["vscore"])
77   - measures[subset]["homogeneity"].append(meas_data[subset]["homogeneity"])
78   - measures[subset]["completeness"].append(meas_data[subset]["completeness"])
79   - for subset in subsets:
80   - plot_values_clusters(
81   - os.path.join(RESULTS_DIR, "entropy_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"),
82   - measures[subset]["entropy"],
83   - "Entropy " + str(subset) + " set " + str(kfold),
84   - "N clusters",
85   - "Entropy")
86   - plot_values_clusters(
87   - os.path.join(RESULTS_DIR, "vscore_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"),
88   - measures[subset]["vscore"],
89   - "Vscore " + str(subset) + " set " + str(kfold),
90   - "N clusters",
91   - "Vscore")
92   - plot_values_clusters(
93   - os.path.join(RESULTS_DIR, "homogeneity_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"),
94   - measures[subset]["homogeneity"],
95   - "Homogeneity " + str(subset) + " set " + str(kfold),
96   - "N clusters",
97   - "Homogeneity")
98   - plot_values_clusters(
99   - os.path.join(RESULTS_DIR, "completeness_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"),
100   - measures[subset]["completeness"],
101   - "Completeness " + str(subset) + " set " + str(kfold),
102   - "N clusters",
103   - "Completeness")
104   - plot_values_clusters(
105   - os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".pdf"),
106   - disequilibriums,
107   - "Disequilibrium set " + str(kfold),
108   - "N clusters",
109   - "Disequilibrium")
110   -
111   - measures = init_measures()
112   - disequilibriums = []
  87 + print("Regrouping on kfold: " + str(kfold))
  88 + # -- REGROUP MEASURES INTO LISTS
  89 + for k in range(kmin, kmax+1):
  90 + measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE)
  91 + with open(measures_file, 'r') as f:
  92 + meas_data = json.load(f)
  93 + disequilibriums.append(meas_data["disequilibrium"])
  94 + for subset in subsets:
  95 + measures[subset]["entropy"].append(
  96 + meas_data[subset]["entropy"])
  97 + measures[subset]["vscore"].append(
  98 + meas_data[subset]["vscore"])
  99 + measures[subset]["homogeneity"].append(
  100 + meas_data[subset]["homogeneity"])
  101 + measures[subset]["completeness"].append(
  102 + meas_data[subset]["completeness"])
  103 +
  104 + # -- PLOT AND SAVE MEASURES FOR A SPECIFIC SUBSET
  105 + for subset in subsets:
  106 + # Plot all measures
  107 + outf = "measures_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"
  108 +
  109 + fig = plt.figure(1)
  110 + for i, measure in enumerate(measures[subset]):
  111 +
  112 + plt.subplot(220 + i + 1)
  113 +
  114 + plot_values_clusters(
  115 + measures[subset][measure],
  116 + measure.capitalize() + " " + str(subset) + " set " + str(kfold),
  117 + "N clusters",
  118 + measure.capitalize())
  119 + plt.subplots_adjust(hspace=0.5, wspace=0.3)
  120 + save_plot(os.path.join(RESULTS_DIR, outf))
  121 +
  122 + # Save all measures on a csv file
  123 + save_results(
  124 + os.path.join(RESULTS_DIR, "measures_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".csv"),
  125 + [
  126 + measures[subset]["entropy"],
  127 + measures[subset]["homogeneity"],
  128 + measures[subset]["completeness"],
  129 + measures[subset]["vscore"]
  130 + ],
  131 + [
  132 + "entropy",
  133 + "homogeneity",
  134 + "completeness",
  135 + "vscore"
  136 + ]
  137 + )
  138 +
  139 + # PLOT AND SAVE FOR DISEQUILIBRIUM
  140 + plot_values_clusters(
  141 + disequilibriums,
  142 + "Disequilibrium set " + str(kfold),
  143 + "N clusters",
  144 + "Disequilibrium")
  145 + save_plot(os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".pdf"))
  146 +
  147 + save_results(
  148 + os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".csv"),
  149 + [disequilibriums],
  150 + ["disequilibrium"])
  151 +
  152 + measures = init_measures()
  153 + disequilibriums = []