Authored by Mathias Quillot
1 parent 80c28a0a27
Exists in

### Plot 4 subplots in the same figure, with all the measures, for k = 2 to 100

Showing 1 changed file with 112 additions and 71 deletions

bin/regroup-measures.py

 1 ''' 1 ''' 2 Regroup results into one file and a plot. 2 Regroup results into one file and a plot. 3 TODO: Mettre en valeur les valeurs maximales 4 TODO: Sauvegarder les valeurs quelques part pour qu'on puisse facilement les retrouver. 5 3 ''' 6 ''' 4 7 5 import numpy as np 8 import numpy as np 6 import matplotlib.pyplot as plt 9 import matplotlib.pyplot as plt 7 import argparse 10 import argparse 8 import os 11 import os 9 import json 12 import json 10 13 11 14 12 def plot_values_clusters(filepath, values, title, xlabel, ylabel): 15 def plot_values_clusters(values, title, xlabel, ylabel): 13 values = np.asarray(values) 16 values = np.asarray(values) 14 x = np.arange(len(values)) + 2 17 x = np.arange(len(values)) + 2 15 x_ticks = np.arange(len(values), step=5) + 2 18 x_ticks = np.arange(len(values), step=10) + 2 16 y = values 19 y = values 17 plt.scatter(x, y) 20 plt.scatter(x, y, s=1) 18 plt.xticks(x_ticks) 21 plt.xticks(x_ticks) 19 plt.title(title) 22 plt.title(title) 20 plt.xlabel(xlabel) 23 plt.xlabel(xlabel) 21 plt.ylabel(ylabel) 24 plt.ylabel(ylabel) 22 plt.savefig(filepath) 23 plt.close() 24 25 26 27 def save_plot(filepath): 28 plt.savefig(filepath) 29 plt.close() 30 31 32 def save_results(outfile, measures, titles): 33 with open(outfile, "w") as f: 34 f.write(",".join(titles) + "\n") 35 n = len(measures[0]) 36 for i in range(n): 37 f.write(",".join([str(ms[i]) for ms in measures]) + "\n") 38 39 25 # -- PARSER 40 # -- PARSER 26 parser = argparse.ArgumentParser(description="") 41 parser = argparse.ArgumentParser(description="") 27 parser.add_argument("expdir", type=str, help="Directory of experiment") 42 parser.add_argument("expdir", type=str, help="Directory of experiment") 28 parser.add_argument("--measurefile", type=str, default="measures.json", help="Measure file it searchs in folders") 43 parser.add_argument("--measurefile", type=str, default="measures.json", 29 parser.add_argument("--suffix", type=str, default="", help="suffix of saved files") 44 
help="Measure file it searchs in folders") 45 parser.add_argument("--suffix", type=str, default="", 46 help="suffix of saved files") 30 47 31 args = parser.parse_args() 48 args = parser.parse_args() 32 EXP_DIR = args.expdir 49 EXP_DIR = args.expdir 33 MEASURE_FILE=args.measurefile 50 MEASURE_FILE = args.measurefile 34 SUFFIX = args.suffix 51 SUFFIX = args.suffix 35 52 36 #EXP_DIR="exp/kmeans_teacher_1/pvector-1" 53 # EXP_DIR="exp/kmeans_teacher_1/pvector-1" 37 RESULTS_DIR=os.path.join(EXP_DIR, "res") 54 RESULTS_DIR = os.path.join(EXP_DIR, "res") 38 55 39 # -- CONFIG 56 # -- CONFIG 40 kmin = 2 57 kmin = 2 41 kmax = 100 58 kmax = 100 42 59 43 60 44 # -- CREATE FOLDER 61 # -- CREATE FOLDER 45 if not os.path.exists(RESULTS_DIR): 62 if not os.path.exists(RESULTS_DIR): 46 os.makedirs(RESULTS_DIR) 63 os.makedirs(RESULTS_DIR) 47 64 48 # -- BEGIN REGROUPMENT 65 # -- BEGIN REGROUPMENT 49 66 50 subsets = ["train", "val"] 67 subsets = ["train", "val"] 51 68 52 disequilibriums = [] 69 disequilibriums = [] 53 70 71 54 def init_measures(): 72 def init_measures(): 55 measures = {} 73 measures = {} 56 74 57 for subset in subsets: 75 for subset in subsets: 58 measures[subset] = {} 76 measures[subset] = {} 59 measures[subset]["entropy"] = [] 77 measures[subset]["entropy"] = [] 60 measures[subset]["vscore"] = [] 78 measures[subset]["vscore"] = [] 61 measures[subset]["homogeneity"] = [] 79 measures[subset]["homogeneity"] = [] 62 measures[subset]["completeness"] = [] 80 measures[subset]["completeness"] = [] 63 return measures 81 return measures 64 82 83 65 measures = init_measures() 84 measures = init_measures() 66 85 67 for kfold in range(1, 5): 86 for kfold in range(1, 5): 68 print(kfold) 87 print("Regrouping on kfold: " + str(kfold)) 69 for k in range(kmin, kmax+1): 88 # -- REGROUP MEASURES INTO LISTS 70 measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE) 89 for k in range(kmin, kmax+1): 71 with open(measures_file, 'r') as f: 90 measures_file = 
os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE) 72 meas_data = json.load(f) 91 with open(measures_file, 'r') as f: 73 disequilibriums.append(meas_data["disequilibrium"]) 92 meas_data = json.load(f) 74 for subset in subsets: 93 disequilibriums.append(meas_data["disequilibrium"]) 75 measures[subset]["entropy"].append(meas_data[subset]["entropy"]) 94 for subset in subsets: 76 measures[subset]["vscore"].append(meas_data[subset]["vscore"]) 95 measures[subset]["entropy"].append( 77 measures[subset]["homogeneity"].append(meas_data[subset]["homogeneity"]) 96 meas_data[subset]["entropy"]) 78 measures[subset]["completeness"].append(meas_data[subset]["completeness"]) 97 measures[subset]["vscore"].append( 79 for subset in subsets: 98 meas_data[subset]["vscore"]) 80 plot_values_clusters( 99 measures[subset]["homogeneity"].append( 81 os.path.join(RESULTS_DIR, "entropy_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), 100 meas_data[subset]["homogeneity"]) 82 measures[subset]["entropy"], 101 measures[subset]["completeness"].append( 83 "Entropy " + str(subset) + " set " + str(kfold), 102 meas_data[subset]["completeness"]) 84 "N clusters", 103 85 "Entropy") 104 # -- PLOT AND SAVE MEASURES FOR A SPECIFIC SUBSET 86 plot_values_clusters( 105 for subset in subsets: 87 os.path.join(RESULTS_DIR, "vscore_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), 106 # Plot all measures 88 measures[subset]["vscore"], 107 outf = "measures_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf" 89 "Vscore " + str(subset) + " set " + str(kfold), 108 90 "N clusters", 109 fig = plt.figure(1) 91 "Vscore") 110 for i, measure in enumerate(measures[subset]): 92 plot_values_clusters( 111 93 os.path.join(RESULTS_DIR, "homogeneity_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), 112 plt.subplot(220 + i + 1) 94 measures[subset]["homogeneity"], 113 95 "Homogeneity " + str(subset) + " set " + str(kfold), 114 plot_values_clusters( 96 "N clusters", 115 
measures[subset][measure], 97 "Homogeneity") 116 measure.capitalize() + " " + str(subset) + " set " + str(kfold), 98 plot_values_clusters( 117 "N clusters", 99 os.path.join(RESULTS_DIR, "completeness_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"), 118 measure.capitalize()) 100 measures[subset]["completeness"], 119 plt.subplots_adjust(hspace=0.5, wspace=0.3) 101 "Completeness " + str(subset) + " set " + str(kfold), 120 save_plot(os.path.join(RESULTS_DIR, outf)) 102 "N clusters", 121 103 "Completeness") 122 # Save all measures on a csv file 104 plot_values_clusters( 123 save_results( 105 os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".pdf"), 124 os.path.join(RESULTS_DIR, "measures_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".csv"), 106 disequilibriums, 125 [ 107 "Disequilibrium set " + str(kfold), 126 measures[subset]["entropy"], 108 "N clusters", 127 measures[subset]["homogeneity"], 109 "Disequilibrium") 128 measures[subset]["completeness"], 110 129 measures[subset]["vscore"] 111 measures = init_measures() 130 ], 112 disequilibriums = [] 131 [ 132 "entropy", 133 "homogeneity", 134 "completeness", 135 "vscore" 136 ] 137 ) 138 139 # PLOT AND SAVE FOR DISEQUILIBRIUM 140 plot_values_clusters( 141 disequilibriums, 142 "Disequilibrium set " + str(kfold), 143 "N clusters", 144 "Disequilibrium") 145 save_plot(os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".pdf")) 146 147 save_results( 148 os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".csv"), 149 [disequilibriums], 150 ["disequilibrium"]) 151 152 measures = init_measures()