Blame view

bin/regroup-measures.py 3.21 KB
ee5cc2a7e   Mathias Quillot   Regroup all measu...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
  '''
  Regroup the measures of every fold and number of clusters, and plot them.
  '''
  
  import numpy as np
  import matplotlib.pyplot as plt
  import argparse
  import os
  import json
  
  
  def plot_values_clusters(filepath, values, title, xlabel, ylabel, k_start=2):
  	'''
  	Save a scatter plot with one point per value.

  	The i-th value is drawn at x = k_start + i, and an x tick is placed
  	every 5 positions starting at k_start.

  	filepath: destination handed to savefig.
  	values:   sequence of values to plot (converted with np.asarray).
  	title:    title of the plot.
  	xlabel:   label of the x axis.
  	ylabel:   label of the y axis.
  	k_start:  x position of the first value (default 2, the value that
  	          was previously hard-coded).
  	'''
  	values = np.asarray(values)
  	x = np.arange(len(values)) + k_start
  	x_ticks = np.arange(0, len(values), 5) + k_start
  	# Draw on a dedicated figure instead of pyplot's implicit current one,
  	# so unrelated open figures are never drawn on.
  	fig, ax = plt.subplots()
  	try:
  		ax.scatter(x, values)
  		ax.set_xticks(x_ticks)
  		ax.set_title(title)
  		ax.set_xlabel(xlabel)
  		ax.set_ylabel(ylabel)
  		fig.savefig(filepath)
  	finally:
  		# Release the figure even when drawing or saving fails.
  		plt.close(fig)
  
  # -- PARSER
  parser = argparse.ArgumentParser(description="")
  parser.add_argument("expdir", type=str, help="Directory of experiment")
  parser.add_argument("--measurefile", type=str, default="measures.json", help="Measure file it searchs in folders")
  parser.add_argument("--suffix", type=str, default="", help="suffix of saved files")
  
  args = parser.parse_args()
  EXP_DIR = args.expdir
  MEASURE_FILE = args.measurefile
  SUFFIX = args.suffix
  
  # Plots are written to the "res" sub-directory of the experiment directory
  # (example of experiment directory: exp/kmeans_teacher_1/pvector-1).
  RESULTS_DIR = os.path.join(EXP_DIR, "res")
  
  # -- CONFIG
  # Inclusive range of cluster counts whose measure files are read.
  kmin = 2
  kmax = 100
  
  
  # -- CREATE FOLDER
  # exist_ok removes the race between testing for the directory and creating it.
  os.makedirs(RESULTS_DIR, exist_ok=True)
  
  # -- BEGIN REGROUPMENT
  
  # Names of the subsets looked up in each measure file.
  subsets = ["train", "val"]
  
  # Disequilibrium values collected for the fold being processed.
  disequilibriums = []
  
  def init_measures():
  	'''
  	Build an empty container of measures.

  	Returns a dict with one entry per name in the module-level `subsets`
  	list; each entry maps "entropy", "vscore", "homogeneity" and
  	"completeness" to a new empty list.
  	'''
  	metric_keys = ("entropy", "vscore", "homogeneity", "completeness")
  	return {
  		subset_name: {metric_key: [] for metric_key in metric_keys}
  		for subset_name in subsets
  	}
  
  # Per-subset measures read from every measure file; each one gets its own plot.
  metric_names = ["entropy", "vscore", "homogeneity", "completeness"]
  
  # Folds live in sub-directories of EXP_DIR named 1 .. n_folds.
  n_folds = 4
  
  measures = init_measures()
  
  for kfold in range(1, n_folds + 1):
  	print(kfold)
  	# Gather the measures of this fold for every number of clusters.
  	for k in range(kmin, kmax+1):
  		measures_file = os.path.join(EXP_DIR, str(kfold), str(k), MEASURE_FILE)
  		with open(measures_file, 'r') as f:
  			meas_data = json.load(f)
  		disequilibriums.append(meas_data["disequilibrium"])
  		for subset in subsets:
  			for metric in metric_names:
  				measures[subset][metric].append(meas_data[subset][metric])
  	# One plot per subset and per measure for this fold.
  	for subset in subsets:
  		for metric in metric_names:
  			# Titles and axis labels use the capitalized measure name,
  			# e.g. "vscore" -> "Vscore".
  			label = metric.capitalize()
  			plot_values_clusters(
  				os.path.join(RESULTS_DIR, metric + "_" + str(subset) + "_" + str(kfold) + str(SUFFIX) + ".pdf"),
  				measures[subset][metric],
  				label + " " + str(subset) + " set " + str(kfold),
  				"N clusters",
  				label)
  	plot_values_clusters(
  		os.path.join(RESULTS_DIR, "disequilibrium_" + str(kfold) + str(SUFFIX) + ".pdf"),
  		disequilibriums,
  		"Disequilibrium set " + str(kfold),
  		"N clusters",
  		"Disequilibrium")
  	
  	# Start the next fold from empty containers.
  	measures = init_measures()
  	disequilibriums = []