Commit 933b2505a171366ecf2ae150b3a8ba49413d3695

Authored by Mathias Quillot
1 parent e36dbbc98b
Exists in master

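Add entropy to the train/val results; size the count matrices by the largest cluster id and warn when some clusters are empty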

Showing 1 changed file with 12 additions and 4 deletions Side-by-side Diff

bin/measure_clustering.py
... ... @@ -47,6 +47,7 @@
47 47 train_classes = np.asarray([class_lst_ind[x[0][0]][x[0][3]][0][1] for x in train_lst])
48 48 train_clusters = np.asarray([clustering_ind[x[0][0]][x[0][3]][0][1] for x in train_lst], dtype=np.int)
49 49  
  50 +
50 51 val_classes = np.asarray([class_lst_ind[x[0][0]][x[0][3]][0][1] for x in val_lst])
51 52 val_clusters = np.asarray([clustering_ind[x[0][0]][x[0][3]][0][1] for x in val_lst], dtype=np.int)
52 53  
... ... @@ -78,7 +79,7 @@
78 79 classe1_unique = np.unique(classes1)
79 80 classe2_unique = np.unique(classes2)
80 81 all_classes = np.unique(np.concatenate((classe1_unique, classe2_unique)))
81   -
  82 +
82 83 # Label Encoder for classes
83 84 le = preprocessing.LabelEncoder()
84 85 le.fit(all_classes)
85 86  
86 87  
... ... @@ -86,12 +87,15 @@
86 87 # Index
87 88 cluster1_unique = np.unique(clusters1)
88 89 cluster2_unique = np.unique(clusters2)
89   -
90 90 all_clusters = np.unique(np.concatenate((cluster1_unique, cluster2_unique)))
91 91  
  92 + # Warn when the largest cluster id differs from (number of distinct clusters in clusters1) - 1
  93 + if np.max(all_clusters) != len(cluster1_unique)-1:
  94 + print("WARNING: Some clusters are empty. Value max : " + str(np.max(all_clusters)) + " Nb values : " + str(len(cluster1_unique)))
  95 +
92 96 # Create the count matrices: one row per cluster, one column per class
93   - counts_matrix1 = np.zeros((len(all_clusters), len(all_classes)))
94   - counts_matrix2 = np.zeros((len(all_clusters), len(all_classes)))
  97 + counts_matrix1 = np.zeros((np.max(all_clusters) + 1, len(all_classes)))
  98 + counts_matrix2 = np.zeros((np.max(all_clusters) + 1, len(all_classes)))
95 99  
96 100 for cluster in all_clusters:
97 101  
98 102  
99 103  
100 104  
... ... @@ -123,18 +127,22 @@
123 127 val_completeness = metrics.completeness_score(val_classes, val_clusters)
124 128  
125 129 counts_matrix1, counts_matrix2 = generate_count_matrix(train_classes, train_clusters, val_classes, val_clusters)
  130 +
126 131 mask, dis_human, dis_measures = disequilibrium(counts_matrix1, counts_matrix2, isGlobal=False)
127 132  
  133 +
128 134 (train_entropy_matrix, train_entropy) = entropy(counts_matrix1)
129 135 (val_entropy_matrix, val_entropy) = entropy(counts_matrix2)
130 136  
131 137 results = {}
132 138 results["train"] = {}
  139 +results["train"]["entropy"] = train_entropy
133 140 results["train"]["vscore"] = train_vscore
134 141 results["train"]["homogeneity"] = train_homogeneity
135 142 results["train"]["completeness"] = val_completeness
136 143  
137 144 results["val"] = {}
  145 +results["val"]["entropy"] = val_entropy
138 146 results["val"]["vscore"] = val_vscore
139 147 results["val"]["homogeneity"] = val_homogeneity
140 148 results["val"]["completeness"] = val_completeness