Commit 933b2505a171366ecf2ae150b3a8ba49413d3695
1 parent
e36dbbc98b
Exists in
master
Add entropy
Showing 1 changed file with 12 additions and 4 deletions (side-by-side diff)
bin/measure_clustering.py
... | ... | @@ -47,6 +47,7 @@ |
47 | 47 | train_classes = np.asarray([class_lst_ind[x[0][0]][x[0][3]][0][1] for x in train_lst]) |
48 | 48 | train_clusters = np.asarray([clustering_ind[x[0][0]][x[0][3]][0][1] for x in train_lst], dtype=np.int) |
49 | 49 | |
50 | + | |
50 | 51 | val_classes = np.asarray([class_lst_ind[x[0][0]][x[0][3]][0][1] for x in val_lst]) |
51 | 52 | val_clusters = np.asarray([clustering_ind[x[0][0]][x[0][3]][0][1] for x in val_lst], dtype=np.int) |
52 | 53 | |
... | ... | @@ -78,7 +79,7 @@ |
78 | 79 | classe1_unique = np.unique(classes1) |
79 | 80 | classe2_unique = np.unique(classes2) |
80 | 81 | all_classes = np.unique(np.concatenate((classe1_unique, classe2_unique))) |
81 | - | |
82 | + | |
82 | 83 | # Label Encoder for classes |
83 | 84 | le = preprocessing.LabelEncoder() |
84 | 85 | le.fit(all_classes) |
85 | 86 | |
86 | 87 | |
... | ... | @@ -86,12 +87,15 @@ |
86 | 87 | # Index |
87 | 88 | cluster1_unique = np.unique(clusters1) |
88 | 89 | cluster2_unique = np.unique(clusters2) |
89 | - | |
90 | 90 | all_clusters = np.unique(np.concatenate((cluster1_unique, cluster2_unique))) |
91 | 91 | |
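92 | + # Warn when the highest cluster id does not match the number of distinct clusters in the first set (some cluster ids are unused) | |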
93 | + if np.max(all_clusters) != len(cluster1_unique)-1: | |
94 | + print("WARNING: Some clusters are empty. Value max : " + str(np.max(all_clusters)) + " Nb values : " + str(len(cluster1_unique))) | |
95 | + | |
92 | 96 | # Create the count matrices: one row per cluster, one column per class |
93 | - counts_matrix1 = np.zeros((len(all_clusters), len(all_classes))) | |
94 | - counts_matrix2 = np.zeros((len(all_clusters), len(all_classes))) | |
97 | + counts_matrix1 = np.zeros((np.max(all_clusters) + 1, len(all_classes))) | |
98 | + counts_matrix2 = np.zeros((np.max(all_clusters) + 1, len(all_classes))) | |
95 | 99 | |
96 | 100 | for cluster in all_clusters: |
97 | 101 | |
98 | 102 | |
99 | 103 | |
100 | 104 | |
... | ... | @@ -123,18 +127,22 @@ |
123 | 127 | val_completeness = metrics.completeness_score(val_classes, val_clusters) |
124 | 128 | |
125 | 129 | counts_matrix1, counts_matrix2 = generate_count_matrix(train_classes, train_clusters, val_classes, val_clusters) |
130 | + | |
126 | 131 | mask, dis_human, dis_measures = disequilibrium(counts_matrix1, counts_matrix2, isGlobal=False) |
127 | 132 | |
133 | + | |
128 | 134 | (train_entropy_matrix, train_entropy) = entropy(counts_matrix1) |
129 | 135 | (val_entropy_matrix, val_entropy) = entropy(counts_matrix2) |
130 | 136 | |
131 | 137 | results = {} |
132 | 138 | results["train"] = {} |
139 | +results["train"]["entropy"] = train_entropy | |
133 | 140 | results["train"]["vscore"] = train_vscore |
134 | 141 | results["train"]["homogeneity"] = train_homogeneity |
135 | 142 | results["train"]["completeness"] = val_completeness |
136 | 143 | |
137 | 144 | results["val"] = {} |
145 | +results["val"]["entropy"] = val_entropy | |
138 | 146 | results["val"]["vscore"] = val_vscore |
139 | 147 | results["val"]["homogeneity"] = val_homogeneity |
140 | 148 | results["val"]["completeness"] = val_completeness |