Commit aeff19f9513b6f3f650ae1c36eff462c545fbf52
1 parent
4ed3ebc7d7
Exists in
master
purity measure added and tested
Showing 1 changed file with 42 additions and 3 deletions Side-by-side Diff
volia/measures.py
| ... | ... | @@ -90,7 +90,7 @@ |
| 90 | 90 | ) |
| 91 | 91 | |
| 92 | 92 | |
| 93 | -def compute_count_matrix(y_hat, y_truth): | |
| 93 | +def compute_count_matrix(y_truth, y_hat): | |
| 94 | 94 | ''' |
| 95 | 95 | Check the size of the lists with assertion |
| 96 | 96 | ''' |
| ... | ... | @@ -121,7 +121,7 @@ |
| 121 | 121 | return np.divide(a, divider, out=np.zeros_like(a), where=divider!=0) |
| 122 | 122 | |
| 123 | 123 | # Build count matrix |
| 124 | - count_matrix = compute_count_matrix(y_hat, y_truth) | |
| 124 | + count_matrix = compute_count_matrix(y_truth, y_hat) | |
| 125 | 125 | |
| 126 | 126 | # Build dividers vector |
| 127 | 127 | dividers = count_matrix.sum(axis=1) |
| 128 | 128 | |
| 129 | 129 | |
| 130 | 130 | |
| ... | ... | @@ -151,15 +151,54 @@ |
| 151 | 151 | return (result_matrix, result_vector, result) |
| 152 | 152 | |
| 153 | 153 | |
def purity_score(y_truth, y_hat):
    '''
    Compute purity scores from the count matrix of two label lists.

    The count matrix is built by compute_count_matrix(y_truth, y_hat). One
    purity value is computed along each axis of that matrix, and K is the
    geometric mean of the two.

    NOTE(review): the key names follow the original mapping (axis 1 ->
    "purity_cluster_score", axis 0 -> "purity_class_score"). Which axis
    holds the classes and which holds the clusters is decided by
    compute_count_matrix -- confirm there.

    :param y_truth: reference labels, forwarded to compute_count_matrix.
    :param y_hat: hypothesis labels, forwarded to compute_count_matrix.
    :return: dict with the keys "purity_class_score",
             "purity_cluster_score" and "K".
    '''

    def compute_purity_score(count_matrix, axis=0):
        '''
        Purity of a count matrix along one axis.

        For every line taken along `axis`, the purity is the sum of the
        squared proportions (count / line total) ** 2. Lines whose total
        is zero get a purity of 0 instead of triggering a division by zero.

        :return: (vector_purity, scalar_purity); the scalar is the average
                 of the vector weighted by the line totals. np.average
                 raises ZeroDivisionError if all totals are zero.
        '''
        # The builtin float replaces np.float, an alias removed in NumPy 1.24.
        counts = np.asarray(count_matrix, dtype=float)
        totals = counts.sum(axis=axis)

        # Re-insert the reduced axis so the totals broadcast against the
        # matrix along the requested axis, whatever the matrix shape is.
        dividers = np.expand_dims(np.square(totals), axis)
        squared = np.square(counts)
        matrix_divided = np.divide(
            squared,
            dividers,
            out=np.zeros_like(squared),
            where=dividers != 0,
        )

        vector_purity = matrix_divided.sum(axis=axis)
        scalar_purity = np.average(vector_purity, weights=totals)
        return (vector_purity, scalar_purity)

    count_matrix = compute_count_matrix(y_truth, y_hat)
    _, purity_cluster_score = compute_purity_score(count_matrix, 1)
    _, purity_class_score = compute_purity_score(count_matrix, 0)

    # Geometric mean of the two purity values.
    K = np.sqrt(purity_cluster_score * purity_class_score)

    return {
        "purity_class_score": purity_class_score,
        "purity_cluster_score": purity_cluster_score,
        "K": K,
    }
| 189 | + | |
| 190 | + | |
| 155 | 191 | if __name__ == "__main__": |
| 156 | 192 | # Hypothesis |
| 157 | 193 | y_hat = np.asarray([0, 1, 2, 0, 1, 0, 3, 2, 2, 3, 3, 0]) |
| 158 | 194 | # Truth |
| 159 | 195 | y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]) |
| 160 | 196 | |
| 161 | - (result_matrix, result_vector, result) = entropy(y, y_hat) | |
| 197 | + (result_matrix, result_vector, result) = entropy_score(y, y_hat) | |
| 162 | 198 | |
| 199 | + | |
| 200 | + print(purity_score(y, y_hat)) | |
| 201 | + exit(1) | |
| 163 | 202 | print("Result matrix: ") |
| 164 | 203 | print(result_matrix) |
| 165 | 204 | print("Result vector: ") |