Commit aeff19f9513b6f3f650ae1c36eff462c545fbf52
1 parent
4ed3ebc7d7
Exists in
master
purity measure added and tested
Showing 1 changed file with 42 additions and 3 deletions Side-by-side Diff
volia/measures.py
... | ... | @@ -90,7 +90,7 @@ |
90 | 90 | ) |
91 | 91 | |
92 | 92 | |
93 | -def compute_count_matrix(y_hat, y_truth): | |
93 | +def compute_count_matrix(y_truth, y_hat): | |
94 | 94 | ''' |
95 | 95 | Check the size of the lists with assertion |
96 | 96 | ''' |
... | ... | @@ -121,7 +121,7 @@ |
121 | 121 | return np.divide(a, divider, out=np.zeros_like(a), where=divider!=0) |
122 | 122 | |
123 | 123 | # Build count matrix |
124 | - count_matrix = compute_count_matrix(y_hat, y_truth) | |
124 | + count_matrix = compute_count_matrix(y_truth, y_hat) | |
125 | 125 | |
126 | 126 | # Build dividers vector |
127 | 127 | dividers = count_matrix.sum(axis=1) |
128 | 128 | |
129 | 129 | |
130 | 130 | |
... | ... | @@ -151,15 +151,54 @@ |
151 | 151 | return (result_matrix, result_vector, result) |
152 | 152 | |
153 | 153 | |
def purity_score(y_truth, y_hat):
    '''
    Compute purity measures from the count matrix of two labelings.

    The count matrix is obtained from compute_count_matrix(y_truth, y_hat).
    A purity score is computed twice, once reducing along axis 1 and once
    reducing along axis 0, and the two scalars are combined through their
    geometric mean.

    Returns a dict with the keys:
        "purity_class_score": scalar purity obtained with axis=0
        "purity_cluster_score": scalar purity obtained with axis=1
        "K": sqrt(purity_cluster_score * purity_class_score)
    '''

    def compute_purity_score(count_matrix, axis=0):
        '''
        Sub function computing the purity along the given axis.

        Every count is divided by the total of the line it belongs to
        (the total taken along `axis`), the ratio is squared, and the
        squared ratios are summed along `axis`. The scalar purity is the
        average of these sums weighted by the line totals.

        Returns a tuple (vector_purity, scalar_purity).
        '''
        # Work in floating point so the division is not truncated.
        counts = np.asarray(count_matrix, dtype=float)
        totals = counts.sum(axis=axis)

        # Re-insert the reduced axis so the squared totals broadcast against
        # the matrix along the same axis that was summed.
        dividers = np.expand_dims(np.square(totals), axis)
        counts_squared = np.square(counts)

        # Lines whose total is zero keep a ratio of 0 instead of producing
        # a division by zero.
        matrix_divided = np.divide(
            counts_squared,
            dividers,
            out=np.zeros_like(counts_squared),
            where=dividers != 0,
        )
        vector_purity = matrix_divided.sum(axis=axis)

        scalar_purity = np.average(vector_purity, weights=totals)
        return (vector_purity, scalar_purity)

    count_matrix = compute_count_matrix(y_truth, y_hat)
    _, purity_cluster_score = compute_purity_score(count_matrix, 1)
    _, purity_class_score = compute_purity_score(count_matrix, 0)

    K = np.sqrt(purity_cluster_score * purity_class_score)

    return {
        "purity_class_score": purity_class_score,
        "purity_cluster_score": purity_cluster_score,
        "K": K
    }
189 | + | |
190 | + | |
155 | 191 | if __name__ == "__main__": |
156 | 192 | # Hypothesis |
157 | 193 | y_hat = np.asarray([0, 1, 2, 0, 1, 0, 3, 2, 2, 3, 3, 0]) |
158 | 194 | # Truth |
159 | 195 | y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]) |
160 | 196 | |
161 | - (result_matrix, result_vector, result) = entropy(y, y_hat) | |
197 | + (result_matrix, result_vector, result) = entropy_score(y, y_hat) | |
162 | 198 | |
199 | + | |
200 | + print(purity_score(y, y_hat)) | |
201 | + exit(1) | |
163 | 202 | print("Result matrix: ") |
164 | 203 | print(result_matrix) |
165 | 204 | print("Result vector: ") |