1 parent 4ed3ebc7d7
Exists in

### purity measure added and tested

Showing 1 changed file with 42 additions and 3 deletions

volia/measures.py

 ... ... @@ -90,7 +90,7 @@ 90 90 ) 91 91 92 92 93 -def compute_count_matrix(y_hat, y_truth): 93 +def compute_count_matrix(y_truth, y_hat): 94 94 ''' 95 95 Check the size of the lists with assertion 96 96 ''' ... ... @@ -121,7 +121,7 @@ 121 121 return np.divide(a, divider, out=np.zeros_like(a), where=divider!=0) 122 122 123 123 # Build count matrix 124 - count_matrix = compute_count_matrix(y_hat, y_truth) 124 + count_matrix = compute_count_matrix(y_truth, y_hat) 125 125 126 126 # Build dividers vector 127 127 dividers = count_matrix.sum(axis=1) 128 128 129 129 130 130 ... ... @@ -151,15 +151,54 @@ 151 151 return (result_matrix, result_vector, result) 152 152 153 153 154 +def purity_score(y_truth, y_hat): 154 155 156 + def divide_line(a, divider): 157 + ''' 158 + Sub function used for dividing matrix by a vector line by line. 159 + ''' 160 + return np.divide(a, divider, out=np.zeros_like(a), where=divider!=0) 161 + 162 + def compute_purity_score(count_matrix, axis=0): 163 + count_per_row = count_matrix.sum(axis=axis) 164 + dividers = np.square(count_per_row) 165 + count_matrix_squared = np.square(count_matrix) 166 + matrix_divided = np.apply_along_axis(divide_line, 0, np.asarray(count_matrix_squared, dtype=np.float), dividers) 167 + vector_purity = np.sum(matrix_divided, axis=axis) 168 + 169 + scalar_purity = np.average(vector_purity, weights=count_per_row) 170 + return (vector_purity, scalar_purity) 171 + 172 + 173 + count_matrix = compute_count_matrix(y_truth, y_hat) 174 + _, purity_cluster_score = compute_purity_score(count_matrix, 1) 175 + _, purity_class_score = cluster_purity = compute_purity_score(count_matrix, 0) 176 + 177 + K = np.sqrt(purity_cluster_score * purity_class_score) 178 + 179 + for i in range(count_matrix.shape): 180 + 181 + for j in range(count_matrix.shape): 182 + count_matrix[i][j] 183 + count_matrix[i] 184 + return { 185 + "purity_class_score": purity_class_score, 186 + "purity_cluster_score": purity_cluster_score, 187 + "K": K 188 + } 189 + 190 + 155 191 if __name__ == "__main__": 156 192 # Hypothesis 157 193 y_hat = np.asarray([0, 1, 2, 0, 1, 0, 3, 2, 2, 3, 3, 0]) 158 194 # Truth 159 195 y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]) 160 196 161 - (result_matrix, result_vector, result) = entropy(y, y_hat) 197 + (result_matrix, result_vector, result) = entropy_score(y, y_hat) 162 198 199 + 200 + print(purity_score(y, y_hat)) 201 + exit(1) 163 202 print("Result matrix: ") 164 203 print(result_matrix) 165 204 print("Result vector: ")