Commit aeff19f9513b6f3f650ae1c36eff462c545fbf52

Authored by Mathias
1 parent 4ed3ebc7d7
Exists in master

purity measure added and tested

Showing 1 changed file with 42 additions and 3 deletions Side-by-side Diff

... ... @@ -90,7 +90,7 @@
90 90 )
91 91  
92 92  
93   -def compute_count_matrix(y_hat, y_truth):
  93 +def compute_count_matrix(y_truth, y_hat):
94 94 '''
95 95 Check the size of the lists with assertion
96 96 '''
... ... @@ -121,7 +121,7 @@
121 121 return np.divide(a, divider, out=np.zeros_like(a), where=divider!=0)
122 122  
123 123 # Build count matrix
124   - count_matrix = compute_count_matrix(y_hat, y_truth)
  124 + count_matrix = compute_count_matrix(y_truth, y_hat)
125 125  
126 126 # Build dividers vector
127 127 dividers = count_matrix.sum(axis=1)
128 128  
129 129  
130 130  
... ... @@ -151,15 +151,54 @@
151 151 return (result_matrix, result_vector, result)
152 152  
153 153  
def purity_score(y_truth, y_hat):
    '''
    Compute purity scores from the count matrix of two labelings.

    The count matrix is obtained from compute_count_matrix(y_truth, y_hat).
    Along a given axis, the purity of one line is the sum of the squared
    proportions of its cells; the per-line purities are then averaged,
    weighted by the line totals. This is done once per axis and the two
    scalars are combined through their geometric mean.

    NOTE(review): which axis of the count matrix holds the truth classes and
    which one holds the predicted clusters depends on compute_count_matrix,
    which is not visible here. The "cluster" (axis=1) / "class" (axis=0)
    naming is kept as it was -- confirm against that function.

    Parameters:
        y_truth: reference labels, forwarded to compute_count_matrix.
        y_hat: hypothesis labels, forwarded to compute_count_matrix.

    Returns:
        dict with the keys "purity_class_score", "purity_cluster_score" and
        "K" (square root of the product of the two scores).
    '''

    def compute_purity_score(count_matrix, axis=0):
        '''
        Return (vector_purity, scalar_purity) for lines summed along axis.
        '''
        counts = np.asarray(count_matrix, dtype=float)
        totals = counts.sum(axis=axis)

        # Put the reduced axis back so that every cell is divided by the
        # total of the line it was summed into. This pairs cells and totals
        # correctly for both axes and for non-square matrices.
        dividers = np.expand_dims(np.square(totals), axis)

        # Cells belonging to an empty line keep the 0 stored in `out`
        # instead of triggering a division by zero.
        proportions = np.divide(
            np.square(counts),
            dividers,
            out=np.zeros_like(counts),
            where=dividers != 0,
        )

        vector_purity = proportions.sum(axis=axis)
        scalar_purity = np.average(vector_purity, weights=totals)
        return (vector_purity, scalar_purity)

    count_matrix = compute_count_matrix(y_truth, y_hat)
    _, purity_cluster_score = compute_purity_score(count_matrix, 1)
    _, purity_class_score = compute_purity_score(count_matrix, 0)

    # Geometric mean of the two purities.
    K = np.sqrt(purity_cluster_score * purity_class_score)

    return {
        "purity_class_score": purity_class_score,
        "purity_cluster_score": purity_cluster_score,
        "K": K
    }
  189 +
  190 +
155 191 if __name__ == "__main__":
156 192 # Hypothesis
157 193 y_hat = np.asarray([0, 1, 2, 0, 1, 0, 3, 2, 2, 3, 3, 0])
158 194 # Truth
159 195 y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3])
160 196  
161   - (result_matrix, result_vector, result) = entropy(y, y_hat)
  197 + (result_matrix, result_vector, result) = entropy_score(y, y_hat)
162 198  
  199 +
  200 + print(purity_score(y, y_hat))
  201 + exit(1)
163 202 print("Result matrix: ")
164 203 print(result_matrix)
165 204 print("Result vector: ")