From aeff19f9513b6f3f650ae1c36eff462c545fbf52 Mon Sep 17 00:00:00 2001 From: Mathias Date: Wed, 23 Sep 2020 17:44:31 +0200 Subject: [PATCH] purity measure added and tested --- volia/measures.py | 45 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/volia/measures.py b/volia/measures.py index 3aaebfa..6d5d721 100644 --- a/volia/measures.py +++ b/volia/measures.py @@ -90,7 +90,7 @@ def disequilibrium(matrix1, matrix2, isGlobal=False): ) -def compute_count_matrix(y_hat, y_truth): +def compute_count_matrix(y_truth, y_hat): ''' Check the size of the lists with assertion ''' @@ -121,7 +121,7 @@ def entropy_score(y_truth, y_hat): return np.divide(a, divider, out=np.zeros_like(a), where=divider!=0) # Build count matrix - count_matrix = compute_count_matrix(y_hat, y_truth) + count_matrix = compute_count_matrix(y_truth, y_hat) # Build dividers vector dividers = count_matrix.sum(axis=1) @@ -151,6 +151,42 @@ def entropy_score(y_truth, y_hat): return (result_matrix, result_vector, result) +def purity_score(y_truth, y_hat): + + def divide_line(a, divider): + ''' + Sub function used for dividing matrix by a vector line by line. + ''' + return np.divide(a, divider, out=np.zeros_like(a), where=divider!=0) + + def compute_purity_score(count_matrix, axis=0): + count_per_row = count_matrix.sum(axis=axis) + dividers = np.square(count_per_row) + count_matrix_squared = np.square(count_matrix) + matrix_divided = np.apply_along_axis(divide_line, 0, np.asarray(count_matrix_squared, dtype=np.float), dividers) + vector_purity = np.sum(matrix_divided, axis=axis) + + scalar_purity = np.average(vector_purity, weights=count_per_row) + return (vector_purity, scalar_purity) + + + count_matrix = compute_count_matrix(y_truth, y_hat) + _, purity_cluster_score = compute_purity_score(count_matrix, 1) + _, purity_class_score = cluster_purity = compute_purity_score(count_matrix, 0) + + K = np.sqrt(purity_cluster_score * purity_class_score) + + for i in range(count_matrix.shape[0]): + + for j in range(count_matrix.shape[1]): + count_matrix[i][j] + count_matrix[i] + return { + "purity_class_score": purity_class_score, + "purity_cluster_score": purity_cluster_score, + "K": K + } + if __name__ == "__main__": # Hypothesis @@ -158,8 +194,11 @@ if __name__ == "__main__": # Truth y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]) - (result_matrix, result_vector, result) = entropy(y, y_hat) + (result_matrix, result_vector, result) = entropy_score(y, y_hat) + + print(purity_score(y, y_hat)) + exit(1) print("Result matrix: ") print(result_matrix) print("Result vector: ") -- 1.8.2.3