measures.py 3.21 KB
``````'''
This module is part of my library.
It computes measures used to evaluate and compare clusterings.
'''

import numpy as np

def disequilibrium_(matrix1, matrix2, isGlobal=False, mod=None):
    '''
    Compute the disequilibrium matrix between two clustering sets.

    Both matrices are normalised, then the second one is subtracted from the
    first one cell by cell. A cell whose divider is zero is normalised to 0.

    Parameters
    ----------
    matrix1, matrix2 : array-like
        Two 2-D matrices of the same shape (presumably one row per cluster
        and one column per class - TODO confirm against callers).
    isGlobal : bool
        Selects the denominator used for the normalisation:
        - True  : every cell is divided by the sum of the whole matrix;
        - False : every cell is divided by the sum of its own row.
    mod : str or None
        Whitespace-separated modifiers, applied in the order given:
        - "power" : square every value;
        - "human" : multiply every value by 100;
        - "abs"   : take the absolute value.
        None or an empty string leaves the difference untouched.

    Returns
    -------
    (mask, result)
        mask is a boolean array that is False where both input matrices are
        zero; result is the (optionally modified) difference matrix.

    Raises
    ------
    ValueError
        If mod contains a word that is not one of the accepted modifiers.
    '''

    def normalise(counts):
        # The builtin float replaces the np.float alias removed in NumPy 1.24.
        counts = np.asarray(counts, dtype=float)
        if isGlobal:
            dividers = counts.sum()
        else:
            # keepdims lets the row sums broadcast against the matrix.
            dividers = counts.sum(axis=1, keepdims=True)
        return np.divide(counts, dividers,
                         out=np.zeros_like(counts), where=dividers != 0)

    matrix1 = np.asarray(matrix1)
    matrix2 = np.asarray(matrix2)

    result = normalise(matrix1) - normalise(matrix2)

    # A cell counts as soon as at least one of the two matrices is non-zero.
    mask = np.logical_or(matrix1 != 0, matrix2 != 0)

    modifiers = {
        "power": lambda values: np.power(values, 2),
        "human": lambda values: values * 100,
        "abs": np.absolute,
    }

    if mod:
        for word in mod.split():
            if word not in modifiers:
                raise ValueError(
                    'Need to specify an accepted mod of the disequilibrium '
                    '("power", "human" or "abs")'
                )
            result = modifiers[word](result)

    return mask, result


def mean_disequilibrium(matrix, mask):
    '''
    Mean of disequilibrium, for each row of the matrix.

    NOTE(review): in the original file these statements sat at the end of
    disequilibrium_ and read an undefined name "matrix"; they look like the
    body of a separate function whose "def" line was lost. The name and
    signature used here are a reconstruction - confirm against callers.

    Parameters
    ----------
    matrix : array-like
        2-D disequilibrium matrix, as returned by disequilibrium_.
    mask : array-like of bool
        Same shape as matrix; True for the cells that count in the mean.

    Returns
    -------
    numpy.ndarray
        1-D array holding, for each row, the sum of the row divided by the
        number of True cells of the mask on that row (0 when there is none).
    '''
    matrix = np.asarray(matrix, dtype=float)
    row_sums = matrix.sum(axis=1)
    row_counts = np.asarray(mask).sum(axis=1)
    # Rows without any counted cell yield 0 instead of dividing by zero.
    return np.divide(row_sums, row_counts,
                     out=np.zeros_like(row_sums), where=row_counts != 0)

def disequilibrium(matrix1, matrix2, isGlobal=False):
    '''
    Disequilibrium matrix expressed on a 0-100 scale.

    Calls disequilibrium_ without any modifier, expects it to yield a
    (mask, matrix) pair, and multiplies the matrix by 100.

    Parameters
    ----------
    matrix1, matrix2
        Forwarded unchanged to disequilibrium_.
    isGlobal : bool
        Forwarded unchanged to disequilibrium_.

    Returns
    -------
    tuple
        A 1-tuple holding the scaled disequilibrium matrix. No aggregated
        disequilibrium value is returned by this function.
    '''
    # The mask is part of the unpacked pair but is not needed here.
    _mask, result = disequilibrium_(matrix1, matrix2, isGlobal)

    return (
        result * 100,
    )

def entropy(count_matrix):
    '''
    Compute the entropy of a count matrix, row by row and overall.

    Every row is turned into proportions p by dividing it by its own sum
    (rows summing to zero stay at 0). The entropy matrix holds
    -p * log2(p) for each cell, with 0 where the count is zero. The overall
    value is the sum of the per-row entropies, each weighted by the share of
    the row sum in the total sum.

    Parameters
    ----------
    count_matrix : array-like
        2-D matrix of counts.

    Returns
    -------
    (result_matrix, result)
        result_matrix is the cell-wise entropy matrix; result is the
        weighted overall entropy (0.0 when the matrix sums to zero).
    '''
    # The builtin float replaces the np.float alias removed in NumPy 1.24.
    counts = np.asarray(count_matrix, dtype=float)

    dividers = counts.sum(axis=1)
    # Column vector so that each row is divided by its own sum.
    row_totals = dividers[:, np.newaxis]
    proportions = np.divide(counts, row_totals,
                            out=np.zeros_like(counts), where=row_totals != 0)

    # "out" is required with "where": otherwise the skipped cells are left
    # uninitialised and can leak NaN or garbage into the result.
    logs = np.log2(proportions,
                   out=np.zeros_like(proportions), where=counts != 0)

    result_matrix = -1 * proportions * logs

    total = dividers.sum()
    if total == 0:
        # Nothing was counted: avoid 0/0 and report a null entropy.
        return (result_matrix, 0.0)

    result = (result_matrix.sum(axis=1) * dividers / total).sum()
    return (result_matrix, result)