Commit 6da8f6ca75a6661a2001abf31d7d891a5cb869e5

Authored by Mathias Quillot
1 parent 8004d48e51
Exists in master

Fix a logic error in the mask computation: the expression was missing a negation (`not`), which this commit adds.

Showing 1 changed file with 3 additions and 1 deletions Inline Diff

1 ''' 1 '''
2 This module is a part of my library. 2 This module is a part of my library.
3 It aims to compute some measures for clustering. 3 It aims to compute some measures for clustering.
4 ''' 4 '''
5 5
6 import numpy as np 6 import numpy as np
7 7
def disequilibrium_(matrix1, matrix2, isGlobal=False, mod=None):
    '''
    Compute the disequilibrium between two count matrices, cell by cell.

    Each matrix is first normalised, then the second normalised matrix
    is subtracted from the first one.

    matrix1, matrix2: 2-D count matrices of the same shape (presumably
        one row per cluster and one column per class -- confirm against
        the callers).
    isGlobal: selects the denominator used for the normalisation:
        - True : every cell is divided by the sum of the whole matrix
        - False : every cell is divided by the sum of its own row
        A zero denominator yields 0 instead of a division error.
    mod: optional space-separated list of transformations applied, in
        order, to the difference matrix:
        - "power" : square every value
        - "human" : multiply every value by 100
        - "abs" : take the absolute value
        None or an empty string leaves the difference untouched.

    Return a tuple (mask, result) where mask is a boolean matrix that is
    True wherever at least one of the two input cells is non-zero, and
    result is the (possibly transformed) difference matrix.

    Raise ValueError when mod contains an unknown word.
    '''

    def divide_line(a, divider):
        '''
        Sub function used for dividing matrix by a vector line by line.
        Positions whose divider is zero are left at 0.
        '''
        return np.divide(a, divider, out=np.zeros_like(a), where=divider != 0)

    # The builtin float replaces the np.float alias, which no longer
    # exists in recent NumPy releases.
    counts1 = np.asarray(matrix1, dtype=float)
    counts2 = np.asarray(matrix2, dtype=float)

    if isGlobal:
        dividers1 = counts1.sum()
        dividers2 = counts2.sum()
    else:
        dividers1 = counts1.sum(axis=1)
        dividers2 = counts2.sum(axis=1)

    # Each column (one value per row) is divided by the per-row dividers.
    matrix1_divided = np.apply_along_axis(divide_line, 0, counts1, dividers1)
    matrix2_divided = np.apply_along_axis(divide_line, 0, counts2, dividers2)

    result = matrix1_divided - matrix2_divided

    # A cell is kept when it is not empty in both matrices at once.
    mask = np.logical_or(counts1 != 0, counts2 != 0)

    # split() without argument ignores repeated spaces, and an empty
    # string simply yields no transformation at all.
    for word in (mod or "").split():
        if word == "power":
            result = np.power(result, 2)
        elif word == "human":
            result = result * 100
        elif word == "abs":
            result = np.absolute(result)
        else:
            raise ValueError("Need to specify an accepted mod of the disequilibrium (\"power\", \"human\" or \"abs\")")
    return (mask, result)
58 59
59 60
60 61
def disequilibrium_mean_by_cluster(mask, matrix):
    '''
    Mean of disequilibrium, row by row.

    mask: 2-D boolean matrix; the number of True cells of a row is the
        denominator used for that row.
    matrix: 2-D matrix of disequilibrium values with the same shape as
        mask (presumably one row per cluster -- confirm against the
        callers).

    Return a 1-D float array holding, for each row, the sum of the row
    of matrix divided by the number of True cells in the same row of
    mask. A row whose mask is entirely False yields 0 rather than a
    0/0 NaN, so that it cannot poison a later aggregation.
    '''
    row_totals = np.asarray(matrix, dtype=float).sum(axis=1)
    row_counts = np.asarray(mask).sum(axis=1)
    return np.divide(
        row_totals,
        row_counts,
        out=np.zeros_like(row_totals),
        where=row_counts != 0,
    )
73 75
74 76
def disequilibrium(matrix1, matrix2, isGlobal=False):
    '''
    Disequilibrium matrix and overall disequilibrium value.

    Return a tuple (mask, matrix, value) where:
    - mask is the mask returned by disequilibrium_
    - matrix is the raw disequilibrium matrix multiplied by 100
    - value is the sum of the per-row means of the squared
      disequilibrium, divided by the number of rows of matrix1
    '''
    mask, raw = disequilibrium_(matrix1, matrix2, isGlobal)

    squared = np.power(raw, 2)
    per_cluster = disequilibrium_mean_by_cluster(mask, squared)
    overall = per_cluster.sum() / matrix1.shape[0]

    return (mask, raw * 100, overall)
89 91
90 92
def entropy(count_matrix):
    '''
    Compute the entropy (base 2) of a count matrix.

    count_matrix: 2-D matrix of counts (presumably one row per cluster
        and one column per class -- confirm against the callers).

    Every row is turned into proportions by dividing it by its own sum;
    a row whose sum is zero is left at 0.

    Return a tuple (result_matrix, result) where result_matrix holds
    -p * log2(p) for every cell (0 where the count is zero) and result
    is the sum of the row entropies, each weighted by the share of the
    row sum in the grand total.
    '''

    def divide_line(a, divider):
        '''
        Sub function used for dividing matrix by a vector line by line.
        Positions whose divider is zero are left at 0.
        '''
        return np.divide(a, divider, out=np.zeros_like(a), where=divider != 0)

    # The builtin float replaces the np.float alias, which no longer
    # exists in recent NumPy releases.
    counts = np.asarray(count_matrix, dtype=float)
    dividers = counts.sum(axis=1)

    proportions = np.apply_along_axis(divide_line, 0, counts, dividers)

    # Without an explicit out array, the cells skipped by where= would
    # contain uninitialised memory; they are forced to 0 here.
    logs = np.log2(proportions, out=np.zeros_like(proportions), where=counts != 0)

    result_matrix = -1 * proportions * logs
    result = result_matrix.sum(axis=1) * dividers / dividers.sum()
    result = result.sum()
    return (result_matrix, result)
106 108