Blame view

bin/cluster_kmeans_ownmade.py 2.1 KB
cce036f22   Mathias Quillot   Implementation of...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
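  '''
  A small hand-written k-means clustering experiment.

  NOTE: this file was originally described as a clustering test using a
  Mahalanobis distance, but the distance implemented below is the squared
  Euclidean one.
  '''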
  
  import matplotlib.pyplot as plt
  import numpy as np
  from sklearn.manifold import TSNE
  
  N = 18  # Number of individuals (samples)
  d = 2  # Number of dimensions (features per sample)
  K = 3  # Number of clusters

  # Feature matrix: N rows of d values drawn uniformly from [0, 1).
  X = np.random.random_sample(size=(N, d))

  # Initial model (iteration 0): K centroids drawn uniformly from [0, 1).
  C = np.random.random_sample(size=(K, d))
  
  
  def dist(a, b):
      '''
      Squared Euclidean distance between ``a`` and ``b``.

      The element-wise differences are squared and summed; no square root
      is taken, so the result is the *squared* distance.
      '''
      gaps = np.abs(a - b)
      return np.sum(np.square(gaps))
  
  
  def plot_iteration(iteration, points, clusters, centers):
      '''
      Save a scatter plot of one clustering state to ``test_<iteration>.pdf``.

      The points are drawn from their first two columns and coloured by
      ``clusters``; every centre is overlaid as a red ``+``. Both axes are
      fixed to the [0, 1] range and a colorbar is attached for the colours.

      iteration -- value appended to the output file name
      points    -- 2-D array; columns 0 and 1 are used as x and y
      clusters  -- one colour value per point (passed to scatter as ``c``)
      centers   -- iterable of (x, y) pairs
      '''
      fig = plt.figure()
      try:
          ax = fig.add_subplot(111)
          scatter = ax.scatter(points[:, 0], points[:, 1], c=clusters, s=50)
          for center_x, center_y in centers:
              ax.scatter(center_x, center_y, s=50, c='red', marker='+')
          ax.set_xlabel('x')
          ax.set_ylabel('y')
          # Use the explicit figure/axes objects instead of pyplot's implicit
          # "current" state so the calls cannot land on another figure.
          fig.colorbar(scatter, ax=ax)
          ax.set_ylim(0, 1)
          ax.set_xlim(0, 1)
          fig.savefig("test_" + str(iteration) + ".pdf")
      finally:
          # pyplot keeps every figure alive until it is closed; this function
          # is called once per iteration, so release the figure each time.
          plt.close(fig)
  
  
  # Hard stop so that a run which never converges cannot loop forever.
  MAX_ITERATIONS = 2000
  # Debug knob: a plot is written every PLOT_EVERY iterations.
  PLOT_EVERY = 1

  end_algo = False
  i = 0
  while not end_algo:
      if i == MAX_ITERATIONS:
          # No convergence: abort with a non-zero exit status.
          raise SystemExit(1)
      print("Iteration: ", i)

      # Assignment step: distance from every point to every centroid,
      # then pick the nearest centroid for each point.
      distances = np.zeros((N, K))
      for n in range(N):
          for k in range(K):
              distances[n, k] = dist(X[n], C[k])
      closest_cluster = np.argmin(distances, axis=1)

      # Points and centroids stacked in one array (a copy, so it keeps the
      # centroids as they were *before* the update step below).
      # For d > 2 this could be replaced by a 2-D projection, e.g.
      # TSNE(n_components=2).fit_transform(np.concatenate((X, C), axis=0)).
      X_embedded = np.concatenate((X, C), axis=0)

      if i % PLOT_EVERY == 0:
          # -- Debug tool ----------------------
          plot_iteration(
              i,
              X_embedded[:X.shape[0]],
              closest_cluster,
              X_embedded[X.shape[0]:]
          )
          # ------------------------------------

      # Update step: move each centroid to the mean of its assigned points.
      # The algorithm stops once no centroid moves.
      end_algo = True
      for k in range(K):
          members = X[closest_cluster == k]
          if members.shape[0] == 0:
              # The mean of an empty cluster is NaN, and NaN never compares
              # equal to itself, so the convergence test could never pass.
              # Keep the previous centroid instead.
              continue
          C_new = members.mean(axis=0)
          if not np.array_equal(C[k], C_new):
              end_algo = False
          C[k] = C_new
      i = i + 1

  # Final state: the centroids did not move in the last update step, so the
  # last assignment and the stacked array still describe the result.
  plot_iteration(
      i,
      X_embedded[:X.shape[0]],
      closest_cluster,
      X_embedded[X.shape[0]:]
  )