Blame view

volia/stats.py 3.34 KB
9a2c6b4d0   Quillot Mathias   New file that hel...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
  
  import argparse
  
  import os
  import core.data
  import math
  import numpy as np
  import scipy.stats
  import pickle
  import matplotlib.pyplot as plt
  import matplotlib.colors as mcolors
  
  
  
  from cycler import cycler
  
  def stats():
      """Print the ``Decisions`` heading on standard output."""
      heading = "Decisions"
      print(heading)
  
  
  # Debug aid: echoes the keys of matplotlib's TABLEAU_COLORS mapping.
  # NOTE(review): this sits outside the __main__ guard, so it also runs on import.
  print([*mcolors.TABLEAU_COLORS])
  
  
  def _group_predictions_by_label(predictions, labels):
      """Group prediction vectors by the label of the id they belong to.

      Args:
          predictions: Mapping from id to an array-like of scores. All entries
              must have the same shape so they can be stacked.
          labels: Mapping from id to a sequence; its first element is used as
              the label of that id.

      Returns:
          A dict, ordered by first appearance of each label, mapping the label
          to ``{"nb_utt": <count>, "predictions": <ndarray>}`` where the array
          stacks the score vectors of that label along axis 0.

      Raises:
          KeyError: If an id of *predictions* is absent from *labels*.
      """
      rows_by_label = {}
      for id_, scores in predictions.items():
          rows_by_label.setdefault(labels[id_][0], []).append(scores)

      # Stack once per label: calling np.append inside the loop would copy the
      # whole accumulated array for every new row.
      return {
          label: {"nb_utt": len(rows), "predictions": np.stack(rows, axis=0)}
          for label, rows in rows_by_label.items()
      }


  def _normal_cdf(x, mu, sigma):
      """Evaluate the CDF of a normal distribution N(mu, sigma**2) at *x*.

      A zero *sigma* (e.g. a label with a single sample) is treated as a
      degenerate distribution whose CDF is a unit step at *mu*;
      ``scipy.stats.norm.cdf`` would return NaN everywhere in that case.

      Args:
          x: Points at which to evaluate the CDF.
          mu: Mean of the distribution.
          sigma: Standard deviation of the distribution.

      Returns:
          An ndarray with the CDF values, same shape as *x*.
      """
      if sigma == 0:
          return np.where(np.asarray(x) >= mu, 1.0, 0.0)
      return scipy.stats.norm.cdf(x, mu, sigma)


  def _plot_label_cdfs(label, label_predictions, le, outdir, prop_cycle):
      """Plot one fitted normal CDF per output column and save it as a PDF.

      Args:
          label: Label whose predictions are plotted; used in the file name.
          label_predictions: 2-D array, one row per sample and one column per
              output index.
          le: Object exposing ``inverse_transform``; presumably a fitted
              scikit-learn ``LabelEncoder`` -- TODO confirm.
          outdir: Directory receiving ``<label>_prediction_cdf.pdf``.
          prop_cycle: Property cycler applied to the current axes.
      """
      plt.gca().set_prop_cycle(prop_cycle)
      means = np.mean(label_predictions, axis=0)
      stds = np.std(label_predictions, axis=0)

      # The curves are evaluated on [0, 1]; presumably the scores are
      # probabilities -- TODO confirm.
      x = np.linspace(0, 1, 1000)

      for i, (mu, sigma) in enumerate(zip(means, stds)):
          label_str = le.inverse_transform([i])[0]
          variance = sigma * sigma
          print(f"{i}: mu {mu}, var {variance}, sigma {sigma}")
          plt.plot(x, _normal_cdf(x, mu, sigma), label=label_str, alpha=0.5)

      plt.legend()
      plt.savefig(os.path.join(outdir, f"{label}_prediction_cdf.pdf"))
      plt.clf()


  def main():
      """Parse the command line and write one prediction-CDF plot per label."""
      # Parser
      parser = argparse.ArgumentParser(description="")

      # Arguments
      parser.add_argument("--predictions", type=str, help="prediction file", required=True)
      parser.add_argument("--labels", type=str, help="label file", required=True)
      parser.add_argument("--labelencoder", type=str, help="label encode pickle file", required=True)
      parser.add_argument("--outdir", type=str, help="output directory", required=True)

      args = parser.parse_args()

      predictions = core.data.read_id_values(args.predictions, float)
      labels = core.data.read_labels(args.labels)

      # NOTE(security): pickle.load can execute arbitrary code; only pass
      # label-encoder files that come from a trusted source.
      with open(args.labelencoder, "rb") as f:
          le = pickle.load(f)

      print("PREDICTIONS ---------------------------")
      per_label = _group_predictions_by_label(predictions, labels)

      print("CALCULATING ---------------------------")
      # savefig does not create missing directories.
      if args.outdir:
          os.makedirs(args.outdir, exist_ok=True)

      custom_cycler = (cycler(color=list(mcolors.TABLEAU_COLORS)) *
          cycler(linestyle=['-', '--', '-.']))

      for label, label_stats in per_label.items():
          _plot_label_cdfs(label, label_stats["predictions"], le, args.outdir, custom_cycler)

      # TODO:
      # One graph for each label, displaying the distribution of its
      # prediction outputs.


  if __name__ == "__main__":
      main()