From 9a2c6b4d026288745fe95708e3ff55f00a2351fd Mon Sep 17 00:00:00 2001
From: Quillot Mathias <mathias.quillot@alumni.univ-avignon.fr>
Date: Tue, 11 May 2021 13:56:05 +0200
Subject: [PATCH] New file that helps generate some stats and distribution
 stats (via plots)

---
 volia/stats.py | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 volia/stats.py

diff --git a/volia/stats.py b/volia/stats.py
new file mode 100644
index 0000000..c12c5cd
--- /dev/null
+++ b/volia/stats.py
@@ -0,0 +1,114 @@
+
+import argparse
+
+import os
+import core.data
+import math
+import numpy as np
+import scipy.stats
+import pickle
+import matplotlib.pyplot as plt
+import matplotlib.colors as mcolors
+
+
+
+from cycler import cycler
+
+def stats():
+    """Print the "Decisions" heading on standard output."""
+    print("Decisions")
+
+# Prints the Tableau color names on import. NOTE(review): looks like leftover debug output.
+print(list(mcolors.TABLEAU_COLORS))
+
+if __name__ == "__main__":
+    # Script entry point. For every reference label, the prediction vectors of
+    # the utterances carrying that label are gathered, a Gaussian is fitted to
+    # each output dimension (mean and standard deviation), and the Gaussian CDFs
+    # are plotted to "<outdir>/<label>_prediction_cdf.pdf".
+
+    # Parser
+    parser = argparse.ArgumentParser(description="")
+
+    # Arguments
+    parser.add_argument("--predictions", type=str, help="prediction file", required=True)
+    parser.add_argument("--labels", type=str, help="label file", required=True)
+    parser.add_argument("--labelencoder", type=str, help="label encode pickle file", required=True)
+    parser.add_argument("--outdir", type=str, help="output directory", required=True)
+
+    args = parser.parse_args()
+
+    predictions = core.data.read_id_values(args.predictions, float)
+    labels = core.data.read_labels(args.labels)
+
+    # NOTE(review): pickle.load can execute arbitrary code from the file; only
+    # pass a label encoder pickle that comes from a trusted source.
+    with open(args.labelencoder, "rb") as f:
+        le = pickle.load(f)
+
+    # Create the output directory up front so that savefig does not fail on a
+    # missing path after all the computation has been done.
+    os.makedirs(args.outdir, exist_ok=True)
+
+    print("PREDICTIONS ---------------------------")
+    # Group the prediction vectors by the first label of their id. Vectors are
+    # collected in plain lists and stacked once per label: growing an array with
+    # np.append copies it on every call, which is quadratic in the number of
+    # utterances. The dict is not named "stats" so that it does not rebind the
+    # module-level stats() function.
+    # NOTE(review): ids absent from the label file are not handled here.
+    rows_by_label = {}
+    for id_, predictions_ in predictions.items():
+        rows_by_label.setdefault(labels[id_][0], []).append(predictions_)
+
+    # label -> array with one row per utterance of that label.
+    per_label = {
+        label: np.stack(rows, axis=0)
+        for label, rows in rows_by_label.items()
+    }
+
+    print("CALCULATING ---------------------------")
+
+    # Cycle through every (color, linestyle) combination so that the curves of
+    # one figure stay distinguishable.
+    custom_cycler = (cycler(color=list(mcolors.TABLEAU_COLORS)) *
+        cycler(linestyle=['-', '--', '-.']))
+    kwargs = dict(alpha=0.5)
+
+    # Every curve is evaluated on the same fixed grid over [0, 1].
+    # NOTE(review): presumably the predictions are scores in [0, 1] - confirm.
+    x = np.linspace(0, 1, 1000)
+
+    for label, label_predictions in per_label.items():
+
+        plt.gca().set_prop_cycle(custom_cycler)
+        # Per-dimension mean and standard deviation over the label's utterances.
+        stats_mean = np.mean(label_predictions, axis=0)
+        stats_std = np.std(label_predictions, axis=0)
+
+        for i in range(label_predictions.shape[1]):
+            # Class name for output index i, decoded through the label encoder.
+            label_str = le.inverse_transform([i])[0]
+            mu = stats_mean[i]
+            sigma = stats_std[i]
+            variance = sigma * sigma
+            print(f"{i}: mu {mu}, var {variance}, sigma {sigma}")
+
+            if sigma == 0:
+                # Zero spread (e.g. a label with a single utterance): norm.cdf
+                # returns NaN for scale=0 and nothing would be drawn, so plot
+                # the degenerate step CDF located at mu instead.
+                P = (x >= mu).astype(float)
+            else:
+                P = scipy.stats.norm.cdf(x, mu, sigma)
+            plt.plot(x, P, label=label_str, **kwargs)
+
+        # One figure per label; clear it so the next label starts empty.
+        plt.legend()
+        plt.savefig(os.path.join(args.outdir, f"{label}_prediction_cdf.pdf"))
+        plt.clf()
+
+
+    # TODO:
+    # One graph for each label, displaying the distribution of its prediction outputs.
+        
-- 
1.8.2.3