Commit d27fe6fcc52cc5c3a4ea61289d4d4b0c38c53b83

Authored by Quillot Mathias
1 parent acbafc4147
Exists in master

Add utt2dur statistics command and change the original stats command into a pred-distribution subcommand

Showing 1 changed file with 55 additions and 23 deletions Inline Diff

1 1
2 import argparse 2 import argparse
3 3
4 import os 4 import os
5 import core.data 5 import core.data
6 import math 6 import math
7 import numpy as np 7 import numpy as np
8 import scipy.stats 8 import scipy.stats
9 import pickle 9 import pickle
10 import matplotlib.pyplot as plt 10 import matplotlib.pyplot as plt
11 import matplotlib.colors as mcolors 11 import matplotlib.colors as mcolors
12 from utils import SubCommandRunner
12 13
13 14
14
15 from cycler import cycler 15 from cycler import cycler
16 16
17 def stats(): 17 def pred_distribution(predictions: str, labels: str, labelencoder: str, outdir: str):
18 print("Decisions")
19 18
20
21 print(list(mcolors.TABLEAU_COLORS))
22
23
24 if __name__ == "__main__":
25
26 # Parser
27 parser = argparse.ArgumentParser(description="")
28
29 # Arguments
30 parser.add_argument("--predictions", type=str, help="prediction file", required=True)
31 parser.add_argument("--labels", type=str, help="label file", required=True)
32 parser.add_argument("--labelencoder", type=str, help="label encode pickle file", required=True)
33 parser.add_argument("--outdir", type=str, help="output file", required=True)
34
35 args = parser.parse_args()
36
37 predictions = core.data.read_id_values(args.predictions, float) 19 predictions = core.data.read_id_values(args.predictions, float)
38 labels = core.data.read_labels(args.labels) 20 labels = core.data.read_labels(args.labels)
39 21
40 le = None 22 le = None
41 with open(args.labelencoder, "rb") as f: 23 with open(args.labelencoder, "rb") as f:
42 le = pickle.load(f) 24 le = pickle.load(f)
43 stats = {} 25 stats = {}
44 26
45 print("PREDICTIONS ---------------------------") 27 print("PREDICTIONS ---------------------------")
46 for id_, predictions_ in predictions.items(): 28 for id_, predictions_ in predictions.items():
47 label = labels[id_][0] 29 label = labels[id_][0]
48 if label not in stats: 30 if label not in stats:
49 stats[label] = { 31 stats[label] = {
50 "nb_utt": 1, 32 "nb_utt": 1,
51 "predictions": np.expand_dims(predictions_, axis=0) 33 "predictions": np.expand_dims(predictions_, axis=0)
52 } 34 }
53 else: 35 else:
54 stats[label]["nb_utt"] = stats[label]["nb_utt"] + 1 36 stats[label]["nb_utt"] = stats[label]["nb_utt"] + 1
55 stats[label]["predictions"] = np.append(stats[label]["predictions"], np.expand_dims(predictions_, axis=0), axis=0) 37 stats[label]["predictions"] = np.append(stats[label]["predictions"], np.expand_dims(predictions_, axis=0), axis=0)
56 38
57 39
58 print("CALCULATING ---------------------------") 40 print("CALCULATING ---------------------------")
59 41
60 42
61 colors = [ 43 colors = [
62 "darkorange", 44 "darkorange",
63 "red", 45 "red",
64 "blue" 46 "blue"
65 ] 47 ]
66 custom_cycler = (cycler(color=list(mcolors.TABLEAU_COLORS)) * 48 custom_cycler = (cycler(color=list(mcolors.TABLEAU_COLORS)) *
67 cycler(linestyle=['-', '--', '-.'])) 49 cycler(linestyle=['-', '--', '-.']))
68 50
69 51
70 for label, stats_ in stats.items(): 52 for label, stats_ in stats.items():
71 53
72 plt.gca().set_prop_cycle(custom_cycler) 54 plt.gca().set_prop_cycle(custom_cycler)
73 stats_mean = np.mean(stats_["predictions"], axis=0) 55 stats_mean = np.mean(stats_["predictions"], axis=0)
74 stats_std = np.std(stats_["predictions"], axis=0) 56 stats_std = np.std(stats_["predictions"], axis=0)
75 57
76 #print(label) 58 #print(label)
77 #print(stats_mean) 59 #print(stats_mean)
78 #print(stats_std) 60 #print(stats_std)
79 kwargs = dict(alpha=0.5) 61 kwargs = dict(alpha=0.5)
80 62
81 for i in range(stats_["predictions"].shape[1]): 63 for i in range(stats_["predictions"].shape[1]):
82 label_str = le.inverse_transform([i])[0] 64 label_str = le.inverse_transform([i])[0]
83 #plt.hist(stats_["predictions"][:, i], bins=10, label=label_str, **kwargs) 65 #plt.hist(stats_["predictions"][:, i], bins=10, label=label_str, **kwargs)
84 mu = stats_mean[i] 66 mu = stats_mean[i]
85 variance = stats_std[i] * stats_std[i] 67 variance = stats_std[i] * stats_std[i]
86 sigma = stats_std[i] 68 sigma = stats_std[i]
87 # math.sqrt(variance) 69 # math.sqrt(variance)
88 print(f"{i}: mu {mu}, var {variance}, sigma {sigma}") 70 print(f"{i}: mu {mu}, var {variance}, sigma {sigma}")
89 71
90 #x_values = np.arange(-1, 5, 0.1) 72 #x_values = np.arange(-1, 5, 0.1)
91 73
92 #y_values = scipy.stats.norm(mu, variance) 74 #y_values = scipy.stats.norm(mu, variance)
93 #y = scipy.stats.norm.pdf(x,mean,std) 75 #y = scipy.stats.norm.pdf(x,mean,std)
94 76
95 #plt.plot(x_values, y_values.pdf(x_values,)) 77 #plt.plot(x_values, y_values.pdf(x_values,))
96 78
97 #x, step = np.linspace(mu - 3*sigma, mu + 3*sigma, 1000, retstep=True) 79 #x, step = np.linspace(mu - 3*sigma, mu + 3*sigma, 1000, retstep=True)
98 x = np.linspace(0, 1, 1000) 80 x = np.linspace(0, 1, 1000)
99 #x = np.linspace(mu - 3*sigma, mu + 3*sigma, 1000) 81 #x = np.linspace(mu - 3*sigma, mu + 3*sigma, 1000)
100 #x, step = np.linspace(0, 1, 1000, retstep=True) 82 #x, step = np.linspace(0, 1, 1000, retstep=True)
101 83
102 P = scipy.stats.norm.cdf(x, mu, sigma) 84 P = scipy.stats.norm.cdf(x, mu, sigma)
103 #print(step) 85 #print(step)
104 plt.plot(x, P, label=label_str, **kwargs) 86 plt.plot(x, P, label=label_str, **kwargs)
105 #plt.savefig("simple_gaussian.pdf") 87 #plt.savefig("simple_gaussian.pdf")
106 88
107 plt.legend() 89 plt.legend()
108 plt.savefig(os.path.join(args.outdir, f"{label}_prediction_cdf.pdf")) 90 plt.savefig(os.path.join(args.outdir, f"{label}_prediction_cdf.pdf"))
109 plt.clf() 91 plt.clf()
110 92
93 print("Decisions")
111 94
112 # TODO: 95
113 # One graph for each label. Distribution of their predictions output are displayed. 96 def utt2dur(utt2dur: str, labels: str):
114 97 if labels == None:
98 pass
99 else:
100 pass
101
102 durations = []
103 with open(utt2dur, "r") as f:
104 for line in f:
105 splited = line.replace("\n", "").split(" ")
106 durations.append(float(splited[1]))
107
108 durations = np.asarray(durations, dtype=float)
109 print(durations.shape)
110 mean = np.mean(durations)
111 std = np.std(durations)
112
113 print(f"mean: {mean}")
114 print(f"std: {std}")
115
116
117 if __name__ == "__main__":
118
119 # Parser
120 parser = argparse.ArgumentParser(description="Statistics")
121 subparsers = parser.add_subparsers(title="actions")
122
123 # pred-distribution
124 parser_pred_dist = subparsers.add_parser("pred-distribution", help="plot distributions of prediction through labels")
125 parser_pred_dist.add_argument("--predictions", type=str, help="prediction file", required=True)
126 parser_pred_dist.add_argument("--labels", type=str, help="label file", required=True)
127 parser_pred_dist.add_argument("--labelencoder", type=str, help="label encode pickle file", required=True)