Commit d27fe6fcc52cc5c3a4ea61289d4d4b0c38c53b83
1 parent
acbafc4147
Exists in
master
Add utt2dur statistics command and replace the original stats command with a pred-distribution subcommand
Showing 1 changed file with 55 additions and 23 deletions Inline Diff
volia/stats.py
1 | 1 | ||
2 | import argparse | 2 | import argparse |
3 | 3 | ||
4 | import os | 4 | import os |
5 | import core.data | 5 | import core.data |
6 | import math | 6 | import math |
7 | import numpy as np | 7 | import numpy as np |
8 | import scipy.stats | 8 | import scipy.stats |
9 | import pickle | 9 | import pickle |
10 | import matplotlib.pyplot as plt | 10 | import matplotlib.pyplot as plt |
11 | import matplotlib.colors as mcolors | 11 | import matplotlib.colors as mcolors |
12 | from utils import SubCommandRunner | ||
12 | 13 | ||
13 | 14 | ||
14 | |||
15 | from cycler import cycler | 15 | from cycler import cycler |
16 | 16 | ||
17 | def stats(): | 17 | def pred_distribution(predictions: str, labels: str, labelencoder: str, outdir: str): |
18 | print("Decisions") | ||
19 | 18 | ||
20 | |||
21 | print(list(mcolors.TABLEAU_COLORS)) | ||
22 | |||
23 | |||
24 | if __name__ == "__main__": | ||
25 | |||
26 | # Parser | ||
27 | parser = argparse.ArgumentParser(description="") | ||
28 | |||
29 | # Arguments | ||
30 | parser.add_argument("--predictions", type=str, help="prediction file", required=True) | ||
31 | parser.add_argument("--labels", type=str, help="label file", required=True) | ||
32 | parser.add_argument("--labelencoder", type=str, help="label encode pickle file", required=True) | ||
33 | parser.add_argument("--outdir", type=str, help="output file", required=True) | ||
34 | |||
35 | args = parser.parse_args() | ||
36 | |||
37 | predictions = core.data.read_id_values(args.predictions, float) | 19 | predictions = core.data.read_id_values(args.predictions, float) |
38 | labels = core.data.read_labels(args.labels) | 20 | labels = core.data.read_labels(args.labels) |
39 | 21 | ||
40 | le = None | 22 | le = None |
41 | with open(args.labelencoder, "rb") as f: | 23 | with open(args.labelencoder, "rb") as f: |
42 | le = pickle.load(f) | 24 | le = pickle.load(f) |
43 | stats = {} | 25 | stats = {} |
44 | 26 | ||
45 | print("PREDICTIONS ---------------------------") | 27 | print("PREDICTIONS ---------------------------") |
46 | for id_, predictions_ in predictions.items(): | 28 | for id_, predictions_ in predictions.items(): |
47 | label = labels[id_][0] | 29 | label = labels[id_][0] |
48 | if label not in stats: | 30 | if label not in stats: |
49 | stats[label] = { | 31 | stats[label] = { |
50 | "nb_utt": 1, | 32 | "nb_utt": 1, |
51 | "predictions": np.expand_dims(predictions_, axis=0) | 33 | "predictions": np.expand_dims(predictions_, axis=0) |
52 | } | 34 | } |
53 | else: | 35 | else: |
54 | stats[label]["nb_utt"] = stats[label]["nb_utt"] + 1 | 36 | stats[label]["nb_utt"] = stats[label]["nb_utt"] + 1 |
55 | stats[label]["predictions"] = np.append(stats[label]["predictions"], np.expand_dims(predictions_, axis=0), axis=0) | 37 | stats[label]["predictions"] = np.append(stats[label]["predictions"], np.expand_dims(predictions_, axis=0), axis=0) |
56 | 38 | ||
57 | 39 | ||
58 | print("CALCULATING ---------------------------") | 40 | print("CALCULATING ---------------------------") |
59 | 41 | ||
60 | 42 | ||
61 | colors = [ | 43 | colors = [ |
62 | "darkorange", | 44 | "darkorange", |
63 | "red", | 45 | "red", |
64 | "blue" | 46 | "blue" |
65 | ] | 47 | ] |
66 | custom_cycler = (cycler(color=list(mcolors.TABLEAU_COLORS)) * | 48 | custom_cycler = (cycler(color=list(mcolors.TABLEAU_COLORS)) * |
67 | cycler(linestyle=['-', '--', '-.'])) | 49 | cycler(linestyle=['-', '--', '-.'])) |
68 | 50 | ||
69 | 51 | ||
70 | for label, stats_ in stats.items(): | 52 | for label, stats_ in stats.items(): |
71 | 53 | ||
72 | plt.gca().set_prop_cycle(custom_cycler) | 54 | plt.gca().set_prop_cycle(custom_cycler) |
73 | stats_mean = np.mean(stats_["predictions"], axis=0) | 55 | stats_mean = np.mean(stats_["predictions"], axis=0) |
74 | stats_std = np.std(stats_["predictions"], axis=0) | 56 | stats_std = np.std(stats_["predictions"], axis=0) |
75 | 57 | ||
76 | #print(label) | 58 | #print(label) |
77 | #print(stats_mean) | 59 | #print(stats_mean) |
78 | #print(stats_std) | 60 | #print(stats_std) |
79 | kwargs = dict(alpha=0.5) | 61 | kwargs = dict(alpha=0.5) |
80 | 62 | ||
81 | for i in range(stats_["predictions"].shape[1]): | 63 | for i in range(stats_["predictions"].shape[1]): |
82 | label_str = le.inverse_transform([i])[0] | 64 | label_str = le.inverse_transform([i])[0] |
83 | #plt.hist(stats_["predictions"][:, i], bins=10, label=label_str, **kwargs) | 65 | #plt.hist(stats_["predictions"][:, i], bins=10, label=label_str, **kwargs) |
84 | mu = stats_mean[i] | 66 | mu = stats_mean[i] |
85 | variance = stats_std[i] * stats_std[i] | 67 | variance = stats_std[i] * stats_std[i] |
86 | sigma = stats_std[i] | 68 | sigma = stats_std[i] |
87 | # math.sqrt(variance) | 69 | # math.sqrt(variance) |
88 | print(f"{i}: mu {mu}, var {variance}, sigma {sigma}") | 70 | print(f"{i}: mu {mu}, var {variance}, sigma {sigma}") |
89 | 71 | ||
90 | #x_values = np.arange(-1, 5, 0.1) | 72 | #x_values = np.arange(-1, 5, 0.1) |
91 | 73 | ||
92 | #y_values = scipy.stats.norm(mu, variance) | 74 | #y_values = scipy.stats.norm(mu, variance) |
93 | #y = scipy.stats.norm.pdf(x,mean,std) | 75 | #y = scipy.stats.norm.pdf(x,mean,std) |
94 | 76 | ||
95 | #plt.plot(x_values, y_values.pdf(x_values,)) | 77 | #plt.plot(x_values, y_values.pdf(x_values,)) |
96 | 78 | ||
97 | #x, step = np.linspace(mu - 3*sigma, mu + 3*sigma, 1000, retstep=True) | 79 | #x, step = np.linspace(mu - 3*sigma, mu + 3*sigma, 1000, retstep=True) |
98 | x = np.linspace(0, 1, 1000) | 80 | x = np.linspace(0, 1, 1000) |
99 | #x = np.linspace(mu - 3*sigma, mu + 3*sigma, 1000) | 81 | #x = np.linspace(mu - 3*sigma, mu + 3*sigma, 1000) |
100 | #x, step = np.linspace(0, 1, 1000, retstep=True) | 82 | #x, step = np.linspace(0, 1, 1000, retstep=True) |
101 | 83 | ||
102 | P = scipy.stats.norm.cdf(x, mu, sigma) | 84 | P = scipy.stats.norm.cdf(x, mu, sigma) |
103 | #print(step) | 85 | #print(step) |
104 | plt.plot(x, P, label=label_str, **kwargs) | 86 | plt.plot(x, P, label=label_str, **kwargs) |
105 | #plt.savefig("simple_gaussian.pdf") | 87 | #plt.savefig("simple_gaussian.pdf") |
106 | 88 | ||
107 | plt.legend() | 89 | plt.legend() |
108 | plt.savefig(os.path.join(args.outdir, f"{label}_prediction_cdf.pdf")) | 90 | plt.savefig(os.path.join(args.outdir, f"{label}_prediction_cdf.pdf")) |
109 | plt.clf() | 91 | plt.clf() |
110 | 92 | ||
93 | print("Decisions") | ||
111 | 94 | ||
112 | # TODO: | 95 | |
113 | # One graph for each label. Distribution of their predictions output are displayed. | 96 | def utt2dur(utt2dur: str, labels: str): |
114 | 97 | if labels == None: | |
98 | pass | ||
99 | else: | ||
100 | pass | ||
101 | |||
102 | durations = [] | ||
103 | with open(utt2dur, "r") as f: | ||
104 | for line in f: | ||
105 | splited = line.replace("\n", "").split(" ") | ||
106 | durations.append(float(splited[1])) | ||
107 | |||
108 | durations = np.asarray(durations, dtype=float) | ||
109 | print(durations.shape) | ||
110 | mean = np.mean(durations) | ||
111 | std = np.std(durations) | ||
112 | |||
113 | print(f"mean: {mean}") | ||
114 | print(f"std: {std}") | ||
115 | |||
116 | |||
117 | if __name__ == "__main__": | ||
118 | |||
119 | # Parser | ||
120 | parser = argparse.ArgumentParser(description="Statistics") | ||
121 | subparsers = parser.add_subparsers(title="actions") | ||
122 | |||
123 | # pred-distribution | ||
124 | parser_pred_dist = subparsers.add_parser("pred-distribution", help="plot distributions of prediction through labels") | ||
125 | parser_pred_dist.add_argument("--predictions", type=str, help="prediction file", required=True) | ||
126 | parser_pred_dist.add_argument("--labels", type=str, help="label file", required=True) | ||
127 | parser_pred_dist.add_argument("--labelencoder", type=str, help="label encode pickle file", required=True) |