Quillot Mathias / volia

Browse Code »

Commit d27fe6fcc52cc5c3a4ea61289d4d4b0c38c53b83

Authored by Quillot Mathias 2021-05-19 18:40:16 +0200

1 parent acbafc4147

Exists in master

Add a utt2dur statistics command and turn the original stats command into a pred-distribution subcommand

Showing 1 changed file with 55 additions and 23 deletions Inline Diff

volia/stats.py

volia/stats.py

Diff comments View file @ d27fe6f

1		1
2	import argparse	2	import argparse
3		3
4	import os	4	import os
5	import core.data	5	import core.data
6	import math	6	import math
7	import numpy as np	7	import numpy as np
8	import scipy.stats	8	import scipy.stats
9	import pickle	9	import pickle
10	import matplotlib.pyplot as plt	10	import matplotlib.pyplot as plt
11	import matplotlib.colors as mcolors	11	import matplotlib.colors as mcolors
		12	from utils import SubCommandRunner
12		13
13		14
14
15	from cycler import cycler	15	from cycler import cycler
16		16
def pred_distribution(predictions: str, labels: str, labelencoder: str, outdir: str) -> None:
    """Plot, per label, a Gaussian CDF fitted to each prediction dimension.

    Predictions are grouped by the first label of each utterance id. For every
    group, the mean and standard deviation of each prediction dimension are
    computed, the corresponding normal CDF is drawn on [0, 1], and one PDF
    figure per label is saved as ``<outdir>/<label>_prediction_cdf.pdf``.

    Args:
        predictions: Path to the prediction file, read with
            ``core.data.read_id_values(..., float)``.
        labels: Path to the label file, read with ``core.data.read_labels``.
        labelencoder: Path to a pickled label encoder; it must provide
            ``inverse_transform`` (presumably a scikit-learn ``LabelEncoder``
            -- TODO confirm).
        outdir: Directory in which the figures are written.
    """
    # Read from the function parameters: the previous body used a global
    # ``args`` namespace, which only exists when the module runs as a script.
    predictions_by_id = core.data.read_id_values(predictions, float)
    labels_by_id = core.data.read_labels(labels)

    # NOTE(security): pickle.load can execute arbitrary code; only pass
    # label-encoder files that come from a trusted source.
    with open(labelencoder, "rb") as f:
        le = pickle.load(f)

    print("PREDICTIONS ---------------------------")
    # Collect rows in plain lists and stack once per label; growing an array
    # with np.append copies the whole array for every utterance.
    rows_by_label = {}
    for id_, predictions_ in predictions_by_id.items():
        label = labels_by_id[id_][0]
        rows_by_label.setdefault(label, []).append(predictions_)

    print("CALCULATING ---------------------------")

    custom_cycler = (cycler(color=list(mcolors.TABLEAU_COLORS)) *
                     cycler(linestyle=['-', '--', '-.']))
    kwargs = dict(alpha=0.5)
    # The CDFs are evaluated on a fixed [0, 1] grid shared by every curve.
    x = np.linspace(0, 1, 1000)

    for label, rows in rows_by_label.items():
        label_predictions = np.stack(rows, axis=0)

        plt.gca().set_prop_cycle(custom_cycler)
        stats_mean = np.mean(label_predictions, axis=0)
        stats_std = np.std(label_predictions, axis=0)

        for i in range(label_predictions.shape[1]):
            # Column index i is mapped back to its name by the encoder.
            label_str = le.inverse_transform([i])[0]
            mu = stats_mean[i]
            sigma = stats_std[i]
            variance = sigma * sigma
            print(f"{i}: mu {mu}, var {variance}, sigma {sigma}")

            P = scipy.stats.norm.cdf(x, mu, sigma)
            plt.plot(x, P, label=label_str, **kwargs)

        plt.legend()
        plt.savefig(os.path.join(outdir, f"{label}_prediction_cdf.pdf"))
        # Clear the figure so the next label starts from an empty plot.
        plt.clf()

    print("Decisions")
111		94
112	# TODO:	95
def utt2dur(utt2dur: str, labels: str):
    """Print the number, mean and standard deviation of utterance durations.

    Each non-empty line of the file is expected to hold an utterance id
    followed by its duration, separated by whitespace. The shape of the
    duration array is printed first, then the mean and the standard deviation.

    Args:
        utt2dur: Path to the utt2dur file.
        labels: Currently unused; the previous body only contained empty
            placeholder branches for it. Kept for interface compatibility.

    Raises:
        ValueError: If a non-empty line has no duration column or the
            duration is not a valid float.
    """
    durations = []
    with open(utt2dur, "r") as f:
        for line_number, line in enumerate(f, start=1):
            # split() without argument tolerates tabs and repeated spaces,
            # which split(" ") turned into empty fields.
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no duration.
                continue
            if len(fields) < 2:
                raise ValueError(
                    f"{utt2dur}:{line_number}: missing duration in line {line!r}"
                )
            durations.append(float(fields[1]))

    durations = np.asarray(durations, dtype=float)
    print(durations.shape)
    mean = np.mean(durations)
    std = np.std(durations)

    print(f"mean: {mean}")
    print(f"std: {std}")
		115
		116
		117	if __name__ == "__main__":
		118
		119	# Parser
		120	parser = argparse.ArgumentParser(description="Statistics")
		121	subparsers = parser.add_subparsers(title="actions")
		122
		123	# pred-distribution
		124	parser_pred_dist = subparsers.add_parser("pred-distribution", help="plot distributions of prediction through labels")
		125	parser_pred_dist.add_argument("--predictions", type=str, help="prediction file", required=True)
		126	parser_pred_dist.add_argument("--labels", type=str, help="label file", required=True)
		127	parser_pred_dist.add_argument("--labelencoder", type=str, help="label encode pickle file", required=True)