Quillot Mathias / volia

Browse Code »

Commit 6957c7c92b9da33af8bd14d110c2c0cb404aed44

Authored by Quillot Mathias 2021-04-28 21:02:41 +0200

1 parent e200c0b6f7

Exists in master

main function for data management

Showing 1 changed file with 80 additions and 17 deletions · Inline Diff

volia/core/data.py

volia/core/data.py

Diff comments · View file @ 6957c7c

1	'''	1	'''
2	Data management input/output	2	Data management input/output
3	'''	3	'''
4		4
5	# Import packages and modules	5	# Import packages and modules
6	import numpy as np	6	import numpy as np
		7	import sys
7		8
8	# Defining some types	9	# Defining some types
9	from typing import List, Dict	10	from typing import List, Dict
10	KeyToList = Dict[str, List[str]]	11	KeyToList = Dict[str, List[str]]
		12	KeyToLabels = Dict[str, List[str]]
		13	KeyToIntLabels = Dict[str, List[int]]
11	KeyToFeatures = Dict[str, List[float]]	14	KeyToFeatures = Dict[str, List[float]]
12		15
13		16
14	def read_lst(file_path: str) -> KeyToList:	17	def read_lst(file_path: str) -> KeyToList:
15	'''	18	'''
16	Read lst file with this structure:	19	Read lst file with this structure:
17	[id] [value1] [value2] ... [value n]	20	[id_1]
		21	[id_2]
		22	...
		23	[id_n]
		24
		25	Return a list of ids.
		26	'''
		27	lst = []
		28	with open(file_path, "r") as f:
		29	for line in f:
		30	lst.append(line.replace("\n", ""))
		31	return lst
18		32
19	This is a basic function reused by others like read_features.	33
20	returns a dictionary with id as key and a list of value as corresponding values	34	def read_id_values(file_path: str, value_type=str):
21	'''	35	'''
22	# KeyToList type variable	36	Read file where each line is an id with its corresponding values:
23	key_to_list = dict()	37	[id_1] [value_1_1] [value_1_2] ... [value_1_k]
		38	[id_2] [value_2_1] [value_2_2] ... [value_2_k]
		39	...
		40	[id_n] [value_n_1] [value_n_2] ... [value_n_k]
		41
		42	where values are value_type type.
		43
		44	Used in many reader functions with specific value_type.
		45	Return a dictionary with id as key and values as associated values.
		46	'''
		47	id_values = {}
24	with open(file_path, "r") as f:	48	with open(file_path, "r") as f:
25	for line in f:	49	for line in f:
26	splited = line.replace("\n", "").split(" ")	50	splited = line.replace("\n", "").split(" ")
27	id = splited[0]	51	id_values[splited[0]] = np.asarray(splited[1:], dtype=value_type)
28	values = splited[1:]	52	return id_values
29	key_to_list[id] = values
30	return key_to_list
31		53
32		54
33	def read_features(file_path: str) -> KeyToFeatures:	55	def read_features(file_path: str) -> KeyToFeatures:
34	'''	56	'''
		57	Read features files with the following structure:
		58	[id_1] [value_1_1] [value_1_2] ... [value_1_k]
		59	[id_2] [value_2_1] [value_2_2] ... [value_2_k]
		60	...
		61	[id_n] [value_n_1] [value_n_2] ... [value_n_k]
		62
		63	where values are float
		64
		65	Returns a dictionary with id as key and a list of values as associated values
35	'''	66	'''
36	# KeyToFeatures type variable	67	return read_id_values(file_path, float)
37	key_to_features = dict()
38	# and the KeyToList
39	key_to_list = read_lst(file_path)
40
41	for key_, list_ in key_to_list.items():
42	key_to_features[key_] = np.asarray(list_, dtype=float)
43		68
44	return key_to_features	69
		70	def read_labels(file_path: str) -> KeyToLabels:
		71	'''
		72	Read features files with the following structure :
		73	[id_1] [value_1_1] [value_1_2] ... [value_1_k]
		74	[id_2] [value_2_1] [value_2_2] ... [value_2_k]
		75	...
		76	[id_n] [value_n_1] [value_n_2] ... [value_n_k]
		77
		78	where values are int
		79	'''
		80	return read_id_values(file_path, str)
		81
		82
		83	def read_labels_integer(file_path: str) -> KeyToIntLabels:
		84	'''
		85	Read features files with the following structure :
		86	[id_1] [value_1_1] [value_1_2] ... [value_1_k]
		87	[id_2] [value_2_1] [value_2_2] ... [value_2_k]
		88	...
		89	[id_n] [value_n_1] [value_n_2] ... [value_n_k]
		90
		91	where values are int
		92	'''
		93	return read_id_values(file_path, int)
		94
		95
		96	def write_line(id_, values=[], out=sys.stdout):
		97	"""
		98	Write a line in list, labels or features files.
		99	If you want to write a list, specify an empty