Commit 6957c7c92b9da33af8bd14d110c2c0cb404aed44

Authored by Quillot Mathias
1 parent e200c0b6f7
Exists in master

main function for data management

Showing 1 changed file with 80 additions and 17 deletions Inline Diff

1 ''' 1 '''
2 Data management input/output 2 Data management input/output
3 ''' 3 '''
4 4
5 # Import packages and modules 5 # Import packages and modules
6 import numpy as np 6 import numpy as np
7 import sys
7 8
8 # Defining some types 9 # Defining some types
9 from typing import List, Dict 10 from typing import List, Dict
10 KeyToList = Dict[str, List[str]] 11 KeyToList = Dict[str, List[str]]
12 KeyToLabels = Dict[str, List[str]]
13 KeyToIntLabels = Dict[str, List[int]]
11 KeyToFeatures = Dict[str, List[float]] 14 KeyToFeatures = Dict[str, List[float]]
12 15
13 16
14 def read_lst(file_path: str) -> KeyToList: 17 def read_lst(file_path: str) -> KeyToList:
15 ''' 18 '''
16 Read lst file with this structure: 19 Read lst file with this structure:
17 [id] [value1] [value2] ... [value n] 20 [id_1]
21 [id_2]
22 ...
23 [id_n]
24
25 Return a list of ids.
26 '''
27 lst = []
28 with open(file_path, "r") as f:
29 for line in f:
30 lst.append(line.replace("\n", ""))
31 return lst
18 32
19 This is a basic function reused by others like read_features. 33
20 returns a dictionary with id as key and a list of value as corresponding values 34 def read_id_values(file_path: str, value_type=str):
21 ''' 35 '''
22 # KeyToList type variable 36 Read file where each line is an id with its corresponding values:
23 key_to_list = dict() 37 [id_1] [value_1_1] [value_1_2] ... [value_1_k]
38 [id_2] [value_2_1] [value_2_2] ... [value_2_k]
39 ...
40 [id_n] [value_n_1] [value_n_2] ... [value_n_k]
41
42 where values are value_type type.
43
44 Used in many reader functions with specific value_type.
45 Return a dictionary with id as key and values as associated values.
46 '''
47 id_values = {}
24 with open(file_path, "r") as f: 48 with open(file_path, "r") as f:
25 for line in f: 49 for line in f:
26 splited = line.replace("\n", "").split(" ") 50 splited = line.replace("\n", "").split(" ")
27 id = splited[0] 51 id_values[splited[0]] = np.asarray(splited[1:], dtype=value_type)
28 values = splited[1:] 52 return id_values
29 key_to_list[id] = values
30 return key_to_list
31 53
32 54
33 def read_features(file_path: str) -> KeyToFeatures: 55 def read_features(file_path: str) -> KeyToFeatures:
34 ''' 56 '''
57 Read features files with the following structure:
58 [id_1] [value_1_1] [value_1_2] ... [value_1_k]
59 [id_2] [value_2_1] [value_2_2] ... [value_2_k]
60 ...
61 [id_n] [value_n_1] [value_n_2] ... [value_n_k]
62
63 where values are float
64
65 Returns a dictionary with id as key and a NumPy array of float values as associated values
35 ''' 66 '''
36 # KeyToFeatures type variable 67 return read_id_values(file_path, float)
37 key_to_features = dict()
38 # and the KeyToList
39 key_to_list = read_lst(file_path)
40
41 for key_, list_ in key_to_list.items():
42 key_to_features[key_] = np.asarray(list_, dtype=float)
43 68
44 return key_to_features 69
70 def read_labels(file_path: str) -> KeyToLabels:
71 '''
72 Read labels files with the following structure:
73 [id_1] [value_1_1] [value_1_2] ... [value_1_k]
74 [id_2] [value_2_1] [value_2_2] ... [value_2_k]
75 ...
76 [id_n] [value_n_1] [value_n_2] ... [value_n_k]
77
78 where values are str
79 '''
80 return read_id_values(file_path, str)
81
82
83 def read_labels_integer(file_path: str) -> KeyToIntLabels:
84 '''
85 Read labels files with the following structure:
86 [id_1] [value_1_1] [value_1_2] ... [value_1_k]
87 [id_2] [value_2_1] [value_2_2] ... [value_2_k]
88 ...
89 [id_n] [value_n_1] [value_n_2] ... [value_n_k]
90
91 where values are int
92 '''
93 return read_id_values(file_path, int)
94
95
96 def write_line(id_, values=[], out=sys.stdout):
97 """
98 Write a line in list, labels or features files.
99 If you want to write a list, specify an empty