Commit 6957c7c92b9da33af8bd14d110c2c0cb404aed44
1 parent
e200c0b6f7
Exists in
master
main function for data management
Showing 1 changed file with 80 additions and 17 deletions Inline Diff
volia/core/data.py
1 | ''' | 1 | ''' |
2 | Data management input/output | 2 | Data management input/output |
3 | ''' | 3 | ''' |
4 | 4 | ||
5 | # Import packages and modules | 5 | # Import packages and modules |
6 | import numpy as np | 6 | import numpy as np |
7 | import sys | ||
7 | 8 | ||
8 | # Defining some types | 9 | # Defining some types |
9 | from typing import List, Dict | 10 | from typing import List, Dict |
10 | KeyToList = Dict[str, List[str]] | 11 | KeyToList = Dict[str, List[str]] |
12 | KeyToLabels = Dict[str, List[str]] | ||
13 | KeyToIntLabels = Dict[str, List[int]] | ||
11 | KeyToFeatures = Dict[str, List[float]] | 14 | KeyToFeatures = Dict[str, List[float]] |
12 | 15 | ||
13 | 16 | ||
14 | def read_lst(file_path: str) -> KeyToList: | 17 | def read_lst(file_path: str) -> KeyToList: |
15 | ''' | 18 | ''' |
16 | Read lst file with this structure: | 19 | Read lst file with this structure: |
17 | [id] [value1] [value2] ... [value n] | 20 | [id_1] |
21 | [id_2] | ||
22 | ... | ||
23 | [id_n] | ||
24 | |||
25 | Return a list of ids. | ||
26 | ''' | ||
27 | lst = [] | ||
28 | with open(file_path, "r") as f: | ||
29 | for line in f: | ||
30 | lst.append(line.replace("\n", "")) | ||
31 | return lst | ||
18 | 32 | ||
19 | This is a basic function reused by others like read_features. | 33 | |
20 | returns a dictionary with id as key and a list of value as corresponding values | 34 | def read_id_values(file_path: str, value_type=str): |
21 | ''' | 35 | ''' |
22 | # KeyToList type variable | 36 | Read file where each line is an id with its corresponding values: |
23 | key_to_list = dict() | 37 | [id_1] [value_1_1] [value_1_2] ... [value_1_k] |
38 | [id_2] [value_2_1] [value_2_2] ... [value_2_k] | ||
39 | ... | ||
40 | [id_n] [value_n_1] [value_n_2] ... [value_n_k] | ||
41 | |||
42 | where values are value_type type. | ||
43 | |||
44 | Used in many reader functions with specific value_type. | ||
45 | Return a dictionary with id as key and values as associated values. | ||
46 | ''' | ||
47 | id_values = {} | ||
24 | with open(file_path, "r") as f: | 48 | with open(file_path, "r") as f: |
25 | for line in f: | 49 | for line in f: |
26 | splited = line.replace("\n", "").split(" ") | 50 | splited = line.replace("\n", "").split(" ") |
27 | id = splited[0] | 51 | id_values[splited[0]] = np.asarray(splited[1:], dtype=value_type) |
28 | values = splited[1:] | 52 | return id_values |
29 | key_to_list[id] = values | ||
30 | return key_to_list | ||
31 | 53 | ||
32 | 54 | ||
33 | def read_features(file_path: str) -> KeyToFeatures: | 55 | def read_features(file_path: str) -> KeyToFeatures: |
34 | ''' | 56 | ''' |
57 | Read features files with the following structure: | ||
58 | [id_1] [value_1_1] [value_1_2] ... [value_1_k] | ||
59 | [id_2] [value_2_1] [value_2_2] ... [value_2_k] | ||
60 | ... | ||
61 | [id_n] [value_n_1] [value_n_2] ... [value_n_k] | ||
62 | |||
63 | where values are float | ||
64 | |||
65 | Returns a dictionary with id as key and a list of values as associated values | ||
35 | ''' | 66 | ''' |
36 | # KeyToFeatures type variable | 67 | return read_id_values(file_path, float) |
37 | key_to_features = dict() | ||
38 | # and the KeyToList | ||
39 | key_to_list = read_lst(file_path) | ||
40 | |||
41 | for key_, list_ in key_to_list.items(): | ||
42 | key_to_features[key_] = np.asarray(list_, dtype=float) | ||
43 | 68 | ||
44 | return key_to_features | 69 | |
70 | def read_labels(file_path: str) -> KeyToLabels: | ||
71 | ''' | ||
72 | Read labels files with the following structure: | ||
73 | [id_1] [value_1_1] [value_1_2] ... [value_1_k] | ||
74 | [id_2] [value_2_1] [value_2_2] ... [value_2_k] | ||
75 | ... | ||
76 | [id_n] [value_n_1] [value_n_2] ... [value_n_k] | ||
77 | |||
78 | where values are str | ||
79 | ''' | ||
80 | return read_id_values(file_path, str) | ||
81 | |||
82 | |||
83 | def read_labels_integer(file_path: str) -> KeyToIntLabels: | ||
84 | ''' | ||
85 | Read integer labels files with the following structure: | ||
86 | [id_1] [value_1_1] [value_1_2] ... [value_1_k] | ||
87 | [id_2] [value_2_1] [value_2_2] ... [value_2_k] | ||
88 | ... | ||
89 | [id_n] [value_n_1] [value_n_2] ... [value_n_k] | ||
90 | |||
91 | where values are int | ||
92 | ''' | ||
93 | return read_id_values(file_path, int) | ||
94 | |||
95 | |||
96 | def write_line(id_, values=[], out=sys.stdout): | ||
97 | """ | ||
98 | Write a line in list, labels or features files. | ||
99 | If you want to write a list, specify an empty |