Commit 6957c7c92b9da33af8bd14d110c2c0cb404aed44
1 parent
e200c0b6f7
Exists in
master
main function for data management
Showing 1 changed file with 80 additions and 17 deletions Side-by-side Diff
volia/core/data.py
... | ... | @@ -4,42 +4,105 @@ |
4 | 4 | |
5 | 5 | # Import packages and modules |
6 | 6 | import numpy as np |
7 | +import sys | |
7 | 8 | |
8 | 9 | # Defining some types |
9 | 10 | from typing import List, Dict |
10 | 11 | KeyToList = Dict[str, List[str]] |
12 | +KeyToLabels = Dict[str, List[str]] | |
13 | +KeyToIntLabels = Dict[str, List[int]] | |
11 | 14 | KeyToFeatures = Dict[str, List[float]] |
12 | 15 | |
13 | 16 | |
14 | 17 | def read_lst(file_path: str) -> KeyToList: |
15 | 18 | ''' |
16 | 19 | Read lst file with this structure: |
17 | - [id] [value1] [value2] ... [value n] | |
20 | + [id_1] | |
21 | + [id_2] | |
22 | + ... | |
23 | + [id_n] | |
24 | + | |
25 | + Return a list of ids. | |
26 | + ''' | |
27 | + lst = [] | |
28 | + with open(file_path, "r") as f: | |
29 | + for line in f: | |
30 | + lst.append(line.replace("\n", "")) | |
31 | + return lst | |
18 | 32 | |
19 | - This is a basic function reused by others like read_features. | |
20 | - returns a dictionary with id as key and a list of value as corresponding values | |
33 | + | |
34 | +def read_id_values(file_path: str, value_type=str): | |
21 | 35 | ''' |
22 | - # KeyToList type variable | |
23 | - key_to_list = dict() | |
36 | + Read file where each line is an id with its corresponding values: | |
37 | + [id_1] [value_1_1] [value_1_2] ... [value_1_k] | |
38 | + [id_2] [value_2_1] [value_2_2] ... [value_2_k] | |
39 | + ... | |
40 | + [id_n] [value_n_1] [value_n_2] ... [value_n_k] | |
41 | + | |
42 | + where values are value_type type. | |
43 | + | |
44 | + Used in many reader functions with specific value_type. | |
45 | + Return a dictionary with id as key and values as associated values. | |
46 | + ''' | |
47 | + id_values = {} | |
24 | 48 | with open(file_path, "r") as f: |
25 | 49 | for line in f: |
26 | 50 | splited = line.replace("\n", "").split(" ") |
27 | - id = splited[0] | |
28 | - values = splited[1:] | |
29 | - key_to_list[id] = values | |
30 | - return key_to_list | |
51 | + id_values[splited[0]] = np.asarray(splited[1:], dtype=value_type) | |
52 | + return id_values | |
31 | 53 | |
32 | 54 | |
33 | 55 | def read_features(file_path: str) -> KeyToFeatures: |
34 | 56 | ''' |
57 | + Read features files with the following structure: | |
58 | + [id_1] [value_1_1] [value_1_2] ... [value_1_k] | |
59 | + [id_2] [value_2_1] [value_2_2] ... [value_2_k] | |
60 | + ... | |
61 | + [id_n] [value_n_1] [value_n_2] ... [value_n_k] | |
62 | + | |
63 | + where values are float | |
64 | + | |
65 | + Returns a dictionary with id as key and a list of values as associated values | |
35 | 66 | ''' |
36 | - # KeyToFeatures type variable | |
37 | - key_to_features = dict() | |
38 | - # and the KeyToList | |
39 | - key_to_list = read_lst(file_path) | |
40 | - | |
41 | - for key_, list_ in key_to_list.items(): | |
42 | - key_to_features[key_] = np.asarray(list_, dtype=float) | |
67 | + return read_id_values(file_path, float) | |
43 | 68 | |
44 | - return key_to_features | |
69 | + | |
70 | +def read_labels(file_path: str) -> KeyToLabels: | |
71 | + ''' | |
72 | + Read labels files with the following structure: | |
73 | + [id_1] [value_1_1] [value_1_2] ... [value_1_k] | |
74 | + [id_2] [value_2_1] [value_2_2] ... [value_2_k] | |
75 | + ... | |
76 | + [id_n] [value_n_1] [value_n_2] ... [value_n_k] | |
77 | + | |
78 | + where values are str | |
79 | + ''' | |
80 | + return read_id_values(file_path, str) | |
81 | + | |
82 | + | |
83 | +def read_labels_integer(file_path: str) -> KeyToIntLabels: | |
84 | + ''' | |
85 | + Read integer labels files with the following structure: | |
86 | + [id_1] [value_1_1] [value_1_2] ... [value_1_k] | |
87 | + [id_2] [value_2_1] [value_2_2] ... [value_2_k] | |
88 | + ... | |
89 | + [id_n] [value_n_1] [value_n_2] ... [value_n_k] | |
90 | + | |
91 | + where values are int | |
92 | + ''' | |
93 | + return read_id_values(file_path, int) | |
94 | + | |
95 | + | |
96 | +def write_line(id_, values=[], out=sys.stdout): | |
97 | + """ | |
98 | + Write a line in list, labels or features files. | |
99 | + If you want to write a list, specify an empty | |
100 | + array for *values*. | |
101 | + | |
102 | + Args: | |
103 | + id_ (str): id in string. | |
104 | + values (list, optional): list of values to write, features or labels. Defaults to []. | |
105 | + out (_io.TextIOWrapper, optional): text stream the line is written to. Defaults to sys.stdout. | |
106 | + """ | |
107 | + out.write(str(id_) + " ".join(values) + "\n") |