Commit 3b5a487de871166efef86bab60586cbd7d0ff82e
1 parent
9a2c6b4d02
Exists in
master
Just tried to change float to np.float64 to have larger float
Showing 1 changed file with 1 addition and 1 deletion (inline diff)
volia/core/data.py
1 | ''' | 1 | ''' |
2 | Data management input/output | 2 | Data management input/output |
3 | ''' | 3 | ''' |
4 | 4 | ||
5 | # Import packages and modules | 5 | # Import packages and modules |
6 | import numpy as np | 6 | import numpy as np |
7 | import sys | 7 | import sys |
8 | 8 | ||
# Type aliases used in the reader signatures of this module.
from typing import Dict, List

# id -> list of string entries
KeyToList = Dict[str, List[str]]
# id -> list of string labels
KeyToLabels = Dict[str, List[str]]
# id -> list of integer labels
KeyToIntLabels = Dict[str, List[int]]
# id -> list of float feature values
KeyToFeatures = Dict[str, List[float]]
15 | 15 | ||
16 | 16 | ||
def read_lst(file_path: str) -> List[str]:
    '''
    Read lst file with this structure:
    [id_1]
    [id_2]
    ...
    [id_n]

    Return the list of ids, one per line of the file, in file order.
    Newline characters are removed; any other character on the line
    (including surrounding spaces) is kept as part of the id.
    '''
    # The function has always returned a plain list, so the return
    # annotation is List[str] rather than a dictionary alias.
    with open(file_path, "r") as f:
        return [line.replace("\n", "") for line in f]
32 | 32 | ||
33 | 33 | ||
def read_id_values(file_path: str, value_type=str) -> Dict[str, np.ndarray]:
    '''
    Read file where each line is an id with its corresponding values:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are value_type type.

    Fields are separated by whitespace; repeated or trailing separators
    are ignored and blank lines are skipped.

    Used in many reader functions with specific value_type.
    Return a dictionary with id as key and a numpy array of the values
    (converted with dtype=value_type) as associated value. If an id
    appears on several lines, the last occurrence wins.
    '''
    id_values = {}
    with open(file_path, "r") as f:
        for line in f:
            # split() without argument collapses runs of whitespace, so a
            # double or trailing space never yields an empty token that
            # would break the numeric conversion below.
            fields = line.split()
            if not fields:
                # Blank line (e.g. trailing newline at end of file).
                continue
            id_values[fields[0]] = np.asarray(fields[1:], dtype=value_type)
    return id_values
53 | 53 | ||
54 | 54 | ||
def read_features(file_path: str) -> KeyToFeatures:
    '''
    Read a features file laid out as one id per line followed by its values:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    Values are parsed as np.float64.

    Returns a dictionary mapping each id to its values.
    '''
    # Delegate the parsing; only the value type is specific to features.
    return read_id_values(file_path, value_type=np.float64)
68 | 68 | ||
69 | 69 | ||
def read_labels(file_path: str) -> KeyToLabels:
    '''
    Read labels files with the following structure :
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are kept as strings (no numeric conversion is applied).

    Returns a dictionary with id as key and its labels as associated values.
    '''
    return read_id_values(file_path, str)
81 | 81 | ||
82 | 82 | ||
def read_labels_integer(file_path: str) -> KeyToIntLabels:
    '''
    Read an integer labels file laid out as one id per line followed by
    its values:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    Values are parsed as int.

    Returns a dictionary mapping each id to its values.
    '''
    # Delegate the parsing; only the value type is specific to int labels.
    return read_id_values(file_path, value_type=int)
94 | 94 | ||
95 | 95 | ||
def write_line(id_, values=None, out=sys.stdout):
    """
    Write a line in list, labels or features files.
    If you want to write a list, omit *values* or pass an empty
    sequence: only the id is written.

    Args:
        id_ (str): id; converted with str() before writing.
        values (sequence, optional): values to write, features or labels.
            Each value is converted with str(), so numbers and numpy
            arrays are accepted as well as strings. Defaults to None
            (no values).
        out (_io.TextIOWrapper, optional): writable text stream.
            Defaults to sys.stdout.
    """
    # len() is used instead of truthiness because numpy arrays with more
    # than one element raise on bool().
    if values is None or len(values) == 0:
        out.write(str(id_) + "\n")
    else:
        # str.join only accepts strings; convert each value first so that
        # float/int values can be written.
        out.write(str(id_) + " " + " ".join(map(str, values)) + "\n")