Commit 1bcb37e33d763edd37ff803686c6524378ebcd22
1 parent
4309b4a340
Exists in
master
Now, we can write files with only one value without including it in a list.
Showing 1 changed file with 6 additions and 3 deletions Inline Diff
volia/core/data.py
1 | ''' | 1 | ''' |
2 | Data management input/output | 2 | Data management input/output |
3 | ''' | 3 | ''' |
4 | 4 | ||
5 | # Import packages and modules | 5 | # Import packages and modules |
6 | import numpy as np | 6 | import numpy as np |
7 | import sys | 7 | import sys |
8 | 8 | ||
9 | # Defining some types | 9 | # Defining some types |
10 | from typing import List, Dict, Tuple | 10 | from typing import List, Dict, Tuple |
11 | 11 | ||
12 | from numpy.lib.shape_base import expand_dims | 12 | from numpy.lib.shape_base import expand_dims |
13 | KeyToList = Dict[str, List[str]] | 13 | KeyToList = Dict[str, List[str]] |
14 | KeyToLabels = Dict[str, List[str]] | 14 | KeyToLabels = Dict[str, List[str]] |
15 | KeyToIntLabels = Dict[str, List[int]] | 15 | KeyToIntLabels = Dict[str, List[int]] |
16 | KeyToFeatures = Dict[str, List[float]] | 16 | KeyToFeatures = Dict[str, List[float]] |
17 | 17 | ||
18 | 18 | ||
def read_lst(file_path: str) -> List[str]:
    """
    Read a lst file with this structure:
    [id_1]
    [id_2]
    ...
    [id_n]

    Args:
        file_path (str): path of the lst file.

    Returns:
        List[str]: the ids, one per line of the file, in file order.
        A blank line yields an empty-string id.
    """
    with open(file_path, "r") as f:
        # Only the line terminator is removed; any other whitespace is
        # part of the id and is kept untouched.
        return [line.replace("\n", "") for line in f]
34 | 34 | ||
35 | 35 | ||
def read_id_values(file_path: str, value_type=str) -> Dict[str, np.ndarray]:
    """
    Read a file where each line is an id with its corresponding values:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are converted to value_type.

    Used by the other reader functions with a specific value_type.

    Args:
        file_path (str): path of the file to read.
        value_type: dtype handed to numpy for the values. Defaults to str.

    Returns:
        Dict[str, np.ndarray]: id as key and a numpy array of its values
        as associated value. A line holding only an id maps to an empty
        array. If an id appears several times, the last line wins.
    """
    id_values = {}
    with open(file_path, "r") as f:
        for line in f:
            # Split on any run of whitespace so that doubled or trailing
            # spaces do not produce empty tokens (which cannot be
            # converted to a numeric value_type).
            tokens = line.split()
            if not tokens:
                # Blank line: there is no id to register.
                continue
            id_values[tokens[0]] = np.asarray(tokens[1:], dtype=value_type)
    return id_values
55 | 55 | ||
56 | 56 | ||
def read_features(file_path: str) -> KeyToFeatures:
    """
    Read a features file with the following structure:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are float.

    Args:
        file_path (str): path of the features file.

    Returns:
        A dictionary with id as key and its values as associated value.
        The values are whatever read_id_values builds for np.float64,
        i.e. a numpy array of float64 rather than a plain Python list.
    """
    return read_id_values(file_path, np.float64)
70 | 70 | ||
71 | 71 | ||
def read_features_with_matrix(file_path: str) -> Tuple[List[str], np.ndarray]:
    """Read a features file and return the keys (utterance ids)
    with the corresponding matrix of values.

    Row i of the matrix holds the values of keys[i]; the matrix therefore
    has exactly one row per key.

    Args:
        file_path (str): path of the features file

    Returns:
        [Tuple(List[str], np.ndarray)]: a tuple with a list of keys and the
        matrix. When the file holds no entry, the list is empty and the
        matrix is an empty float64 array of shape (0, 0).

    Raises:
        ValueError: raised by numpy when the entries do not all have the
        same number of values.
    """
    data = read_id_values(file_path, np.float64)
    keys = list(data)
    if not keys:
        # np.stack refuses an empty sequence, so build the empty result
        # explicitly.
        return (keys, np.empty((0, 0), dtype=np.float64))

    # Stack every row once, in key order. Growing the matrix with
    # np.append inside the loop copied it at each step and, because the
    # first row was both used as the seed and appended, duplicated it.
    matrix = np.stack([data[key] for key in keys], axis=0)
    return (keys, matrix)
92 | 92 | ||
def read_labels(file_path: str) -> KeyToLabels:
    """
    Read a labels file with the following structure:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are kept as strings (no numeric conversion is applied).

    Args:
        file_path (str): path of the labels file.

    Returns:
        A dictionary with id as key and its string labels as associated
        value, as built by read_id_values with the str value type.
    """
    return read_id_values(file_path, str)
104 | 104 | ||
105 | 105 | ||
def read_labels_integer(file_path: str) -> KeyToIntLabels:
    """
    Read a labels file with the following structure:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are int.

    Args:
        file_path (str): path of the labels file.

    Returns:
        A dictionary with id as key and its integer labels as associated
        value, as built by read_id_values with the int value type.
    """
    return read_id_values(file_path, int)
117 | 117 | ||
118 | 118 | ||
def write_line(id_, values=(), out=sys.stdout):
    """
    Write a line in list, labels or features files.
    If you want to write a list, leave *values* empty.

    The line is the id followed by every value, separated by single
    spaces and terminated by a newline. Each value is converted with
    str(), so numbers and numpy arrays can be written directly.

    Args:
        id_ (str): id in string.
        values (optional): values to write, features or labels. Either a
            collection of values or one single value (a number or a
            string), which is written as is. Defaults to no value.
        out (_io.TextIOWrapper, optional): object with a write() method
            receiving the line. Defaults to sys.stdout.
    """
    if isinstance(values, str):
        # A string is one value, not a collection of characters. An empty
        # string still means "no value", as it did before.
        tokens = [values] if values else []
    else:
        try:
            tokens = list(values)
        except TypeError:
            # Not iterable (int, float, 0-d array, ...): a single value.
            tokens = [values]

    fields = [str(id_)]
    fields.extend(str(value) for value in tokens)
    out.write(" ".join(fields) + "\n")