Commit 1bcb37e33d763edd37ff803686c6524378ebcd22

Authored by quillotm
1 parent 4309b4a340
Exists in master

write_line now accepts a single (scalar) value directly, so a file with only one value per id can be written without wrapping that value in a list.

Showing 1 changed file with 6 additions and 3 deletions Inline Diff

1 ''' 1 '''
2 Data management input/output 2 Data management input/output
3 ''' 3 '''
4 4
5 # Import packages and modules 5 # Import packages and modules
6 import numpy as np 6 import numpy as np
7 import sys 7 import sys
8 8
9 # Defining some types 9 # Defining some types
10 from typing import List, Dict, Tuple 10 from typing import List, Dict, Tuple
11 11
12 from numpy.lib.shape_base import expand_dims 12 from numpy.lib.shape_base import expand_dims
13 KeyToList = Dict[str, List[str]] 13 KeyToList = Dict[str, List[str]]
14 KeyToLabels = Dict[str, List[str]] 14 KeyToLabels = Dict[str, List[str]]
15 KeyToIntLabels = Dict[str, List[int]] 15 KeyToIntLabels = Dict[str, List[int]]
16 KeyToFeatures = Dict[str, List[float]] 16 KeyToFeatures = Dict[str, List[float]]
17 17
18 18
def read_lst(file_path: str) -> List[str]:
    """Read a list file containing one id per line.

    Expected structure:
        [id_1]
        [id_2]
        ...
        [id_n]

    Args:
        file_path (str): path of the list file.

    Returns:
        List[str]: the ids in file order, with the trailing newline removed.
        Blank lines are kept as empty strings.
    """
    with open(file_path, "r") as f:
        # Only the line terminator is stripped; any other whitespace is
        # considered part of the id.
        return [line.rstrip("\n") for line in f]
34 34
35 35
def read_id_values(file_path: str, value_type=str) -> Dict[str, np.ndarray]:
    """Read a file in which each line is an id followed by its values.

    Expected structure:
        [id_1] [value_1_1] [value_1_2] ... [value_1_k]
        [id_2] [value_2_1] [value_2_2] ... [value_2_k]
        ...
        [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    Fields are separated by whitespace. Blank lines are ignored.

    Args:
        file_path (str): path of the file to read.
        value_type: dtype passed to ``np.asarray`` to convert the values.
            Defaults to str.

    Returns:
        Dict[str, np.ndarray]: maps each id to a 1-D array of its values
        (an empty array when the line only contains an id).

    Raises:
        ValueError: if a value cannot be converted to *value_type*.
    """
    id_values = {}
    with open(file_path, "r") as f:
        for line in f:
            # split() without argument collapses runs of whitespace and drops
            # the newline, so doubled or trailing spaces no longer yield
            # empty tokens that would break numeric conversion.
            fields = line.split()
            if not fields:
                # Blank line: nothing to record.
                continue
            id_values[fields[0]] = np.asarray(fields[1:], dtype=value_type)
    return id_values
55 55
56 56
def read_features(file_path: str) -> KeyToFeatures:
    """Read a features file whose values are floats.

    Expected structure:
        [id_1] [value_1_1] [value_1_2] ... [value_1_k]
        [id_2] [value_2_1] [value_2_2] ... [value_2_k]
        ...
        [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    Args:
        file_path (str): path of the features file.

    Returns:
        A dictionary with the id as key and its values, converted to
        np.float64, as associated value.
    """
    features = read_id_values(file_path, value_type=np.float64)
    return features
70 70
71 71
def read_features_with_matrix(file_path: str) -> Tuple[List[str], np.ndarray]:
    """Read a features file and return its keys with the matrix of values.

    Row ``i`` of the matrix holds the values of ``keys[i]``.

    Args:
        file_path (str): path of the features file

    Returns:
        [Tuple(List[str], np.ndarray)]: a tuple with the list of keys
        (utterance ids) and the matrix of values, one row per key. The
        matrix is None when the file contains no entry.

    Raises:
        ValueError: if the entries do not all have the same number of values.
    """
    data = read_id_values(file_path, np.float64)
    keys = list(data.keys())
    if not keys:
        # Nothing to stack; keep returning None as the matrix in that case.
        return (keys, None)
    # Stack every row exactly once. The previous implementation appended the
    # first row right after initialising the matrix with it, which produced
    # one more row than there are keys.
    matrix = np.stack([data[key] for key in keys], axis=0)
    return (keys, matrix)
92 92
def read_labels(file_path: str) -> KeyToLabels:
    """Read a labels file whose values are kept as strings.

    Expected structure:
        [id_1] [value_1_1] [value_1_2] ... [value_1_k]
        [id_2] [value_2_1] [value_2_2] ... [value_2_k]
        ...
        [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    Args:
        file_path (str): path of the labels file.

    Returns:
        A dictionary with the id as key and its labels, as strings, as
        associated value.
    """
    # Labels are read with the str type, i.e. no numeric conversion is done.
    return read_id_values(file_path, str)
104 104
105 105
def read_labels_integer(file_path: str) -> KeyToIntLabels:
    """Read a labels file whose values are integers.

    Expected structure:
        [id_1] [value_1_1] [value_1_2] ... [value_1_k]
        [id_2] [value_2_1] [value_2_2] ... [value_2_k]
        ...
        [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    Args:
        file_path (str): path of the labels file.

    Returns:
        A dictionary with the id as key and its values, converted with the
        int type, as associated value.
    """
    int_labels = read_id_values(file_path, value_type=int)
    return int_labels
117 117
118 118
def write_line(id_, values=(), out=sys.stdout):
    """
    Write a line in list, labels or features files.
    If you want to write a list, leave *values* empty.

    The line is the id followed by the values, separated by single spaces
    and terminated by a newline. Every value is converted with ``str``.

    Args:
        id_ (str): id in string.
        values (optional): values to write, features or labels. Either a
            sized collection (list, tuple, numpy array, ...) or a single
            value (number, string, ...). Defaults to an empty collection.
        out (_io.TextIOWrapper, optional): stream to write to.
            Defaults to sys.stdout.
    """
    try:
        count = len(values)
    except TypeError:
        # Plain scalars and 0-d numpy arrays have no length.
        count = None

    if count == 0:
        out.write(str(id_) + "\n")
    elif count is None or isinstance(values, (str, bytes)):
        # A string is one value, not a sequence of characters to join.
        out.write(str(id_) + " " + str(values) + "\n")
    else:
        # Convert each element so that numeric values can be written too.
        out.write(str(id_) + " " + " ".join(str(v) for v in values) + "\n")