Blame view
volia/core/data.py
3.76 KB
a556561b2 Basic data manage... |
1 2 3 4 5 6 |
''' Data management input/output ''' # Import packages and modules import numpy as np |
6957c7c92 main function for... |
7 |
import sys |
a556561b2 Basic data manage... |
8 9 |
# Defining some types |
765b51bc7 Little modificati... |
10 11 12 |
from typing import List, Dict, Tuple from numpy.lib.shape_base import expand_dims |
a556561b2 Basic data manage... |
13 |
# Mapping from a string key to a list of strings.
KeyToList = Dict[str, List[str]]
# Mapping from a string key to a list of string labels.
KeyToLabels = Dict[str, List[str]]
# Mapping from a string key to a list of integer labels.
KeyToIntLabels = Dict[str, List[int]]
a556561b2 Basic data manage... |
16 17 18 19 20 21 |
KeyToFeatures = Dict[str, List[float]]


def read_lst(file_path: str) -> List[str]:
    '''
    Read lst file with this structure:
    [id_1]
    [id_2]
    ...
    [id_n]

    Surrounding whitespace (including the end-of-line character) is removed
    from every id and blank lines are skipped.

    Return a list of ids, in file order.
    '''
    with open(file_path, "r") as f:
        stripped = (line.strip() for line in f)
        return [id_ for id_ in stripped if id_]


def read_id_values(file_path: str, value_type=str):
    '''
    Read file where each line is an id with its corresponding values:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]
    where values are value_type type.

    Used in many reader functions with specific value_type.

    Fields are separated by any run of whitespace and blank lines are
    skipped. If an id appears several times, the last line wins.

    Return a dictionary with id as key and a numpy array (dtype value_type)
    of the values as associated value.
    '''
    id_values = {}
    with open(file_path, "r") as f:
        for line in f:
            # split() without argument drops the end-of-line character and
            # tolerates repeated or trailing separators.
            fields = line.split()
            if not fields:
                continue
            id_values[fields[0]] = np.asarray(fields[1:], dtype=value_type)
    return id_values
a556561b2 Basic data manage... |
57 58 59 60 |
def read_features(file_path: str) -> KeyToFeatures:
    '''
    Load a features file whose lines look like:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]
    with floating point values.

    Parsing is delegated to read_id_values with np.float64 as value type.

    Returns a dictionary with id as key and the values of that id as
    associated value.
    '''
    features = read_id_values(file_path, value_type=np.float64)
    return features
6957c7c92 main function for... |
72 |
|
765b51bc7 Little modificati... |
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
def read_features_with_matrix(file_path: str) -> Tuple[List[str], np.ndarray]:
    """Read a features file and return the keys (utterance ids) with the
    corresponding matrix of values.

    Row ``i`` of the matrix holds the values of ``keys[i]``; keys follow the
    order of the dictionary returned by ``read_id_values``.

    Args:
        file_path (str): path of the features file

    Returns:
        [Tuple(List[str], np.ndarray)]: a tuple with a list of keys and the
        matrix, one row per key. When no id was read, the matrix is ``None``.

    Raises:
        ValueError: if the ids do not all have the same number of values.
    """
    data = read_id_values(file_path, np.float64)
    keys = list(data)
    if not keys:
        # Nothing to stack: keep the historical "no matrix" result.
        return (keys, None)
    # Build the matrix in a single call so that every key contributes exactly
    # one row (no duplicated first row, no quadratic re-allocation).
    matrix = np.stack([data[key] for key in keys], axis=0)
    return (keys, matrix)
6957c7c92 main function for... |
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
def read_labels(file_path: str) -> KeyToLabels:
    '''
    Read labels files with the following structure :
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]
    where values are kept as strings

    Returns the result of read_id_values called with str as value type.
    '''
    return read_id_values(file_path, str)


def read_labels_integer(file_path: str) -> KeyToIntLabels:
    '''
    Read labels files with the following structure :
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]
    where values are int

    Returns the result of read_id_values called with int as value type.
    '''
    return read_id_values(file_path, int)


def write_line(id_, values=[], out=sys.stdout):
    """ Write a line in list, labels or features files.
    If you want to write a list, specify an empty array for *values*.

    The id and every value are converted with ``str`` and separated by a
    single space; the record is terminated by a newline so that the output
    can be read back line by line.

    Args:
        id_ (str): id in string.
        values (list, optional): list of values to write, features or labels.
            A single value without a length (e.g. an int) is accepted too.
            Defaults to [] (never mutated here).
        out (_io.TextIOWrapper, optional): object with a ``write`` method
            receiving the line. Defaults to sys.stdout.
    """
    if hasattr(values, '__len__'):
        # Convert each value explicitly: str.join rejects numbers.
        fields = [str(id_)] + [str(value) for value in values]
    else:
        fields = [str(id_), str(values)]
    out.write(" ".join(fields) + "\n")