Blame view
volia/core/data.py
2.92 KB
a556561b2 Basic data manage... |
1 2 3 4 5 6 |
''' Data management input/output '''

# Import packages and modules
import sys
from typing import List, Dict

import numpy as np

# Defining some types
# Each alias maps an id (the first token of a line) to the values that
# follow it on the same line.
KeyToList = Dict[str, List[str]]
KeyToLabels = Dict[str, List[str]]
KeyToIntLabels = Dict[str, List[int]]
a556561b2 Basic data manage... |
14 15 16 17 18 19 |
KeyToFeatures = Dict[str, List[float]]


def read_lst(file_path: str) -> List[str]:
    '''
    Read lst file with this structure:

    [id_1]
    [id_2]
    ...
    [id_n]

    Leading/trailing whitespace (including the line terminator) is removed
    from every id, and blank lines are skipped.

    Return a list of ids.
    '''
    lst = []
    with open(file_path, "r") as f:
        for line in f:
            # strip() drops the trailing newline so ids compare equal to the
            # keys produced by read_id_values.
            id_ = line.strip()
            if id_:
                lst.append(id_)
    return lst


def read_id_values(file_path: str, value_type=str) -> Dict[str, np.ndarray]:
    '''
    Read file where each line is an id with its corresponding values:

    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are value_type type.

    Used in many reader functions with specific value_type.

    Fields may be separated by any run of whitespace. Blank lines are
    skipped. If an id appears several times, the last line wins.

    Return a dictionary with id as key and a numpy array of values
    (dtype=value_type) as associated values.
    '''
    id_values = {}
    with open(file_path, "r") as f:
        for line in f:
            # split() without an argument splits on any whitespace run and
            # discards the line terminator, so no empty tokens reach numpy.
            fields = line.split()
            if not fields:
                continue
            id_values[fields[0]] = np.asarray(fields[1:], dtype=value_type)
    return id_values
a556561b2 Basic data manage... |
55 56 57 58 |
def read_features(file_path: str) -> KeyToFeatures:
    '''
    Read features files with the following structure:

    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are float

    Returns a dictionary with id as key and the values parsed by
    read_id_values (as float) as associated values
    '''
    return read_id_values(file_path, float)


def read_labels(file_path: str) -> KeyToLabels:
    '''
    Read labels files with the following structure :

    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are kept as strings

    Returns a dictionary with id as key and the values parsed by
    read_id_values (as str) as associated values
    '''
    return read_id_values(file_path, str)


def read_labels_integer(file_path: str) -> KeyToIntLabels:
    '''
    Read labels files with the following structure :

    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are int

    Returns a dictionary with id as key and the values parsed by
    read_id_values (as int) as associated values
    '''
    return read_id_values(file_path, int)


def write_line(id_, values=(), out=sys.stdout):
    """
    Write a line in list, labels or features files.
    If you want to write a list, leave *values* empty.

    The record is written as the id followed by the space-separated values
    and terminated by a newline, which is the layout the reader functions
    of this module parse.

    Args:
        id_ (str): id in string.
        values (sequence, optional): values to write, features or labels.
            Each value is converted with str(), so numbers are accepted.
            Defaults to an empty sequence.
        out (_io.TextIOWrapper, optional): writable text stream.
            Defaults to sys.stdout.
    """
    # len() rather than truthiness: a multi-element numpy array cannot be
    # evaluated as a boolean.
    if len(values) == 0:
        out.write(str(id_) + "\n")
    else:
        out.write(str(id_) + " " + " ".join(str(v) for v in values) + "\n")