"""
Data management input/output
"""

# Import packages and modules
import sys
from typing import Dict, List

import numpy as np

# Defining some types
# NOTE: the readers below store numpy arrays as dictionary values; the
# List[...] aliases describe the element type of those arrays.
KeyToList = Dict[str, List[str]]
KeyToLabels = Dict[str, List[str]]
KeyToIntLabels = Dict[str, List[int]]
KeyToFeatures = Dict[str, List[float]]


def read_lst(file_path: str) -> List[str]:
    """
    Read a lst file with this structure:
    [id_1]
    [id_2]
    ...
    [id_n]

    Args:
        file_path (str): path of the file to read.

    Returns:
        list: the ids, one per line of the file, in file order.
    """
    with open(file_path, "r") as f:
        # Only the line terminator is removed; any other character is kept.
        return [line.rstrip("\n") for line in f]


def read_id_values(file_path: str, value_type=str) -> Dict[str, np.ndarray]:
    """
    Read a file where each line is an id followed by its values:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are of type value_type.

    Used by the other reader functions with a specific value_type.

    Args:
        file_path (str): path of the file to read.
        value_type (type, optional): dtype given to numpy when converting
            the values of each line. Defaults to str.

    Returns:
        dict: id as key and a numpy array of its values as associated value.
            If an id appears several times, the last line wins.
    """
    id_values = {}
    with open(file_path, "r") as f:
        for line in f:
            # Splitting on any whitespace run avoids empty tokens (which
            # cannot be converted to numbers) when a line contains repeated
            # or trailing spaces.
            fields = line.split()
            if not fields:
                # Blank line: there is no id to register.
                continue
            id_values[fields[0]] = np.asarray(fields[1:], dtype=value_type)
    return id_values


def read_features(file_path: str) -> KeyToFeatures:
    """
    Read a features file with the following structure:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are float.

    Args:
        file_path (str): path of the features file.

    Returns:
        dict: id as key and a numpy array of floats as associated value.
    """
    return read_id_values(file_path, float)


def read_labels(file_path: str) -> KeyToLabels:
    """
    Read a labels file with the following structure:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are strings.

    Args:
        file_path (str): path of the labels file.

    Returns:
        dict: id as key and a numpy array of strings as associated value.
    """
    return read_id_values(file_path, str)


def read_labels_integer(file_path: str) -> KeyToIntLabels:
    """
    Read a labels file with the following structure:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are int.

    Args:
        file_path (str): path of the labels file.

    Returns:
        dict: id as key and a numpy array of ints as associated value.
    """
    return read_id_values(file_path, int)


def write_line(id_, values=None, out=None) -> None:
    """
    Write a line in list, labels or features files.
    If you want to write a list, leave *values* empty.

    The id and every value are converted with str() and separated by a
    single space, so the line can be read back by the reader functions
    of this module.

    Args:
        id_ (str): id in string.
        values (iterable, optional): values to write, features or labels.
            Defaults to None, which writes the id alone.
        out (file-like, optional): object with a write() method.
            Defaults to None, which means the current sys.stdout.
    """
    if out is None:
        # Resolved at call time so that a redirected sys.stdout is honoured.
        out = sys.stdout

    fields = [str(id_)]
    if values is not None:
        # Explicit None test: truthiness of a numpy array is ambiguous.
        fields.extend(str(value) for value in values)

    out.write(" ".join(fields) + "\n")