Commit 6957c7c92b9da33af8bd14d110c2c0cb404aed44

Authored by Quillot Mathias
1 parent e200c0b6f7
Exists in master

main function for data management

Showing 1 changed file with 80 additions and 17 deletions Side-by-side Diff

... ... @@ -4,42 +4,105 @@
4 4  
5 5 # Import packages and modules
6 6 import numpy as np
  7 +import sys
7 8  
8 9 # Defining some types
9 10 from typing import List, Dict
10 11 KeyToList = Dict[str, List[str]]
  12 +KeyToLabels = Dict[str, List[str]]
  13 +KeyToIntLabels = Dict[str, List[int]]
11 14 KeyToFeatures = Dict[str, List[float]]
12 15  
13 16  
def read_lst(file_path: str) -> List[str]:
    '''
    Read a lst file with this structure:
    [id_1]
    [id_2]
    ...
    [id_n]

    Only the line terminator is removed from each line; any other
    character (including spaces) is kept as part of the id, and an empty
    line yields an empty id.

    Args:
        file_path: path of the lst file to read.

    Returns:
        The list of ids, in the order they appear in the file.
    '''
    with open(file_path, "r") as f:
        # Lines yielded by the file iterator carry at most one trailing
        # "\n", so stripping it is all that is needed to get the raw id.
        return [line.rstrip("\n") for line in f]
18 32  
19   - This is a basic function reused by others like read_features.
20   - returns a dictionary with id as key and a list of value as corresponding values
  33 +
def read_id_values(file_path: str, value_type=str):
    '''
    Read file where each line is an id with its corresponding values:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are value_type type.

    Fields may be separated by any run of whitespace, and blank lines are
    ignored. If the same id appears on several lines, the last one wins.

    Used in many reader functions with specific value_type.

    Args:
        file_path: path of the file to read.
        value_type: dtype given to numpy when converting the values of
            each line. Defaults to str.

    Returns:
        A dictionary with id as key and a numpy array of the values of
        that line (converted to value_type) as associated value.
    '''
    id_values = {}
    with open(file_path, "r") as f:
        for line in f:
            # split() without argument drops the line terminator and never
            # produces empty fields, so repeated or trailing separators
            # cannot break the numeric conversion below.
            fields = line.split()
            if not fields:
                # Blank line: there is no id to register.
                continue
            id_values[fields[0]] = np.asarray(fields[1:], dtype=value_type)
    return id_values
31 53  
32 54  
def read_features(file_path: str) -> KeyToFeatures:
    '''
    Read a features file, one id per line followed by its values:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are float.

    This is read_id_values called with float as value type.

    Returns a dictionary with id as key and the values of that id,
    converted to float, as associated value.
    '''
    features = read_id_values(file_path, value_type=float)
    return features
43 68  
44   - return key_to_features
  69 +
def read_labels(file_path: str) -> KeyToLabels:
    '''
    Read a labels file, one id per line followed by its labels:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are kept as strings.

    This is read_id_values called with str as value type.

    Returns a dictionary with id as key and the labels of that id as
    associated value.
    '''
    labels = read_id_values(file_path, value_type=str)
    return labels
  81 +
  82 +
def read_labels_integer(file_path: str) -> KeyToIntLabels:
    '''
    Read a labels file, one id per line followed by its labels:
    [id_1] [value_1_1] [value_1_2] ... [value_1_k]
    [id_2] [value_2_1] [value_2_2] ... [value_2_k]
    ...
    [id_n] [value_n_1] [value_n_2] ... [value_n_k]

    where values are int.

    This is read_id_values called with int as value type.

    Returns a dictionary with id as key and the labels of that id,
    converted to int, as associated value.
    '''
    int_labels = read_id_values(file_path, value_type=int)
    return int_labels
  94 +
  95 +
def write_line(id_, values=(), out=sys.stdout):
    """
    Write a line in list, labels or features files.

    The line is the id followed by each value, all separated by a single
    space and terminated by a newline. If you want to write a list file
    (ids only), leave *values* empty.

    Args:
        id_ (str): id of the line; converted with str() before writing.
        values (iterable, optional): values to write after the id, features
            or labels. Each one is converted with str(), so numbers are
            accepted as well as strings. Defaults to no values.
        out (file-like, optional): object whose write() method receives the
            line. Defaults to sys.stdout.
    """
    # Join the id together with the values so that a separator is always
    # present between the id and the first value, and never trails the id
    # when there are no values.
    fields = [str(id_)]
    fields.extend(str(value) for value in values)
    out.write(" ".join(fields) + "\n")