Blame view

volia/core/data.py 2.92 KB
a556561b2   Mathias   Basic data manage...
1
2
3
4
5
6
  '''
  Data management input/output
  '''
  
  # Import packages and modules
  import numpy as np
6957c7c92   Quillot Mathias   main function for...
7
  import sys
a556561b2   Mathias   Basic data manage...
8
9
10
11
  
  # Type aliases used as return annotations by the reader functions below.
  # Each maps an id (str) to the values found on that id's line.
  from typing import List, Dict
  KeyToList = Dict[str, List[str]]
  KeyToLabels = Dict[str, List[str]]
  KeyToIntLabels = Dict[str, List[int]]
  KeyToFeatures = Dict[str, List[float]]
  
  
  def read_lst(file_path: str) -> List[str]:
      '''
      Read an lst file with this structure:
      [id_1]
      [id_2]
      ...
      [id_n]

      Each line is taken as one id; only the trailing newline is removed,
      so an empty line yields an empty-string id.

      Return the ids as a list of strings, in file order.
      '''
      with open(file_path, "r") as f:
          # Lines produced by file iteration carry at most one trailing "\n".
          return [line.rstrip("\n") for line in f]
  
  
  def read_id_values(file_path: str, value_type=str):
      '''
      Read a file where each line is an id followed by its values:
      [id_1] [value_1_1] [value_1_2] ... [value_1_k]
      [id_2] [value_2_1] [value_2_2] ... [value_2_k]
      ...
      [id_n] [value_n_1] [value_n_2] ... [value_n_k]

      where values are converted to value_type.

      Fields are separated by whitespace; repeated or trailing separators
      are ignored and blank lines are skipped, so they never produce empty
      values or an empty id.

      Used by the other reader functions with a specific value_type.
      Return a dictionary with the id as key and a numpy array (built with
      dtype=value_type) of the values as associated value.
      '''
      id_values = {}
      with open(file_path, "r") as f:
          for line in f:
              # split() without argument drops the newline and collapses
              # runs of whitespace, unlike split(" ").
              fields = line.split()
              if not fields:
                  continue
              id_values[fields[0]] = np.asarray(fields[1:], dtype=value_type)
      return id_values
a556561b2   Mathias   Basic data manage...
55
56
57
58
  
  
  def read_features(file_path: str) -> KeyToFeatures:
      '''
      Read a features file with the following structure:
      [id_1] [value_1_1] [value_1_2] ... [value_1_k]
      [id_2] [value_2_1] [value_2_2] ... [value_2_k]
      ...
      [id_n] [value_n_1] [value_n_2] ... [value_n_k]

      where values are float.

      Delegates to read_id_values with value_type=float.
      Return a dictionary with the id as key and the float values of that
      line (a numpy array, as built by read_id_values) as associated value.
      '''
      return read_id_values(file_path, float)
  
  
  def read_labels(file_path: str) -> KeyToLabels:
      '''
      Read a labels file with the following structure:
      [id_1] [value_1_1] [value_1_2] ... [value_1_k]
      [id_2] [value_2_1] [value_2_2] ... [value_2_k]
      ...
      [id_n] [value_n_1] [value_n_2] ... [value_n_k]

      where values are kept as strings.

      Return what read_id_values produces for value_type=str.
      '''
      labels = read_id_values(file_path, value_type=str)
      return labels
  
  
  def read_labels_integer(file_path: str) -> KeyToIntLabels:
      '''
      Read a labels file with the following structure:
      [id_1] [value_1_1] [value_1_2] ... [value_1_k]
      [id_2] [value_2_1] [value_2_2] ... [value_2_k]
      ...
      [id_n] [value_n_1] [value_n_2] ... [value_n_k]

      where values are int.

      Return what read_id_values produces for value_type=int.
      '''
      int_labels = read_id_values(file_path, value_type=int)
      return int_labels
  
  
  def write_line(id_, values=(), out=sys.stdout):
      """
      Write a line in list, labels or features files.
      If you want to write a list, specify an empty
      sequence for *values* (or omit it).

      The line is the id alone, or the id followed by the values, all
      separated by single spaces, and terminated by a newline.

      Args:
          id_ (str): id; converted with str() before writing.
          values (sequence, optional): values to write, features or labels.
              Each one is converted with str(), so numbers are accepted as
              well as strings. Defaults to an empty sequence.
          out (_io.TextIOWrapper, optional): writable text stream.
              Defaults to sys.stdout.
      """
      # len() rather than truthiness: it also works for numpy arrays, whose
      # truth value is ambiguous when they hold more than one element.
      if len(values) == 0:
          out.write(str(id_) + "\n")
      else:
          # str() each value so float features / int labels can be joined.
          out.write(str(id_) + " " + " ".join(str(v) for v in values) + "\n")