Blame view

volia/core/data.py 3.76 KB
a556561b2   Mathias   Basic data manage...
1
2
3
4
5
6
  '''
  Data management input/output
  '''
  
  # Import packages and modules
  import numpy as np
6957c7c92   Quillot Mathias   main function for...
7
  import sys
a556561b2   Mathias   Basic data manage...
8
9
  
  # Defining some types
765b51bc7   Quillot Mathias   Little modificati...
10
11
12
  from typing import List, Dict, Tuple
  
  from numpy.lib.shape_base import expand_dims
a556561b2   Mathias   Basic data manage...
13
  # Maps a string key to a list of strings.
  KeyToList = Dict[str, List[str]]
6957c7c92   Quillot Mathias   main function for...
14
15
  # Maps an id to its labels kept as strings.
  KeyToLabels = Dict[str, List[str]]
  # Maps an id to its labels converted to integers.
  KeyToIntLabels = Dict[str, List[int]]
a556561b2   Mathias   Basic data manage...
16
17
18
19
20
21
  # Maps an id to its feature values as floats.
  KeyToFeatures = Dict[str, List[float]]
  
  
  def read_lst(file_path: str) -> List[str]:
      """
      Read a lst file with this structure:
      [id_1]
      [id_2]
      ...
      [id_n]

      Args:
          file_path (str): path of the lst file.

      Returns:
          List[str]: one entry per line of the file, in file order,
          with the line terminator removed.
      """
      with open(file_path, "r") as f:
          # Each line yielded by the file iterator carries at most one
          # trailing newline; strip it so only the id remains.
          return [line.rstrip("\n") for line in f]
  
  
  def read_id_values(file_path: str, value_type=str):
      """
      Read a file where each line is an id followed by its values:
      [id_1] [value_1_1] [value_1_2] ... [value_1_k]
      [id_2] [value_2_1] [value_2_2] ... [value_2_k]
      ...
      [id_n] [value_n_1] [value_n_2] ... [value_n_k]

      Used by the other reader functions with a specific value_type.

      Args:
          file_path (str): path of the file to read.
          value_type (type, optional): dtype handed to np.asarray when
              converting the values of each line. Defaults to str.

      Returns:
          dict: id (first field of a line) mapped to a numpy array holding
          the remaining fields converted to value_type. If an id appears
          several times, the last line wins.
      """
      id_values = {}
      with open(file_path, "r") as f:
          for line in f:
              # split() without argument drops the trailing newline and
              # collapses runs of whitespace, so doubled or trailing spaces
              # do not produce empty-string values that fail to convert.
              fields = line.split()
              if not fields:
                  # Blank line: there is no id to record.
                  continue
              id_values[fields[0]] = np.asarray(fields[1:], dtype=value_type)
      return id_values
a556561b2   Mathias   Basic data manage...
57
58
59
60
  
  
  def read_features(file_path: str) -> KeyToFeatures:
      """
      Load a features file laid out as one utterance per line:
      [id_1] [value_1_1] [value_1_2] ... [value_1_k]
      [id_2] [value_2_1] [value_2_2] ... [value_2_k]
      ...
      [id_n] [value_n_1] [value_n_2] ... [value_n_k]

      Every value is parsed as a 64-bit float.

      Args:
          file_path (str): path of the features file.

      Returns:
          KeyToFeatures: whatever read_id_values builds for this file when
          asked for np.float64 values (id -> values).
      """
      return read_id_values(file_path, value_type=np.float64)
6957c7c92   Quillot Mathias   main function for...
72

765b51bc7   Quillot Mathias   Little modificati...
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
  def read_features_with_matrix(file_path: str) -> Tuple[List[str], np.ndarray]:
      """Read a features file and return the keys (utterance ids)
      with the corresponding matrix of values.

      Row i of the matrix holds the values of keys[i]; each id contributes
      exactly one row.

      Args:
          file_path (str): path of the features file

      Returns:
          [Tuple(List[str], np.ndarray)]: a tuple with the list of keys and
          the matrix of shape (number of keys, values per key). When the
          file holds no entry the matrix is None.

      Raises:
          ValueError: if the lines do not all carry the same number of values.
      """
      data = read_id_values(file_path, np.float64)
      keys = list(data)
      if not keys:
          # Nothing to stack; keep the historical (keys, None) result.
          return (keys, None)
      # Stack every row in a single call: one row per key, in key order.
      # Building the matrix with repeated np.append both duplicated the
      # first row and re-copied the whole matrix at every step.
      matrix = np.stack([data[key] for key in keys], axis=0)
      return (keys, matrix)
6957c7c92   Quillot Mathias   main function for...
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
  def read_labels(file_path: str) -> KeyToLabels:
      """
      Load a labels file laid out as one id per line:
      [id_1] [value_1_1] [value_1_2] ... [value_1_k]
      [id_2] [value_2_1] [value_2_2] ... [value_2_k]
      ...
      [id_n] [value_n_1] [value_n_2] ... [value_n_k]

      The values are kept as strings (no numeric conversion).

      Args:
          file_path (str): path of the labels file.

      Returns:
          KeyToLabels: whatever read_id_values builds for this file when
          asked for str values (id -> labels).
      """
      return read_id_values(file_path, value_type=str)
  
  
  def read_labels_integer(file_path: str) -> KeyToIntLabels:
      """
      Load a labels file laid out as one id per line:
      [id_1] [value_1_1] [value_1_2] ... [value_1_k]
      [id_2] [value_2_1] [value_2_2] ... [value_2_k]
      ...
      [id_n] [value_n_1] [value_n_2] ... [value_n_k]

      Every value is converted to int.

      Args:
          file_path (str): path of the labels file.

      Returns:
          KeyToIntLabels: whatever read_id_values builds for this file when
          asked for int values (id -> labels).
      """
      return read_id_values(file_path, value_type=int)
  
  
  def write_line(id_, values=(), out=sys.stdout):
      """
      Write a line in list, labels or features files.
      If you want to write a list, specify an empty
      sequence for *values*.

      Args:
          id_ (str): id in string.
          values (sequence or scalar, optional): values to write, features or
              labels. A sized object is written item by item, separated by
              single spaces; anything without a length is written as one
              value. Defaults to an empty sequence.
          out (_io.TextIOWrapper, optional): text stream the line is written
              to. Defaults to sys.stdout.
      """
      if not hasattr(values, '__len__'):
          # Scalar value: one id, one value.
          out.write(str(id_) + " " + str(values) + "\n")
          return

      # len() rather than truthiness: numpy arrays refuse bool() when they
      # hold more than one element.
      if len(values) == 0:
          out.write(str(id_) + "\n")
          return

      # Convert each item to text first so numeric values (ints, floats,
      # numpy arrays) can be joined, not only strings.
      out.write(str(id_) + " " + " ".join(map(str, values)) + "\n")