Commit 0bc4a3e394eb5c9587896e91379a61bb340f28d2

Authored by Mathias Quillot
1 parent a7fec48aaf
Exists in master

New implementation of data functions for skyrim

Showing 1 changed file with 48 additions and 0 deletions Side-by-side Diff

... ... @@ -10,6 +10,7 @@
10 10  
11 11 import sys
12 12  
  13 +
13 14 def read_file(filepath):
14 15 '''
15 16 Read the file and return an array with pairs
... ... @@ -26,6 +27,24 @@
26 27 return data
27 28  
28 29  
def read_file_skyrim(filepath):
    '''
    Read a Skyrim file and return a list of (metas, features) pairs.

    Each non-blank line is split on single spaces. The first token is
    split on "." to build the list of metas; every remaining token is
    kept, as a string, in the list of features.

    Blank (or whitespace-only) lines are skipped instead of producing a
    record whose metas would be [""] and whose features carry no data.

    filepath: path of the file to read
    '''
    data = []
    with open(filepath, "r") as f:
        for line in f:
            # A trailing empty line is common in data files; ignore it
            # rather than emit a record that has no usable metas.
            if not line.strip():
                continue
            tokens = line.rstrip("\n").split(" ")
            metas = tokens[0].split(".")
            features = tokens[1:]
            data.append((metas, features))
    return data
  46 +
  47 +
29 48 def index_by(data, num_col):
30 49 '''
31 50 Allows the user to index data by number of columns.
... ... @@ -58,6 +77,24 @@
58 77 return indexed
59 78  
60 79  
def index_by_id_skyrim(data):
    '''
    Index Skyrim data by language, then by sentence id.

    The data have two keys, so the index has two levels: the first
    one is the language (metas[0]) and the second one is the id of
    the sentence (metas[2]).

    data: iterable of records whose first element is the list of metas

    Returns a dict such that indexed[lang][id_sen] is the whole record.
    When several records share the same language and id, the last one
    replaces the previous ones.
    '''
    indexed = {}
    for line in data:
        metas = line[0]
        # Read both keys before touching the index so that a record
        # with too few metas fails without leaving a partial entry.
        lang, id_sen = metas[0], metas[2]
        indexed.setdefault(lang, {})[id_sen] = line
    return indexed
  96 +
  97 +
61 98 def write_line(metas, features, f=sys.stdout):
62 99 '''
63 100 Just print the line. No need to specify a file.
... ... @@ -67,4 +104,15 @@
67 104 f: file to write it
68 105 '''
69 106 print(",".join(metas) + " " + " ".join(features), file=f)
  107 +
  108 +
def write_line_skyrim(metas, features, f=sys.stdout):
    '''
    Just print the line. No need to specify a file.

    The metas are joined with "." and the features with " "; both
    parts are separated by a single space. Items are converted with
    str() first, so numeric ids or feature values are accepted as well
    as strings.

    metas: meta information on list
    features: feature vector
    f: file to write it (sys.stdout by default)
    '''
    meta_field = ".".join(map(str, metas))
    feature_field = " ".join(map(str, features))
    print(meta_field + " " + feature_field, file=f)