From 0bc4a3e394eb5c9587896e91379a61bb340f28d2 Mon Sep 17 00:00:00 2001 From: Mathias Quillot Date: Wed, 11 Sep 2019 22:18:47 +0200 Subject: [PATCH] New implementation of data functions for skyrim --- bin/data.py | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/bin/data.py b/bin/data.py index fb25050..d077d7c 100644 --- a/bin/data.py +++ b/bin/data.py @@ -10,6 +10,7 @@ the README file please. import sys + def read_file(filepath): ''' Read the file and return an array with pairs @@ -26,6 +27,24 @@ def read_file(filepath): return data +def read_file_skyrim(filepath): + ''' + Read the file and return an array with pairs + where each pair is composed by the metas and the + features. + + This is for Skyrim files. + ''' + data = [] + with open(filepath, "r") as f: + for line in f: + splited = line.replace("\n", "").split(" ") + metas = splited[0].split(".") + features = splited[1:] + data.append((metas, features)) + return data + + def index_by(data, num_col): ''' Allows the user to index data by number of columns. @@ -58,6 +77,24 @@ def index_by_id(data): return indexed +def index_by_id_skyrim(data): + ''' + Allows the user to index data by id. + Index data by id consists in indexing two times + because data have two keys. On with the language + and the other one with the id of the sentence. + ''' + indexed = {} + for line in data: + metas = line[0] + id_sen = metas[2] + lang = metas[0] + if lang not in indexed: + indexed[lang] = {} + indexed[lang][id_sen] = line + return indexed + + def write_line(metas, features, f=sys.stdout): ''' Just print the line. No need to specify a file. @@ -67,3 +104,14 @@ def write_line(metas, features, f=sys.stdout): f: file to write it ''' print(",".join(metas) + " " + " ".join(features), file=f) + + +def write_line_skyrim(metas, features, f=sys.stdout): + ''' + Just print the line. No need to specify a file. + + metas: meta information on list + features: feature vector + f: file to write it + ''' + print(".".join(metas) + " " + " ".join(features), file=f) -- 1.8.2.3