Commit 0bc4a3e394eb5c9587896e91379a61bb340f28d2

Authored by Mathias Quillot
1 parent a7fec48aaf
Exists in master

New implementation of data functions for skyrim

Showing 1 changed file with 48 additions and 0 deletions Inline Diff

'''
This module aims at loading and writing files.
Our files follow a specific format that
is not standard. This is why I hope these
functions make reading the files easier.

For more information about the data, please read
the README file.
'''
10 10
11 import sys 11 import sys
12 12
13
def read_file(filepath):
    '''
    Read the file and return a list of (metas, features) pairs.

    Each line is split on single spaces: the first token holds the
    comma-separated metas, the remaining tokens are the features.
    Every value is kept as a string.

    filepath: path of the file to read
    return: list of (metas, features) tuples, one per non-blank line
    '''
    data = []
    with open(filepath, "r") as f:
        for line in f:
            # Only the line terminator is removed, so the single-space
            # separation of the tokens is left untouched.
            line = line.rstrip("\n")
            if not line.strip():
                # A blank line has no metas; skipping it avoids a
                # ([''], []) entry that would break the indexers.
                continue
            splited = line.split(" ")
            metas = splited[0].split(",")
            features = splited[1:]
            data.append((metas, features))
    return data
27 28
28 29
def read_file_skyrim(filepath):
    '''
    Read a Skyrim file and return a list of (metas, features) pairs.

    Each line is split on single spaces: the first token holds the
    metas, separated by "." in Skyrim files, and the remaining tokens
    are the features. Every value is kept as a string.

    filepath: path of the file to read
    return: list of (metas, features) tuples, one per non-blank line
    '''
    data = []
    with open(filepath, "r") as f:
        for line in f:
            # Only the line terminator is removed, so the single-space
            # separation of the tokens is left untouched.
            line = line.rstrip("\n")
            if not line.strip():
                # A blank line has no metas; skipping it avoids a
                # ([''], []) entry that would break the indexers.
                continue
            splited = line.split(" ")
            # Only the first token is split on "."; dots inside the
            # feature tokens are preserved.
            metas = splited[0].split(".")
            features = splited[1:]
            data.append((metas, features))
    return data
46
47
def index_by(data, num_col):
    '''
    Group the data by the value found in one meta column.

    data: iterable of (metas, features) pairs
    num_col: position, in metas, of the value used as key
    return: dict mapping each key to the list of (metas, features)
            pairs sharing that key, in their original order
    '''
    indexed = {}
    for line in data:
        metas = line[0]
        features = line[1]
        # setdefault creates the list the first time a key is seen.
        indexed.setdefault(metas[num_col], []).append((metas, features))
    return indexed
41 60
42 61
def index_by_id(data):
    '''
    Allows the user to index data by id.
    Indexing by id is done on two levels because the data have
    two keys: one is the language (metas[0]) and the other one
    is the id of the sentence (metas[3]).

    data: iterable of (metas, features) pairs
    return: dict such that indexed[lang][id_sen] is the whole line;
            when a (lang, id) pair occurs several times, the last
            line wins
    '''
    indexed = {}
    for line in data:
        metas = line[0]
        id_sen = metas[3]
        lang = metas[0]
        # setdefault creates the per-language dict on first use.
        indexed.setdefault(lang, {})[id_sen] = line
    return indexed
59 78
60 79
def index_by_id_skyrim(data):
    '''
    Allows the user to index Skyrim data by id.
    Indexing by id is done on two levels because the data have
    two keys: one is the language (metas[0]) and the other one
    is the id of the sentence (metas[2] for Skyrim data).

    data: iterable of (metas, features) pairs
    return: dict such that indexed[lang][id_sen] is the whole line;
            when a (lang, id) pair occurs several times, the last
            line wins
    '''
    indexed = {}
    for line in data:
        metas = line[0]
        id_sen = metas[2]
        lang = metas[0]
        # setdefault creates the per-language dict on first use.
        indexed.setdefault(lang, {})[id_sen] = line
    return indexed
96
97
def write_line(metas, features, f=sys.stdout):
    '''
    Just print the line. No need to specify a file.

    The line is made of the metas joined by "," followed by a space
    and the features joined by spaces.

    metas: meta information on list
    features: feature vector
    f: file to write it
    '''
    # str() lets non-string values (e.g. float features) be written;
    # values that are already strings are written unchanged.
    metas_part = ",".join(str(meta) for meta in metas)
    features_part = " ".join(str(feature) for feature in features)
    print(metas_part + " " + features_part, file=f)
107
108
def write_line_skyrim(metas, features, f=sys.stdout):
    '''
    Just print the line in the Skyrim format. No need to specify a file.

    The line is made of the metas joined by "." followed by a space
    and the features joined by spaces.

    metas: meta information on list
    features: feature vector
    f: file to write it
    '''
    # str() lets non-string values (e.g. float features) be written;
    # values that are already strings are written unchanged.
    metas_part = ".".join(str(meta) for meta in metas)
    features_part = " ".join(str(feature) for feature in features)
    print(metas_part + " " + features_part, file=f)
70 118