Commit 0bc4a3e394eb5c9587896e91379a61bb340f28d2
1 parent
a7fec48aaf
Exists in
master
New implementation of data functions for skyrim
Showing 1 changed file with 48 additions and 0 deletions Inline Diff
bin/data.py
1 | ''' | 1 | ''' |
2 | This module aims at loading and writing files. | 2 | This module aims at loading and writing files. |
3 | Our files follow a specific format that | 3 | Our files follow a specific format that |
4 | is not standard. This is why I hope these | 4 | is not standard. This is why I hope these |
5 | functions make reading the files easier. | 5 | functions make reading the files easier. |
6 | 6 | ||
7 | For more information about the data, read | 7 | For more information about the data, read |
8 | the README file please. | 8 | the README file please. |
9 | ''' | 9 | ''' |
10 | 10 | ||
11 | import sys | 11 | import sys |
12 | 12 | ||
13 | |||
def read_file(filepath):
    '''
    Read a data file and return its records.

    Every non-blank line is split on single spaces: the first token is
    split again on "," to give the metas, the remaining tokens are the
    features. All values are kept as strings.

    Blank lines (for instance a trailing empty line at the end of the
    file) are skipped so they do not produce a bogus ([''], []) record.

    filepath: path of the file to read
    return: list of (metas, features) pairs, in file order
    '''
    data = []
    with open(filepath, "r") as f:
        for line in f:
            line = line.rstrip("\n")
            if not line.strip():
                # Nothing to parse on this line.
                continue
            head, *features = line.split(" ")
            data.append((head.split(","), features))
    return data
27 | 28 | ||
28 | 29 | ||
def read_file_skyrim(filepath):
    '''
    Read a Skyrim data file and return its records.

    Every non-blank line is split on single spaces: the first token is
    split again on "." to give the metas, the remaining tokens are the
    features. All values are kept as strings.

    Blank lines (for instance a trailing empty line at the end of the
    file) are skipped so they do not produce a bogus ([''], []) record.

    filepath: path of the file to read
    return: list of (metas, features) pairs, in file order
    '''
    data = []
    with open(filepath, "r") as f:
        for line in f:
            line = line.rstrip("\n")
            if not line.strip():
                # Nothing to parse on this line.
                continue
            head, *features = line.split(" ")
            data.append((head.split("."), features))
    return data
46 | |||
47 | |||
def index_by(data, num_col):
    '''
    Group records by the value found in one of their meta columns.

    data: iterable of records whose first two items are the metas and
          the features
    num_col: position, inside the metas, of the value used as key
    return: dict mapping each distinct metas[num_col] value to the list
            of (metas, features) pairs carrying it, in input order
    '''
    indexed = {}
    for line in data:
        metas, features = line[0], line[1]
        # setdefault creates the bucket on first sight of the key.
        indexed.setdefault(metas[num_col], []).append((metas, features))
    return indexed
41 | 60 | ||
42 | 61 | ||
def index_by_id(data, *, id_col=3, lang_col=0):
    '''
    Index records by language, then by sentence id.

    Records have two keys, so the result is a two-level dict:
    indexed[lang][id_sen] gives the whole record.

    data: iterable of records whose first item is the metas
    id_col: position of the sentence id inside the metas (default 3)
    lang_col: position of the language inside the metas (default 0)
    return: dict of dicts, language -> sentence id -> record

    If several records share the same language and id, the last one
    in data wins.
    '''
    indexed = {}
    for line in data:
        metas = line[0]
        indexed.setdefault(metas[lang_col], {})[metas[id_col]] = line
    return indexed
59 | 78 | ||
60 | 79 | ||
def index_by_id_skyrim(data, *, id_col=2, lang_col=0):
    '''
    Index Skyrim records by language, then by sentence id.

    Records have two keys, so the result is a two-level dict:
    indexed[lang][id_sen] gives the whole record.

    data: iterable of records whose first item is the metas
    id_col: position of the sentence id inside the metas (default 2)
    lang_col: position of the language inside the metas (default 0)
    return: dict of dicts, language -> sentence id -> record

    If several records share the same language and id, the last one
    in data wins.
    '''
    indexed = {}
    for line in data:
        metas = line[0]
        indexed.setdefault(metas[lang_col], {})[metas[id_col]] = line
    return indexed
96 | |||
97 | |||
def write_line(metas, features, f=None):
    '''
    Write one record as a line: metas joined by ",", one space, then
    features joined by spaces.

    metas: meta information, as a list
    features: feature vector
    f: file to write to; when omitted (or None) the current sys.stdout
       is used

    Items that are not strings are converted with str().
    '''
    if f is None:
        # Looked up at call time: a default of sys.stdout in the
        # signature would be frozen at import and ignore redirections.
        f = sys.stdout
    metas_part = ",".join(map(str, metas))
    features_part = " ".join(map(str, features))
    print(metas_part + " " + features_part, file=f)
107 | |||
108 | |||
def write_line_skyrim(metas, features, f=None):
    '''
    Write one Skyrim record as a line: metas joined by ".", one space,
    then features joined by spaces.

    metas: meta information, as a list
    features: feature vector
    f: file to write to; when omitted (or None) the current sys.stdout
       is used

    Items that are not strings are converted with str().
    '''
    if f is None:
        # Looked up at call time: a default of sys.stdout in the
        # signature would be frozen at import and ignore redirections.
        f = sys.stdout
    metas_part = ".".join(map(str, metas))
    features_part = " ".join(map(str, features))
    print(metas_part + " " + features_part, file=f)
70 | 118 |