Commit 6c40a57b2cbad2a09bd384f5b8ca94360a470343
1 parent
ee5cc2a7e7
Exists in
master
Allows you to replace the labels of a given meta file with the ones from another file.
Showing 1 changed file with 73 additions and 0 deletions Side-by-side Diff
bin/replace_label.py
'''
Replace the character labels of a data file (metas or features) with new
labels.

The new labels are read from one column (``--field``) of a character
information CSV file. That CSV file is indexed by its ``character_id``
column, and every character found in the data file must appear in it.
'''
import argparse
import sys
import numpy as np
import csv
from data import read_file, index_by_id, write_line

# -- ARGPARSE
parser = argparse.ArgumentParser("")
parser.add_argument("metas", type=str, help="metas file (or features) with character label")
parser.add_argument("char_info_file", type=str, help="csv file with char info")
parser.add_argument("--field", type=str, default="gender", help="field of info char file that you want to give as replacement")
parser.add_argument("--outfile", type=str, default="out.lst", help="outfile")
parser.add_argument("--lst", default=None, type=str, help="Given list to only take a subset")

args = parser.parse_args()

METAS = args.metas
CHAR_INFO_FILE = args.char_info_file
FIELD = args.field
LST = args.lst
OUTFILE = args.outfile

# -- READ FILES
metas = read_file(METAS)
metas_ind = index_by_id(metas)

char_info = []
char_info_ind = {}
with open(CHAR_INFO_FILE, newline='') as f:
    reader = csv.DictReader(f)
    # Fail early on an unknown --field: otherwise the problem only shows up
    # as a bare KeyError in the write loop, after OUTFILE has been truncated.
    if reader.fieldnames is not None and FIELD not in reader.fieldnames:
        print("Field is not in the information file: " + str(FIELD))
        sys.exit(-1)
    for row in reader:
        char_info.append(row)
        char_info_ind[row["character_id"]] = row

# The lines to export come from the subset list when one is given, and from
# the whole data file otherwise. Each line is identified by the pair made of
# items 0 and 3 of its meta part (NOTE(review): presumably the two keys that
# index_by_id uses -- confirm against the data module).
lst = None
if LST is not None:
    lst = read_file(LST)
selection = metas if LST is None else lst
ids = [(x[0][0], x[0][3]) for x in selection]

# -- GET CHARACTERS FOR EACH FILE
if LST is not None:
    # The subset list only selects lines; the character label is always read
    # from the data file through the index.
    meta_chars = [metas_ind[first_key][second_key][0][1] for first_key, second_key in ids]
else:
    meta_chars = [x[0][1] for x in metas]
meta_chars_uniq = np.unique(np.asarray(meta_chars))

# -- CHECK IF A CHARACTERS MATCH
for char in meta_chars_uniq:
    # Membership is tested on the dict itself rather than on a list of its keys.
    if char not in char_info_ind:
        print("A character is not in the information file: " + str(char))
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. python -S, frozen interpreters).
        sys.exit(-1)

# -- REPLACE ORIGINAL VALUES AND WRITE FILE
with open(OUTFILE, "w") as f:
    for first_key, second_key in ids:
        line_ = metas_ind[first_key][second_key]
        meta = line_[0]
        original_label = meta[1]
        meta[1] = char_info_ind[original_label][FIELD]
        try:
            write_line(meta, line_[1], f)
        finally:
            # meta is the very object stored in metas_ind. Put the original
            # label back so that an id listed more than once is not looked up
            # with its already-replaced label (which raised KeyError).
            meta[1] = original_label