Commit 6c40a57b2cbad2a09bd384f5b8ca94360a470343

Authored by Mathias Quillot
1 parent ee5cc2a7e7
Exists in master

Allows you to replace the labels in a given meta file with labels taken from another file.

Showing 1 changed file with 73 additions and 0 deletions Inline Diff

bin/replace_label.py
File was created 1 '''
2 This script replaces the labels in a data file (meta or features)
3 with new labels.
4 The new labels are taken from a character information file.
5 '''
6 import argparse
7 import numpy as np
8 import csv
9 from data import read_file, index_by_id, write_line
10
# -- ARGPARSE
# The first positional parameter of ArgumentParser is ``prog``; passing ""
# there blanked the program name in the usage/help output. ``prog`` is left at
# its default and the module docstring is used as the description instead.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "metas",
    type=str,
    help="metas file (or features) with character label",
)
parser.add_argument(
    "char_info_file",
    type=str,
    help="csv file with char info",
)
parser.add_argument(
    "--field",
    type=str,
    default="gender",
    help="field of info char file that you want to give as replacement",
)
parser.add_argument("--outfile", type=str, default="out.lst", help="outfile")
parser.add_argument(
    "--lst",
    default=None,
    type=str,
    help="Given list to only take a subset",
)

args = parser.parse_args()

# Command-line values, exposed as module-level constants for the rest of the
# script.
METAS = args.metas
CHAR_INFO_FILE = args.char_info_file
FIELD = args.field
LST = args.lst  # None when no subset list was given
OUTFILE = args.outfile
26
# -- READ FILES
# Lines of the metas (or features) file and the index built from them by
# index_by_id. The index is addressed below as metas_ind[a][b], where a and b
# are fields 0 and 3 of a line's first element.
metas = read_file(METAS)
metas_ind = index_by_id(metas)

# Character information: every CSV row, plus the same rows keyed by their
# "character_id" column (a later row with the same id replaces an earlier one).
with open(CHAR_INFO_FILE, newline='') as f:
    reader = csv.DictReader(f)
    # fieldnames is None when the file is empty.
    columns = reader.fieldnames or []
    char_info = list(reader)

# Fail early with a readable message instead of a bare KeyError later on; for
# the requested field that KeyError would only show up once the output file
# had already been opened for writing.
for column in ("character_id", FIELD):
    if column not in columns:
        print("Missing column in the character information file: " + str(column))
        raise SystemExit(-1)

char_info_ind = {row["character_id"]: row for row in char_info}

# Optional subset: when a list file is given, only its lines are selected,
# otherwise every line of the metas file is.
lst = read_file(LST) if LST is not None else None
selected = lst if LST is not None else metas

# (field 0, field 3) of each selected line: the two keys used to look the line
# up in metas_ind.
ids = [(x[0][0], x[0][3]) for x in selected]
46
# -- GET CHARACTERS FOR EACH FILE
# Character label (field 1 of a line's first element) of every selected line.
# Without a subset list the labels are read straight from the metas lines;
# with one, each listed line is first resolved through the metas index.
if LST is None:
    meta_chars = [entry[0][1] for entry in metas]
else:
    meta_chars = [
        metas_ind[entry[0][0]][entry[0][3]][0][1]
        for entry in lst
    ]

# Distinct labels, as a sorted numpy array.
labels_array = np.asarray(meta_chars)
meta_chars_uniq = np.unique(labels_array)

# Character ids known to the character information file.
info_chars = list(char_info_ind)
58
# -- CHECK THAT EVERY CHARACTER HAS AN INFORMATION ROW
# Every character label used by the selected lines must be a key of the
# character information index, otherwise the replacement step cannot look it
# up. The script stops at the first label that is missing.
for char in meta_chars_uniq:
    # Test membership on the index dict itself (hash lookup) rather than
    # scanning a list of its keys for every label.
    if char not in char_info_ind:
        print("A character is not in the information file: " + str(char))
        # Same exit status as before, without relying on the ``exit`` helper,
        # which is injected by the ``site`` module and is not guaranteed to
        # exist (e.g. when Python runs with -S).
        raise SystemExit(-1)
64
# -- REPLACE ORIGINAL VALUES AND WRITE FILE
# For each selected id pair, look the line up in the metas index, replace its
# character label (field 1 of the first element) with the requested field of
# that character's information row, and write the line to the output file.
#
# The label is overwritten in place on the object held by metas_ind. An id
# pair that occurs more than once in ``ids`` must therefore be relabelled only
# once: a second pass would look up the already-replaced value in
# char_info_ind and raise KeyError.
relabelled = set()
with open(OUTFILE, "w") as f:
    for key in ids:
        line_ = metas_ind[key[0]][key[1]]
        meta = line_[0]
        if key not in relabelled:
            meta[1] = char_info_ind[meta[1]][FIELD]
            relabelled.add(key)
        # line_[1] is passed through untouched as write_line's second argument.
        write_line(meta, line_[1], f)
72
73
74