Blame view

bin/replace_label.py 2.06 KB
6c40a57b2   Mathias Quillot   Allow you to repl...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
  '''
  This script replaces the labels in a data file (metas or features)
  with new labels.
  The new labels are taken from a character information file.
  '''
  import argparse
  import numpy as np
  import csv
  from data import read_file, index_by_id, write_line
  
  # -- ARGPARSE
  # The summary is passed as `description`: the first positional parameter of
  # ArgumentParser is `prog`, so passing "" there blanks the program name in
  # the usage and error messages. Leaving `prog` unset lets argparse derive
  # it from the script name.
  parser = argparse.ArgumentParser(
      description="Replace the labels of a metas (or features) file with a "
                  "field taken from a character information csv file."
  )
  parser.add_argument("metas", type=str, help="metas file (or features) with character label")
  parser.add_argument("char_info_file", type=str, help="csv file with char info")
  parser.add_argument("--field", type=str, default="gender", help="field of info char file that you want to give as replacement")
  parser.add_argument("--outfile", type=str, default="out.lst", help="outfile")
  parser.add_argument("--lst", default=None, type=str, help="Given list to only take a subset")

  args = parser.parse_args()

  # Module-level aliases of the parsed arguments, used by the rest of the script.
  METAS = args.metas
  CHAR_INFO_FILE = args.char_info_file
  FIELD = args.field
  LST = args.lst
  OUTFILE = args.outfile
  
  # -- READ FILES
  # Entries of the metas (or features) file, plus the index built from them.
  metas = read_file(METAS)
  metas_ind = index_by_id(metas)

  # Rows of the character information csv, kept both as a list and as a
  # mapping keyed by the "character_id" column (a later row replaces an
  # earlier one with the same id).
  with open(CHAR_INFO_FILE, newline='') as csv_handle:
      char_info = list(csv.DictReader(csv_handle))
  char_info_ind = {entry["character_id"]: entry for entry in char_info}

  # Entries to process: those of the --lst file when given, otherwise every
  # entry of the metas file. Each one is identified by fields 0 and 3 of its
  # first element.
  lst = read_file(LST) if LST is not None else None
  selected = lst if LST is not None else metas
  ids = [(entry[0][0], entry[0][3]) for entry in selected]
  
  # -- GET CHARACTERS FOR EACH FILE
  # Character label (field 1 of the first element) of every selected entry.
  # With --lst, the label is read from the indexed metas entry rather than
  # from the list file itself.
  if LST is not None:
      meta_chars = [metas_ind[x[0][0]][x[0][3]][0][1] for x in lst]
  else:
      meta_chars = [x[0][1] for x in metas]
  meta_chars_uniq = np.unique(np.asarray(meta_chars))

  info_chars = list(char_info_ind.keys())

  # -- CHECK IF A CHARACTERS MATCH
  # Stop before writing anything if a label has no row in the information
  # file. Membership is tested on the dict itself (hash lookup) instead of
  # scanning the `info_chars` list once per character.
  for char in meta_chars_uniq:
      if char not in char_info_ind:
          print("A character is not in the information file: " + str(char))
          # SystemExit is raised directly: the `exit` builtin is injected by
          # the `site` module for interactive use and is not guaranteed to
          # exist (e.g. under `python -S`). The exit status is unchanged.
          raise SystemExit(-1)
  
  # -- REPLACE ORIGINAL VALUES AND WRITE FILE
  # For every selected entry, write its line with the character label
  # (field 1 of the meta part) replaced by the FIELD value of that character.
  with open(OUTFILE, "w") as f:
      for line in ids:
          line_ = metas_ind[line[0]][line[1]]
          # Relabel a copy: writing into the shared record would make a second
          # occurrence of the same entry in `ids` look up the already-replaced
          # value in char_info_ind and fail with a KeyError.
          # NOTE(review): assumes write_line accepts a plain list for the meta
          # part - confirm against data.write_line.
          meta = list(line_[0])
          meta[1] = char_info_ind[meta[1]][FIELD]
          write_line(meta, line_[1], f)