Commit 6c40a57b2cbad2a09bd384f5b8ca94360a470343

Authored by Mathias Quillot
1 parent ee5cc2a7e7
Exists in master

Allows you to replace the labels of a given meta file with the ones taken from another file.

Showing 1 changed file with 73 additions and 0 deletions Side-by-side Diff

bin/replace_label.py
'''
Replace the character labels of a data file (metas or features) with
new labels.

The new labels are taken from a character information CSV file: every
character label found in the data file is looked up in the
``character_id`` column of the CSV file and replaced with the value of
the column selected by ``--field``.
'''
import argparse
import copy
import csv
import sys

import numpy as np

from data import read_file, index_by_id, write_line

# Name of the CSV column that holds the character identifiers.
ID_COLUMN = "character_id"


def parse_args():
    '''Build the command-line parser and return the parsed arguments.'''
    parser = argparse.ArgumentParser("")
    parser.add_argument("metas", type=str, help="metas file (or features) with character label")
    parser.add_argument("char_info_file", type=str, help="csv file with char info")
    parser.add_argument("--field", type=str, default="gender", help="field of info char file that you want to give as replacement")
    parser.add_argument("--outfile", type=str, default="out.lst", help="outfile")
    parser.add_argument("--lst", default=None, type=str, help="Given list to only take a subset")
    return parser.parse_args()


def load_char_info(path, field):
    '''Return the rows of the character information CSV indexed by id.

    path  -- path of the CSV file; it must have a ``character_id`` column.
    field -- name of the column that will supply the replacement labels.

    The header is validated here so that a wrong ``--field`` is reported
    before the output file is opened, instead of surfacing later as a
    bare ``KeyError``. The process exits with a message on stderr when a
    required column is missing.
    '''
    with open(path, newline='') as f:
        reader = csv.DictReader(f)
        columns = reader.fieldnames or []
        for column in (ID_COLUMN, field):
            if column not in columns:
                sys.exit("Column not found in the information file: " + str(column))
        return {row[ID_COLUMN]: row for row in reader}


def main():
    '''Relabel the selected entries and write them to the output file.'''
    args = parse_args()

    # -- READ FILES
    metas = read_file(args.metas)
    metas_ind = index_by_id(metas)
    char_info_ind = load_char_info(args.char_info_file, args.field)

    # -- SELECT ENTRIES AND GET THEIR CHARACTERS
    # An entry is addressed in metas_ind by the pair (x[0][0], x[0][3]);
    # --lst restricts the output to the pairs listed in that file.
    if args.lst is not None:
        lst = read_file(args.lst)
        ids = [(x[0][0], x[0][3]) for x in lst]
        meta_chars = [metas_ind[first][second][0][1] for first, second in ids]
    else:
        ids = [(x[0][0], x[0][3]) for x in metas]
        meta_chars = [x[0][1] for x in metas]

    # -- CHECK THAT EVERY CHARACTER IS KNOWN
    # Report all unknown characters at once rather than only the first.
    missing = [
        char
        for char in np.unique(np.asarray(meta_chars))
        if char not in char_info_ind
    ]
    if missing:
        for char in missing:
            print("A character is not in the information file: " + str(char),
                  file=sys.stderr)
        # sys.exit instead of the interactive-only exit() helper; the
        # exit status is kept at -1 for backward compatibility.
        sys.exit(-1)

    # -- REPLACE ORIGINAL VALUES AND WRITE FILE
    with open(args.outfile, "w") as f:
        for first, second in ids:
            entry = metas_ind[first][second]
            # Relabel a copy: the entry is shared with metas_ind, so an
            # in-place change would make a second occurrence of the same
            # id look up the already-replaced label and fail.
            meta = copy.copy(entry[0])
            meta[1] = char_info_ind[meta[1]][args.field]
            # entry[1] is passed through to write_line unchanged.
            write_line(meta, entry[1], f)


if __name__ == "__main__":
    main()