# replace_label.py (2.06 KB)
'''
This script replaces the labels in a data file (metas or features)
with new labels.
The new labels are taken from a character information file.
'''
import argparse
import numpy as np
import csv
from data import read_file, index_by_id, write_line

# -- ARGPARSE
# The first positional parameter of ArgumentParser is `prog`, so passing ""
# there blanks the program name in usage and error messages. Leave `prog`
# at its default (derived from sys.argv[0]) and give a description instead.
parser = argparse.ArgumentParser(
    description=(
        "Replace the label of each entry of a metas (or features) file "
        "with a field taken from a character information file."
    )
)
parser.add_argument("metas", type=str, help="metas file (or features) with character label")
parser.add_argument("char_info_file", type=str, help="csv file with char info")
parser.add_argument("--field", type=str, default="gender", help="field of info char file that you want to give as replacement")
parser.add_argument("--outfile", type=str, default="out.lst", help="outfile")
parser.add_argument("--lst", default=None, type=str, help="Given list to only take a subset")

args = parser.parse_args()

# Module-level aliases for the parsed options, used by the rest of the script.
METAS = args.metas                    # path of the metas (or features) file
CHAR_INFO_FILE = args.char_info_file  # path of the character information CSV
FIELD = args.field                    # CSV column whose value becomes the new label
LST = args.lst                        # optional path of a subset list, or None
OUTFILE = args.outfile                # path of the file to write

# -- READ FILES
# Entries of the metas (or features) file, and the lookup structure that
# index_by_id builds over them.
metas = read_file(METAS)
metas_ind = index_by_id(metas)

# Rows of the character information CSV: once as a list, once keyed by
# their "character_id" column (a later duplicate id replaces an earlier one).
with open(CHAR_INFO_FILE, newline='') as info_file:
    char_info = list(csv.DictReader(info_file))
char_info_ind = {record["character_id"]: record for record in char_info}

# Entries to export: those of the subset list when --lst was given,
# otherwise every entry of the metas file.
if LST is None:
    lst = None
    selected = metas
else:
    lst = read_file(LST)
    selected = lst

# Each entry is reduced to the pair (entry[0][0], entry[0][3]); these pairs
# are used further down as the two successive keys into metas_ind.
ids = [(entry[0][0], entry[0][3]) for entry in selected]

# -- GET CHARACTERS FOR EACH FILE
# Character label (field 1 of an entry's first element) of every entry
# that will be exported.
if LST is None:
    # No subset list: read the label straight from each metas entry.
    meta_chars = [entry[0][1] for entry in metas]
else:
    # Subset list: resolve each listed entry through the metas index and
    # read the label from the indexed record.
    meta_chars = [
        metas_ind[entry[0][0]][entry[0][3]][0][1]
        for entry in lst
    ]

# Sorted, de-duplicated labels as a NumPy array.
meta_chars_uniq = np.unique(np.asarray(meta_chars))

# Character ids known to the character information file.
info_chars = list(char_info_ind)

# -- CHECK THAT EVERY CHARACTER HAS AN ENTRY IN THE INFORMATION FILE
# Stop at the first character used by the data that the character
# information file does not describe, before any output is written.
for char in meta_chars_uniq:
    # Look the id up in the dict itself: same keys as info_chars, without a
    # linear scan of a list for every character.
    if char not in char_info_ind:
        print("A character is not in the information file: " + str(char))
        # `exit` is a helper that the `site` module adds for interactive
        # sessions and may be missing (e.g. `python -S`); raising SystemExit
        # terminates with the same status without depending on it.
        raise SystemExit(-1)

# -- REPLACE ORIGINAL VALUES AND WRITE FILE
# For every selected (key, sub-key) pair, look up the indexed record, swap
# its character label (field 1 of the record's first element) for the
# requested FIELD of that character, and write the record to OUTFILE.
with open(OUTFILE, "w") as f:
    for line in ids:
        line_ = metas_ind[line[0]][line[1]]
        meta = line_[0]
        character = meta[1]
        meta[1] = char_info_ind[character][FIELD]
        # The second element of the record is passed through unchanged.
        write_line(meta, line_[1], f)
        # Put the original label back: the record is shared with metas_ind,
        # so if the same pair appears twice in ids, the second pass would
        # otherwise look up the already-replaced value and fail with
        # KeyError (or silently map the wrong label).
        meta[1] = character