Blame view
bin/replace_label.py
2.06 KB
6c40a57b2 Allow you to repl... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
"""Replace the labels of a data file (metas or features) with new labels.

The new label of each entry is taken from a character information CSV file:
the entry's current label is looked up as a ``character_id`` in that file and
replaced by the value found in the column selected with ``--field``.

The result is written to ``--outfile``.  When ``--lst`` is given, only the
entries listed in that file are written.
"""

import argparse
import copy
import csv
import sys

import numpy as np

from data import read_file, index_by_id, write_line

# -- ARGPARSE
# The description is passed by keyword because the first positional parameter
# of ArgumentParser is ``prog``; passing a bare string there would replace the
# program name shown in the usage/help output.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("metas", type=str,
                    help="metas file (or features) with character label")
parser.add_argument("char_info_file", type=str,
                    help="csv file with char info")
parser.add_argument("--field", type=str, default="gender",
                    help="field of info char file that you want to give as replacement")
parser.add_argument("--outfile", type=str, default="out.lst", help="outfile")
parser.add_argument("--lst", default=None, type=str,
                    help="Given list to only take a subset")

args = parser.parse_args()

METAS = args.metas
CHAR_INFO_FILE = args.char_info_file
FIELD = args.field
LST = args.lst
OUTFILE = args.outfile

# -- READ FILES
metas = read_file(METAS)
metas_ind = index_by_id(metas)

# Index the character information rows by their "character_id" column.
char_info_ind = {}
with open(CHAR_INFO_FILE, newline='') as f:
    for row in csv.DictReader(f):
        char_info_ind[row["character_id"]] = row

# Entries to write: the ones listed in --lst when given, every entry of the
# metas file otherwise.
lst = read_file(LST) if LST is not None else None
selected = lst if LST is not None else metas

# Each entry is addressed in ``metas_ind`` by the pair (x[0][0], x[0][3]).
# NOTE(review): presumably an identifier and a position within it -- confirm
# against data.index_by_id.
ids = [(x[0][0], x[0][3]) for x in selected]

# -- GET CHARACTERS FOR EACH FILE
if LST is not None:
    # The list file only selects entries; their labels come from the metas.
    meta_chars = [metas_ind[x[0][0]][x[0][3]][0][1] for x in lst]
else:
    meta_chars = [x[0][1] for x in metas]

meta_chars_uniq = np.unique(np.asarray(meta_chars))

# -- CHECK THAT EVERY CHARACTER CAN BE RELABELLED
# All validation happens before the output file is opened, so a bad input
# never leaves a truncated or half-written output file behind.
for char in meta_chars_uniq:
    if char not in char_info_ind:
        print("A character is not in the information file: " + str(char))
        sys.exit(-1)
    if FIELD not in char_info_ind[char]:
        print("Field is not in the information file: " + str(FIELD))
        sys.exit(-1)

# -- REPLACE ORIGINAL VALUES AND WRITE FILE
with open(OUTFILE, "w") as f:
    for first_key, second_key in ids:
        entry = metas_ind[first_key][second_key]
        # Relabel a shallow copy rather than the indexed record itself: if the
        # same entry is selected twice, the second lookup must still see the
        # original character id, not the already-replaced label.
        meta = copy.copy(entry[0])
        meta[1] = char_info_ind[meta[1]][FIELD]
        write_line(meta, entry[1], f)