bin/extract_kmeans.py
''' This script extracts k-means cluster assignments from an a priori trained k-means model. '''

import argparse
import pickle
import sys

import numpy as np

from data import read_file, index_by_id, write_line

# -- ARGPARSE --
parser = argparse.ArgumentParser(description="extract clusters")
parser.add_argument("model", type=str, help="k-means model pickle")
parser.add_argument("features", type=str, help="features")
parser.add_argument("list", type=str, help="list file")
parser.add_argument("--outfile", type=str, default=None,
                    help="output file (defaults to stdout)")
args = vars(parser.parse_args())

MODEL = args["model"]
FEATURES = args["features"]
LST = args["list"]
OUTFILE = args["outfile"]

# Write to stdout unless an output file was given.
if OUTFILE is None:
    OUTFILE = sys.stdout
else:
    OUTFILE = open(OUTFILE, "w")

# -- READ FILE --
features = read_file(FEATURES)
feat_ind = index_by_id(features)
lst = read_file(LST)
with open(MODEL, "rb") as f:
    kmeans = pickle.load(f)

# -- CONVERT TO NUMPY --
# Gather one feature vector per list entry, looked up through its metadata fields.
X = np.asarray([feat_ind[x[0][0]][x[0][3]][1] for x in lst])
predictions = kmeans.predict(X)

# Replace the second metadata field with the predicted cluster id and write each line out.
for i, line in enumerate(lst):
    meta = line[0]
    meta[1] = str(predictions[i])
    write_line(meta, feat_ind[meta[0]][meta[3]][1], OUTFILE)

# -- CLOSE OUT FILE IF NECESSARY --
if OUTFILE is not sys.stdout:
    OUTFILE.close()
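
The `data` helpers (`read_file`, `index_by_id`, `write_line`) and the pickled model come from elsewhere in this repository; the script only requires that the unpickled object expose a `predict` method over the same feature vectors. As a minimal sketch, assuming the model was fit with scikit-learn's `KMeans` (file names and cluster count below are placeholders, not taken from the repository), a compatible pickle could be produced like this:

# sketch: fit and pickle a k-means model that extract_kmeans.py can load
import pickle
import numpy as np
from sklearn.cluster import KMeans

X_train = np.load("features.npy")  # assumed (n_samples, n_features) feature matrix
kmeans = KMeans(n_clusters=8, random_state=0).fit(X_train)

with open("kmeans.pkl", "wb") as f:
    pickle.dump(kmeans, f)

The extraction script itself would then be invoked along the lines of `python bin/extract_kmeans.py kmeans.pkl features.txt list.txt --outfile clusters.txt`, again with placeholder file names.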