# extract_vectors.py 1.18 KB
'''
The goal of this script is to extract the vectors named in a list.
One file holds the full content, and the list file only enumerates the
vectors you want to keep.
'''

import os
import numpy as np
import argparse
from data import read_file, index_by_id, write_line

def _build_parser():
    """Create the command-line parser for this script.

    The parser takes two positional paths (``vectorsfile`` then
    ``listfile``) and one optional ``-o/--output`` path that defaults
    to ``a.out``.
    """
    cli = argparse.ArgumentParser(description='Extract a subset of vectors')
    cli.add_argument(
        'vectorsfile',
        type=str,
        help='the path of the file containing the convectors',
    )
    cli.add_argument(
        'listfile',
        type=str,
        help='the path of the file containing the list of vectors kept',
    )
    cli.add_argument(
        '-o', '--output',
        type=str,
        default='a.out',
        help='the path the output file containing the vectors kept',
    )
    return cli


# Parsing happens at module level, so the command line is read as soon as
# this file is executed.
parser = _build_parser()
args = parser.parse_args()

# Module-level names for the parsed command-line values
VECTOR_FILE, LIST_FILE, OUTPUT_FILE = (
    args.vectorsfile,
    args.listfile,
    args.output,
)

# LOAD THE FULL VECTOR FILE AND THE SELECTION LIST
# read_file / index_by_id come from the project's `data` module; the exact
# shape of what they return is not visible here (TODO confirm in data.py).
features = read_file(VECTOR_FILE)
features_ind = index_by_id(features)
lst = read_file(LIST_FILE)


# SELECT THE KEPT FEATS
# The first element of each list entry carries the lookup keys: its item 0
# is the outer key into the index and its item 3 is the inner key.
kept_feats = []
for entry in lst:
    keys = entry[0]
    kept_feats.append(features_ind[keys[0]][keys[3]])

# WRITE THE SELECTION TO THE OUTPUT FILE
# The output is opened only after every lookup above has succeeded, so a
# failed lookup never truncates an existing output file.
with open(OUTPUT_FILE, 'w') as out:
    for selected in kept_feats:
        # Only the first two items of each selected feat are written.
        write_line(selected[0], selected[1], f=out)