Blame view

bin/extract_vectors.py 1.18 KB
ac78b07ea   Mathias Quillot   All base bin file...
1
2
3
4
5
6
7
8
9
  '''
  The goal of this script is to extract the vectors named in a list.
  One file holds the full content, and the list file only enumerates the
  vectors you want to keep.
  '''
  
  import os
  import numpy as np
  import argparse
e36dbbc98   Mathias Quillot   Improving code style
10
  from data import read_file, index_by_id, write_line
ac78b07ea   Mathias Quillot   All base bin file...
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
  
  # Command-line interface: two positional input paths plus an optional
  # output path (defaults to 'a.out').
  parser = argparse.ArgumentParser(
      description='Extract a subset of vectors')
  parser.add_argument(
      'vectorsfile',
      type=str,
      help='the path of the file containing the convectors')
  parser.add_argument(
      'listfile',
      type=str,
      help='the path of the file containing the list of vectors kept')
  parser.add_argument(
      '-o', '--output',
      type=str,
      default='a.out',
      help='the path the output file containing the vectors kept')

  args = parser.parse_args()

  # Bind the parsed paths to the module-level names read further down in
  # the script.
  VECTOR_FILE, LIST_FILE, OUTPUT_FILE = (
      args.vectorsfile, args.listfile, args.output)
  # READ VECTOR DATA
e36dbbc98   Mathias Quillot   Improving code style
29
30
31
32
33
34
35
36
37
38
39
40
  # Load the full vector file, index it with index_by_id, then load the
  # list of entries to keep.
  features = read_file(VECTOR_FILE)
  features_ind = index_by_id(features)
  selection = read_file(LIST_FILE)

  # Look up each listed entry in the index. The two lookup keys are taken
  # from positions 0 and 3 of the entry's first field.
  # NOTE(review): the meaning of those two positions depends on the format
  # returned by read_file -- confirm against the data module.
  kept_feats = []
  for entry in selection:
      key = entry[0]
      kept_feats.append(features_ind[key[0]][key[3]])

  # Write every kept feature to the output file, one write_line call each.
  with open(OUTPUT_FILE, 'w') as out:
      for feat in kept_feats:
          write_line(feat[0], feat[1], f=out)
ac78b07ea   Mathias Quillot   All base bin file...
41