Commit e36dbbc98b8ad182980cb714b6c6024b467ac072

Authored by Mathias Quillot
1 parent 9aec207cb0
Exists in master

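Improving code style: replace the inline vector-file and list-file parsing loops with the read_file, index_by_id and write_line helpers imported from data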

Showing 1 changed file with 13 additions and 24 deletions Side-by-side Diff

bin/extract_vectors.py
... ... @@ -7,6 +7,7 @@
7 7 import os
8 8 import numpy as np
9 9 import argparse
  10 +from data import read_file, index_by_id, write_line
10 11  
11 12 parser = argparse.ArgumentParser(description='Extract a subset of vectors')
12 13 parser.add_argument('vectorsfile', type=str,
13 14  
... ... @@ -25,28 +26,16 @@
25 26 OUTPUT_FILE = args.output
26 27  
27 28 # READ VECTOR DATA
28   -data = {}
29   -data["en-us"] = {}
30   -data["fr-fr"] = {}
31   -with open(VECTOR_FILE, "r") as f:
32   - for i, line in enumerate(f):
33   - if TOY_VERSION == True and i > 100:
34   - break
35   - spl_line = line.split(" ")
36   - if(len(pvectors) == 0):
37   - pvectors = np.empty((0, len(spl_line[1:])), np.float32)
38   - spl_meta = spl_line.split(",")
39   - lang = spl_meta[0]
40   - iden = spl_meta[3]
41   - data[lang][iden] = line
  29 +features = read_file(VECTOR_FILE)
  30 +features_ind = index_by_id(features)
  31 +lst = read_file(LIST_FILE)
42 32  
43   -# READ LIST AND WRITE NEW FILE
44   -with open(LIST_FILE, "r") as f, open(OUTPUT_FILE, "w") as o:
45   - for i, line in enumerate(LIST_FILE):
46   - if TOY_VERSION == True and i > 100:
47   - break
48   - spl_meta = line.split(",")
49   - lang = spl_meta[0]
50   - iden = spl_meta[3]
51   - OUTPUT_FILE.write(data[lang][iden])
  33 +
  34 +# COMPUTE KEPT FEATS
  35 +kept_feats = [features_ind[x[0][0]][x[0][3]] for x in lst]
  36 +
  37 +# WRITE IN FILE
  38 +with open(OUTPUT_FILE, 'w') as f:
  39 + for feat in kept_feats:
  40 + write_line(feat[0], feat[1], f=f)