Blame view

bin/tsne_pvector.py 2.23 KB
ac78b07ea   Mathias Quillot   All base bin file...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
  '''
  The goal of this script is to calculate the t-SNE projection of pvectors
  and save it to a file.
  '''
  
  import os
  import argparse
  import numpy as np
  from sklearn.manifold import TSNE
  
  # Command-line interface: one positional input file, plus an optional output
  # path, the embedding dimensionality and a "toy" switch.
  parser = argparse.ArgumentParser(
      prog='pvector tsne',
      description='Calculate the tsne representation of pvector in 3 or 2d')
  parser.add_argument('filepath', type=str,
                      help='the path of the file you want to calculate tsne')
  parser.add_argument('-o', '--output', type=str,
                      default='.',
                      help='the path of the output file.')
  # The default is given as an int so it matches `type` and `choices` directly.
  parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3],
                      default=2,
                      help='number of components output of tsne')
  parser.add_argument('-t', '--toy', action='store_true',
                      help='test the script on a toy example. Do not test all the file content.')
  args = parser.parse_args()

  # The output path is opened for writing as a regular file once the embedding
  # has been computed, so a directory (including the default '.') can never
  # work. Reject it right away instead of failing after the t-SNE run.
  if os.path.isdir(args.output):
      parser.error(
          "output path '%s' is a directory; "
          "pass a file path with -o/--output" % args.output)

  # Expose the parsed options as module-level constants for the rest of the script.
  FILE_PATH = args.filepath
  OUTFILE_PATH = args.output
  TOY_VERSION = args.toy
  N_COMP = args.n_comp
  
  # Default (empty) containers; they are kept only when the input yields no rows.
  pvectors = np.empty((0, 64), np.float32)
  metas = np.empty((0, 4), np.float32)


  # READ DATA
  # Each non-blank line has the form "<meta1,meta2,...> <v1> <v2> ...".
  # Rows are gathered in plain lists and converted to arrays once at the end:
  # np.append copies the whole array on every call, which made the original
  # per-line append quadratic in the number of lines.
  meta_rows = []
  vector_rows = []
  with open(FILE_PATH, "r") as f:
      for i, line in enumerate(f):
          # Toy mode stops after the first 101 lines (indices 0 to 100).
          if TOY_VERSION and i > 100:
              break
          # Whitespace split also drops the trailing newline of the line.
          fields = line.split()
          if not fields:
              # Blank line (e.g. an empty last line): nothing to parse.
              continue
          meta_rows.append(fields[0].split(","))
          vector_rows.append(fields[1:])

  if vector_rows:
      # Metadata stays textual; the vector components are parsed as float32.
      metas = np.asarray(meta_rows)
      pvectors = np.asarray(vector_rows, dtype=np.float32)
  
  
  
  # PREPARE SAVE FILE FUNCTION
  def save_file(filepath, metas, values):
      """Write one text line per sample to ``filepath``.

      Each line is the comma-joined metadata of the sample, a single space,
      the space-joined components of its value, and a newline.

      Args:
          filepath: path of the file to create or overwrite.
          metas: indexable collection; ``metas[i]`` is an iterable holding the
              metadata fields of sample ``i``.
          values: iterable of samples; each sample is either an iterable of
              components or a single non-iterable value.

      Raises:
          IndexError: if ``metas`` has fewer entries than ``values``.
          OSError: if ``filepath`` cannot be opened for writing.
      """
      with open(filepath, "w") as f:
          for i, value in enumerate(values):
              metas_str = ",".join(str(v) for v in metas[i])
              try:
                  infos_str = " ".join(str(v) for v in value)
              except TypeError:
                  # A non-iterable sample (plain scalar) is written as-is.
                  infos_str = str(value)
              f.write(metas_str + " " + infos_str + "\n")
  
  # CALCULATE T-SNE
  # Build the estimator with the requested number of output components,
  # project the pvectors, then write the embedding next to its metadata.
  tsne_model = TSNE(n_components=N_COMP)
  X_embedded = tsne_model.fit_transform(pvectors)
  save_file(OUTFILE_PATH, metas, X_embedded)