tsne_pvector.py
2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
'''
The goal of this script is to calculate the t-SNE representation of pvectors.
'''
import os
import argparse
import numpy as np
from sklearn.manifold import TSNE
# Defining argparse
parser = argparse.ArgumentParser(
    prog='pvector tsne',
    description='Calculate the tsne representation of pvector in 3 or 2d')
parser.add_argument('filepath', type=str,
                    help='the path of the file you want to calculate tsne')
parser.add_argument('-o', '--output', type=str,
                    default='.',
                    help='the path of the output file. If it is an existing '
                         'directory, "<input file name>.tsne" is written inside it.')
# The default is an int so it matches both `type=int` and `choices`.
parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3],
                    default=2,
                    help='number of components output of tsne')
parser.add_argument('-t', '--toy', action='store_true',
                    help='test the script on a toy example. Do not test all the file content.')
args = parser.parse_args()
# Module-level settings taken from the command line
FILE_PATH = args.filepath
OUTFILE_PATH = args.output
if os.path.isdir(OUTFILE_PATH):
    # A directory (including the default '.') cannot be opened for writing;
    # resolve it to a file inside that directory, named after the input file,
    # so the script does not fail only after the t-SNE computation has run.
    OUTFILE_PATH = os.path.join(
        OUTFILE_PATH,
        os.path.basename(os.path.normpath(FILE_PATH)) + '.tsne')
TOY_VERSION = args.toy
N_COMP = args.n_comp
# Defining pvectors with default number of column
# These empty defaults are kept only when the input yields no data rows.
pvectors = np.empty((0, 64), np.float32)
metas = np.empty((0, 4), np.float32)
# READ DATA
# Each data line is "<meta1,meta2,...> <v1> <v2> ...": the first
# whitespace-separated token holds comma-separated metadata, the remaining
# tokens are the vector components.
# Rows are gathered in plain lists and converted to arrays once at the end;
# calling np.append for every line copies the whole array each time, which is
# quadratic in the number of lines.
meta_rows = []
vector_rows = []
with open(FILE_PATH, "r") as f:
    for i, line in enumerate(f):
        # Toy mode only looks at the first lines of the file (indices 0..100).
        if TOY_VERSION and i > 100:
            break
        # split() without argument also drops the trailing newline and
        # tolerates repeated or trailing spaces.
        spl_line = line.split()
        if not spl_line:
            # Blank line (e.g. at the end of the file): nothing to parse.
            continue
        meta_rows.append(spl_line[0].split(","))
        vector_rows.append(spl_line[1:])
if vector_rows:
    # Column counts come from the data itself rather than being hard-coded.
    metas = np.asarray(meta_rows)
    pvectors = np.asarray(vector_rows, dtype=np.float32)
# PREPARE SAVE FILE FUNCTION
def save_file(filepath, metas, values):
    """Write one text line per entry of ``values`` to ``filepath``.

    Each line consists of the items of ``metas[i]`` joined with commas, a
    single space, the items of ``values[i]`` joined with spaces, and a
    newline. When ``values[i]`` is not iterable, its plain ``str()`` form is
    written instead. The file is opened in write mode, so existing content is
    replaced.
    """
    with open(filepath, "w") as out:
        for idx, row in enumerate(values):
            meta_part = ",".join(map(str, metas[idx]))
            try:
                value_part = " ".join(map(str, row))
            except TypeError:
                # Scalar entry: fall back to its string representation.
                value_part = str(row)
            out.write(f"{meta_part} {value_part}\n")
# CALCULATE T-SNE
# Embed the pvectors into N_COMP dimensions, then write the embedding next to
# the metadata of each row.
tsne_model = TSNE(n_components=N_COMP)
X_embedded = tsne_model.fit_transform(pvectors)
save_file(OUTFILE_PATH, metas, X_embedded)