# tsne.py
'''
The goal of this script is to calculate the t-SNE projection (2D or 3D) of pvectors and write it to a text file.
'''

import os
from os.path import isfile
import argparse
import numpy as np
from sklearn.manifold import TSNE

from core.data import read_features

def build_parser():
    """Build the command-line parser for the t-SNE script.

    Options:
        features      path of the feature file to project (positional).
        -o/--outfile  path of the output file; defaults to '.', which
                      ``resolve_outfile`` turns into a file in the current
                      directory.
        -n/--n-comp   number of t-SNE output components, 2 or 3 (default 2).
    """
    parser = argparse.ArgumentParser(prog='pvector tsne', description='Calculate the tsne representation of pvector in 3 or 2d')
    parser.add_argument('features', type=str,
                        help='the path of the file you want to calculate tsne')
    parser.add_argument('-o', '--outfile', type=str,
                        default='.',
                        help='the path of the output file.')
    # The default is a real int so it matches both `type` and `choices`.
    parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3],
                        default=2,
                        help='number of components output of tsne')
    return parser


def resolve_outfile(outfile, features_path):
    """Return the path of the file the embedding will be written to.

    When ``outfile`` is an existing directory (as with the '.' default), the
    result is ``<outfile>/<basename of features_path>.tsne``; opening a
    directory for writing would otherwise fail after the whole computation.
    Any other value is returned unchanged.
    """
    if os.path.isdir(outfile):
        return os.path.join(outfile, os.path.basename(features_path) + ".tsne")
    return outfile


def split_keys_and_features(features_by_key):
    """Split a ``{key: feature_vector}`` mapping into keys and a feature matrix.

    Returns ``(keys, features)`` where ``keys`` is a list in the mapping's
    iteration order and ``features`` is ``np.asarray`` of the values in the
    same order, so row ``i`` belongs to ``keys[i]``.

    NOTE(review): assumes every value is a numeric vector of the same length
    (one row per key) -- confirm against ``read_features``.
    """
    keys = []
    rows = []
    # Keys and vectors are collected separately: stacking (key, vector) pairs
    # in one array mixes strings with numbers and yields a ragged object
    # array, which recent NumPy versions refuse to build.
    for key, feats in features_by_key.items():
        keys.append(key)
        rows.append(feats)
    return keys, np.asarray(rows)


def write_embedding(path, keys, embedding):
    """Write one line per key: the key followed by its space-separated coordinates."""
    with open(path, "w") as out:
        out.writelines(
            "{} {}\n".format(key, " ".join(str(value) for value in row))
            for key, row in zip(keys, embedding)
        )


def main(argv=None):
    """Run the script: read features, compute t-SNE, write the result.

    ``argv`` is the argument list to parse; ``None`` means ``sys.argv[1:]``.
    Invalid input (missing feature file, no features) is reported through
    ``parser.error``, which prints a usage message and exits with status 2.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not isfile(args.features):
        parser.error("features file not found: {}".format(args.features))

    # Resolve the destination up front, before the expensive computation.
    outfile = resolve_outfile(args.outfile, args.features)

    keys, features = split_keys_and_features(read_features(args.features))
    if not keys:
        parser.error("no features found in: {}".format(args.features))

    feat_tsne = TSNE(n_components=args.n_comp).fit_transform(features)

    write_embedding(outfile, keys, feat_tsne)
    print("TSNE finished. Check if everything has been done well.")


if __name__ == "__main__":
    main()