Commit ef25886510b9b7e1e17f87b7039e624070f152bc

Authored by Mathias
1 parent cd9123115d
Exists in master

Filter the given file with a list. Only keep same keys and an error is thrown if…

… a key on the filter does not exist in the source file.

Showing 3 changed files with 60 additions and 37 deletions Inline Diff

scripts/data-management/filter_ids.py
File was created 1 import argparse
2 from os.path import isfile
3 from volia.data_io import read_lst
4
def filtered_lines(entries, wanted_keys):
    """Build the output lines for the entries selected by *wanted_keys*.

    Args:
        entries: mapping from an id to a sequence of string fields (the
            content of the source list file).
        wanted_keys: iterable of ids to keep; the output follows this order.

    Returns:
        A list of newline-terminated strings, one per wanted key, each made
        of the key followed by its space-separated fields.

    Raises:
        KeyError: if at least one wanted key is missing from *entries*. All
            keys are checked up front so the caller can fail before any
            output is written, and the message lists every missing key.
    """
    wanted = list(wanted_keys)
    missing = [key for key in wanted if key not in entries]
    if missing:
        raise KeyError(
            "keys of the filter missing from the source file: "
            + ", ".join(str(key) for key in missing)
        )
    return [key + " " + " ".join(entries[key]) + "\n" for key in wanted]


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Filter ids of the given file to only keep a subset")
    # "file" is the source list, "--filter" the list whose keys are kept,
    # "--outfile" the path the filtered list is written to.
    parser.add_argument("file", type=str, help="")
    parser.add_argument("--filter", default=None, type=str, help="")
    parser.add_argument("--outfile", default="out.txt", type=str, help="")

    args = parser.parse_args()

    # Explicit checks rather than assert: asserts disappear under `python -O`
    # and give the user no usage message.
    if args.filter is None:
        parser.error("--filter is required")
    for path in (args.file, args.filter):
        if not isfile(path):
            parser.error("not a file: " + path)

    # NOTE(review): read_lst presumably returns a dict mapping each id to the
    # list of remaining fields on its line; verify against volia.data_io.
    list_ = read_lst(args.file)
    filter_ = read_lst(args.filter)

    # Resolve every key before opening the output so a missing key raises
    # without leaving a truncated, half-written file behind.
    lines = filtered_lines(list_, filter_.keys())

    with open(args.outfile, "w") as of:
        of.writelines(lines)

    print("File filtered and written in: ", args.outfile)
scripts/dim-reduction/tsne.py
File was created 1 '''
2 The goal of this script is to calculate the t-SNE representation of pvectors.
3 '''
4
5 import os
6 from os.path import isfile
7 import argparse
8 import numpy as np
9 from sklearn.manifold import TSNE
10
11 from volia.data_io import read_features
12
def split_keys_and_features(features_by_key):
    """Split a key -> feature-vector mapping into aligned keys and a matrix.

    Args:
        features_by_key: mapping from a key to a numeric feature vector; all
            vectors are expected to have the same length.

    Returns:
        A ``(keys, features)`` pair where ``keys`` is a list in the mapping's
        iteration order and ``features`` is a float array with one row per
        key, in the same order.

    Raises:
        ValueError: if the vectors cannot be stacked into a rectangular
            numeric array (raised by NumPy).
    """
    keys = list(features_by_key.keys())
    # Build the matrix from the vectors only. Stacking (key, vector) tuples,
    # as was done before, yields a ragged object array that recent NumPy
    # versions refuse to create.
    features = np.asarray([features_by_key[key] for key in keys], dtype=float)
    return keys, features


if __name__ == "__main__":
    # Defining argparse
    parser = argparse.ArgumentParser(prog='pvector tsne', description='Calculate the tsne representation of pvector in 3 or 2d')
    parser.add_argument('features', type=str,
                        help='the path of the file you want to calculate tsne')
    parser.add_argument('-o', '--outfile', type=str,
                        default='.',
                        help='the path of the output file.')
    parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3],
                        default=2,
                        help='number of components output of tsne')

    args = parser.parse_args()

    # Validate paths before the expensive computation; assert would be
    # stripped under `python -O`.
    if not isfile(args.features):
        parser.error("features file not found: " + args.features)
    # The default outfile '.' is a directory: fail now instead of after the
    # whole t-SNE has been computed.
    if os.path.isdir(args.outfile):
        parser.error("--outfile must be a file path, not a directory: " + args.outfile)

    # NOTE(review): read_features presumably returns a dict mapping each key
    # to its feature vector; verify against volia.data_io.
    features_list = read_features(args.features)
    keys, features = split_keys_and_features(features_list)
    if not keys:
        parser.error("no features found in: " + args.features)
    feat_tsne = TSNE(n_components=args.n_comp).fit_transform(features)

    # One line per key: the key followed by its space-separated coordinates.
    with open(args.outfile, "w") as of:
        for key, coords in zip(keys, feat_tsne):
            of.write(key + " " + " ".join([str(feat) for feat in coords]) + "\n")
    print("TSNE finished. Check if everything has been done well.")
scripts/dimreduction/tsne.py
1 ''' File was deleted
2 The goal of this script is to calculate the t-SNE representation of pvectors.
3 '''
4
5 import os
6 from os.path import isfile
7 import argparse
8 import numpy as np
9 from sklearn.manifold import TSNE
10
11 from volia.data_io import read_features
12
def split_keys_and_features(features_by_key):
    """Split a key -> feature-vector mapping into aligned keys and a matrix.

    Args:
        features_by_key: mapping from a key to a numeric feature vector; all
            vectors are expected to have the same length.

    Returns:
        A ``(keys, features)`` pair where ``keys`` is a list in the mapping's
        iteration order and ``features`` is a float array with one row per
        key, in the same order.

    Raises:
        ValueError: if the vectors cannot be stacked into a rectangular
            numeric array (raised by NumPy).
    """
    keys = list(features_by_key.keys())
    # Build the matrix from the vectors only. Stacking (key, vector) tuples,
    # as was done before, yields a ragged object array that recent NumPy
    # versions refuse to create.
    features = np.asarray([features_by_key[key] for key in keys], dtype=float)
    return keys, features


if __name__ == "__main__":
    # Defining argparse
    parser = argparse.ArgumentParser(prog='pvector tsne', description='Calculate the tsne representation of pvector in 3 or 2d')
    parser.add_argument('features', type=str,
                        help='the path of the file you want to calculate tsne')
    parser.add_argument('-o', '--outfile', type=str,
                        default='.',
                        help='the path of the output file.')
    parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3],
                        default=2,
                        help='number of components output of tsne')

    args = parser.parse_args()

    # Validate paths before the expensive computation; assert would be
    # stripped under `python -O`.
    if not isfile(args.features):
        parser.error("features file not found: " + args.features)
    # The default outfile '.' is a directory: fail now instead of after the
    # whole t-SNE has been computed.
    if os.path.isdir(args.outfile):
        parser.error("--outfile must be a file path, not a directory: " + args.outfile)

    # NOTE(review): read_features presumably returns a dict mapping each key
    # to its feature vector; verify against volia.data_io.
    features_list = read_features(args.features)
    keys, features = split_keys_and_features(features_list)
    if not keys:
        parser.error("no features found in: " + args.features)
    feat_tsne = TSNE(n_components=args.n_comp).fit_transform(features)

    # One line per key: the key followed by its space-separated coordinates.
    with open(args.outfile, "w") as of:
        for key, coords in zip(keys, feat_tsne):
            of.write(key + " " + " ".join([str(feat) for feat in coords]) + "\n")
    print("TSNE finished. Check if everything has been done well.")