Commit ef25886510b9b7e1e17f87b7039e624070f152bc
1 parent
cd9123115d
Exists in
master
Filter the given file with a list. Only the keys listed in the filter are kept, and an error is thrown if a key in the filter does not exist in the source file.
Showing 3 changed files with 60 additions and 37 deletions Inline Diff
scripts/data-management/filter_ids.py
File was created | 1 | import argparse | |
2 | from os.path import isfile | ||
3 | from volia.data_io import read_lst | ||
4 | |||
def find_missing_keys(entries, wanted_keys):
    """Return the keys of *wanted_keys* that are absent from *entries*.

    The order of *wanted_keys* is preserved so the report is deterministic.
    """
    return [key for key in wanted_keys if key not in entries]


def filter_entries(entries, wanted_keys):
    """Build the output lines for every key of *wanted_keys*.

    Each line has the form ``"<key> <value> <value> ...\\n"``, where the
    values are the strings stored under that key in *entries*.

    Args:
        entries: mapping from key to an iterable of strings.
        wanted_keys: iterable of keys to keep; its order defines the
            order of the returned lines.

    Returns:
        A list of newline-terminated lines, one per wanted key.

    Raises:
        KeyError: if at least one wanted key is missing from *entries*.
            All missing keys are listed in the message.
    """
    wanted_keys = list(wanted_keys)
    missing = find_missing_keys(entries, wanted_keys)
    if missing:
        raise KeyError(
            "keys present in the filter but not in the source file: "
            + ", ".join(str(key) for key in missing)
        )
    return [key + " " + " ".join(entries[key]) + "\n" for key in wanted_keys]


def main():
    """Command-line entry point: keep only the filtered ids of a file."""
    parser = argparse.ArgumentParser(description="Filter ids of the given file to only keep a subset")
    parser.add_argument("file", type=str, help="path of the file to filter")
    # `required=True` replaces the former `assert args.filter is not None`,
    # which disappears when Python runs with -O.
    parser.add_argument("--filter", required=True, type=str,
                        help="path of the file listing the ids to keep")
    parser.add_argument("--outfile", default="out.txt", type=str,
                        help="path of the filtered output file")

    args = parser.parse_args()

    for path in (args.file, args.filter):
        if not isfile(path):
            parser.error("not an existing file: " + path)

    # NOTE(review): read_lst is assumed to return a mapping from id to a
    # list of strings (the code joins the values with spaces) - confirm
    # against volia.data_io.
    list_ = read_lst(args.file)
    filter_ = read_lst(args.filter)

    # Build every line before opening the output so that a missing key
    # cannot leave a half-written file behind.
    lines = filter_entries(list_, filter_.keys())

    with open(args.outfile, "w") as of:
        of.writelines(lines)

    print("File filtered and written in: ", args.outfile)


if __name__ == "__main__":
    main()
scripts/dim-reduction/tsne.py
File was created | 1 | ''' | |
2 | The goal of this script is to calculate the t-SNE of pvectors. | ||
3 | ''' | ||
4 | |||
5 | import os | ||
6 | from os.path import isfile | ||
7 | import argparse | ||
8 | import numpy as np | ||
9 | from sklearn.manifold import TSNE | ||
10 | |||
11 | from volia.data_io import read_features | ||
12 | |||
def stack_features(features_by_key):
    """Split a key -> feature-vector mapping into keys and a 2-D matrix.

    Args:
        features_by_key: mapping from key to a numeric vector; all
            vectors must have the same length.

    Returns:
        A ``(keys, matrix)`` pair where ``keys`` is a list in the mapping's
        iteration order and ``matrix`` is a float array whose i-th row is
        the vector of ``keys[i]``.

    Raises:
        ValueError: if the vectors do not share a common length.
    """
    keys = list(features_by_key)
    # Build the numeric matrix directly: stacking (key, vector) tuples
    # creates a ragged array, which recent NumPy versions reject.
    matrix = np.asarray([features_by_key[key] for key in keys], dtype=float)
    return keys, matrix


def format_embedding_lines(keys, embedding):
    """Return one ``"<key> <v1> <v2> ...\\n"`` line per embedded row."""
    return [
        key + " " + " ".join(str(value) for value in row) + "\n"
        for key, row in zip(keys, embedding)
    ]


def main():
    """Command-line entry point: compute and write a t-SNE projection."""
    # Defining argparse
    parser = argparse.ArgumentParser(prog='pvector tsne', description='Calculate the tsne representation of pvector in 3 or 2d')
    parser.add_argument('features', type=str,
                        help='the path of the file you want to calculate tsne')
    parser.add_argument('-o', '--outfile', type=str,
                        default='.',
                        help='the path of the output file.')
    parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3],
                        default=2,
                        help='number of components output of tsne')

    args = parser.parse_args()

    if not isfile(args.features):
        parser.error("not an existing file: " + args.features)
    # The default '.' is a directory; reject it up front instead of
    # failing in open() after the expensive t-SNE has already run.
    if os.path.isdir(args.outfile):
        parser.error("--outfile must be a file path, got a directory: " + args.outfile)

    # NOTE(review): read_features is assumed to return a mapping from key
    # to a numeric feature vector - confirm against volia.data_io.
    features_list = read_features(args.features)
    if not features_list:
        parser.error("no features found in: " + args.features)

    keys, features = stack_features(features_list)
    feat_tsne = TSNE(n_components=args.n_comp).fit_transform(features)

    with open(args.outfile, "w") as of:
        of.writelines(format_embedding_lines(keys, feat_tsne))
    print("TSNE finished. Check if everything has been done well.")


if __name__ == "__main__":
    main()
scripts/dimreduction/tsne.py
1 | ''' | File was deleted | |
2 | The goal of this script is to calculate the t-SNE of pvectors. | ||
3 | ''' | ||
4 | |||
5 | import os | ||
6 | from os.path import isfile | ||
7 | import argparse | ||
8 | import numpy as np | ||
9 | from sklearn.manifold import TSNE | ||
10 | |||
11 | from volia.data_io import read_features | ||
12 | |||
def stack_features(features_by_key):
    """Split a key -> feature-vector mapping into keys and a 2-D matrix.

    Args:
        features_by_key: mapping from key to a numeric vector; all
            vectors must have the same length.

    Returns:
        A ``(keys, matrix)`` pair where ``keys`` is a list in the mapping's
        iteration order and ``matrix`` is a float array whose i-th row is
        the vector of ``keys[i]``.

    Raises:
        ValueError: if the vectors do not share a common length.
    """
    keys = list(features_by_key)
    # Build the numeric matrix directly: stacking (key, vector) tuples
    # creates a ragged array, which recent NumPy versions reject.
    matrix = np.asarray([features_by_key[key] for key in keys], dtype=float)
    return keys, matrix


def format_embedding_lines(keys, embedding):
    """Return one ``"<key> <v1> <v2> ...\\n"`` line per embedded row."""
    return [
        key + " " + " ".join(str(value) for value in row) + "\n"
        for key, row in zip(keys, embedding)
    ]


def main():
    """Command-line entry point: compute and write a t-SNE projection."""
    # Defining argparse
    parser = argparse.ArgumentParser(prog='pvector tsne', description='Calculate the tsne representation of pvector in 3 or 2d')
    parser.add_argument('features', type=str,
                        help='the path of the file you want to calculate tsne')
    parser.add_argument('-o', '--outfile', type=str,
                        default='.',
                        help='the path of the output file.')
    parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3],
                        default=2,
                        help='number of components output of tsne')

    args = parser.parse_args()

    if not isfile(args.features):
        parser.error("not an existing file: " + args.features)
    # The default '.' is a directory; reject it up front instead of
    # failing in open() after the expensive t-SNE has already run.
    if os.path.isdir(args.outfile):
        parser.error("--outfile must be a file path, got a directory: " + args.outfile)

    # NOTE(review): read_features is assumed to return a mapping from key
    # to a numeric feature vector - confirm against volia.data_io.
    features_list = read_features(args.features)
    if not features_list:
        parser.error("no features found in: " + args.features)

    keys, features = stack_features(features_list)
    feat_tsne = TSNE(n_components=args.n_comp).fit_transform(features)

    with open(args.outfile, "w") as of:
        of.writelines(format_embedding_lines(keys, feat_tsne))
    print("TSNE finished. Check if everything has been done well.")


if __name__ == "__main__":
    main()