Commit ef25886510b9b7e1e17f87b7039e624070f152bc
1 parent
cd9123115d
Exists in
master
Filter the given file with a list. Only the keys present in the filter list are kept, and an error is thrown if a key in the filter does not exist in the source file.
Showing 3 changed files with 60 additions and 37 deletions Side-by-side Diff
scripts/data-management/filter_ids.py
1 | +import argparse | |
2 | +from os.path import isfile | |
3 | +from volia.data_io import read_lst | |
4 | + | |
5 | +if __name__ == "__main__": | |
6 | + parser = argparse.ArgumentParser(description="Filter ids of the given file to only keep a subset") | |
7 | + parser.add_argument("file", type=str, help="") | |
8 | + parser.add_argument("--filter", default=None, type=str, help="") | |
9 | + parser.add_argument("--outfile", default="out.txt", type=str, help="") | |
10 | + | |
11 | + args = parser.parse_args() | |
12 | + | |
13 | + assert args.filter is not None | |
14 | + assert isfile(args.file) | |
15 | + | |
16 | + list_ = read_lst(args.file) | |
17 | + filter_ = read_lst(args.filter) | |
18 | + | |
19 | + with open(args.outfile, "w") as of: | |
20 | + for key in filter_.keys(): | |
21 | + of.write(key + " " + " ".join(list_[key]) + "\n") | |
22 | + | |
23 | + print("File filtered and written in: ", args.outfile) |
scripts/dim-reduction/tsne.py
1 | +''' | |
2 | +The goal of this script is to calculate the t-SNE representation of pvectors. | |
3 | +''' | |
4 | + | |
5 | +import os | |
6 | +from os.path import isfile | |
7 | +import argparse | |
8 | +import numpy as np | |
9 | +from sklearn.manifold import TSNE | |
10 | + | |
11 | +from volia.data_io import read_features | |
12 | + | |
13 | +if __name__ == "__main__": | |
14 | + # Defining argparse | |
15 | + parser = argparse.ArgumentParser(prog='pvector tsne', description='Calculate the tsne representation of pvector in 3 or 2d') | |
16 | + parser.add_argument('features', type=str, | |
17 | + help='the path of the file you want to calculate tsne') | |
18 | + parser.add_argument('-o', '--outfile', type=str, | |
19 | + default='.', | |
20 | + help='the path of the output file.') | |
21 | + parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3], | |
22 | + default='2', | |
23 | + help='number of components output of tsne') | |
24 | + | |
25 | + args = parser.parse_args() | |
26 | + | |
27 | + assert isfile(args.features) | |
28 | + | |
29 | + features_list = read_features(args.features) | |
30 | + tuples_key_feat = np.vstack([ (key, feats) for key, feats in features_list.items()]) | |
31 | + keys, features = zip(*tuples_key_feat) | |
32 | + feat_tsne = TSNE(n_components=args.n_comp).fit_transform(features) | |
33 | + | |
34 | + with open(args.outfile, "w") as of: | |
35 | + for i in range(len(keys)): | |
36 | + of.write(keys[i] + " " + " ".join([str(feat) for feat in feat_tsne[i]]) + "\n") | |
37 | + print("TSNE finished. Check if everything has been done well.") |
scripts/dimreduction/tsne.py
1 | -''' | |
2 | -The goal of this script is to calculate the t-SNE representation of pvectors. | |
3 | -''' | |
4 | - | |
5 | -import os | |
6 | -from os.path import isfile | |
7 | -import argparse | |
8 | -import numpy as np | |
9 | -from sklearn.manifold import TSNE | |
10 | - | |
11 | -from volia.data_io import read_features | |
12 | - | |
13 | -if __name__ == "__main__": | |
14 | - # Defining argparse | |
15 | - parser = argparse.ArgumentParser(prog='pvector tsne', description='Calculate the tsne representation of pvector in 3 or 2d') | |
16 | - parser.add_argument('features', type=str, | |
17 | - help='the path of the file you want to calculate tsne') | |
18 | - parser.add_argument('-o', '--outfile', type=str, | |
19 | - default='.', | |
20 | - help='the path of the output file.') | |
21 | - parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3], | |
22 | - default='2', | |
23 | - help='number of components output of tsne') | |
24 | - | |
25 | - args = parser.parse_args() | |
26 | - | |
27 | - assert isfile(args.features) | |
28 | - | |
29 | - features_list = read_features(args.features) | |
30 | - tuples_key_feat = np.vstack([ (key, feats) for key, feats in features_list.items()]) | |
31 | - keys, features = zip(*tuples_key_feat) | |
32 | - feat_tsne = TSNE(n_components=args.n_comp).fit_transform(features) | |
33 | - | |
34 | - with open(args.outfile, "w") as of: | |
35 | - for i in range(len(keys)): | |
36 | - of.write(keys[i] + " " + " ".join([str(feat) for feat in feat_tsne[i]]) + "\n") | |
37 | - print("TSNE finished. Check if everything has been done well.") |