Commit ef25886510b9b7e1e17f87b7039e624070f152bc
1 parent
cd9123115d
Exists in
master
Filter the given file with a list. Only the keys present in the filter are kept, and an error is thrown if
a key in the filter does not exist in the source file.
Showing 3 changed files with 60 additions and 37 deletions Side-by-side Diff
scripts/data-management/filter_ids.py
| 1 | +import argparse | |
| 2 | +from os.path import isfile | |
| 3 | +from volia.data_io import read_lst | |
| 4 | + | |
| 5 | +if __name__ == "__main__": | |
| 6 | + parser = argparse.ArgumentParser(description="Filter ids of the given file to only keep a subset") | |
| 7 | + parser.add_argument("file", type=str, help="") | |
| 8 | + parser.add_argument("--filter", default=None, type=str, help="") | |
| 9 | + parser.add_argument("--outfile", default="out.txt", type=str, help="") | |
| 10 | + | |
| 11 | + args = parser.parse_args() | |
| 12 | + | |
| 13 | + assert args.filter is not None | |
| 14 | + assert isfile(args.file) | |
| 15 | + | |
| 16 | + list_ = read_lst(args.file) | |
| 17 | + filter_ = read_lst(args.filter) | |
| 18 | + | |
| 19 | + with open(args.outfile, "w") as of: | |
| 20 | + for key in filter_.keys(): | |
| 21 | + of.write(key + " " + " ".join(list_[key]) + "\n") | |
| 22 | + | |
| 23 | + print("File filtered and written in: ", args.outfile) |
scripts/dim-reduction/tsne.py
| 1 | +''' | |
| 2 | +The goal of this script is to display calculate tsne of pvectors. | |
| 3 | +''' | |
| 4 | + | |
| 5 | +import os | |
| 6 | +from os.path import isfile | |
| 7 | +import argparse | |
| 8 | +import numpy as np | |
| 9 | +from sklearn.manifold import TSNE | |
| 10 | + | |
| 11 | +from volia.data_io import read_features | |
| 12 | + | |
| 13 | +if __name__ == "__main__": | |
| 14 | + # Defining argparse | |
| 15 | + parser = argparse.ArgumentParser(prog='pvector tsne', description='Calculate the tsne representation of pvector in 3 or 2d') | |
| 16 | + parser.add_argument('features', type=str, | |
| 17 | + help='the path of the file you want to calculate tsne') | |
| 18 | + parser.add_argument('-o', '--outfile', type=str, | |
| 19 | + default='.', | |
| 20 | + help='the path of the output file.') | |
| 21 | + parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3], | |
| 22 | + default='2', | |
| 23 | + help='number of components output of tsne') | |
| 24 | + | |
| 25 | + args = parser.parse_args() | |
| 26 | + | |
| 27 | + assert isfile(args.features) | |
| 28 | + | |
| 29 | + features_list = read_features(args.features) | |
| 30 | + tuples_key_feat = np.vstack([ (key, feats) for key, feats in features_list.items()]) | |
| 31 | + keys, features = zip(*tuples_key_feat) | |
| 32 | + feat_tsne = TSNE(n_components=args.n_comp).fit_transform(features) | |
| 33 | + | |
| 34 | + with open(args.outfile, "w") as of: | |
| 35 | + for i in range(len(keys)): | |
| 36 | + of.write(keys[i] + " " + " ".join([str(feat) for feat in feat_tsne[i]]) + "\n") | |
| 37 | + print("TSNE finished. Check if everything has been done well.") |
scripts/dimreduction/tsne.py
| 1 | -''' | |
| 2 | -The goal of this script is to display calculate tsne of pvectors. | |
| 3 | -''' | |
| 4 | - | |
| 5 | -import os | |
| 6 | -from os.path import isfile | |
| 7 | -import argparse | |
| 8 | -import numpy as np | |
| 9 | -from sklearn.manifold import TSNE | |
| 10 | - | |
| 11 | -from volia.data_io import read_features | |
| 12 | - | |
| 13 | -if __name__ == "__main__": | |
| 14 | - # Defining argparse | |
| 15 | - parser = argparse.ArgumentParser(prog='pvector tsne', description='Calculate the tsne representation of pvector in 3 or 2d') | |
| 16 | - parser.add_argument('features', type=str, | |
| 17 | - help='the path of the file you want to calculate tsne') | |
| 18 | - parser.add_argument('-o', '--outfile', type=str, | |
| 19 | - default='.', | |
| 20 | - help='the path of the output file.') | |
| 21 | - parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3], | |
| 22 | - default='2', | |
| 23 | - help='number of components output of tsne') | |
| 24 | - | |
| 25 | - args = parser.parse_args() | |
| 26 | - | |
| 27 | - assert isfile(args.features) | |
| 28 | - | |
| 29 | - features_list = read_features(args.features) | |
| 30 | - tuples_key_feat = np.vstack([ (key, feats) for key, feats in features_list.items()]) | |
| 31 | - keys, features = zip(*tuples_key_feat) | |
| 32 | - feat_tsne = TSNE(n_components=args.n_comp).fit_transform(features) | |
| 33 | - | |
| 34 | - with open(args.outfile, "w") as of: | |
| 35 | - for i in range(len(keys)): | |
| 36 | - of.write(keys[i] + " " + " ".join([str(feat) for feat in feat_tsne[i]]) + "\n") | |
| 37 | - print("TSNE finished. Check if everything has been done well.") |