Commit ef25886510b9b7e1e17f87b7039e624070f152bc

Authored by Mathias
1 parent cd9123115d
Exists in master

Filter the given file with a list: only the keys listed in the filter are kept, and an error is thrown if…

… a key in the filter does not exist in the source file.

Showing 3 changed files with 60 additions and 37 deletions Side-by-side Diff

scripts/data-management/filter_ids.py
  1 +import argparse
  2 +from os.path import isfile
  3 +from volia.data_io import read_lst
  4 +
def find_missing_keys(entries, wanted_keys):
    """Return the keys from *wanted_keys* that are absent from *entries*.

    Args:
        entries: mapping loaded from the source list file.
        wanted_keys: iterable of keys requested by the filter.

    Returns:
        A list of the missing keys, in the order they were requested.
    """
    return [key for key in wanted_keys if key not in entries]


def format_filtered_lines(entries, wanted_keys):
    """Build one output line per requested key.

    Each line is the key, a space, the space-joined fields stored under
    that key in *entries*, and a trailing newline.

    Args:
        entries: mapping of key -> sequence of string fields.
        wanted_keys: iterable of keys to keep, in output order.

    Returns:
        A list of formatted lines.

    Raises:
        KeyError: if a requested key does not exist in *entries*.
    """
    return [key + " " + " ".join(entries[key]) + "\n" for key in wanted_keys]


def main():
    """Filter a list file so that only the keys found in the filter remain.

    The result is written to ``--outfile``. A ``KeyError`` is raised when a
    key of the filter does not exist in the source file; the check happens
    before the output file is opened so no partial file is left behind.
    """
    parser = argparse.ArgumentParser(description="Filter ids of the given file to only keep a subset")
    parser.add_argument("file", type=str, help="list file to filter")
    parser.add_argument("--filter", default=None, type=str,
                        help="list file whose keys are kept (required)")
    parser.add_argument("--outfile", default="out.txt", type=str,
                        help="path of the filtered output file")

    args = parser.parse_args()

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if args.filter is None:
        parser.error("--filter is required")
    for path in (args.file, args.filter):
        if not isfile(path):
            parser.error("file not found: {}".format(path))

    # NOTE(review): read_lst is assumed to return a mapping of
    # key -> list of string fields, as the original code used it that way.
    entries = read_lst(args.file)
    wanted_keys = list(read_lst(args.filter).keys())

    # Fail before touching the output file so it is never left truncated.
    missing = find_missing_keys(entries, wanted_keys)
    if missing:
        raise KeyError(
            "{} key(s) of the filter do not exist in {}: {}".format(
                len(missing), args.file, ", ".join(str(key) for key in missing)
            )
        )

    with open(args.outfile, "w") as of:
        of.writelines(format_filtered_lines(entries, wanted_keys))

    print("File filtered and written in: ", args.outfile)


if __name__ == "__main__":
    main()
scripts/dim-reduction/tsne.py
  1 +'''
  2 +The goal of this script is to calculate the t-SNE of pvectors.
  3 +'''
  4 +
  5 +import os
  6 +from os.path import isfile
  7 +import argparse
  8 +import numpy as np
  9 +from sklearn.manifold import TSNE
  10 +
  11 +from volia.data_io import read_features
  12 +
def split_keys_and_features(features_by_key):
    """Split a key -> feature-vector mapping into aligned keys and a matrix.

    Args:
        features_by_key: mapping of key -> feature vector; all vectors must
            have the same length.

    Returns:
        A ``(keys, matrix)`` tuple where ``keys`` is a list and ``matrix`` is
        a 2-D array whose i-th row is the vector stored under ``keys[i]``.

    Raises:
        ValueError: if the mapping is empty or the vectors differ in length.
    """
    keys = list(features_by_key.keys())
    if not keys:
        raise ValueError("no features to project")
    # Stack only the numeric vectors: mixing the string key and the vector in
    # one array produces a ragged object array, which recent NumPy rejects.
    matrix = np.vstack([np.asarray(features_by_key[key]) for key in keys])
    return keys, matrix


def format_embedding_lines(keys, embedding):
    """Build one output line per key.

    Each line is the key, a space, the space-joined coordinates of the
    matching row of *embedding*, and a trailing newline.
    """
    return [
        key + " " + " ".join([str(value) for value in row]) + "\n"
        for key, row in zip(keys, embedding)
    ]


def main():
    """Compute the 2-D or 3-D t-SNE projection of the features and save it."""
    # Defining argparse
    parser = argparse.ArgumentParser(prog='pvector tsne', description='Calculate the tsne representation of pvector in 3 or 2d')
    parser.add_argument('features', type=str,
                        help='the path of the file you want to calculate tsne')
    parser.add_argument('-o', '--outfile', type=str,
                        default='.',
                        help='the path of the output file.')
    parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3],
                        default=2,
                        help='number of components output of tsne')

    args = parser.parse_args()

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if not isfile(args.features):
        parser.error("features file not found: {}".format(args.features))
    # The default outfile ('.') is a directory; fail now rather than after
    # the expensive t-SNE computation.
    if os.path.isdir(args.outfile):
        parser.error("--outfile must be a file path, but {!r} is a directory".format(args.outfile))

    # NOTE(review): read_features is assumed to return a mapping of
    # key -> feature vector, as the original code used it that way.
    features_list = read_features(args.features)
    keys, features = split_keys_and_features(features_list)
    feat_tsne = TSNE(n_components=args.n_comp).fit_transform(features)

    with open(args.outfile, "w") as of:
        of.writelines(format_embedding_lines(keys, feat_tsne))
    print("TSNE finished. Check if everything has been done well.")


if __name__ == "__main__":
    main()
scripts/dimreduction/tsne.py
1   -'''
2   -The goal of this script is to calculate the t-SNE of pvectors.
3   -'''
4   -
5   -import os
6   -from os.path import isfile
7   -import argparse
8   -import numpy as np
9   -from sklearn.manifold import TSNE
10   -
11   -from volia.data_io import read_features
12   -
def split_keys_and_features(features_by_key):
    """Split a key -> feature-vector mapping into aligned keys and a matrix.

    Args:
        features_by_key: mapping of key -> feature vector; all vectors must
            have the same length.

    Returns:
        A ``(keys, matrix)`` tuple where ``keys`` is a list and ``matrix`` is
        a 2-D array whose i-th row is the vector stored under ``keys[i]``.

    Raises:
        ValueError: if the mapping is empty or the vectors differ in length.
    """
    keys = list(features_by_key.keys())
    if not keys:
        raise ValueError("no features to project")
    # Stack only the numeric vectors: mixing the string key and the vector in
    # one array produces a ragged object array, which recent NumPy rejects.
    matrix = np.vstack([np.asarray(features_by_key[key]) for key in keys])
    return keys, matrix


def format_embedding_lines(keys, embedding):
    """Build one output line per key.

    Each line is the key, a space, the space-joined coordinates of the
    matching row of *embedding*, and a trailing newline.
    """
    return [
        key + " " + " ".join([str(value) for value in row]) + "\n"
        for key, row in zip(keys, embedding)
    ]


def main():
    """Compute the 2-D or 3-D t-SNE projection of the features and save it."""
    # Defining argparse
    parser = argparse.ArgumentParser(prog='pvector tsne', description='Calculate the tsne representation of pvector in 3 or 2d')
    parser.add_argument('features', type=str,
                        help='the path of the file you want to calculate tsne')
    parser.add_argument('-o', '--outfile', type=str,
                        default='.',
                        help='the path of the output file.')
    parser.add_argument('-n', '--n-comp', type=int, choices=[2, 3],
                        default=2,
                        help='number of components output of tsne')

    args = parser.parse_args()

    # Explicit checks instead of `assert`, which is stripped under `python -O`.
    if not isfile(args.features):
        parser.error("features file not found: {}".format(args.features))
    # The default outfile ('.') is a directory; fail now rather than after
    # the expensive t-SNE computation.
    if os.path.isdir(args.outfile):
        parser.error("--outfile must be a file path, but {!r} is a directory".format(args.outfile))

    # NOTE(review): read_features is assumed to return a mapping of
    # key -> feature vector, as the original code used it that way.
    features_list = read_features(args.features)
    keys, features = split_keys_and_features(features_list)
    feat_tsne = TSNE(n_components=args.n_comp).fit_transform(features)

    with open(args.outfile, "w") as of:
        of.writelines(format_embedding_lines(keys, feat_tsne))
    print("TSNE finished. Check if everything has been done well.")


if __name__ == "__main__":
    main()