Blame view
bin/plot_character.py
2.14 KB
ac78b07ea All base bin file... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
"""Plot character vectors stored in a text file as a 2D scatter.

Each input line has the form ``<meta>,<meta>,... <v0> <v1> ...``: one
comma-separated metadata field followed by whitespace-separated vector
components.  The first two components of every vector are plotted, with one
series per character (the second metadata field), and the figure is written
to the output path.

Only 2D plotting is implemented: any component beyond the second is read
but not drawn.
"""
import argparse
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

# Number of leading lines read when the toy mode is enabled.
_TOY_LINE_LIMIT = 101


def read_vector_file(filename, toy_version=False):
    """Read a file of metadata and vectors.

    Args:
        filename: path of the text file to read.
        toy_version: when true, only the first ``_TOY_LINE_LIMIT`` lines of
            the file are considered.

    Returns:
        A ``(metas, vectors)`` tuple of 2D numpy arrays with one row per
        non-blank line read: ``metas`` holds the comma-separated metadata
        fields as strings, ``vectors`` holds the components as ``float32``.
        When no record is read, the empty ``(0, 4)`` and ``(0, 1)``
        ``float32`` arrays are returned.

    Raises:
        ValueError: if a line has a different number of metadata fields or
            vector components than the first record, or if a component is
            not a valid number.
    """
    meta_rows = []
    vector_rows = []
    with open(filename, "r") as f:
        for line_no, line in enumerate(f):
            if toy_version and line_no >= _TOY_LINE_LIMIT:
                break
            # split() with no argument tolerates repeated spaces, tabs and
            # the trailing newline, and yields [] for a blank line.
            fields = line.split()
            if not fields:
                continue
            meta = fields[0].split(",")
            components = fields[1:]
            if meta_rows and len(meta) != len(meta_rows[0]):
                raise ValueError(
                    f"{filename}:{line_no + 1}: expected "
                    f"{len(meta_rows[0])} metadata fields, got {len(meta)}"
                )
            if vector_rows and len(components) != len(vector_rows[0]):
                raise ValueError(
                    f"{filename}:{line_no + 1}: expected "
                    f"{len(vector_rows[0])} vector components, "
                    f"got {len(components)}"
                )
            try:
                row = np.asarray(components, dtype=np.float32)
            except ValueError as err:
                raise ValueError(
                    f"{filename}:{line_no + 1}: invalid vector component"
                ) from err
            meta_rows.append(meta)
            vector_rows.append(row)

    if not meta_rows:
        # Same empty shapes as before so existing callers keep working.
        return (np.empty((0, 4), np.float32), np.empty((0, 1), np.float32))

    # Rows are accumulated in lists and converted once: calling np.append
    # per line copies the whole array each time (quadratic in file length).
    metas = np.asarray(meta_rows)
    vectors = np.asarray(vector_rows, dtype=np.float32)
    return (metas, vectors)


def _build_parser():
    """Build the command-line argument parser of the script."""
    parser = argparse.ArgumentParser(
        description='Plot a file of 2d ou 3d dimension')
    parser.add_argument('vectorfile', type=str,
                        help='the path of the vectors file')
    parser.add_argument('-o', '--output', type=str, default='plot.pdf',
                        help='the path of the ploted file')
    # The short flag used to be mistyped as '-o-'; keep it as a hidden alias
    # so existing command lines still work.
    parser.add_argument('-o-', dest='output', type=str,
                        default=argparse.SUPPRESS, help=argparse.SUPPRESS)
    parser.add_argument('-t', '--toy', action='store_true',
                        help='test the script on a toy example. '
                             'Do not test all the file content')
    return parser


def main():
    """Parse the command line, read the vectors and save the scatter plot."""
    parser = _build_parser()
    args = parser.parse_args()

    metas, vectors = read_vector_file(args.vectorfile, toy_version=args.toy)

    # The plot needs the character column (second metadata field) and two
    # coordinates; fail with a readable message instead of an IndexError.
    if vectors.shape[0] == 0 or vectors.shape[1] < 2 or metas.shape[1] < 2:
        parser.error(
            "need at least one record with two metadata fields and "
            "two vector components to plot"
        )

    characters = metas[:, 1]
    print("Number of characters: " + str(len(np.unique(characters))))

    df = pd.DataFrame(dict(
        x=vectors[:, 0],
        y=vectors[:, 1],
        character=characters,
    ))

    # One marker-only series per character.
    fig, ax = plt.subplots()
    for character, group in df.groupby('character'):
        ax.plot(group.x, group.y, marker='o', linestyle='', ms=2,
                label=character)
    fig.savefig(args.output)
    plt.close(fig)
    print("Your plot is saved well (no check of this affirmation)")


if __name__ == "__main__":
    main()