diff --git a/bin/plot_clusters.py b/bin/plot_clusters.py index 8542e2f..833f73f 100644 --- a/bin/plot_clusters.py +++ b/bin/plot_clusters.py @@ -55,7 +55,7 @@ def check_files(vector_file, cluster_file): - +from data import read_file, index_by_id # Defining argparse parser = argparse.ArgumentParser(prog='Plotter', description='Plot a file of 2d ou 3d dimension') @@ -66,8 +66,6 @@ parser.add_argument('vectorfile', type=str, parser.add_argument('-o-', '--output', type=str, default='plot.pdf', help='the path of the ploted file') -parser.add_argument('-t', '--toy', action='store_true', - help='test the script on a toy example. Do not test all the file content') args = parser.parse_args() @@ -75,12 +73,24 @@ args = parser.parse_args() CLUSTERFILE_PATH=args.clusterfile VECTORFILE_PATH=args.vectorfile OUTFILE_PATH = args.output -TOY_VERSION = args.toy -if check_files(VECTORFILE_PATH, CLUSTERFILE_PATH) == False: - print("Les fichiers ne sont pas dans le meme ordre. Dans une version futur, cela générera une exception. On stop le processus.") - exit(1) +data_vector = read_file(VECTORFILE_PATH) +features = np.asarray([x[1] for x in data_vector]) +features_T = np.transpose(features) + +data_cluster = read_file(CLUSTERFILE_PATH) +data_cluster_ind = index_by_id(data_cluster) +clusters = [data_cluster_ind[x[0][0]][x[0][3]][0][1] for x in data_vector] + +# TODO: compute tsne file +# TODO: adapt the script for the new library +df = pd.DataFrame(dict( + x=features_T[0], + y=features_T[1], + cluster=np.transpose(clusters)[0] + )) +exit(1) # Get Vectors metas, vectors = read_vector_file(VECTORFILE_PATH, toy_version = TOY_VERSION) vectors_T = np.transpose(vectors) @@ -90,11 +100,6 @@ metas, clusters = read_vector_file(CLUSTERFILE_PATH, toy_version = TOY_VERSION) #print(np.transpose(clusters)[0]) #print(np.transpose(metas)[0]) -df = pd.DataFrame(dict( - x=vectors_T[0], - y=vectors_T[1], - cluster=np.transpose(clusters)[0] - )) groups = df.groupby('cluster')