diff --git a/scripts/evaluations/clustering.py b/scripts/evaluations/clustering.py index 8d509b3..ead07be 100644 --- a/scripts/evaluations/clustering.py +++ b/scripts/evaluations/clustering.py @@ -18,7 +18,7 @@ from sklearn.metrics import f1_score, homogeneity_score, completeness_score, v_m import matplotlib.pyplot as plt from volia.data_io import read_features,read_lst -from volia.measures import entropy_score +from volia.measures import entropy_score, purity_score ''' TODO: @@ -77,6 +77,12 @@ def train_clustering(label_encoder, feats, classes, outdir): # V-Measure v_measure = v_measure_score(labels, predicted_labels) + # Purity + purity_scores = purity_score(labels, predicted_labels) + purity_class_score = purity_scores["purity_class_score"] + purity_cluster_score = purity_scores["purity_cluster_score"] + K = purity_scores["K"] + # Write results with open(os.path.join(outdir, f"_" + args.prefix + "eval_clustering.log"), "w") as fd: print(f"F1-scores for each classes:\n{fscores_str}", file=fd) @@ -85,6 +91,9 @@ def train_clustering(label_encoder, feats, classes, outdir): print(f"Homogeneity: {homogeneity}", file=fd) print(f"completeness: {completeness}", file=fd) print(f"v-measure: {v_measure}", file=fd) + print(f"purity class score: {purity_class_score}", file=fd) + print(f"purity cluster score: {purity_cluster_score}", file=fd) + print(f"purity overall evaluation criterion (K): {K}", file=fd) # Process t-SNE and plot tsne_estimator = TSNE() @@ -128,9 +137,13 @@ def train_clustering(label_encoder, feats, classes, outdir): "entropy": entropy, "homogeneity": homogeneity, "completeness": completeness, - "v-measure": v_measure + "v-measure": v_measure, + "purity_class_score": purity_class_score, + "purity_cluster_score": purity_cluster_score, + "K": K } + if __name__ == "__main__": # Argparse parser = argparse.ArgumentParser("Compute clustering on a latent space")