# tests for hdbscan-0.8.37-py312h9a8786e_0 (this is a generated file)
print('===== testing package: hdbscan-0.8.37-py312h9a8786e_0 =====')
print('running run_test.py')
# --- run_test.py (begin) ---
# -*- coding: utf-8 -*-
"""
===================================
Demo of HDBSCAN clustering algorithm
===================================
Finds a clustering that has the greatest stability over a range of epsilon
values for standard DBSCAN. This allows clusterings of different densities
unlike DBSCAN.
"""
print(__doc__)

import time

import numpy as np
from hdbscan import HDBSCAN
from sklearn import metrics
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler


def make_var_density_blobs(n_samples=750, centers=None, cluster_std=None,
                           random_state=0):
    """Generate 2-D Gaussian blobs with a per-cluster standard deviation.

    Parameters
    ----------
    n_samples : int
        Total number of points, split evenly across ``centers`` via integer
        division (any remainder is dropped).
    centers : list of [x, y] or None
        Blob centres; ``None`` means a single blob at the origin.
    cluster_std : list of float or None
        Standard deviation of each blob, parallel to ``centers``;
        ``None`` means ``[0.5]``.
    random_state : int
        Seed for reproducible sampling.  BUGFIX: the original accepted this
        argument but never forwarded it to ``make_blobs``, so results were
        not reproducible; each blob now receives a seed derived from it.

    Returns
    -------
    (ndarray, ndarray)
        Stacked sample coordinates of shape (k * (n_samples // k), 2) and
        the matching integer blob labels.
    """
    # Avoid mutable default arguments (lists are shared across calls).
    if centers is None:
        centers = [[0, 0]]
    if cluster_std is None:
        cluster_std = [0.5]
    samples_per_blob = n_samples // len(centers)
    # One make_blobs call per centre so each blob can have its own std;
    # offset the seed per blob so the blobs are independent draws.
    blobs = [make_blobs(n_samples=samples_per_blob, centers=[c],
                        cluster_std=cluster_std[i],
                        random_state=random_state + i)[0]
             for i, c in enumerate(centers)]
    labels = [i * np.ones(samples_per_blob) for i in range(len(centers))]
    return np.vstack(blobs), np.hstack(labels)


##############################################################################
# Generate sample data: three blobs of increasing spread, then standardize.
centers = [[1, 1], [-1, -1], [1, -1]]
densities = [0.2, 0.35, 0.5]
X, labels_true = make_var_density_blobs(n_samples=750, centers=centers,
                                        cluster_std=densities, random_state=0)
X = StandardScaler().fit_transform(X)

##############################################################################
# Compute HDBSCAN and DBSCAN clusterings, timing each for the report below.
hdb_t1 = time.time()
hdb = HDBSCAN(min_cluster_size=10).fit(X)
hdb_labels = hdb.labels_
hdb_elapsed_time = time.time() - hdb_t1

db_t1 = time.time()
db = DBSCAN(eps=0.1).fit(X)
db_labels = db.labels_
db_elapsed_time = time.time() - db_t1

# Number of clusters in labels, ignoring noise if present.
# -1 is the noise label in both algorithms; exclude it from the count.
n_clusters_hdb_ = len(set(hdb_labels)) - (1 if -1 in hdb_labels else 0)

print('\n\n++ HDBSCAN Results')
print('Estimated number of clusters: %d' % n_clusters_hdb_)
print('Elapsed time to cluster: %.4f s' % hdb_elapsed_time)
print('Homogeneity: %0.3f' % metrics.homogeneity_score(labels_true, hdb_labels))
print('Completeness: %0.3f' % metrics.completeness_score(labels_true, hdb_labels))
print('V-measure: %0.3f' % metrics.v_measure_score(labels_true, hdb_labels))
print('Adjusted Rand Index: %0.3f' % metrics.adjusted_rand_score(labels_true, hdb_labels))
print('Adjusted Mutual Information: %0.3f' % metrics.adjusted_mutual_info_score(labels_true, hdb_labels))
# BUGFIX: silhouette_score raises ValueError with fewer than 2 clusters;
# guard the HDBSCAN report exactly as the DBSCAN report below is guarded.
if n_clusters_hdb_ > 1:
    print('Silhouette Coefficient: %0.3f' % metrics.silhouette_score(X, hdb_labels))
else:
    print('Silhouette Coefficient: NaN (too few clusters)')

n_clusters_db_ = len(set(db_labels)) - (1 if -1 in db_labels else 0)

print('\n\n++ DBSCAN Results')
print('Estimated number of clusters: %d' % n_clusters_db_)
print('Elapsed time to cluster: %.4f s' % db_elapsed_time)
print('Homogeneity: %0.3f' % metrics.homogeneity_score(labels_true, db_labels))
print('Completeness: %0.3f' % metrics.completeness_score(labels_true, db_labels))
print('V-measure: %0.3f' % metrics.v_measure_score(labels_true, db_labels))
print('Adjusted Rand Index: %0.3f' % metrics.adjusted_rand_score(labels_true, db_labels))
print('Adjusted Mutual Information: %0.3f' % metrics.adjusted_mutual_info_score(labels_true, db_labels))
# DBSCAN with eps=0.1 may find <2 clusters, for which silhouette is undefined.
if n_clusters_db_ > 1:
    print('Silhouette Coefficient: %0.3f' % metrics.silhouette_score(X, db_labels))
else:
    print('Silhouette Coefficient: NaN (too few clusters)')
# --- run_test.py (end) ---
print('===== hdbscan-0.8.37-py312h9a8786e_0 OK =====')
print("import: 'hdbscan'")
import hdbscan