Source code for mesmerize.analysis.clustering_metrics

from sklearn.metrics import pairwise_distances
from typing import Tuple, Optional, Union
import numpy as np
from ..common import get_sys_config


[docs]def get_centerlike(cluster_members: np.ndarray, metric: Optional[Union[str, callable]] = None, dist_matrix: Optional[np.ndarray] = None) -> Tuple[np.ndarray, int]: """ Finds the 1D time-series within a cluster that is the most centerlike :param cluster_members: 2D numpy array in the form [n_samples, 1D time_series] :param metric: Metric to use for pairwise distance calculation, simply passed to sklearn.metrics.pairwise_distances :param dist_matrix: Distance matrix of the cluster members :return: The cluster member which is most centerlike, and its index in the cluster_members array """ if dist_matrix is None: dist_matrix = pairwise_distances(cluster_members, metric=metric, n_jobs=get_sys_config()['_MESMERIZE_N_THREADS']) c_ix = np.argmin(np.sum(dist_matrix, axis=0)) c = cluster_members[c_ix, :] return c, c_ix
[docs]def get_cluster_radius(cluster_members: np.ndarray, metric: Optional[Union[str, callable]] = None, dist_matrix: Optional[np.ndarray] = None, centerlike_index: Optional[int] = None) -> float: """ Returns the cluster radius according to chosen distance metric :param cluster_members: 2D numpy array in the form [n_samples, 1D time_series] :param metric: Metric to use for pairwise distance calculation, simply passed to sklearn.metrics.pairwise_distances :param dist_matrix: Distance matrix of the cluster members :param centerlike_index: Index of the centerlike cluster member within the cluster_members array :return: The cluster radius, average between the most centerlike member and all other members """ if dist_matrix is None: dist_matrix = pairwise_distances(cluster_members, metric=metric, n_jobs=get_sys_config()['_MESMERIZE_N_THREADS']) if centerlike_index is None: c, c_ix = get_centerlike(cluster_members, dist_matrix) else: c_ix = centerlike_index ra = np.sum(dist_matrix[c_ix, :]) / (cluster_members.shape[0] - 1) return ra
[docs]def davies_bouldin_score(data: np.ndarray, cluster_labels: np.ndarray, metric: Union[str, callable]) -> float: """ Adopted from sklearn.metrics.davies_bouldin_score to use any distance metric :param data: Data that was used for clustering, [n_samples, 1D time_series] :param metric: Metric to use for pairwise distance calculation, simply passed to sklearn.metrics.pairwise_distances :param cluster_labels: Cluster labels :return: Davies Bouldin Score using EMD """ radii = np.zeros(cluster_labels.size) centroids = np.zeros((cluster_labels.size, data.shape[1])) for i, c in enumerate(np.unique(cluster_labels)): members = np.take(data, np.where(cluster_labels == c)[0], axis=0) dist_m = pairwise_distances(members, metric=metric, n_jobs=get_sys_config()['_MESMERIZE_N_THREADS']) centroids[i, :], c_ix = get_centerlike(members, dist_matrix=dist_m) radii[i] = get_cluster_radius(members, dist_matrix=dist_m, centerlike_index=c_ix) centroid_distances = pairwise_distances(centroids, metric=metric, n_jobs=get_sys_config()['_MESMERIZE_N_THREADS']) if np.allclose(radii, 0) or np.allclose(centroid_distances, 0): return 0.0 centroid_distances[centroid_distances == 0] = np.inf combined_intra_dists = radii[:, None] + radii scores = np.max(combined_intra_dists / centroid_distances, axis=1) return np.mean(scores)