diff --git a/aeon/clustering/feature_based/__init__.py b/aeon/clustering/feature_based/__init__.py
index 6eeb19ebcf..7aa9214884 100644
--- a/aeon/clustering/feature_based/__init__.py
+++ b/aeon/clustering/feature_based/__init__.py
@@ -8,8 +8,10 @@
     "Catch22Clusterer",
     "SummaryClusterer",
     "TSFreshClusterer",
+    "RClusterer",
 ]
 
 from aeon.clustering.feature_based._catch22 import Catch22Clusterer
+from aeon.clustering.feature_based._r_cluster import RClusterer
 from aeon.clustering.feature_based._summary import SummaryClusterer
 from aeon.clustering.feature_based._tsfresh import TSFreshClusterer
diff --git a/aeon/clustering/feature_based/_r_cluster.py b/aeon/clustering/feature_based/_r_cluster.py
new file mode 100644
index 0000000000..6b70515b16
--- /dev/null
+++ b/aeon/clustering/feature_based/_r_cluster.py
@@ -0,0 +1,313 @@
+"""Time series RClusterer."""
+
+__maintainer__ = ["Ramana-Raja"]
+__all__ = ["RClusterer"]
+
+from contextlib import contextmanager
+
+import numpy as np
+from numba import get_num_threads, set_num_threads
+from sklearn.cluster import KMeans
+from sklearn.decomposition import PCA
+from sklearn.preprocessing import StandardScaler
+from sklearn.utils import check_random_state
+
+from aeon.clustering.base import BaseClusterer
+from aeon.transformations.collection.convolution_based._minirocket import (
+    _fit_biases,
+    _fit_dilations,
+    _quantiles,
+    _static_transform_uni,
+)
+from aeon.utils.validation import check_n_jobs
+
+
+@contextmanager
+def _numba_threads(n_threads):
+    """Temporarily set the numba thread count, restoring it on exit."""
+    previous = get_num_threads()
+    set_num_threads(n_threads)
+    try:
+        yield
+    finally:
+        set_num_threads(previous)
+
+
+class RClusterer(BaseClusterer):
+    """Time series R-Clustering estimator.
+
+    Extracts convolution features with the MiniRocket helper functions,
+    standardises them, reduces them with PCA and clusters the result with
+    k-means. Adapted from the implementation used in [1]_.
+
+    Parameters
+    ----------
+    n_kernels : int, default=84
+        The number of convolutional kernels used to transform the input time
+        series. These kernels are fixed and pre-defined (not random) and are
+        optimized for computational speed and feature diversity. Note that the
+        kernel index table built by ``_get_indices`` always has 84 rows.
+    max_dilations_per_kernel : int, default=32
+        The maximum number of dilation rates applied to each kernel. Dilations
+        control the spacing of the kernel's receptive field over the time
+        series, capturing patterns at varying scales.
+    n_clusters : int, default=8
+        The number of clusters to form.
+    n_init : int, default=10
+        Number of times k-means is run with different centroid seeds. The final
+        result is the best output of ``n_init`` consecutive runs in terms of
+        inertia.
+    num_features : int, default=500
+        Number of features requested from the ``_fit_dilations`` helper.
+    random_state : int, np.random.RandomState or None, default=None
+        Seed or generator controlling the quantile permutation, the bias
+        fitting, PCA and k-means.
+    n_jobs : int, default=1
+        The number of jobs to run in parallel for the transform. ``-1`` means
+        using all processors.
+
+    Attributes
+    ----------
+    labels_ : np.ndarray
+        Cluster label of each series seen in ``fit``.
+
+    Notes
+    -----
+    Adapted from the implementation from source code
+    https://github.com/jorgemarcoes/R-Clustering/blob/main/R_Clustering_on_UCR_Archive.ipynb
+
+    References
+    ----------
+    .. [1] Time series clustering with random convolutional kernels
+        https://link.springer.com/article/10.1007/s10618-024-01018-x
+    """
+
+    _tags = {
+        "capability:multivariate": False,
+        "capability:multithreading": True,
+        "capability:unequal_length": False,
+        "capability:missing_values": False,
+    }
+
+    def __init__(
+        self,
+        n_kernels=84,
+        max_dilations_per_kernel=32,
+        n_clusters=8,
+        n_init=10,
+        num_features=500,
+        random_state=None,
+        n_jobs=1,
+    ):
+        self.n_kernels = n_kernels
+        self.max_dilations_per_kernel = max_dilations_per_kernel
+        self.n_clusters = n_clusters
+        self.n_init = n_init
+        self.num_features = num_features
+        self.random_state = random_state
+        self.n_jobs = n_jobs
+        super().__init__()
+
+    def _get_parameterised_data(self, X):
+        """
+        Generate the parameters needed to transform the data.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            Input data of shape (n_cases, n_channels, n_timepoints).
+
+        Returns
+        -------
+        tuple
+            ``(n_channels_per_combination, channel_indices, dilations,
+            num_features_per_dilation, biases)``.
+        """
+        rng = check_random_state(self.random_state)
+        seed = self.random_state
+        if seed is not None and not isinstance(seed, (int, np.integer)):
+            # A generator instance cannot be passed on as a seed (assumed: the helper
+            # expects an int or None), so draw an integer seed from it instead.
+            seed = rng.randint(np.iinfo(np.int32).max)
+        X = X.astype(np.float32)
+
+        _, n_channels, n_timepoints = X.shape
+
+        dilations, num_features_per_dilation = _fit_dilations(
+            n_timepoints, self.num_features, self.max_dilations_per_kernel
+        )
+        num_features_per_kernel = np.sum(num_features_per_dilation)
+
+        quantiles = _quantiles(self.n_kernels * num_features_per_kernel)
+        quantiles = rng.permutation(quantiles)
+
+        n_combinations = self.n_kernels * len(dilations)
+        max_exponent = np.log2(min(n_channels, 9) + 1)
+        # Draw from the estimator's own generator rather than the global numpy
+        # state so that results depend only on random_state.
+        n_channels_per_combination = (
+            2 ** rng.uniform(0, max_exponent, n_combinations)
+        ).astype(np.int32)
+
+        channel_indices = np.zeros(n_channels_per_combination.sum(), dtype=np.int32)
+        start = 0
+        for n_channels_this_combination in n_channels_per_combination:
+            end = start + n_channels_this_combination
+            channel_indices[start:end] = rng.choice(
+                n_channels, n_channels_this_combination, replace=False
+            )
+            start = end
+
+        biases = _fit_biases(
+            X,
+            n_channels_per_combination,
+            channel_indices,
+            dilations,
+            num_features_per_dilation,
+            quantiles,
+            self.indices,
+            seed,
+        )
+
+        return (
+            n_channels_per_combination,
+            channel_indices,
+            dilations,
+            num_features_per_dilation,
+            biases,
+        )
+
+    def _get_transformed_data(self, X, parameters):
+        """
+        Transform input data using previously fitted parameters.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            Input data of shape (n_cases, 1, n_timepoints).
+        parameters : tuple
+            Parameters returned by ``_get_parameterised_data``.
+
+        Returns
+        -------
+        np.ndarray
+            Transformed data.
+        """
+        X = X.squeeze(1).astype(np.float32)
+        return _static_transform_uni(X, parameters, self.indices)
+
+    def _fit(self, X, y=None):
+        """
+        Fit the clustering model.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            Input data.
+        y : None
+            Ignored.
+        """
+        self.indices = _get_indices()
+
+        with _numba_threads(check_n_jobs(self.n_jobs)):
+            self.parameters = self._get_parameterised_data(X)
+            features = self._get_transformed_data(X=X, parameters=self.parameters)
+
+        self._scaler = StandardScaler()
+        X_std = self._scaler.fit_transform(features)
+
+        # Keep the leading components that each explain at least 1% of variance.
+        # np.argmax returns 0 when no component is below the threshold, so that
+        # case is handled explicitly and at least one component is always kept.
+        ratios = PCA().fit(X_std).explained_variance_ratio_
+        negligible = ratios < 0.01
+        n_components = int(np.argmax(negligible)) if negligible.any() else len(ratios)
+        n_components = max(1, n_components)
+
+        self._pca = PCA(n_components=n_components, random_state=self.random_state)
+        self._pca.fit(X_std)
+        X_pca = self._pca.transform(X_std)
+
+        self._estimator = KMeans(
+            n_clusters=self.n_clusters,
+            random_state=self.random_state,
+            n_init=self.n_init,
+        )
+        self._estimator.fit(X_pca)
+        self.labels_ = self._estimator.labels_
+
+    def _predict(self, X, y=None) -> np.ndarray:
+        """
+        Predict cluster labels for the input data.
+
+        Parameters
+        ----------
+        X : np.ndarray
+            Input data.
+        y : None
+            Ignored.
+
+        Returns
+        -------
+        labels : np.ndarray
+            Array of cluster labels for each time series.
+        """
+        with _numba_threads(check_n_jobs(self.n_jobs)):
+            features = self._get_transformed_data(X=X, parameters=self.parameters)
+
+        X_std = self._scaler.transform(features)
+        X_pca = self._pca.transform(X_std)
+        return self._estimator.predict(X_pca)
+
+    @classmethod
+    def _get_test_params(cls, parameter_set="default") -> dict:
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return `"default"` set.
+
+        Returns
+        -------
+        params : dict or list of dict, default={}
+            Parameters to create testing instances of the class
+            Each dict are parameters to construct an "interesting" test instance, i.e.,
+            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+        """
+        return {
+            "n_clusters": 2,
+        }
+
+
+# fmt: off
+def _get_indices():
+    """Return the fixed (84, 3) int32 table of kernel index triples."""
+    return np.array(
+        (
+            1, 3, 6, 1, 2, 7, 1, 2, 3, 0, 2, 3,
+            1, 4, 5, 0, 1, 3, 3, 5, 6, 0, 1, 2,
+            2, 5, 8, 1, 3, 7, 0, 1, 8, 4, 6, 7,
+            0, 1, 4, 3, 4, 6, 0, 4, 5, 2, 6, 7,
+            5, 6, 7, 0, 1, 6, 4, 5, 7, 4, 7, 8,
+            1, 6, 8, 0, 2, 6, 5, 6, 8, 2, 5, 7,
+            0, 1, 7, 0, 7, 8, 0, 3, 5, 0, 3, 7,
+            2, 3, 8, 2, 3, 4, 1, 4, 6, 3, 4, 5,
+            0, 3, 8, 4, 5, 8, 0, 4, 6, 1, 4, 8,
+            6, 7, 8, 4, 6, 8, 0, 3, 4, 1, 3, 4,
+            1, 5, 7, 1, 4, 7, 1, 2, 8, 0, 6, 7,
+            1, 6, 7, 1, 3, 5, 0, 1, 5, 0, 4, 8,
+            4, 5, 6, 0, 2, 5, 3, 5, 7, 0, 2, 4,
+            2, 6, 8, 2, 3, 7, 2, 5, 6, 2, 4, 8,
+            0, 2, 7, 3, 6, 8, 2, 3, 6, 3, 7, 8,
+            0, 5, 8, 1, 2, 6, 2, 3, 5, 1, 5, 8,
+            3, 6, 7, 3, 4, 7, 0, 4, 7, 3, 5, 8,
+            2, 4, 5, 1, 2, 5, 2, 7, 8, 2, 4, 6,
+            0, 5, 6, 3, 4, 8, 0, 6, 8, 2, 4, 7,
+            0, 2, 8, 0, 3, 6, 5, 7, 8, 1, 5, 6,
+            1, 2, 4, 0, 5, 7, 1, 3, 8, 1, 7, 8,
+        ),
+        dtype=np.int32,
+    ).reshape(84, 3)
+# fmt: on
diff --git a/aeon/clustering/feature_based/tests/__init__.py b/aeon/clustering/feature_based/tests/__init__.py
new file mode 100644
index 0000000000..d6e8de2247
--- /dev/null
+++ b/aeon/clustering/feature_based/tests/__init__.py
@@ -0,0 +1 @@
+"""Feature Based learning clustering tests."""
diff --git a/aeon/clustering/feature_based/tests/test_r_cluster.py b/aeon/clustering/feature_based/tests/test_r_cluster.py
new file mode 100644
index 0000000000..8d81397c98
--- /dev/null
+++ b/aeon/clustering/feature_based/tests/test_r_cluster.py
@@ -0,0 +1,67 @@
+"""Tests for RClusterer."""
+
+import numpy as np
+from sklearn import metrics
+
+from aeon.clustering.feature_based._r_cluster import RClusterer
+from aeon.datasets import load_gunpoint
+
+# fmt: off
+X_ = [
+    [1.5980065, 1.5994389, 1.5705293, 1.5504735, 1.507371,
+     1.4343414, 1.3689859, 1.3052934, 1.2103053, 1.1166533],
+    [1.7011456, 1.670645, 1.6188844, 1.5468045, 1.4754685,
+     1.3912091, 1.3058823, 1.237313, 1.1534138, 1.0696899],
+    [1.722342, 1.6953288, 1.656946, 1.6063123, 1.5118241,
+     1.4141477, 1.3136877, 1.2132338, 1.1129779, 1.0150805],
+    [1.7262632, 1.659836, 1.5731083, 1.4962643, 1.4090704,
+     1.3324426, 1.2457422, 1.1588819, 1.0733612, 0.9871649],
+    [1.7789757, 1.7612025, 1.7030841, 1.610572, 1.4920881,
+     1.3686543, 1.2447608, 1.1209, 1.0107619, 0.9001682],
+    [1.7996215, 1.7427012, 1.6864861, 1.6326717, 1.5324101,
+     1.4225861, 1.3113219, 1.2012383, 1.0899248, 0.9785759],
+    [1.7490938, 1.7266423, 1.6593817, 1.5595723, 1.4572895,
+     1.355191, 1.2521086, 1.1618543, 1.0623266, 0.9609945],
+    [1.3476895, 1.2373582, 1.1288056, 1.0218658, 0.9392247,
+     0.84710395, 0.75024295, 0.65884495, 0.56604975, 0.4741342],
+    [1.6956215, 1.633777, 1.5959885, 1.5069915, 1.4142802,
+     1.3230939, 1.2419277, 1.1857506, 1.1216865, 1.0483568],
+    [1.722719, 1.7132868, 1.6652519, 1.586769, 1.4954436,
+     1.4038439, 1.3122748, 1.2204062, 1.1295636, 1.0408053],
+]
+# fmt: on
+Y = ["22", "28", "21", "15", "2", "18", "21", "36", "11", "21"]
+
+
+def test_r_cluster_custom_dataset():
+    """Test RClusterer on a small hand-written univariate dataset."""
+    X = np.expand_dims(np.array(X_), axis=1)
+    rcluster = RClusterer(n_clusters=8, n_init=10, random_state=1)
+    labels_pred = rcluster.fit_predict(X)
+    score = metrics.adjusted_rand_score(labels_true=Y, labels_pred=labels_pred)
+    assert score > 0.36
+
+
+def test_r_cluster_dataset():
+    """Test RClusterer on the first cases of the GunPoint dataset."""
+    num_points = 20
+    X_train, y_train = load_gunpoint(split="train")
+    X_test, y_test = load_gunpoint(split="test")
+    X_train, y_train = X_train[:num_points], y_train[:num_points]
+    X_test, y_test = X_test[:num_points], y_test[:num_points]
+
+    rcluster = RClusterer(random_state=1, n_init=2, n_clusters=2)
+    train_result = rcluster.fit_predict(X_train)
+    train_score = metrics.rand_score(y_train, train_result)
+    test_result = rcluster.predict(X_test)
+    test_score = metrics.rand_score(y_test, test_result)
+
+    expected_train = [1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]
+    expected_test = [1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+    assert np.array_equal(test_result, expected_test)
+    assert np.array_equal(train_result, expected_train)
+    # Compare the float scores with a tolerance instead of exact equality.
+    assert np.isclose(test_score, 0.5210526315789473)
+    assert np.isclose(train_score, 0.5210526315789473)
+    assert rcluster._estimator.n_iter_ == 3
+    assert np.array_equal(rcluster.labels_, expected_train)
diff --git a/docs/api_reference/clustering.rst b/docs/api_reference/clustering.rst
index 20c8e779da..d10c9303ed 100644
--- a/docs/api_reference/clustering.rst
+++ b/docs/api_reference/clustering.rst
@@ -26,6 +26,7 @@ Clustering Algorithms
     TimeSeriesCLARANS
     ElasticSOM
     KSpectralCentroid
+    RClusterer
 
 Deep learning
 -------------