From 49db0541fa37feb0991dfbc8ec9694bbad7a1993 Mon Sep 17 00:00:00 2001 From: "ili i." Date: Tue, 29 Oct 2024 13:20:13 +0000 Subject: [PATCH 1/2] No need to check or require a symmetrical sparse matrix if the kNN is precomputed. --- umap/umap_.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/umap/umap_.py b/umap/umap_.py index f5fef0d2..c12fc7ae 100644 --- a/umap/umap_.py +++ b/umap/umap_.py @@ -2480,18 +2480,18 @@ def fit(self, X, y=None, force_all_finite=True, **kwargs): print(ts(), "Construct fuzzy simplicial set") if self.metric == "precomputed" and self._sparse_data: - # For sparse precomputed distance matrices, we just argsort the rows to find - # nearest neighbors. To make this easier, we expect matrices that are - # symmetrical (so we can find neighbors by looking at rows in isolation, - # rather than also having to consider that sample's column too). - # print("Computing KNNs for sparse precomputed distances...") - if sparse_tril(X).getnnz() != sparse_triu(X).getnnz(): - raise ValueError( - "Sparse precomputed distance matrices should be symmetrical!" - ) if not np.all(X.diagonal() == 0): raise ValueError("Non-zero distances from samples to themselves!") if self.knn_dists is None: + # For sparse precomputed distance matrices, we just argsort the rows to find + # nearest neighbors. To make this easier, we expect matrices that are + # symmetrical (so we can find neighbors by looking at rows in isolation, + # rather than also having to consider that sample's column too). + # print("Computing KNNs for sparse precomputed distances...") + if sparse_tril(X).getnnz() != sparse_triu(X).getnnz(): + raise ValueError( + "Sparse precomputed distance matrices should be symmetrical!" 
+ ) self._knn_indices = np.zeros((X.shape[0], self.n_neighbors), dtype=int) self._knn_dists = np.zeros(self._knn_indices.shape, dtype=float) for row_id in range(X.shape[0]): From 90929e2d16be69894a9e6fc1d7d13d30824c33e0 Mon Sep 17 00:00:00 2001 From: "ili i." Date: Tue, 29 Oct 2024 14:05:11 +0000 Subject: [PATCH 2/2] comply with black --- umap/umap_.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/umap/umap_.py b/umap/umap_.py index c12fc7ae..e3cce51c 100644 --- a/umap/umap_.py +++ b/umap/umap_.py @@ -2483,15 +2483,16 @@ def fit(self, X, y=None, force_all_finite=True, **kwargs): if not np.all(X.diagonal() == 0): raise ValueError("Non-zero distances from samples to themselves!") if self.knn_dists is None: - # For sparse precomputed distance matrices, we just argsort the rows to find - # nearest neighbors. To make this easier, we expect matrices that are - # symmetrical (so we can find neighbors by looking at rows in isolation, - # rather than also having to consider that sample's column too). + # For sparse precomputed distance matrices, we just argsort the rows + # to find nearest neighbors. To make this easier, we expect matrices + # that are symmetrical (so we can find neighbors by looking at rows + # in isolation, rather than also having to consider that sample's + # column too). # print("Computing KNNs for sparse precomputed distances...") if sparse_tril(X).getnnz() != sparse_triu(X).getnnz(): raise ValueError( "Sparse precomputed distance matrices should be symmetrical!" - ) + ) self._knn_indices = np.zeros((X.shape[0], self.n_neighbors), dtype=int) self._knn_dists = np.zeros(self._knn_indices.shape, dtype=float) for row_id in range(X.shape[0]):