Draft PR: Add modularity and modularity_adata functions to scanpy.metrics #3613

Open · wants to merge 35 commits into base: main
Commits (35) · showing changes from all commits
f76dc7b
Modularity score functions with comments
amalia-k510 Apr 25, 2025
f092469
typo fix
amalia-k510 Apr 25, 2025
7ffa1ec
Merge branch 'scverse:main' into main
amalia-k510 Apr 25, 2025
c0d0c52
Merge branch 'scverse:main' into main
amalia-k510 May 7, 2025
68652a7
modularity code updated and 6 tests written for modularity
amalia-k510 May 7, 2025
948319a
error fixing from pipelines
amalia-k510 May 7, 2025
6a64330
ruff error fix
amalia-k510 May 7, 2025
793351f
keywords variable fix
amalia-k510 May 7, 2025
92d8e26
neighbors from a precomputed distance matrix, still need to make sure…
amalia-k510 May 7, 2025
198c4fb
revert back
amalia-k510 May 7, 2025
e7fb67a
code only for the prexisting distance matrix
amalia-k510 May 7, 2025
14cb441
initial changes for the neighborhors
amalia-k510 May 8, 2025
0ce8c15
distances name switch and sparse array allowed
amalia-k510 May 12, 2025
914b87d
input fix
amalia-k510 May 12, 2025
d285203
variable input fixes
amalia-k510 May 12, 2025
50705b3
test added
amalia-k510 May 12, 2025
4730667
numpy issue fix for one line
amalia-k510 May 12, 2025
4b9fe3e
avoid densifying sparse matrices
amalia-k510 May 12, 2025
7d754c7
switched to @needs
amalia-k510 May 12, 2025
15320af
switched to @needs
amalia-k510 May 12, 2025
623a86c
variable fix input
amalia-k510 May 12, 2025
e8c9a25
code from separate PR removed
amalia-k510 May 12, 2025
040b8b7
unify metadata assembly
flying-sheep May 16, 2025
d6a9aee
Discard changes to src/scanpy/neighbors/__init__.py
flying-sheep May 16, 2025
c03b863
comments fix and release notes
amalia-k510 May 23, 2025
473a437
comments fix typo
amalia-k510 May 23, 2025
c6e5d1f
Merge branch 'scverse:main' into main
amalia-k510 May 25, 2025
ac0a6b3
before neighbour merge
amalia-k510 May 25, 2025
1c033f0
notes
amalia-k510 May 25, 2025
662534b
Merge branch 'main' of https://github.com/amalia-k510/scanpy into main
amalia-k510 May 25, 2025
32116f0
Merge branch 'matrix_exist' into main
amalia-k510 May 25, 2025
a1b2033
merge error fix
amalia-k510 May 25, 2025
4cdc729
post merge and call form neighbor
amalia-k510 May 25, 2025
cb7aaf6
release notes fix
amalia-k510 May 26, 2025
7e34ce2
Merge branch 'main' into main
flying-sheep May 28, 2025
1 change: 1 addition & 0 deletions docs/release-notes/3616.feature.md
@@ -0,0 +1 @@
Add modularity scoring via {func}`~scanpy.metrics.modularity` and {func}`~scanpy.metrics.modularity_adata`, with support for directed and undirected graphs {smaller}`A. Karesh`
4 changes: 2 additions & 2 deletions src/scanpy/metrics/__init__.py
@@ -3,7 +3,7 @@
from __future__ import annotations

from ._gearys_c import gearys_c
from ._metrics import confusion_matrix
from ._metrics import confusion_matrix, modularity, modularity_adata
from ._morans_i import morans_i

__all__ = ["confusion_matrix", "gearys_c", "morans_i"]
__all__ = ["confusion_matrix", "gearys_c", "modularity", "modularity_adata", "morans_i"]
95 changes: 94 additions & 1 deletion src/scanpy/metrics/_metrics.py
@@ -8,10 +8,16 @@
import pandas as pd
from natsort import natsorted
from pandas.api.types import CategoricalDtype
from scipy.sparse import coo_matrix

from .._compat import SpBase

if TYPE_CHECKING:
from collections.abc import Sequence

from anndata import AnnData
from numpy.typing import ArrayLike


def confusion_matrix(
orig: pd.Series | np.ndarray | Sequence,
@@ -60,7 +66,7 @@ def confusion_matrix(
orig, new = pd.Series(orig), pd.Series(new)
assert len(orig) == len(new)

unique_labels = pd.unique(np.concatenate((orig.values, new.values)))
unique_labels = pd.unique(np.concatenate((orig.to_numpy(), new.to_numpy())))

# Compute
mtx = _confusion_matrix(orig, new, labels=unique_labels)
@@ -89,3 +95,90 @@ def confusion_matrix(
df = df.loc[np.array(orig_idx), np.array(new_idx)]

return df


def modularity(
connectivities: ArrayLike | SpBase,
labels: pd.Series | ArrayLike,
*,
is_directed: bool,
) -> float:
"""Compute the modularity of a graph given its connectivities and labels.

Parameters
----------
connectivities
Weighted adjacency matrix of the graph, as a dense NumPy array or a sparse matrix.
labels
Cluster labels for each node in the graph.
is_directed
Whether to treat the graph as directed (`True`) or undirected (`False`).

Returns
-------
float
The modularity of the graph based on the provided clustering.
"""
try:
# igraph is imported lazily (not at module level) so that scanpy
# does not require it unless modularity is actually computed
import igraph as ig
except ImportError as e:
msg = "igraph is required for computing modularity"
raise ImportError(msg) from e
if isinstance(connectivities, SpBase):
# sparse input: convert to COO format to extract edges and weights,
# keeping only the non-zero entries and their indices
coo = coo_matrix(connectivities)
edges = list(zip(coo.row, coo.col, strict=False))
weights = coo.data.tolist()
graph = ig.Graph(edges=edges, directed=is_directed)
graph.es["weight"] = weights
else:
# dense input: build the weighted graph directly from the adjacency matrix
dense_array = np.asarray(connectivities)
igraph_mode = ig.ADJ_DIRECTED if is_directed else ig.ADJ_UNDIRECTED
graph = ig.Graph.Weighted_Adjacency(dense_array.tolist(), mode=igraph_mode)
# convert cluster labels to the integer codes required by igraph
labels = pd.Categorical(np.asarray(labels)).codes

return graph.modularity(labels)
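
For illustration (not part of the diff): a minimal sketch of calling the new `modularity` function on a toy graph, assuming this branch plus an installed `igraph`; the adjacency matrix and labels below are made up.

import numpy as np
from scanpy.metrics import modularity

# 4-node undirected graph with two clear communities: {0, 1} and {2, 3}
adjacency = np.array(
    [
        [0.0, 1.0, 0.0, 0.0],
        [1.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 1.0],
        [0.0, 0.0, 1.0, 0.0],
    ]
)
labels = ["a", "a", "b", "b"]

# Splitting the graph into its two connected components should give the
# maximal modularity for this toy example (0.5).
score = modularity(adjacency, labels, is_directed=False)
print(score)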


def modularity_adata(
adata: AnnData,
*,
label_key: str | ArrayLike = "leiden",
key: str = "neighbors",
is_directed: bool,
) -> float:
# defaults to Leiden labels and the standard neighbors graph, as these are the most common
"""Compute modularity from an AnnData object using stored graph and clustering labels.

Parameters
----------
adata
The annotated data matrix.
label_key
Key in `adata.obs` containing the clustering labels, or an array of labels. Defaults to "leiden".
key
Key in `adata.uns` containing the neighbors metadata; its `connectivities_key` entry points to the graph in `adata.obsp`. Defaults to "neighbors".
is_directed
Whether to treat the graph as directed (`True`) or undirected (`False`). The value stored in `adata.uns[key]["params"]["is_directed"]` takes precedence.

Returns
-------
float
The modularity of the graph based on the provided clustering.
"""
label_array = adata.obs[label_key] if isinstance(label_key, str) else label_key
connectivities_key = adata.uns[key]["connectivities_key"]
params = adata.uns[key]["params"]
connectivities = adata.obsp[connectivities_key]
is_directed = params["is_directed"]

if isinstance(connectivities, pd.DataFrame):
connectivities = connectivities.values

return modularity(connectivities, label_array, is_directed=is_directed)
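
For illustration (not part of the diff): a hedged end-to-end sketch of how `modularity_adata` would typically be used after a standard clustering run on this branch; the dataset choice is illustrative.

import scanpy as sc

adata = sc.datasets.pbmc68k_reduced()
sc.pp.neighbors(adata)  # writes .obsp["connectivities"] and .uns["neighbors"]
sc.tl.leiden(adata)     # writes .obs["leiden"]

# Reads the graph via adata.uns["neighbors"]["connectivities_key"] and the
# labels from adata.obs["leiden"], then delegates to modularity().
# Note: per the code above, the stored params["is_directed"] takes precedence
# over the argument passed here.
score = sc.metrics.modularity_adata(
    adata, label_key="leiden", key="neighbors", is_directed=False
)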
151 changes: 124 additions & 27 deletions src/scanpy/neighbors/__init__.py
@@ -21,15 +21,17 @@
from .._utils import NeighborsView, _doc_params, get_literal_vals
from . import _connectivity
from ._common import (
_get_indices_distances_from_dense_matrix,
_get_indices_distances_from_sparse_matrix,
_get_sparse_matrix_from_indices_distances,
)
from ._connectivity import umap
from ._doc import doc_n_pcs, doc_use_rep
from ._types import _KnownTransformer, _Method

if TYPE_CHECKING:
from collections.abc import Callable, MutableMapping
from typing import Any, Literal, NotRequired
from typing import Any, Literal, NotRequired, Unpack

from anndata import AnnData
from igraph import Graph
@@ -58,6 +60,13 @@ class KwdsForTransformer(TypedDict):
random_state: _LegacyRandom


class NeighborsDict(TypedDict): # noqa: D101
connectivities_key: str
distances_key: str
params: NeighborsParams
rp_forest: NotRequired[RPForestDict]


class NeighborsParams(TypedDict): # noqa: D101
n_neighbors: int
method: _Method
@@ -74,6 +83,7 @@ def neighbors( # noqa: PLR0913
n_neighbors: int = 15,
n_pcs: int | None = None,
*,
distances: np.ndarray | SpBase | None = None,
use_rep: str | None = None,
knn: bool = True,
method: _Method = "umap",
@@ -83,6 +93,7 @@ def neighbors( # noqa: PLR0913
random_state: _LegacyRandom = 0,
key_added: str | None = None,
copy: bool = False,
is_directed: bool = False,
) -> AnnData | None:
"""Compute the nearest neighbors distance matrix and a neighborhood graph of observations :cite:p:`McInnes2018`.

@@ -135,6 +146,7 @@ def neighbors( # noqa: PLR0913
Use :func:`rapids_singlecell.pp.neighbors` instead.
metric
A known metric’s name or a callable that returns a distance.
If `distances` is given, this parameter is simply stored in `.uns` (see below).

*ignored if ``transformer`` is an instance.*
metric_kwds
@@ -186,6 +198,18 @@ def neighbors( # noqa: PLR0913
:doc:`/how-to/knn-transformers`

"""
if distances is not None:
if callable(metric):
msg = "`metric` must be a string if `distances` is given."
raise TypeError(msg)
# if a precomputed distance matrix is provided, skip the PCA and distance computation
return neighbors_from_distance(
adata,
distances,
n_neighbors=n_neighbors,
metric=metric,
method=method,
)
start = logg.info("computing neighbors")
adata = adata.copy() if copy else adata
if adata.is_view: # we shouldn't need this here...
@@ -203,51 +227,124 @@
random_state=random_state,
)

if key_added is None:
key_added = "neighbors"
conns_key = "connectivities"
dists_key = "distances"
else:
conns_key = key_added + "_connectivities"
dists_key = key_added + "_distances"

adata.uns[key_added] = {}

neighbors_dict = adata.uns[key_added]

neighbors_dict["connectivities_key"] = conns_key
neighbors_dict["distances_key"] = dists_key

neighbors_dict["params"] = NeighborsParams(
key_added, neighbors_dict = _get_metadata(
key_added,
n_neighbors=neighbors.n_neighbors,
method=method,
random_state=random_state,
metric=metric,
**({} if not metric_kwds else dict(metric_kwds=metric_kwds)),
**({} if use_rep is None else dict(use_rep=use_rep)),
**({} if n_pcs is None else dict(n_pcs=n_pcs)),
)
if metric_kwds:
neighbors_dict["params"]["metric_kwds"] = metric_kwds
if use_rep is not None:
neighbors_dict["params"]["use_rep"] = use_rep
if n_pcs is not None:
neighbors_dict["params"]["n_pcs"] = n_pcs

adata.obsp[dists_key] = neighbors.distances
adata.obsp[conns_key] = neighbors.connectivities
neighbors_dict["params"]["is_directed"] = is_directed

if neighbors.rp_forest is not None:
neighbors_dict["rp_forest"] = neighbors.rp_forest

adata.uns[key_added] = neighbors_dict
adata.obsp[neighbors_dict["distances_key"]] = neighbors.distances
adata.obsp[neighbors_dict["connectivities_key"]] = neighbors.connectivities

logg.info(
" finished",
time=start,
deep=(
f"added to `.uns[{key_added!r}]`\n"
f" `.obsp[{dists_key!r}]`, distances for each pair of neighbors\n"
f" `.obsp[{conns_key!r}]`, weighted adjacency matrix"
f" `.obsp[{neighbors_dict['distances_key']!r}]`, distances for each pair of neighbors\n"
f" `.obsp[{neighbors_dict['connectivities_key']!r}]`, weighted adjacency matrix"
),
)
return adata if copy else None


def neighbors_from_distance(
adata: AnnData,
distances: np.ndarray | SpBase,
*,
n_neighbors: int = 15,
metric: _Metric = "euclidean",
method: _Method = "umap", # default to umap
key_added: str | None = None,
) -> AnnData:
"""Compute neighbors from a precomputer distance matrix.

Parameters
----------
adata
Annotated data matrix.
distances
Precomputed dense or sparse distance matrix.
n_neighbors
Number of nearest neighbors to use in the graph.
method
Method to use for computing the graph. Currently only 'umap' is supported.
key_added
Optional key under which to store the results. Default is 'neighbors'.

Returns
-------
adata
Annotated data with computed distances and connectivities.
"""
if isinstance(distances, SpBase):
distances = sparse.csr_matrix(distances) # noqa: TID251
distances.setdiag(0)
distances.eliminate_zeros()
else:
distances = np.asarray(distances)
np.fill_diagonal(distances, 0)

if method == "umap":
if isinstance(distances, CSRBase):
knn_indices, knn_distances = _get_indices_distances_from_sparse_matrix(
distances, n_neighbors
)
else:
knn_indices, knn_distances = _get_indices_distances_from_dense_matrix(
distances, n_neighbors
)
connectivities = umap(
knn_indices, knn_distances, n_obs=adata.n_obs, n_neighbors=n_neighbors
)
elif method == "gauss":
distances = sparse.csr_matrix(distances) # noqa: TID251
connectivities = _connectivity.gauss(distances, n_neighbors, knn=True)
else:
msg = f"Method {method} not implemented."
raise NotImplementedError(msg)

key_added, neighbors_dict = _get_metadata(
key_added,
n_neighbors=n_neighbors,
method=method,
random_state=0,
metric=metric,
)
adata.uns[key_added] = neighbors_dict
adata.obsp[neighbors_dict["distances_key"]] = distances
adata.obsp[neighbors_dict["connectivities_key"]] = connectivities
return adata
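
For illustration (not part of the diff): a minimal sketch of the new precomputed-distances path through `sc.pp.neighbors` on this branch; the use of scikit-learn here is only for illustration.

import scanpy as sc
from sklearn.metrics import pairwise_distances

adata = sc.datasets.pbmc68k_reduced()
# Any dense (or sparse) pairwise distance matrix over the observations works;
# here it is computed on the stored PCA representation for illustration.
dists = pairwise_distances(adata.obsm["X_pca"], metric="euclidean")

# With `distances=` given, PCA and the KNN search are skipped and
# neighbors_from_distance() stores the matrix plus derived connectivities.
sc.pp.neighbors(adata, distances=dists, n_neighbors=15, metric="euclidean")

adata.obsp["distances"]       # the provided distance matrix (diagonal zeroed)
adata.obsp["connectivities"]  # UMAP connectivities derived from it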


def _get_metadata(
key_added: str | None,
**params: Unpack[NeighborsParams],
) -> tuple[str, NeighborsDict]:
if key_added is None:
return "neighbors", NeighborsDict(
connectivities_key="connectivities",
distances_key="distances",
params=params,
)
return key_added, NeighborsDict(
connectivities_key=f"{key_added}_connectivities",
distances_key=f"{key_added}_distances",
params=params,
)
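
For illustration (not part of the diff): a small sketch of what the `_get_metadata` helper assembles, based on the code above; the values shown are illustrative.

key, meta = _get_metadata(
    None, n_neighbors=15, method="umap", random_state=0, metric="euclidean"
)
# key == "neighbors"
# meta == {
#     "connectivities_key": "connectivities",
#     "distances_key": "distances",
#     "params": {
#         "n_neighbors": 15,
#         "method": "umap",
#         "random_state": 0,
#         "metric": "euclidean",
#     },
# }

# With key_added given, the stored keys are prefixed accordingly:
key, meta = _get_metadata(
    "custom", n_neighbors=10, method="gauss", random_state=0, metric="cosine"
)
# key == "custom"
# meta["distances_key"] == "custom_distances"
# meta["connectivities_key"] == "custom_connectivities"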


class FlatTree(NamedTuple): # noqa: D101
hyperplanes: None
offsets: None