Draft PR: Add modularity and modularity_adata functions to scanpy.metrics #3613

Open · wants to merge 35 commits into base: main
Commits (35) · showing changes from all commits
f76dc7b
Modularity score functions with comments
amalia-k510 Apr 25, 2025
f092469
typo fix
amalia-k510 Apr 25, 2025
7ffa1ec
Merge branch 'scverse:main' into main
amalia-k510 Apr 25, 2025
c0d0c52
Merge branch 'scverse:main' into main
amalia-k510 May 7, 2025
68652a7
modularity code updated and 6 tests written for modularity
amalia-k510 May 7, 2025
948319a
error fixing from pipelines
amalia-k510 May 7, 2025
6a64330
ruff error fix
amalia-k510 May 7, 2025
793351f
keywords variable fix
amalia-k510 May 7, 2025
92d8e26
neighbors from a precomputed distance matrix, still need to make sure…
amalia-k510 May 7, 2025
198c4fb
revert back
amalia-k510 May 7, 2025
e7fb67a
code only for the prexisting distance matrix
amalia-k510 May 7, 2025
14cb441
initial changes for the neighborhors
amalia-k510 May 8, 2025
0ce8c15
distances name switch and sparse array allowed
amalia-k510 May 12, 2025
914b87d
input fix
amalia-k510 May 12, 2025
d285203
variable input fixes
amalia-k510 May 12, 2025
50705b3
test added
amalia-k510 May 12, 2025
4730667
numpy issue fix for one line
amalia-k510 May 12, 2025
4b9fe3e
avoid densifying sparse matrices
amalia-k510 May 12, 2025
7d754c7
switched to @needs
amalia-k510 May 12, 2025
15320af
switched to @needs
amalia-k510 May 12, 2025
623a86c
variable fix input
amalia-k510 May 12, 2025
e8c9a25
code from separate PR removed
amalia-k510 May 12, 2025
040b8b7
unify metadata assembly
flying-sheep May 16, 2025
d6a9aee
Discard changes to src/scanpy/neighbors/__init__.py
flying-sheep May 16, 2025
c03b863
comments fix and release notes
amalia-k510 May 23, 2025
473a437
comments fix typo
amalia-k510 May 23, 2025
c6e5d1f
Merge branch 'scverse:main' into main
amalia-k510 May 25, 2025
ac0a6b3
before neighbour merge
amalia-k510 May 25, 2025
1c033f0
notes
amalia-k510 May 25, 2025
662534b
Merge branch 'main' of https://github.com/amalia-k510/scanpy into main
amalia-k510 May 25, 2025
32116f0
Merge branch 'matrix_exist' into main
amalia-k510 May 25, 2025
a1b2033
merge error fix
amalia-k510 May 25, 2025
4cdc729
post merge and call form neighbor
amalia-k510 May 25, 2025
cb7aaf6
release notes fix
amalia-k510 May 26, 2025
7e34ce2
Merge branch 'main' into main
flying-sheep May 28, 2025
1 change: 1 addition & 0 deletions docs/release-notes/3616.feature.md
@@ -0,0 +1 @@
Add modularity scoring via {func}`~scanpy.metrics.modularity` and {func}`~scanpy.metrics.modularity_adata`, with support for directed and undirected graphs {smaller}`A. Karesh`
4 changes: 2 additions & 2 deletions src/scanpy/metrics/__init__.py
@@ -3,7 +3,7 @@
from __future__ import annotations

from ._gearys_c import gearys_c
from ._metrics import confusion_matrix
from ._metrics import confusion_matrix, modularity, modularity_adata
from ._morans_i import morans_i

__all__ = ["confusion_matrix", "gearys_c", "morans_i"]
__all__ = ["confusion_matrix", "gearys_c", "modularity", "modularity_adata", "morans_i"]
95 changes: 94 additions & 1 deletion src/scanpy/metrics/_metrics.py
@@ -8,10 +8,16 @@
import pandas as pd
from natsort import natsorted
from pandas.api.types import CategoricalDtype
from scipy.sparse import coo_matrix

from .._compat import SpBase

if TYPE_CHECKING:
from collections.abc import Sequence

from anndata import AnnData
from numpy.typing import ArrayLike


def confusion_matrix(
orig: pd.Series | np.ndarray | Sequence,
@@ -60,7 +66,7 @@ def confusion_matrix(
orig, new = pd.Series(orig), pd.Series(new)
assert len(orig) == len(new)

unique_labels = pd.unique(np.concatenate((orig.values, new.values)))
unique_labels = pd.unique(np.concatenate((orig.to_numpy(), new.to_numpy())))

# Compute
mtx = _confusion_matrix(orig, new, labels=unique_labels)
@@ -89,3 +95,90 @@ def confusion_matrix(
df = df.loc[np.array(orig_idx), np.array(new_idx)]

return df


def modularity(
connectivities: ArrayLike | SpBase,
labels: pd.Series | ArrayLike,
*,
is_directed: bool,
) -> float:
"""Compute the modularity of a graph given its connectivities and labels.

Parameters
----------
connectivities
Weighted adjacency matrix of the graph, as a dense NumPy array or a sparse matrix.
labels
Cluster labels for each node in the graph.
is_directed
Whether to treat the graph as directed (`True`) or undirected (`False`).

Returns
-------
float
The modularity of the graph based on the provided clustering.
"""
try:
# igraph is imported lazily (not at module level) so that scanpy
# does not require it unless modularity is actually computed
import igraph as ig
except ImportError as e:
msg = "igraph is required for computing modularity"
raise ImportError(msg) from e
if isinstance(connectivities, SpBase):
# sparse input: convert to COO format to extract edges and weights,
# keeping only the non-zero entries and their indices
coo = coo_matrix(connectivities)
edges = list(zip(coo.row, coo.col, strict=False))
weights = coo.data.tolist()
graph = ig.Graph(edges=edges, directed=is_directed)
graph.es["weight"] = weights
else:
# dense input: build the weighted graph directly from the adjacency matrix
dense_array = np.asarray(connectivities)
igraph_mode = ig.ADJ_DIRECTED if is_directed else ig.ADJ_UNDIRECTED
graph = ig.Graph.Weighted_Adjacency(dense_array.tolist(), mode=igraph_mode)
# convert cluster labels to the integer codes required by igraph
labels = pd.Categorical(np.asarray(labels)).codes

return graph.modularity(labels)
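
For illustration (not part of the diff): a minimal sketch of calling the new `modularity` function on a toy graph, assuming this branch plus an installed `igraph`; the adjacency matrix and labels below are made up.

import numpy as np
from scanpy.metrics import modularity

# 4-node undirected graph with two clear communities: {0, 1} and {2, 3}
adjacency = np.array(
    [
        [0.0, 1.0, 0.0, 0.0],
        [1.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 1.0],
        [0.0, 0.0, 1.0, 0.0],
    ]
)
labels = ["a", "a", "b", "b"]

# Splitting the graph into its two connected components should give the
# maximal modularity for this toy example (0.5).
score = modularity(adjacency, labels, is_directed=False)
print(score)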


def modularity_adata(
adata: AnnData,
*,
label_key: str | ArrayLike = "leiden",
key: str = "neighbors",
is_directed: bool,
) -> float:
# defaults to Leiden labels and the standard neighbors graph, as these are the most common
"""Compute modularity from an AnnData object using stored graph and clustering labels.

Parameters
----------
adata
The annotated data matrix.
label_key
Key in `adata.obs` containing the clustering labels, or an array of labels. Defaults to "leiden".
key
Key in `adata.uns` containing the neighbors metadata; its `connectivities_key` entry points to the graph in `adata.obsp`. Defaults to "neighbors".
is_directed
Whether to treat the graph as directed (`True`) or undirected (`False`). The value stored in `adata.uns[key]["params"]["is_directed"]` takes precedence.

Returns
-------
float
The modularity of the graph based on the provided clustering.
"""
label_array = adata.obs[label_key] if isinstance(label_key, str) else label_key
connectivities_key = adata.uns[key]["connectivities_key"]
params = adata.uns[key]["params"]
connectivities = adata.obsp[connectivities_key]
is_directed = params["is_directed"]

if isinstance(connectivities, pd.DataFrame):
connectivities = connectivities.values

return modularity(connectivities, label_array, is_directed=is_directed)
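
For illustration (not part of the diff): a hedged end-to-end sketch of how `modularity_adata` would typically be used after a standard clustering run on this branch; the dataset choice is illustrative.

import scanpy as sc

adata = sc.datasets.pbmc68k_reduced()
sc.pp.neighbors(adata)  # writes .obsp["connectivities"] and .uns["neighbors"]
sc.tl.leiden(adata)     # writes .obs["leiden"]

# Reads the graph via adata.uns["neighbors"]["connectivities_key"] and the
# labels from adata.obs["leiden"], then delegates to modularity().
# Note: per the code above, the stored params["is_directed"] takes precedence
# over the argument passed here.
score = sc.metrics.modularity_adata(
    adata, label_key="leiden", key="neighbors", is_directed=False
)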
151 changes: 124 additions & 27 deletions src/scanpy/neighbors/__init__.py
@@ -21,15 +21,17 @@
from .._utils import NeighborsView, _doc_params, get_literal_vals
from . import _connectivity
from ._common import (
_get_indices_distances_from_dense_matrix,
_get_indices_distances_from_sparse_matrix,
_get_sparse_matrix_from_indices_distances,
)
from ._connectivity import umap
from ._doc import doc_n_pcs, doc_use_rep
from ._types import _KnownTransformer, _Method

if TYPE_CHECKING:
from collections.abc import Callable, MutableMapping
from typing import Any, Literal, NotRequired
from typing import Any, Literal, NotRequired, Unpack

from anndata import AnnData
from igraph import Graph
@@ -58,6 +60,13 @@ class KwdsForTransformer(TypedDict):
random_state: _LegacyRandom


class NeighborsDict(TypedDict): # noqa: D101
connectivities_key: str
distances_key: str
params: NeighborsParams
rp_forest: NotRequired[RPForestDict]


class NeighborsParams(TypedDict): # noqa: D101
n_neighbors: int
method: _Method
@@ -74,6 +83,7 @@ def neighbors( # noqa: PLR0913
n_neighbors: int = 15,
n_pcs: int | None = None,
*,
distances: np.ndarray | SpBase | None = None,
use_rep: str | None = None,
knn: bool = True,
method: _Method = "umap",
@@ -83,6 +93,7 @@ def neighbors( # noqa: PLR0913
random_state: _LegacyRandom = 0,
key_added: str | None = None,
copy: bool = False,
is_directed: bool = False,
) -> AnnData | None:
"""Compute the nearest neighbors distance matrix and a neighborhood graph of observations :cite:p:`McInnes2018`.

@@ -135,6 +146,7 @@ def neighbors( # noqa: PLR0913
Use :func:`rapids_singlecell.pp.neighbors` instead.
metric
A known metric’s name or a callable that returns a distance.
If `distances` is given, this parameter is simply stored in `.uns` (see below).

*ignored if ``transformer`` is an instance.*
metric_kwds
@@ -186,6 +198,18 @@ def neighbors( # noqa: PLR0913
:doc:`/how-to/knn-transformers`

"""
if distances is not None:
if callable(metric):
msg = "`metric` must be a string if `distances` is given."
raise TypeError(msg)
# if a precomputed distance matrix is provided, skip the PCA and distance computation
return neighbors_from_distance(
adata,
distances,
n_neighbors=n_neighbors,
metric=metric,
method=method,
)
start = logg.info("computing neighbors")
adata = adata.copy() if copy else adata
if adata.is_view: # we shouldn't need this here...
@@ -203,51 +227,124 @@
random_state=random_state,
)

if key_added is None:
key_added = "neighbors"
conns_key = "connectivities"
dists_key = "distances"
else:
conns_key = key_added + "_connectivities"
dists_key = key_added + "_distances"

adata.uns[key_added] = {}

neighbors_dict = adata.uns[key_added]

neighbors_dict["connectivities_key"] = conns_key
neighbors_dict["distances_key"] = dists_key

neighbors_dict["params"] = NeighborsParams(
key_added, neighbors_dict = _get_metadata(
key_added,
n_neighbors=neighbors.n_neighbors,
method=method,
random_state=random_state,
metric=metric,
**({} if not metric_kwds else dict(metric_kwds=metric_kwds)),
**({} if use_rep is None else dict(use_rep=use_rep)),
**({} if n_pcs is None else dict(n_pcs=n_pcs)),
)
if metric_kwds:
neighbors_dict["params"]["metric_kwds"] = metric_kwds
if use_rep is not None:
neighbors_dict["params"]["use_rep"] = use_rep
if n_pcs is not None:
neighbors_dict["params"]["n_pcs"] = n_pcs

adata.obsp[dists_key] = neighbors.distances
adata.obsp[conns_key] = neighbors.connectivities
neighbors_dict["params"]["is_directed"] = is_directed

if neighbors.rp_forest is not None:
neighbors_dict["rp_forest"] = neighbors.rp_forest

adata.uns[key_added] = neighbors_dict
adata.obsp[neighbors_dict["distances_key"]] = neighbors.distances
adata.obsp[neighbors_dict["connectivities_key"]] = neighbors.connectivities

logg.info(
" finished",
time=start,
deep=(
f"added to `.uns[{key_added!r}]`\n"
f" `.obsp[{dists_key!r}]`, distances for each pair of neighbors\n"
f" `.obsp[{conns_key!r}]`, weighted adjacency matrix"
f" `.obsp[{neighbors_dict['distances_key']!r}]`, distances for each pair of neighbors\n"
f" `.obsp[{neighbors_dict['connectivities_key']!r}]`, weighted adjacency matrix"
),
)
return adata if copy else None


def neighbors_from_distance(
adata: AnnData,
distances: np.ndarray | SpBase,
*,
n_neighbors: int = 15,
metric: _Metric = "euclidean",
method: _Method = "umap", # default to umap
key_added: str | None = None,
) -> AnnData:
"""Compute neighbors from a precomputer distance matrix.

Parameters
----------
adata
Annotated data matrix.
distances
Precomputed dense or sparse distance matrix.
n_neighbors
Number of nearest neighbors to use in the graph.
method
Method to use for computing the graph. Currently only 'umap' is supported.
key_added
Optional key under which to store the results. Default is 'neighbors'.

Returns
-------
adata
Annotated data with computed distances and connectivities.
"""
if isinstance(distances, SpBase):
distances = sparse.csr_matrix(distances) # noqa: TID251
distances.setdiag(0)
distances.eliminate_zeros()
else:
distances = np.asarray(distances)
np.fill_diagonal(distances, 0)

if method == "umap":
if isinstance(distances, CSRBase):
knn_indices, knn_distances = _get_indices_distances_from_sparse_matrix(
distances, n_neighbors
)
else:
knn_indices, knn_distances = _get_indices_distances_from_dense_matrix(
distances, n_neighbors
)
connectivities = umap(
knn_indices, knn_distances, n_obs=adata.n_obs, n_neighbors=n_neighbors
)
elif method == "gauss":
distances = sparse.csr_matrix(distances) # noqa: TID251
connectivities = _connectivity.gauss(distances, n_neighbors, knn=True)
else:
msg = f"Method {method} not implemented."
raise NotImplementedError(msg)

key_added, neighbors_dict = _get_metadata(
key_added,
n_neighbors=n_neighbors,
method=method,
random_state=0,
metric=metric,
)
adata.uns[key_added] = neighbors_dict
adata.obsp[neighbors_dict["distances_key"]] = distances
adata.obsp[neighbors_dict["connectivities_key"]] = connectivities
return adata
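
For illustration (not part of the diff): a minimal sketch of the new precomputed-distances path through `sc.pp.neighbors` on this branch; the use of scikit-learn here is only for illustration.

import scanpy as sc
from sklearn.metrics import pairwise_distances

adata = sc.datasets.pbmc68k_reduced()
# Any dense (or sparse) pairwise distance matrix over the observations works;
# here it is computed on the stored PCA representation for illustration.
dists = pairwise_distances(adata.obsm["X_pca"], metric="euclidean")

# With `distances=` given, PCA and the KNN search are skipped and
# neighbors_from_distance() stores the matrix plus derived connectivities.
sc.pp.neighbors(adata, distances=dists, n_neighbors=15, metric="euclidean")

adata.obsp["distances"]       # the provided distance matrix (diagonal zeroed)
adata.obsp["connectivities"]  # UMAP connectivities derived from it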


def _get_metadata(
key_added: str | None,
**params: Unpack[NeighborsParams],
) -> tuple[str, NeighborsDict]:
if key_added is None:
return "neighbors", NeighborsDict(
connectivities_key="connectivities",
distances_key="distances",
params=params,
)
return key_added, NeighborsDict(
connectivities_key=f"{key_added}_connectivities",
distances_key=f"{key_added}_distances",
params=params,
)
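
For illustration (not part of the diff): a small sketch of what the `_get_metadata` helper assembles, based on the code above; the values shown are illustrative.

key, meta = _get_metadata(
    None, n_neighbors=15, method="umap", random_state=0, metric="euclidean"
)
# key == "neighbors"
# meta == {
#     "connectivities_key": "connectivities",
#     "distances_key": "distances",
#     "params": {
#         "n_neighbors": 15,
#         "method": "umap",
#         "random_state": 0,
#         "metric": "euclidean",
#     },
# }

# With key_added given, the stored keys are prefixed accordingly:
key, meta = _get_metadata(
    "custom", n_neighbors=10, method="gauss", random_state=0, metric="cosine"
)
# key == "custom"
# meta["distances_key"] == "custom_distances"
# meta["connectivities_key"] == "custom_connectivities"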


class FlatTree(NamedTuple): # noqa: D101
hyperplanes: None
offsets: None