def is_valid_cg(graph: CG):
    """Check whether a supplied chain graph is valid.

    This implements the original definition of a Lauritzen-Wermuth-Frydenberg
    (LWF) chain graph as presented in [1]_.

    Define a cycle as a series of nodes ``X_1 -o X_2 ... X_n -o X_1`` where the
    edges may be directed or undirected. Note that directed edges in a cycle
    must all be aligned in the same direction. A chain graph may only contain
    cycles consisting of only undirected edges. Equivalently, a chain graph
    does not contain any cycles with one or more directed edges.

    Parameters
    ----------
    graph : CG
        The graph.

    Returns
    -------
    is_valid : bool
        Whether supplied `graph` is a valid chain graph.

    Notes
    -----
    Every forbidden cycle contains at least one directed edge ``u -> v``, so it
    is enough to test, for each directed edge, whether ``u`` can be reached
    again from ``v`` by following directed edges forwards and undirected edges
    in either direction. Each test is a breadth-first search with a visited
    set, so a node is expanded at most once per directed edge.

    Cycles made only of undirected edges are allowed, including when they are
    reachable from a directed edge (e.g. ``A -> B, B - C, C - D, D - B``).

    References
    ----------
    .. [1] Frydenberg, Morten. “The Chain Graph Markov Property.” Scandinavian Journal of
        Statistics, vol. 17, no. 4, 1990, pp. 333–53. JSTOR, http://www.jstor.org/stable/4616181.
        Accessed 15 Apr. 2023.
    """
    G_undirected = graph.get_graphs(edge_type=graph.undirected_edge_name)
    G_directed = graph.get_graphs(edge_type=graph.directed_edge_name)

    # A cycle is invalid only if it contains a directed edge, so every
    # directed edge is tried as the edge that such a cycle would go through.
    for tail in graph.nodes():
        for _, head in G_directed.out_edges(nbunch=tail):
            if _closes_semi_directed_cycle(G_directed, G_undirected, tail, head):
                return False

    return True


def _closes_semi_directed_cycle(G_directed, G_undirected, tail, head):
    """Return True if the directed edge ``tail -> head`` lies on a cycle.

    Searches breadth-first from ``head`` for a way back to ``tail``, moving
    along outgoing directed edges and along undirected edges.
    """
    # A directed self-loop is already a cycle containing a directed edge.
    if head == tail:
        return True

    visited = {head}
    queue = deque([head])
    while queue:
        current = queue.popleft()

        # For directed edges progress is allowed for outgoing edges only.
        for _, node in G_directed.out_edges(nbunch=current):
            if node == tail:
                return True
            if node not in visited:
                visited.add(node)
                queue.append(node)

        for node in G_undirected.neighbors(current):
            if node == tail:
                # Do not step straight back over the pair just traversed:
                # ``tail -> head`` followed by ``head - tail`` is not a cycle.
                if current == head:
                    continue
                return True
            if node not in visited:
                visited.add(node)
                queue.append(node)

    return False
graph.add_edge("g", "d", graph.undirected_edge_name) + graph.add_edge("d", "m", graph.undirected_edge_name) + graph.add_edge("a", "m", graph.directed_edge_name) + + return graph + + +@pytest.fixture +def fig_g2_frydenberg(): + graph = CG() + graph.add_nodes_from(["b", "g", "d", "m", "a"]) + graph.add_edge("a", "m", graph.directed_edge_name) + graph.add_edge("m", "g", graph.undirected_edge_name) + graph.add_edge("m", "d", graph.directed_edge_name) + graph.add_edge("g", "d", graph.directed_edge_name) + graph.add_edge("b", "g", graph.directed_edge_name) + + return graph + + +@pytest.fixture +def fig_g3_frydenberg(): + graph = CG() + graph.add_nodes_from(["a", "b", "g"]) + graph.add_edge("b", "a", graph.undirected_edge_name) + graph.add_edge("a", "g", graph.undirected_edge_name) + graph.add_edge("b", "g", graph.directed_edge_name) + + return graph + + +@pytest.fixture +def fig_g4_frydenberg(): + graph = CG() + graph.add_nodes_from(["b", "g", "d", "m", "a"]) + graph.add_edge("b", "g", graph.directed_edge_name) + graph.add_edge("a", "b", graph.undirected_edge_name) + graph.add_edge("g", "d", graph.undirected_edge_name) + graph.add_edge("d", "m", graph.undirected_edge_name) + graph.add_edge("m", "a", graph.undirected_edge_name) + graph.add_edge("a", "g", graph.directed_edge_name) + + return graph + + +@pytest.mark.parametrize( + "G", + [ + "cg_simple_partially_directed_cycle", + "cg_multiple_blocks_partially_directed_cycle", + "fig_g3_frydenberg", + "fig_g4_frydenberg", + ], +) +def test_graphs_are_not_valid_cg(G, request): + graph = request.getfixturevalue(G) + + assert not is_valid_cg(graph) + + +@pytest.mark.parametrize( + "G", + [ + "square_graph", + "fig_g1_frydenberg", + "fig_g2_frydenberg", + ], +) +def test_graphs_are_valid_cg(G, request): + graph = request.getfixturevalue(G) + + assert is_valid_cg(graph) diff --git a/pywhy_graphs/classes/__init__.py b/pywhy_graphs/classes/__init__.py index 3a2fe76d4..ee36f646a 100644 --- a/pywhy_graphs/classes/__init__.py +++ 
b/pywhy_graphs/classes/__init__.py @@ -1,6 +1,6 @@ from . import timeseries from .admg import ADMG -from .cpdag import CPDAG +from .diungraph import CG, CPDAG from .intervention import IPAG, AugmentedGraph, PsiPAG from .pag import PAG from .timeseries import ( diff --git a/pywhy_graphs/classes/cpdag.py b/pywhy_graphs/classes/diungraph.py similarity index 60% rename from pywhy_graphs/classes/cpdag.py rename to pywhy_graphs/classes/diungraph.py index 960d96ace..28a90c477 100644 --- a/pywhy_graphs/classes/cpdag.py +++ b/pywhy_graphs/classes/diungraph.py @@ -8,13 +8,13 @@ from .base import AncestralMixin, ConservativeMixin -class CPDAG(pywhy_nx.MixedEdgeGraph, AncestralMixin, ConservativeMixin): - """Completed partially directed acyclic graphs (CPDAG). +class DiUnGraph(pywhy_nx.MixedEdgeGraph, AncestralMixin): + """ + Private class that represents an abstract MixedEdgeGraph with + only directed and undirected edges. - CPDAGs generalize causal DAGs by allowing undirected edges. - Undirected edges imply uncertainty in the orientation of the causal - relationship. For example, ``A - B``, can be ``A -> B`` or ``A <- B``, - allowing for a Markov equivalence class of DAGs for each CPDAG. + This class is not intended for public use, and exists to reduce + duplication of code. Parameters ---------- @@ -31,43 +31,11 @@ class CPDAG(pywhy_nx.MixedEdgeGraph, AncestralMixin, ConservativeMixin): attr : keyword arguments, optional (default= no attributes) Attributes to add to graph as key=value pairs. - See Also + See also -------- - networkx.DiGraph - networkx.Graph - pywhy_graphs.ADMG - pywhy_graphs.networkx.MixedEdgeGraph - - Notes - ----- - CPDAGs are Markov equivalence class of causal DAGs. The implicit assumption in - these causal graphs are the Structural Causal Model (or SCM) is Markovian, inducing - causal sufficiency, where there is no unobserved latent confounder. 
This allows CPDAGs - to be learned from score-based (such as the "GES" algorithm) and constraint-based - (such as the PC algorithm) approaches for causal structure learning. - - One should not use CPDAGs if they suspect their data has unobserved latent confounders. - - **Edge Type Subgraphs** - The data structure underneath the hood is stored in two networkx graphs: - ``networkx.Graph`` and ``networkx.DiGraph`` to represent the non-directed - edges and directed edges. Non-directed edges in an CPDAG can be present as - undirected edges standing for uncertainty in which directino the directed - edge is in. - - - Directed edges (<-, ->, indicating causal relationship) = `networkx.DiGraph` - The subgraph of directed edges may be accessed by the - `CPDAG.sub_directed_graph`. Their edges in networkx format can be - accessed by `CPDAG.directed_edges` and the corresponding name of the - edge type by `CPDAG.directed_edge_name`. - - Undirected edges (--, indicating uncertainty) = `networkx.Graph` - The subgraph of undirected edges may be accessed by the - `CPDAG.sub_undirected_graph`. Their edges in networkx format can be - accessed by `CPDAG.undirected_edges` and the corresponding name of the - edge type by `CPDAG.undirected_edge_name`. - - By definition, no cycles may exist due to the directed edges. 
+ pywhy_graphs.CG + pywhy_graphs.CPDAG """ def __init__( @@ -85,15 +53,6 @@ def __init__( self._directed_name = directed_edge_name self._undirected_name = undirected_edge_name - from pywhy_graphs import is_valid_mec_graph - - # check that construction of PAG was valid - is_valid_mec_graph(self) - - # extended patterns store unfaithful triples - # these can be used for conservative structure learning algorithm - self._unfaithful_triples: Dict[FrozenSet[Node], None] = dict() - @property def undirected_edge_name(self) -> str: """Name of the undirected edge internal graph.""" @@ -184,6 +143,90 @@ def possible_parents(self, n: Node) -> Iterator[Node]: """ return self.sub_undirected_graph().neighbors(n) + +class CPDAG(DiUnGraph, ConservativeMixin): + """Completed partially directed acyclic graphs (CPDAG). + + CPDAGs generalize causal DAGs by allowing undirected edges. + Undirected edges imply uncertainty in the orientation of the causal + relationship. For example, ``A - B``, can be ``A -> B`` or ``A <- B``, + allowing for a Markov equivalence class of DAGs for each CPDAG. + + Parameters + ---------- + incoming_directed_edges : input directed edges (optional, default: None) + Data to initialize directed edges. All arguments that are accepted + by `networkx.DiGraph` are accepted. + incoming_undirected_edges : input undirected edges (optional, default: None) + Data to initialize undirected edges. All arguments that are accepted + by `networkx.Graph` are accepted. + directed_edge_name : str + The name for the directed edges. By default 'directed'. + undirected_edge_name : str + The name for the directed edges. By default 'undirected'. + attr : keyword arguments, optional (default= no attributes) + Attributes to add to graph as key=value pairs. + + See Also + -------- + networkx.DiGraph + networkx.Graph + pywhy_graphs.ADMG + pywhy_graphs.networkx.MixedEdgeGraph + + Notes + ----- + CPDAGs are Markov equivalence class of causal DAGs. 
The implicit assumption in + these causal graphs are the Structural Causal Model (or SCM) is Markovian, inducing + causal sufficiency, where there is no unobserved latent confounder. This allows CPDAGs + to be learned from score-based (such as the "GES" algorithm) and constraint-based + (such as the PC algorithm) approaches for causal structure learning. + + One should not use CPDAGs if they suspect their data has unobserved latent confounders. + + **Edge Type Subgraphs** + + The data structure underneath the hood is stored in two networkx graphs: + ``networkx.Graph`` and ``networkx.DiGraph`` to represent the non-directed + edges and directed edges. + + - Directed edges (<-, ->, indicating causal relationship) = `networkx.DiGraph` + The subgraph of directed edges may be accessed by the + `CPDAG.sub_directed_graph`. Their edges in networkx format can be + accessed by `CPDAG.directed_edges` and the corresponding name of the + edge type by `CPDAG.directed_edge_name`. + - Undirected edges (--, indicating uncertainty) = `networkx.Graph` + The subgraph of undirected edges may be accessed by the + `CPDAG.sub_undirected_graph`. Their edges in networkx format can be + accessed by `CPDAG.undirected_edges` and the corresponding name of the + edge type by `CPDAG.undirected_edge_name`. + + By definition, no cycles may exist due to the directed edges. 
class CG(DiUnGraph):
    """Chain Graphs (CG).

    Chain graphs represent a generalization of DAGs and undirected graphs.
    Undirected edges ``A - B`` in a chain graph represent a symmetric association of
    two variables due to processes such as dynamic feedback (where ``A``
    influences ``B`` and vice versa) or an artefact of selection bias (where the selection
    of the sample induces association between ``A`` and ``B``) [1]_.

    The implementation supports representation of both Lauritzen-Wermuth-Frydenberg (LWF)
    and Andersson-Madigan-Perlman (AMP) chain graphs.

    Parameters
    ----------
    incoming_directed_edges : input directed edges (optional, default: None)
        Data to initialize directed edges. All arguments that are accepted
        by `networkx.DiGraph` are accepted.
    incoming_undirected_edges : input undirected edges (optional, default: None)
        Data to initialize undirected edges. All arguments that are accepted
        by `networkx.Graph` are accepted.
    directed_edge_name : str
        The name for the directed edges. By default 'directed'.
    undirected_edge_name : str
        The name for the undirected edges. By default 'undirected'.
    attr : keyword arguments, optional (default= no attributes)
        Attributes to add to graph as key=value pairs.

    See Also
    --------
    networkx.DiGraph
    networkx.Graph
    pywhy_graphs.ADMG
    pywhy_graphs.networkx.MixedEdgeGraph
    pywhy_graphs.algorithms.is_valid_cg

    Notes
    -----
    **Edge Type Subgraphs**

    The data structure underneath the hood is stored in two networkx graphs:
    ``networkx.Graph`` and ``networkx.DiGraph`` to represent the non-directed
    edges and directed edges.

    - Directed edges (<-, ->, indicating causal relationship) = `networkx.DiGraph`
        The subgraph of directed edges may be accessed by the
        `CG.sub_directed_graph`. Their edges in networkx format can be
        accessed by `CG.directed_edges` and the corresponding name of the
        edge type by `CG.directed_edge_name`.
    - Undirected edges (--, indicating symmetric association) = `networkx.Graph`
        The subgraph of undirected edges may be accessed by the
        `CG.sub_undirected_graph`. Their edges in networkx format can be
        accessed by `CG.undirected_edges` and the corresponding name of the
        edge type by `CG.undirected_edge_name`.

    By definition, no cycles may exist due to the directed edges.

    References
    ----------
    .. [1] Lauritzen, Steffen L., and Thomas S. Richardson. "Chain
        graph models and their causal interpretations." Journal of the
        Royal Statistical Society: Series B (Statistical Methodology)
        64.3 (2002): 321-348.
    """

    def __init__(
        self,
        incoming_directed_edges=None,
        incoming_undirected_edges=None,
        directed_edge_name: str = "directed",
        undirected_edge_name: str = "undirected",
        **attr,
    ):
        super().__init__(
            incoming_directed_edges=incoming_directed_edges,
            incoming_undirected_edges=incoming_undirected_edges,
            directed_edge_name=directed_edge_name,
            undirected_edge_name=undirected_edge_name,
            # Forward graph-level attributes: they are documented above as
            # being added to the graph, so they must not be dropped here.
            **attr,
        )