From cb812be98cf74df56c0f441498d2d45c162e63af Mon Sep 17 00:00:00 2001 From: Luca Mondada Date: Thu, 8 May 2025 09:02:00 +0200 Subject: [PATCH] Squashed commit of feat/persistenthugr changes --- Cargo.lock | 55 ++- Cargo.toml | 1 + hugr-core/Cargo.toml | 1 + hugr-core/src/hugr.rs | 1 + hugr-core/src/hugr/patch/simple_replace.rs | 47 +++ hugr-core/src/hugr/persistent.rs | 385 +++++++++++++++++++ hugr-core/src/hugr/persistent/resolver.rs | 43 +++ hugr-core/src/hugr/persistent/state_space.rs | 289 ++++++++++++++ hugr-core/src/hugr/persistent/tests.rs | 376 ++++++++++++++++++ hugr-core/src/hugr/views/sibling_subgraph.rs | 26 ++ 10 files changed, 1219 insertions(+), 5 deletions(-) create mode 100644 hugr-core/src/hugr/persistent.rs create mode 100644 hugr-core/src/hugr/persistent/resolver.rs create mode 100644 hugr-core/src/hugr/persistent/state_space.rs create mode 100644 hugr-core/src/hugr/persistent/tests.rs diff --git a/Cargo.lock b/Cargo.lock index 5fe1d43ca..a012c4b8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -493,6 +493,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + [[package]] name = "cool_asserts" version = "2.0.3" @@ -624,6 +630,30 @@ dependencies = [ "syn", ] +[[package]] +name = "derive-where" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2364b9aa47e460ce9bca6ac1777d14c98eef7e274eb077beed49f3adc94183ed" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_more" +version = "0.99.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + [[package]] name = "derive_more" version = "1.0.0" @@ 
-1109,7 +1139,7 @@ dependencies = [ "clap", "clap-verbosity-flag", "clio", - "derive_more", + "derive_more 1.0.0", "hugr", "predicates", "rstest", @@ -1123,7 +1153,7 @@ dependencies = [ "cgmath", "cool_asserts", "delegate", - "derive_more", + "derive_more 1.0.0", "downcast-rs", "enum_dispatch", "fxhash", @@ -1141,6 +1171,7 @@ dependencies = [ "proptest", "proptest-derive", "regex", + "relrc", "rstest", "semver", "serde", @@ -1160,7 +1191,7 @@ version = "0.15.4" dependencies = [ "anyhow", "delegate", - "derive_more", + "derive_more 1.0.0", "hugr-core", "hugr-llvm", "inkwell", @@ -1180,7 +1211,7 @@ dependencies = [ "base64", "bumpalo", "capnp", - "derive_more", + "derive_more 1.0.0", "fxhash", "indexmap", "insta", @@ -1202,7 +1233,7 @@ name = "hugr-passes" version = "0.15.4" dependencies = [ "ascent", - "derive_more", + "derive_more 1.0.0", "hugr-core", "itertools 0.14.0", "lazy_static", @@ -2351,6 +2382,20 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" +[[package]] +name = "relrc" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "036a8b094257e8a5bae0f9978044f2593619afd53476d534cfe1f31a5198ebeb" +dependencies = [ + "derive-where", + "derive_more 0.99.20", + "fxhash", + "itertools 0.13.0", + "petgraph 0.8.1", + "thiserror 1.0.69", +] + [[package]] name = "reqwest" version = "0.12.15" diff --git a/Cargo.toml b/Cargo.toml index 97dad7dea..5902f48f9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -83,6 +83,7 @@ pest_derive = "2.8.0" pretty = "0.12.4" pretty_assertions = "1.4.1" zstd = "0.13.2" +relrc = "0.4.1" # These public dependencies usually require breaking changes downstream, so we # try to be as permissive as possible. 
diff --git a/hugr-core/Cargo.toml b/hugr-core/Cargo.toml index 653e47b73..c5f402f43 100644 --- a/hugr-core/Cargo.toml +++ b/hugr-core/Cargo.toml @@ -54,6 +54,7 @@ thiserror = { workspace = true } typetag = { workspace = true } semver = { workspace = true, features = ["serde"] } zstd = { workspace = true, optional = true } +relrc = { workspace = true, features = ["petgraph"] } [dev-dependencies] rstest = { workspace = true } diff --git a/hugr-core/src/hugr.rs b/hugr-core/src/hugr.rs index 16152b298..f2345f9af 100644 --- a/hugr-core/src/hugr.rs +++ b/hugr-core/src/hugr.rs @@ -5,6 +5,7 @@ pub mod hugrmut; pub(crate) mod ident; pub mod internal; pub mod patch; +pub mod persistent; pub mod serialize; pub mod validate; pub mod views; diff --git a/hugr-core/src/hugr/patch/simple_replace.rs b/hugr-core/src/hugr/patch/simple_replace.rs index 8fc7c7617..d0a906bb1 100644 --- a/hugr-core/src/hugr/patch/simple_replace.rs +++ b/hugr-core/src/hugr/patch/simple_replace.rs @@ -147,6 +147,21 @@ impl OutputBoundaryMap { } } } + + fn map_nodes(&self, node_map: impl Fn(N) -> M) -> OutputBoundaryMap { + match self { + OutputBoundaryMap::ByIncoming(map) => OutputBoundaryMap::ByIncoming( + map.iter() + .map(|(&(node, port), &val)| ((node_map(node), port), val)) + .collect(), + ), + OutputBoundaryMap::ByOutgoing(map) => OutputBoundaryMap::ByOutgoing( + map.iter() + .map(|(&(node, port), &val)| ((node_map(node), port), val)) + .collect(), + ), + } + } } impl SimpleReplacement { @@ -392,6 +407,38 @@ impl SimpleReplacement { .chain(outgoing_boundary) .chain(host_to_host_boundary) } + + /// Map the host nodes in `self` according to `node_map`. + /// + /// `node_map` must map nodes in the current HUGR of the subgraph to + /// its equivalent nodes in some `new_hugr`. + /// + /// This converts a replacement that acts on nodes of type `HostNode` to + /// a replacement that acts on `new_hugr`, with nodes of type `N`. + /// + /// This does not check convexity. 
It is up to the caller to ensure that + /// the mapped replacement obtained from this applies on a convex subgraph + /// of the new HUGR. + pub(crate) fn map_host_nodes( + &self, + node_map: impl Fn(HostNode) -> N, + ) -> SimpleReplacement { + let Self { + subgraph, + replacement, + nu_inp, + nu_out, + } = self; + let nu_inp = nu_inp + .iter() + .map(|(&repl_node_port, &(host_node, host_port))| { + (repl_node_port, (node_map(host_node), host_port)) + }) + .collect(); + let nu_out = nu_out.map_nodes(&node_map); + let subgraph = subgraph.map_nodes(node_map); + SimpleReplacement::new(subgraph, replacement.clone(), nu_inp, nu_out) + } } impl PatchVerification for SimpleReplacement { diff --git a/hugr-core/src/hugr/persistent.rs b/hugr-core/src/hugr/persistent.rs new file mode 100644 index 000000000..757f9f693 --- /dev/null +++ b/hugr-core/src/hugr/persistent.rs @@ -0,0 +1,385 @@ +//! Persistent data structure for HUGR mutations. +//! +//! This module provides a persistent data structure [`PersistentHugr`] that +//! implements [`crate::HugrView`]; mutations to the data are stored +//! persistently as a set of [`Commit`]s along with the dependencies between the +//! commits. +//! +//! As a result of persistency, the entire mutation history of a HUGR can be +//! traversed and references to previous versions of the data remain valid even +//! as the HUGR graph is "mutated" by applying patches: the patches are in +//! effect added to the history as new commits. +//! +//! The data structure underlying [`PersistentHugr`], which stores the history +//! of all commits, is [`CommitStateSpace`]. Multiple [`PersistentHugr`] can be +//! stored within a single [`CommitStateSpace`], which allows for the efficient +//! exploration of the space of all possible graph rewrites. +//! +//! ## Overlapping commits +//! +//! In general, [`CommitStateSpace`] may contain overlapping commits. Such +//! mutations are mutually exclusive as they modify the same nodes. It is +//! 
therefore not possible to apply all commits in a [`CommitStateSpace`] +//! simultaneously. A [`PersistentHugr`] on the other hand always corresponds to +//! a subgraph of a [`CommitStateSpace`] that is guaranteed to contain only +//! non-overlapping, compatible commits. By applying all commits in a +//! [`PersistentHugr`], we can materialize a [`Hugr`]. Traversing the +//! materialized HUGR is equivalent to using the [`crate::HugrView`] +//! implementation of the corresponding [`PersistentHugr`]. +//! +//! ## Summary of data types +//! +//! - [`Commit`] A modification to a [`Hugr`] (currently a +//! [`SimpleReplacement`]) that forms the atomic unit of change for a +//! [`PersistentHugr`] (like a commit in git). This is a reference-counted +//! value that is cheap to clone and will be freed when the last reference is +//! dropped. +//! - [`PersistentHugr`] A data structure that implements [`crate::HugrView`] +//! and can be used as a drop-in replacement for a [`crate::Hugr`] for +//! read-only access and mutations through the [`PatchVerification`] and +//! [`Patch`] traits. Mutations are stored as a history of commits. Unlike +//! [`CommitStateSpace`], it maintains the invariant that all contained +//! commits are compatible with each other. +//! - [`CommitStateSpace`] Stores commits, recording the dependencies between +//! them. Includes the base HUGR and any number of possibly incompatible +//! (overlapping) commits. Unlike a [`PersistentHugr`], a state space can +//! contain mutually exclusive commits. +//! +//! ## Usage +//! +//! A [`PersistentHugr`] can be created from a base HUGR using +//! [`PersistentHugr::with_base`]. Replacements can then be applied to it +//! using [`PersistentHugr::add_replacement`]. Alternatively, if you already +//! have a populated state space, use [`PersistentHugr::try_new`] to create a +//! new HUGR with those commits. +//! +//! Add a sequence of commits to a state space by merging a [`PersistentHugr`] +//!
into it using [`CommitStateSpace::extend`] or directly using +//! [`CommitStateSpace::try_add_commit`]. +//! +//! To obtain a [`PersistentHugr`] from your state space, use +//! [`CommitStateSpace::try_extract_hugr`]. A [`PersistentHugr`] can always be +//! materialized into a [`Hugr`] type using [`PersistentHugr::to_hugr`]. + +mod resolver; +mod state_space; + +use std::{ + collections::{BTreeMap, BTreeSet}, + mem, vec, +}; + +use delegate::delegate; +use derive_more::derive::From; +use itertools::Itertools; +use relrc::RelRc; +use state_space::{CommitData, CommitId}; +pub use state_space::{CommitStateSpace, InvalidCommit, PatchNode}; + +pub use resolver::PointerEqResolver; + +use crate::{ + hugr::patch::{simple_replace, Patch, PatchVerification}, + Hugr, HugrView, Node, SimpleReplacement, +}; + +/// A replacement operation that can be applied to a [`PersistentHugr`]. +pub type PersistentReplacement = SimpleReplacement; + +/// A patch that can be applied to a [`PersistentHugr`] or a +/// [`CommitStateSpace`] as an atomic commit. +/// +/// Commits are cheap to clone: they are reference-counted pointers to the +/// patch data. They also maintain strong references to the ancestor commits +/// that the patch may depend on (i.e. other patches that must be applied +/// before `self` can be applied). +/// +/// Currently, patches must be [`SimpleReplacement`]s. +#[derive(Debug, Clone, From)] +#[repr(transparent)] +pub struct Commit(RelRc); + +impl Commit { + /// Create a commit from a simple replacement. + /// + /// Requires a reference to the commit state space that the nodes in + /// `replacement` refer to. + /// + /// The replacement must act on a non-empty subgraph, otherwise this + /// function will return an [`InvalidCommit::EmptyReplacement`] error. + /// + /// If any of the parents of the replacement are not in the commit state + /// space, this function will return an [`InvalidCommit::UnknownParent`] + /// error. 
+ pub fn try_from_replacement( + replacement: PersistentReplacement, + graph: &CommitStateSpace, + ) -> Result { + if replacement.subgraph().nodes().is_empty() { + return Err(InvalidCommit::EmptyReplacement); + } + let parent_ids = replacement.invalidation_set().map(|n| n.0).unique(); + let parents = parent_ids + .map(|id| { + if graph.contains_id(id) { + Ok(graph.get_commit(id).clone()) + } else { + Err(InvalidCommit::UnknownParent(id)) + } + }) + .collect::, _>>()?; + let rc = RelRc::with_parents( + replacement.into(), + parents.into_iter().map(|p| (p.into(), ())), + ); + Ok(Self(rc)) + } + + fn as_relrc(&self) -> &RelRc { + &self.0 + } + + fn replacement(&self) -> Option<&PersistentReplacement> { + match self.0.value() { + CommitData::Base(_) => None, + CommitData::Replacement(replacement) => Some(replacement), + } + } + + /// Get the set of nodes invalidated by applying `self`. + fn invalidation_set(&self) -> impl Iterator + '_ { + self.replacement() + .into_iter() + .flat_map(|r| r.invalidation_set()) + } + + delegate! { + to self.0 { + fn value(&self) -> &CommitData; + } + } +} + +impl From for RelRc { + fn from(Commit(data): Commit) -> Self { + data + } +} + +impl<'a> From<&'a RelRc> for &'a Commit { + fn from(rc: &'a RelRc) -> Self { + // SAFETY: Commit is a transparent wrapper around RelRc + unsafe { mem::transmute(rc) } + } +} + +/// A HUGR-like object that tracks its mutation history. +/// +/// When mutations are applied to a [`PersistentHugr`], the object is mutated +/// as expected but all references to previous versions of the object remain +/// valid. Furthermore, older versions of the data can be recovered by +/// traversing the object's history with [`Self::as_state_space`]. +/// +/// Multiple references to various versions of a Hugr can be maintained in +/// parallel by extracting them from a shared [`CommitStateSpace`]. 
+/// +/// ## Supported access and mutation +/// +/// [`PersistentHugr`] implements [`crate::HugrView`], so that it can be used as +/// a drop-in substitute for a Hugr wherever read-only access is required. It +/// does not implement [`HugrMut`](crate::hugr::HugrMut), however. Mutations +/// must be performed by applying patches (see [`PatchVerification`] and +/// [`Patch`]). Currently, only [`SimpleReplacement`] patches are supported. You +/// can use [`Self::add_replacement`] to add a patch to `self`, or use the +/// aforementioned patch traits. +/// +/// ## Patches, commits and history +/// +/// A [`PersistentHugr`] is composed of a unique base HUGR, along with a set of +/// mutations applied to it. All mutations are stored in the form of commits +/// that store the patches applied on top of a base HUGR. You may think of it +/// as a "queue" of patches: whenever the patch of a commit is "applied", it is +/// in reality just added to the queue. In practice, the total order of the +/// queue is irrelevant, as patches only depend on a subset of the previously +/// applied patches. This creates a partial order on the commits: a directed +/// acyclic graph that we call the "commit history". A commit history is in +/// effect a subgraph of a commit state space, with the additional invariant +/// that all commits within the history are compatible. +#[derive(Clone, Debug)] +pub struct PersistentHugr { + /// The state space of all commits. + /// + /// Invariant: all commits are "compatible", meaning that no two patches + /// invalidate the same node. + state_space: CommitStateSpace, +} + +impl PersistentHugr { + /// Create a [`PersistentHugr`] with `hugr` as its base HUGR. + /// + /// All replacements added in the future will apply on top of `hugr`. + pub fn with_base(hugr: Hugr) -> Self { + let state_space = CommitStateSpace::with_base(hugr); + Self { state_space } + } + + /// Create a [`PersistentHugr`] from a list of commits.
+ /// + /// `Self` will correspond to the HUGR obtained by applying the patches of + /// the given commits and of all their ancestors. + /// + /// If the state space of the commits would include two commits which are + /// incompatible, or if the commits do not share a common base HUGR, then + /// an error is returned. + pub fn try_new(commits: impl IntoIterator) -> Result { + let graph = CommitStateSpace::try_from_commits(commits)?; + graph.try_extract_hugr(graph.all_commit_ids()) + } + + /// Construct a [`PersistentHugr`] from a [`CommitStateSpace`]. + /// + /// Does not check that the commits are compatible. + fn from_state_space_unsafe(state_space: CommitStateSpace) -> Self { + Self { state_space } + } + + /// Add a replacement to `self`. + /// + /// The effect of this is equivalent to applying `replacement` to the + /// equivalent HUGR, i.e. `self.to_hugr().apply(replacement)` is + /// equivalent to `self.add_replacement(replacement).to_hugr()`. + /// + /// This will panic if the replacement is invalid. Use + /// [`PersistentHugr::try_add_replacement`] instead for more graceful error + /// handling. + pub fn add_replacement(&mut self, replacement: PersistentReplacement) -> CommitId { + self.try_add_replacement(replacement) + .expect("invalid replacement") + } + + /// Add a replacement to `self`, with error handling. + /// + /// Return the ID of the commit if it was added successfully. This may + /// return the following errors: + /// - an [`InvalidCommit::IncompatibleHistory`] error if the replacement is + /// incompatible with another commit already in `self`, or + /// - an [`InvalidCommit::UnknownParent`] error if one of the commits that + /// `replacement` applies on top of is not contained in `self`.
+ pub fn try_add_replacement( + &mut self, + replacement: PersistentReplacement, + ) -> Result { + // Check that `replacement` does not conflict with siblings at any of its + // parents + let new_invalid_nodes = replacement + .subgraph() + .nodes() + .iter() + .map(|&PatchNode(id, node)| (id, node)) + .into_grouping_map() + .collect::>(); + for (parent, new_invalid_nodes) in new_invalid_nodes { + let invalidation_set = self.invalidation_set(parent).collect(); + if let Some(&node) = new_invalid_nodes.intersection(&invalidation_set).next() { + return Err(InvalidCommit::IncompatibleHistory(parent, node)); + } + } + + self.state_space.try_add_replacement(replacement) + } + + /// Convert this `PersistentHugr` to a materialized HUGR by applying all + /// commits in `self`. + /// + /// This operation may be expensive and should be avoided in + /// performance-critical paths. For read-only views into the data, rely + /// instead on the [`crate::HugrView`] implementation when possible. + pub fn to_hugr(&self) -> Hugr { + let mut hugr = self.state_space.base_hugr().clone(); + let mut node_map = BTreeMap::from_iter( + hugr.nodes() + .map(|n| (PatchNode(self.state_space.base(), n), n)), + ); + for commit_id in self.toposort_commits() { + let Some(repl) = self.state_space.get_commit(commit_id).replacement() else { + continue; + }; + let repl = repl.map_host_nodes(|n| node_map[&n]); + + let simple_replace::Outcome { + node_map: new_node_map, + .. + } = repl.apply(&mut hugr).expect("invalid replacement"); + for (old_node, new_node) in new_node_map { + let old_patch_node = PatchNode(commit_id, old_node); + node_map.insert(old_patch_node, new_node); + } + } + hugr + } + + /// Get a reference to the underlying state space of `self`. + pub fn as_state_space(&self) -> &CommitStateSpace { + &self.state_space + } + + /// Convert `self` into its underlying [`CommitStateSpace`]. + pub fn into_state_space(self) -> CommitStateSpace { + self.state_space + } + + delegate! 
{ + to self.state_space { + /// Check if `commit` is in the PersistentHugr. + pub fn contains(&self, commit: &Commit) -> bool; + /// Get the base commit ID. + pub fn base(&self) -> CommitId; + /// Get the base [`Hugr`]. + pub fn base_hugr(&self) -> &Hugr; + /// Get the base commit. + pub fn base_commit(&self) -> &Commit; + } + } + + /// Get all commits in `self` in topological order. + fn toposort_commits(&self) -> Vec { + petgraph::algo::toposort(self.state_space.as_history_graph(), None) + .expect("history is a DAG") + } + + /// Iterator over the commit IDs in the history. + /// + /// The commits are not guaranteed to be in any particular order. + fn commit_ids(&self) -> impl Iterator + '_ { + self.state_space.all_commit_ids() + } + + /// Get the set of nodes of `commit_id` that are invalidated by applying + /// children commits of `commit_id`. + fn invalidation_set(&self, commit_id: CommitId) -> impl Iterator + '_ { + let children = self + .state_space + .children(commit_id) + .filter(|child_id| self.commit_ids().contains(child_id)); + children + .flat_map(move |child_id| self.state_space.invalidation_set(child_id, commit_id)) + .unique() + } +} + +impl IntoIterator for PersistentHugr { + type Item = Commit; + + type IntoIter = vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.state_space + .all_commit_ids() + .map(|id| self.state_space.get_commit(id).clone()) + .collect_vec() + .into_iter() + } +} + +#[cfg(test)] +mod tests; diff --git a/hugr-core/src/hugr/persistent/resolver.rs b/hugr-core/src/hugr/persistent/resolver.rs new file mode 100644 index 000000000..2390a51f2 --- /dev/null +++ b/hugr-core/src/hugr/persistent/resolver.rs @@ -0,0 +1,43 @@ +use relrc::EquivalenceResolver; + +/// A resolver that considers two nodes equivalent if they are the same pointer. +/// +/// Resolvers determine when two patches are equivalent and should be merged +/// in the patch history. 
+/// +/// This is a trivial resolver (to be expanded on later), that considers two +/// patches equivalent if they point to the same data in memory. +#[derive(Clone, Debug, Default)] +pub struct PointerEqResolver; + +impl EquivalenceResolver for PointerEqResolver { + type MergeMapping = (); + + type DedupKey = *const N; + + fn id(&self) -> String { + "PointerEqResolver".to_string() + } + + fn dedup_key(&self, value: &N, _incoming_edges: &[&E]) -> Self::DedupKey { + value as *const N + } + + fn try_merge_mapping( + &self, + a_value: &N, + _a_incoming_edges: &[&E], + b_value: &N, + _b_incoming_edges: &[&E], + ) -> Result { + if std::ptr::eq(a_value, b_value) { + Ok(()) + } else { + Err(relrc::resolver::NotEquivalent) + } + } + + fn move_edge_source(&self, _mapping: &Self::MergeMapping, edge: &E) -> E { + edge.clone() + } +} diff --git a/hugr-core/src/hugr/persistent/state_space.rs b/hugr-core/src/hugr/persistent/state_space.rs new file mode 100644 index 000000000..cb172cb3f --- /dev/null +++ b/hugr-core/src/hugr/persistent/state_space.rs @@ -0,0 +1,289 @@ +use std::collections::{BTreeSet, VecDeque}; + +use delegate::delegate; +use derive_more::From; +use itertools::Itertools; +use relrc::{HistoryGraph, RelRc}; +use thiserror::Error; + +use super::{Commit, PersistentHugr, PersistentReplacement, PointerEqResolver}; +use crate::{Hugr, Node}; + +/// A copyable handle to a [`Commit`] vertex within a [`CommitStateSpace`] +pub(super) type CommitId = relrc::NodeId; + +/// A HUGR node within a commit of the commit state space +#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug, Hash)] +pub struct PatchNode(pub CommitId, pub Node); + +impl std::fmt::Display for PatchNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } +} + +/// The data stored in a [`Commit`], either the base [`Hugr`] (on which all +/// other commits apply), or a [`PersistentReplacement`] +#[derive(Debug, Clone, From)] +pub(super) enum CommitData 
{ + Base(Hugr), + Replacement(PersistentReplacement), +} + +/// A set of commits with directed (acyclic) dependencies between them. +/// +/// Vertices in the [`CommitStateSpace`] are [`Commit`]s and there is an edge +/// from a commit `c1` to a commit `c2` if `c1` must be applied before `c2`: +/// in other words, if `c2` deletes nodes that are introduced in `c1`. We say +/// `c2` depends on (or is a child of) `c1`. +/// +/// A [`CommitStateSpace`] always has a unique base commit (the root of the +/// graph). All other commits are [`PersistentReplacement`]s that apply on top +/// of it. Commits are stored as [`RelRc`]s: they are reference-counted pointers +/// to the patch data that also maintain strong references to the commit's +/// parents. This means that commits can be cloned cheaply and dropped freely; +/// the memory of a commit will be released whenever no other commit in scope +/// depends on it. +/// +/// Commits in a [`CommitStateSpace`] DO NOT represent a valid history in the +/// general case: pairs of commits may be mutually exclusive if they modify the +/// same subgraph. Use [`Self::try_extract_hugr`] to get a [`PersistentHugr`] +/// with a set of compatible commits. +#[derive(Clone, Debug)] +pub struct CommitStateSpace { + /// A set of commits with directed (acyclic) dependencies between them. + /// + /// Each commit is stored as a [`RelRc`]. + graph: HistoryGraph, + /// The unique root of the commit graph. + /// + /// The only commit in the graph with variant [`CommitData::Base`]. All + /// other commits are [`CommitData::Replacement`]s, and are descendants + /// of this. + base_commit: CommitId, +} + +impl CommitStateSpace { + /// Create a new commit state space with a single base commit. 
+ pub fn with_base(hugr: Hugr) -> Self { + let commit = RelRc::new(CommitData::Base(hugr)); + let graph = HistoryGraph::new([commit.clone()], PointerEqResolver); + let base_commit = graph + .all_node_ids() + .exactly_one() + .ok() + .expect("graph has exactly one commit (just added)"); + Self { graph, base_commit } + } + + /// Create a new commit state space from a set of commits. + /// + /// Return a [`InvalidCommit::NonUniqueBase`] error if the commits do + /// not share a unique common ancestor base commit. + pub fn try_from_commits( + commits: impl IntoIterator, + ) -> Result { + let graph = HistoryGraph::new(commits.into_iter().map_into(), PointerEqResolver); + let base_commits = graph + .all_node_ids() + .filter(|&id| matches!(graph.get_node(id).value(), CommitData::Base(_))) + .collect_vec(); + let base_commit = base_commits + .into_iter() + .exactly_one() + .map_err(|err| InvalidCommit::NonUniqueBase(err.len()))?; + Ok(Self { graph, base_commit }) + } + + /// Add a replacement commit to the graph. + /// + /// Return an [`InvalidCommit::EmptyReplacement`] error if the replacement + /// is empty. + pub fn try_add_replacement( + &mut self, + replacement: PersistentReplacement, + ) -> Result { + let commit = Commit::try_from_replacement(replacement, self)?; + self.try_add_commit(commit) + } + + /// Add a set of commits to the state space. + /// + /// Commits must be valid replacement commits or coincide with the existing + /// base commit. + pub fn extend(&mut self, commits: impl IntoIterator) { + // TODO: make this more efficient + for commit in commits { + self.try_add_commit(commit) + .expect("invalid commit in extend"); + } + } + + /// Add a commit to the state space. + /// + /// Returns an [`InvalidCommit::NonUniqueBase`] error if the commit is a + /// base commit and does not coincide with the existing base commit. 
+ pub fn try_add_commit(&mut self, commit: Commit) -> Result { + if matches!(commit.value(), CommitData::Base(_) if !commit.0.ptr_eq(&self.base_commit().0)) + { + return Err(InvalidCommit::NonUniqueBase(2)); + } + let commit = commit.into(); + Ok(self.graph.insert_node(commit)) + } + + /// Check if `commit` is in the commit state space. + pub fn contains(&self, commit: &Commit) -> bool { + self.graph.contains(commit.as_relrc()) + } + + /// Check if `commit_id` is in the commit state space. + pub fn contains_id(&self, commit_id: CommitId) -> bool { + self.graph.contains_id(commit_id) + } + + /// Extract a `PersistentHugr` from this state space, consisting of + /// `commits` and their ancestors. + /// + /// All commits in the resulting `PersistentHugr` are guaranteed to be + /// compatible. If the selected commits would include two commits which + /// are incompatible, a [`InvalidCommit::IncompatibleHistory`] error is + /// returned. + pub fn try_extract_hugr( + &self, + commits: impl IntoIterator, + ) -> Result { + // Define commits as the set of all ancestors of the given commits + let commits = get_all_ancestors(&self.graph, commits); + + // Check that all commits are compatible + for &commit_id in &commits { + if let Some(node) = find_conflicting_node(self, commit_id, &commits) { + return Err(InvalidCommit::IncompatibleHistory(commit_id, node)); + } + } + + let commits = commits + .into_iter() + .map(|id| self.get_commit(id).as_relrc().clone()); + let subgraph = HistoryGraph::new(commits, PointerEqResolver); + + Ok(PersistentHugr::from_state_space_unsafe(Self { + graph: subgraph, + base_commit: self.base_commit, + })) + } + + /// Get the base commit ID. + pub fn base(&self) -> CommitId { + self.base_commit + } + + /// Get the base [`Hugr`]. + pub fn base_hugr(&self) -> &Hugr { + let CommitData::Base(hugr) = self.graph.get_node(self.base_commit).value() else { + panic!("base commit is not a base hugr"); + }; + hugr + } + + /// Get the base commit. 
+ pub fn base_commit(&self) -> &Commit { + self.get_commit(self.base_commit) + } + + /// Get the commit with ID `commit_id`. + pub fn get_commit(&self, commit_id: CommitId) -> &Commit { + self.graph.get_node(commit_id).into() + } + + /// Get an iterator over all commit IDs in the state space. + pub fn all_commit_ids(&self) -> impl Iterator + '_ { + self.graph.all_node_ids() + } + + /// Get the set of nodes invalidated by `commit_id` in `parent`. + pub(super) fn invalidation_set( + &self, + commit_id: CommitId, + parent: CommitId, + ) -> impl Iterator + '_ { + let commit = self.get_commit(commit_id); + let ret = commit + .invalidation_set() + .filter(move |n| n.0 == parent) + .map(|n| n.1); + Some(ret).into_iter().flatten() + } + + delegate! { + to self.graph { + /// Get the parents of `commit_id` + pub fn parents(&self, commit_id: CommitId) -> impl Iterator + '_; + /// Get the children of `commit_id` + pub fn children(&self, commit_id: CommitId) -> impl Iterator + '_; + } + } + + pub(super) fn as_history_graph(&self) -> &HistoryGraph { + &self.graph + } +} + +fn get_all_ancestors( + graph: &HistoryGraph, + commits: impl IntoIterator, +) -> BTreeSet { + let mut queue = VecDeque::from_iter(commits); + let mut ancestors = BTreeSet::from_iter(queue.iter().copied()); + while let Some(commit_id) = queue.pop_front() { + for parent in graph.parents(commit_id) { + if ancestors.insert(parent) { + queue.push_back(parent); + } + } + } + ancestors +} + +/// An error that occurs when trying to add a commit to a commit state space. +#[derive(Debug, Clone, Error)] +#[non_exhaustive] +pub enum InvalidCommit { + /// The commit conflicts with existing commits in the state space. + #[error("Incompatible history: children of commit {0:?} conflict in {1:?}")] + IncompatibleHistory(CommitId, Node), + + /// The commit has a parent not present in the state space. + #[error("Missing parent commit: {0:?}")] + UnknownParent(CommitId), + + /// The commit is not a replacement. 
+    #[error("Commit is not a replacement")]
+    NotReplacement,
+
+    /// The set of commits contains zero or more than one base commit.
+    #[error("{0} base commits found (should be 1)")]
+    NonUniqueBase(usize),
+
+    /// The commit is an empty replacement.
+    #[error("Not allowed: empty replacement")]
+    EmptyReplacement,
+}
+
+/// Find a node that is invalidated by more than one child of `commit_id`.
+fn find_conflicting_node(
+    graph: &CommitStateSpace,
+    commit_id: CommitId,
+    commits: &BTreeSet,
+) -> Option {
+    let mut all_invalidated = BTreeSet::new();
+    let mut children = graph
+        .children(commit_id)
+        .filter(|&child_id| commits.contains(&child_id));
+
+    children.find_map(|child_id| {
+        let mut new_invalidated = graph.invalidation_set(child_id, commit_id);
+        new_invalidated.find(|&n| !all_invalidated.insert(n))
+    })
+}
diff --git a/hugr-core/src/hugr/persistent/tests.rs b/hugr-core/src/hugr/persistent/tests.rs
new file mode 100644
index 000000000..5925f9d0f
--- /dev/null
+++ b/hugr-core/src/hugr/persistent/tests.rs
@@ -0,0 +1,376 @@
+use std::collections::{BTreeMap, HashMap};
+
+use rstest::*;
+
+use crate::{
+    builder::{inout_sig, DFGBuilder, Dataflow, DataflowHugr},
+    extension::prelude::bool_t,
+    hugr::{patch::Patch, persistent::PatchNode, views::SiblingSubgraph, Hugr, HugrView},
+    ops::handle::NodeHandle,
+    std_extensions::logic::LogicOp,
+    IncomingPort, Node, OutgoingPort, SimpleReplacement,
+};
+
+use super::{state_space::CommitId, CommitStateSpace};
+
+/// Creates a simple test Hugr with a DFG that contains a small boolean circuit
+///
+/// Graph structure:
+/// ```text
+///    ┌─────────┐
+/// ───┤ (0) NOT ├─┐    ┌─────────┐
+///    └─────────┘ └────┤         │
+///    ┌─────────┐      │ (2) AND ├───
+/// ───┤ (1) NOT ├──────┤         │
+///    └─────────┘      └─────────┘
+/// ```
+///
+/// Returns (Hugr, [not0_node, not1_node, and_node])
+fn simple_hugr() -> (Hugr, [Node; 3]) {
+    let mut dfg_builder =
+        DFGBuilder::new(inout_sig(vec![bool_t(), bool_t()], vec![bool_t()])).unwrap();
+
+    let [b0, b1] = dfg_builder.input_wires_arr();
+
+    let not0 = dfg_builder.add_dataflow_op(LogicOp::Not, vec![b0]).unwrap();
+    let [b0_not] = not0.outputs_arr();
+
+    let not1 = dfg_builder.add_dataflow_op(LogicOp::Not, vec![b1]).unwrap();
+    let [b1_not] = not1.outputs_arr();
+
+    let and = dfg_builder
+        .add_dataflow_op(LogicOp::And, vec![b0_not, b1_not])
+        .unwrap();
+
+    let hugr = dfg_builder.finish_hugr_with_outputs(and.outputs()).unwrap();
+
+    (hugr, [not0.node(), not1.node(), and.node()])
+}
+
+/// Creates a replacement that replaces a node with a sequence of two NOT gates
+fn create_double_not_replacement(hugr: &Hugr, node_to_replace: Node) -> SimpleReplacement {
+    // Create a simple hugr with two NOT gates in sequence
+    let mut dfg_builder = DFGBuilder::new(inout_sig(vec![bool_t()], vec![bool_t()])).unwrap();
+    let [input_wire] = dfg_builder.input_wires_arr();
+
+    // Add first NOT gate
+    let not1 = dfg_builder
+        .add_dataflow_op(LogicOp::Not, vec![input_wire])
+        .unwrap();
+    let [not1_out] = not1.outputs_arr();
+
+    // Add second NOT gate
+    let not2 = dfg_builder
+        .add_dataflow_op(LogicOp::Not, vec![not1_out])
+        .unwrap();
+    let [not2_out] = not2.outputs_arr();
+
+    let replacement_hugr = dfg_builder.finish_hugr_with_outputs([not2_out]).unwrap();
+
+    // Create the mappings
+    let mut nu_inp = HashMap::new();
+    nu_inp.insert(
+        (not1.node(), IncomingPort::from(0)),
+        (node_to_replace, IncomingPort::from(0)),
+    );
+
+    let mut nu_out = HashMap::new();
+    nu_out.insert(
+        (node_to_replace, OutgoingPort::from(0)),
+        IncomingPort::from(0),
+    );
+
+    // Create the subgraph with the single node
+    let subgraph = SiblingSubgraph::try_from_nodes(vec![node_to_replace], hugr).unwrap();
+
+    // Create the replacement
+    SimpleReplacement::new(subgraph, replacement_hugr, nu_inp, nu_out)
+}
+
+/// Creates a replacement that replaces the unique AND gate in `hugr` and its
+/// predecessor NOT gate on 1st input with an XOR gate
+fn create_not_and_to_xor_replacement(hugr: &Hugr) -> SimpleReplacement {
+    // Locate the two nodes to replace: the unique AND gate and the node
+    // feeding its first input
+    // Find the AND gate in the hugr
+    let and_gate = hugr
+        .nodes()
+        .find(|&n| hugr.get_optype(n) == &LogicOp::And.into())
+        .unwrap();
+    // The NOT gate before the AND on its first input
+    let not_node = hugr.input_neighbours(and_gate).next().unwrap();
+
+    // Create a replacement for the AND and that NOT gate with an XOR gate
+    let mut dfg_builder =
+        DFGBuilder::new(inout_sig(vec![bool_t(), bool_t()], vec![bool_t()])).unwrap();
+    let [in1, in2] = dfg_builder.input_wires_arr();
+
+    // Add an XOR gate
+    let xor_op = dfg_builder
+        .add_dataflow_op(LogicOp::Xor, vec![in1, in2])
+        .unwrap();
+
+    let replacement_hugr = dfg_builder
+        .finish_hugr_with_outputs(xor_op.outputs())
+        .unwrap();
+
+    // Create mappings for the inputs
+    let mut nu_inp = HashMap::new();
+
+    // Map the first input of XOR to the input of the NOT gate
+    nu_inp.insert(
+        (xor_op.node(), IncomingPort::from(0)),
+        (not_node, IncomingPort::from(0)),
+    );
+
+    // Map the second input of XOR to the second input of the AND gate
+    nu_inp.insert(
+        (xor_op.node(), IncomingPort::from(1)),
+        (and_gate, IncomingPort::from(1)),
+    );
+
+    // Output mapping - AND gate's output to XOR's output
+    let mut nu_out = HashMap::new();
+    nu_out.insert((and_gate, OutgoingPort::from(0)), IncomingPort::from(0));
+
+    // Create subgraph with both the NOT gate and the AND gate (`hugr` is already a reference)
+    let subgraph = SiblingSubgraph::try_from_nodes(vec![not_node, and_gate], hugr).unwrap();
+
+    SimpleReplacement::new(subgraph, replacement_hugr, nu_inp, nu_out)
+}
+
+#[fixture]
+pub(super) fn test_state_space() -> (CommitStateSpace, [CommitId; 4]) {
+    let (base_hugr, [not0_node, not1_node, _and_node]) = simple_hugr();
+
+    let mut state_space = CommitStateSpace::with_base(base_hugr);
+
+    // Create first replacement (replace NOT0 with two NOT gates)
+    let replacement1 = create_double_not_replacement(state_space.base_hugr(), not0_node);
+
+    // Add first commit to state space, replacing NOT0 with two NOT gates
+    let commit1 = {
+        let to_patch_node = |n: Node| PatchNode(state_space.base(), n);
+        // translate replacement1 to patch nodes in the base commit of the state space
+        let replacement1 = replacement1.map_host_nodes(to_patch_node);
+        state_space.try_add_replacement(replacement1).unwrap()
+    };
+
+    // Add second commit to state space, that applies on top of `commit1` and
+    // replaces the second NOT gate and the (original) AND gate with an XOR gate
+    let commit2 = {
+        // Create second replacement (replace NOT+AND with XOR) that applies on
+        // the result of the first
+        let mut direct_hugr = state_space.base_hugr().clone();
+        let node_map = replacement1
+            .clone()
+            .apply(&mut direct_hugr)
+            .unwrap()
+            .node_map;
+        let replacement2 = create_not_and_to_xor_replacement(&direct_hugr);
+
+        // The hard part: figure out the node map between nodes in `direct_hugr`
+        // and nodes in the state space
+        let inv_node_map = {
+            let mut inv = BTreeMap::new();
+            for (repl_node, hugr_node) in node_map {
+                inv.insert(hugr_node, repl_node);
+            }
+            inv
+        };
+        let to_patch_node = {
+            let base_commit = state_space.base();
+            move |n| {
+                if let Some(&n) = inv_node_map.get(&n) {
+                    // node was replaced by commit1
+                    PatchNode(commit1, n)
+                } else {
+                    // node is in base hugr
+                    PatchNode(base_commit, n)
+                }
+            }
+        };
+
+        // translate replacement2 to patch nodes
+        let replacement2 = replacement2.map_host_nodes(to_patch_node);
+        state_space.try_add_replacement(replacement2).unwrap()
+    };
+
+    // Create a third commit that will conflict with `commit1`, replacing NOT0
+    // and AND with XOR
+    let commit3 = {
+        let replacement3 = create_not_and_to_xor_replacement(state_space.base_hugr());
+        let to_patch_node = |n: Node| PatchNode(state_space.base(), n);
+        state_space
+            .try_add_replacement(replacement3.map_host_nodes(to_patch_node))
+            .unwrap()
+    };
+
+    // Create a fourth commit that is disjoint from `commit1`, replacing NOT1
+    // with two NOT gates
+    let commit4 = {
+        let replacement4 = create_double_not_replacement(state_space.base_hugr(), not1_node);
+        let to_patch_node = |n: Node| PatchNode(state_space.base(), n);
+        let replacement4 = replacement4.map_host_nodes(to_patch_node);
+        state_space.try_add_replacement(replacement4).unwrap()
+    };
+
+    (state_space, [commit1, commit2, commit3, commit4])
+}
+
+#[rstest]
+fn test_successive_replacements(test_state_space: (CommitStateSpace, [CommitId; 4])) {
+    let (state_space, [commit1, commit2, _commit3, _commit4]) = test_state_space;
+    let (mut hugr, [not0_node, _not1_node, _and_node]) = simple_hugr();
+
+    // Apply first replacement (replace NOT0 with two NOT gates)
+    let replacement1 = create_double_not_replacement(&hugr, not0_node);
+    replacement1.apply(&mut hugr).unwrap();
+
+    // Apply second replacement (replace NOT+AND with XOR)
+    let replacement2 = create_not_and_to_xor_replacement(&hugr);
+    replacement2.apply(&mut hugr).unwrap();
+
+    // Extract a persistent hugr containing `commit1` and `commit2`
+    let persistent_hugr = state_space
+        .try_extract_hugr([commit1, commit2])
+        .expect("commit1 and commit2 are compatible");
+
+    // Get the final hugr from the persistent context
+    let persistent_final_hugr = persistent_hugr.to_hugr();
+
+    // Check we have the expected number of patches (original + 2 replacements)
+    assert_eq!(persistent_hugr.commit_ids().count(), 3);
+
+    assert_eq!(hugr.validate(), Ok(()));
+    assert_eq!(persistent_final_hugr.validate(), Ok(()));
+    // TODO: use node-invariant equivalence check, e.g. hash-based comparison
+    assert_eq!(
+        hugr.mermaid_string(),
+        persistent_final_hugr.mermaid_string()
+    );
+}
+
+#[rstest]
+fn test_conflicting_replacements(test_state_space: (CommitStateSpace, [CommitId; 4])) {
+    let (state_space, [commit1, _commit2, commit3, _commit4]) = test_state_space;
+    let (hugr, [not0_node, _not1_node, _and_node]) = simple_hugr();
+
+    // Apply first replacement directly to a clone
+    let hugr1 = {
+        let mut hugr = hugr.clone();
+        let replacement1 = create_double_not_replacement(&hugr, not0_node);
+        replacement1.apply(&mut hugr).unwrap();
+        hugr
+    };
+
+    // Apply second replacement directly to another clone
+    let hugr2 = {
+        let mut hugr = hugr.clone();
+        let replacement2 = create_not_and_to_xor_replacement(&hugr);
+        replacement2.apply(&mut hugr).unwrap();
+        hugr
+    };
+
+    // Extract a persistent hugr containing only `commit1`
+    let persistent_hugr1 = state_space.try_extract_hugr([commit1]).unwrap();
+
+    // Extract another persistent hugr containing only `commit3`
+    let persistent_hugr2 = state_space.try_extract_hugr([commit3]).unwrap();
+
+    // Both individual replacements should be valid
+    assert_eq!(persistent_hugr1.to_hugr().validate(), Ok(()));
+    assert_eq!(persistent_hugr2.to_hugr().validate(), Ok(()));
+
+    // But trying to create a history with both replacements should fail
+    let common_state_space = {
+        let mut space = persistent_hugr1.clone().into_state_space();
+        space.extend(persistent_hugr2.clone());
+        space
+    };
+    assert_eq!(common_state_space.all_commit_ids().count(), 3);
+    let result = common_state_space.try_extract_hugr(common_state_space.all_commit_ids());
+    assert!(
+        result.is_err(),
+        "Creating history with conflicting patches should fail"
+    );
+
+    // TODO: use node-invariant equivalence check, e.g. hash-based comparison
+    assert_eq!(
+        hugr1.mermaid_string(),
+        persistent_hugr1.to_hugr().mermaid_string()
+    );
+
+    // TODO: use node-invariant equivalence check, e.g. hash-based comparison
+    assert_eq!(
+        hugr2.mermaid_string(),
+        persistent_hugr2.to_hugr().mermaid_string()
+    );
+}
+
+#[rstest]
+fn test_disjoint_replacements(test_state_space: (CommitStateSpace, [CommitId; 4])) {
+    let (state_space, [commit1, _commit2, _commit3, commit4]) = test_state_space;
+    let (mut hugr, [not0_node, not1_node, _and_node]) = simple_hugr();
+
+    // Create and apply non-overlapping replacements for NOT0 and NOT1
+    let replacement1 = create_double_not_replacement(&hugr, not0_node);
+    let replacement2 = create_double_not_replacement(&hugr, not1_node);
+    replacement1.apply(&mut hugr).unwrap();
+    replacement2.apply(&mut hugr).unwrap();
+
+    // Extract a persistent hugr containing `commit1` and `commit4`
+    let persistent_hugr = state_space.try_extract_hugr([commit1, commit4]).unwrap();
+
+    // Get the final hugr
+    let persistent_final_hugr = persistent_hugr.to_hugr();
+
+    // Both hugrs should be valid
+    assert_eq!(hugr.validate(), Ok(()));
+    assert_eq!(persistent_final_hugr.validate(), Ok(()));
+
+    // We should have 3 patches (base + 2 replacements)
+    assert_eq!(persistent_hugr.commit_ids().count(), 3);
+
+    // TODO: use node-invariant equivalence check, e.g. hash-based comparison
+    assert_eq!(
+        hugr.mermaid_string(),
+        persistent_final_hugr.mermaid_string()
+    );
+}
+
+#[rstest]
+fn test_try_add_replacement(test_state_space: (CommitStateSpace, [CommitId; 4])) {
+    let (state_space, [commit1, commit2, commit3, commit4]) = test_state_space;
+
+    // Extract a persistent hugr containing `commit1` and `commit2`
+    let persistent_hugr = state_space.try_extract_hugr([commit1, commit2]).unwrap();
+
+    {
+        let mut persistent_hugr = persistent_hugr.clone();
+        let repl4 = state_space.get_commit(commit4).replacement().unwrap();
+        let result = persistent_hugr.try_add_replacement(repl4.clone());
+        assert!(
+            result.is_ok(),
+            "[commit1, commit2] + [commit4] are compatible. Got {:?}",
+            result
+        );
+        let hugr = persistent_hugr.to_hugr();
+        let exp_hugr = state_space
+            .try_extract_hugr([commit1, commit2, commit4])
+            .unwrap()
+            .to_hugr();
+        assert_eq!(hugr.mermaid_string(), exp_hugr.mermaid_string());
+    }
+
+    {
+        let mut persistent_hugr = persistent_hugr.clone();
+        let repl3 = state_space.get_commit(commit3).replacement().unwrap();
+        let result = persistent_hugr.try_add_replacement(repl3.clone());
+        assert!(
+            result.is_err(),
+            "[commit1, commit2] + [commit3] are incompatible. Got {:?}",
+            result
+        );
+    }
+}
diff --git a/hugr-core/src/hugr/views/sibling_subgraph.rs b/hugr-core/src/hugr/views/sibling_subgraph.rs
index 5402de65d..44055cb41 100644
--- a/hugr-core/src/hugr/views/sibling_subgraph.rs
+++ b/hugr-core/src/hugr/views/sibling_subgraph.rs
@@ -356,6 +356,32 @@ impl SiblingSubgraph {
         hugr.get_parent(self.nodes[0]).expect("invalid subgraph")
     }
 
+    /// Map the nodes in the subgraph according to `node_map`.
+    ///
+    /// This does not check convexity. It is up to the caller to ensure that
+    /// the mapped subgraph obtained from this is convex in the new Hugr.
+    pub(crate) fn map_nodes(
+        &self,
+        node_map: impl Fn(N) -> N2,
+    ) -> SiblingSubgraph {
+        let nodes = self.nodes.iter().map(|&n| node_map(n)).collect_vec();
+        let inputs = self
+            .inputs
+            .iter()
+            .map(|part| part.iter().map(|&(n, p)| (node_map(n), p)).collect_vec())
+            .collect_vec();
+        let outputs = self
+            .outputs
+            .iter()
+            .map(|&(n, p)| (node_map(n), p))
+            .collect_vec();
+        SiblingSubgraph {
+            nodes,
+            inputs,
+            outputs,
+        }
+    }
+
     /// Construct a [`SimpleReplacement`] to replace `self` with `replacement`.
     ///
     /// `replacement` must be a hugr with DFG root and its signature must