diff --git a/src/merkle/mod.rs b/src/merkle/mod.rs
index d9c3a2da..fa391087 100644
--- a/src/merkle/mod.rs
+++ b/src/merkle/mod.rs
@@ -5,7 +5,9 @@ use std::hash::BuildHasher;
 pub use self::proof::{MerkleProof, MerkleProofBuilder, MerkleProofExtBuilder, MerkleProofRef};
 pub use self::pruned_branch::make_pruned_branch;
-pub use self::update::{MerkleUpdate, MerkleUpdateBuilder};
+pub use self::update::{
+    MerkleApplyResult, MerkleBuildResult, MerkleStats, MerkleUpdate, MerkleUpdateBuilder,
+};
 
 use crate::cell::{HashBytes, UsageTree, UsageTreeWithSubtrees};
 
 mod proof;
diff --git a/src/merkle/proof.rs b/src/merkle/proof.rs
index 776d59c4..a0963f0b 100644
--- a/src/merkle/proof.rs
+++ b/src/merkle/proof.rs
@@ -394,7 +394,7 @@ where
     pub fn build_raw_ext<'c: 'a>(
         self,
         context: &'c dyn CellContext,
-    ) -> Result<(Cell, ahash::HashSet<&'a HashBytes>), Error> {
+    ) -> Result<(Cell, ahash::HashSet<&'a HashBytes>, usize), Error> {
         let mut pruned_branches = Default::default();
         let mut builder = BuilderImpl {
             root: self.root,
@@ -406,7 +406,8 @@ where
             prune_big_cells: self.prune_big_cells,
         };
         let cell = ok!(builder.build());
-        Ok((cell, pruned_branches))
+        let cells_count = builder.cells.len();
+        Ok((cell, pruned_branches, cells_count))
     }
 }
 
@@ -420,7 +421,14 @@ where
         self,
         context: &'c (dyn CellContext + Send + Sync),
         split_at: ahash::HashSet<HashBytes>,
-    ) -> Result<(Cell, dashmap::DashSet<&'a HashBytes, ahash::RandomState>), Error> {
+    ) -> Result<
+        (
+            Cell,
+            dashmap::DashSet<&'a HashBytes, ahash::RandomState>,
+            usize,
+        ),
+        Error,
+    > {
         let pruned_branches = Default::default();
         let builder = ParBuilderImpl {
             root: self.root,
@@ -433,7 +441,8 @@ where
             prune_big_cells: self.prune_big_cells,
         };
         let cell = ok!(builder.build());
-        Ok((cell, pruned_branches))
+        let cells_count = builder.cells.len();
+        Ok((cell, pruned_branches, cells_count))
     }
 }
 
diff --git a/src/merkle/update.rs b/src/merkle/update.rs
index d2648987..6dbf6de2 100644
--- a/src/merkle/update.rs
+++ b/src/merkle/update.rs
@@ -147,18 +147,32 @@ impl MerkleUpdate {
     /// Tries to apply this Merkle update to the specified cell,
     /// producing a new cell and using an empty cell context.
     pub fn apply(&self, old: &Cell) -> Result<Cell, Error> {
-        self.apply_ext(old, Cell::empty_context())
+        self.apply_ext_with_stats(old, Cell::empty_context())
+            .map(|r| r.cell)
     }
 
     /// Tries to apply this Merkle update to the specified cell,
-    /// producing a new cell and using an empty cell context.
-    pub fn apply_ext(&self, old: &Cell, context: &dyn CellContext) -> Result<Cell, Error> {
+    /// producing a new cell with stats and using an empty cell context.
+    pub fn apply_with_stats(&self, old: &Cell) -> Result<MerkleApplyResult, Error> {
+        self.apply_ext_with_stats(old, Cell::empty_context())
+    }
+
+    /// Tries to apply this Merkle update to the specified cell,
+    /// producing a new cell with stats and using the specified cell context.
+    pub fn apply_ext_with_stats(
+        &self,
+        old: &Cell,
+        context: &dyn CellContext,
+    ) -> Result<MerkleApplyResult, Error> {
         if old.as_ref().repr_hash() != &self.old_hash {
             return Err(Error::InvalidData);
         }
 
         if self.old_hash == self.new_hash {
-            return Ok(old.clone());
+            return Ok(MerkleApplyResult {
+                cell: old.clone(),
+                stats: MerkleStats::default(),
+            });
         }
 
         struct Applier<'a> {
@@ -263,15 +277,22 @@ impl MerkleUpdate {
         };
 
         // Apply changed cells
-        let new = Applier {
+        let mut applier = Applier {
             old_cells,
             new_cells: Default::default(),
             context,
-        }
-        .run(self.new.as_ref(), 0)?;
+        };
+
+        let new = applier.run(self.new.as_ref(), 0)?;
 
         if new.as_ref().repr_hash() == &self.new_hash {
-            Ok(new)
+            // Note: +1 for root
+            let new_cells_count = applier.new_cells.len() + 1;
+
+            Ok(MerkleApplyResult {
+                cell: new,
+                stats: MerkleStats { new_cells_count },
+            })
         } else {
             Err(Error::InvalidData)
         }
@@ -284,23 +305,37 @@ impl MerkleUpdate {
         old: &Cell,
         old_split_at: &ahash::HashSet<HashBytes>,
     ) -> Result<Cell, Error> {
-        self.par_apply_ext(old, old_split_at, Cell::empty_context())
+        self.par_apply_ext_with_stats(old, old_split_at, Cell::empty_context())
+            .map(|r| r.cell)
     }
 
-    /// Tries to apply Merkle update in parallel
+    /// Tries to apply Merkle update in parallel with stats
     #[cfg(all(feature = "rayon", feature = "sync"))]
-    pub fn par_apply_ext(
+    pub fn par_apply_with_stats(
+        &self,
+        old: &Cell,
+        old_split_at: &ahash::HashSet<HashBytes>,
+    ) -> Result<MerkleApplyResult, Error> {
+        self.par_apply_ext_with_stats(old, old_split_at, Cell::empty_context())
+    }
+
+    /// Tries to apply Merkle update in parallel, collecting stats
+    #[cfg(all(feature = "rayon", feature = "sync"))]
+    pub fn par_apply_ext_with_stats(
         &self,
         old: &Cell,
         old_split_at: &ahash::HashSet<HashBytes>,
         context: &(dyn CellContext + Send + Sync),
-    ) -> Result<Cell, Error> {
+    ) -> Result<MerkleApplyResult, Error> {
         if old.as_ref().repr_hash() != &self.old_hash {
             return Err(Error::InvalidData);
         }
 
         if self.old_hash == self.new_hash {
-            return Ok(old.clone());
+            return Ok(MerkleApplyResult {
+                cell: old.clone(),
+                stats: MerkleStats::default(),
+            });
         }
 
         struct Applier<'a> {
@@ -512,7 +547,7 @@ impl MerkleUpdate {
         };
 
         // Apply changed cells
-        let new = {
+        let (new, new_cells_count) = {
             let applier = Applier {
                 old_cells,
                 new_cells: Default::default(),
@@ -520,11 +555,19 @@ impl MerkleUpdate {
             };
 
            let new = rayon::scope(|scope| applier.run(self.new.as_ref(), 0, 0, Some(scope)))?;
-            new.resolve(context)?
+            let cell = new.resolve(context)?;
+
+            // Note: +1 for root
+            let count = applier.new_cells.len() + 1;
+
+            (cell, count)
         };
 
         if new.as_ref().repr_hash() == &self.new_hash {
-            Ok(new)
+            Ok(MerkleApplyResult {
+                cell: new,
+                stats: MerkleStats { new_cells_count },
+            })
         } else {
             Err(Error::InvalidData)
         }
@@ -902,6 +945,31 @@ impl MerkleUpdate {
     }
 }
 
+/// Metadata collected during `MerkleUpdate` operations.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct MerkleStats {
+    /// Number of new (not pruned) cells.
+    pub new_cells_count: usize,
+}
+
+/// Result of applying a Merkle update with metadata.
+#[derive(Debug, Clone)]
+pub struct MerkleApplyResult {
+    /// The new cell.
+    pub cell: Cell,
+    /// Metadata.
+    pub stats: MerkleStats,
+}
+
+/// Result of building a Merkle update with metadata.
+#[derive(Debug, Clone)]
+pub struct MerkleBuildResult {
+    /// The Merkle update.
+    pub update: MerkleUpdate,
+    /// Metadata.
+    pub stats: MerkleStats,
+}
+
 /// Helper struct to build a Merkle update.
 pub struct MerkleUpdateBuilder<'a, F> {
     old: &'a DynCell,
@@ -923,8 +991,11 @@ where
         }
     }
 
-    /// Builds a Merkle update using the specified cell context.
-    pub fn build_ext(self, context: &dyn CellContext) -> Result<MerkleUpdate, Error> {
+    /// Builds a Merkle update with stats using the specified cell context.
+    pub fn build_ext_with_stats(
+        self,
+        context: &dyn CellContext,
+    ) -> Result<MerkleBuildResult, Error> {
         BuilderImpl {
             old: self.old,
             new: self.new,
@@ -936,7 +1007,13 @@ where
 
     /// Builds a Merkle update using an empty cell context.
     pub fn build(self) -> Result<MerkleUpdate, Error> {
-        self.build_ext(Cell::empty_context())
+        self.build_ext_with_stats(Cell::empty_context())
+            .map(|r| r.update)
+    }
+
+    /// Builds a Merkle update with stats using an empty cell context.
+    pub fn build_with_stats(self) -> Result<MerkleBuildResult, Error> {
+        self.build_ext_with_stats(Cell::empty_context())
     }
 }
 
@@ -945,14 +1022,14 @@ impl<'a, F> MerkleUpdateBuilder<'a, F>
 where
     F: MerkleFilter + Send + Sync,
 {
-    /// Multithread build of a Merkle update using the specified cell context
+    /// Multithread build of a Merkle update with stats using the specified cell context
     /// and sets of cells which to handle in parallel.
-    pub fn par_build_ext(
+    pub fn par_build_ext_with_stats(
         self,
         old_split_at: ahash::HashSet<HashBytes>,
         new_split_at: ahash::HashSet<HashBytes>,
         context: &(dyn CellContext + Send + Sync),
-    ) -> Result<MerkleUpdate, Error> {
+    ) -> Result<MerkleBuildResult, Error> {
         ParBuilderImpl {
             old: self.old,
             new: self.new,
@@ -969,7 +1046,18 @@ where
         old_split_at: ahash::HashSet<HashBytes>,
         new_split_at: ahash::HashSet<HashBytes>,
     ) -> Result<MerkleUpdate, Error> {
-        self.par_build_ext(old_split_at, new_split_at, Cell::empty_context())
+        self.par_build_ext_with_stats(old_split_at, new_split_at, Cell::empty_context())
+            .map(|r| r.update)
+    }
+
+    /// Multithread build of a Merkle update with stats using an empty cell context
+    /// and sets of cells which to handle in parallel.
+    pub fn par_build_with_stats(
+        self,
+        old_split_at: ahash::HashSet<HashBytes>,
+        new_split_at: ahash::HashSet<HashBytes>,
+    ) -> Result<MerkleBuildResult, Error> {
+        self.par_build_ext_with_stats(old_split_at, new_split_at, Cell::empty_context())
     }
 }
 
@@ -981,7 +1069,7 @@ struct BuilderImpl<'a, 'b, 'c: 'a> {
 }
 
 impl<'a: 'b, 'b, 'c: 'a> BuilderImpl<'a, 'b, 'c> {
-    fn build(self) -> Result<MerkleUpdate, Error> {
+    fn build(self) -> Result<MerkleBuildResult, Error> {
         struct Resolver<'a> {
             pruned_branches: ahash::HashSet<&'a HashBytes>,
             visited: ahash::HashMap<&'a HashBytes, bool>,
@@ -1054,18 +1142,21 @@ impl<'a: 'b, 'b, 'c: 'a> BuilderImpl<'a, 'b, 'c> {
         // Handle the simplest case with empty Merkle update
         if old_hash == new_hash {
             let pruned = ok!(make_pruned_branch(self.old, 0, self.context));
-            return Ok(MerkleUpdate {
-                old_hash: *old_hash,
-                new_hash: *old_hash,
-                old_depth,
-                new_depth: old_depth,
-                old: pruned.clone(),
-                new: pruned,
+            return Ok(MerkleBuildResult {
+                update: MerkleUpdate {
+                    old_hash: *old_hash,
+                    new_hash: *old_hash,
+                    old_depth,
+                    new_depth: old_depth,
+                    old: pruned.clone(),
+                    new: pruned,
+                },
+                stats: MerkleStats::default(),
             });
         }
 
         // Create Merkle proof cell which contains only new cells
-        let (new, pruned_branches) = ok! {
+        let (new, pruned_branches, new_cells_count) = ok! {
             MerkleProofBuilder::<_>::new(
                 self.new,
                 InvertedFilter(self.filter)
@@ -1100,13 +1191,16 @@
         };
 
         // Done
-        Ok(MerkleUpdate {
-            old_hash: *old_hash,
-            new_hash: *new_hash,
-            old_depth,
-            new_depth,
-            old,
-            new,
+        Ok(MerkleBuildResult {
+            update: MerkleUpdate {
+                old_hash: *old_hash,
+                new_hash: *new_hash,
+                old_depth,
+                new_depth,
+                old,
+                new,
+            },
+            stats: MerkleStats { new_cells_count },
         })
     }
 }
@@ -1125,7 +1219,7 @@ impl<'a: 'b, 'b, 'c: 'a> ParBuilderImpl<'a, 'b, 'c> {
         self,
         old_split_at: ahash::HashSet<HashBytes>,
         new_split_at: ahash::HashSet<HashBytes>,
-    ) -> Result<MerkleUpdate, Error> {
+    ) -> Result<MerkleBuildResult, Error> {
         enum CheckResult<'a> {
             Immediate(bool),
             Parts {
@@ -1334,18 +1428,21 @@ impl<'a: 'b, 'b, 'c: 'a> ParBuilderImpl<'a, 'b, 'c> {
         // Handle the simplest case with empty Merkle update
         if old_hash == new_hash {
             let pruned = ok!(make_pruned_branch(self.old, 0, self.context));
-            return Ok(MerkleUpdate {
-                old_hash: *old_hash,
-                new_hash: *old_hash,
-                old_depth,
-                new_depth: old_depth,
-                old: pruned.clone(),
-                new: pruned,
+            return Ok(MerkleBuildResult {
+                update: MerkleUpdate {
+                    old_hash: *old_hash,
+                    new_hash: *old_hash,
+                    old_depth,
+                    new_depth: old_depth,
+                    old: pruned.clone(),
+                    new: pruned,
+                },
+                stats: MerkleStats::default(),
             });
         }
 
         // Create Merkle proof cell which contains only new cells
-        let (new, pruned_branches) = ok! {
+        let (new, pruned_branches, new_cells_count) = ok! {
             MerkleProofBuilder::<_>::new(
                 self.new,
                 InvertedFilter(self.filter)
@@ -1380,13 +1477,16 @@ impl<'a: 'b, 'b, 'c: 'a> ParBuilderImpl<'a, 'b, 'c> {
         };
 
         // Done
-        Ok(MerkleUpdate {
-            old_hash: *old_hash,
-            new_hash: *new_hash,
-            old_depth,
-            new_depth,
-            old,
-            new,
+        Ok(MerkleBuildResult {
+            update: MerkleUpdate {
+                old_hash: *old_hash,
+                new_hash: *new_hash,
+                old_depth,
+                new_depth,
+                old,
+                new,
+            },
+            stats: MerkleStats { new_cells_count },
         })
     }
 }
@@ -1640,4 +1740,192 @@ mod tests {
             batch.is_empty()
         }
     }
+
+    /// Helper function to count new cells by traversing the `new` tree of the update.
+    fn count_new_cells(merkle_update: &MerkleUpdate) -> usize {
+        if merkle_update.old_hash == merkle_update.new_hash {
+            return 0;
+        }
+
+        let mut visited = ahash::HashSet::default();
+        let mut count = 0;
+        let mut stack = vec![merkle_update.new.as_ref()];
+
+        while let Some(cell) = stack.pop() {
+            let hash = cell.repr_hash();
+
+            // Skip already visited cells
+            if !visited.insert(hash) {
+                continue;
+            }
+
+            // Count only non-pruned cells
+            if !cell.descriptor().is_pruned_branch() {
+                count += 1;
+
+                // Add children only for non-pruned cells
+                for child in cell.references() {
+                    stack.push(child);
+                }
+            }
+        }
+
+        count
+    }
+
+    #[test]
+    fn test_count_new_cells() {
+        // Create dict with keys 0..20
+        let mut dict = Dict::<u32, u32>::new();
+        for i in 0..20 {
+            dict.add(i, i * 10).unwrap();
+        }
+
+        // Serialize old dict
+        let old_dict_cell = CellBuilder::build_from(&dict).unwrap();
+        let old_dict_hashes = visit_all_cells(&old_dict_cell);
+
+        // Modify dict
+        dict.set(0, 1).unwrap();
+        dict.set(5, 999).unwrap();
+        dict.set(10, 9999).unwrap();
+        dict.set(15, 99999).unwrap();
+        let new_dict_cell = CellBuilder::build_from(dict).unwrap();
+
+        assert_ne!(old_dict_cell.as_ref(), new_dict_cell.as_ref());
+
+        // Create merkle update
+        let merkle_update = MerkleUpdate::create(
+            old_dict_cell.as_ref(),
+            new_dict_cell.as_ref(),
+            old_dict_hashes,
+        )
+        .build()
+        .unwrap();
+
+        // Count new cells via traversal
+        let count_via_traversal = count_new_cells(&merkle_update);
+
+        // Count new cells via apply
+        let result = merkle_update.apply_with_stats(&old_dict_cell).unwrap();
+
+        assert_eq!(result.cell.as_ref(), new_dict_cell.as_ref());
+        assert_eq!(count_via_traversal, result.stats.new_cells_count);
+    }
+
+    #[test]
+    #[cfg(all(feature = "rayon", feature = "sync"))]
+    fn test_par_apply_new_cells_count() {
+        // Create dict with keys 0..20
+        let mut dict = Dict::<u32, u32>::new();
+        for i in 0..20 {
+            dict.add(i, i * 10).unwrap();
+        }
+
+        // Serialize old dict
+        let old_dict_cell = CellBuilder::build_from(&dict).unwrap();
+        let old_dict_hashes = visit_all_cells(&old_dict_cell);
+
+        // Modify dict
+        dict.set(0, 1).unwrap();
+        dict.set(5, 999).unwrap();
+        dict.set(10, 9999).unwrap();
+        dict.set(15, 99999).unwrap();
+        let new_dict_cell = CellBuilder::build_from(dict).unwrap();
+
+        assert_ne!(old_dict_cell.as_ref(), new_dict_cell.as_ref());
+
+        // Create merkle update
+        let merkle_update = MerkleUpdate::create(
+            old_dict_cell.as_ref(),
+            new_dict_cell.as_ref(),
+            old_dict_hashes,
+        )
+        .build()
+        .unwrap();
+
+        // Count via traversal
+        let count_via_traversal = count_new_cells(&merkle_update);
+
+        // Count via par_apply
+        let split_at = ahash::HashSet::default();
+        let result = merkle_update
+            .par_apply_with_stats(&old_dict_cell, &split_at)
+            .unwrap();
+
+        assert_eq!(result.cell.as_ref(), new_dict_cell.as_ref());
+        assert_eq!(count_via_traversal, result.stats.new_cells_count);
+    }
+
+    #[test]
+    fn test_build_new_cells_count() {
+        // Create dict with keys 0..20
+        let mut dict = Dict::<u32, u32>::new();
+        for i in 0..20 {
+            dict.add(i, i * 10).unwrap();
+        }
+
+        // Serialize old dict
+        let old_dict_cell = CellBuilder::build_from(&dict).unwrap();
+        let old_dict_hashes = visit_all_cells(&old_dict_cell);
+
+        // Modify dict
+        dict.set(0, 1).unwrap();
+        dict.set(5, 999).unwrap();
+        dict.set(10, 9999).unwrap();
+        dict.set(15, 99999).unwrap();
+        let new_dict_cell = CellBuilder::build_from(dict).unwrap();
+
+        assert_ne!(old_dict_cell.as_ref(), new_dict_cell.as_ref());
+
+        // Create merkle update with stats
+        let result = MerkleUpdate::create(
+            old_dict_cell.as_ref(),
+            new_dict_cell.as_ref(),
+            old_dict_hashes,
+        )
+        .build_with_stats()
+        .unwrap();
+
+        // Count new cells via traversal
+        let count_via_traversal = count_new_cells(&result.update);
+        assert_eq!(count_via_traversal, result.stats.new_cells_count);
+    }
+
+    #[test]
+    #[cfg(all(feature = "rayon", feature = "sync"))]
+    fn test_par_build_new_cells_count() {
+        // Create dict with keys 0..20
+        let mut dict = Dict::<u32, u32>::new();
+        for i in 0..20 {
+            dict.add(i, i * 10).unwrap();
+        }
+
+        // Serialize old dict
+        let old_dict_cell = CellBuilder::build_from(&dict).unwrap();
+        let old_dict_hashes = visit_all_cells(&old_dict_cell);
+
+        // Modify dict
+        dict.set(0, 1).unwrap();
+        dict.set(5, 999).unwrap();
+        dict.set(10, 9999).unwrap();
+        dict.set(15, 99999).unwrap();
+        let new_dict_cell = CellBuilder::build_from(dict).unwrap();
+
+        assert_ne!(old_dict_cell.as_ref(), new_dict_cell.as_ref());
+
+        // Create merkle update with stats using parallel build
+        let result = MerkleUpdate::create(
+            old_dict_cell.as_ref(),
+            new_dict_cell.as_ref(),
+            old_dict_hashes,
+        )
+        .par_build_with_stats(Default::default(), Default::default())
+        .unwrap();
+
+        // Count new cells via traversal
+        let count_via_traversal = count_new_cells(&result.update);
+        assert_eq!(count_via_traversal, result.stats.new_cells_count);
+    }
 }
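Usage sketch (not part of the patch above): the snippet below shows how the new `*_with_stats` entry points could fit together, assuming the everscale-types-style crate layout this diff targets (`MerkleUpdate::create`, `Dict`, `CellBuilder`). The `collect_hashes` helper is hypothetical, written here only for illustration in the spirit of the tests' `visit_all_cells`; treat the whole thing as a minimal sketch under those assumptions.

// Build a Merkle update and apply it, reading MerkleStats on both sides.
// Assumes the crate layout shown in this diff; `collect_hashes` is a
// hypothetical helper analogous to the tests' `visit_all_cells`.
use everscale_types::cell::{CellBuilder, DynCell, HashBytes};
use everscale_types::dict::Dict;
use everscale_types::error::Error;
use everscale_types::merkle::MerkleUpdate;

// Collect the repr hashes of every cell reachable from `root`; a set of
// hashes is usable as the filter argument of `MerkleUpdate::create`.
fn collect_hashes(root: &DynCell) -> ahash::HashSet<HashBytes> {
    let mut hashes = ahash::HashSet::default();
    let mut stack = vec![root];
    while let Some(cell) = stack.pop() {
        if hashes.insert(*cell.repr_hash()) {
            stack.extend(cell.references());
        }
    }
    hashes
}

fn build_and_apply_with_stats() -> Result<(), Error> {
    // Old and new versions of the same dictionary.
    let mut dict = Dict::<u32, u32>::new();
    for i in 0..20 {
        dict.add(i, i * 10)?;
    }
    let old = CellBuilder::build_from(&dict)?;
    dict.set(5, 999)?;
    let new = CellBuilder::build_from(&dict)?;

    // Build the update together with its stats.
    let built = MerkleUpdate::create(old.as_ref(), new.as_ref(), collect_hashes(old.as_ref()))
        .build_with_stats()?;

    // Apply it back to the old root; the apply side reports the same counter.
    let applied = built.update.apply_with_stats(&old)?;
    assert_eq!(applied.cell.as_ref(), new.as_ref());
    println!(
        "new (non-pruned) cells: built = {}, applied = {}",
        built.stats.new_cells_count, applied.stats.new_cells_count
    );
    Ok(())
}

The single `new_cells_count` field mirrors what the diff measures: cells that would actually have to be created when the update is applied, excluding pruned branches.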