From 314700d876f6ff6abe53155bbba120d1f8daf2c3 Mon Sep 17 00:00:00 2001 From: Cameron Date: Fri, 7 Jul 2023 13:17:46 +0100 Subject: [PATCH 01/15] before move to map-like structure --- Cargo.lock | 138 +++++++++++-- Cargo.toml | 2 +- indexer/Cargo.toml | 2 +- indexer/src/store.rs | 2 +- smirk/Cargo.toml | 16 ++ smirk/src/hash.rs | 167 ++++++++++++++++ smirk/src/lib.rs | 9 + smirk/src/storage/error.rs | 17 ++ smirk/src/storage/mod.rs | 35 ++++ smirk/src/storage/noop.rs | 17 ++ smirk/src/storage/rocksdb/mod.rs | 123 ++++++++++++ smirk/src/testing.rs | 63 ++++++ smirk/src/tree/impls.rs | 122 ++++++++++++ smirk/src/tree/mod.rs | 321 +++++++++++++++++++++++++++++++ smirk/src/tree/tests.rs | 15 ++ smirk/src/tree/visitor.rs | 76 ++++++++ 16 files changed, 1111 insertions(+), 14 deletions(-) create mode 100644 smirk/Cargo.toml create mode 100644 smirk/src/hash.rs create mode 100644 smirk/src/lib.rs create mode 100644 smirk/src/storage/error.rs create mode 100644 smirk/src/storage/mod.rs create mode 100644 smirk/src/storage/noop.rs create mode 100644 smirk/src/storage/rocksdb/mod.rs create mode 100644 smirk/src/testing.rs create mode 100644 smirk/src/tree/impls.rs create mode 100644 smirk/src/tree/mod.rs create mode 100644 smirk/src/tree/tests.rs create mode 100644 smirk/src/tree/visitor.rs diff --git a/Cargo.lock b/Cargo.lock index 33ded033..e8f7411b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -748,9 +748,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.64.0" +version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ "bitflags", "cexpr", @@ -758,12 +758,13 @@ dependencies = [ "lazy_static", "lazycell", "peeking_take_while", + "prettyplease 0.2.9", "proc-macro2", "quote", "regex", "rustc-hash", "shlex", - "syn 1.0.109", + "syn 2.0.16", ] [[package]] @@ -1818,6 +1819,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "fuchsia-cprng" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" + [[package]] name = "futures" version = "0.3.28" @@ -2832,9 +2839,9 @@ dependencies = [ [[package]] name = "librocksdb-sys" -version = "0.8.3+7.4.4" +version = "0.11.0+8.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "557b255ff04123fcc176162f56ed0c9cd42d8f357cf55b3fabeb60f7413741b3" +checksum = "d3386f101bcb4bd252d8e9d2fb41ec3b0862a15a62b478c355b2982efa469e3e" dependencies = [ "bindgen", "bzip2-sys", @@ -2842,6 +2849,7 @@ dependencies = [ "glob", "libc", "libz-sys", + "lz4-sys", "zstd-sys", ] @@ -2914,6 +2922,16 @@ dependencies = [ "linked-hash-map", ] +[[package]] +name = "lz4-sys" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "match_cfg" version = "0.1.0" @@ -2988,7 +3006,7 @@ name = "miden-core" version = "0.5.0" source = "git+https://github.com/0xPolygonMiden/miden-vm?tag=v0.5.0#4195475d75ab2d586bdb01d1ff3ea2cd626eaf7b" dependencies = [ - "miden-crypto", + "miden-crypto 0.2.0", "winter-crypto 0.6.4", "winter-math 0.6.4", "winter-utils 0.6.4", @@ -3006,6 +3024,18 @@ dependencies = [ "winter-utils 0.6.4", ] +[[package]] +name = "miden-crypto" +version = "0.6.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cf95db953ee5fc8ef5a1df2d2e6e55e41587861c793fa26b45a214d3cc0f798" +dependencies = [ + "blake3", + "winter-crypto 0.6.4", + "winter-math 0.6.4", + "winter-utils 0.6.4", +] + [[package]] name = "miden-processor" version = "0.5.0" @@ -3778,7 +3808,7 @@ dependencies = [ "serde", "serde_json", "wasm-bindgen", - "winter-math 0.4.2", + "winter-math 0.6.4", ] [[package]] @@ -3864,6 +3894,16 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "prettyplease" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9825a04601d60621feed79c4e6b56d65db77cdca55cef43b46b0de1096d1c282" +dependencies = [ + "proc-macro2", + "syn 2.0.16", +] + [[package]] name = "proc-macro-crate" version = "1.1.3" @@ -3950,7 +3990,7 @@ dependencies = [ "log", "multimap", "petgraph", - "prettyplease", + "prettyplease 0.1.25", "prost", "prost-types", "regex", @@ -4058,6 +4098,19 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +dependencies = [ + "fuchsia-cprng", + "libc", + "rand_core 0.3.1", + "rdrand", + "winapi", +] + [[package]] name = "rand" version = "0.7.3" @@ -4102,6 +4155,21 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +dependencies = [ + "rand_core 0.4.2", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" + [[package]] name = "rand_core" version = "0.5.1" @@ -4163,6 +4231,15 @@ dependencies = [ "yasna", ] +[[package]] +name = "rdrand" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +dependencies = [ + "rand_core 0.3.1", +] + [[package]] name = "redox_syscall" version = "0.2.16" @@ -4224,6 +4301,15 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + [[package]] name = "reqwest" version = "0.11.18" @@ -4299,9 +4385,9 @@ dependencies = [ [[package]] name = "rocksdb" -version = "0.19.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e9562ea1d70c0cc63a34a22d977753b50cca91cc6b6527750463bd5dd8697bc" +checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe" dependencies = [ "libc", "librocksdb-sys", @@ -4839,6 +4925,20 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +[[package]] +name = "smirk" +version = "0.1.0" +dependencies = [ + "hex", + "miden-crypto 0.6.0", + "rocksdb", + "serde", + "serde_json", + "tempdir", + "thiserror", + "traversal", +] + [[package]] name = "snow" version = "0.9.2" @@ -5053,6 +5153,16 @@ dependencies = [ "libc", ] 
+[[package]] +name = "tempdir" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" +dependencies = [ + "rand 0.4.6", + "remove_dir_all", +] + [[package]] name = "tempfile" version = "3.5.0" @@ -5348,7 +5458,7 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bf5e9b9c0f7e0a7c027dcfaba7b2c60816c7049171f679d99ee2ff65d0de8c4" dependencies = [ - "prettyplease", + "prettyplease 0.1.25", "proc-macro2", "prost-build", "quote", @@ -5492,6 +5602,12 @@ dependencies = [ "tracing-serde", ] +[[package]] +name = "traversal" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0ec9745d7517c8b8e8c0a65cba2d84e42f95fd348a01693c5e4da1bc6d00c99" + [[package]] name = "trust-dns-proto" version = "0.22.0" diff --git a/Cargo.toml b/Cargo.toml index 30cf5ce0..f30777c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [workspace] -members = ["polybase", "indexer", "gateway", "solid"] +members = ["polybase", "indexer", "gateway", "solid", "smirk"] [profile.release] debug-assertions = true diff --git a/indexer/Cargo.toml b/indexer/Cargo.toml index 0e060d75..e58f8a55 100644 --- a/indexer/Cargo.toml +++ b/indexer/Cargo.toml @@ -16,7 +16,7 @@ once_cell = "1.17.0" polylang = { git = "https://github.com/polybase/polylang", branch = "main", version = "0.1.0" } prost = "0.11" prost-types = "0.11" -rocksdb = "0.19" +rocksdb = "0.21" secp256k1 = { version = "0.26", features = ["rand-std"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" diff --git a/indexer/src/store.rs b/indexer/src/store.rs index 26158035..48cb9935 100644 --- a/indexer/src/store.rs +++ b/indexer/src/store.rs @@ -69,7 +69,7 @@ impl Store { pub fn open(path: impl AsRef) -> Result { let mut options = rocksdb::Options::default(); options.create_if_missing(true); - options.set_comparator("polybase", keys::comparator); + options.set_comparator("polybase", Box::new(keys::comparator)); let db = rocksdb::DB::open(&options, path)?; diff --git a/smirk/Cargo.toml b/smirk/Cargo.toml new file mode 100644 index 00000000..b78ae5ca --- /dev/null +++ b/smirk/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "smirk" +version = "0.1.0" +edition = "2021" + +[dependencies] +miden-crypto = "0.6" +rocksdb = "0.21" +thiserror = "1" +hex = "0.4" +traversal = "0.1" +serde = { version = "1", features = ["derive"] } +serde_json = "1" + +[dev-dependencies] +tempdir = "0.3" diff --git a/smirk/src/hash.rs b/smirk/src/hash.rs new file mode 100644 index 00000000..b784b63b --- /dev/null +++ b/smirk/src/hash.rs @@ -0,0 +1,167 @@ +use std::fmt::Display; + +use miden_crypto::{ + hash::rpo::{Rpo256, RpoDigest}, + utils::{Deserializable, Serializable, SliceReader}, + Felt, +}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Hash(RpoDigest); + +impl std::hash::Hash for Hash { + fn hash(&self, state: &mut H) { + self.0.to_bytes().hash(state); + } +} + +mod serde_impl { + use miden_crypto::utils::{Deserializable, Serializable, SliceReader}; + use serde::{de::Visitor, Deserializer, Serializer}; + + use super::*; + + impl Serialize for Hash { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut bytes = vec![0; 32]; + self.0.write_into(&mut bytes); + serializer.serialize_bytes(&bytes) + } + } + + impl<'de> Deserialize<'de> for Hash { + fn deserialize(deserializer: D) -> 
Result + where + D: Deserializer<'de>, + { + struct V; + impl Visitor<'_> for V { + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("bytes representing a rescue-prime optimized hash") + } + + type Value = RpoDigest; + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: serde::de::Error, + { + if v.len() != 32 { + return Err(E::custom(format!( + "wrong number of bytes - expected 32, found {}", + v.len() + ))); + } + + let mut reader = SliceReader::new(v); + RpoDigest::read_from(&mut reader) + .map_err(|e| E::custom(format!("deserialization error: {e}"))) + } + } + + deserializer.deserialize_bytes(V).map(Hash) + } + } +} + +impl Display for Hash { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Hash({})", hex::encode(self.0.as_bytes())) + } +} + +impl Hash { + pub const NULL: Hash = Hash(RpoDigest::new([Felt::new(0); 4])); + + /// The length of this hash in bytes + pub const LEN: usize = 32; + + /// The bytes of this hash + #[inline] + pub fn to_bytes(&self) -> [u8; Self::LEN] { + // to_bytes is a more appropriate name, since this method copies into a new slice + self.0.as_bytes() + } + + #[inline] + pub fn encode(&self) -> Vec { + let mut bytes = vec![0; 32]; + self.0.write_into(&mut bytes); + bytes + } + + #[inline] + pub fn decode(bytes: &[u8]) -> Option { + if bytes.len() != 32 { + return None; + } + + let mut reader = SliceReader::new(bytes); + RpoDigest::read_from(&mut reader).ok().map(Hash) + } + + /// Hash the given bytes + #[inline] + pub fn calculate(bytes: &[u8]) -> Self { + Self(Rpo256::hash(bytes)) + } + + #[inline] + pub fn digest(&self) -> RpoDigest { + self.0 + } + + /// Replace `self` with `rpo256(this + other)` + #[inline] + pub fn merge(&mut self, other: &Hash) { + self.0 = Rpo256::merge(&[self.0, other.0]); + } +} + +impl From for Hash { + fn from(value: RpoDigest) -> Self { + Self(value) + } +} + +pub struct MerklePath { + /// The components of the path, with the root at the end + components: Vec, +} + +impl MerklePath { + /// Create a new [`MerklePath`] from the given components + /// + /// The components should be the hashes that form the path, with the root of the tree at the + /// end + #[inline] + pub fn new(components: Vec) -> Self { + Self { components } + } + + /// Get a slice of hashes representing the components of the path + #[inline] + pub fn components(&self) -> &[Hash] { + &self.components + } + + /// Get a mutable slice of hashes representing the components of the path + #[inline] + pub fn components_mut(&mut self) -> &mut [Hash] { + &mut self.components + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn null_hash_is_all_zeroes() { + assert_eq!(Hash::NULL.to_bytes(), [0; 32]); + } +} diff --git a/smirk/src/lib.rs b/smirk/src/lib.rs new file mode 100644 index 00000000..b7924a53 --- /dev/null +++ b/smirk/src/lib.rs @@ -0,0 +1,9 @@ +mod hash; +pub mod storage; +pub mod tree; + +pub use tree::MerkleTree; + +#[cfg(test)] +mod testing; + diff --git a/smirk/src/storage/error.rs b/smirk/src/storage/error.rs new file mode 100644 index 00000000..32db740b --- /dev/null +++ b/smirk/src/storage/error.rs @@ -0,0 +1,17 @@ +use crate::hash::Hash; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("A key referenced this hash as a child, but it wasn't present: {0}")] + MissingKeyReferenced(Hash), + #[error("the `structure` key was not defined")] + Unknown(#[from] Box), + #[error("json: {0}")] + Json(#[from] serde_json::Error), +} + +impl From for 
Error { + fn from(value: rocksdb::Error) -> Self { + Self::Unknown(Box::new(value)) + } +} diff --git a/smirk/src/storage/mod.rs b/smirk/src/storage/mod.rs new file mode 100644 index 00000000..63a39b1b --- /dev/null +++ b/smirk/src/storage/mod.rs @@ -0,0 +1,35 @@ +use std::fmt::Debug; + +use crate::tree::MerkleTree; + +pub mod noop; +pub mod rocksdb; + +mod error; +pub use error::Error; +use serde::{Deserialize, Serialize}; + +/// Types which can act as a storage backend for a Merkle tree +pub trait Storage: Debug { + fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error>; + + fn load_tree(&self) -> Result>, Error>; +} + +#[cfg(test)] +mod tests { + use crate::testing::{example_tree, TestDb}; + + use super::*; + + #[test] + fn simple_example() { + let test_db = TestDb::new(); + let tree = example_tree(); + + assert!(test_db.load_tree().unwrap().is_none()); + + test_db.store_tree(&tree).unwrap(); + let tree_again = test_db.load_tree().unwrap().unwrap(); + } +} diff --git a/smirk/src/storage/noop.rs b/smirk/src/storage/noop.rs new file mode 100644 index 00000000..8b286b9a --- /dev/null +++ b/smirk/src/storage/noop.rs @@ -0,0 +1,17 @@ +use crate::MerkleTree; + +use super::{Error, Storage}; + +/// A dummy storage type which does nothing +#[derive(Debug, Default)] +pub struct NoopStorage; + +impl Storage for NoopStorage { + fn load_tree(&self) -> Result>, Error> { + Ok(None) + } + + fn store_tree(&self, _tree: &MerkleTree) -> Result<(), Error> { + Ok(()) + } +} diff --git a/smirk/src/storage/rocksdb/mod.rs b/smirk/src/storage/rocksdb/mod.rs new file mode 100644 index 00000000..0d575dc9 --- /dev/null +++ b/smirk/src/storage/rocksdb/mod.rs @@ -0,0 +1,123 @@ +use std::{collections::HashMap, fmt::Debug, path::Path}; + +use serde::{Deserialize, Serialize}; + +use crate::{hash::Hash, tree::TreeNode, MerkleTree}; + +use super::{Error, Storage}; + +pub struct RocksDb { + inner: rocksdb::TransactionDB, +} + +impl Debug for RocksDb { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "RocksDb") + } +} + +impl RocksDb { + pub fn open(path: &Path) -> Result { + let inner = rocksdb::TransactionDB::open_default(path)?; + + Ok(Self { inner }) + } +} + +/// Struct to represent structure of the tree without storing the actual data +/// +/// json was chosen as an "obviously bad" encoding - we should decide on a proper representation +#[derive(Debug, Clone, Serialize, Deserialize)] +struct NodeStructure { + hash: Hash, + left: Option>, + right: Option>, +} + +impl Storage for RocksDb { + fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error> { + let tx = self.inner.transaction(); + for elem in tree.depth_first() { + let key = Hash::calculate(elem.as_ref()); + let value = elem.as_ref(); + + tx.put(&key.to_bytes(), value)?; + } + + let structure_bytes = + serde_json::to_string(&tree.inner.as_ref().map(|node| get_structure(node)))?; + + tx.put("structure".as_bytes(), structure_bytes)?; + + tx.commit()?; + + Ok(()) + } + + fn load_tree>>(&self) -> Result>, Error> { + let tx = self.inner.transaction(); + + let Some(structure) = tx.get("structure".as_bytes())? 
else { return Ok(None) }; + let structure = serde_json::from_str( + &String::from_utf8(structure).expect("we're not actually going to use json"), + )?; + + let Some(structure) = structure else { return Ok(Some(MerkleTree { inner: None })) }; + + let mut data = HashMap::new(); + + for result in tx.iterator(rocksdb::IteratorMode::Start).into_iter() { + let (key, value) = result?; + let Some(hash) = Hash::decode(&key) else { continue }; + let value = value.to_vec().into(); + + data.insert(hash, value); + } + + let mut tree = rebuild_tree(structure, &data)?; + tree.update_height(); + + Ok(Some(MerkleTree { + inner: Some(Box::new(tree)), + })) + } +} + +fn rebuild_tree( + structure: NodeStructure, + data: &HashMap, +) -> Result, Error> { + let this = data + .get(&structure.hash) + .ok_or(Error::MissingKeyReferenced(structure.hash))?; + + let left = structure + .left + .map(|structure| rebuild_tree(*structure, data)) + .transpose()? + .map(Box::new); + + let right = structure + .right + .map(|structure| rebuild_tree(*structure, data)) + .transpose()? + .map(Box::new); + + Ok(TreeNode { + value: this.clone(), + left, + right, + height: 0, + }) +} + +fn get_structure>(node: &TreeNode) -> NodeStructure { + NodeStructure { + hash: node.hash(), + left: node.left.as_ref().map(|node| Box::new(get_structure(node))), + right: node + .right + .as_ref() + .map(|node| Box::new(get_structure(node))), + } +} diff --git a/smirk/src/testing.rs b/smirk/src/testing.rs new file mode 100644 index 00000000..7c6e4a28 --- /dev/null +++ b/smirk/src/testing.rs @@ -0,0 +1,63 @@ +use tempdir::TempDir; + +use crate::{ + storage::{rocksdb::RocksDb, Storage}, + tree::{MerkleTree, TreeNode}, +}; + +// 1 +// |\ +// 2 5 +// |\ +// 3 4 +pub fn example_node() -> TreeNode { + let mut node = TreeNode { + value: 1, + left: Some(Box::new(TreeNode { + value: 2, + left: Some(Box::new(TreeNode::new(3))), + right: Some(Box::new(TreeNode::new(4))), + height: 0, + })), + right: Some(Box::new(TreeNode::new(5))), + height: 0, + }; + node.update_height(); + node +} + +pub fn example_tree() -> MerkleTree { + MerkleTree { + inner: Some(Box::new(example_node())), + } +} + +#[derive(Debug)] +pub struct TestDb { + _dir: TempDir, + db: RocksDb, +} + +impl TestDb { + pub fn new() -> Self { + let dir = TempDir::new("smirk").unwrap(); + let db = RocksDb::open(dir.path()).unwrap(); + + Self { _dir: dir, db } + } +} + +impl Storage for TestDb { + fn store_tree>( + &self, + tree: &MerkleTree, + ) -> Result<(), crate::storage::Error> { + self.db.store_tree(tree) + } + + fn load_tree>>( + &self, + ) -> Result>, crate::storage::Error> { + self.db.load_tree() + } +} diff --git a/smirk/src/tree/impls.rs b/smirk/src/tree/impls.rs new file mode 100644 index 00000000..860cd4cf --- /dev/null +++ b/smirk/src/tree/impls.rs @@ -0,0 +1,122 @@ +use std::{iter::Chain, option}; + +use traversal::{Bft, DftPre}; + +use super::{MerkleTree, TreeNode}; + +impl FromIterator for MerkleTree { + fn from_iter>(iter: I) -> Self { + // TODO: this is probably pretty inefficient, clean this up later + let mut tree = MerkleTree::new(); + + for elem in iter { + tree.insert(elem); + } + + tree + } +} + +impl MerkleTree { + pub fn depth_first<'a>(&'a self) -> DepthFirstIter<'a, T> { + match &self.inner { + None => DepthFirstIter { inner: None }, + Some(node) => node.depth_first(), + } + } + + pub fn breadth_first<'a>(&'a self) -> BreadthFirstIter<'a, T> { + match &self.inner { + None => BreadthFirstIter { inner: None }, + Some(node) => node.breadth_first(), + } + } +} + +impl TreeNode { 
+    /// Get an iterator over the values in this node in depth-first order
+    pub fn depth_first<'a>(&'a self) -> DepthFirstIter<'a, T> {
+        let inner = DftPre::new(self, children);
+        let inner = Box::new(inner.map(|(_, node)| node));
+
+        DepthFirstIter { inner: Some(inner) }
+    }
+
+    /// Get an iterator over the values in this node in breadth-first order
+    pub fn breadth_first(&self) -> BreadthFirstIter<'_, T> {
+        let inner = Bft::new(self, children);
+        let inner = Box::new(inner.map(|(_, node)| node));
+
+        BreadthFirstIter { inner: Some(inner) }
+    }
+}
+
+fn children<'a, T>(node: &'a TreeNode<T>) -> ChildIter<'a, T> {
+    node.left
+        .as_deref()
+        .into_iter()
+        .chain(node.right.as_deref().into_iter())
+}
+
+type ChildIter<'a, T> = Chain<option::IntoIter<&'a TreeNode<T>>, option::IntoIter<&'a TreeNode<T>>>;
+
+pub struct DepthFirstIter<'a, T> {
+    inner: Option<Box<dyn Iterator<Item = &'a TreeNode<T>> + 'a>>,
+}
+
+impl<'a, T> Iterator for DepthFirstIter<'a, T> {
+    type Item = &'a T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner
+            .as_mut()
+            .map(|iter| iter.next().map(|node| &node.value))
+            .flatten()
+    }
+}
+
+pub struct BreadthFirstIter<'a, T> {
+    inner: Option<Box<dyn Iterator<Item = &'a TreeNode<T>> + 'a>>,
+}
+
+impl<'a, T> Iterator for BreadthFirstIter<'a, T> {
+    type Item = &'a T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner
+            .as_mut()
+            .map(|iter| iter.next().map(|node| &node.value))
+            .flatten()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::{testing::example_node, tree::MerkleTree};
+
+    #[test]
+    fn depth_first_test() {
+        let tree = example_node();
+        let items: Vec<_> = tree.depth_first().copied().collect();
+        assert_eq!(items, vec![1, 2, 3, 4, 5]);
+
+        assert_eq!(
+            MerkleTree::from_iter::<[i32; 0]>([]).depth_first().count(),
+            0
+        );
+    }
+
+    #[test]
+    fn breadth_first_test() {
+        let tree = example_node();
+        let items: Vec<_> = tree.breadth_first().copied().collect();
+        assert_eq!(items, vec![1, 2, 5, 3, 4]);
+
+        assert_eq!(
+            MerkleTree::from_iter::<[i32; 0]>([])
+                .breadth_first()
+                .count(),
+            0
+        );
+    }
+}
diff --git a/smirk/src/tree/mod.rs b/smirk/src/tree/mod.rs
new file mode 100644
index 00000000..b37324b0
--- /dev/null
+++ b/smirk/src/tree/mod.rs
@@ -0,0 +1,321 @@
+use std::cmp::Ordering;
+
+use miden_crypto::hash::rpo::{Rpo256, RpoDigest};
+
+use crate::hash::{Hash, MerklePath};
+
+mod impls;
+pub mod visitor;
+
+#[cfg(test)]
+mod tests;
+
+/// A Merkle tree, with a customizable storage backend and hash function
+///
+/// ```rust
+/// # use smirk::tree::MerkleTree;
+/// let mut tree = MerkleTree::new();
+/// tree.insert(123);
+///
+/// assert_eq!(tree.size(), 1);
+/// ```
+/// You can walk the tree in depth-first or breadth-first ordering:
+/// ```rust
+/// # use smirk::tree::MerkleTree;
+/// let tree = MerkleTree::from_iter([1, 2, 3]);
+///
+/// for i in tree.depth_first() {
+///     println!("{i}");
+/// }
+///
+/// for i in tree.breadth_first() {
+///     println!("{i}");
+/// }
+/// ```
+#[derive(Debug, Clone)]
+pub struct MerkleTree<T> {
+    pub(crate) inner: Option<Box<TreeNode<T>>>,
+}
+
+impl<T> MerkleTree<T> {
+    /// Create a new [`MerkleTree`] with the given storage backend
+    ///
+    /// ```rust
+    /// # use smirk::tree::MerkleTree;
+    /// let tree = MerkleTree::::new();
+    /// ```
+    pub fn new() -> Self {
+        Self { inner: None }
+    }
+
+    pub fn insert(&mut self, value: T)
+    where
+        T: Ord,
+    {
+        self.inner = Some(Self::insert_node(self.inner.take(), value));
+    }
+
+    fn insert_node(node: Option<Box<TreeNode<T>>>, value: T) -> Box<TreeNode<T>>
+    where
+        T: Ord,
+    {
+        let mut node = match node {
+            None => return Box::new(TreeNode::new(value)),
+            Some(node) => node,
+        };
+
+        if value < node.value {
+            node.left = Some(Self::insert_node(node.left.take(), value));
+        } else if value > node.value {
+            node.right = Some(Self::insert_node(node.right.take(), value));
+        } else {
+            return node; // Duplicates not allowed
+        }
+
+        node.update_height();
+        Self::balance(node)
+    }
+
+    fn balance(mut node: Box<TreeNode<T>>) -> Box<TreeNode<T>> {
+        let balance = node.balance_factor();
+
+        if balance > 1 {
+            if node.left.as_ref().unwrap().balance_factor() < 0 {
+                node.left = Some(Self::rotate_left(node.left.unwrap()));
+            }
+            node = Self::rotate_right(node);
+        } else if balance < -1 {
+            if node.right.as_ref().unwrap().balance_factor() > 0 {
+                node.right = Some(Self::rotate_right(node.right.unwrap()));
+            }
+            node = Self::rotate_left(node);
+        }
+
+        node
+    }
+
+    fn rotate_left(mut root: Box<TreeNode<T>>) -> Box<TreeNode<T>> {
+        let mut new_root = root.right.take().unwrap();
+        root.right = new_root.left.take();
+        new_root.left = Some(root);
+
+        new_root.left.as_mut().unwrap().update_height();
+        new_root.update_height();
+
+        new_root
+    }
+
+    fn rotate_right(mut root: Box<TreeNode<T>>) -> Box<TreeNode<T>> {
+        let mut new_root = root.left.take().unwrap();
+        root.left = new_root.right.take();
+        new_root.right = Some(root);
+        new_root.right.as_mut().unwrap().update_height();
+        new_root.update_height();
+
+        new_root
+    }
+
+    /// The number of elements in the tree
+    ///
+    /// ```rust
+    /// # use smirk::MerkleTree;
+    /// let tree = MerkleTree::from_iter([1, 2, 3]);
+    ///
+    /// assert_eq!(tree.size(), 3);
+    /// ```
+    pub fn size(&self) -> usize {
+        struct Counter(usize);
+        impl<T> visitor::Visitor<T> for Counter {
+            fn visit(&mut self, _item: &T) {
+                self.0 += 1;
+            }
+        }
+
+        let mut counter = Counter(0);
+        self.visit(&mut counter);
+
+        counter.0
+    }
+
+    /// The height of this tree
+    #[inline]
+    pub fn height(&self) -> usize {
+        match &self.inner {
+            None => 0,
+            Some(node) => node.height() as usize,
+        }
+    }
+
+    /// Return the node associated with the given value
+    pub fn get(&self, item: &T) -> Option<&TreeNode<T>>
+    where
+        T: Ord,
+    {
+        self.inner.as_ref().and_then(|node| node.get(item))
+    }
+
+    pub fn get_mut(&mut self, item: &T) -> Option<&mut TreeNode<T>>
+    where
+        T: Ord,
+    {
+        self.inner.as_mut().and_then(|node| node.get_mut(item))
+    }
+
+    /// Get the root hash of the Merkle tree
+    pub fn root_hash(&self) -> Hash
+    where
+        T: AsRef<[u8]>,
+    {
+        match &self.inner {
+            None => Hash::NULL, // should this function return an option?
+            Some(node) => node.hash(),
+        }
+    }
+
+    /// Generate a [`MerklePath`] for a given value
+    pub fn path_for(&self, value: &T) -> Option<MerklePath>
+    where
+        T: Ord + AsRef<[u8]>,
+    {
+        let mut components = Vec::with_capacity(self.height());
+
+        let mut opt_node = self.inner.as_deref();
+
+        loop {
+            let node = opt_node?;
+
+            components.push(node.hash());
+
+            match value.cmp(&node.value) {
+                Ordering::Less => opt_node = node.left.as_deref(),
+                Ordering::Greater => opt_node = node.right.as_deref(),
+                Ordering::Equal => {
+                    components.reverse();
+                    return Some(MerklePath::new(components));
+                }
+            }
+        }
+    }
+
+    pub fn verify(&self, path: &MerklePath, value: &T) -> bool
+    where
+        T: AsRef<[u8]> + Ord,
+    {
+        if path.components().last() != Some(&self.root_hash()) {
+            return false;
+        }
+
+        let mut hash = Hash::calculate(value.as_ref());
+
+        for slice in path.components().windows(2) {
+            let first = &slice[0];
+            let second = &slice[1];
+
+            hash.merge(first);
+            if hash != *second {
+                return false;
+            }
+        }
+
+        true
+    }
+}
+
+/// An individual node in a Merkle tree
+#[derive(Debug, Clone)]
+pub struct TreeNode<T> {
+    pub(crate) value: T,
+    pub(crate) left: Option<Box<TreeNode<T>>>,
+    pub(crate) right: Option<Box<TreeNode<T>>>,
+    pub(crate) height: isize,
+}
+
+impl<T> TreeNode<T> {
+    /// Create a new [`TreeNode`] with no children
+    pub fn new(value: T) -> Self {
+        Self {
+            value,
+            left: None,
+            right: None,
+            height: 0,
+        }
+    }
+
+    /// The height of the tree
+    ///
+    /// For example, in a tree with 3 nodes A, B, C, where A is the parent of B and C:
+    /// - A has height 1
+    /// - B has height 0
+    /// - C has height 0
+    #[inline]
+    pub fn height(&self) -> isize {
+        self.height
+    }
+
+    pub(crate) fn update_height(&mut self) {
+        let left_height = self.left.as_ref().map_or(0, |x| x.height());
+        let right_height = self.right.as_ref().map_or(0, |x| x.height());
+        self.height = 1 + std::cmp::max(left_height, right_height);
+    }
+
+    fn balance_factor(&self) -> isize {
+        let left_height = self.left.as_ref().map_or(0, |x| x.height());
+        let right_height = self.right.as_ref().map_or(0, |x| x.height());
+        left_height - right_height
+    }
+
+    pub fn get(&self, item: &T) -> Option<&TreeNode<T>>
+    where
+        T: Ord,
+    {
+        match item.cmp(&self.value) {
+            Ordering::Less => self.left.as_ref().and_then(|node| node.get(item)),
+            Ordering::Greater => self.right.as_ref().and_then(|node| node.get(item)),
+            Ordering::Equal => Some(self),
+        }
+    }
+
+    pub fn get_mut(&mut self, item: &T) -> Option<&mut TreeNode<T>>
+    where
+        T: Ord,
+    {
+        match item.cmp(&self.value) {
+            Ordering::Less => self.left.as_mut().and_then(|node| node.get_mut(item)),
+            Ordering::Greater => self.right.as_mut().and_then(|node| node.get_mut(item)),
+            Ordering::Equal => Some(self),
+        }
+    }
+
+    /// The hash of the value contained in this node (ignoring any children)
+    pub fn hash_of_value(&self) -> Hash
+    where
+        T: AsRef<[u8]>,
+    {
+        // should we memoize this?
+ let bytes = self.value.as_ref(); + Hash::calculate(bytes) + } + + /// The hash of this value (and all child values) + + // we should probably memoize this + pub fn hash(&self) -> Hash + where + T: AsRef<[u8]>, + { + let left = self + .left + .as_ref() + .map(|node| node.hash()) + .unwrap_or(Hash::NULL); + let this = Hash::calculate(self.value.as_ref()); + let right = self + .right + .as_ref() + .map(|node| node.hash()) + .unwrap_or(Hash::NULL); + + let left_this = Rpo256::merge(&[left.digest(), this.digest()]); + Hash::from(Rpo256::merge(&[left_this, right.digest()])) + } +} diff --git a/smirk/src/tree/tests.rs b/smirk/src/tree/tests.rs new file mode 100644 index 00000000..4f140067 --- /dev/null +++ b/smirk/src/tree/tests.rs @@ -0,0 +1,15 @@ +use super::*; + +#[test] +fn simple_example() { + let mut tree = MerkleTree::from_iter([1, 2, 3]); + + assert_eq!(tree.size(), 3); + + tree.insert(4); + assert_eq!(tree.size(), 4); + + println!("{tree:#?}"); + + let _items: Vec<_> = tree.depth_first().copied().collect(); +} diff --git a/smirk/src/tree/visitor.rs b/smirk/src/tree/visitor.rs new file mode 100644 index 00000000..71cec34d --- /dev/null +++ b/smirk/src/tree/visitor.rs @@ -0,0 +1,76 @@ +use super::{MerkleTree, TreeNode}; + +pub trait Visitor { + fn visit(&mut self, item: &T); +} + +impl Visitor for &mut V +where + V: Visitor, +{ + fn visit(&mut self, item: &T) { + V::visit(self, item) + } +} + +impl MerkleTree { + pub fn visit>(&self, mut visitor: V) { + if let Some(inner) = &self.inner { + inner.visit(&mut visitor); + } + } +} + +impl TreeNode { + pub fn visit>(&self, visitor: &mut V) { + visitor.visit(&self.value); + + if let Some(left) = &self.left { + left.visit(visitor); + } + + if let Some(right) = &self.right { + right.visit(visitor); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn counter_example() { + struct Counter(usize); + + impl Visitor for Counter { + fn visit(&mut self, _item: &T) { + self.0 += 1; + } + } + + let tree = MerkleTree::from_iter([1, 2, 3]); + let mut counter = Counter(0); + tree.visit(&mut counter); + + assert_eq!(counter.0, 3); + } + + #[test] + fn sum_example() { + struct Sum(i32); + + impl Visitor for Sum { + fn visit(&mut self, item: &i32) { + self.0 += *item; + } + } + + let tree = MerkleTree::from_iter([1, 2, 3]); + let mut sum = Sum(0); + tree.visit(&mut sum); + + assert_eq!(sum.0, 6); + + } +} From 56be1b7e5d341276edba115dd59846610b26a5a4 Mon Sep 17 00:00:00 2001 From: Cameron Date: Mon, 10 Jul 2023 18:49:54 +0100 Subject: [PATCH 02/15] big refactor done except storage --- Cargo.lock | 2 + flake.lock | 39 +--- flake.nix | 5 +- smirk/Cargo.toml | 4 + smirk/src/hash.rs | 167 ---------------- smirk/src/hash/hashable.rs | 139 +++++++++++++ smirk/src/hash/mod.rs | 132 ++++++++++++ smirk/src/hash/proptest_impls.rs | 12 ++ smirk/src/hash/serde_impls.rs | 50 +++++ smirk/src/lib.rs | 43 +++- smirk/src/storage/error.rs | 5 +- smirk/src/storage/mod.rs | 31 +-- smirk/src/storage/noop.rs | 14 +- smirk/src/storage/rocksdb/mod.rs | 172 ++++++++-------- smirk/src/testing.rs | 62 +++--- smirk/src/tree/impls.rs | 96 +++++---- smirk/src/tree/macros.rs | 75 +++++++ smirk/src/tree/mod.rs | 333 +++++++++++++++++++++---------- smirk/src/tree/tests.rs | 12 +- smirk/src/tree/visitor.rs | 44 ++-- 20 files changed, 916 insertions(+), 521 deletions(-) delete mode 100644 smirk/src/hash.rs create mode 100644 smirk/src/hash/hashable.rs create mode 100644 smirk/src/hash/mod.rs create mode 100644 smirk/src/hash/proptest_impls.rs create mode 100644 
smirk/src/hash/serde_impls.rs create mode 100644 smirk/src/tree/macros.rs diff --git a/Cargo.lock b/Cargo.lock index e8f7411b..f4f54d07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4931,10 +4931,12 @@ version = "0.1.0" dependencies = [ "hex", "miden-crypto 0.6.0", + "proptest", "rocksdb", "serde", "serde_json", "tempdir", + "test-strategy", "thiserror", "traversal", ] diff --git a/flake.lock b/flake.lock index a1b3bafb..520c0027 100644 --- a/flake.lock +++ b/flake.lock @@ -36,39 +36,7 @@ "type": "github" } }, - "naersk": { - "inputs": { - "nixpkgs": "nixpkgs" - }, - "locked": { - "lastModified": 1679567394, - "narHash": "sha256-ZvLuzPeARDLiQUt6zSZFGOs+HZmE+3g4QURc8mkBsfM=", - "owner": "nix-community", - "repo": "naersk", - "rev": "88cd22380154a2c36799fe8098888f0f59861a15", - "type": "github" - }, - "original": { - "owner": "nix-community", - "repo": "naersk", - "type": "github" - } - }, "nixpkgs": { - "locked": { - "lastModified": 1685894048, - "narHash": "sha256-QKqv1QS+22k9oxncj1AnAxeqS5jGnQiUW3Jq3B+dI1w=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "2e56a850786211972d99d2bb39665a9b5a1801d6", - "type": "github" - }, - "original": { - "id": "nixpkgs", - "type": "indirect" - } - }, - "nixpkgs_2": { "locked": { "lastModified": 1685947919, "narHash": "sha256-v282Pwz8tPwKqby4lQVGz3EwSXvYHAz4HXWTQcMGh04=", @@ -83,7 +51,7 @@ "type": "github" } }, - "nixpkgs_3": { + "nixpkgs_2": { "locked": { "lastModified": 1681358109, "narHash": "sha256-eKyxW4OohHQx9Urxi7TQlFBTDWII+F+x2hklDOQPB50=", @@ -102,15 +70,14 @@ "root": { "inputs": { "flake-utils": "flake-utils", - "naersk": "naersk", - "nixpkgs": "nixpkgs_2", + "nixpkgs": "nixpkgs", "rust-overlay": "rust-overlay" } }, "rust-overlay": { "inputs": { "flake-utils": "flake-utils_2", - "nixpkgs": "nixpkgs_3" + "nixpkgs": "nixpkgs_2" }, "locked": { "lastModified": 1685932304, diff --git a/flake.nix b/flake.nix index d6efb3e8..6ceff2ae 100644 --- a/flake.nix +++ b/flake.nix @@ -4,10 +4,9 @@ nixpkgs.url = "github:NixOS/nixpkgs"; flake-utils.url = "github:numtide/flake-utils"; rust-overlay.url = "github:oxalica/rust-overlay"; - naersk.url = "github:nix-community/naersk"; }; - outputs = { self, nixpkgs, flake-utils, rust-overlay, ... }@inputs: + outputs = { self, nixpkgs, flake-utils, rust-overlay, ... 
}: flake-utils.lib.eachDefaultSystem (system: let pkgs = import nixpkgs { @@ -15,7 +14,7 @@ overlays = [ (import rust-overlay) ]; }; - rustToolchain = pkgs.rust-bin.stable.latest.default.override { + rustToolchain = pkgs.rust-bin.stable."1.68.0".default.override { extensions = [ "rust-src" ]; }; diff --git a/smirk/Cargo.toml b/smirk/Cargo.toml index b78ae5ca..483021d2 100644 --- a/smirk/Cargo.toml +++ b/smirk/Cargo.toml @@ -12,5 +12,9 @@ traversal = "0.1" serde = { version = "1", features = ["derive"] } serde_json = "1" +proptest = { version = "1", optional = true } + [dev-dependencies] tempdir = "0.3" +proptest = "1" +test-strategy = "0.3" diff --git a/smirk/src/hash.rs b/smirk/src/hash.rs deleted file mode 100644 index b784b63b..00000000 --- a/smirk/src/hash.rs +++ /dev/null @@ -1,167 +0,0 @@ -use std::fmt::Display; - -use miden_crypto::{ - hash::rpo::{Rpo256, RpoDigest}, - utils::{Deserializable, Serializable, SliceReader}, - Felt, -}; -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub struct Hash(RpoDigest); - -impl std::hash::Hash for Hash { - fn hash(&self, state: &mut H) { - self.0.to_bytes().hash(state); - } -} - -mod serde_impl { - use miden_crypto::utils::{Deserializable, Serializable, SliceReader}; - use serde::{de::Visitor, Deserializer, Serializer}; - - use super::*; - - impl Serialize for Hash { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let mut bytes = vec![0; 32]; - self.0.write_into(&mut bytes); - serializer.serialize_bytes(&bytes) - } - } - - impl<'de> Deserialize<'de> for Hash { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct V; - impl Visitor<'_> for V { - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("bytes representing a rescue-prime optimized hash") - } - - type Value = RpoDigest; - - fn visit_bytes(self, v: &[u8]) -> Result - where - E: serde::de::Error, - { - if v.len() != 32 { - return Err(E::custom(format!( - "wrong number of bytes - expected 32, found {}", - v.len() - ))); - } - - let mut reader = SliceReader::new(v); - RpoDigest::read_from(&mut reader) - .map_err(|e| E::custom(format!("deserialization error: {e}"))) - } - } - - deserializer.deserialize_bytes(V).map(Hash) - } - } -} - -impl Display for Hash { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Hash({})", hex::encode(self.0.as_bytes())) - } -} - -impl Hash { - pub const NULL: Hash = Hash(RpoDigest::new([Felt::new(0); 4])); - - /// The length of this hash in bytes - pub const LEN: usize = 32; - - /// The bytes of this hash - #[inline] - pub fn to_bytes(&self) -> [u8; Self::LEN] { - // to_bytes is a more appropriate name, since this method copies into a new slice - self.0.as_bytes() - } - - #[inline] - pub fn encode(&self) -> Vec { - let mut bytes = vec![0; 32]; - self.0.write_into(&mut bytes); - bytes - } - - #[inline] - pub fn decode(bytes: &[u8]) -> Option { - if bytes.len() != 32 { - return None; - } - - let mut reader = SliceReader::new(bytes); - RpoDigest::read_from(&mut reader).ok().map(Hash) - } - - /// Hash the given bytes - #[inline] - pub fn calculate(bytes: &[u8]) -> Self { - Self(Rpo256::hash(bytes)) - } - - #[inline] - pub fn digest(&self) -> RpoDigest { - self.0 - } - - /// Replace `self` with `rpo256(this + other)` - #[inline] - pub fn merge(&mut self, other: &Hash) { - self.0 = Rpo256::merge(&[self.0, other.0]); - } -} - -impl From for Hash { - fn from(value: 
RpoDigest) -> Self { - Self(value) - } -} - -pub struct MerklePath { - /// The components of the path, with the root at the end - components: Vec, -} - -impl MerklePath { - /// Create a new [`MerklePath`] from the given components - /// - /// The components should be the hashes that form the path, with the root of the tree at the - /// end - #[inline] - pub fn new(components: Vec) -> Self { - Self { components } - } - - /// Get a slice of hashes representing the components of the path - #[inline] - pub fn components(&self) -> &[Hash] { - &self.components - } - - /// Get a mutable slice of hashes representing the components of the path - #[inline] - pub fn components_mut(&mut self) -> &mut [Hash] { - &mut self.components - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn null_hash_is_all_zeroes() { - assert_eq!(Hash::NULL.to_bytes(), [0; 32]); - } -} diff --git a/smirk/src/hash/hashable.rs b/smirk/src/hash/hashable.rs new file mode 100644 index 00000000..5341b40d --- /dev/null +++ b/smirk/src/hash/hashable.rs @@ -0,0 +1,139 @@ +use std::{ + borrow::Cow, + ffi::{OsStr, OsString}, + path::{Path, PathBuf}, + rc::Rc, + sync::Arc, +}; + +use miden_crypto::hash::rpo::Rpo256; + +use super::Digest; + +/// Types which can be hashed +/// +/// This trait is primarily used as a bound on the value type for most useful functions on +/// [`MerkleTree`] +/// +/// [`MerkleTree`]: crate::MerkleTree +pub trait Hashable { + /// Compute the hash of this object + /// + /// ```rust + /// # use smirk::hash::Hashable; + /// let digest = 1i32.hash(); + /// println!("the hash of 1 is {digest}"); + /// ``` + fn hash(&self) -> Digest; +} + +// POINTER IMPLS + +impl Hashable for &T +where + T: Hashable, +{ + fn hash(&self) -> Digest { + ::hash(self) + } +} + +impl Hashable for &mut T +where + T: Hashable, +{ + fn hash(&self) -> Digest { + ::hash(self) + } +} + +impl Hashable for Box +where + T: Hashable, +{ + fn hash(&self) -> Digest { + ::hash(&self) + } +} + +impl<'a, T: ?Sized> Hashable for Cow<'a, T> +where + T: Hashable + Clone, +{ + fn hash(&self) -> Digest { + ::hash(&self) + } +} + +impl Hashable for Rc +where + T: Hashable, +{ + fn hash(&self) -> Digest { + ::hash(&self) + } +} + +impl Hashable for Arc +where + T: Hashable, +{ + fn hash(&self) -> Digest { + ::hash(&self) + } +} + +/// COLLECTION IMPLS + + + + + +macro_rules! int_impl { + ($int:ty) => { + impl Hashable for $int { + fn hash(&self) -> Digest { + Digest(Rpo256::hash(&self.to_be_bytes())) + } + } + }; +} + +int_impl!(i8); +int_impl!(i16); +int_impl!(i32); +int_impl!(i64); +int_impl!(i128); +int_impl!(isize); +int_impl!(u8); +int_impl!(u16); +int_impl!(u32); +int_impl!(u64); +int_impl!(u128); +int_impl!(usize); + +/// impl for any type that implements `AsRef<[u8]>` +macro_rules! as_ref_impl { + ($t:ty) => { + impl Hashable for $t { + fn hash(&self) -> Digest { + let bytes = <$t as AsRef<[u8]>>::as_ref(self); + Digest(Rpo256::hash(bytes)) + } + } + }; +} + +impl Hashable for [u8; N] { + fn hash(&self) -> Digest { + let bytes = <[u8; N] as AsRef<[u8]>>::as_ref(self); + Digest(Rpo256::hash(bytes)) + } +} + +as_ref_impl!([u8]); +as_ref_impl!(Vec); +as_ref_impl!(str); +as_ref_impl!(String); + + diff --git a/smirk/src/hash/mod.rs b/smirk/src/hash/mod.rs new file mode 100644 index 00000000..208b8f9e --- /dev/null +++ b/smirk/src/hash/mod.rs @@ -0,0 +1,132 @@ +//! Items relating to hashing data +//! +//! 
In particular, the [`Digest`] type and the [`Hashable`] trait + +use std::fmt::Display; + +use miden_crypto::{ + hash::rpo::{Rpo256, RpoDigest}, + utils::{Deserializable, SliceReader}, + Felt, +}; + +mod hashable; +pub use hashable::Hashable; +mod serde_impls; +#[cfg(any(test, feature = "proptest"))] +mod proptest_impls; + +/// A Rescue-Prime Optimized digest +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Digest(RpoDigest); + +impl Display for Digest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Hash({})", hex::encode(self.0.as_bytes())) + } +} + + +impl Digest { + /// The null hash + /// + /// This represents the hash of "nothing" (for example, an empty Merkle tree will have this as + /// the root hash) + /// + /// ```rust + /// # use smirk::hash::Digest; + /// # use smirk::MerkleTree; + /// let empty_tree = MerkleTree::::new(); + /// assert_eq!(empty_tree.root_hash(), Digest::NULL); + /// ``` + pub const NULL: Digest = Digest(RpoDigest::new([Felt::new(0); 4])); + + /// The length of this hash in bytes + const LEN: usize = 32; + + /// Get the representation of this hash as a byte array + #[inline] + pub fn to_bytes(&self) -> [u8; Self::LEN] { + self.0.as_bytes() + } + + /// Create a [`Digest`] from the byte array representation + /// + /// Note: this returns an `Option` because not all possible byte arrays are valid [`Digest`]s + /// + /// Any byte array returned from [`Digets::to_bytes`] will be valid for this function, and the + /// resulting hash will be equal to the hash that created the byte array + #[inline] + pub fn from_bytes(bytes: [u8; 32]) -> Option { + let mut reader = SliceReader::new(&bytes); + RpoDigest::read_from(&mut reader).ok().map(Digest) + } + + /// Calculate the hash of the given bytes + #[inline] + pub fn calculate(bytes: &[u8]) -> Self { + Self(Rpo256::hash(bytes)) + } + + /// Replace `self` with `rpo256(this + other)` + #[inline] + pub fn merge(&mut self, other: &Digest) { + self.0 = Rpo256::merge(&[self.0, other.0]); + } +} + +impl From for Digest { + fn from(value: RpoDigest) -> Self { + Self(value) + } +} + +/// A Merkle path that can be used to prove the existance of a value in the tree +pub struct MerklePath { + /// The components of the path, with the root at the end + components: Vec, +} + +impl MerklePath { + /// Create a new [`MerklePath`] from the given components + /// + /// The components should be the hashes that form the path, with the root of the tree at the + /// end + #[inline] + pub fn new(components: Vec) -> Self { + Self { components } + } + + /// Get a slice of hashes representing the components of the path + #[inline] + pub fn components(&self) -> &[Digest] { + &self.components + } + + /// Get a mutable slice of hashes representing the components of the path + #[inline] + pub fn components_mut(&mut self) -> &mut [Digest] { + &mut self.components + } +} + +#[cfg(test)] +mod tests { + use proptest::prop_assert_eq; + use test_strategy::proptest; + + use super::*; + + #[test] + fn null_hash_is_all_zeroes() { + assert_eq!(Digest::NULL.to_bytes(), [0; 32]); + } + + #[proptest] + fn digest_bytes_round_trip(digest: Digest) { + let bytes = digest.to_bytes(); + let digest_again = Digest::from_bytes(bytes).unwrap(); + + prop_assert_eq!(digest, digest_again); + } +} diff --git a/smirk/src/hash/proptest_impls.rs b/smirk/src/hash/proptest_impls.rs new file mode 100644 index 00000000..193dffbd --- /dev/null +++ b/smirk/src/hash/proptest_impls.rs @@ -0,0 +1,12 @@ +use super::*; + +use 
proptest::{arbitrary::StrategyFor, prelude::*, strategy::Map}; + +impl Arbitrary for Digest { + type Parameters = (); + type Strategy = Map, fn([u64; 4]) -> Self>; + + fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { + any::<[u64; 4]>().prop_map(|nums| Digest(RpoDigest::new(nums.map(Felt::new)))) + } +} diff --git a/smirk/src/hash/serde_impls.rs b/smirk/src/hash/serde_impls.rs new file mode 100644 index 00000000..6295444e --- /dev/null +++ b/smirk/src/hash/serde_impls.rs @@ -0,0 +1,50 @@ +use super::*; + +use miden_crypto::utils::{Deserializable, Serializable, SliceReader}; +use serde::{de::Visitor, Deserializer, Serializer}; +use serde::{Deserialize, Serialize}; + +impl Serialize for Digest { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut bytes = vec![0; 32]; + self.0.write_into(&mut bytes); + serializer.serialize_bytes(&bytes) + } +} + +impl<'de> Deserialize<'de> for Digest { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct V; + impl Visitor<'_> for V { + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("bytes representing a rescue-prime optimized hash") + } + + type Value = RpoDigest; + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: serde::de::Error, + { + if v.len() != 32 { + return Err(E::custom(format!( + "wrong number of bytes - expected 32, found {}", + v.len() + ))); + } + + let mut reader = SliceReader::new(v); + RpoDigest::read_from(&mut reader) + .map_err(|e| E::custom(format!("deserialization error: {e}"))) + } + } + + deserializer.deserialize_bytes(V).map(Digest) + } +} diff --git a/smirk/src/lib.rs b/smirk/src/lib.rs index b7924a53..743b31e0 100644 --- a/smirk/src/lib.rs +++ b/smirk/src/lib.rs @@ -1,9 +1,44 @@ -mod hash; +#![warn(clippy::pedantic)] +#![deny(missing_docs)] + +//! Persistent Merkle tree +//! +//! This library provides `MerkleTree`, a Merkle tree that uses the [Rescue-Prime Optimized][rpo] +//! hash function, with a map-like API. There is also a [`Storage`] API for persisting the tree in +//! [rocksdb][db] +//! +//! ```rust +//! # use smirk::{MerkleTree, smirk}; +//! let mut tree = MerkleTree::new(); +//! tree.insert(1, "hello"); +//! tree.insert(2, "world"); +//! +//! // or you can use the macro to create a new tree +//! let tree = smirk! { +//! 1 => "hello", +//! 2 => "world", +//! }; +//! +//! assert_eq!(tree.get(&1), Some(&"hello")); +//! assert_eq!(tree.get(&2), Some(&"world")); +//! assert_eq!(tree.get(&3), None); +//! +//! ``` +//! +//! Types provided by this library implement [`Arbitrary`], for use with [`proptest`], gated behind +//! the `proptest` feature flag. +//! +//! [rpo]: https://eprint.iacr.org/2022/1577.pdf +//! [db]: https://github.com/facebook/rocksdb +//! +//! [`Storage`]: storage::Storage +//! 
[`Arbitrary`]: proptest::prelude::Arbitrary + +pub mod hash; pub mod storage; -pub mod tree; -pub use tree::MerkleTree; +mod tree; +pub use tree::{visitor::Visitor, MerkleTree, TreeNode}; #[cfg(test)] mod testing; - diff --git a/smirk/src/storage/error.rs b/smirk/src/storage/error.rs index 32db740b..ee3878fd 100644 --- a/smirk/src/storage/error.rs +++ b/smirk/src/storage/error.rs @@ -1,9 +1,10 @@ -use crate::hash::Hash; +use crate::hash::Digest; +/// An error en #[derive(Debug, thiserror::Error)] pub enum Error { #[error("A key referenced this hash as a child, but it wasn't present: {0}")] - MissingKeyReferenced(Hash), + MissingKeyReferenced(Digest), #[error("the `structure` key was not defined")] Unknown(#[from] Box), #[error("json: {0}")] diff --git a/smirk/src/storage/mod.rs b/smirk/src/storage/mod.rs index 63a39b1b..0bd4d099 100644 --- a/smirk/src/storage/mod.rs +++ b/smirk/src/storage/mod.rs @@ -1,35 +1,36 @@ +//! Persistence backends for Merkle trees use std::fmt::Debug; use crate::tree::MerkleTree; -pub mod noop; -pub mod rocksdb; +// pub mod noop; +// pub mod rocksdb; -mod error; -pub use error::Error; +// mod error; +// pub use error::Error; use serde::{Deserialize, Serialize}; /// Types which can act as a storage backend for a Merkle tree pub trait Storage: Debug { - fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error>; - - fn load_tree(&self) -> Result>, Error>; + // fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error>; + // + // fn load_tree(&self) -> Result>, Error>; } #[cfg(test)] mod tests { - use crate::testing::{example_tree, TestDb}; + use crate::testing::example_tree; use super::*; #[test] fn simple_example() { - let test_db = TestDb::new(); - let tree = example_tree(); - - assert!(test_db.load_tree().unwrap().is_none()); - - test_db.store_tree(&tree).unwrap(); - let tree_again = test_db.load_tree().unwrap().unwrap(); + // let test_db = TestDb::new(); + // let tree = example_tree(); + // + // assert!(test_db.load_tree().unwrap().is_none()); + // + // test_db.store_tree(&tree).unwrap(); + // let tree_again = test_db.load_tree().unwrap().unwrap(); } } diff --git a/smirk/src/storage/noop.rs b/smirk/src/storage/noop.rs index 8b286b9a..afeb1b4e 100644 --- a/smirk/src/storage/noop.rs +++ b/smirk/src/storage/noop.rs @@ -7,11 +7,11 @@ use super::{Error, Storage}; pub struct NoopStorage; impl Storage for NoopStorage { - fn load_tree(&self) -> Result>, Error> { - Ok(None) - } - - fn store_tree(&self, _tree: &MerkleTree) -> Result<(), Error> { - Ok(()) - } + // fn load_tree(&self) -> Result>, Error> { + // Ok(None) + // } + // + // fn store_tree(&self, _tree: &MerkleTree) -> Result<(), Error> { + // Ok(()) + // } } diff --git a/smirk/src/storage/rocksdb/mod.rs b/smirk/src/storage/rocksdb/mod.rs index 0d575dc9..18046e6b 100644 --- a/smirk/src/storage/rocksdb/mod.rs +++ b/smirk/src/storage/rocksdb/mod.rs @@ -2,7 +2,7 @@ use std::{collections::HashMap, fmt::Debug, path::Path}; use serde::{Deserialize, Serialize}; -use crate::{hash::Hash, tree::TreeNode, MerkleTree}; +use crate::{hash::Digest, tree::TreeNode, MerkleTree}; use super::{Error, Storage}; @@ -29,95 +29,95 @@ impl RocksDb { /// json was chosen as an "obviously bad" encoding - we should decide on a proper representation #[derive(Debug, Clone, Serialize, Deserialize)] struct NodeStructure { - hash: Hash, + hash: Digest, left: Option>, right: Option>, } impl Storage for RocksDb { - fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error> { - let tx = self.inner.transaction(); - for elem in tree.depth_first() 
{ - let key = Hash::calculate(elem.as_ref()); - let value = elem.as_ref(); - - tx.put(&key.to_bytes(), value)?; - } - - let structure_bytes = - serde_json::to_string(&tree.inner.as_ref().map(|node| get_structure(node)))?; - - tx.put("structure".as_bytes(), structure_bytes)?; - - tx.commit()?; - - Ok(()) - } - - fn load_tree>>(&self) -> Result>, Error> { - let tx = self.inner.transaction(); - - let Some(structure) = tx.get("structure".as_bytes())? else { return Ok(None) }; - let structure = serde_json::from_str( - &String::from_utf8(structure).expect("we're not actually going to use json"), - )?; - - let Some(structure) = structure else { return Ok(Some(MerkleTree { inner: None })) }; - - let mut data = HashMap::new(); - - for result in tx.iterator(rocksdb::IteratorMode::Start).into_iter() { - let (key, value) = result?; - let Some(hash) = Hash::decode(&key) else { continue }; - let value = value.to_vec().into(); - - data.insert(hash, value); - } - - let mut tree = rebuild_tree(structure, &data)?; - tree.update_height(); - - Ok(Some(MerkleTree { - inner: Some(Box::new(tree)), - })) - } + // fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error> { + // let tx = self.inner.transaction(); + // for elem in tree.depth_first() { + // let key = Digest::calculate(elem.as_ref()); + // let value = elem.as_ref(); + // + // tx.put(&key.to_bytes(), value)?; + // } + // + // let structure_bytes = + // serde_json::to_string(&tree.inner.as_ref().map(|node| get_structure(node)))?; + // + // tx.put("structure".as_bytes(), structure_bytes)?; + // + // tx.commit()?; + // + // Ok(()) + // } + // + // fn load_tree>>(&self) -> Result>, Error> { + // let tx = self.inner.transaction(); + // + // let Some(structure) = tx.get("structure".as_bytes())? else { return Ok(None) }; + // let structure = serde_json::from_str( + // &String::from_utf8(structure).expect("we're not actually going to use json"), + // )?; + // + // let Some(structure) = structure else { return Ok(Some(MerkleTree { inner: None })) }; + // + // let mut data = HashMap::new(); + // + // for result in tx.iterator(rocksdb::IteratorMode::Start).into_iter() { + // let (key, value) = result?; + // let Some(hash) = Digest::decode(&key) else { continue }; + // let value = value.to_vec().into(); + // + // data.insert(hash, value); + // } + // + // let mut tree = rebuild_tree(structure, &data)?; + // tree.update_height(); + // + // Ok(Some(MerkleTree { + // inner: Some(Box::new(tree)), + // })) + // } } -fn rebuild_tree( - structure: NodeStructure, - data: &HashMap, -) -> Result, Error> { - let this = data - .get(&structure.hash) - .ok_or(Error::MissingKeyReferenced(structure.hash))?; - - let left = structure - .left - .map(|structure| rebuild_tree(*structure, data)) - .transpose()? - .map(Box::new); - - let right = structure - .right - .map(|structure| rebuild_tree(*structure, data)) - .transpose()? - .map(Box::new); - - Ok(TreeNode { - value: this.clone(), - left, - right, - height: 0, - }) -} - -fn get_structure>(node: &TreeNode) -> NodeStructure { - NodeStructure { - hash: node.hash(), - left: node.left.as_ref().map(|node| Box::new(get_structure(node))), - right: node - .right - .as_ref() - .map(|node| Box::new(get_structure(node))), - } -} +// fn rebuild_tree( +// structure: NodeStructure, +// data: &HashMap, +// ) -> Result, Error> { +// let this = data +// .get(&structure.hash) +// .ok_or(Error::MissingKeyReferenced(structure.hash))?; +// +// let left = structure +// .left +// .map(|structure| rebuild_tree(*structure, data)) +// .transpose()? 
+// .map(Box::new); +// +// let right = structure +// .right +// .map(|structure| rebuild_tree(*structure, data)) +// .transpose()? +// .map(Box::new); +// +// Ok(TreeNode { +// value: this.clone(), +// left, +// right, +// height: 0, +// }) +// } +// +// fn get_structure>(node: &TreeNode) -> NodeStructure { +// NodeStructure { +// hash: node.hash(), +// left: node.left.as_ref().map(|node| Box::new(get_structure(node))), +// right: node +// .right +// .as_ref() +// .map(|node| Box::new(get_structure(node))), +// } +// } diff --git a/smirk/src/testing.rs b/smirk/src/testing.rs index 7c6e4a28..bcc4689c 100644 --- a/smirk/src/testing.rs +++ b/smirk/src/testing.rs @@ -1,7 +1,8 @@ use tempdir::TempDir; use crate::{ - storage::{rocksdb::RocksDb, Storage}, + hash::Digest, + // storage::{rocksdb::RocksDb, Storage}, tree::{MerkleTree, TreeNode}, }; @@ -10,54 +11,45 @@ use crate::{ // 2 5 // |\ // 3 4 -pub fn example_node() -> TreeNode { +pub fn example_node() -> TreeNode { let mut node = TreeNode { + key: 1, value: 1, + hash: Digest::NULL, left: Some(Box::new(TreeNode { + key: 2, value: 2, - left: Some(Box::new(TreeNode::new(3))), - right: Some(Box::new(TreeNode::new(4))), + hash: Digest::NULL, + left: Some(Box::new(TreeNode::new(3, 3))), + right: Some(Box::new(TreeNode::new(4, 4))), height: 0, })), - right: Some(Box::new(TreeNode::new(5))), + right: Some(Box::new(TreeNode::new(5, 5))), height: 0, }; node.update_height(); node } -pub fn example_tree() -> MerkleTree { +pub fn example_tree() -> MerkleTree { MerkleTree { inner: Some(Box::new(example_node())), } } -#[derive(Debug)] -pub struct TestDb { - _dir: TempDir, - db: RocksDb, -} - -impl TestDb { - pub fn new() -> Self { - let dir = TempDir::new("smirk").unwrap(); - let db = RocksDb::open(dir.path()).unwrap(); - - Self { _dir: dir, db } - } -} - -impl Storage for TestDb { - fn store_tree>( - &self, - tree: &MerkleTree, - ) -> Result<(), crate::storage::Error> { - self.db.store_tree(tree) - } - - fn load_tree>>( - &self, - ) -> Result>, crate::storage::Error> { - self.db.load_tree() - } -} +// #[derive(Debug)] +// pub struct TestDb { +// _dir: TempDir, +// db: RocksDb, +// } +// +// impl TestDb { +// pub fn new() -> Self { +// let dir = TempDir::new("smirk").unwrap(); +// let db = RocksDb::open(dir.path()).unwrap(); +// +// Self { _dir: dir, db } +// } +// } +// +// impl Storage for TestDb {} diff --git a/smirk/src/tree/impls.rs b/smirk/src/tree/impls.rs index 860cd4cf..e442978a 100644 --- a/smirk/src/tree/impls.rs +++ b/smirk/src/tree/impls.rs @@ -2,30 +2,33 @@ use std::{iter::Chain, option}; use traversal::{Bft, DftPre}; +use crate::hash::Hashable; + use super::{MerkleTree, TreeNode}; -impl FromIterator for MerkleTree { - fn from_iter>(iter: I) -> Self { - // TODO: this is probably pretty inefficient, clean this up later +impl FromIterator<(K, V)> for MerkleTree { + fn from_iter>(iter: T) -> Self { let mut tree = MerkleTree::new(); - for elem in iter { - tree.insert(elem); + for (key, value) in iter { + tree.insert(key, value); } tree } } -impl MerkleTree { - pub fn depth_first<'a>(&'a self) -> DepthFirstIter<'a, T> { +impl MerkleTree { + /// Returns an iterator over the keys and values in depth-first order + pub fn depth_first<'a>(&'a self) -> DepthFirstIter<'a, K, V> { match &self.inner { None => DepthFirstIter { inner: None }, Some(node) => node.depth_first(), } } - pub fn breadth_first<'a>(&'a self) -> BreadthFirstIter<'a, T> { + /// Returns an iterator over the keys and values in breadth-first order + pub fn breadth_first<'a>(&'a self) -> 
BreadthFirstIter<'a, K, V> { match &self.inner { None => BreadthFirstIter { inner: None }, Some(node) => node.breadth_first(), @@ -33,60 +36,77 @@ impl MerkleTree { } } -impl TreeNode { +impl TreeNode { /// Get an iterator over the values in this node in depth-first order - pub fn depth_first<'a>(&'a self) -> DepthFirstIter<'a, T> { + fn depth_first<'a>(&'a self) -> DepthFirstIter<'a, K, V> { let inner = DftPre::new(self, children); - let inner = Box::new(inner.map(|(_, node)| node)); + let inner = Box::new(inner.map(|(_, node)| (&node.key, &node.value))); DepthFirstIter { inner: Some(inner) } } /// Get an iterator over the values in this node in breadth-first order - pub fn breadth_first(&self) -> BreadthFirstIter<'_, T> { + fn breadth_first(&self) -> BreadthFirstIter<'_, K, V> { let inner = Bft::new(self, children); - let inner = Box::new(inner.map(|(_, node)| node)); + let inner = Box::new(inner.map(|(_, node)| (&node.key, &node.value))); BreadthFirstIter { inner: Some(inner) } } } -fn children<'a, T>(node: &'a TreeNode) -> ChildIter<'a, T> { +fn children<'a, K, V>(node: &'a TreeNode) -> ChildIter<'a, K, V> { node.left .as_deref() .into_iter() .chain(node.right.as_deref().into_iter()) } -type ChildIter<'a, T> = Chain>, option::IntoIter<&'a TreeNode>>; +type ChildIter<'a, K, V> = + Chain>, option::IntoIter<&'a TreeNode>>; -pub struct DepthFirstIter<'a, T> { - inner: Option> + 'a>>, +pub struct DepthFirstIter<'a, K, V> { + inner: Option + 'a>>, } -impl<'a, T> Iterator for DepthFirstIter<'a, T> { - type Item = &'a T; +impl<'a, K, V> Iterator for DepthFirstIter<'a, K, V> { + type Item = (&'a K, &'a V); fn next(&mut self) -> Option { - self.inner - .as_mut() - .map(|iter| iter.next().map(|node| &node.value)) - .flatten() + self.inner.as_mut().map(|iter| iter.next()).flatten() } } -pub struct BreadthFirstIter<'a, T> { - inner: Option> + 'a>>, +pub struct BreadthFirstIter<'a, K, V> { + inner: Option + 'a>>, } -impl<'a, T> Iterator for BreadthFirstIter<'a, T> { - type Item = &'a T; +impl<'a, K, V> Iterator for BreadthFirstIter<'a, K, V> { + type Item = (&'a K, &'a V); fn next(&mut self) -> Option { - self.inner - .as_mut() - .map(|iter| iter.next().map(|node| &node.value)) - .flatten() + self.inner.as_mut().map(|iter| iter.next()).flatten() + } +} + +#[cfg(any(test, feature = "proptest"))] +mod proptest_impls { + use std::fmt::Debug; + + use super::*; + + use proptest::{arbitrary::StrategyFor, prelude::*, strategy::Map}; + + impl Arbitrary for MerkleTree + where + K: Debug + Arbitrary + Ord, + V: Debug + Arbitrary + Hashable, + { + type Parameters = (); + type Strategy = Map>, fn(Vec<(K, V)>) -> Self>; + + fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy { + any::>().prop_map(|v| v.into_iter().collect()) + } } } @@ -97,11 +117,13 @@ mod tests { #[test] fn depth_first_test() { let tree = example_node(); - let items: Vec<_> = tree.depth_first().copied().collect(); - assert_eq!(items, vec![1, 2, 3, 4, 5]); + let items: Vec<_> = tree.depth_first().map(|(k, v)| (*k, *v)).collect(); + assert_eq!(items, vec![(1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]); assert_eq!( - MerkleTree::from_iter::<[i32; 0]>([]).depth_first().count(), + MerkleTree::from_iter::<[(i32, i32); 0]>([]) + .depth_first() + .count(), 0 ); } @@ -109,11 +131,11 @@ mod tests { #[test] fn breadth_first_test() { let tree = example_node(); - let items: Vec<_> = tree.breadth_first().copied().collect(); - assert_eq!(items, vec![1, 2, 5, 3, 4]); + let items: Vec<_> = tree.breadth_first().map(|(k, v)| (*k, *v)).collect(); + 
assert_eq!(items, vec![(1, 1), (2, 2), (5, 5), (3, 3), (4, 4)]); assert_eq!( - MerkleTree::from_iter::<[i32; 0]>([]) + MerkleTree::from_iter::<[(i32, i32); 0]>([]) .breadth_first() .count(), 0 diff --git a/smirk/src/tree/macros.rs b/smirk/src/tree/macros.rs new file mode 100644 index 00000000..a4571bfd --- /dev/null +++ b/smirk/src/tree/macros.rs @@ -0,0 +1,75 @@ +/// Macro to generate a [`MerkleTree`] with a more convenient syntax +/// +/// ```rust +/// # use smirk::smirk; +/// let tree = smirk! { +/// 1 => "hello".to_string(), +/// 2 => "world".to_string(), +/// }; +/// +/// assert_eq!(tree.get(&1).unwrap(), "hello"); +/// ``` +/// +/// [`MerkleTree`]: crate::MerkleTree +#[macro_export] +macro_rules! smirk { + {} => {{ $crate::MerkleTree::new() }}; + { $key:expr => $value:expr $(,)? } => {{ + let mut tree = $crate::MerkleTree::new(); + tree.insert($key, $value); + tree + }}; + + { $key:expr => $value:expr, $($t:tt)* } => {{ + let mut tree = smirk!{ $($t)* }; + tree.insert($key, $value); + tree + }}; +} + +#[cfg(test)] +mod tests { + use crate::MerkleTree; + + #[test] + fn simple_syntax_test() { + let tree = smirk! { + 1 => "hello", + 2 => "world" // without trailing comma + }; + + let other_tree = smirk! { + 1 => "hello", + 2 => "world", // with trailing comma + }; + + assert_eq!(tree.root_hash(), other_tree.root_hash()); + + assert_eq!(*tree.get(&1).unwrap(), "hello"); + assert_eq!(*tree.get(&2).unwrap(), "world"); + assert_eq!(tree.get(&3), None); + + let _many_items = smirk! { + 1 => "hello", + 2 => "world", + 3 => "foo", + 4 => "bar", + }; + let _many_items_no_trailing = smirk! { + 1 => "hello", + 2 => "world", + 3 => "foo", + 4 => "bar" + }; + + let _single_item = smirk! { + 1 => "hello", + }; + + let _single_item_no_trailing = smirk! { + 1 => "hello" + }; + + let _empty: MerkleTree = smirk! {}; + } +} diff --git a/smirk/src/tree/mod.rs b/smirk/src/tree/mod.rs index b37324b0..b5e2ccfd 100644 --- a/smirk/src/tree/mod.rs +++ b/smirk/src/tree/mod.rs @@ -1,10 +1,9 @@ -use std::cmp::Ordering; +use std::{borrow::Borrow, cmp::Ordering}; -use miden_crypto::hash::rpo::{Rpo256, RpoDigest}; - -use crate::hash::{Hash, MerklePath}; +use crate::hash::{Digest, Hashable, MerklePath}; mod impls; +pub mod macros; pub mod visitor; #[cfg(test)] @@ -13,70 +12,101 @@ mod tests; /// A Merkle tree, with a customizable storage backend and hash function /// /// ```rust -/// # use smirk::tree::MerkleTree; +/// # use smirk::{MerkleTree, smirk}; /// let mut tree = MerkleTree::new(); -/// tree.insert(123); +/// tree.insert(123, "hello"); +/// +/// // or you can use the macro to create a tree +/// let tree = smirk! { +/// 123 => "hello", +/// }; /// /// assert_eq!(tree.size(), 1); /// ``` /// You can walk the tree in depth-first or breadth-first ordering: /// ```rust -/// # use smirk::tree::MerkleTree; -/// let tree = MerkleTree::from_iter([1, 2, 3]); +/// # use smirk::smirk; +/// let tree = smirk! 
{ +/// 1 => 123, +/// 2 => 234, +/// 3 => 345, +/// }; /// -/// for i in tree.depth_first() { -/// println!("{i}"); +/// for (k, v) in tree.depth_first() { +/// println!("key: {k} - value: {v}"); /// } /// -/// for i in tree.breadth_first() { -/// println!("{i}"); +/// for (k, v) in tree.breadth_first() { +/// println!("key: {k} - value: {v}"); /// } /// ``` +/// Broadly speaking, to do anything useful with a Merkle tree, the key type must implement +/// [`Ord`], and the value type must implement [`Hashable`] +/// +/// Warning: *DO NOT* use types with interior mutability as either the +/// key or value in this tree, since it can potentially invalidate hashes/ordering guarantees that +/// the tree otherwise maintains. +/// +/// If this happens, behaviour of the tree is unspecified, but not +/// undefined. In other words, the usual soundness rules will be upheld, but any function performed +/// on the tree itself may give incorrect results #[derive(Debug, Clone)] -pub struct MerkleTree { - pub(crate) inner: Option>>, +pub struct MerkleTree { + pub(crate) inner: Option>>, } -impl MerkleTree { - /// Create a new [`MerkleTree`] with the given storage backend +impl MerkleTree { + /// Create a new, empty [`MerkleTree`] /// /// ```rust - /// # use smirk::tree::MerkleTree; - /// let tree = MerkleTree::::new(); + /// # use smirk::MerkleTree; + /// let tree = MerkleTree::::new(); /// ``` pub fn new() -> Self { Self { inner: None } } - pub fn insert(&mut self, value: T) + /// Insert a new key-value pair into the tree + /// + /// ```rust + /// # use smirk::MerkleTree; + /// let mut tree = MerkleTree::new(); + /// tree.insert(1, "hello".to_string()); + /// + /// assert_eq!(tree.get(&1).unwrap(), "hello"); + /// ``` + pub fn insert(&mut self, key: K, value: V) where - T: Ord, + K: Ord, + V: Hashable, { - self.inner = Some(Self::insert_node(self.inner.take(), value)); + self.inner = Some(Self::insert_node(self.inner.take(), key, value)); } - fn insert_node(node: Option>>, value: T) -> Box> + fn insert_node(node: Option>>, key: K, value: V) -> Box> where - T: Ord, + K: Ord, + V: Hashable, { let mut node = match node { - None => return Box::new(TreeNode::new(value)), + None => return Box::new(TreeNode::new(key, value)), Some(node) => node, }; - if value < node.value { - node.left = Some(Self::insert_node(node.left.take(), value)); - } else if value > node.value { - node.right = Some(Self::insert_node(node.right.take(), value)); + if key < node.key { + node.left = Some(Self::insert_node(node.left.take(), key, value)); + } else if key > node.key { + node.right = Some(Self::insert_node(node.right.take(), key, value)); } else { return node; // Duplicates not allowed } node.update_height(); + node.recalculate_hash_recursive(); Self::balance(node) } - fn balance(mut node: Box>) -> Box> { + fn balance(mut node: Box>) -> Box> { let balance = node.balance_factor(); if balance > 1 { @@ -94,7 +124,7 @@ impl MerkleTree { node } - fn rotate_left(mut root: Box>) -> Box> { + fn rotate_left(mut root: Box>) -> Box> { let mut new_root = root.right.take().unwrap(); root.right = new_root.left.take(); new_root.left = Some(root); @@ -105,7 +135,7 @@ impl MerkleTree { new_root } - fn rotate_right(mut root: Box>) -> Box> { + fn rotate_right(mut root: Box>) -> Box> { let mut new_root = root.left.take().unwrap(); root.left = new_root.right.take(); new_root.right = Some(root); @@ -115,18 +145,39 @@ impl MerkleTree { new_root } + /// Remove the entry corresponding to the given key (if it exists) + /// + /// If the key does exist, the 
key-value pair is returned, otherwise, `None` is returned + pub fn remove(&mut self, key: &Q) -> Option<(K, V)> + where + Q: Borrow + ?Sized, + K: Ord, + { + // TODO: this impelementation is HORRIBLE, but we're probably not gonna be removing much - + // fix this later anyways though + if !self.contains(key) { + return None; + } + + + } + /// The number of elements in the tree /// /// ```rust - /// # use smirk::MerkleTree; - /// let tree = MerkleTree::from_iter([1, 2, 3]); + /// # use smirk::smirk; + /// let tree = smirk! { + /// 1 => "hello", + /// 2 => "world", + /// 3 => "foo", + /// }; /// /// assert_eq!(tree.size(), 3); /// ``` pub fn size(&self) -> usize { struct Counter(usize); - impl visitor::Visitor for Counter { - fn visit(&mut self, _item: &T) { + impl visitor::Visitor for Counter { + fn visit(&mut self, _: &K, _: &V) { self.0 += 1; } } @@ -137,6 +188,25 @@ impl MerkleTree { counter.0 } + /// Returns `true` if and only if `key` is present in the tree + /// + /// ```rust + /// # use smirk::smirk; + /// let tree = smirk! { + /// 1 => "hello", + /// }; + /// + /// assert!(tree.contains(&1)); + /// assert!(!tree.contains(&2)); + /// ``` + pub fn contains(&self, key: &Q) -> bool + where + Q: Borrow + ?Sized, + K: Ord, + { + self.get(key).is_some() + } + /// The height of this tree #[inline] pub fn height(&self) -> usize { @@ -146,36 +216,69 @@ impl MerkleTree { } } - /// Return the node associated with the given value - pub fn get(&self, item: &T) -> Option<&TreeNode> + /// Get the value associated with the given key + /// + /// If you need access to the node itself, consider using [`MerkleTree::get_node`] + /// + /// ```rust + /// # use smirk::smirk; + /// let tree = smirk! { + /// 1 => "hello".to_string(), + /// }; + /// + /// assert_eq!(tree.get(&1).unwrap(), "hello"); + /// assert!(tree.get(&2).is_none()); + /// ``` + pub fn get(&self, key: &Q) -> Option<&V> where - T: Ord, + Q: Borrow + ?Sized, + K: Ord, { - self.inner.as_ref().and_then(|node| node.get(item)) + self.inner.as_ref().and_then(|node| node.get(key)) } - pub fn get_mut(&mut self, item: &T) -> Option<&mut TreeNode> + /// Get the node associated with the given key + /// + /// If you only need access to the value stored in this node, consider using [`MerkleTree::get`] + /// + /// ```rust + /// # use smirk::smirk; + /// # use smirk::hash::Digest; + /// let tree = smirk! { + /// 1 => "hello".to_string(), + /// }; + /// + /// let node = tree.get_node(&1).unwrap(); + /// + /// assert_eq!(*node.key(), 1); + /// assert_eq!(*node.value(), "hello"); + /// let _hash = node.hash(); // the hash of this node plus all the children + /// ``` + pub fn get_node(&self, key: &Q) -> Option<&TreeNode> where - T: Ord, + Q: Borrow + ?Sized, + K: Ord, { - self.inner.as_mut().and_then(|node| node.get_mut(item)) + self.inner.as_ref().and_then(|node| node.get_node(key)) } /// Get the root hash of the Merkle tree - pub fn root_hash(&self) -> Hash + pub fn root_hash(&self) -> Digest where - T: AsRef<[u8]>, + V: Hashable, { match &self.inner { - None => Hash::NULL, // should this function return an option? + None => Digest::NULL, // should this function return an option? 
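+            // a non-empty tree returns the memoized hash of the root node, which already covers all child nodes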
Some(node) => node.hash(), } } /// Generate a [`MerklePath`] for the a given value - pub fn path_for(&self, value: &T) -> Option + pub fn path_for(&self, key: &Q) -> Option where - T: Ord + AsRef<[u8]>, + Q: Borrow + ?Sized, + K: Ord, + V: Hashable, { let mut components = Vec::with_capacity(self.height()); @@ -186,7 +289,7 @@ impl MerkleTree { components.push(node.hash()); - match value.cmp(&node.value) { + match key.borrow().cmp(&node.key) { Ordering::Less => opt_node = node.left.as_deref(), Ordering::Greater => opt_node = node.right.as_deref(), Ordering::Equal => { @@ -197,15 +300,17 @@ impl MerkleTree { } } - pub fn verify(&self, path: &MerklePath, value: &T) -> bool + /// Verify that the given value exists in the tree, by using the provided [`MerklePath`] + pub fn verify(&self, path: &MerklePath, value: &V) -> bool where - T: AsRef<[u8]> + Ord, + Q: Ord + Borrow + ?Sized, + V: Hashable, { if path.components().last() != Some(&self.root_hash()) { return false; } - let mut hash = Hash::calculate(value.as_ref()); + let mut hash = value.hash(); for slice in path.components().windows(2) { let first = &slice[0]; @@ -223,24 +328,16 @@ impl MerkleTree { /// An individual node in a Merkle tree #[derive(Debug, Clone)] -pub struct TreeNode { - pub(crate) value: T, - pub(crate) left: Option>>, - pub(crate) right: Option>>, +pub struct TreeNode { + pub(crate) key: K, + pub(crate) value: V, + pub(crate) hash: Digest, + pub(crate) left: Option>>, + pub(crate) right: Option>>, pub(crate) height: isize, } -impl TreeNode { - /// Create a new [`TreeNode`] with no children - pub fn new(value: T) -> Self { - Self { - value, - left: None, - right: None, - height: 0, - } - } - +impl TreeNode { /// The height of the tree /// /// For example, in a tree with 3 nodes A, B, C, where A is the parent of B and C: @@ -252,6 +349,7 @@ impl TreeNode { self.height } + // pub(crate) for testing only pub(crate) fn update_height(&mut self) { let left_height = self.left.as_ref().map_or(0, |x| x.height()); let right_height = self.right.as_ref().map_or(0, |x| x.height()); @@ -264,58 +362,81 @@ impl TreeNode { left_height - right_height } - pub fn get(&self, item: &T) -> Option<&TreeNode> + fn get(&self, key: &Q) -> Option<&V> where - T: Ord, + Q: Borrow + ?Sized, + K: Ord, { - match item.cmp(&self.value) { - Ordering::Less => self.left.as_ref().and_then(|node| node.get(item)), - Ordering::Greater => self.right.as_ref().and_then(|node| node.get(item)), - Ordering::Equal => Some(self), - } + let node = self.get_node(key)?; + Some(&node.value) } - pub fn get_mut(&mut self, item: &T) -> Option<&mut TreeNode> + fn get_node(&self, key: &Q) -> Option<&TreeNode> where - T: Ord, + Q: Borrow + ?Sized, + K: Ord, { - match item.cmp(&self.value) { - Ordering::Less => self.left.as_mut().and_then(|node| node.get_mut(item)), - Ordering::Greater => self.right.as_mut().and_then(|node| node.get_mut(item)), + match key.borrow().cmp(&self.key) { + Ordering::Less => self.left.as_ref().and_then(|node| node.get_node(key)), + Ordering::Greater => self.right.as_ref().and_then(|node| node.get_node(key)), Ordering::Equal => Some(self), } } +} - /// The hash of the value contained in this node (ignoring any children) - pub fn hash_of_value(&self) -> Hash - where - T: AsRef<[u8]>, - { - // should we memoize this? 
- let bytes = self.value.as_ref(); - Hash::calculate(bytes) +impl TreeNode { + // pub(crate) for testing only + pub(crate) fn new(key: K, value: V) -> Self { + let hash = value.hash(); + + Self { + key, + value, + hash, + left: None, + right: None, + height: 0, + } } - /// The hash of this value (and all child values) + /// The key associated with this node + pub fn key(&self) -> &K { + &self.key + } - // we should probably memoize this - pub fn hash(&self) -> Hash - where - T: AsRef<[u8]>, - { - let left = self - .left - .as_ref() - .map(|node| node.hash()) - .unwrap_or(Hash::NULL); - let this = Hash::calculate(self.value.as_ref()); - let right = self - .right - .as_ref() - .map(|node| node.hash()) - .unwrap_or(Hash::NULL); - - let left_this = Rpo256::merge(&[left.digest(), this.digest()]); - Hash::from(Rpo256::merge(&[left_this, right.digest()])) + /// The value associated with this node + pub fn value(&self) -> &V { + &self.value + } + + /// The hash of this node and all child nodes + #[inline] + pub fn hash(&self) -> Digest { + self.hash + } + + /// The hash of the value contained in this node + /// + /// Note: this is unaffected by the value of child nodes + #[inline] + pub fn hash_of_value(&self) -> Digest { + self.value.hash() + } + + /// Update the `hash` field of this node, and all child nodes + fn recalculate_hash_recursive(&mut self) { + let mut new_hash = self.value.hash(); + + if let Some(left) = &mut self.left { + left.recalculate_hash_recursive(); + new_hash.merge(&left.hash); + } + + if let Some(right) = &mut self.right { + right.recalculate_hash_recursive(); + new_hash.merge(&right.hash); + } + + self.hash = new_hash; } } diff --git a/smirk/src/tree/tests.rs b/smirk/src/tree/tests.rs index 4f140067..3a0d01ec 100644 --- a/smirk/src/tree/tests.rs +++ b/smirk/src/tree/tests.rs @@ -1,15 +1,19 @@ -use super::*; +use crate::smirk; #[test] fn simple_example() { - let mut tree = MerkleTree::from_iter([1, 2, 3]); + let mut tree = smirk! 
{ + 1 => 1, + 2 => 2, + 3 => 3, + }; assert_eq!(tree.size(), 3); - tree.insert(4); + tree.insert(4, 4); assert_eq!(tree.size(), 4); println!("{tree:#?}"); - let _items: Vec<_> = tree.depth_first().copied().collect(); + let _items: Vec<_> = tree.depth_first().collect(); } diff --git a/smirk/src/tree/visitor.rs b/smirk/src/tree/visitor.rs index 71cec34d..7ae8e27a 100644 --- a/smirk/src/tree/visitor.rs +++ b/smirk/src/tree/visitor.rs @@ -1,29 +1,36 @@ use super::{MerkleTree, TreeNode}; -pub trait Visitor { - fn visit(&mut self, item: &T); +/// A trait for types which can visit nodes in a Merkle tree +/// +/// Note: currently only immutable access is given to prevent invalidating memoized hashes +pub trait Visitor { + /// The function to be called on each node + fn visit(&mut self, key: &K, value: &V); } -impl Visitor for &mut V +impl Visitor for &mut Vis where - V: Visitor, + Vis: Visitor, { - fn visit(&mut self, item: &T) { - V::visit(self, item) + fn visit(&mut self, key: &K, value: &V) { + Vis::visit(self, key, value) } } -impl MerkleTree { - pub fn visit>(&self, mut visitor: V) { +impl MerkleTree { + /// Apply a visitor to all the nodes in a tree + /// + /// The visitor will run on `self`, then `left` (if it is `Some`), then `right` (if it is `Some`) + pub fn visit>(&self, mut visitor: Vis) { if let Some(inner) = &self.inner { inner.visit(&mut visitor); } } } -impl TreeNode { - pub fn visit>(&self, visitor: &mut V) { - visitor.visit(&self.value); +impl TreeNode { + fn visit>(&self, visitor: &mut Vis) { + visitor.visit(&self.key, &self.value); if let Some(left) = &self.left { left.visit(visitor); @@ -43,13 +50,13 @@ mod tests { fn counter_example() { struct Counter(usize); - impl Visitor for Counter { - fn visit(&mut self, _item: &T) { + impl Visitor for Counter { + fn visit(&mut self, _: &K, _: &V) { self.0 += 1; } } - let tree = MerkleTree::from_iter([1, 2, 3]); + let tree = MerkleTree::from_iter([(1, 1), (2, 2), (3, 3)]); let mut counter = Counter(0); tree.visit(&mut counter); @@ -60,17 +67,16 @@ mod tests { fn sum_example() { struct Sum(i32); - impl Visitor for Sum { - fn visit(&mut self, item: &i32) { - self.0 += *item; + impl Visitor for Sum { + fn visit(&mut self, key: &i32, _value: &i32) { + self.0 += *key; } } - let tree = MerkleTree::from_iter([1, 2, 3]); + let tree = MerkleTree::from_iter([(1, 1), (2, 2), (3, 3)]); let mut sum = Sum(0); tree.visit(&mut sum); assert_eq!(sum.0, 6); - } } From cad8612609df79dcff3301819f68e95e210650ae Mon Sep 17 00:00:00 2001 From: Cameron Date: Tue, 11 Jul 2023 11:13:48 +0100 Subject: [PATCH 03/15] mostly done, a few issues remain --- Cargo.lock | 24 +- smirk/Cargo.toml | 3 +- smirk/proptest-regressions/hash/mod.txt | 7 + .../storage/rocksdb/codec.txt | 7 + smirk/src/hash/hashable.rs | 14 +- smirk/src/hash/mod.rs | 25 +- smirk/src/hash/serde_impls.rs | 21 +- smirk/src/storage/error.rs | 41 +++- smirk/src/storage/mod.rs | 54 ++--- smirk/src/storage/noop.rs | 17 -- smirk/src/storage/rocksdb/codec.rs | 49 ++++ smirk/src/storage/rocksdb/mod.rs | 215 +++++++++--------- smirk/src/storage/rocksdb/structure.rs | 92 ++++++++ smirk/src/testing.rs | 94 ++++---- smirk/src/tree/impls.rs | 43 ++-- smirk/src/tree/iterator.rs | 89 ++++++++ smirk/src/tree/mod.rs | 38 ++-- smirk/src/tree/tests.rs | 9 + 18 files changed, 573 insertions(+), 269 deletions(-) create mode 100644 smirk/proptest-regressions/hash/mod.txt create mode 100644 smirk/proptest-regressions/storage/rocksdb/codec.txt delete mode 100644 smirk/src/storage/noop.rs create mode 100644 
smirk/src/storage/rocksdb/codec.rs create mode 100644 smirk/src/storage/rocksdb/structure.rs create mode 100644 smirk/src/tree/iterator.rs diff --git a/Cargo.lock b/Cargo.lock index f4f54d07..0d70c191 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4383,6 +4383,28 @@ dependencies = [ "winapi", ] +[[package]] +name = "rmp" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44519172358fd6d58656c86ab8e7fbc9e1490c3e8f14d35ed78ca0dd07403c9f" +dependencies = [ + "byteorder", + "num-traits", + "paste", +] + +[[package]] +name = "rmp-serde" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5b13be192e0220b8afb7222aa5813cb62cc269ebb5cac346ca6487681d2913e" +dependencies = [ + "byteorder", + "rmp", + "serde", +] + [[package]] name = "rocksdb" version = "0.21.0" @@ -4932,9 +4954,9 @@ dependencies = [ "hex", "miden-crypto 0.6.0", "proptest", + "rmp-serde", "rocksdb", "serde", - "serde_json", "tempdir", "test-strategy", "thiserror", diff --git a/smirk/Cargo.toml b/smirk/Cargo.toml index 483021d2..490b208e 100644 --- a/smirk/Cargo.toml +++ b/smirk/Cargo.toml @@ -10,10 +10,11 @@ thiserror = "1" hex = "0.4" traversal = "0.1" serde = { version = "1", features = ["derive"] } -serde_json = "1" +rmp-serde = "1" proptest = { version = "1", optional = true } + [dev-dependencies] tempdir = "0.3" proptest = "1" diff --git a/smirk/proptest-regressions/hash/mod.txt b/smirk/proptest-regressions/hash/mod.txt new file mode 100644 index 00000000..d6674a49 --- /dev/null +++ b/smirk/proptest-regressions/hash/mod.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc fd176da074662e3129e51f7bbe4112d4bb9ee9e596bb25f76ddc4cbd9d526c12 # shrinks to input = _DigestBytesSerdeRoundTripArgs { digest: Digest(RpoDigest([BaseElement(0), BaseElement(0), BaseElement(0), BaseElement(0)])) } diff --git a/smirk/proptest-regressions/storage/rocksdb/codec.txt b/smirk/proptest-regressions/storage/rocksdb/codec.txt new file mode 100644 index 00000000..b5b6f723 --- /dev/null +++ b/smirk/proptest-regressions/storage/rocksdb/codec.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc aaf2606728b143c354f5fedf3ddd6894dcd25805eea6d1e69b019dd202f8e72b # shrinks to input = _EncodeDecodeBijectiveArgs { key: Digest(RpoDigest([BaseElement(0), BaseElement(0), BaseElement(0), BaseElement(0)])), value: CoolCustomType { foo: "", bar: [], coords: [(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0)] } } diff --git a/smirk/src/hash/hashable.rs b/smirk/src/hash/hashable.rs index 5341b40d..f98de1d7 100644 --- a/smirk/src/hash/hashable.rs +++ b/smirk/src/hash/hashable.rs @@ -1,10 +1,4 @@ -use std::{ - borrow::Cow, - ffi::{OsStr, OsString}, - path::{Path, PathBuf}, - rc::Rc, - sync::Arc, -}; +use std::{borrow::Cow, rc::Rc, sync::Arc}; use miden_crypto::hash::rpo::Rpo256; @@ -85,10 +79,6 @@ where /// COLLECTION IMPLS - - - - macro_rules! 
int_impl { ($int:ty) => { impl Hashable for $int { @@ -135,5 +125,3 @@ as_ref_impl!([u8]); as_ref_impl!(Vec); as_ref_impl!(str); as_ref_impl!(String); - - diff --git a/smirk/src/hash/mod.rs b/smirk/src/hash/mod.rs index 208b8f9e..3d56d604 100644 --- a/smirk/src/hash/mod.rs +++ b/smirk/src/hash/mod.rs @@ -2,7 +2,7 @@ //! //! In particular, the [`Digest`] type and the [`Hashable`] trait -use std::fmt::Display; +use std::{fmt::{Display, Debug}, hash::Hash}; use miden_crypto::{ hash::rpo::{Rpo256, RpoDigest}, @@ -12,20 +12,31 @@ use miden_crypto::{ mod hashable; pub use hashable::Hashable; -mod serde_impls; #[cfg(any(test, feature = "proptest"))] mod proptest_impls; +mod serde_impls; /// A Rescue-Prime Optimized digest -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct Digest(RpoDigest); +impl Debug for Digest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Hash({})", hex::encode(self.0.as_bytes())) + } +} + impl Display for Digest { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "Hash({})", hex::encode(self.0.as_bytes())) } } +impl Hash for Digest { + fn hash(&self, state: &mut H) { + <[u8; 32] as Hash>::hash(&self.to_bytes(), state) + } +} impl Digest { /// The null hash @@ -129,4 +140,12 @@ mod tests { prop_assert_eq!(digest, digest_again); } + + #[proptest] + fn digest_bytes_serde_round_trip(digest: Digest) { + let mp_bytes = rmp_serde::to_vec(&digest).unwrap(); + let digest_again: Digest = rmp_serde::from_slice(&mp_bytes).unwrap(); + + prop_assert_eq!(digest, digest_again); + } } diff --git a/smirk/src/hash/serde_impls.rs b/smirk/src/hash/serde_impls.rs index 6295444e..b471e7b4 100644 --- a/smirk/src/hash/serde_impls.rs +++ b/smirk/src/hash/serde_impls.rs @@ -1,6 +1,5 @@ use super::*; -use miden_crypto::utils::{Deserializable, Serializable, SliceReader}; use serde::{de::Visitor, Deserializer, Serializer}; use serde::{Deserialize, Serialize}; @@ -9,8 +8,7 @@ impl Serialize for Digest { where S: Serializer, { - let mut bytes = vec![0; 32]; - self.0.write_into(&mut bytes); + let bytes = self.to_bytes(); serializer.serialize_bytes(&bytes) } } @@ -26,25 +24,20 @@ impl<'de> Deserialize<'de> for Digest { formatter.write_str("bytes representing a rescue-prime optimized hash") } - type Value = RpoDigest; + type Value = Digest; fn visit_bytes(self, v: &[u8]) -> Result where E: serde::de::Error, { - if v.len() != 32 { - return Err(E::custom(format!( - "wrong number of bytes - expected 32, found {}", - v.len() - ))); - } + let bytes = v + .try_into() + .map_err(|_| E::custom(format!("incorrect number of bytes: {}", v.len())))?; - let mut reader = SliceReader::new(v); - RpoDigest::read_from(&mut reader) - .map_err(|e| E::custom(format!("deserialization error: {e}"))) + Digest::from_bytes(bytes).ok_or(E::custom("deserialization error")) } } - deserializer.deserialize_bytes(V).map(Digest) + deserializer.deserialize_bytes(V) } } diff --git a/smirk/src/storage/error.rs b/smirk/src/storage/error.rs index ee3878fd..1e1553a6 100644 --- a/smirk/src/storage/error.rs +++ b/smirk/src/storage/error.rs @@ -1,18 +1,35 @@ use crate::hash::Digest; -/// An error en +use super::rocksdb::DecodeError; + +/// An error encountered while persisting or restoring a [`MerkleTree`] #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("A key referenced this hash as a child, but it wasn't present: {0}")] - MissingKeyReferenced(Digest), - #[error("the `structure` key was not 
defined")] - Unknown(#[from] Box), - #[error("json: {0}")] - Json(#[from] serde_json::Error), -} + /// Invalid hash bytes as key + #[error("the following bytes were used as a key, but were not a valid RPO hash: {0:?}")] + InvalidHashBytes(Vec), + + /// Hash mismatch + #[error("the hash didn't match the computed hash of the stored value - computed: {computed}, stored: {stored}")] + HashMismatch { + /// The hash that was computed by hashing the stored value + computed: Digest, + /// The hash that was stored in the database + stored: Digest, + }, + + /// The database referenced data in the structure that was not found in the database + #[error("no data assocated with {hash}, but found in structure")] + StructureReferenceMissing { + /// The hash that was missing + hash: Digest, + }, + + /// Malformed structure + #[error("malformed structure: {0}")] + MalformedStructure(DecodeError), -impl From for Error { - fn from(value: rocksdb::Error) -> Self { - Self::Unknown(Box::new(value)) - } + /// Unknown error + #[error("unknown error: {0}")] + Unknown(Box), } diff --git a/smirk/src/storage/mod.rs b/smirk/src/storage/mod.rs index 0bd4d099..2b718d70 100644 --- a/smirk/src/storage/mod.rs +++ b/smirk/src/storage/mod.rs @@ -1,36 +1,38 @@ //! Persistence backends for Merkle trees +//! +//! The main backend provided is [rocksdb][rocksdb], but the design is kept modular to allow +//! possible future storage backends. +//! +//! +//! [rocksdb]: https://github.com/facebook/rocksdb use std::fmt::Debug; -use crate::tree::MerkleTree; +use crate::{hash::Hashable, tree::MerkleTree}; -// pub mod noop; -// pub mod rocksdb; +/// A rocksdb-based storage implementation +pub mod rocksdb; -// mod error; -// pub use error::Error; +mod error; +pub use error::Error; use serde::{Deserialize, Serialize}; /// Types which can act as a storage backend for a Merkle tree -pub trait Storage: Debug { - // fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error>; - // - // fn load_tree(&self) -> Result>, Error>; -} - -#[cfg(test)] -mod tests { - use crate::testing::example_tree; - - use super::*; +pub trait Storage: Debug +where + K: Ord + 'static, + V: Hashable + 'static, +{ + /// Persist the given tree to storage + fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error> + where + K: Serialize, + V: Serialize; - #[test] - fn simple_example() { - // let test_db = TestDb::new(); - // let tree = example_tree(); - // - // assert!(test_db.load_tree().unwrap().is_none()); - // - // test_db.store_tree(&tree).unwrap(); - // let tree_again = test_db.load_tree().unwrap().unwrap(); - } + /// Load a tree from storage + /// + /// If no tree has been persisted, `None` should be returned + fn load_tree(&self) -> Result>, Error> + where + K: for<'a> Deserialize<'a>, + V: for<'a> Deserialize<'a>; } diff --git a/smirk/src/storage/noop.rs b/smirk/src/storage/noop.rs deleted file mode 100644 index afeb1b4e..00000000 --- a/smirk/src/storage/noop.rs +++ /dev/null @@ -1,17 +0,0 @@ -use crate::MerkleTree; - -use super::{Error, Storage}; - -/// A dummy storage type which does nothing -#[derive(Debug, Default)] -pub struct NoopStorage; - -impl Storage for NoopStorage { - // fn load_tree(&self) -> Result>, Error> { - // Ok(None) - // } - // - // fn store_tree(&self, _tree: &MerkleTree) -> Result<(), Error> { - // Ok(()) - // } -} diff --git a/smirk/src/storage/rocksdb/codec.rs b/smirk/src/storage/rocksdb/codec.rs new file mode 100644 index 00000000..758be030 --- /dev/null +++ b/smirk/src/storage/rocksdb/codec.rs @@ -0,0 +1,49 @@ +use 
serde::{Deserialize, Serialize}; +use thiserror::Error; + +/// An error encountered when encoding data to its database format +#[derive(Debug, Error)] +#[error("encode error: {0}")] +pub struct EncodeError(rmp_serde::encode::Error); + +/// An error encountered when decoding data in its database format +#[derive(Debug, Error)] +#[error("decode error: {0}")] +pub struct DecodeError(rmp_serde::decode::Error); + +pub(super) fn encode<'a, T>(t: &'a T) -> Result, EncodeError> +where + T: Serialize, +{ + rmp_serde::encode::to_vec(t).map_err(EncodeError) +} + +pub(super) fn decode Deserialize<'a> + 'static>(bytes: &[u8]) -> Result { + rmp_serde::decode::from_slice(bytes).map_err(DecodeError) +} + +#[cfg(test)] +mod tests { + use proptest::prop_assert_eq; + use test_strategy::{proptest, Arbitrary}; + + use crate::hash::Digest; + + use super::*; + + #[derive(Debug, Deserialize, Serialize, Arbitrary, PartialEq, Eq)] + struct CoolCustomType { + foo: String, + bar: Vec, + coords: [(i32, i32); 10], + } + + #[proptest] + fn encode_decode_bijective(key: Digest, value: CoolCustomType) { + let bytes = encode(&(&key, &value)).unwrap(); + let (key_again, value_again): (Digest, CoolCustomType) = decode(&bytes).unwrap(); + + prop_assert_eq!(key, key_again); + prop_assert_eq!(value, value_again); + } +} diff --git a/smirk/src/storage/rocksdb/mod.rs b/smirk/src/storage/rocksdb/mod.rs index 18046e6b..c9baad51 100644 --- a/smirk/src/storage/rocksdb/mod.rs +++ b/smirk/src/storage/rocksdb/mod.rs @@ -1,123 +1,132 @@ -use std::{collections::HashMap, fmt::Debug, path::Path}; +use std::{fmt::Debug, path::Path}; +use rocksdb::{IteratorMode, TransactionDB, DB}; use serde::{Deserialize, Serialize}; -use crate::{hash::Digest, tree::TreeNode, MerkleTree}; +use crate::{ + hash::{Digest, Hashable}, + MerkleTree, +}; + +use self::structure::Structure; use super::{Error, Storage}; -pub struct RocksDb { - inner: rocksdb::TransactionDB, +mod codec; +pub use codec::{DecodeError, EncodeError}; +mod structure; + +/// A struct that acts as a [`Storage`] backend by persisting data in [rocksdb][db] +/// +/// Broadly speaking, this type works by: +/// - serializing a tree-like "structure" object to the key `"structure"` +/// - serializing binary encoded key-value pairs to the key `rpo(value)` (note - this is not the +/// hash of a given node, which includes the children in the hash) +/// +/// [db]: https://github.com/facebook/rocksdb +pub struct RocksdbStorage { + pub(crate) instance: TransactionDB, } -impl Debug for RocksDb { +impl Debug for RocksdbStorage { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "RocksDb") + write!(f, "RocksdbStorage") } } -impl RocksDb { - pub fn open(path: &Path) -> Result { - let inner = rocksdb::TransactionDB::open_default(path)?; +impl RocksdbStorage { + /// Create a new [`RocksdbStorage`] from an existing rocksdb instance + /// + /// This is useful if you want to create transactions that modify both data managed by smirk, + /// as well as data external to smirk + pub fn from_instance(instance: TransactionDB) -> Self { + Self { instance } + } - Ok(Self { inner }) + /// Create a new [`RocksdbStorage`] by opening a new rocksdb instance at the given path + pub fn open(path: &Path) -> Result { + let instance = TransactionDB::open_default(path)?; + Ok(Self { instance }) } + + const STRUCTURE_KEY: &[u8] = b"structure"; } -/// Struct to represent structure of the tree without storing the actual data -/// -/// json was chosen as an "obviously bad" encoding - we should decide on a 
proper representation -#[derive(Debug, Clone, Serialize, Deserialize)] -struct NodeStructure { - hash: Digest, - left: Option>, - right: Option>, +impl Storage for RocksdbStorage +where + K: Ord + 'static, + V: Hashable + 'static, +{ + fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error> + where + K: Serialize, + V: Serialize, + { + let tx = self.instance.transaction(); + + let structure = structure::Structure::from_tree(tree); + let structure_bytes = codec::encode(&structure).map_err(err)?; + + tx.put(Self::STRUCTURE_KEY, structure_bytes).map_err(err)?; + + for node in tree.iter() { + let hash = node.value().hash(); + let bytes = codec::encode(&(node.key(), node.value())).map_err(err)?; + + tx.put(&hash.to_bytes(), &bytes).map_err(err)?; + } + + tx.commit().map_err(err)?; + + Ok(()) + } + + fn load_tree(&self) -> Result>, Error> + where + K: for<'a> Deserialize<'a>, + V: for<'a> Deserialize<'a>, + { + let Some(structure_bytes) = self.instance.get(Self::STRUCTURE_KEY).map_err(err)? else { + return Ok(None); + }; + + let structure: Option = codec::decode(&structure_bytes).map_err(Error::MalformedStructure)?; + let Some(structure) = structure else { return Ok(Some(MerkleTree::new())) }; + + let mut values = self.instance.iterator(IteratorMode::Start).filter(|result| { + match result { + // don't try to deserialize this key + Ok((hash, _data)) => hash.as_ref() != Self::STRUCTURE_KEY, + Err(_) => true, + } + }).map(|result| { + let (hash, data) = result.map_err(err)?; + + let hash = get_hash(&hash)?; + let data: (K, V) = codec::decode(&data).map_err(err)?; + + Ok((hash, data)) + + }).collect::>()?; + + let tree = structure.to_tree(&mut values)?; + + Ok(Some(tree)) + } } -impl Storage for RocksDb { - // fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error> { - // let tx = self.inner.transaction(); - // for elem in tree.depth_first() { - // let key = Digest::calculate(elem.as_ref()); - // let value = elem.as_ref(); - // - // tx.put(&key.to_bytes(), value)?; - // } - // - // let structure_bytes = - // serde_json::to_string(&tree.inner.as_ref().map(|node| get_structure(node)))?; - // - // tx.put("structure".as_bytes(), structure_bytes)?; - // - // tx.commit()?; - // - // Ok(()) - // } - // - // fn load_tree>>(&self) -> Result>, Error> { - // let tx = self.inner.transaction(); - // - // let Some(structure) = tx.get("structure".as_bytes())? 
else { return Ok(None) }; - // let structure = serde_json::from_str( - // &String::from_utf8(structure).expect("we're not actually going to use json"), - // )?; - // - // let Some(structure) = structure else { return Ok(Some(MerkleTree { inner: None })) }; - // - // let mut data = HashMap::new(); - // - // for result in tx.iterator(rocksdb::IteratorMode::Start).into_iter() { - // let (key, value) = result?; - // let Some(hash) = Digest::decode(&key) else { continue }; - // let value = value.to_vec().into(); - // - // data.insert(hash, value); - // } - // - // let mut tree = rebuild_tree(structure, &data)?; - // tree.update_height(); - // - // Ok(Some(MerkleTree { - // inner: Some(Box::new(tree)), - // })) - // } +fn get_hash(bytes: &[u8]) -> Result { + let hash_bytes = bytes + .as_ref() + .try_into() + .map_err(|_| Error::InvalidHashBytes(bytes.to_vec()))?; + + let hash = Digest::from_bytes(hash_bytes) + .ok_or_else(|| Error::InvalidHashBytes(hash_bytes.to_vec()))?; + + Ok(hash) } -// fn rebuild_tree( -// structure: NodeStructure, -// data: &HashMap, -// ) -> Result, Error> { -// let this = data -// .get(&structure.hash) -// .ok_or(Error::MissingKeyReferenced(structure.hash))?; -// -// let left = structure -// .left -// .map(|structure| rebuild_tree(*structure, data)) -// .transpose()? -// .map(Box::new); -// -// let right = structure -// .right -// .map(|structure| rebuild_tree(*structure, data)) -// .transpose()? -// .map(Box::new); -// -// Ok(TreeNode { -// value: this.clone(), -// left, -// right, -// height: 0, -// }) -// } -// -// fn get_structure>(node: &TreeNode) -> NodeStructure { -// NodeStructure { -// hash: node.hash(), -// left: node.left.as_ref().map(|node| Box::new(get_structure(node))), -// right: node -// .right -// .as_ref() -// .map(|node| Box::new(get_structure(node))), -// } -// } +fn err(e: E) -> Error { + Error::Unknown(Box::new(e)) +} diff --git a/smirk/src/storage/rocksdb/structure.rs b/smirk/src/storage/rocksdb/structure.rs new file mode 100644 index 00000000..a18fe53b --- /dev/null +++ b/smirk/src/storage/rocksdb/structure.rs @@ -0,0 +1,92 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use crate::{ + hash::{Digest, Hashable}, + storage::Error, + MerkleTree, TreeNode, +}; + +/// The structure of a tree +#[derive(Debug, Serialize, Deserialize)] +pub(super) struct Structure { + pub hash: Digest, + pub left: Option>, + pub right: Option>, +} + +impl Structure { + pub fn from_tree(tree: &MerkleTree) -> Option { + tree.inner.as_deref().map(Self::from_node) + } + + fn from_node(node: &TreeNode) -> Self { + let hash = node.value().hash(); + let left = node.left.as_deref().map(Self::from_node).map(Box::new); + let right = node.left.as_deref().map(Self::from_node).map(Box::new); + + Self { hash, left, right } + } + + pub fn to_tree( + &self, + values: &mut HashMap, + ) -> Result, Error> { + dbg!(values.keys().collect::>()); + let node = Self::to_node(&self, values)?; + + Ok(MerkleTree { + inner: Some(Box::new(node)), + }) + } + + fn to_node( + &self, + values: &mut HashMap, + ) -> Result, Error> { + let hash = self.hash; + let Some((key, value)) = values.remove(&hash) else { + return Err(Error::StructureReferenceMissing { hash }); + }; + + let left = self + .left + .as_deref() + .map(|s| Self::to_node(s, values).map(Box::new)) + .transpose()?; + let right = self + .right + .as_deref() + .map(|s| Self::to_node(s, values).map(Box::new)) + .transpose()?; + + let mut node = TreeNode { + // this hash is the hash including children, `hash` that is 
in scope is the hash + // excluding children, so we just use null for now and clean up later + hash: Digest::NULL, + key, + value, + right, + left, + height: 0, + }; + + node.update_height(); + node.recalculate_hash_recursive(); + + Ok(node) + } +} + +#[cfg(test)] +mod tests { + use test_strategy::proptest; + + use super::*; + + #[proptest] + fn to_from_structure(tree: MerkleTree) { + + } +} diff --git a/smirk/src/testing.rs b/smirk/src/testing.rs index bcc4689c..763409f8 100644 --- a/smirk/src/testing.rs +++ b/smirk/src/testing.rs @@ -1,55 +1,59 @@ use tempdir::TempDir; use crate::{ - hash::Digest, - // storage::{rocksdb::RocksDb, Storage}, - tree::{MerkleTree, TreeNode}, + hash::Hashable, + storage::{rocksdb::RocksdbStorage, Storage}, + tree::MerkleTree, }; -// 1 -// |\ -// 2 5 -// |\ -// 3 4 -pub fn example_node() -> TreeNode { - let mut node = TreeNode { - key: 1, - value: 1, - hash: Digest::NULL, - left: Some(Box::new(TreeNode { - key: 2, - value: 2, - hash: Digest::NULL, - left: Some(Box::new(TreeNode::new(3, 3))), - right: Some(Box::new(TreeNode::new(4, 4))), - height: 0, - })), - right: Some(Box::new(TreeNode::new(5, 5))), - height: 0, - }; - node.update_height(); - node +/// Helper struct that makes it easier to test against a rocksdb instance +#[derive(Debug)] +pub struct TestStorage { + _dir: TempDir, + db: RocksdbStorage, } -pub fn example_tree() -> MerkleTree { - MerkleTree { - inner: Some(Box::new(example_node())), +impl TestStorage { + pub fn new() -> Self { + let dir = TempDir::new("smirk").unwrap(); + let db = RocksdbStorage::open(dir.path()).unwrap(); + + Self { _dir: dir, db } + } +} + +impl Storage for TestStorage +where + K: Ord + 'static, + V: Hashable + 'static, +{ + fn store_tree(&self, tree: &MerkleTree) -> Result<(), crate::storage::Error> + where + K: serde::Serialize, + V: serde::Serialize, + { + self.db.store_tree(tree) + } + + fn load_tree(&self) -> Result>, crate::storage::Error> + where + K: for<'a> serde::Deserialize<'a>, + V: for<'a> serde::Deserialize<'a>, + { + self.db.load_tree() } } -// #[derive(Debug)] -// pub struct TestDb { -// _dir: TempDir, -// db: RocksDb, -// } -// -// impl TestDb { -// pub fn new() -> Self { -// let dir = TempDir::new("smirk").unwrap(); -// let db = RocksDb::open(dir.path()).unwrap(); -// -// Self { _dir: dir, db } -// } -// } -// -// impl Storage for TestDb {} +#[test] +fn simple_storage_test() { + let db = TestStorage::new(); + + assert_eq!(db.load_tree().unwrap(), None::>); + + let tree = (0..10).map(|i| (i, format!("the data is {i}"))).collect(); + db.store_tree(&tree).unwrap(); + + let tree_again: MerkleTree = db.load_tree().unwrap().unwrap(); + + assert_eq!(tree, tree_again); +} diff --git a/smirk/src/tree/impls.rs b/smirk/src/tree/impls.rs index e442978a..6caaa804 100644 --- a/smirk/src/tree/impls.rs +++ b/smirk/src/tree/impls.rs @@ -2,22 +2,8 @@ use std::{iter::Chain, option}; use traversal::{Bft, DftPre}; -use crate::hash::Hashable; - use super::{MerkleTree, TreeNode}; -impl FromIterator<(K, V)> for MerkleTree { - fn from_iter>(iter: T) -> Self { - let mut tree = MerkleTree::new(); - - for (key, value) in iter { - tree.insert(key, value); - } - - tree - } -} - impl MerkleTree { /// Returns an iterator over the keys and values in depth-first order pub fn depth_first<'a>(&'a self) -> DepthFirstIter<'a, K, V> { @@ -92,6 +78,8 @@ impl<'a, K, V> Iterator for BreadthFirstIter<'a, K, V> { mod proptest_impls { use std::fmt::Debug; + use crate::hash::Hashable; + use super::*; use proptest::{arbitrary::StrategyFor, prelude::*, 
strategy::Map}; @@ -112,7 +100,32 @@ mod proptest_impls { #[cfg(test)] mod tests { - use crate::{testing::example_node, tree::MerkleTree}; + use crate::{hash::Digest, tree::MerkleTree, TreeNode}; + + // 1 + // |\ + // 2 5 + // |\ + // 3 4 + fn example_node() -> TreeNode { + let mut node = TreeNode { + key: 1, + value: 1, + hash: Digest::NULL, + left: Some(Box::new(TreeNode { + key: 2, + value: 2, + hash: Digest::NULL, + left: Some(Box::new(TreeNode::new(3, 3))), + right: Some(Box::new(TreeNode::new(4, 4))), + height: 0, + })), + right: Some(Box::new(TreeNode::new(5, 5))), + height: 0, + }; + node.update_height(); + node + } #[test] fn depth_first_test() { diff --git a/smirk/src/tree/iterator.rs b/smirk/src/tree/iterator.rs new file mode 100644 index 00000000..1d1c5f6b --- /dev/null +++ b/smirk/src/tree/iterator.rs @@ -0,0 +1,89 @@ +use std::iter::empty; + +use crate::{hash::Hashable, MerkleTree, TreeNode}; + +impl FromIterator<(K, V)> for MerkleTree { + fn from_iter>(iter: T) -> Self { + let mut tree = MerkleTree::new(); + + for (key, value) in iter { + tree.insert(key, value); + } + + tree + } +} + +impl<'a, K, V> MerkleTree { + /// Create an [`Iterator`] over the nodes in key order (i.e. the order specified by the `Ord` + /// impl for `K`) + /// + /// ```rust + /// # use smirk::{smirk, MerkleTree}; + /// let tree = smirk! { + /// 1 => "hello", + /// 2 => "world", + /// 3 => "foo", + /// }; + /// + /// let keys: Vec<_> = tree.iter().map(|node| *node.key()).collect(); + /// + /// assert_eq!(keys, vec![1, 2, 3]); + /// ``` + pub fn iter(&'a self) -> Iter<'a, K, V> { + match &self.inner { + None => Iter::empty(), + Some(node) => Iter::node(node), + } + } +} + +pub struct Iter<'a, K, V>(Box> + 'a>); + +impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = &'a TreeNode; + + fn next(&mut self) -> Option { + self.0.next() + } +} + +impl<'a, K, V> Iter<'a, K, V> { + fn empty() -> Self { + Self(Box::new(empty())) + } + + fn node(node: &'a TreeNode) -> Self { + Self(Box::new(iter(node))) + } +} + +fn iter<'a, K, V>(node: &'a TreeNode) -> Box> + 'a> { + let left_iter = node.left.iter().flat_map(|node| iter(node)); + let right_iter = node.right.iter().flat_map(|node| iter(node)); + + Box::new(left_iter.chain(Some(node)).chain(right_iter)) +} + +#[cfg(test)] +mod tests { + use proptest::prop_assert_eq; + use test_strategy::proptest; + + use super::*; + + #[proptest(cases = 100)] + fn iter_order_is_correct(mut vec: Vec) { + vec.sort(); + + let mut tree = MerkleTree::new(); + + for elem in &vec { + tree.insert(*elem, *elem); + } + + let vec_again: Vec<_> = tree.iter().map(|node| *node.key()).collect(); + + prop_assert_eq!(vec, vec_again); + } +} diff --git a/smirk/src/tree/mod.rs b/smirk/src/tree/mod.rs index b5e2ccfd..7d1bb81b 100644 --- a/smirk/src/tree/mod.rs +++ b/smirk/src/tree/mod.rs @@ -2,8 +2,13 @@ use std::{borrow::Borrow, cmp::Ordering}; use crate::hash::{Digest, Hashable, MerklePath}; +mod iterator; +pub use iterator::*; + mod impls; -pub mod macros; +pub use impls::*; + +mod macros; pub mod visitor; #[cfg(test)] @@ -55,6 +60,12 @@ pub struct MerkleTree { pub(crate) inner: Option>>, } +impl PartialEq for MerkleTree { + fn eq(&self, other: &Self) -> bool { + self.root_hash() == other.root_hash() + } +} + impl MerkleTree { /// Create a new, empty [`MerkleTree`] /// @@ -75,6 +86,7 @@ impl MerkleTree { /// /// assert_eq!(tree.get(&1).unwrap(), "hello"); /// ``` + /// If the key is already present in the tree, the tree is left unchanged pub fn insert(&mut self, key: K, value: V) where K: Ord, 
@@ -145,23 +157,6 @@ impl MerkleTree { new_root } - /// Remove the entry corresponding to the given key (if it exists) - /// - /// If the key does exist, the key-value pair is returned, otherwise, `None` is returned - pub fn remove(&mut self, key: &Q) -> Option<(K, V)> - where - Q: Borrow + ?Sized, - K: Ord, - { - // TODO: this impelementation is HORRIBLE, but we're probably not gonna be removing much - - // fix this later anyways though - if !self.contains(key) { - return None; - } - - - } - /// The number of elements in the tree /// /// ```rust @@ -188,6 +183,11 @@ impl MerkleTree { counter.0 } + /// Returns true if and only if the tree contains no elements + pub fn is_empty(&self) -> bool { + self.size() == 0 + } + /// Returns `true` if and only if `key` is present in the tree /// /// ```rust @@ -424,7 +424,7 @@ impl TreeNode { } /// Update the `hash` field of this node, and all child nodes - fn recalculate_hash_recursive(&mut self) { + pub(crate) fn recalculate_hash_recursive(&mut self) { let mut new_hash = self.value.hash(); if let Some(left) = &mut self.left { diff --git a/smirk/src/tree/tests.rs b/smirk/src/tree/tests.rs index 3a0d01ec..3fece7a9 100644 --- a/smirk/src/tree/tests.rs +++ b/smirk/src/tree/tests.rs @@ -17,3 +17,12 @@ fn simple_example() { let _items: Vec<_> = tree.depth_first().collect(); } + +#[test] +fn insert_already_exists() { + let mut tree = smirk! { 1 => "hello" }; + + tree.insert(1, "world"); + + assert_eq!(*tree.get(&1).unwrap(), "hello"); +} From a5223ef1412ee2bc3a1c3a4cdf79166af38fc171 Mon Sep 17 00:00:00 2001 From: Cameron Date: Tue, 11 Jul 2023 11:21:43 +0100 Subject: [PATCH 04/15] mostly done, a few issues remain --- .../storage/rocksdb/structure.txt | 7 +++++++ smirk/src/hash/hashable.rs | 8 ++++---- smirk/src/hash/mod.rs | 12 ++++++++++-- smirk/src/hash/serde_impls.rs | 4 ++-- smirk/src/storage/rocksdb/codec.rs | 2 +- smirk/src/storage/rocksdb/mod.rs | 2 +- smirk/src/storage/rocksdb/structure.rs | 15 +++++++++++++++ 7 files changed, 40 insertions(+), 10 deletions(-) create mode 100644 smirk/proptest-regressions/storage/rocksdb/structure.txt diff --git a/smirk/proptest-regressions/storage/rocksdb/structure.txt b/smirk/proptest-regressions/storage/rocksdb/structure.txt new file mode 100644 index 00000000..9144d629 --- /dev/null +++ b/smirk/proptest-regressions/storage/rocksdb/structure.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. 
+cc 5b113e630643d3a9a4729f49b9171d84486ae81c7923dd58c98914f871c90691 # shrinks to input = _ToFromStructureArgs { tree: MerkleTree { inner: Some(TreeNode { key: 0, value: "", hash: Hash(f0db3924f3e2d677a51924b09ecef8a12416a6ceb09fadd39785bb4f685cab66), left: Some(TreeNode { key: -1, value: "", hash: Hash(0000000000000000000000000000000000000000000000000000000000000000), left: None, right: None, height: 0 }), right: None, height: 1 }) } } diff --git a/smirk/src/hash/hashable.rs b/smirk/src/hash/hashable.rs index f98de1d7..38c8a689 100644 --- a/smirk/src/hash/hashable.rs +++ b/smirk/src/hash/hashable.rs @@ -46,7 +46,7 @@ where T: Hashable, { fn hash(&self) -> Digest { - ::hash(&self) + ::hash(self) } } @@ -55,7 +55,7 @@ where T: Hashable + Clone, { fn hash(&self) -> Digest { - ::hash(&self) + ::hash(self) } } @@ -64,7 +64,7 @@ where T: Hashable, { fn hash(&self) -> Digest { - ::hash(&self) + ::hash(self) } } @@ -73,7 +73,7 @@ where T: Hashable, { fn hash(&self) -> Digest { - ::hash(&self) + ::hash(self) } } diff --git a/smirk/src/hash/mod.rs b/smirk/src/hash/mod.rs index 3d56d604..2327741b 100644 --- a/smirk/src/hash/mod.rs +++ b/smirk/src/hash/mod.rs @@ -2,7 +2,10 @@ //! //! In particular, the [`Digest`] type and the [`Hashable`] trait -use std::{fmt::{Display, Debug}, hash::Hash}; +use std::{ + fmt::{Debug, Display}, + hash::Hash, +}; use miden_crypto::{ hash::rpo::{Rpo256, RpoDigest}, @@ -34,7 +37,7 @@ impl Display for Digest { impl Hash for Digest { fn hash(&self, state: &mut H) { - <[u8; 32] as Hash>::hash(&self.to_bytes(), state) + <[u8; 32] as Hash>::hash(&self.to_bytes(), state); } } @@ -57,6 +60,7 @@ impl Digest { /// Get the representation of this hash as a byte array #[inline] + #[must_use] pub fn to_bytes(&self) -> [u8; Self::LEN] { self.0.as_bytes() } @@ -68,6 +72,7 @@ impl Digest { /// Any byte array returned from [`Digets::to_bytes`] will be valid for this function, and the /// resulting hash will be equal to the hash that created the byte array #[inline] + #[must_use] pub fn from_bytes(bytes: [u8; 32]) -> Option { let mut reader = SliceReader::new(&bytes); RpoDigest::read_from(&mut reader).ok().map(Digest) @@ -75,6 +80,7 @@ impl Digest { /// Calculate the hash of the given bytes #[inline] + #[must_use] pub fn calculate(bytes: &[u8]) -> Self { Self(Rpo256::hash(bytes)) } @@ -104,12 +110,14 @@ impl MerklePath { /// The components should be the hashes that form the path, with the root of the tree at the /// end #[inline] + #[must_use] pub fn new(components: Vec) -> Self { Self { components } } /// Get a slice of hashes representing the components of the path #[inline] + #[must_use] pub fn components(&self) -> &[Digest] { &self.components } diff --git a/smirk/src/hash/serde_impls.rs b/smirk/src/hash/serde_impls.rs index b471e7b4..e16bab37 100644 --- a/smirk/src/hash/serde_impls.rs +++ b/smirk/src/hash/serde_impls.rs @@ -1,8 +1,8 @@ -use super::*; - use serde::{de::Visitor, Deserializer, Serializer}; use serde::{Deserialize, Serialize}; +use super::Digest; + impl Serialize for Digest { fn serialize(&self, serializer: S) -> Result where diff --git a/smirk/src/storage/rocksdb/codec.rs b/smirk/src/storage/rocksdb/codec.rs index 758be030..fbd74bdd 100644 --- a/smirk/src/storage/rocksdb/codec.rs +++ b/smirk/src/storage/rocksdb/codec.rs @@ -11,7 +11,7 @@ pub struct EncodeError(rmp_serde::encode::Error); #[error("decode error: {0}")] pub struct DecodeError(rmp_serde::decode::Error); -pub(super) fn encode<'a, T>(t: &'a T) -> Result, EncodeError> +pub(super) fn encode(t: &T) -> Result, 
EncodeError> where T: Serialize, { diff --git a/smirk/src/storage/rocksdb/mod.rs b/smirk/src/storage/rocksdb/mod.rs index c9baad51..f70363f5 100644 --- a/smirk/src/storage/rocksdb/mod.rs +++ b/smirk/src/storage/rocksdb/mod.rs @@ -1,6 +1,6 @@ use std::{fmt::Debug, path::Path}; -use rocksdb::{IteratorMode, TransactionDB, DB}; +use rocksdb::{IteratorMode, TransactionDB}; use serde::{Deserialize, Serialize}; use crate::{ diff --git a/smirk/src/storage/rocksdb/structure.rs b/smirk/src/storage/rocksdb/structure.rs index a18fe53b..f31337d6 100644 --- a/smirk/src/storage/rocksdb/structure.rs +++ b/smirk/src/storage/rocksdb/structure.rs @@ -81,12 +81,27 @@ impl Structure { #[cfg(test)] mod tests { + use proptest::prop_assert_eq; use test_strategy::proptest; use super::*; #[proptest] fn to_from_structure(tree: MerkleTree) { + let structure = Structure::from_tree(&tree).unwrap(); + let mut values = tree + .iter() + .map(|node| { + let hash = node.value().hash(); + let key = *node.key(); + let value = node.value().clone(); + (hash, (key, value)) + }) + .collect(); + + let tree_again = structure.to_tree(&mut values).unwrap(); + + prop_assert_eq!(tree, tree_again); } } From 5b6b08eacaf6325e9548d7b6d3794fae9013c415 Mon Sep 17 00:00:00 2001 From: Cameron Date: Wed, 12 Jul 2023 11:54:25 +0100 Subject: [PATCH 05/15] remove storage abstraction, replace with single struct --- .../storage/structure.txt | 7 + smirk/src/storage/{rocksdb => }/codec.rs | 0 smirk/src/storage/error.rs | 16 +- smirk/src/storage/mod.rs | 144 ++++++++++++++---- smirk/src/storage/rocksdb/mod.rs | 132 ---------------- smirk/src/storage/{rocksdb => }/structure.rs | 0 smirk/src/testing.rs | 34 ++--- smirk/src/tree/mod.rs | 67 +------- smirk/src/tree/proof.rs | 77 ++++++++++ 9 files changed, 223 insertions(+), 254 deletions(-) create mode 100644 smirk/proptest-regressions/storage/structure.txt rename smirk/src/storage/{rocksdb => }/codec.rs (100%) delete mode 100644 smirk/src/storage/rocksdb/mod.rs rename smirk/src/storage/{rocksdb => }/structure.rs (100%) create mode 100644 smirk/src/tree/proof.rs diff --git a/smirk/proptest-regressions/storage/structure.txt b/smirk/proptest-regressions/storage/structure.txt new file mode 100644 index 00000000..475de029 --- /dev/null +++ b/smirk/proptest-regressions/storage/structure.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. 
+cc 6e85196b5344d7133cab69db71ca9a46a12257da4c53012c14c15a50073fa318 # shrinks to input = _ToFromStructureArgs { tree: MerkleTree { inner: Some(TreeNode { key: 0, value: "", hash: Hash(f0db3924f3e2d677a51924b09ecef8a12416a6ceb09fadd39785bb4f685cab66), left: Some(TreeNode { key: -1, value: "", hash: Hash(0000000000000000000000000000000000000000000000000000000000000000), left: None, right: None, height: 0 }), right: None, height: 1 }) } } diff --git a/smirk/src/storage/rocksdb/codec.rs b/smirk/src/storage/codec.rs similarity index 100% rename from smirk/src/storage/rocksdb/codec.rs rename to smirk/src/storage/codec.rs diff --git a/smirk/src/storage/error.rs b/smirk/src/storage/error.rs index 1e1553a6..8272a140 100644 --- a/smirk/src/storage/error.rs +++ b/smirk/src/storage/error.rs @@ -1,6 +1,6 @@ use crate::hash::Digest; -use super::rocksdb::DecodeError; +use super::{DecodeError, EncodeError}; /// An error encountered while persisting or restoring a [`MerkleTree`] #[derive(Debug, thiserror::Error)] @@ -29,7 +29,15 @@ pub enum Error { #[error("malformed structure: {0}")] MalformedStructure(DecodeError), - /// Unknown error - #[error("unknown error: {0}")] - Unknown(Box), + /// Error encoding data to binary format + #[error("error encoding data to binary format: {0}")] + Encode(#[from] EncodeError), + + /// Error decoding data from binary format + #[error("error decoding data from binary format: {0}")] + Decode(#[from] DecodeError), + + /// Rocksdb error + #[error("rocksdb error: {0}")] + Unknown(#[from] rocksdb::Error), } diff --git a/smirk/src/storage/mod.rs b/smirk/src/storage/mod.rs index 2b718d70..a1c2199b 100644 --- a/smirk/src/storage/mod.rs +++ b/smirk/src/storage/mod.rs @@ -1,38 +1,124 @@ -//! Persistence backends for Merkle trees -//! -//! The main backend provided is [rocksdb][rocksdb], but the design is kept modular to allow -//! possible future storage backends. -//! -//! -//! [rocksdb]: https://github.com/facebook/rocksdb -use std::fmt::Debug; +//! 
Persistence layer for [`MerkleTree`]s +use std::{fmt::Debug, path::Path}; -use crate::{hash::Hashable, tree::MerkleTree}; - -/// A rocksdb-based storage implementation -pub mod rocksdb; +use crate::{hash::{Hashable, Digest}, tree::MerkleTree}; +use rocksdb::{TransactionDB, IteratorMode}; +use serde::{Deserialize, Serialize}; +mod structure; +mod codec; +pub use codec::{EncodeError, DecodeError}; mod error; pub use error::Error; -use serde::{Deserialize, Serialize}; -/// Types which can act as a storage backend for a Merkle tree -pub trait Storage: Debug -where - K: Ord + 'static, - V: Hashable + 'static, -{ - /// Persist the given tree to storage - fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error> - where - K: Serialize, - V: Serialize; +use self::structure::Structure; + + +/// A rocksdb-based storage mechanism for [`MerkleTree`]s +/// +/// ```rust +/// +/// ``` +pub struct Storage { + instance: TransactionDB, +} + +impl Debug for Storage { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Storage") + } +} - /// Load a tree from storage +impl Storage { + /// Create a new [`RocksdbStorage`] from an existing rocksdb instance /// - /// If no tree has been persisted, `None` should be returned - fn load_tree(&self) -> Result>, Error> + /// This is useful if you want to create transactions that modify both data managed by smirk, + /// as well as data external to smirk + pub fn from_instance(instance: TransactionDB) -> Self { + Self { instance } + } + + /// Create a new [`RocksdbStorage`] by opening a new rocksdb instance at the given path + pub fn open(path: &Path) -> Result { + let instance = TransactionDB::open_default(path)?; + Ok(Self { instance }) + } + + const STRUCTURE_KEY: &[u8] = b"structure"; +} + +impl Storage { + /// Store a tree + pub fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error> + where + K: Serialize + 'static + Ord, + V: Serialize + 'static + Hashable, + { + let tx = self.instance.transaction(); + + let structure = Structure::from_tree(tree); + let structure_bytes = codec::encode(&structure)?; + + tx.put(Self::STRUCTURE_KEY, structure_bytes)?; + + for node in tree.iter() { + let hash = node.value().hash(); + let bytes = codec::encode(&(node.key(), node.value()))?; + + tx.put(&hash.to_bytes(), &bytes)?; + } + + tx.commit()?; + + Ok(()) + } + + /// Load a tree from storage, if it is present + pub fn load_tree(&self) -> Result>, Error> where - K: for<'a> Deserialize<'a>, - V: for<'a> Deserialize<'a>; + K: for<'a> Deserialize<'a> + 'static + Ord, + V: for<'a> Deserialize<'a> + 'static + Hashable, + { + let Some(structure_bytes) = self.instance.get(Self::STRUCTURE_KEY)? 
else { + return Ok(None); + }; + + let structure: Option = codec::decode(&structure_bytes).map_err(Error::MalformedStructure)?; + let Some(structure) = structure else { return Ok(Some(MerkleTree::new())) }; + + let mut values = self.instance.iterator(IteratorMode::Start).filter(|result| { + match result { + // don't try to deserialize this key + Ok((hash, _data)) => hash.as_ref() != Self::STRUCTURE_KEY, + Err(_) => true, + } + }).map(|result| { + let (hash, data) = result?; + + let hash = get_hash(&hash)?; + let data: (K, V) = codec::decode(&data)?; + + Ok((hash, data)) + + }).collect::>()?; + + let tree = structure.to_tree(&mut values)?; + + Ok(Some(tree)) + } + +} + +fn get_hash(bytes: &[u8]) -> Result { + let hash_bytes = bytes + .as_ref() + .try_into() + .map_err(|_| Error::InvalidHashBytes(bytes.to_vec()))?; + + let hash = Digest::from_bytes(hash_bytes) + .ok_or_else(|| Error::InvalidHashBytes(hash_bytes.to_vec()))?; + + Ok(hash) } + + diff --git a/smirk/src/storage/rocksdb/mod.rs b/smirk/src/storage/rocksdb/mod.rs deleted file mode 100644 index f70363f5..00000000 --- a/smirk/src/storage/rocksdb/mod.rs +++ /dev/null @@ -1,132 +0,0 @@ -use std::{fmt::Debug, path::Path}; - -use rocksdb::{IteratorMode, TransactionDB}; -use serde::{Deserialize, Serialize}; - -use crate::{ - hash::{Digest, Hashable}, - MerkleTree, -}; - -use self::structure::Structure; - -use super::{Error, Storage}; - -mod codec; -pub use codec::{DecodeError, EncodeError}; -mod structure; - -/// A struct that acts as a [`Storage`] backend by persisting data in [rocksdb][db] -/// -/// Broadly speaking, this type works by: -/// - serializing a tree-like "structure" object to the key `"structure"` -/// - serializing binary encoded key-value pairs to the key `rpo(value)` (note - this is not the -/// hash of a given node, which includes the children in the hash) -/// -/// [db]: https://github.com/facebook/rocksdb -pub struct RocksdbStorage { - pub(crate) instance: TransactionDB, -} - -impl Debug for RocksdbStorage { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "RocksdbStorage") - } -} - -impl RocksdbStorage { - /// Create a new [`RocksdbStorage`] from an existing rocksdb instance - /// - /// This is useful if you want to create transactions that modify both data managed by smirk, - /// as well as data external to smirk - pub fn from_instance(instance: TransactionDB) -> Self { - Self { instance } - } - - /// Create a new [`RocksdbStorage`] by opening a new rocksdb instance at the given path - pub fn open(path: &Path) -> Result { - let instance = TransactionDB::open_default(path)?; - Ok(Self { instance }) - } - - const STRUCTURE_KEY: &[u8] = b"structure"; -} - -impl Storage for RocksdbStorage -where - K: Ord + 'static, - V: Hashable + 'static, -{ - fn store_tree(&self, tree: &MerkleTree) -> Result<(), Error> - where - K: Serialize, - V: Serialize, - { - let tx = self.instance.transaction(); - - let structure = structure::Structure::from_tree(tree); - let structure_bytes = codec::encode(&structure).map_err(err)?; - - tx.put(Self::STRUCTURE_KEY, structure_bytes).map_err(err)?; - - for node in tree.iter() { - let hash = node.value().hash(); - let bytes = codec::encode(&(node.key(), node.value())).map_err(err)?; - - tx.put(&hash.to_bytes(), &bytes).map_err(err)?; - } - - tx.commit().map_err(err)?; - - Ok(()) - } - - fn load_tree(&self) -> Result>, Error> - where - K: for<'a> Deserialize<'a>, - V: for<'a> Deserialize<'a>, - { - let Some(structure_bytes) = 
self.instance.get(Self::STRUCTURE_KEY).map_err(err)? else { - return Ok(None); - }; - - let structure: Option = codec::decode(&structure_bytes).map_err(Error::MalformedStructure)?; - let Some(structure) = structure else { return Ok(Some(MerkleTree::new())) }; - - let mut values = self.instance.iterator(IteratorMode::Start).filter(|result| { - match result { - // don't try to deserialize this key - Ok((hash, _data)) => hash.as_ref() != Self::STRUCTURE_KEY, - Err(_) => true, - } - }).map(|result| { - let (hash, data) = result.map_err(err)?; - - let hash = get_hash(&hash)?; - let data: (K, V) = codec::decode(&data).map_err(err)?; - - Ok((hash, data)) - - }).collect::>()?; - - let tree = structure.to_tree(&mut values)?; - - Ok(Some(tree)) - } -} - -fn get_hash(bytes: &[u8]) -> Result { - let hash_bytes = bytes - .as_ref() - .try_into() - .map_err(|_| Error::InvalidHashBytes(bytes.to_vec()))?; - - let hash = Digest::from_bytes(hash_bytes) - .ok_or_else(|| Error::InvalidHashBytes(hash_bytes.to_vec()))?; - - Ok(hash) -} - -fn err(e: E) -> Error { - Error::Unknown(Box::new(e)) -} diff --git a/smirk/src/storage/rocksdb/structure.rs b/smirk/src/storage/structure.rs similarity index 100% rename from smirk/src/storage/rocksdb/structure.rs rename to smirk/src/storage/structure.rs diff --git a/smirk/src/testing.rs b/smirk/src/testing.rs index 763409f8..ab0f081b 100644 --- a/smirk/src/testing.rs +++ b/smirk/src/testing.rs @@ -1,46 +1,30 @@ +use std::ops::Deref; + use tempdir::TempDir; -use crate::{ - hash::Hashable, - storage::{rocksdb::RocksdbStorage, Storage}, - tree::MerkleTree, -}; +use crate::{storage::Storage, tree::MerkleTree}; /// Helper struct that makes it easier to test against a rocksdb instance #[derive(Debug)] pub struct TestStorage { _dir: TempDir, - db: RocksdbStorage, + db: Storage, } impl TestStorage { pub fn new() -> Self { let dir = TempDir::new("smirk").unwrap(); - let db = RocksdbStorage::open(dir.path()).unwrap(); + let db = Storage::open(dir.path()).unwrap(); Self { _dir: dir, db } } } -impl Storage for TestStorage -where - K: Ord + 'static, - V: Hashable + 'static, -{ - fn store_tree(&self, tree: &MerkleTree) -> Result<(), crate::storage::Error> - where - K: serde::Serialize, - V: serde::Serialize, - { - self.db.store_tree(tree) - } +impl Deref for TestStorage { + type Target = Storage; - fn load_tree(&self) -> Result>, crate::storage::Error> - where - K: for<'a> serde::Deserialize<'a>, - V: for<'a> serde::Deserialize<'a>, - { - self.db.load_tree() + fn deref(&self) -> &Self::Target { + &self.db } } diff --git a/smirk/src/tree/mod.rs b/smirk/src/tree/mod.rs index 7d1bb81b..25f6d2eb 100644 --- a/smirk/src/tree/mod.rs +++ b/smirk/src/tree/mod.rs @@ -1,6 +1,6 @@ use std::{borrow::Borrow, cmp::Ordering}; -use crate::hash::{Digest, Hashable, MerklePath}; +use crate::hash::{Digest, Hashable}; mod iterator; pub use iterator::*; @@ -11,6 +11,8 @@ pub use impls::*; mod macros; pub mod visitor; +mod proof; + #[cfg(test)] mod tests; @@ -261,69 +263,6 @@ impl MerkleTree { { self.inner.as_ref().and_then(|node| node.get_node(key)) } - - /// Get the root hash of the Merkle tree - pub fn root_hash(&self) -> Digest - where - V: Hashable, - { - match &self.inner { - None => Digest::NULL, // should this function return an option? 
- Some(node) => node.hash(), - } - } - - /// Generate a [`MerklePath`] for the a given value - pub fn path_for(&self, key: &Q) -> Option - where - Q: Borrow + ?Sized, - K: Ord, - V: Hashable, - { - let mut components = Vec::with_capacity(self.height()); - - let mut opt_node = self.inner.as_deref(); - - loop { - let node = opt_node?; - - components.push(node.hash()); - - match key.borrow().cmp(&node.key) { - Ordering::Less => opt_node = node.left.as_deref(), - Ordering::Greater => opt_node = node.right.as_deref(), - Ordering::Equal => { - components.reverse(); - return Some(MerklePath::new(components)); - } - } - } - } - - /// Verify that the given value exists in the tree, by using the provided [`MerklePath`] - pub fn verify(&self, path: &MerklePath, value: &V) -> bool - where - Q: Ord + Borrow + ?Sized, - V: Hashable, - { - if path.components().last() != Some(&self.root_hash()) { - return false; - } - - let mut hash = value.hash(); - - for slice in path.components().windows(2) { - let first = &slice[0]; - let second = &slice[1]; - - hash.merge(first); - if hash != *second { - return false; - } - } - - true - } } /// An individual node in a Merkle tree diff --git a/smirk/src/tree/proof.rs b/smirk/src/tree/proof.rs new file mode 100644 index 00000000..03dc7e69 --- /dev/null +++ b/smirk/src/tree/proof.rs @@ -0,0 +1,77 @@ +use std::{borrow::Borrow, cmp::Ordering}; + +use crate::{ + hash::{Digest, Hashable, MerklePath}, + MerkleTree, +}; + +impl MerkleTree { + /// Generate a [`MerklePath`] that proves that a given key exists in the tree + /// + /// ```rust + /// # use smirk::{smirk, MerklePath}; + /// let tree = smirk! { + /// 1 => "hello", + /// 2 => "world", + /// }; + /// + /// assert!(tree.prove(&1).is_some()); + /// assert!(tree.prove(&2).is_some()); + /// assert!(tree.prove(&3).is_none()); + /// ``` + pub fn prove(&self, key: &Q) -> Option + where + Q: Borrow + ?Sized, + K: Ord, + { + let Some(mut node) = self.inner.as_deref() else { return None }; + let mut components = Vec::with_capacity(node.height() as usize); + + loop { + components.push(node.hash()); + + match key.borrow().cmp(node.key()) { + Ordering::Less => node = node.left.as_deref()?, + Ordering::Greater => node = node.right.as_deref()?, + Ordering::Equal => { + components.reverse(); + return Some(MerklePath::new(components)); + } + } + } + } + + /// Get the root hash of the Merkle tree + pub fn root_hash(&self) -> Digest { + match &self.inner { + None => Digest::NULL, // should this function return an option? 
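+            // `Digest::NULL` above is the sentinel root hash of an empty tree; a
+            // non-empty tree reports the cached hash of its root node instead.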
+ Some(node) => node.hash(), + } + } + + /// Generate + /// Verify that the given value exists in the tree, by using the provided [`MerklePath`] + pub fn verify(&self, path: &MerklePath, value: &V) -> bool + where + V: Hashable, + { + if path.components().last() != Some(&self.root_hash()) { + dbg!("not end root hash"); + return false; + } + + let mut hash = value.hash(); + + for slice in path.components().windows(2) { + let first = &slice[0]; + let second = &slice[1]; + + hash.merge(first); + if hash != *second { + return false; + } + } + + true + } +} From 0776be57840f5ca4d9cbc6a90d3164f561069cfe Mon Sep 17 00:00:00 2001 From: Cameron Date: Thu, 20 Jul 2023 20:47:58 +0100 Subject: [PATCH 06/15] it works finally lol --- Cargo.lock | 267 +++++++++++++++++- flake.nix | 3 + smirk/Cargo.toml | 3 + smirk/README.md | 40 +++ smirk/proptest-regressions/hash/mod.txt | 7 - .../storage/rocksdb/codec.txt | 7 - .../storage/rocksdb/structure.txt | 7 - .../storage/structure.txt | 7 - smirk/src/hash/from_iter.rs | 78 +++++ smirk/src/hash/hashable.rs | 5 +- smirk/src/hash/mod.rs | 68 ++--- smirk/src/hash/path.rs | 70 +++++ smirk/src/hash/proptest_impls.rs | 3 +- smirk/src/lib.rs | 46 +-- .../smirk__testing__root_hash_snapshot.snap | 5 + smirk/src/storage/codec.rs | 89 +++++- smirk/src/storage/error.rs | 30 +- smirk/src/storage/mod.rs | 130 +++++---- smirk/src/storage/structure.rs | 107 ------- smirk/src/storage/tests.rs | 73 +++++ smirk/src/testing.rs | 17 +- smirk/src/tree/batch.rs | 68 +++++ smirk/src/tree/hash.rs | 130 +++++++++ smirk/src/tree/impls.rs | 48 ++-- smirk/src/tree/iterator.rs | 43 ++- smirk/src/tree/mod.rs | 132 +++++---- smirk/src/tree/proof.rs | 103 +++++-- smirk/src/tree/tests.rs | 40 ++- smirk/src/tree/visitor.rs | 2 +- solid/Cargo.toml | 2 +- 30 files changed, 1178 insertions(+), 452 deletions(-) create mode 100644 smirk/README.md delete mode 100644 smirk/proptest-regressions/hash/mod.txt delete mode 100644 smirk/proptest-regressions/storage/rocksdb/codec.txt delete mode 100644 smirk/proptest-regressions/storage/rocksdb/structure.txt delete mode 100644 smirk/proptest-regressions/storage/structure.txt create mode 100644 smirk/src/hash/from_iter.rs create mode 100644 smirk/src/hash/path.rs create mode 100644 smirk/src/snapshots/smirk__testing__root_hash_snapshot.snap delete mode 100644 smirk/src/storage/structure.rs create mode 100644 smirk/src/storage/tests.rs create mode 100644 smirk/src/tree/batch.rs create mode 100644 smirk/src/tree/hash.rs diff --git a/Cargo.lock b/Cargo.lock index 0d70c191..ceed7fc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -400,6 +400,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.3.2" @@ -941,6 +947,12 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.0.79" @@ -1017,6 +1029,33 @@ dependencies = [ "winapi", ] +[[package]] +name = "ciborium" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" + +[[package]] +name = "ciborium-ll" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "cid" version = "0.10.1" @@ -1126,6 +1165,18 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "console" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "windows-sys 0.45.0", +] + [[package]] name = "console_error_panic_hook" version = "0.1.7" @@ -1223,6 +1274,76 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset 0.9.0", + "scopeguard", +] + [[package]] name = "crossbeam-utils" version = "0.8.15" @@ -1655,6 +1776,12 @@ dependencies = [ "log", ] +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "encoding_rs" version = "0.8.32" @@ -2057,6 +2184,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + [[package]] name = "hashbrown" version = "0.12.3" @@ -2344,6 +2477,19 @@ dependencies = [ "generic-array", ] +[[package]] +name = "insta" +version = "1.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0770b0a3d4c70567f0d58331f3088b0e4c4f56c9b8d764efe654b4a5d46de3a" +dependencies = [ + "console", + "lazy_static", + "linked-hash-map", + "similar", + "yaml-rust", +] + [[package]] name = "instant" version = "0.1.12" @@ -2983,6 +3129,15 @@ 
dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + [[package]] name = "miden-air" version = "0.5.0" @@ -3323,7 +3478,7 @@ dependencies = [ "bitflags", "cfg-if", "libc", - "memoffset", + "memoffset 0.6.5", ] [[package]] @@ -3447,6 +3602,12 @@ version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "opaque-debug" version = "0.3.0" @@ -3687,6 +3848,34 @@ version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3d7ddaed09e0eb771a79ab0fd64609ba0afb0a8366421957936ad14cbd13630" +[[package]] +name = "plotters" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" + +[[package]] +name = "plotters-svg" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +dependencies = [ + "plotters-backend", +] + [[package]] name = "polling" version = "2.8.0" @@ -3884,6 +4073,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" +[[package]] +name = "pretty_assertions" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +dependencies = [ + "diff", + "yansi", +] + [[package]] name = "prettyplease" version = "0.1.25" @@ -4206,6 +4405,28 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rayon" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + [[package]] name = "rcgen" version = "0.9.3" @@ -4559,6 +4780,15 @@ version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.21" @@ -4926,6 +5156,12 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "similar" 
+version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf" + [[package]] name = "siphasher" version = "0.3.10" @@ -4951,8 +5187,11 @@ checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" name = "smirk" version = "0.1.0" dependencies = [ + "criterion", "hex", + "insta", "miden-crypto 0.6.0", + "pretty_assertions", "proptest", "rmp-serde", "rocksdb", @@ -5000,7 +5239,6 @@ dependencies = [ "futures", "futures-timer", "hex", - "libp2p-core", "multihash 0.18.1", "parking_lot", "proptest", @@ -5930,6 +6168,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca" +[[package]] +name = "walkdir" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.0" @@ -6682,6 +6930,15 @@ dependencies = [ "time 0.3.21", ] +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "yamux" version = "0.10.2" @@ -6696,6 +6953,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + [[package]] name = "yasna" version = "0.5.2" diff --git a/flake.nix b/flake.nix index 6ceff2ae..bee958e8 100644 --- a/flake.nix +++ b/flake.nix @@ -28,6 +28,9 @@ protobuf clang # required for rocksdb + + cargo-insta # snapshot testing for smirk + gnuplot # criterion graphs ]; LIBCLANG_PATH = "${pkgs.libclang.lib}/lib/"; diff --git a/smirk/Cargo.toml b/smirk/Cargo.toml index 490b208e..d7d5df0e 100644 --- a/smirk/Cargo.toml +++ b/smirk/Cargo.toml @@ -19,3 +19,6 @@ proptest = { version = "1", optional = true } tempdir = "0.3" proptest = "1" test-strategy = "0.3" +pretty_assertions = "1" +insta = "1" +criterion = "0.5" diff --git a/smirk/README.md b/smirk/README.md new file mode 100644 index 00000000..5ebfe3b7 --- /dev/null +++ b/smirk/README.md @@ -0,0 +1,40 @@ +# `smirk` - Persistent Merkle Tree + +`smirk` = "stable `merk`" + +This library provides `MerkleTree`, a Merkle tree that uses the [Rescue-Prime Optimized][rpo] +hash function, with a map-like API. There is also a [`Storage`] API for persisting the tree in +[rocksdb][db] + +```rust +# use smirk::{MerkleTree, smirk}; +let mut tree = MerkleTree::new(); +tree.insert(1, "hello"); +tree.insert(2, "world"); + +// or you can use the macro to create a new tree +let tree = smirk! { + 1 => "hello", + 2 => "world", +}; + +assert_eq!(tree.get(&1), Some(&"hello")); +assert_eq!(tree.get(&2), Some(&"world")); +assert_eq!(tree.get(&3), None); + +``` + +Types provided by this library implement [`Arbitrary`], for use with [`proptest`], gated behind +the `proptest` feature flag. 
+ +## Todo + + - benchmarks + - batch update API for storage + - use a slab allocator internally + +[rpo]: https://eprint.iacr.org/2022/1577.pdf +[db]: https://github.com/facebook/rocksdb + +[`Storage`]: storage::Storage +[`Arbitrary`]: proptest::prelude::Arbitrary diff --git a/smirk/proptest-regressions/hash/mod.txt b/smirk/proptest-regressions/hash/mod.txt deleted file mode 100644 index d6674a49..00000000 --- a/smirk/proptest-regressions/hash/mod.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Seeds for failure cases proptest has generated in the past. It is -# automatically read and these particular cases re-run before any -# novel cases are generated. -# -# It is recommended to check this file in to source control so that -# everyone who runs the test benefits from these saved cases. -cc fd176da074662e3129e51f7bbe4112d4bb9ee9e596bb25f76ddc4cbd9d526c12 # shrinks to input = _DigestBytesSerdeRoundTripArgs { digest: Digest(RpoDigest([BaseElement(0), BaseElement(0), BaseElement(0), BaseElement(0)])) } diff --git a/smirk/proptest-regressions/storage/rocksdb/codec.txt b/smirk/proptest-regressions/storage/rocksdb/codec.txt deleted file mode 100644 index b5b6f723..00000000 --- a/smirk/proptest-regressions/storage/rocksdb/codec.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Seeds for failure cases proptest has generated in the past. It is -# automatically read and these particular cases re-run before any -# novel cases are generated. -# -# It is recommended to check this file in to source control so that -# everyone who runs the test benefits from these saved cases. -cc aaf2606728b143c354f5fedf3ddd6894dcd25805eea6d1e69b019dd202f8e72b # shrinks to input = _EncodeDecodeBijectiveArgs { key: Digest(RpoDigest([BaseElement(0), BaseElement(0), BaseElement(0), BaseElement(0)])), value: CoolCustomType { foo: "", bar: [], coords: [(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0)] } } diff --git a/smirk/proptest-regressions/storage/rocksdb/structure.txt b/smirk/proptest-regressions/storage/rocksdb/structure.txt deleted file mode 100644 index 9144d629..00000000 --- a/smirk/proptest-regressions/storage/rocksdb/structure.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Seeds for failure cases proptest has generated in the past. It is -# automatically read and these particular cases re-run before any -# novel cases are generated. -# -# It is recommended to check this file in to source control so that -# everyone who runs the test benefits from these saved cases. -cc 5b113e630643d3a9a4729f49b9171d84486ae81c7923dd58c98914f871c90691 # shrinks to input = _ToFromStructureArgs { tree: MerkleTree { inner: Some(TreeNode { key: 0, value: "", hash: Hash(f0db3924f3e2d677a51924b09ecef8a12416a6ceb09fadd39785bb4f685cab66), left: Some(TreeNode { key: -1, value: "", hash: Hash(0000000000000000000000000000000000000000000000000000000000000000), left: None, right: None, height: 0 }), right: None, height: 1 }) } } diff --git a/smirk/proptest-regressions/storage/structure.txt b/smirk/proptest-regressions/storage/structure.txt deleted file mode 100644 index 475de029..00000000 --- a/smirk/proptest-regressions/storage/structure.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Seeds for failure cases proptest has generated in the past. It is -# automatically read and these particular cases re-run before any -# novel cases are generated. -# -# It is recommended to check this file in to source control so that -# everyone who runs the test benefits from these saved cases. 
-cc 6e85196b5344d7133cab69db71ca9a46a12257da4c53012c14c15a50073fa318 # shrinks to input = _ToFromStructureArgs { tree: MerkleTree { inner: Some(TreeNode { key: 0, value: "", hash: Hash(f0db3924f3e2d677a51924b09ecef8a12416a6ceb09fadd39785bb4f685cab66), left: Some(TreeNode { key: -1, value: "", hash: Hash(0000000000000000000000000000000000000000000000000000000000000000), left: None, right: None, height: 0 }), right: None, height: 1 }) } } diff --git a/smirk/src/hash/from_iter.rs b/smirk/src/hash/from_iter.rs new file mode 100644 index 00000000..ca2eaa16 --- /dev/null +++ b/smirk/src/hash/from_iter.rs @@ -0,0 +1,78 @@ +use std::{backtrace::Backtrace, borrow::Borrow, fmt::Debug}; + +use super::Digest; + +impl FromIterator for Digest +where + H: Borrow + Debug, +{ + fn from_iter>(iter: T) -> Self { + let vec: Vec<_> = iter.into_iter().collect(); + let mut iter = vec.iter(); + + let Some(hash) = iter.next() else { return Digest::NULL }; + let mut hash = *hash.borrow(); + + for new_hash in iter { + hash.merge(new_hash.borrow()); + } + + if format!("{hash}") + .contains("e54944d3c80d00cc318e861d5d56c76a2b1bf9e7638422c0ec636e48ae8b4c0f") + { + let bt = Backtrace::capture(); + println!("{bt}"); + println!("{vec:?}"); + // panic!("uh oh"); + } + + hash + } +} + +#[cfg(test)] +mod tests { + use std::{borrow::Cow, rc::Rc, sync::Arc}; + + use crate::hash::Hashable; + + use super::*; + + #[test] + fn can_collect_various_types() { + let mut d = Digest::calculate(&[]); + + let _: Digest = [d].into_iter().collect(); + let _: Digest = [&d].into_iter().collect(); + let _: Digest = [&mut d].into_iter().collect(); + let _: Digest = [Box::new(d)].into_iter().collect(); + let _: Digest = [Cow::Owned(d)].into_iter().collect(); + let _: Digest = [Rc::new(d)].into_iter().collect(); + let _: Digest = [Arc::new(d)].into_iter().collect(); + } + + #[test] + fn collecting_empty_hash_is_null() { + let hash: Digest = Vec::::new().into_iter().collect(); + assert_eq!(hash, Digest::NULL); + } + + #[test] + fn collecting_single_hash_is_unchanged() { + let hash: Digest = vec![1.hash()].iter().collect(); + assert_eq!(hash, 1.hash()); + } + + #[test] + fn collecting_multiple_hashes() { + let hash: Digest = [1.hash(), "hello".hash(), [1u8, 2, 3].hash()] + .iter() + .collect(); + + let mut expected = 1.hash(); + expected.merge(&"hello".hash()); + expected.merge(&[1u8, 2, 3].hash()); + + assert_eq!(hash, expected); + } +} diff --git a/smirk/src/hash/hashable.rs b/smirk/src/hash/hashable.rs index 38c8a689..cdaac6ce 100644 --- a/smirk/src/hash/hashable.rs +++ b/smirk/src/hash/hashable.rs @@ -21,7 +21,6 @@ pub trait Hashable { fn hash(&self) -> Digest; } -// POINTER IMPLS impl Hashable for &T where @@ -77,8 +76,6 @@ where } } -/// COLLECTION IMPLS - macro_rules! int_impl { ($int:ty) => { impl Hashable for $int { @@ -121,6 +118,8 @@ impl Hashable for [u8; N] { } } +// note, we implement the trait on `[u8]`, not `&[u8]` so it works with the above impls for types +// like `Arc<[u8]>` or `Box<[u8]>` - the same logic applies to `str` as_ref_impl!([u8]); as_ref_impl!(Vec); as_ref_impl!(str); diff --git a/smirk/src/hash/mod.rs b/smirk/src/hash/mod.rs index 2327741b..3c5827c5 100644 --- a/smirk/src/hash/mod.rs +++ b/smirk/src/hash/mod.rs @@ -1,11 +1,13 @@ -//! Items relating to hashing data +//! Items relating to hashing datahashmod //! //! In particular, the [`Digest`] type and the [`Hashable`] trait +//! +//! This module also contains [`MerklePath`], which can be used to verify the existance of a key in +//! a [`MerkleTree`] +//! +//! 
[`MerkleTree`]: crate::MerkleTree -use std::{ - fmt::{Debug, Display}, - hash::Hash, -}; +use std::fmt::{Debug, Display}; use miden_crypto::{ hash::rpo::{Rpo256, RpoDigest}, @@ -13,11 +15,17 @@ use miden_crypto::{ Felt, }; +mod from_iter; mod hashable; +mod path; +mod serde_impls; + pub use hashable::Hashable; +pub use path::MerklePath; +pub(crate) use path::Stage; + #[cfg(any(test, feature = "proptest"))] mod proptest_impls; -mod serde_impls; /// A Rescue-Prime Optimized digest #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] @@ -35,9 +43,9 @@ impl Display for Digest { } } -impl Hash for Digest { +impl std::hash::Hash for Digest { fn hash(&self, state: &mut H) { - <[u8; 32] as Hash>::hash(&self.to_bytes(), state); + <[u8; 32] as std::hash::Hash>::hash(&self.to_bytes(), state); } } @@ -59,6 +67,9 @@ impl Digest { const LEN: usize = 32; /// Get the representation of this hash as a byte array + /// + /// These bytes can be converted back to a [`Digest`] using [`Digest::from_bytes`] (though this + /// function returns an `Option` since it can fail) #[inline] #[must_use] pub fn to_bytes(&self) -> [u8; Self::LEN] { @@ -69,7 +80,7 @@ impl Digest { /// /// Note: this returns an `Option` because not all possible byte arrays are valid [`Digest`]s /// - /// Any byte array returned from [`Digets::to_bytes`] will be valid for this function, and the + /// Any byte array returned from [`Digest::to_bytes`] will be valid for this function, and the /// resulting hash will be equal to the hash that created the byte array #[inline] #[must_use] @@ -85,6 +96,14 @@ impl Digest { Self(Rpo256::hash(bytes)) } + /// Convert this [`Digest`] to its hex representation (i.e. the hex encoding of + /// [`Digets::to_bytes`]) + #[inline] + #[must_use] + pub fn to_hex(&self) -> String { + hex::encode(self.to_bytes()) + } + /// Replace `self` with `rpo256(this + other)` #[inline] pub fn merge(&mut self, other: &Digest) { @@ -98,37 +117,6 @@ impl From for Digest { } } -/// A Merkle path that can be used to prove the existance of a value in the tree -pub struct MerklePath { - /// The components of the path, with the root at the end - components: Vec, -} - -impl MerklePath { - /// Create a new [`MerklePath`] from the given components - /// - /// The components should be the hashes that form the path, with the root of the tree at the - /// end - #[inline] - #[must_use] - pub fn new(components: Vec) -> Self { - Self { components } - } - - /// Get a slice of hashes representing the components of the path - #[inline] - #[must_use] - pub fn components(&self) -> &[Digest] { - &self.components - } - - /// Get a mutable slice of hashes representing the components of the path - #[inline] - pub fn components_mut(&mut self) -> &mut [Digest] { - &mut self.components - } -} - #[cfg(test)] mod tests { use proptest::prop_assert_eq; diff --git a/smirk/src/hash/path.rs b/smirk/src/hash/path.rs new file mode 100644 index 00000000..12469e00 --- /dev/null +++ b/smirk/src/hash/path.rs @@ -0,0 +1,70 @@ +use serde::{Deserialize, Serialize}; + +use super::Digest; + +/// A Merkle path that can be used to prove the existance of a value in the tree +/// +/// This type provides [`MerklePath::to_bytes`] and [`MerklePath::from_bytes`] for serialization +/// purposes. It also implements [`Serialize`] and [`Deserialize`], if more control over exact +/// serialization details is needed. +/// +/// Note: no [`Arbitrary`] implementation is provided for this type, since it has no public +/// constructors. 
The only way to create one is to prove the existance of a key-value pair in a +/// [`MerkleTree`]. +/// +/// Luckily, [`MerkleTree`] *does* implement [`Arbitrary`] +/// +/// [`Aritrary`]: proptest::prelude::Arbitrary +/// [`MerkleTree`]: crate::MerkleTree +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MerklePath { + /// The intermediate stages between the root hash and the target node + pub(crate) stages: Vec, + + /// The root hash of the tree that generated this path + pub(crate) root_hash: Digest, + + /// The digest of the left sub-tree of the node that contained the target key-value pair + pub(crate) left: Option, + + /// The digest of the right sub-tree of the node that contained the target key-value pair + pub(crate) right: Option, +} + +impl MerklePath { + /// The root hash of the tree that generated this [`MerklePath`] + #[inline] + #[must_use] + pub fn root_hash(&self) -> Digest { + self.root_hash + } + + /// Convert this [`MerklePath`] to a canonical serialized representation. + /// + /// The exact details of the representation are not specified, other than that it can be + /// reversed with [`MerklePath::from_bytes`] + #[must_use] + pub fn to_bytes(&self) -> Vec { + rmp_serde::to_vec(&self).unwrap() + } + + /// Create a [`MerklePath`] from its canonical serialized representation + /// + /// The exact details of the representation are not specified, other than that it can be + /// reversed with [`MerklePath::to_bytes`] + #[must_use] + pub fn from_bytes(bytes: &[u8]) -> Option { + rmp_serde::from_slice(bytes).ok() + } +} + +/// A stage in a merkle proof (i.e. a single step in the binary search algorithm) +/// +/// - `this` is the hash of the key-value pair of the visited node in this stage +/// - `left`/`right` is the root hash of the "other side" of the tree +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub(crate) enum Stage { + Left { this: Digest, right: Option }, + Right { this: Digest, left: Option }, +} + diff --git a/smirk/src/hash/proptest_impls.rs b/smirk/src/hash/proptest_impls.rs index 193dffbd..058fbcf8 100644 --- a/smirk/src/hash/proptest_impls.rs +++ b/smirk/src/hash/proptest_impls.rs @@ -1,5 +1,6 @@ -use super::*; +use super::{Digest, RpoDigest}; +use miden_crypto::Felt; use proptest::{arbitrary::StrategyFor, prelude::*, strategy::Map}; impl Arbitrary for Digest { diff --git a/smirk/src/lib.rs b/smirk/src/lib.rs index 743b31e0..ee0c1739 100644 --- a/smirk/src/lib.rs +++ b/smirk/src/lib.rs @@ -1,44 +1,22 @@ +#![doc = include_str!("../README.md")] #![warn(clippy::pedantic)] #![deny(missing_docs)] - -//! Persistent Merkle tree -//! -//! This library provides `MerkleTree`, a Merkle tree that uses the [Rescue-Prime Optimized][rpo] -//! hash function, with a map-like API. There is also a [`Storage`] API for persisting the tree in -//! [rocksdb][db] -//! -//! ```rust -//! # use smirk::{MerkleTree, smirk}; -//! let mut tree = MerkleTree::new(); -//! tree.insert(1, "hello"); -//! tree.insert(2, "world"); -//! -//! // or you can use the macro to create a new tree -//! let tree = smirk! { -//! 1 => "hello", -//! 2 => "world", -//! }; -//! -//! assert_eq!(tree.get(&1), Some(&"hello")); -//! assert_eq!(tree.get(&2), Some(&"world")); -//! assert_eq!(tree.get(&3), None); -//! -//! ``` -//! -//! Types provided by this library implement [`Arbitrary`], for use with [`proptest`], gated behind -//! the `proptest` feature flag. -//! -//! [rpo]: https://eprint.iacr.org/2022/1577.pdf -//! [db]: https://github.com/facebook/rocksdb -//! -//! 
[`Storage`]: storage::Storage -//! [`Arbitrary`]: proptest::prelude::Arbitrary +#![deny(unsafe_code)] +#![deny(clippy::integer_arithmetic)] // explicitly choose wrapping/saturating/checked +#![allow( + clippy::module_name_repetitions, + clippy::match_bool, // overly restrictive style lint + clippy::bool_assert_comparison, // overly restrictive style lint + clippy::derive_partial_eq_without_eq, // semver hazard + clippy::missing_panics_doc, // implementation of lint is buggy + clippy::missing_errors_doc, // error is usually obvious from context, this forces useless docs +)] pub mod hash; pub mod storage; mod tree; -pub use tree::{visitor::Visitor, MerkleTree, TreeNode}; +pub use tree::{batch, key_value_hash, visitor::Visitor, MerkleTree, TreeNode}; #[cfg(test)] mod testing; diff --git a/smirk/src/snapshots/smirk__testing__root_hash_snapshot.snap b/smirk/src/snapshots/smirk__testing__root_hash_snapshot.snap new file mode 100644 index 00000000..eddfd3c7 --- /dev/null +++ b/smirk/src/snapshots/smirk__testing__root_hash_snapshot.snap @@ -0,0 +1,5 @@ +--- +source: smirk/src/testing.rs +expression: tree.root_hash().to_hex() +--- +f68a4fd087e98ee6af95ced5378e794cf85a68f58f46f9d2ccfc83c9872d8876 diff --git a/smirk/src/storage/codec.rs b/smirk/src/storage/codec.rs index fbd74bdd..4499dedf 100644 --- a/smirk/src/storage/codec.rs +++ b/smirk/src/storage/codec.rs @@ -1,24 +1,101 @@ +use rocksdb::{Transaction, TransactionDB}; use serde::{Deserialize, Serialize}; -use thiserror::Error; + +use crate::{hash::Hashable, MerkleTree, TreeNode}; + +use super::{Error, Storage}; /// An error encountered when encoding data to its database format -#[derive(Debug, Error)] +#[derive(Debug, thiserror::Error)] #[error("encode error: {0}")] pub struct EncodeError(rmp_serde::encode::Error); /// An error encountered when decoding data in its database format -#[derive(Debug, Error)] +#[derive(Debug, thiserror::Error)] #[error("decode error: {0}")] pub struct DecodeError(rmp_serde::decode::Error); -pub(super) fn encode(t: &T) -> Result, EncodeError> +#[derive(Debug, Serialize, Deserialize)] +struct NodeFormat { + value: Vec, + left: Option>, + right: Option>, +} + +/// Note - this function doesn't actually write, the caller needs to call `tx.commit()` +pub(super) fn write_tree_to_tx( + tx: &Transaction, + tree: &MerkleTree, +) -> Result<(), Error> +where + K: Serialize, + V: Serialize, +{ + let root_value = tree + .inner + .as_deref() + .map(|node| encode(&node.key)) + .transpose()? + .unwrap_or(vec![]); + + tx.put(Storage::ROOT_KEY, &root_value)?; + + for node in tree.iter() { + let (key, value) = encode_single_node(node)?; + println!("writing to {}", hex::encode(&key)); + tx.put(&key, &value)?; + } + + Ok(()) +} + +pub(super) fn load_node( + tx: &Transaction, + key: &[u8], +) -> Result, Error> +where + K: for<'de> Deserialize<'de> + Hashable + Ord, + V: for<'de> Deserialize<'de> + Hashable, +{ + let value_bytes = tx + .get(key)? 
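+        // `get` returning `None` means this key was referenced by its parent (or by
+        // the root pointer) but never written, so surface it as `Error::KeyMissing`.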
+ .ok_or_else(|| Error::KeyMissing(key.to_vec()))?; + + let NodeFormat { value, left, right } = decode(&value_bytes)?; + + let value = decode(&value)?; + let key = decode(key)?; + + let left = left.map(|key| load_node(tx, &key)).transpose()?; + let right = right.map(|key| load_node(tx, &key)).transpose()?; + + Ok(TreeNode::new(key, value, left, right)) +} + +fn encode_single_node(node: &TreeNode) -> Result<(Vec, Vec), Error> where - T: Serialize, + K: Serialize, + V: Serialize, { + let enc = |node: &TreeNode| encode(&node.key); + + let value = encode(&node.value)?; + let left = node.left.as_deref().map(enc).transpose()?; + let right = node.right.as_deref().map(enc).transpose()?; + + let value = NodeFormat { value, left, right }; + + let value_bytes = encode(&value)?; + let key_bytes = encode(&node.key)?; + + Ok((key_bytes, value_bytes)) +} + +fn encode(t: &T) -> Result, EncodeError> { rmp_serde::encode::to_vec(t).map_err(EncodeError) } -pub(super) fn decode Deserialize<'a> + 'static>(bytes: &[u8]) -> Result { +fn decode<'de, 'a: 'de, T: Deserialize<'de>>(bytes: &'a [u8]) -> Result { rmp_serde::decode::from_slice(bytes).map_err(DecodeError) } diff --git a/smirk/src/storage/error.rs b/smirk/src/storage/error.rs index 8272a140..897821bb 100644 --- a/smirk/src/storage/error.rs +++ b/smirk/src/storage/error.rs @@ -1,33 +1,13 @@ -use crate::hash::Digest; - use super::{DecodeError, EncodeError}; /// An error encountered while persisting or restoring a [`MerkleTree`] +/// +/// [`MerkleTree`]: crate::MerkleTree #[derive(Debug, thiserror::Error)] pub enum Error { - /// Invalid hash bytes as key - #[error("the following bytes were used as a key, but were not a valid RPO hash: {0:?}")] - InvalidHashBytes(Vec), - - /// Hash mismatch - #[error("the hash didn't match the computed hash of the stored value - computed: {computed}, stored: {stored}")] - HashMismatch { - /// The hash that was computed by hashing the stored value - computed: Digest, - /// The hash that was stored in the database - stored: Digest, - }, - - /// The database referenced data in the structure that was not found in the database - #[error("no data assocated with {hash}, but found in structure")] - StructureReferenceMissing { - /// The hash that was missing - hash: Digest, - }, - - /// Malformed structure - #[error("malformed structure: {0}")] - MalformedStructure(DecodeError), + /// Key didn't exist in the database + #[error("couldn't find key in database: 0x{}", hex::encode(.0))] + KeyMissing(Vec), /// Error encoding data to binary format #[error("error encoding data to binary format: {0}")] diff --git a/smirk/src/storage/mod.rs b/smirk/src/storage/mod.rs index a1c2199b..ef582b69 100644 --- a/smirk/src/storage/mod.rs +++ b/smirk/src/storage/mod.rs @@ -1,24 +1,43 @@ //! 
Persistence layer for [`MerkleTree`]s use std::{fmt::Debug, path::Path}; -use crate::{hash::{Hashable, Digest}, tree::MerkleTree}; -use rocksdb::{TransactionDB, IteratorMode}; +use crate::{hash::Hashable, tree::MerkleTree}; +use rocksdb::{Transaction, TransactionDB}; use serde::{Deserialize, Serialize}; -mod structure; mod codec; -pub use codec::{EncodeError, DecodeError}; mod error; -pub use error::Error; -use self::structure::Structure; +#[cfg(test)] +mod tests; +pub use codec::{DecodeError, EncodeError}; +pub use error::Error; /// A rocksdb-based storage mechanism for [`MerkleTree`]s /// -/// ```rust -/// +/// ```rust,no_run +/// # use std::path::Path; +/// # use smirk::storage::Storage; +/// # use smirk::smirk; +/// let storage = Storage::open(Path::new("./db")).unwrap(); +/// +/// let tree = smirk! { +/// 1 => "hello".to_string(), +/// 2 => "world".to_string(), +/// }; +/// +/// storage.store_tree(&tree).unwrap(); +/// +/// // 2x .unwrap() because it returns `Ok(None)` if no tree has been stored yet +/// let tree_again = storage.load_tree().unwrap().unwrap(); +/// +/// // the root hashes are the same (since this is what the `Eq` impl for `MerkleTree` uses) +/// assert_eq!(tree, tree_again); /// ``` +/// +/// This storage preserves the tree structure, meaning the root hash will not be changed by +/// loading it from storage. pub struct Storage { instance: TransactionDB, } @@ -30,7 +49,7 @@ impl Debug for Storage { } impl Storage { - /// Create a new [`RocksdbStorage`] from an existing rocksdb instance + /// Create a new [`Storage`] from an existing rocksdb instance /// /// This is useful if you want to create transactions that modify both data managed by smirk, /// as well as data external to smirk @@ -38,13 +57,14 @@ impl Storage { Self { instance } } - /// Create a new [`RocksdbStorage`] by opening a new rocksdb instance at the given path - pub fn open(path: &Path) -> Result { + /// Create a new [`Storage`] by opening a new rocksdb instance at the given path + pub fn open(path: &Path) -> Result { let instance = TransactionDB::open_default(path)?; Ok(Self { instance }) } - const STRUCTURE_KEY: &[u8] = b"structure"; + /// Key used to store the value of the root of the database + const ROOT_KEY: &[u8] = b"root"; } impl Storage { @@ -55,70 +75,60 @@ impl Storage { V: Serialize + 'static + Hashable, { let tx = self.instance.transaction(); - - let structure = Structure::from_tree(tree); - let structure_bytes = codec::encode(&structure)?; - - tx.put(Self::STRUCTURE_KEY, structure_bytes)?; - - for node in tree.iter() { - let hash = node.value().hash(); - let bytes = codec::encode(&(node.key(), node.value()))?; - - tx.put(&hash.to_bytes(), &bytes)?; - } - + self.store_tree_with_tx(tree, &tx)?; tx.commit()?; Ok(()) } + /// Store a tree with a given transaction + pub fn store_tree_with_tx( + &self, + tree: &MerkleTree, + tx: &Transaction, + ) -> Result<(), Error> + where + K: Serialize + 'static + Ord, + V: Serialize + 'static + Hashable, + { + codec::write_tree_to_tx(&tx, tree) + } + /// Load a tree from storage, if it is present pub fn load_tree(&self) -> Result>, Error> where - K: for<'a> Deserialize<'a> + 'static + Ord, + K: for<'a> Deserialize<'a> + 'static + Hashable + Ord, V: for<'a> Deserialize<'a> + 'static + Hashable, { - let Some(structure_bytes) = self.instance.get(Self::STRUCTURE_KEY)? 
else { - return Ok(None); - }; + let tx = self.instance.transaction(); + let tree = self.load_tree_with_tx(&tx)?; + tx.commit()?; - let structure: Option = codec::decode(&structure_bytes).map_err(Error::MalformedStructure)?; - let Some(structure) = structure else { return Ok(Some(MerkleTree::new())) }; + Ok(tree) + } - let mut values = self.instance.iterator(IteratorMode::Start).filter(|result| { - match result { - // don't try to deserialize this key - Ok((hash, _data)) => hash.as_ref() != Self::STRUCTURE_KEY, - Err(_) => true, - } - }).map(|result| { - let (hash, data) = result?; + /// Load a tree from storage, if it is present, using the given transaction + pub fn load_tree_with_tx( + &self, + tx: &Transaction, + ) -> Result>, Error> + where + K: for<'a> Deserialize<'a> + 'static + Hashable + Ord, + V: for<'a> Deserialize<'a> + 'static + Hashable, + { + let key = tx.get(Self::ROOT_KEY)?; - let hash = get_hash(&hash)?; - let data: (K, V) = codec::decode(&data)?; + let Some(key) = key else { return Ok(None) }; - Ok((hash, data)) + if key.is_empty() { + return Ok(Some(MerkleTree::new())); + } - }).collect::>()?; - - let tree = structure.to_tree(&mut values)?; + let node = codec::load_node(&tx, &key)?; + let tree = MerkleTree { + inner: Some(Box::new(node)), + }; Ok(Some(tree)) } - -} - -fn get_hash(bytes: &[u8]) -> Result { - let hash_bytes = bytes - .as_ref() - .try_into() - .map_err(|_| Error::InvalidHashBytes(bytes.to_vec()))?; - - let hash = Digest::from_bytes(hash_bytes) - .ok_or_else(|| Error::InvalidHashBytes(hash_bytes.to_vec()))?; - - Ok(hash) } - - diff --git a/smirk/src/storage/structure.rs b/smirk/src/storage/structure.rs deleted file mode 100644 index f31337d6..00000000 --- a/smirk/src/storage/structure.rs +++ /dev/null @@ -1,107 +0,0 @@ -use std::collections::HashMap; - -use serde::{Deserialize, Serialize}; - -use crate::{ - hash::{Digest, Hashable}, - storage::Error, - MerkleTree, TreeNode, -}; - -/// The structure of a tree -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct Structure { - pub hash: Digest, - pub left: Option>, - pub right: Option>, -} - -impl Structure { - pub fn from_tree(tree: &MerkleTree) -> Option { - tree.inner.as_deref().map(Self::from_node) - } - - fn from_node(node: &TreeNode) -> Self { - let hash = node.value().hash(); - let left = node.left.as_deref().map(Self::from_node).map(Box::new); - let right = node.left.as_deref().map(Self::from_node).map(Box::new); - - Self { hash, left, right } - } - - pub fn to_tree( - &self, - values: &mut HashMap, - ) -> Result, Error> { - dbg!(values.keys().collect::>()); - let node = Self::to_node(&self, values)?; - - Ok(MerkleTree { - inner: Some(Box::new(node)), - }) - } - - fn to_node( - &self, - values: &mut HashMap, - ) -> Result, Error> { - let hash = self.hash; - let Some((key, value)) = values.remove(&hash) else { - return Err(Error::StructureReferenceMissing { hash }); - }; - - let left = self - .left - .as_deref() - .map(|s| Self::to_node(s, values).map(Box::new)) - .transpose()?; - let right = self - .right - .as_deref() - .map(|s| Self::to_node(s, values).map(Box::new)) - .transpose()?; - - let mut node = TreeNode { - // this hash is the hash including children, `hash` that is in scope is the hash - // excluding children, so we just use null for now and clean up later - hash: Digest::NULL, - key, - value, - right, - left, - height: 0, - }; - - node.update_height(); - node.recalculate_hash_recursive(); - - Ok(node) - } -} - -#[cfg(test)] -mod tests { - use proptest::prop_assert_eq; - use 
test_strategy::proptest; - - use super::*; - - #[proptest] - fn to_from_structure(tree: MerkleTree) { - let structure = Structure::from_tree(&tree).unwrap(); - let mut values = tree - .iter() - .map(|node| { - let hash = node.value().hash(); - let key = *node.key(); - let value = node.value().clone(); - - (hash, (key, value)) - }) - .collect(); - - let tree_again = structure.to_tree(&mut values).unwrap(); - - prop_assert_eq!(tree, tree_again); - } -} diff --git a/smirk/src/storage/tests.rs b/smirk/src/storage/tests.rs new file mode 100644 index 00000000..ce56f91a --- /dev/null +++ b/smirk/src/storage/tests.rs @@ -0,0 +1,73 @@ +use super::*; + +use pretty_assertions::assert_eq; +use proptest::prop_assert_eq; +use test_strategy::proptest; + +use crate::{key_value_hash, smirk, testing::TestStorage}; + +#[test] +fn empty_db_returns_none() { + let db = TestStorage::new(); + + assert!(db.load_tree::().unwrap().is_none()); +} + +// test rocksdb behaviour, since we rely on this for storing empty trees +#[test] +fn store_empty_bytes_does_something() { + let db = TestStorage::new(); + assert_eq!(db.instance.get(b"hello").unwrap(), None); + db.instance.put(b"hello", []).unwrap(); + assert_eq!(db.instance.get(b"hello").unwrap(), Some(vec![])); +} + +#[test] +fn storing_empty_tree_returns_empty_tree() { + let db = TestStorage::new(); + let tree = MerkleTree::::new(); + + db.store_tree(&tree).unwrap(); + + assert_eq!(db.load_tree().unwrap(), Some(tree)); +} + +#[test] +fn storing_simple_tree() { + let db = TestStorage::new(); + let tree = smirk! { + 1 => "hello".to_string(), + 2 => "world".to_string(), + 3 => "foo".to_string(), + }; + + println!("hash: {}", tree.get_node(&3).unwrap().hash()); + + db.store_tree(&tree).unwrap(); + let mut tree_again = db.load_tree().unwrap().unwrap(); + + let changed = tree_again.recalculate_hash_recursive(); + dbg!(changed); + + assert_eq!( + tree_again.get_node(&1).unwrap().hash(), + key_value_hash(&1, "hello") + ); + + assert_eq!( + tree_again.get_node(&3).unwrap().hash(), + key_value_hash(&3, "foo") + ); + + assert_eq!(tree, tree_again); +} + +#[proptest] +fn storage_round_trip(tree: MerkleTree) { + let db = TestStorage::new(); + + db.store_tree(&tree).unwrap(); + let tree_again = db.load_tree::().unwrap().unwrap(); + + prop_assert_eq!(tree, tree_again); +} diff --git a/smirk/src/testing.rs b/smirk/src/testing.rs index ab0f081b..5851a93a 100644 --- a/smirk/src/testing.rs +++ b/smirk/src/testing.rs @@ -2,7 +2,7 @@ use std::ops::Deref; use tempdir::TempDir; -use crate::{storage::Storage, tree::MerkleTree}; +use crate::{storage::Storage, MerkleTree}; /// Helper struct that makes it easier to test against a rocksdb instance #[derive(Debug)] @@ -28,16 +28,11 @@ impl Deref for TestStorage { } } +// snapshot test for a well-known tree - if we accidentally change how the hash is calculated, this +// test will fail #[test] -fn simple_storage_test() { - let db = TestStorage::new(); +fn root_hash_snapshot() { + let tree: MerkleTree<_, _> = (0..100).map(|i| (i, format!("the value is {i}"))).collect(); - assert_eq!(db.load_tree().unwrap(), None::>); - - let tree = (0..10).map(|i| (i, format!("the data is {i}"))).collect(); - db.store_tree(&tree).unwrap(); - - let tree_again: MerkleTree = db.load_tree().unwrap().unwrap(); - - assert_eq!(tree, tree_again); + insta::assert_snapshot!(tree.root_hash().to_hex()); } diff --git a/smirk/src/tree/batch.rs b/smirk/src/tree/batch.rs new file mode 100644 index 00000000..44783383 --- /dev/null +++ b/smirk/src/tree/batch.rs @@ -0,0 +1,68 @@ +use 
crate::{hash::Hashable, MerkleTree}; + +/// An operation that represents an update to the tree +#[derive(Debug, Clone, Copy)] +#[non_exhaustive] +pub enum Operation { + /// Insert the following key-value pair + Insert(K, V), +} + +impl Operation { + fn key(&self) -> &K { + match self { + Operation::Insert(key, ..) => key, + } + } +} + +/// A batch of operations that can be applied to a [`MerkleTree`] +/// +/// If there are multiple operations +#[derive(Debug, Clone)] +pub struct Batch { + operations: Vec>, +} + +impl Batch { + /// Create a new [`Batch`] from a list of [`Operation`]s + /// + /// Note, if two operations reference the same key, they will be applied in the order they + /// exist in `operations`. No other guarantees about the order of execution are made + pub fn from_operations(mut operations: Vec>) -> Self + where + K: Ord, + { + operations.sort_by(|a, b| a.key().cmp(b.key())); + Self { operations } + } +} + +impl FromIterator> for Batch +where + K: Ord, +{ + fn from_iter>>(iter: T) -> Self { + let vec = Vec::from_iter(iter); + Batch::from_operations(vec) + } +} + +impl MerkleTree +where + K: Hashable + Ord, + V: Hashable, +{ + /// Apply a [`Batch`] of operations to the tree + pub fn apply(&mut self, batch: Batch) { + for operation in batch.operations { + match operation { + Operation::Insert(key, value) => self.insert_without_update(key, value), + } + } + + if let Some(inner) = self.inner.as_mut() { + inner.recalculate_hash_recursive(); + } + } +} diff --git a/smirk/src/tree/hash.rs b/smirk/src/tree/hash.rs new file mode 100644 index 00000000..609d6299 --- /dev/null +++ b/smirk/src/tree/hash.rs @@ -0,0 +1,130 @@ +use std::iter::once; + +use crate::{ + hash::{Digest, Hashable}, + MerkleTree, TreeNode, +}; + +impl MerkleTree { + pub(crate) fn recalculate_hash_recursive(&mut self) -> bool { + match self.inner.as_mut() { + Some(inner) => inner.recalculate_hash_recursive(), + None => false, + } + } +} + +impl TreeNode { + /// The hash of the left subtree (if it exists) + pub(crate) fn left_hash(&self) -> Option { + self.left.as_ref().map(|node| node.hash) + } + + /// The hash of the right subtree (if it exists) + pub(crate) fn right_hash(&self) -> Option { + self.right.as_ref().map(|node| node.hash) + } + + /// Update the `hash` field of this node, and all child nodes + pub(crate) fn recalculate_hash_recursive(&mut self) -> bool { + if let Some(left) = &mut self.left { + left.recalculate_hash_recursive(); + } + + if let Some(right) = &mut self.right { + right.recalculate_hash_recursive(); + } + + let this = key_value_hash(self.key(), self.value()); + let left = self.left.as_ref().map(|node| node.hash); + let right = self.right.as_ref().map(|node| node.hash); + let new_hash = hash_left_right_this(this, left, right); + + let changed = self.hash != new_hash; + + self.hash = new_hash; + + changed + } +} + +/// Compute the hash of a pair of values (i.e. a key-value pair) +/// +/// The hash will change if either input changes: +/// +/// ```rust +/// # use smirk::key_value_hash; +/// let hash1 = key_value_hash(&1, "hello"); +/// let hash2 = key_value_hash(&2, "hello"); +/// let hash3 = key_value_hash(&1, "world"); +/// +/// assert_ne!(hash1, hash2); +/// assert_ne!(hash1, hash3); +/// assert_ne!(hash2, hash3); +/// ``` +/// +/// This is guaranteed to be the root hash of a tree with a single entry (with the same key + value) +/// +/// ```rust +/// # use smirk::{key_value_hash, smirk}; +/// let tree = smirk! 
{ 1 => "hello" }; +/// let root_hash = key_value_hash(&1, &"hello"); +/// +/// assert_eq!(root_hash, tree.root_hash()); +/// ``` +#[must_use] +pub fn key_value_hash(key: &K, value: &V) -> Digest { + [key.hash(), value.hash()].into_iter().collect() +} + +/// Helper to a +pub(crate) fn hash_left_right_this( + this: Digest, + left: Option, + right: Option, +) -> Digest { + once(this).chain(left).chain(right).collect() +} + +#[cfg(test)] +mod tests { + use proptest::prop_assert_eq; + use test_strategy::proptest; + + use crate::{key_value_hash, smirk, MerkleTree}; + + #[test] + fn root_hash_is_probably_deterministic() { + let make = || { + smirk! { + 1 => "hello", + 2 => "world", + 3 => "foo", + } + }; + + let root_hash = make().root_hash(); + + for _ in 0..1000 { + let root_hash_again = make().root_hash(); + assert_eq!(root_hash, root_hash_again); + } + } + + #[proptest] + fn root_hash_doesnt_change_when_recalculating(mut tree: MerkleTree) { + let hash_before = tree.root_hash(); + tree.recalculate_hash_recursive(); + let hash_after = tree.root_hash(); + + assert_eq!(hash_before, hash_after); + } + + #[proptest] + fn single_element_tree_root_hash_is_kv_hash(key: i32, value: String) { + let hash = key_value_hash(&key, &value); + let tree = smirk! { key => value }; + + prop_assert_eq!(hash, tree.root_hash()); + } +} diff --git a/smirk/src/tree/impls.rs b/smirk/src/tree/impls.rs index 6caaa804..bc330676 100644 --- a/smirk/src/tree/impls.rs +++ b/smirk/src/tree/impls.rs @@ -6,7 +6,8 @@ use super::{MerkleTree, TreeNode}; impl MerkleTree { /// Returns an iterator over the keys and values in depth-first order - pub fn depth_first<'a>(&'a self) -> DepthFirstIter<'a, K, V> { + #[allow(clippy::must_use_candidate)] + pub fn depth_first(&self) -> DepthFirstIter { match &self.inner { None => DepthFirstIter { inner: None }, Some(node) => node.depth_first(), @@ -14,7 +15,8 @@ impl MerkleTree { } /// Returns an iterator over the keys and values in breadth-first order - pub fn breadth_first<'a>(&'a self) -> BreadthFirstIter<'a, K, V> { + #[allow(clippy::must_use_candidate)] + pub fn breadth_first(&self) -> BreadthFirstIter { match &self.inner { None => BreadthFirstIter { inner: None }, Some(node) => node.breadth_first(), @@ -24,7 +26,7 @@ impl MerkleTree { impl TreeNode { /// Get an iterator over the values in this node in depth-first order - fn depth_first<'a>(&'a self) -> DepthFirstIter<'a, K, V> { + fn depth_first(&self) -> DepthFirstIter { let inner = DftPre::new(self, children); let inner = Box::new(inner.map(|(_, node)| (&node.key, &node.value))); @@ -40,7 +42,7 @@ impl TreeNode { } } -fn children<'a, K, V>(node: &'a TreeNode) -> ChildIter<'a, K, V> { +fn children(node: &TreeNode) -> ChildIter { node.left .as_deref() .into_iter() @@ -58,7 +60,7 @@ impl<'a, K, V> Iterator for DepthFirstIter<'a, K, V> { type Item = (&'a K, &'a V); fn next(&mut self) -> Option { - self.inner.as_mut().map(|iter| iter.next()).flatten() + self.inner.as_mut().and_then(Iterator::next) } } @@ -70,7 +72,7 @@ impl<'a, K, V> Iterator for BreadthFirstIter<'a, K, V> { type Item = (&'a K, &'a V); fn next(&mut self) -> Option { - self.inner.as_mut().map(|iter| iter.next()).flatten() + self.inner.as_mut().and_then(Iterator::next) } } @@ -80,13 +82,13 @@ mod proptest_impls { use crate::hash::Hashable; - use super::*; + use super::MerkleTree; use proptest::{arbitrary::StrategyFor, prelude::*, strategy::Map}; impl Arbitrary for MerkleTree where - K: Debug + Arbitrary + Ord, + K: Debug + Arbitrary + Hashable + Ord, V: Debug + Arbitrary + 
Hashable, { type Parameters = (); @@ -100,7 +102,7 @@ mod proptest_impls { #[cfg(test)] mod tests { - use crate::{hash::Digest, tree::MerkleTree, TreeNode}; + use crate::{tree::MerkleTree, TreeNode}; // 1 // |\ @@ -108,23 +110,17 @@ mod tests { // |\ // 3 4 fn example_node() -> TreeNode { - let mut node = TreeNode { - key: 1, - value: 1, - hash: Digest::NULL, - left: Some(Box::new(TreeNode { - key: 2, - value: 2, - hash: Digest::NULL, - left: Some(Box::new(TreeNode::new(3, 3))), - right: Some(Box::new(TreeNode::new(4, 4))), - height: 0, - })), - right: Some(Box::new(TreeNode::new(5, 5))), - height: 0, - }; - node.update_height(); - node + TreeNode::new( + 1, + 1, + Some(TreeNode::new( + 2, + 2, + Some(TreeNode::new(3, 3, None, None)), + Some(TreeNode::new(4, 4, None, None)), + )), + Some(TreeNode::new(5, 5, None, None)), + ) } #[test] diff --git a/smirk/src/tree/iterator.rs b/smirk/src/tree/iterator.rs index 1d1c5f6b..4b057a6c 100644 --- a/smirk/src/tree/iterator.rs +++ b/smirk/src/tree/iterator.rs @@ -1,15 +1,17 @@ use std::iter::empty; -use crate::{hash::Hashable, MerkleTree, TreeNode}; +use crate::{batch::Operation, hash::Hashable, MerkleTree, TreeNode}; -impl FromIterator<(K, V)> for MerkleTree { +impl FromIterator<(K, V)> for MerkleTree { fn from_iter>(iter: T) -> Self { let mut tree = MerkleTree::new(); - for (key, value) in iter { - tree.insert(key, value); - } + let batch = iter + .into_iter() + .map(|(key, value)| Operation::Insert(key, value)) + .collect(); + tree.apply(batch); tree } } @@ -30,14 +32,22 @@ impl<'a, K, V> MerkleTree { /// /// assert_eq!(keys, vec![1, 2, 3]); /// ``` + #[allow(clippy::must_use_candidate)] pub fn iter(&'a self) -> Iter<'a, K, V> { match &self.inner { - None => Iter::empty(), - Some(node) => Iter::node(node), + None => Iter(Box::new(empty())), + Some(node) => Iter(Box::new(iter(node))), } } } +fn iter<'a, K, V>(node: &'a TreeNode) -> Box> + 'a> { + let left_iter = node.left.iter().flat_map(|node| iter(node)); + let right_iter = node.right.iter().flat_map(|node| iter(node)); + + Box::new(left_iter.chain(Some(node)).chain(right_iter)) +} + pub struct Iter<'a, K, V>(Box> + 'a>); impl<'a, K, V> Iterator for Iter<'a, K, V> { @@ -48,23 +58,6 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> { } } -impl<'a, K, V> Iter<'a, K, V> { - fn empty() -> Self { - Self(Box::new(empty())) - } - - fn node(node: &'a TreeNode) -> Self { - Self(Box::new(iter(node))) - } -} - -fn iter<'a, K, V>(node: &'a TreeNode) -> Box> + 'a> { - let left_iter = node.left.iter().flat_map(|node| iter(node)); - let right_iter = node.right.iter().flat_map(|node| iter(node)); - - Box::new(left_iter.chain(Some(node)).chain(right_iter)) -} - #[cfg(test)] mod tests { use proptest::prop_assert_eq; @@ -74,7 +67,7 @@ mod tests { #[proptest(cases = 100)] fn iter_order_is_correct(mut vec: Vec) { - vec.sort(); + vec.sort_unstable(); let mut tree = MerkleTree::new(); diff --git a/smirk/src/tree/mod.rs b/smirk/src/tree/mod.rs index 25f6d2eb..d5951d05 100644 --- a/smirk/src/tree/mod.rs +++ b/smirk/src/tree/mod.rs @@ -2,6 +2,8 @@ use std::{borrow::Borrow, cmp::Ordering}; use crate::hash::{Digest, Hashable}; +/// Batch API for performing many operations on a [`MerkleTree`] at once +pub mod batch; mod iterator; pub use iterator::*; @@ -13,6 +15,9 @@ pub mod visitor; mod proof; +mod hash; +pub use hash::key_value_hash; + #[cfg(test)] mod tests; @@ -48,7 +53,7 @@ mod tests; /// } /// ``` /// Broadly speaking, to do anything useful with a Merkle tree, the key type must implement -/// [`Ord`], and the value type 
must implement [`Hashable`] +/// [`Ord`] and [`Hashable`], and the value type must implement [`Hashable`] /// /// Warning: *DO NOT* use types with interior mutability as either the /// key or value in this tree, since it can potentially invalidate hashes/ordering guarantees that @@ -62,12 +67,18 @@ pub struct MerkleTree { pub(crate) inner: Option>>, } -impl PartialEq for MerkleTree { +impl PartialEq for MerkleTree { fn eq(&self, other: &Self) -> bool { self.root_hash() == other.root_hash() } } +impl Default for MerkleTree { + fn default() -> Self { + Self::new() + } +} + impl MerkleTree { /// Create a new, empty [`MerkleTree`] /// @@ -75,6 +86,8 @@ impl MerkleTree { /// # use smirk::MerkleTree; /// let tree = MerkleTree::::new(); /// ``` + #[inline] + #[must_use] pub fn new() -> Self { Self { inner: None } } @@ -89,9 +102,24 @@ impl MerkleTree { /// assert_eq!(tree.get(&1).unwrap(), "hello"); /// ``` /// If the key is already present in the tree, the tree is left unchanged + /// + /// Note: inserting a single value will potentially rebalance the tree, and also recompute hash + /// values, which can be expensive. If you are inserting many items, consider using + /// [`MerkleTree::apply_batch`] pub fn insert(&mut self, key: K, value: V) where - K: Ord, + K: Hashable + Ord, + V: Hashable, + { + self.insert_without_update(key, value); + self.recalculate_hash_recursive(); + } + + /// Basically [`MerkleTree::insert`] but without updating the hashes - performance optimization + /// for batch API + pub(crate) fn insert_without_update(&mut self, key: K, value: V) + where + K: Hashable + Ord, V: Hashable, { self.inner = Some(Self::insert_node(self.inner.take(), key, value)); @@ -99,24 +127,22 @@ impl MerkleTree { fn insert_node(node: Option>>, key: K, value: V) -> Box> where - K: Ord, + K: Hashable + Ord, V: Hashable, { - let mut node = match node { - None => return Box::new(TreeNode::new(key, value)), - Some(node) => node, - }; + let Some(mut node) = node else { return Box::new(TreeNode::new(key, value, None, None)) }; - if key < node.key { - node.left = Some(Self::insert_node(node.left.take(), key, value)); - } else if key > node.key { - node.right = Some(Self::insert_node(node.right.take(), key, value)); - } else { - return node; // Duplicates not allowed + match key.cmp(&node.key) { + Ordering::Equal => return node, + Ordering::Less => { + node.left = Some(Self::insert_node(node.left.take(), key, value)); + } + Ordering::Greater => { + node.right = Some(Self::insert_node(node.right.take(), key, value)); + } } node.update_height(); - node.recalculate_hash_recursive(); Self::balance(node) } @@ -171,11 +197,15 @@ impl MerkleTree { /// /// assert_eq!(tree.size(), 3); /// ``` + #[must_use] pub fn size(&self) -> usize { struct Counter(usize); impl visitor::Visitor for Counter { fn visit(&mut self, _: &K, _: &V) { - self.0 += 1; + self.0 = self + .0 + .checked_add(1) + .expect("this is never going to overflow"); } } @@ -186,6 +216,7 @@ impl MerkleTree { } /// Returns true if and only if the tree contains no elements + #[must_use] pub fn is_empty(&self) -> bool { self.size() == 0 } @@ -211,17 +242,17 @@ impl MerkleTree { /// The height of this tree #[inline] + #[must_use] pub fn height(&self) -> usize { match &self.inner { None => 0, - Some(node) => node.height() as usize, + Some(node) => node.height(), } } /// Get the value associated with the given key /// /// If you need access to the node itself, consider using [`MerkleTree::get_node`] - /// /// ```rust /// # use smirk::smirk; /// let tree = smirk! 
{ @@ -242,7 +273,6 @@ impl MerkleTree { /// Get the node associated with the given key /// /// If you only need access to the value stored in this node, consider using [`MerkleTree::get`] - /// /// ```rust /// # use smirk::smirk; /// # use smirk::hash::Digest; @@ -273,7 +303,7 @@ pub struct TreeNode { pub(crate) hash: Digest, pub(crate) left: Option>>, pub(crate) right: Option>>, - pub(crate) height: isize, + pub(crate) height: usize, } impl TreeNode { @@ -283,8 +313,8 @@ impl TreeNode { /// - A has height 1 /// - B has height 0 /// - C has height 0 - #[inline] - pub fn height(&self) -> isize { + #[must_use] + pub fn height(&self) -> usize { self.height } @@ -292,13 +322,21 @@ impl TreeNode { pub(crate) fn update_height(&mut self) { let left_height = self.left.as_ref().map_or(0, |x| x.height()); let right_height = self.right.as_ref().map_or(0, |x| x.height()); - self.height = 1 + std::cmp::max(left_height, right_height); + self.height = std::cmp::max(left_height, right_height) + .checked_add(1) + .expect("this is never going to overflow"); } fn balance_factor(&self) -> isize { let left_height = self.left.as_ref().map_or(0, |x| x.height()); let right_height = self.right.as_ref().map_or(0, |x| x.height()); - left_height - right_height + + let left_height = isize::try_from(left_height).expect("height never overflows"); + let right_height = isize::try_from(right_height).expect("height never overflows"); + + left_height + .checked_sub(right_height) + .expect("this is never going to over/underflow") } fn get(&self, key: &Q) -> Option<&V> @@ -323,19 +361,30 @@ impl TreeNode { } } -impl TreeNode { - // pub(crate) for testing only - pub(crate) fn new(key: K, value: V) -> Self { - let hash = value.hash(); - - Self { +impl TreeNode { + pub(crate) fn new( + key: K, + value: V, + left: Option>, + right: Option>, + ) -> Self { + let hash = Digest::NULL; + let left = left.map(Box::new); + let right = right.map(Box::new); + + let mut node = Self { key, value, hash, - left: None, - right: None, + left, + right, height: 0, - } + }; + + node.update_height(); + node.recalculate_hash_recursive(); + + node } /// The key associated with this node @@ -361,21 +410,4 @@ impl TreeNode { pub fn hash_of_value(&self) -> Digest { self.value.hash() } - - /// Update the `hash` field of this node, and all child nodes - pub(crate) fn recalculate_hash_recursive(&mut self) { - let mut new_hash = self.value.hash(); - - if let Some(left) = &mut self.left { - left.recalculate_hash_recursive(); - new_hash.merge(&left.hash); - } - - if let Some(right) = &mut self.right { - right.recalculate_hash_recursive(); - new_hash.merge(&right.hash); - } - - self.hash = new_hash; - } } diff --git a/smirk/src/tree/proof.rs b/smirk/src/tree/proof.rs index 03dc7e69..1601f29e 100644 --- a/smirk/src/tree/proof.rs +++ b/smirk/src/tree/proof.rs @@ -1,15 +1,17 @@ use std::{borrow::Borrow, cmp::Ordering}; use crate::{ - hash::{Digest, Hashable, MerklePath}, - MerkleTree, + hash::{Digest, Hashable, MerklePath, Stage}, + key_value_hash, MerkleTree, }; -impl MerkleTree { +use super::hash::hash_left_right_this; + +impl MerkleTree { /// Generate a [`MerklePath`] that proves that a given key exists in the tree /// /// ```rust - /// # use smirk::{smirk, MerklePath}; + /// # use smirk::{smirk, hash::MerklePath}; /// let tree = smirk! 
{ /// 1 => "hello", /// 2 => "world", @@ -25,53 +27,92 @@ impl MerkleTree { K: Ord, { let Some(mut node) = self.inner.as_deref() else { return None }; - let mut components = Vec::with_capacity(node.height() as usize); + let mut stages = Vec::with_capacity(node.height()); loop { - components.push(node.hash()); + match key.borrow().cmp(&node.key) { + Ordering::Less => { + let this = key_value_hash(&node.key, &node.value); + let right = node.right_hash(); + let stage = Stage::Left { this, right }; + stages.push(stage); + + node = node.left.as_deref()?; + } + Ordering::Greater => { + let this = key_value_hash(&node.key, &node.value); + let left = node.left_hash(); + let stage = Stage::Right { this, left }; + stages.push(stage); - match key.borrow().cmp(node.key()) { - Ordering::Less => node = node.left.as_deref()?, - Ordering::Greater => node = node.right.as_deref()?, + node = node.left.as_deref()?; + } Ordering::Equal => { - components.reverse(); - return Some(MerklePath::new(components)); + let left = node.left_hash(); + let right = node.right_hash(); + let root_hash = self.root_hash(); + + let path = MerklePath { + stages, + root_hash, + left, + right, + }; + + return Some(path); } } } } /// Get the root hash of the Merkle tree + /// + /// The root hash can be viewed as a "summary" of the whole tree - any change to any key or + /// value will change the root hash. Changing the "layout" of the tree will also change the + /// root hash + #[must_use] pub fn root_hash(&self) -> Digest { match &self.inner { None => Digest::NULL, // should this function return an option? - Some(node) => node.hash(), + Some(node) => node.hash, } } +} - /// Generate - /// Verify that the given value exists in the tree, by using the provided [`MerklePath`] - pub fn verify(&self, path: &MerklePath, value: &V) -> bool - where - V: Hashable, - { - if path.components().last() != Some(&self.root_hash()) { - dbg!("not end root hash"); - return false; +impl MerklePath { + /// Verify that the given key-value pair exists in the tree that generated this [`MerklePath`] + #[must_use = "this function indicates a verification failure by returning false"] + pub fn verify(&self, key: &K, value: &V) -> bool { + let mut hash = key_value_hash(key, value); + + for stage in self.stages.iter().rev() { + match *stage { + Stage::Left { this, right } => hash = hash_left_right_this(this, Some(hash), right), + Stage::Right { this, left } => hash = hash_left_right_this(this, left, Some(hash)), + } } - let mut hash = value.hash(); + hash == self.root_hash + } +} - for slice in path.components().windows(2) { - let first = &slice[0]; - let second = &slice[1]; +#[cfg(test)] +mod tests { + use crate::smirk; - hash.merge(first); - if hash != *second { - return false; - } - } + #[test] + fn simple_proof_example() { + let tree = smirk! 
{ + 1 => "hello", + 2 => "world", + }; + + let path = tree.prove(&1).unwrap(); + + assert!(path.verify(&1, &"hello")); + assert!(!path.verify(&2, &"hello")); + assert!(!path.verify(&1, &"world")); - true + assert!(tree.prove(&3).is_none()); } } diff --git a/smirk/src/tree/tests.rs b/smirk/src/tree/tests.rs index 3fece7a9..01a0de51 100644 --- a/smirk/src/tree/tests.rs +++ b/smirk/src/tree/tests.rs @@ -1,4 +1,6 @@ -use crate::smirk; +use test_strategy::proptest; + +use crate::{hash::{Hashable, Digest}, smirk, MerkleTree}; #[test] fn simple_example() { @@ -26,3 +28,39 @@ fn insert_already_exists() { assert_eq!(*tree.get(&1).unwrap(), "hello"); } + +#[test] +fn new_tree_is_empty() { + let tree = MerkleTree::::new(); + assert!(tree.is_empty()); +} + +#[proptest(cases = 100)] +fn collecting_tree_has_same_length(items: Vec) { + let len = items.len(); + let tree: MerkleTree<_, _> = items.into_iter().map(|i| (i, i)).collect(); + + assert_eq!(tree.size(), len); +} + +#[test] +fn hash_includes_key_and_value() { + let tree = smirk! { 1 => "hello" }; + let different_key = smirk! { 2 => "hello" }; + let different_value = smirk! { 1 => "world" }; + + let hash = |tree: &MerkleTree| tree.inner.as_ref().unwrap().hash; + + assert_ne!(hash(&tree), hash(&different_key)); + assert_ne!(hash(&tree), hash(&different_value)); +} + +#[test] +fn hash_of_leaf_is_correct() { + let tree = smirk! { 1 => "hello" }; + let hash = tree.inner.as_ref().unwrap().hash; + + let expected: Digest = [1.hash(), "hello".hash()].iter().collect(); + + assert_eq!(hash, expected); +} diff --git a/smirk/src/tree/visitor.rs b/smirk/src/tree/visitor.rs index 7ae8e27a..06bb84df 100644 --- a/smirk/src/tree/visitor.rs +++ b/smirk/src/tree/visitor.rs @@ -13,7 +13,7 @@ where Vis: Visitor, { fn visit(&mut self, key: &K, value: &V) { - Vis::visit(self, key, value) + Vis::visit(self, key, value); } } diff --git a/solid/Cargo.toml b/solid/Cargo.toml index 0753a01c..d4a4efbe 100644 --- a/solid/Cargo.toml +++ b/solid/Cargo.toml @@ -10,7 +10,7 @@ tokio = { version = "1", features = ["full"] } serde = { version = "1.0", features = ["derive"] } sha2 = "0.10.6" uint = "0.9.5" -libp2p-core = { versin = "0.39.0" } +# libp2p-core = { versin = "0.39.0" } futures-timer = "3.0.2" futures = "0.3.26" prost = "0.11" From 521a2acc7c8b8087679ba9a60136e7f700ec71f5 Mon Sep 17 00:00:00 2001 From: Cameron Date: Fri, 21 Jul 2023 17:14:48 +0100 Subject: [PATCH 07/15] path bug fixed --- flake.nix | 2 +- smirk/proptest-regressions/tree/proof.txt | 7 ++++ smirk/src/testing.rs | 1 - smirk/src/tree/proof.rs | 51 +++++++++++++++++++++-- 4 files changed, 56 insertions(+), 5 deletions(-) create mode 100644 smirk/proptest-regressions/tree/proof.txt diff --git a/flake.nix b/flake.nix index bee958e8..8cbbe15c 100644 --- a/flake.nix +++ b/flake.nix @@ -6,7 +6,7 @@ rust-overlay.url = "github:oxalica/rust-overlay"; }; - outputs = { self, nixpkgs, flake-utils, rust-overlay, ... }: + outputs = { nixpkgs, flake-utils, rust-overlay, ... }: flake-utils.lib.eachDefaultSystem (system: let pkgs = import nixpkgs { diff --git a/smirk/proptest-regressions/tree/proof.txt b/smirk/proptest-regressions/tree/proof.txt new file mode 100644 index 00000000..b0875095 --- /dev/null +++ b/smirk/proptest-regressions/tree/proof.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. 
+# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 11f3355419ebef051267901902f25574e0b80b19569d95c61c31896e0784316a # shrinks to input = _ProofIsValidArgs { tree: MerkleTree { inner: Some(TreeNode { key: 64, value: "", hash: Hash(0dedccd3ac132ee8b462a08bd5e2dc9abfa3334956bcb9e93c2f418f54145da6), left: Some(TreeNode { key: 0, value: "", hash: Hash(58e925583acf136b13a5fefc2f7dd2267a1d70305ceadc7dedf6302bbedcb78e), left: None, right: None, height: 1 }), right: Some(TreeNode { key: 65, value: "", hash: Hash(00743760e488b30cd9cd9a9ba0714899cc5273b79d3f9a4e19709c5e025b1b0b), left: None, right: None, height: 1 }), height: 2 }) }, key: 64 } diff --git a/smirk/src/testing.rs b/smirk/src/testing.rs index 5851a93a..a1a17558 100644 --- a/smirk/src/testing.rs +++ b/smirk/src/testing.rs @@ -33,6 +33,5 @@ impl Deref for TestStorage { #[test] fn root_hash_snapshot() { let tree: MerkleTree<_, _> = (0..100).map(|i| (i, format!("the value is {i}"))).collect(); - insta::assert_snapshot!(tree.root_hash().to_hex()); } diff --git a/smirk/src/tree/proof.rs b/smirk/src/tree/proof.rs index 1601f29e..04a4d4cf 100644 --- a/smirk/src/tree/proof.rs +++ b/smirk/src/tree/proof.rs @@ -45,7 +45,7 @@ impl MerkleTree { let stage = Stage::Right { this, left }; stages.push(stage); - node = node.left.as_deref()?; + node = node.right.as_deref()?; } Ordering::Equal => { let left = node.left_hash(); @@ -67,6 +67,17 @@ impl MerkleTree { /// Get the root hash of the Merkle tree /// + /// ```rust + /// # use smirk::smirk; + /// let mut tree = smirk! { 1 => "hello" }; + /// let hash = tree.root_hash(); + /// + /// tree.insert(2, "world"); + /// let new_hash = tree.root_hash(); + /// + /// assert_ne!(hash, new_hash); + /// ``` + /// /// The root hash can be viewed as a "summary" of the whole tree - any change to any key or /// value will change the root hash. Changing the "layout" of the tree will also change the /// root hash @@ -84,6 +95,7 @@ impl MerklePath { #[must_use = "this function indicates a verification failure by returning false"] pub fn verify(&self, key: &K, value: &V) -> bool { let mut hash = key_value_hash(key, value); + hash = hash_left_right_this(hash, self.left, self.right); for stage in self.stages.iter().rev() { match *stage { @@ -98,13 +110,16 @@ impl MerklePath { #[cfg(test)] mod tests { - use crate::smirk; + use test_strategy::proptest; + + use crate::{smirk, MerkleTree}; #[test] fn simple_proof_example() { let tree = smirk! 
{ 1 => "hello", 2 => "world", + 3 => "foo", }; let path = tree.prove(&1).unwrap(); @@ -113,6 +128,36 @@ mod tests { assert!(!path.verify(&2, &"hello")); assert!(!path.verify(&1, &"world")); - assert!(tree.prove(&3).is_none()); + assert!(tree.prove(&4).is_none()); + } + + #[proptest] + fn all_proof_root_hash_match(tree: MerkleTree) { + for node in tree.iter() { + let proof = tree.prove(node.key()).unwrap(); + assert_eq!(proof.root_hash(), tree.root_hash()); + } + } + + // we use u8 as the key type to improve the chances of it being in the tree + #[proptest] + fn proof_succeeds_iff_key_contained(tree: MerkleTree, key: u8) { + let tree_contains_key = tree.contains(&key); + let proof_valid = tree.prove(&key).is_some(); + + assert_eq!(tree_contains_key, proof_valid); + } + + #[proptest] + fn proof_is_valid(tree: MerkleTree, key: u8) { + let proof = tree.prove(&key); + + let Some(value) = tree.get(&key) else { return Ok(()); }; + let proof = proof.unwrap(); + + let valid = proof.verify(&key, value); + assert!(valid); + + assert_eq!(tree.root_hash(), proof.root_hash()); } } From c485449035507db1a677b2d2c7e9401df36b3c27 Mon Sep 17 00:00:00 2001 From: Cameron Date: Sun, 23 Jul 2023 16:45:58 +0100 Subject: [PATCH 08/15] batch API --- README.md | 5 +- smirk/README.md | 54 ++- smirk/path/for/rocksdb/000004.log | Bin 0 -> 70 bytes smirk/path/for/rocksdb/CURRENT | 1 + smirk/path/for/rocksdb/IDENTITY | 1 + smirk/path/for/rocksdb/LOCK | 0 smirk/path/for/rocksdb/LOG | 107 ++++++ .../path/for/rocksdb/LOG.old.1690123945383009 | 333 ++++++++++++++++++ smirk/path/for/rocksdb/MANIFEST-000005 | Bin 0 -> 79 bytes smirk/path/for/rocksdb/OPTIONS-000007 | 198 +++++++++++ smirk/path/for/rocksdb/OPTIONS-000009 | 198 +++++++++++ smirk/proptest-regressions/tree/proof.txt | 7 - smirk/src/hash/mod.rs | 4 +- smirk/src/hash/path.rs | 25 +- smirk/src/storage/mod.rs | 4 +- smirk/src/tree/batch.rs | 2 + smirk/src/tree/mod.rs | 39 +- 17 files changed, 953 insertions(+), 25 deletions(-) create mode 100644 smirk/path/for/rocksdb/000004.log create mode 100644 smirk/path/for/rocksdb/CURRENT create mode 100644 smirk/path/for/rocksdb/IDENTITY create mode 100644 smirk/path/for/rocksdb/LOCK create mode 100644 smirk/path/for/rocksdb/LOG create mode 100644 smirk/path/for/rocksdb/LOG.old.1690123945383009 create mode 100644 smirk/path/for/rocksdb/MANIFEST-000005 create mode 100644 smirk/path/for/rocksdb/OPTIONS-000007 create mode 100644 smirk/path/for/rocksdb/OPTIONS-000009 delete mode 100644 smirk/proptest-regressions/tree/proof.txt diff --git a/README.md b/README.md index 3fa2fb0e..e4c07b1f 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,9 @@ cd polybase && cargo run -- generate_key cargo test ``` +Note, `smirk` contains some property tests which are relatively slow. +You may find that you have a faster dev cycle running tests with `--release`, since the test suite is relatively fast to compile but slow to run. + ## API API server runs on port 8080 by default: @@ -125,4 +128,4 @@ From variables in the web3.js example above, you can build a header like this: const headers = { "X--Signature": `${publicKey ? `pk=${publicKey},` : ""}sig=${signature},t=${timestamp},v0=1,h=eth-personal-sign`, }; -``` \ No newline at end of file +``` diff --git a/smirk/README.md b/smirk/README.md index 5ebfe3b7..f13b1d35 100644 --- a/smirk/README.md +++ b/smirk/README.md @@ -7,7 +7,7 @@ hash function, with a map-like API. 
There is also a [`Storage`] API for persisti [rocksdb][db] ```rust -# use smirk::{MerkleTree, smirk}; +# use smirk::{MerkleTree, smirk, storage::Storage}; let mut tree = MerkleTree::new(); tree.insert(1, "hello"); tree.insert(2, "world"); @@ -21,12 +21,62 @@ let tree = smirk! { assert_eq!(tree.get(&1), Some(&"hello")); assert_eq!(tree.get(&2), Some(&"world")); assert_eq!(tree.get(&3), None); +``` + +You can persist trees with the [`Storage`] API: +```rust,no_run +# use std::path::Path; +# use smirk::{smirk, storage::Storage}; +let path = Path::new("path/for/rocksdb"); +let storage = Storage::open(path).unwrap(); + +let tree = smirk! { + 1 => 123, + 2 => 234, +}; +storage.store_tree(&tree).unwrap(); +let tree_again = storage.load_tree().unwrap().unwrap(); + +assert_eq!(tree, tree_again); +``` + +Any type that implements [`Serialize`] and [`Deserialize`] can be used + +```rust,no_run +# use std::path::Path; +# use serde::{Serialize, Deserialize}; +# use smirk::{smirk, storage::Storage, hash::{Hashable, Digest}}; +#[derive(Debug, Serialize, Deserialize)] +struct MyCoolType { + foo: i32, + bar: String, +} + +impl Hashable for MyCoolType { + fn hash(&self) -> Digest { + [self.foo.hash(), self.bar.hash()].into_iter().collect() + } +} + +let path = Path::new("path/for/rocksdb"); +let storage = Storage::open(path).unwrap(); + +let tree = smirk! { + 1 => MyCoolType { foo: 123, bar: "hello".to_string() }, + 2 => MyCoolType { foo: 234, bar: "world".to_string() }, +}; + +storage.store_tree(&tree).unwrap(); +let tree_again = storage.load_tree().unwrap().unwrap(); + +assert_eq!(tree, tree_again); ``` Types provided by this library implement [`Arbitrary`], for use with [`proptest`], gated behind the `proptest` feature flag. + ## Todo - benchmarks @@ -38,3 +88,5 @@ the `proptest` feature flag. 
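There is also a batch API for applying many operations in one pass (added in `smirk/src/tree/batch.rs`). The sketch below shows how it might be used; the `smirk::tree::batch` import path is an assumption, since the public re-export is not shown here:

```rust
use smirk::MerkleTree;
// Assumed path: the patch defines these types in smirk/src/tree/batch.rs,
// but the exact public re-export may differ.
use smirk::tree::batch::{Batch, Operation};

let mut tree = MerkleTree::new();

// Gather all pending writes into one Batch. Collecting (or calling
// `Batch::from_operations`) sorts the operations by key before they are applied.
let batch: Batch<i32, String> = (0..100)
    .map(|i| Operation::Insert(i, format!("the value is {i}")))
    .collect();

// `apply` performs every insert first, then recalculates node hashes once,
// instead of re-hashing the tree after each individual `insert` call.
tree.apply(batch);

assert_eq!(tree.size(), 100);
```

Because hashes are recalculated a single time at the end, applying a batch should be noticeably cheaper than calling `insert` once per key on a large tree.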
[`Storage`]: storage::Storage [`Arbitrary`]: proptest::prelude::Arbitrary +[`Serialize`]: serde::Serialize +[`Deserialize`]: serde::Deserialize diff --git a/smirk/path/for/rocksdb/000004.log b/smirk/path/for/rocksdb/000004.log new file mode 100644 index 0000000000000000000000000000000000000000..be9399823de4fd05a1a31e5a4ec0246d80b921d5 GIT binary patch literal 70 zcmX@6blXRpfsv5`0+@jmFC$A)etrof6C)!d>*R^m2M#bYGI36xbmq*NR}&czd|a07 N#LK|Q0@VSccmW^r4{ZPd literal 0 HcmV?d00001 diff --git a/smirk/path/for/rocksdb/CURRENT b/smirk/path/for/rocksdb/CURRENT new file mode 100644 index 00000000..aa5bb8ea --- /dev/null +++ b/smirk/path/for/rocksdb/CURRENT @@ -0,0 +1 @@ +MANIFEST-000005 diff --git a/smirk/path/for/rocksdb/IDENTITY b/smirk/path/for/rocksdb/IDENTITY new file mode 100644 index 00000000..0d4ffc44 --- /dev/null +++ b/smirk/path/for/rocksdb/IDENTITY @@ -0,0 +1 @@ +fda2968c-dc40-46ae-93c7-6c66e47096c0 \ No newline at end of file diff --git a/smirk/path/for/rocksdb/LOCK b/smirk/path/for/rocksdb/LOCK new file mode 100644 index 00000000..e69de29b diff --git a/smirk/path/for/rocksdb/LOG b/smirk/path/for/rocksdb/LOG new file mode 100644 index 00000000..4aca7f1c --- /dev/null +++ b/smirk/path/for/rocksdb/LOG @@ -0,0 +1,107 @@ +2023/07/23-15:52:25.383264 268744 RocksDB version: 8.1.1 +2023/07/23-15:52:25.383315 268744 Compile date 2023-04-06 16:38:52 +2023/07/23-15:52:25.383318 268744 DB SUMMARY +2023/07/23-15:52:25.383320 268744 DB Session ID: IMT9NYJINBM3XOR7GJAU +2023/07/23-15:52:25.383333 268744 CURRENT file: CURRENT +2023/07/23-15:52:25.383334 268744 IDENTITY file: IDENTITY +2023/07/23-15:52:25.383336 268744 MANIFEST file: MANIFEST-000001 size: 13 Bytes +2023/07/23-15:52:25.383338 268744 MANIFEST file: MANIFEST-000005 size: 66 Bytes +2023/07/23-15:52:25.383339 268744 SST files in path/for/rocksdb dir, Total Num: 0, files: +2023/07/23-15:52:25.383340 268744 Write Ahead Log file in path/for/rocksdb: 000004.log size: 0 ; +2023/07/23-15:52:25.383341 268744 Options.error_if_exists: 0 +2023/07/23-15:52:25.383342 268744 Options.create_if_missing: 1 +2023/07/23-15:52:25.383342 268744 Options.paranoid_checks: 1 +2023/07/23-15:52:25.383343 268744 Options.flush_verify_memtable_count: 1 +2023/07/23-15:52:25.383343 268744 Options.track_and_verify_wals_in_manifest: 0 +2023/07/23-15:52:25.383344 268744 Options.verify_sst_unique_id_in_manifest: 1 +2023/07/23-15:52:25.383344 268744 Options.env: 0x55b1e66a3a88 +2023/07/23-15:52:25.383345 268744 Options.fs: PosixFileSystem +2023/07/23-15:52:25.383346 268744 Options.info_log: 0x55b1e73f6ff0 +2023/07/23-15:52:25.383346 268744 Options.max_file_opening_threads: 16 +2023/07/23-15:52:25.383347 268744 Options.statistics: (nil) +2023/07/23-15:52:25.383347 268744 Options.use_fsync: 0 +2023/07/23-15:52:25.383348 268744 Options.max_log_file_size: 0 +2023/07/23-15:52:25.383348 268744 Options.max_manifest_file_size: 1073741824 +2023/07/23-15:52:25.383349 268744 Options.log_file_time_to_roll: 0 +2023/07/23-15:52:25.383349 268744 Options.keep_log_file_num: 1000 +2023/07/23-15:52:25.383350 268744 Options.recycle_log_file_num: 0 +2023/07/23-15:52:25.383350 268744 Options.allow_fallocate: 1 +2023/07/23-15:52:25.383351 268744 Options.allow_mmap_reads: 0 +2023/07/23-15:52:25.383351 268744 Options.allow_mmap_writes: 0 +2023/07/23-15:52:25.383352 268744 Options.use_direct_reads: 0 +2023/07/23-15:52:25.383352 268744 Options.use_direct_io_for_flush_and_compaction: 0 +2023/07/23-15:52:25.383353 268744 Options.create_missing_column_families: 0 +2023/07/23-15:52:25.383353 268744 
Options.db_log_dir: +2023/07/23-15:52:25.383354 268744 Options.wal_dir: +2023/07/23-15:52:25.383354 268744 Options.table_cache_numshardbits: 6 +2023/07/23-15:52:25.383355 268744 Options.WAL_ttl_seconds: 0 +2023/07/23-15:52:25.383355 268744 Options.WAL_size_limit_MB: 0 +2023/07/23-15:52:25.383356 268744 Options.max_write_batch_group_size_bytes: 1048576 +2023/07/23-15:52:25.383356 268744 Options.manifest_preallocation_size: 4194304 +2023/07/23-15:52:25.383357 268744 Options.is_fd_close_on_exec: 1 +2023/07/23-15:52:25.383357 268744 Options.advise_random_on_open: 1 +2023/07/23-15:52:25.383358 268744 Options.db_write_buffer_size: 0 +2023/07/23-15:52:25.383358 268744 Options.write_buffer_manager: 0x55b1e73f7140 +2023/07/23-15:52:25.383359 268744 Options.access_hint_on_compaction_start: 1 +2023/07/23-15:52:25.383359 268744 Options.random_access_max_buffer_size: 1048576 +2023/07/23-15:52:25.383359 268744 Options.use_adaptive_mutex: 0 +2023/07/23-15:52:25.383360 268744 Options.rate_limiter: (nil) +2023/07/23-15:52:25.383361 268744 Options.sst_file_manager.rate_bytes_per_sec: 0 +2023/07/23-15:52:25.383362 268744 Options.wal_recovery_mode: 2 +2023/07/23-15:52:25.383363 268744 Options.enable_thread_tracking: 0 +2023/07/23-15:52:25.383364 268744 Options.enable_pipelined_write: 0 +2023/07/23-15:52:25.383364 268744 Options.unordered_write: 0 +2023/07/23-15:52:25.383364 268744 Options.allow_concurrent_memtable_write: 1 +2023/07/23-15:52:25.383365 268744 Options.enable_write_thread_adaptive_yield: 1 +2023/07/23-15:52:25.383365 268744 Options.write_thread_max_yield_usec: 100 +2023/07/23-15:52:25.383366 268744 Options.write_thread_slow_yield_usec: 3 +2023/07/23-15:52:25.383366 268744 Options.row_cache: None +2023/07/23-15:52:25.383367 268744 Options.wal_filter: None +2023/07/23-15:52:25.383367 268744 Options.avoid_flush_during_recovery: 0 +2023/07/23-15:52:25.383368 268744 Options.allow_ingest_behind: 0 +2023/07/23-15:52:25.383368 268744 Options.two_write_queues: 0 +2023/07/23-15:52:25.383369 268744 Options.manual_wal_flush: 0 +2023/07/23-15:52:25.383369 268744 Options.wal_compression: 0 +2023/07/23-15:52:25.383370 268744 Options.atomic_flush: 0 +2023/07/23-15:52:25.383370 268744 Options.avoid_unnecessary_blocking_io: 0 +2023/07/23-15:52:25.383371 268744 Options.persist_stats_to_disk: 0 +2023/07/23-15:52:25.383371 268744 Options.write_dbid_to_manifest: 0 +2023/07/23-15:52:25.383372 268744 Options.log_readahead_size: 0 +2023/07/23-15:52:25.383372 268744 Options.file_checksum_gen_factory: Unknown +2023/07/23-15:52:25.383373 268744 Options.best_efforts_recovery: 0 +2023/07/23-15:52:25.383373 268744 Options.max_bgerror_resume_count: 2147483647 +2023/07/23-15:52:25.383374 268744 Options.bgerror_resume_retry_interval: 1000000 +2023/07/23-15:52:25.383374 268744 Options.allow_data_in_errors: 0 +2023/07/23-15:52:25.383375 268744 Options.db_host_id: __hostname__ +2023/07/23-15:52:25.383375 268744 Options.enforce_single_del_contracts: true +2023/07/23-15:52:25.383376 268744 Options.max_background_jobs: 2 +2023/07/23-15:52:25.383376 268744 Options.max_background_compactions: -1 +2023/07/23-15:52:25.383377 268744 Options.max_subcompactions: 1 +2023/07/23-15:52:25.383377 268744 Options.avoid_flush_during_shutdown: 0 +2023/07/23-15:52:25.383378 268744 Options.writable_file_max_buffer_size: 1048576 +2023/07/23-15:52:25.383378 268744 Options.delayed_write_rate : 16777216 +2023/07/23-15:52:25.383379 268744 Options.max_total_wal_size: 0 +2023/07/23-15:52:25.383379 268744 Options.delete_obsolete_files_period_micros: 21600000000 
+2023/07/23-15:52:25.383380 268744 Options.stats_dump_period_sec: 600 +2023/07/23-15:52:25.383380 268744 Options.stats_persist_period_sec: 600 +2023/07/23-15:52:25.383380 268744 Options.stats_history_buffer_size: 1048576 +2023/07/23-15:52:25.383381 268744 Options.max_open_files: -1 +2023/07/23-15:52:25.383381 268744 Options.bytes_per_sync: 0 +2023/07/23-15:52:25.383382 268744 Options.wal_bytes_per_sync: 0 +2023/07/23-15:52:25.383383 268744 Options.strict_bytes_per_sync: 0 +2023/07/23-15:52:25.383383 268744 Options.compaction_readahead_size: 0 +2023/07/23-15:52:25.383383 268744 Options.max_background_flushes: -1 +2023/07/23-15:52:25.383384 268744 Compression algorithms supported: +2023/07/23-15:52:25.383387 268744 kZSTD supported: 1 +2023/07/23-15:52:25.383388 268744 kXpressCompression supported: 0 +2023/07/23-15:52:25.383388 268744 kBZip2Compression supported: 1 +2023/07/23-15:52:25.383389 268744 kZSTDNotFinalCompression supported: 1 +2023/07/23-15:52:25.383389 268744 kLZ4Compression supported: 1 +2023/07/23-15:52:25.383400 268744 kZlibCompression supported: 1 +2023/07/23-15:52:25.383400 268744 kLZ4HCCompression supported: 1 +2023/07/23-15:52:25.383401 268744 kSnappyCompression supported: 1 +2023/07/23-15:52:25.383402 268744 Fast CRC32 supported: Not supported on x86 +2023/07/23-15:52:25.383403 268744 DMutex implementation: pthread_mutex_t +2023/07/23-15:52:25.383426 268744 [WARN] [db/db_impl/db_impl_open.cc:2075] DB::Open() failed: IO error: While lock file: path/for/rocksdb/LOCK: Resource temporarily unavailable +2023/07/23-15:52:25.383435 268744 [db/db_impl/db_impl.cc:490] Shutdown: canceling all background work +2023/07/23-15:52:25.383454 268744 [db/db_impl/db_impl.cc:692] Shutdown complete diff --git a/smirk/path/for/rocksdb/LOG.old.1690123945383009 b/smirk/path/for/rocksdb/LOG.old.1690123945383009 new file mode 100644 index 00000000..f26a8cef --- /dev/null +++ b/smirk/path/for/rocksdb/LOG.old.1690123945383009 @@ -0,0 +1,333 @@ +2023/07/23-15:52:25.342449 268726 RocksDB version: 8.1.1 +2023/07/23-15:52:25.342586 268726 Compile date 2023-04-06 16:38:52 +2023/07/23-15:52:25.342591 268726 DB SUMMARY +2023/07/23-15:52:25.342593 268726 DB Session ID: TGDHPLXGV6GEJBVANPGW +2023/07/23-15:52:25.342609 268726 SST files in path/for/rocksdb dir, Total Num: 0, files: +2023/07/23-15:52:25.342611 268726 Write Ahead Log file in path/for/rocksdb: +2023/07/23-15:52:25.342613 268726 Options.error_if_exists: 0 +2023/07/23-15:52:25.342614 268726 Options.create_if_missing: 1 +2023/07/23-15:52:25.342614 268726 Options.paranoid_checks: 1 +2023/07/23-15:52:25.342615 268726 Options.flush_verify_memtable_count: 1 +2023/07/23-15:52:25.342616 268726 Options.track_and_verify_wals_in_manifest: 0 +2023/07/23-15:52:25.342617 268726 Options.verify_sst_unique_id_in_manifest: 1 +2023/07/23-15:52:25.342618 268726 Options.env: 0x55cd828ffa88 +2023/07/23-15:52:25.342619 268726 Options.fs: PosixFileSystem +2023/07/23-15:52:25.342620 268726 Options.info_log: 0x55cd839a3ff0 +2023/07/23-15:52:25.342620 268726 Options.max_file_opening_threads: 16 +2023/07/23-15:52:25.342621 268726 Options.statistics: (nil) +2023/07/23-15:52:25.342622 268726 Options.use_fsync: 0 +2023/07/23-15:52:25.342623 268726 Options.max_log_file_size: 0 +2023/07/23-15:52:25.342624 268726 Options.max_manifest_file_size: 1073741824 +2023/07/23-15:52:25.342632 268726 Options.log_file_time_to_roll: 0 +2023/07/23-15:52:25.342633 268726 Options.keep_log_file_num: 1000 +2023/07/23-15:52:25.342633 268726 Options.recycle_log_file_num: 0 +2023/07/23-15:52:25.342634 
268726 Options.allow_fallocate: 1 +2023/07/23-15:52:25.342635 268726 Options.allow_mmap_reads: 0 +2023/07/23-15:52:25.342636 268726 Options.allow_mmap_writes: 0 +2023/07/23-15:52:25.342637 268726 Options.use_direct_reads: 0 +2023/07/23-15:52:25.342637 268726 Options.use_direct_io_for_flush_and_compaction: 0 +2023/07/23-15:52:25.342638 268726 Options.create_missing_column_families: 0 +2023/07/23-15:52:25.342639 268726 Options.db_log_dir: +2023/07/23-15:52:25.342640 268726 Options.wal_dir: +2023/07/23-15:52:25.342640 268726 Options.table_cache_numshardbits: 6 +2023/07/23-15:52:25.342641 268726 Options.WAL_ttl_seconds: 0 +2023/07/23-15:52:25.342642 268726 Options.WAL_size_limit_MB: 0 +2023/07/23-15:52:25.342643 268726 Options.max_write_batch_group_size_bytes: 1048576 +2023/07/23-15:52:25.342643 268726 Options.manifest_preallocation_size: 4194304 +2023/07/23-15:52:25.342644 268726 Options.is_fd_close_on_exec: 1 +2023/07/23-15:52:25.342645 268726 Options.advise_random_on_open: 1 +2023/07/23-15:52:25.342646 268726 Options.db_write_buffer_size: 0 +2023/07/23-15:52:25.342646 268726 Options.write_buffer_manager: 0x55cd839a4140 +2023/07/23-15:52:25.342647 268726 Options.access_hint_on_compaction_start: 1 +2023/07/23-15:52:25.342648 268726 Options.random_access_max_buffer_size: 1048576 +2023/07/23-15:52:25.342649 268726 Options.use_adaptive_mutex: 0 +2023/07/23-15:52:25.342649 268726 Options.rate_limiter: (nil) +2023/07/23-15:52:25.342650 268726 Options.sst_file_manager.rate_bytes_per_sec: 0 +2023/07/23-15:52:25.342651 268726 Options.wal_recovery_mode: 2 +2023/07/23-15:52:25.342652 268726 Options.enable_thread_tracking: 0 +2023/07/23-15:52:25.342653 268726 Options.enable_pipelined_write: 0 +2023/07/23-15:52:25.342654 268726 Options.unordered_write: 0 +2023/07/23-15:52:25.342656 268726 Options.allow_concurrent_memtable_write: 1 +2023/07/23-15:52:25.342657 268726 Options.enable_write_thread_adaptive_yield: 1 +2023/07/23-15:52:25.342657 268726 Options.write_thread_max_yield_usec: 100 +2023/07/23-15:52:25.342658 268726 Options.write_thread_slow_yield_usec: 3 +2023/07/23-15:52:25.342659 268726 Options.row_cache: None +2023/07/23-15:52:25.342660 268726 Options.wal_filter: None +2023/07/23-15:52:25.342661 268726 Options.avoid_flush_during_recovery: 0 +2023/07/23-15:52:25.342661 268726 Options.allow_ingest_behind: 0 +2023/07/23-15:52:25.342662 268726 Options.two_write_queues: 0 +2023/07/23-15:52:25.342663 268726 Options.manual_wal_flush: 0 +2023/07/23-15:52:25.342664 268726 Options.wal_compression: 0 +2023/07/23-15:52:25.342664 268726 Options.atomic_flush: 0 +2023/07/23-15:52:25.342665 268726 Options.avoid_unnecessary_blocking_io: 0 +2023/07/23-15:52:25.342666 268726 Options.persist_stats_to_disk: 0 +2023/07/23-15:52:25.342667 268726 Options.write_dbid_to_manifest: 0 +2023/07/23-15:52:25.342667 268726 Options.log_readahead_size: 0 +2023/07/23-15:52:25.342668 268726 Options.file_checksum_gen_factory: Unknown +2023/07/23-15:52:25.342669 268726 Options.best_efforts_recovery: 0 +2023/07/23-15:52:25.342669 268726 Options.max_bgerror_resume_count: 2147483647 +2023/07/23-15:52:25.342670 268726 Options.bgerror_resume_retry_interval: 1000000 +2023/07/23-15:52:25.342671 268726 Options.allow_data_in_errors: 0 +2023/07/23-15:52:25.342672 268726 Options.db_host_id: __hostname__ +2023/07/23-15:52:25.342673 268726 Options.enforce_single_del_contracts: true +2023/07/23-15:52:25.342673 268726 Options.max_background_jobs: 2 +2023/07/23-15:52:25.342674 268726 Options.max_background_compactions: -1 +2023/07/23-15:52:25.342675 
268726 Options.max_subcompactions: 1 +2023/07/23-15:52:25.342676 268726 Options.avoid_flush_during_shutdown: 0 +2023/07/23-15:52:25.342677 268726 Options.writable_file_max_buffer_size: 1048576 +2023/07/23-15:52:25.342677 268726 Options.delayed_write_rate : 16777216 +2023/07/23-15:52:25.342678 268726 Options.max_total_wal_size: 0 +2023/07/23-15:52:25.342679 268726 Options.delete_obsolete_files_period_micros: 21600000000 +2023/07/23-15:52:25.342680 268726 Options.stats_dump_period_sec: 600 +2023/07/23-15:52:25.342681 268726 Options.stats_persist_period_sec: 600 +2023/07/23-15:52:25.342681 268726 Options.stats_history_buffer_size: 1048576 +2023/07/23-15:52:25.342682 268726 Options.max_open_files: -1 +2023/07/23-15:52:25.342683 268726 Options.bytes_per_sync: 0 +2023/07/23-15:52:25.342684 268726 Options.wal_bytes_per_sync: 0 +2023/07/23-15:52:25.342684 268726 Options.strict_bytes_per_sync: 0 +2023/07/23-15:52:25.342685 268726 Options.compaction_readahead_size: 0 +2023/07/23-15:52:25.342686 268726 Options.max_background_flushes: -1 +2023/07/23-15:52:25.342687 268726 Compression algorithms supported: +2023/07/23-15:52:25.342691 268726 kZSTD supported: 1 +2023/07/23-15:52:25.342691 268726 kXpressCompression supported: 0 +2023/07/23-15:52:25.342692 268726 kBZip2Compression supported: 1 +2023/07/23-15:52:25.342693 268726 kZSTDNotFinalCompression supported: 1 +2023/07/23-15:52:25.342694 268726 kLZ4Compression supported: 1 +2023/07/23-15:52:25.342695 268726 kZlibCompression supported: 1 +2023/07/23-15:52:25.342696 268726 kLZ4HCCompression supported: 1 +2023/07/23-15:52:25.342697 268726 kSnappyCompression supported: 1 +2023/07/23-15:52:25.342699 268726 Fast CRC32 supported: Not supported on x86 +2023/07/23-15:52:25.342700 268726 DMutex implementation: pthread_mutex_t +2023/07/23-15:52:25.352744 268726 [db/db_impl/db_impl_open.cc:315] Creating manifest 1 +2023/07/23-15:52:25.364937 268726 [db/version_set.cc:5662] Recovering from manifest file: path/for/rocksdb/MANIFEST-000001 +2023/07/23-15:52:25.365079 268726 [db/column_family.cc:621] --------------- Options for column family [default]: +2023/07/23-15:52:25.365082 268726 Options.comparator: leveldb.BytewiseComparator +2023/07/23-15:52:25.365084 268726 Options.merge_operator: None +2023/07/23-15:52:25.365084 268726 Options.compaction_filter: None +2023/07/23-15:52:25.365085 268726 Options.compaction_filter_factory: None +2023/07/23-15:52:25.365086 268726 Options.sst_partitioner_factory: None +2023/07/23-15:52:25.365087 268726 Options.memtable_factory: SkipListFactory +2023/07/23-15:52:25.365088 268726 Options.table_factory: BlockBasedTable +2023/07/23-15:52:25.365117 268726 table_factory options: flush_block_policy_factory: FlushBlockBySizePolicyFactory (0x55cd83999930) + cache_index_and_filter_blocks: 0 + cache_index_and_filter_blocks_with_high_priority: 1 + pin_l0_filter_and_index_blocks_in_cache: 0 + pin_top_level_index_and_filter: 1 + index_type: 0 + data_block_index_type: 0 + index_shortening: 1 + data_block_hash_table_util_ratio: 0.750000 + checksum: 4 + no_block_cache: 0 + block_cache: 0x55cd8399dff0 + block_cache_name: LRUCache + block_cache_options: + capacity : 8388608 + num_shard_bits : 4 + strict_capacity_limit : 0 + memory_allocator : None + high_pri_pool_ratio: 0.000 + low_pri_pool_ratio: 0.000 + persistent_cache: (nil) + block_size: 4096 + block_size_deviation: 10 + block_restart_interval: 16 + index_block_restart_interval: 1 + metadata_block_size: 4096 + partition_filters: 0 + use_delta_encoding: 1 + filter_policy: nullptr + 
whole_key_filtering: 1 + verify_compression: 0 + read_amp_bytes_per_bit: 0 + format_version: 5 + enable_index_compression: 1 + block_align: 0 + max_auto_readahead_size: 262144 + prepopulate_block_cache: 0 + initial_auto_readahead_size: 8192 + num_file_reads_for_auto_readahead: 2 +2023/07/23-15:52:25.365133 268726 Options.write_buffer_size: 67108864 +2023/07/23-15:52:25.365134 268726 Options.max_write_buffer_number: 2 +2023/07/23-15:52:25.365139 268726 Options.compression: Snappy +2023/07/23-15:52:25.365140 268726 Options.bottommost_compression: Disabled +2023/07/23-15:52:25.365141 268726 Options.prefix_extractor: nullptr +2023/07/23-15:52:25.365142 268726 Options.memtable_insert_with_hint_prefix_extractor: nullptr +2023/07/23-15:52:25.365143 268726 Options.num_levels: 7 +2023/07/23-15:52:25.365143 268726 Options.min_write_buffer_number_to_merge: 1 +2023/07/23-15:52:25.365144 268726 Options.max_write_buffer_number_to_maintain: 0 +2023/07/23-15:52:25.365145 268726 Options.max_write_buffer_size_to_maintain: 134217728 +2023/07/23-15:52:25.365146 268726 Options.bottommost_compression_opts.window_bits: -14 +2023/07/23-15:52:25.365147 268726 Options.bottommost_compression_opts.level: 32767 +2023/07/23-15:52:25.365148 268726 Options.bottommost_compression_opts.strategy: 0 +2023/07/23-15:52:25.365148 268726 Options.bottommost_compression_opts.max_dict_bytes: 0 +2023/07/23-15:52:25.365149 268726 Options.bottommost_compression_opts.zstd_max_train_bytes: 0 +2023/07/23-15:52:25.365150 268726 Options.bottommost_compression_opts.parallel_threads: 1 +2023/07/23-15:52:25.365151 268726 Options.bottommost_compression_opts.enabled: false +2023/07/23-15:52:25.365152 268726 Options.bottommost_compression_opts.max_dict_buffer_bytes: 0 +2023/07/23-15:52:25.365153 268726 Options.bottommost_compression_opts.use_zstd_dict_trainer: true +2023/07/23-15:52:25.365154 268726 Options.compression_opts.window_bits: -14 +2023/07/23-15:52:25.365154 268726 Options.compression_opts.level: 32767 +2023/07/23-15:52:25.365155 268726 Options.compression_opts.strategy: 0 +2023/07/23-15:52:25.365158 268726 Options.compression_opts.max_dict_bytes: 0 +2023/07/23-15:52:25.365159 268726 Options.compression_opts.zstd_max_train_bytes: 0 +2023/07/23-15:52:25.365160 268726 Options.compression_opts.use_zstd_dict_trainer: true +2023/07/23-15:52:25.365161 268726 Options.compression_opts.parallel_threads: 1 +2023/07/23-15:52:25.365162 268726 Options.compression_opts.enabled: false +2023/07/23-15:52:25.365163 268726 Options.compression_opts.max_dict_buffer_bytes: 0 +2023/07/23-15:52:25.365163 268726 Options.level0_file_num_compaction_trigger: 4 +2023/07/23-15:52:25.365164 268726 Options.level0_slowdown_writes_trigger: 20 +2023/07/23-15:52:25.365165 268726 Options.level0_stop_writes_trigger: 36 +2023/07/23-15:52:25.365166 268726 Options.target_file_size_base: 67108864 +2023/07/23-15:52:25.365167 268726 Options.target_file_size_multiplier: 1 +2023/07/23-15:52:25.365168 268726 Options.max_bytes_for_level_base: 268435456 +2023/07/23-15:52:25.365168 268726 Options.level_compaction_dynamic_level_bytes: 0 +2023/07/23-15:52:25.365169 268726 Options.max_bytes_for_level_multiplier: 10.000000 +2023/07/23-15:52:25.365171 268726 Options.max_bytes_for_level_multiplier_addtl[0]: 1 +2023/07/23-15:52:25.365172 268726 Options.max_bytes_for_level_multiplier_addtl[1]: 1 +2023/07/23-15:52:25.365173 268726 Options.max_bytes_for_level_multiplier_addtl[2]: 1 +2023/07/23-15:52:25.365174 268726 Options.max_bytes_for_level_multiplier_addtl[3]: 1 +2023/07/23-15:52:25.365175 
268726 Options.max_bytes_for_level_multiplier_addtl[4]: 1 +2023/07/23-15:52:25.365175 268726 Options.max_bytes_for_level_multiplier_addtl[5]: 1 +2023/07/23-15:52:25.365176 268726 Options.max_bytes_for_level_multiplier_addtl[6]: 1 +2023/07/23-15:52:25.365177 268726 Options.max_sequential_skip_in_iterations: 8 +2023/07/23-15:52:25.365178 268726 Options.max_compaction_bytes: 1677721600 +2023/07/23-15:52:25.365179 268726 Options.ignore_max_compaction_bytes_for_input: true +2023/07/23-15:52:25.365180 268726 Options.arena_block_size: 1048576 +2023/07/23-15:52:25.365180 268726 Options.soft_pending_compaction_bytes_limit: 68719476736 +2023/07/23-15:52:25.365181 268726 Options.hard_pending_compaction_bytes_limit: 274877906944 +2023/07/23-15:52:25.365182 268726 Options.disable_auto_compactions: 1 +2023/07/23-15:52:25.365184 268726 Options.compaction_style: kCompactionStyleLevel +2023/07/23-15:52:25.365185 268726 Options.compaction_pri: kMinOverlappingRatio +2023/07/23-15:52:25.365186 268726 Options.compaction_options_universal.size_ratio: 1 +2023/07/23-15:52:25.365187 268726 Options.compaction_options_universal.min_merge_width: 2 +2023/07/23-15:52:25.365188 268726 Options.compaction_options_universal.max_merge_width: 4294967295 +2023/07/23-15:52:25.365189 268726 Options.compaction_options_universal.max_size_amplification_percent: 200 +2023/07/23-15:52:25.365190 268726 Options.compaction_options_universal.compression_size_percent: -1 +2023/07/23-15:52:25.365191 268726 Options.compaction_options_universal.stop_style: kCompactionStopStyleTotalSize +2023/07/23-15:52:25.365191 268726 Options.compaction_options_fifo.max_table_files_size: 1073741824 +2023/07/23-15:52:25.365192 268726 Options.compaction_options_fifo.allow_compaction: 0 +2023/07/23-15:52:25.365200 268726 Options.table_properties_collectors: +2023/07/23-15:52:25.365201 268726 Options.inplace_update_support: 0 +2023/07/23-15:52:25.365202 268726 Options.inplace_update_num_locks: 10000 +2023/07/23-15:52:25.365203 268726 Options.memtable_prefix_bloom_size_ratio: 0.000000 +2023/07/23-15:52:25.365204 268726 Options.memtable_whole_key_filtering: 0 +2023/07/23-15:52:25.365205 268726 Options.memtable_huge_page_size: 0 +2023/07/23-15:52:25.365206 268726 Options.bloom_locality: 0 +2023/07/23-15:52:25.365209 268726 Options.max_successive_merges: 0 +2023/07/23-15:52:25.365210 268726 Options.optimize_filters_for_hits: 0 +2023/07/23-15:52:25.365210 268726 Options.paranoid_file_checks: 0 +2023/07/23-15:52:25.365212 268726 Options.force_consistency_checks: 1 +2023/07/23-15:52:25.365212 268726 Options.report_bg_io_stats: 0 +2023/07/23-15:52:25.365213 268726 Options.ttl: 2592000 +2023/07/23-15:52:25.365214 268726 Options.periodic_compaction_seconds: 0 +2023/07/23-15:52:25.365215 268726 Options.preclude_last_level_data_seconds: 0 +2023/07/23-15:52:25.365216 268726 Options.preserve_internal_time_seconds: 0 +2023/07/23-15:52:25.365216 268726 Options.enable_blob_files: false +2023/07/23-15:52:25.365217 268726 Options.min_blob_size: 0 +2023/07/23-15:52:25.365218 268726 Options.blob_file_size: 268435456 +2023/07/23-15:52:25.365219 268726 Options.blob_compression_type: NoCompression +2023/07/23-15:52:25.365220 268726 Options.enable_blob_garbage_collection: false +2023/07/23-15:52:25.365221 268726 Options.blob_garbage_collection_age_cutoff: 0.250000 +2023/07/23-15:52:25.365222 268726 Options.blob_garbage_collection_force_threshold: 1.000000 +2023/07/23-15:52:25.365223 268726 Options.blob_compaction_readahead_size: 0 +2023/07/23-15:52:25.365224 268726 
Options.blob_file_starting_level: 0 +2023/07/23-15:52:25.365224 268726 Options.experimental_mempurge_threshold: 0.000000 +2023/07/23-15:52:25.366096 268726 [db/version_set.cc:5713] Recovered from manifest file:path/for/rocksdb/MANIFEST-000001 succeeded,manifest_file_number is 1, next_file_number is 3, last_sequence is 0, log_number is 0,prev_log_number is 0,max_column_family is 0,min_log_number_to_keep is 0 +2023/07/23-15:52:25.366102 268726 [db/version_set.cc:5722] Column family [default] (ID 0), log number is 0 +2023/07/23-15:52:25.366138 268726 [db/db_impl/db_impl_open.cc:537] DB ID: fda2968c-dc40-46ae-93c7-6c66e47096c0 +2023/07/23-15:52:25.366245 268726 [db/version_set.cc:5180] Creating manifest 5 +2023/07/23-15:52:25.388349 268726 [db/db_impl/db_impl_open.cc:1977] SstFileManager instance 0x55cd839a1fa0 +2023/07/23-15:52:25.388375 268726 DB pointer 0x55cd839a4680 +2023/07/23-15:52:25.388438 268726 [WARN] [utilities/transactions/pessimistic_transaction_db.cc:262] Transaction write_policy is 0 +2023/07/23-15:52:25.388575 268747 [db/db_impl/db_impl.cc:1085] ------- DUMPING STATS ------- +2023/07/23-15:52:25.388584 268747 [db/db_impl/db_impl.cc:1086] +** DB Stats ** +Uptime(secs): 0.0 total, 0.0 interval +Cumulative writes: 0 writes, 0 keys, 0 commit groups, 0.0 writes per commit group, ingest: 0.00 GB, 0.00 MB/s +Cumulative WAL: 0 writes, 0 syncs, 0.00 writes per sync, written: 0.00 GB, 0.00 MB/s +Cumulative stall: 00:00:0.000 H:M:S, 0.0 percent +Interval writes: 0 writes, 0 keys, 0 commit groups, 0.0 writes per commit group, ingest: 0.00 MB, 0.00 MB/s +Interval WAL: 0 writes, 0 syncs, 0.00 writes per sync, written: 0.00 GB, 0.00 MB/s +Interval stall: 00:00:0.000 H:M:S, 0.0 percent +Write Stall (count): write-buffer-manager-limit-stops: 0, +** Compaction Stats [default] ** +Level Files Size Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) CompMergeCPU(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop Rblob(GB) Wblob(GB) +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Sum 0/0 0.00 KB 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00 0.00 0 0.000 0 0 0.0 0.0 + Int 0/0 0.00 KB 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00 0.00 0 0.000 0 0 0.0 0.0 + +** Compaction Stats [default] ** +Priority Files Size Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) CompMergeCPU(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop Rblob(GB) Wblob(GB) +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +Blob file count: 0, total size: 0.0 GB, garbage size: 0.0 GB, space amp: 0.0 + +Uptime(secs): 0.0 total, 0.0 interval +Flush(GB): cumulative 0.000, interval 0.000 +AddFile(GB): cumulative 0.000, interval 0.000 +AddFile(Total Files): cumulative 0, interval 0 +AddFile(L0 Files): cumulative 0, interval 0 +AddFile(Keys): cumulative 0, interval 0 +Cumulative compaction: 0.00 GB write, 0.00 MB/s write, 0.00 GB read, 0.00 MB/s read, 0.0 seconds +Interval compaction: 0.00 GB write, 0.00 MB/s write, 0.00 GB read, 0.00 MB/s read, 0.0 seconds +Write Stall (count): cf-l0-file-count-limit-delays-with-ongoing-compaction: 0, cf-l0-file-count-limit-stops-with-ongoing-compaction: 0, l0-file-count-limit-delays: 0, l0-file-count-limit-stops: 0, memtable-limit-delays: 0, 
memtable-limit-stops: 0, pending-compaction-bytes-delays: 0, pending-compaction-bytes-stops: 0, total-delays: 0, total-stops: 0, Block cache LRUCache@0x55cd8399dff0#268726 capacity: 8.00 MB usage: 0.08 KB table_size: 256 occupancy: 87 collections: 1 last_copies: 0 last_secs: 2.9e-05 secs_since: 0 +Block cache entry stats(count,size,portion): Misc(1,0.00 KB,0%) + +** File Read Latency Histogram By Level [default] ** +2023/07/23-15:52:25.401583 268726 [db/db_impl/db_impl.cc:1187] SetOptions() on column family [default], inputs: +2023/07/23-15:52:25.401592 268726 [db/db_impl/db_impl.cc:1190] disable_auto_compactions: false +2023/07/23-15:52:25.401593 268726 [db/db_impl/db_impl.cc:1194] [default] SetOptions() succeeded +2023/07/23-15:52:25.401594 268726 [options/cf_options.cc:1004] write_buffer_size: 67108864 +2023/07/23-15:52:25.401595 268726 [options/cf_options.cc:1006] max_write_buffer_number: 2 +2023/07/23-15:52:25.401595 268726 [options/cf_options.cc:1009] arena_block_size: 1048576 +2023/07/23-15:52:25.401596 268726 [options/cf_options.cc:1011] memtable_prefix_bloom_ratio: 0.000000 +2023/07/23-15:52:25.401599 268726 [options/cf_options.cc:1013] memtable_whole_key_filtering: 0 +2023/07/23-15:52:25.401599 268726 [options/cf_options.cc:1016] memtable_huge_page_size: 0 +2023/07/23-15:52:25.401600 268726 [options/cf_options.cc:1019] max_successive_merges: 0 +2023/07/23-15:52:25.401600 268726 [options/cf_options.cc:1022] inplace_update_num_locks: 10000 +2023/07/23-15:52:25.401601 268726 [options/cf_options.cc:1026] prefix_extractor: nullptr +2023/07/23-15:52:25.401602 268726 [options/cf_options.cc:1028] disable_auto_compactions: 0 +2023/07/23-15:52:25.401602 268726 [options/cf_options.cc:1030] soft_pending_compaction_bytes_limit: 68719476736 +2023/07/23-15:52:25.401603 268726 [options/cf_options.cc:1032] hard_pending_compaction_bytes_limit: 274877906944 +2023/07/23-15:52:25.401603 268726 [options/cf_options.cc:1034] level0_file_num_compaction_trigger: 4 +2023/07/23-15:52:25.401603 268726 [options/cf_options.cc:1036] level0_slowdown_writes_trigger: 20 +2023/07/23-15:52:25.401604 268726 [options/cf_options.cc:1038] level0_stop_writes_trigger: 36 +2023/07/23-15:52:25.401605 268726 [options/cf_options.cc:1040] max_compaction_bytes: 1677721600 +2023/07/23-15:52:25.401605 268726 [options/cf_options.cc:1042] ignore_max_compaction_bytes_for_input: true +2023/07/23-15:52:25.401606 268726 [options/cf_options.cc:1044] target_file_size_base: 67108864 +2023/07/23-15:52:25.401606 268726 [options/cf_options.cc:1046] target_file_size_multiplier: 1 +2023/07/23-15:52:25.401607 268726 [options/cf_options.cc:1048] max_bytes_for_level_base: 268435456 +2023/07/23-15:52:25.401607 268726 [options/cf_options.cc:1050] max_bytes_for_level_multiplier: 10.000000 +2023/07/23-15:52:25.401609 268726 [options/cf_options.cc:1052] ttl: 2592000 +2023/07/23-15:52:25.401609 268726 [options/cf_options.cc:1054] periodic_compaction_seconds: 0 +2023/07/23-15:52:25.401611 268726 [options/cf_options.cc:1068] max_bytes_for_level_multiplier_additional: 1, 1, 1, 1, 1, 1, 1 +2023/07/23-15:52:25.401612 268726 [options/cf_options.cc:1070] max_sequential_skip_in_iterations: 8 +2023/07/23-15:52:25.401612 268726 [options/cf_options.cc:1072] check_flush_compaction_key_order: 1 +2023/07/23-15:52:25.401613 268726 [options/cf_options.cc:1074] paranoid_file_checks: 0 +2023/07/23-15:52:25.401613 268726 [options/cf_options.cc:1076] report_bg_io_stats: 0 +2023/07/23-15:52:25.401614 268726 [options/cf_options.cc:1078] compression: 1 
+2023/07/23-15:52:25.401614 268726 [options/cf_options.cc:1081] experimental_mempurge_threshold: 0.000000 +2023/07/23-15:52:25.401615 268726 [options/cf_options.cc:1085] compaction_options_universal.size_ratio : 1 +2023/07/23-15:52:25.401615 268726 [options/cf_options.cc:1087] compaction_options_universal.min_merge_width : 2 +2023/07/23-15:52:25.401616 268726 [options/cf_options.cc:1089] compaction_options_universal.max_merge_width : -1 +2023/07/23-15:52:25.401616 268726 [options/cf_options.cc:1092] compaction_options_universal.max_size_amplification_percent : 200 +2023/07/23-15:52:25.401617 268726 [options/cf_options.cc:1095] compaction_options_universal.compression_size_percent : -1 +2023/07/23-15:52:25.401617 268726 [options/cf_options.cc:1097] compaction_options_universal.stop_style : 1 +2023/07/23-15:52:25.401618 268726 [options/cf_options.cc:1100] compaction_options_universal.allow_trivial_move : 0 +2023/07/23-15:52:25.401618 268726 [options/cf_options.cc:1102] compaction_options_universal.incremental : 0 +2023/07/23-15:52:25.401619 268726 [options/cf_options.cc:1106] compaction_options_fifo.max_table_files_size : 1073741824 +2023/07/23-15:52:25.401619 268726 [options/cf_options.cc:1108] compaction_options_fifo.allow_compaction : 0 +2023/07/23-15:52:25.401620 268726 [options/cf_options.cc:1112] enable_blob_files: false +2023/07/23-15:52:25.401620 268726 [options/cf_options.cc:1114] min_blob_size: 0 +2023/07/23-15:52:25.401621 268726 [options/cf_options.cc:1116] blob_file_size: 268435456 +2023/07/23-15:52:25.401622 268726 [options/cf_options.cc:1118] blob_compression_type: NoCompression +2023/07/23-15:52:25.401622 268726 [options/cf_options.cc:1120] enable_blob_garbage_collection: false +2023/07/23-15:52:25.401623 268726 [options/cf_options.cc:1122] blob_garbage_collection_age_cutoff: 0.250000 +2023/07/23-15:52:25.401623 268726 [options/cf_options.cc:1124] blob_garbage_collection_force_threshold: 1.000000 +2023/07/23-15:52:25.401624 268726 [options/cf_options.cc:1126] blob_compaction_readahead_size: 0 +2023/07/23-15:52:25.401629 268726 [options/cf_options.cc:1128] blob_file_starting_level: 0 +2023/07/23-15:52:25.401630 268726 [options/cf_options.cc:1132] prepopulate_blob_cache: disable +2023/07/23-15:52:25.401631 268726 [options/cf_options.cc:1134] last_level_temperature: 0 +2023/07/23-15:52:25.402034 268726 [db/db_impl/db_impl.cc:490] Shutdown: canceling all background work +2023/07/23-15:52:25.402266 268726 [db/db_impl/db_impl.cc:692] Shutdown complete diff --git a/smirk/path/for/rocksdb/MANIFEST-000005 b/smirk/path/for/rocksdb/MANIFEST-000005 new file mode 100644 index 0000000000000000000000000000000000000000..d0fb94bd2934a563da72946c03fd8c5c7cf0f6f1 GIT binary patch literal 79 zcmZS8)^KKEU<~`Yer`cxQDRAc(HCZ(C>91r eCI%LUKRRkZ*%%l(8JO8v81~k5W String { diff --git a/smirk/src/hash/path.rs b/smirk/src/hash/path.rs index 12469e00..43703d14 100644 --- a/smirk/src/hash/path.rs +++ b/smirk/src/hash/path.rs @@ -14,9 +14,9 @@ use super::Digest; /// /// Luckily, [`MerkleTree`] *does* implement [`Arbitrary`] /// -/// [`Aritrary`]: proptest::prelude::Arbitrary +/// [`Arbitrary`]: proptest::prelude::Arbitrary /// [`MerkleTree`]: crate::MerkleTree -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct MerklePath { /// The intermediate stages between the root hash and the target node pub(crate) stages: Vec, @@ -62,9 +62,28 @@ impl MerklePath { /// /// - `this` is the hash of the key-value pair of the visited node in this stage 
/// - `left`/`right` is the root hash of the "other side" of the tree -#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub(crate) enum Stage { Left { this: Digest, right: Option }, Right { this: Digest, left: Option }, } +#[cfg(test)] +mod tests { + use test_strategy::proptest; + + use crate::MerkleTree; + + use super::*; + + #[proptest] + fn proof_serialization_round_trip(tree: MerkleTree) { + for node in tree.iter() { + let proof = tree.prove(node.key()).unwrap(); + let bytes = proof.to_bytes(); + let proof_again = MerklePath::from_bytes(&bytes).unwrap(); + + assert_eq!(proof, proof_again); + } + } +} diff --git a/smirk/src/storage/mod.rs b/smirk/src/storage/mod.rs index ef582b69..5b79adb6 100644 --- a/smirk/src/storage/mod.rs +++ b/smirk/src/storage/mod.rs @@ -91,7 +91,7 @@ impl Storage { K: Serialize + 'static + Ord, V: Serialize + 'static + Hashable, { - codec::write_tree_to_tx(&tx, tree) + codec::write_tree_to_tx(tx, tree) } /// Load a tree from storage, if it is present @@ -124,7 +124,7 @@ impl Storage { return Ok(Some(MerkleTree::new())); } - let node = codec::load_node(&tx, &key)?; + let node = codec::load_node(tx, &key)?; let tree = MerkleTree { inner: Some(Box::new(node)), }; diff --git a/smirk/src/tree/batch.rs b/smirk/src/tree/batch.rs index 44783383..3bac6c09 100644 --- a/smirk/src/tree/batch.rs +++ b/smirk/src/tree/batch.rs @@ -29,10 +29,12 @@ impl Batch { /// /// Note, if two operations reference the same key, they will be applied in the order they /// exist in `operations`. No other guarantees about the order of execution are made + #[must_use] pub fn from_operations(mut operations: Vec>) -> Self where K: Ord, { + // preserve order of operations, so don't use sort_unstable operations.sort_by(|a, b| a.key().cmp(b.key())); Self { operations } } diff --git a/smirk/src/tree/mod.rs b/smirk/src/tree/mod.rs index d5951d05..a09d97e6 100644 --- a/smirk/src/tree/mod.rs +++ b/smirk/src/tree/mod.rs @@ -21,7 +21,7 @@ pub use hash::key_value_hash; #[cfg(test)] mod tests; -/// A Merkle tree, with a customizable storage backend and hash function +/// A Merkle tree with a map-like API /// /// ```rust /// # use smirk::{MerkleTree, smirk}; @@ -35,7 +35,9 @@ mod tests; /// /// assert_eq!(tree.size(), 1); /// ``` -/// You can walk the tree in depth-first or breadth-first ordering: +/// You can use [`MerkleTree::iter`] to get an iterator over tuples of key-value pairs +/// +/// The order will be the order specified by the [`Ord`] implementation for the key type /// ```rust /// # use smirk::smirk; /// let tree = smirk! 
{ @@ -44,14 +46,30 @@ mod tests; /// 3 => 345, /// }; /// -/// for (k, v) in tree.depth_first() { -/// println!("key: {k} - value: {v}"); -/// } +/// let pairs: Vec<(i32, i32)> = tree +/// .iter() +/// .map(|node| (*node.key(), *node.value())) +/// .collect(); +/// +/// assert_eq!(pairs, vec![ +/// (1, 123), +/// (2, 234), +/// (3, 345), +/// ]); +/// ``` +/// You can also go the other way via [`FromIterator`], just like you would for a [`HashMap`]: +/// ```rust +/// # use smirk::MerkleTree; +/// let pairs = vec![ +/// (1, 123), +/// (2, 234), +/// (3, 345), +/// ]; +/// let tree: MerkleTree<_, _> = pairs.into_iter().collect(); /// -/// for (k, v) in tree.breadth_first() { -/// println!("key: {k} - value: {v}"); -/// } +/// assert_eq!(tree.size(), 3); /// ``` +/// /// Broadly speaking, to do anything useful with a Merkle tree, the key type must implement /// [`Ord`] and [`Hashable`], and the value type must implement [`Hashable`] /// @@ -62,6 +80,9 @@ mod tests; /// If this happens, behaviour of the tree is unspecified, but not /// undefined. In other words, the usual soundness rules will be upheld, but any function performed /// on the tree itself may give incorrect results +/// +/// [`HashMap`]: std::collections::HashMap +/// #[derive(Debug, Clone)] pub struct MerkleTree { pub(crate) inner: Option>>, @@ -105,7 +126,7 @@ impl MerkleTree { /// /// Note: inserting a single value will potentially rebalance the tree, and also recompute hash /// values, which can be expensive. If you are inserting many items, consider using - /// [`MerkleTree::apply_batch`] + /// [`MerkleTree::apply`] pub fn insert(&mut self, key: K, value: V) where K: Hashable + Ord, From d3f3433507edeec4294d154e578e5d971422beee Mon Sep 17 00:00:00 2001 From: Cameron Date: Sun, 23 Jul 2023 21:53:14 +0100 Subject: [PATCH 09/15] initial benchmarks - not good :( --- Cargo.lock | 2 + smirk/Cargo.toml | 6 + smirk/benches/insert.rs | 33 ++ smirk/path/for/rocksdb/000004.log | Bin 70 -> 0 bytes smirk/path/for/rocksdb/CURRENT | 1 - smirk/path/for/rocksdb/IDENTITY | 1 - smirk/path/for/rocksdb/LOCK | 0 smirk/path/for/rocksdb/LOG | 107 ------ .../path/for/rocksdb/LOG.old.1690123945383009 | 333 ------------------ smirk/path/for/rocksdb/MANIFEST-000005 | Bin 79 -> 0 bytes smirk/path/for/rocksdb/OPTIONS-000007 | 198 ----------- smirk/path/for/rocksdb/OPTIONS-000009 | 198 ----------- smirk/src/lib.rs | 19 + 13 files changed, 60 insertions(+), 838 deletions(-) create mode 100644 smirk/benches/insert.rs delete mode 100644 smirk/path/for/rocksdb/000004.log delete mode 100644 smirk/path/for/rocksdb/CURRENT delete mode 100644 smirk/path/for/rocksdb/IDENTITY delete mode 100644 smirk/path/for/rocksdb/LOCK delete mode 100644 smirk/path/for/rocksdb/LOG delete mode 100644 smirk/path/for/rocksdb/LOG.old.1690123945383009 delete mode 100644 smirk/path/for/rocksdb/MANIFEST-000005 delete mode 100644 smirk/path/for/rocksdb/OPTIONS-000007 delete mode 100644 smirk/path/for/rocksdb/OPTIONS-000009 diff --git a/Cargo.lock b/Cargo.lock index ceed7fc8..842a9451 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5193,6 +5193,8 @@ dependencies = [ "miden-crypto 0.6.0", "pretty_assertions", "proptest", + "rand 0.8.5", + "rand_chacha 0.3.1", "rmp-serde", "rocksdb", "serde", diff --git a/smirk/Cargo.toml b/smirk/Cargo.toml index d7d5df0e..f3f9e736 100644 --- a/smirk/Cargo.toml +++ b/smirk/Cargo.toml @@ -22,3 +22,9 @@ test-strategy = "0.3" pretty_assertions = "1" insta = "1" criterion = "0.5" +rand = "0.8" +rand_chacha = "0.3" + +[[bench]] +name = "insert" +harness = false 
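(Editor's aside on the slow initial numbers: the `insert` bench registered above rebuilds hashes and rebalances on every call, which is exactly what the `MerkleTree::insert` docs warn about. Below is a minimal sketch of the batched alternative using the `Batch`/`Operation`/`apply` API exercised by the later benchmark commit; the exact generic bounds and key/value types are assumed here, not taken from the patch.)

```rust
use smirk::{
    batch::{Batch, Operation},
    smirk,
};

fn main() {
    // Queue up all 1k inserts first; `from_operations` sorts them by key with a
    // stable sort, so operations on the same key keep their original order.
    let ops: Vec<_> = (0..1_000i32).map(|i| Operation::Insert(i, i)).collect();
    let batch = Batch::from_operations(ops);

    // Apply the whole batch in one call instead of 1k separate `insert`s, as the
    // `insert` docs recommend when loading many items.
    let mut tree = smirk! {};
    tree.apply(batch);

    assert_eq!(tree.size(), 1_000);
}
```

Whether this actually beats the per-key loop is what the batch-insert benchmark added two commits later measures.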
diff --git a/smirk/benches/insert.rs b/smirk/benches/insert.rs new file mode 100644 index 00000000..52b033c4 --- /dev/null +++ b/smirk/benches/insert.rs @@ -0,0 +1,33 @@ +use std::hint::black_box; + +use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaChaRng; +use smirk::smirk; + +pub fn insert_benchmark(c: &mut Criterion) { + let mut rng = ChaChaRng::from_seed([0; 32]); + let mut nums = vec![0; 1000]; + rng.fill(nums.as_mut_slice()); + + c.bench_with_input( + BenchmarkId::new("insert", "1k random"), + &nums.as_slice(), + |bencher, nums| { + bencher.iter(|| { + let mut tree = smirk! {}; + for i in *nums { + tree.insert(i, i); + } + black_box(tree); + }); + }, + ); +} + +criterion_group! { + name = benches; + config = Criterion::default().sample_size(10); + targets = insert_benchmark +} +criterion_main!(benches); diff --git a/smirk/path/for/rocksdb/000004.log b/smirk/path/for/rocksdb/000004.log deleted file mode 100644 index be9399823de4fd05a1a31e5a4ec0246d80b921d5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 70 zcmX@6blXRpfsv5`0+@jmFC$A)etrof6C)!d>*R^m2M#bYGI36xbmq*NR}&czd|a07 N#LK|Q0@VSccmW^r4{ZPd diff --git a/smirk/path/for/rocksdb/CURRENT b/smirk/path/for/rocksdb/CURRENT deleted file mode 100644 index aa5bb8ea..00000000 --- a/smirk/path/for/rocksdb/CURRENT +++ /dev/null @@ -1 +0,0 @@ -MANIFEST-000005 diff --git a/smirk/path/for/rocksdb/IDENTITY b/smirk/path/for/rocksdb/IDENTITY deleted file mode 100644 index 0d4ffc44..00000000 --- a/smirk/path/for/rocksdb/IDENTITY +++ /dev/null @@ -1 +0,0 @@ -fda2968c-dc40-46ae-93c7-6c66e47096c0 \ No newline at end of file diff --git a/smirk/path/for/rocksdb/LOCK b/smirk/path/for/rocksdb/LOCK deleted file mode 100644 index e69de29b..00000000 diff --git a/smirk/path/for/rocksdb/LOG b/smirk/path/for/rocksdb/LOG deleted file mode 100644 index 4aca7f1c..00000000 --- a/smirk/path/for/rocksdb/LOG +++ /dev/null @@ -1,107 +0,0 @@ -2023/07/23-15:52:25.383264 268744 RocksDB version: 8.1.1 -2023/07/23-15:52:25.383315 268744 Compile date 2023-04-06 16:38:52 -2023/07/23-15:52:25.383318 268744 DB SUMMARY -2023/07/23-15:52:25.383320 268744 DB Session ID: IMT9NYJINBM3XOR7GJAU -2023/07/23-15:52:25.383333 268744 CURRENT file: CURRENT -2023/07/23-15:52:25.383334 268744 IDENTITY file: IDENTITY -2023/07/23-15:52:25.383336 268744 MANIFEST file: MANIFEST-000001 size: 13 Bytes -2023/07/23-15:52:25.383338 268744 MANIFEST file: MANIFEST-000005 size: 66 Bytes -2023/07/23-15:52:25.383339 268744 SST files in path/for/rocksdb dir, Total Num: 0, files: -2023/07/23-15:52:25.383340 268744 Write Ahead Log file in path/for/rocksdb: 000004.log size: 0 ; -2023/07/23-15:52:25.383341 268744 Options.error_if_exists: 0 -2023/07/23-15:52:25.383342 268744 Options.create_if_missing: 1 -2023/07/23-15:52:25.383342 268744 Options.paranoid_checks: 1 -2023/07/23-15:52:25.383343 268744 Options.flush_verify_memtable_count: 1 -2023/07/23-15:52:25.383343 268744 Options.track_and_verify_wals_in_manifest: 0 -2023/07/23-15:52:25.383344 268744 Options.verify_sst_unique_id_in_manifest: 1 -2023/07/23-15:52:25.383344 268744 Options.env: 0x55b1e66a3a88 -2023/07/23-15:52:25.383345 268744 Options.fs: PosixFileSystem -2023/07/23-15:52:25.383346 268744 Options.info_log: 0x55b1e73f6ff0 -2023/07/23-15:52:25.383346 268744 Options.max_file_opening_threads: 16 -2023/07/23-15:52:25.383347 268744 Options.statistics: (nil) -2023/07/23-15:52:25.383347 268744 Options.use_fsync: 0 -2023/07/23-15:52:25.383348 
268744 Options.max_log_file_size: 0 -2023/07/23-15:52:25.383348 268744 Options.max_manifest_file_size: 1073741824 -2023/07/23-15:52:25.383349 268744 Options.log_file_time_to_roll: 0 -2023/07/23-15:52:25.383349 268744 Options.keep_log_file_num: 1000 -2023/07/23-15:52:25.383350 268744 Options.recycle_log_file_num: 0 -2023/07/23-15:52:25.383350 268744 Options.allow_fallocate: 1 -2023/07/23-15:52:25.383351 268744 Options.allow_mmap_reads: 0 -2023/07/23-15:52:25.383351 268744 Options.allow_mmap_writes: 0 -2023/07/23-15:52:25.383352 268744 Options.use_direct_reads: 0 -2023/07/23-15:52:25.383352 268744 Options.use_direct_io_for_flush_and_compaction: 0 -2023/07/23-15:52:25.383353 268744 Options.create_missing_column_families: 0 -2023/07/23-15:52:25.383353 268744 Options.db_log_dir: -2023/07/23-15:52:25.383354 268744 Options.wal_dir: -2023/07/23-15:52:25.383354 268744 Options.table_cache_numshardbits: 6 -2023/07/23-15:52:25.383355 268744 Options.WAL_ttl_seconds: 0 -2023/07/23-15:52:25.383355 268744 Options.WAL_size_limit_MB: 0 -2023/07/23-15:52:25.383356 268744 Options.max_write_batch_group_size_bytes: 1048576 -2023/07/23-15:52:25.383356 268744 Options.manifest_preallocation_size: 4194304 -2023/07/23-15:52:25.383357 268744 Options.is_fd_close_on_exec: 1 -2023/07/23-15:52:25.383357 268744 Options.advise_random_on_open: 1 -2023/07/23-15:52:25.383358 268744 Options.db_write_buffer_size: 0 -2023/07/23-15:52:25.383358 268744 Options.write_buffer_manager: 0x55b1e73f7140 -2023/07/23-15:52:25.383359 268744 Options.access_hint_on_compaction_start: 1 -2023/07/23-15:52:25.383359 268744 Options.random_access_max_buffer_size: 1048576 -2023/07/23-15:52:25.383359 268744 Options.use_adaptive_mutex: 0 -2023/07/23-15:52:25.383360 268744 Options.rate_limiter: (nil) -2023/07/23-15:52:25.383361 268744 Options.sst_file_manager.rate_bytes_per_sec: 0 -2023/07/23-15:52:25.383362 268744 Options.wal_recovery_mode: 2 -2023/07/23-15:52:25.383363 268744 Options.enable_thread_tracking: 0 -2023/07/23-15:52:25.383364 268744 Options.enable_pipelined_write: 0 -2023/07/23-15:52:25.383364 268744 Options.unordered_write: 0 -2023/07/23-15:52:25.383364 268744 Options.allow_concurrent_memtable_write: 1 -2023/07/23-15:52:25.383365 268744 Options.enable_write_thread_adaptive_yield: 1 -2023/07/23-15:52:25.383365 268744 Options.write_thread_max_yield_usec: 100 -2023/07/23-15:52:25.383366 268744 Options.write_thread_slow_yield_usec: 3 -2023/07/23-15:52:25.383366 268744 Options.row_cache: None -2023/07/23-15:52:25.383367 268744 Options.wal_filter: None -2023/07/23-15:52:25.383367 268744 Options.avoid_flush_during_recovery: 0 -2023/07/23-15:52:25.383368 268744 Options.allow_ingest_behind: 0 -2023/07/23-15:52:25.383368 268744 Options.two_write_queues: 0 -2023/07/23-15:52:25.383369 268744 Options.manual_wal_flush: 0 -2023/07/23-15:52:25.383369 268744 Options.wal_compression: 0 -2023/07/23-15:52:25.383370 268744 Options.atomic_flush: 0 -2023/07/23-15:52:25.383370 268744 Options.avoid_unnecessary_blocking_io: 0 -2023/07/23-15:52:25.383371 268744 Options.persist_stats_to_disk: 0 -2023/07/23-15:52:25.383371 268744 Options.write_dbid_to_manifest: 0 -2023/07/23-15:52:25.383372 268744 Options.log_readahead_size: 0 -2023/07/23-15:52:25.383372 268744 Options.file_checksum_gen_factory: Unknown -2023/07/23-15:52:25.383373 268744 Options.best_efforts_recovery: 0 -2023/07/23-15:52:25.383373 268744 Options.max_bgerror_resume_count: 2147483647 -2023/07/23-15:52:25.383374 268744 Options.bgerror_resume_retry_interval: 1000000 -2023/07/23-15:52:25.383374 268744 
Options.allow_data_in_errors: 0 -2023/07/23-15:52:25.383375 268744 Options.db_host_id: __hostname__ -2023/07/23-15:52:25.383375 268744 Options.enforce_single_del_contracts: true -2023/07/23-15:52:25.383376 268744 Options.max_background_jobs: 2 -2023/07/23-15:52:25.383376 268744 Options.max_background_compactions: -1 -2023/07/23-15:52:25.383377 268744 Options.max_subcompactions: 1 -2023/07/23-15:52:25.383377 268744 Options.avoid_flush_during_shutdown: 0 -2023/07/23-15:52:25.383378 268744 Options.writable_file_max_buffer_size: 1048576 -2023/07/23-15:52:25.383378 268744 Options.delayed_write_rate : 16777216 -2023/07/23-15:52:25.383379 268744 Options.max_total_wal_size: 0 -2023/07/23-15:52:25.383379 268744 Options.delete_obsolete_files_period_micros: 21600000000 -2023/07/23-15:52:25.383380 268744 Options.stats_dump_period_sec: 600 -2023/07/23-15:52:25.383380 268744 Options.stats_persist_period_sec: 600 -2023/07/23-15:52:25.383380 268744 Options.stats_history_buffer_size: 1048576 -2023/07/23-15:52:25.383381 268744 Options.max_open_files: -1 -2023/07/23-15:52:25.383381 268744 Options.bytes_per_sync: 0 -2023/07/23-15:52:25.383382 268744 Options.wal_bytes_per_sync: 0 -2023/07/23-15:52:25.383383 268744 Options.strict_bytes_per_sync: 0 -2023/07/23-15:52:25.383383 268744 Options.compaction_readahead_size: 0 -2023/07/23-15:52:25.383383 268744 Options.max_background_flushes: -1 -2023/07/23-15:52:25.383384 268744 Compression algorithms supported: -2023/07/23-15:52:25.383387 268744 kZSTD supported: 1 -2023/07/23-15:52:25.383388 268744 kXpressCompression supported: 0 -2023/07/23-15:52:25.383388 268744 kBZip2Compression supported: 1 -2023/07/23-15:52:25.383389 268744 kZSTDNotFinalCompression supported: 1 -2023/07/23-15:52:25.383389 268744 kLZ4Compression supported: 1 -2023/07/23-15:52:25.383400 268744 kZlibCompression supported: 1 -2023/07/23-15:52:25.383400 268744 kLZ4HCCompression supported: 1 -2023/07/23-15:52:25.383401 268744 kSnappyCompression supported: 1 -2023/07/23-15:52:25.383402 268744 Fast CRC32 supported: Not supported on x86 -2023/07/23-15:52:25.383403 268744 DMutex implementation: pthread_mutex_t -2023/07/23-15:52:25.383426 268744 [WARN] [db/db_impl/db_impl_open.cc:2075] DB::Open() failed: IO error: While lock file: path/for/rocksdb/LOCK: Resource temporarily unavailable -2023/07/23-15:52:25.383435 268744 [db/db_impl/db_impl.cc:490] Shutdown: canceling all background work -2023/07/23-15:52:25.383454 268744 [db/db_impl/db_impl.cc:692] Shutdown complete diff --git a/smirk/path/for/rocksdb/LOG.old.1690123945383009 b/smirk/path/for/rocksdb/LOG.old.1690123945383009 deleted file mode 100644 index f26a8cef..00000000 --- a/smirk/path/for/rocksdb/LOG.old.1690123945383009 +++ /dev/null @@ -1,333 +0,0 @@ -2023/07/23-15:52:25.342449 268726 RocksDB version: 8.1.1 -2023/07/23-15:52:25.342586 268726 Compile date 2023-04-06 16:38:52 -2023/07/23-15:52:25.342591 268726 DB SUMMARY -2023/07/23-15:52:25.342593 268726 DB Session ID: TGDHPLXGV6GEJBVANPGW -2023/07/23-15:52:25.342609 268726 SST files in path/for/rocksdb dir, Total Num: 0, files: -2023/07/23-15:52:25.342611 268726 Write Ahead Log file in path/for/rocksdb: -2023/07/23-15:52:25.342613 268726 Options.error_if_exists: 0 -2023/07/23-15:52:25.342614 268726 Options.create_if_missing: 1 -2023/07/23-15:52:25.342614 268726 Options.paranoid_checks: 1 -2023/07/23-15:52:25.342615 268726 Options.flush_verify_memtable_count: 1 -2023/07/23-15:52:25.342616 268726 Options.track_and_verify_wals_in_manifest: 0 -2023/07/23-15:52:25.342617 268726 
Options.verify_sst_unique_id_in_manifest: 1 -2023/07/23-15:52:25.342618 268726 Options.env: 0x55cd828ffa88 -2023/07/23-15:52:25.342619 268726 Options.fs: PosixFileSystem -2023/07/23-15:52:25.342620 268726 Options.info_log: 0x55cd839a3ff0 -2023/07/23-15:52:25.342620 268726 Options.max_file_opening_threads: 16 -2023/07/23-15:52:25.342621 268726 Options.statistics: (nil) -2023/07/23-15:52:25.342622 268726 Options.use_fsync: 0 -2023/07/23-15:52:25.342623 268726 Options.max_log_file_size: 0 -2023/07/23-15:52:25.342624 268726 Options.max_manifest_file_size: 1073741824 -2023/07/23-15:52:25.342632 268726 Options.log_file_time_to_roll: 0 -2023/07/23-15:52:25.342633 268726 Options.keep_log_file_num: 1000 -2023/07/23-15:52:25.342633 268726 Options.recycle_log_file_num: 0 -2023/07/23-15:52:25.342634 268726 Options.allow_fallocate: 1 -2023/07/23-15:52:25.342635 268726 Options.allow_mmap_reads: 0 -2023/07/23-15:52:25.342636 268726 Options.allow_mmap_writes: 0 -2023/07/23-15:52:25.342637 268726 Options.use_direct_reads: 0 -2023/07/23-15:52:25.342637 268726 Options.use_direct_io_for_flush_and_compaction: 0 -2023/07/23-15:52:25.342638 268726 Options.create_missing_column_families: 0 -2023/07/23-15:52:25.342639 268726 Options.db_log_dir: -2023/07/23-15:52:25.342640 268726 Options.wal_dir: -2023/07/23-15:52:25.342640 268726 Options.table_cache_numshardbits: 6 -2023/07/23-15:52:25.342641 268726 Options.WAL_ttl_seconds: 0 -2023/07/23-15:52:25.342642 268726 Options.WAL_size_limit_MB: 0 -2023/07/23-15:52:25.342643 268726 Options.max_write_batch_group_size_bytes: 1048576 -2023/07/23-15:52:25.342643 268726 Options.manifest_preallocation_size: 4194304 -2023/07/23-15:52:25.342644 268726 Options.is_fd_close_on_exec: 1 -2023/07/23-15:52:25.342645 268726 Options.advise_random_on_open: 1 -2023/07/23-15:52:25.342646 268726 Options.db_write_buffer_size: 0 -2023/07/23-15:52:25.342646 268726 Options.write_buffer_manager: 0x55cd839a4140 -2023/07/23-15:52:25.342647 268726 Options.access_hint_on_compaction_start: 1 -2023/07/23-15:52:25.342648 268726 Options.random_access_max_buffer_size: 1048576 -2023/07/23-15:52:25.342649 268726 Options.use_adaptive_mutex: 0 -2023/07/23-15:52:25.342649 268726 Options.rate_limiter: (nil) -2023/07/23-15:52:25.342650 268726 Options.sst_file_manager.rate_bytes_per_sec: 0 -2023/07/23-15:52:25.342651 268726 Options.wal_recovery_mode: 2 -2023/07/23-15:52:25.342652 268726 Options.enable_thread_tracking: 0 -2023/07/23-15:52:25.342653 268726 Options.enable_pipelined_write: 0 -2023/07/23-15:52:25.342654 268726 Options.unordered_write: 0 -2023/07/23-15:52:25.342656 268726 Options.allow_concurrent_memtable_write: 1 -2023/07/23-15:52:25.342657 268726 Options.enable_write_thread_adaptive_yield: 1 -2023/07/23-15:52:25.342657 268726 Options.write_thread_max_yield_usec: 100 -2023/07/23-15:52:25.342658 268726 Options.write_thread_slow_yield_usec: 3 -2023/07/23-15:52:25.342659 268726 Options.row_cache: None -2023/07/23-15:52:25.342660 268726 Options.wal_filter: None -2023/07/23-15:52:25.342661 268726 Options.avoid_flush_during_recovery: 0 -2023/07/23-15:52:25.342661 268726 Options.allow_ingest_behind: 0 -2023/07/23-15:52:25.342662 268726 Options.two_write_queues: 0 -2023/07/23-15:52:25.342663 268726 Options.manual_wal_flush: 0 -2023/07/23-15:52:25.342664 268726 Options.wal_compression: 0 -2023/07/23-15:52:25.342664 268726 Options.atomic_flush: 0 -2023/07/23-15:52:25.342665 268726 Options.avoid_unnecessary_blocking_io: 0 -2023/07/23-15:52:25.342666 268726 Options.persist_stats_to_disk: 0 
-2023/07/23-15:52:25.342667 268726 Options.write_dbid_to_manifest: 0 -2023/07/23-15:52:25.342667 268726 Options.log_readahead_size: 0 -2023/07/23-15:52:25.342668 268726 Options.file_checksum_gen_factory: Unknown -2023/07/23-15:52:25.342669 268726 Options.best_efforts_recovery: 0 -2023/07/23-15:52:25.342669 268726 Options.max_bgerror_resume_count: 2147483647 -2023/07/23-15:52:25.342670 268726 Options.bgerror_resume_retry_interval: 1000000 -2023/07/23-15:52:25.342671 268726 Options.allow_data_in_errors: 0 -2023/07/23-15:52:25.342672 268726 Options.db_host_id: __hostname__ -2023/07/23-15:52:25.342673 268726 Options.enforce_single_del_contracts: true -2023/07/23-15:52:25.342673 268726 Options.max_background_jobs: 2 -2023/07/23-15:52:25.342674 268726 Options.max_background_compactions: -1 -2023/07/23-15:52:25.342675 268726 Options.max_subcompactions: 1 -2023/07/23-15:52:25.342676 268726 Options.avoid_flush_during_shutdown: 0 -2023/07/23-15:52:25.342677 268726 Options.writable_file_max_buffer_size: 1048576 -2023/07/23-15:52:25.342677 268726 Options.delayed_write_rate : 16777216 -2023/07/23-15:52:25.342678 268726 Options.max_total_wal_size: 0 -2023/07/23-15:52:25.342679 268726 Options.delete_obsolete_files_period_micros: 21600000000 -2023/07/23-15:52:25.342680 268726 Options.stats_dump_period_sec: 600 -2023/07/23-15:52:25.342681 268726 Options.stats_persist_period_sec: 600 -2023/07/23-15:52:25.342681 268726 Options.stats_history_buffer_size: 1048576 -2023/07/23-15:52:25.342682 268726 Options.max_open_files: -1 -2023/07/23-15:52:25.342683 268726 Options.bytes_per_sync: 0 -2023/07/23-15:52:25.342684 268726 Options.wal_bytes_per_sync: 0 -2023/07/23-15:52:25.342684 268726 Options.strict_bytes_per_sync: 0 -2023/07/23-15:52:25.342685 268726 Options.compaction_readahead_size: 0 -2023/07/23-15:52:25.342686 268726 Options.max_background_flushes: -1 -2023/07/23-15:52:25.342687 268726 Compression algorithms supported: -2023/07/23-15:52:25.342691 268726 kZSTD supported: 1 -2023/07/23-15:52:25.342691 268726 kXpressCompression supported: 0 -2023/07/23-15:52:25.342692 268726 kBZip2Compression supported: 1 -2023/07/23-15:52:25.342693 268726 kZSTDNotFinalCompression supported: 1 -2023/07/23-15:52:25.342694 268726 kLZ4Compression supported: 1 -2023/07/23-15:52:25.342695 268726 kZlibCompression supported: 1 -2023/07/23-15:52:25.342696 268726 kLZ4HCCompression supported: 1 -2023/07/23-15:52:25.342697 268726 kSnappyCompression supported: 1 -2023/07/23-15:52:25.342699 268726 Fast CRC32 supported: Not supported on x86 -2023/07/23-15:52:25.342700 268726 DMutex implementation: pthread_mutex_t -2023/07/23-15:52:25.352744 268726 [db/db_impl/db_impl_open.cc:315] Creating manifest 1 -2023/07/23-15:52:25.364937 268726 [db/version_set.cc:5662] Recovering from manifest file: path/for/rocksdb/MANIFEST-000001 -2023/07/23-15:52:25.365079 268726 [db/column_family.cc:621] --------------- Options for column family [default]: -2023/07/23-15:52:25.365082 268726 Options.comparator: leveldb.BytewiseComparator -2023/07/23-15:52:25.365084 268726 Options.merge_operator: None -2023/07/23-15:52:25.365084 268726 Options.compaction_filter: None -2023/07/23-15:52:25.365085 268726 Options.compaction_filter_factory: None -2023/07/23-15:52:25.365086 268726 Options.sst_partitioner_factory: None -2023/07/23-15:52:25.365087 268726 Options.memtable_factory: SkipListFactory -2023/07/23-15:52:25.365088 268726 Options.table_factory: BlockBasedTable -2023/07/23-15:52:25.365117 268726 table_factory options: flush_block_policy_factory: 
FlushBlockBySizePolicyFactory (0x55cd83999930) - cache_index_and_filter_blocks: 0 - cache_index_and_filter_blocks_with_high_priority: 1 - pin_l0_filter_and_index_blocks_in_cache: 0 - pin_top_level_index_and_filter: 1 - index_type: 0 - data_block_index_type: 0 - index_shortening: 1 - data_block_hash_table_util_ratio: 0.750000 - checksum: 4 - no_block_cache: 0 - block_cache: 0x55cd8399dff0 - block_cache_name: LRUCache - block_cache_options: - capacity : 8388608 - num_shard_bits : 4 - strict_capacity_limit : 0 - memory_allocator : None - high_pri_pool_ratio: 0.000 - low_pri_pool_ratio: 0.000 - persistent_cache: (nil) - block_size: 4096 - block_size_deviation: 10 - block_restart_interval: 16 - index_block_restart_interval: 1 - metadata_block_size: 4096 - partition_filters: 0 - use_delta_encoding: 1 - filter_policy: nullptr - whole_key_filtering: 1 - verify_compression: 0 - read_amp_bytes_per_bit: 0 - format_version: 5 - enable_index_compression: 1 - block_align: 0 - max_auto_readahead_size: 262144 - prepopulate_block_cache: 0 - initial_auto_readahead_size: 8192 - num_file_reads_for_auto_readahead: 2 -2023/07/23-15:52:25.365133 268726 Options.write_buffer_size: 67108864 -2023/07/23-15:52:25.365134 268726 Options.max_write_buffer_number: 2 -2023/07/23-15:52:25.365139 268726 Options.compression: Snappy -2023/07/23-15:52:25.365140 268726 Options.bottommost_compression: Disabled -2023/07/23-15:52:25.365141 268726 Options.prefix_extractor: nullptr -2023/07/23-15:52:25.365142 268726 Options.memtable_insert_with_hint_prefix_extractor: nullptr -2023/07/23-15:52:25.365143 268726 Options.num_levels: 7 -2023/07/23-15:52:25.365143 268726 Options.min_write_buffer_number_to_merge: 1 -2023/07/23-15:52:25.365144 268726 Options.max_write_buffer_number_to_maintain: 0 -2023/07/23-15:52:25.365145 268726 Options.max_write_buffer_size_to_maintain: 134217728 -2023/07/23-15:52:25.365146 268726 Options.bottommost_compression_opts.window_bits: -14 -2023/07/23-15:52:25.365147 268726 Options.bottommost_compression_opts.level: 32767 -2023/07/23-15:52:25.365148 268726 Options.bottommost_compression_opts.strategy: 0 -2023/07/23-15:52:25.365148 268726 Options.bottommost_compression_opts.max_dict_bytes: 0 -2023/07/23-15:52:25.365149 268726 Options.bottommost_compression_opts.zstd_max_train_bytes: 0 -2023/07/23-15:52:25.365150 268726 Options.bottommost_compression_opts.parallel_threads: 1 -2023/07/23-15:52:25.365151 268726 Options.bottommost_compression_opts.enabled: false -2023/07/23-15:52:25.365152 268726 Options.bottommost_compression_opts.max_dict_buffer_bytes: 0 -2023/07/23-15:52:25.365153 268726 Options.bottommost_compression_opts.use_zstd_dict_trainer: true -2023/07/23-15:52:25.365154 268726 Options.compression_opts.window_bits: -14 -2023/07/23-15:52:25.365154 268726 Options.compression_opts.level: 32767 -2023/07/23-15:52:25.365155 268726 Options.compression_opts.strategy: 0 -2023/07/23-15:52:25.365158 268726 Options.compression_opts.max_dict_bytes: 0 -2023/07/23-15:52:25.365159 268726 Options.compression_opts.zstd_max_train_bytes: 0 -2023/07/23-15:52:25.365160 268726 Options.compression_opts.use_zstd_dict_trainer: true -2023/07/23-15:52:25.365161 268726 Options.compression_opts.parallel_threads: 1 -2023/07/23-15:52:25.365162 268726 Options.compression_opts.enabled: false -2023/07/23-15:52:25.365163 268726 Options.compression_opts.max_dict_buffer_bytes: 0 -2023/07/23-15:52:25.365163 268726 Options.level0_file_num_compaction_trigger: 4 -2023/07/23-15:52:25.365164 268726 Options.level0_slowdown_writes_trigger: 20 
-2023/07/23-15:52:25.365165 268726 Options.level0_stop_writes_trigger: 36 -2023/07/23-15:52:25.365166 268726 Options.target_file_size_base: 67108864 -2023/07/23-15:52:25.365167 268726 Options.target_file_size_multiplier: 1 -2023/07/23-15:52:25.365168 268726 Options.max_bytes_for_level_base: 268435456 -2023/07/23-15:52:25.365168 268726 Options.level_compaction_dynamic_level_bytes: 0 -2023/07/23-15:52:25.365169 268726 Options.max_bytes_for_level_multiplier: 10.000000 -2023/07/23-15:52:25.365171 268726 Options.max_bytes_for_level_multiplier_addtl[0]: 1 -2023/07/23-15:52:25.365172 268726 Options.max_bytes_for_level_multiplier_addtl[1]: 1 -2023/07/23-15:52:25.365173 268726 Options.max_bytes_for_level_multiplier_addtl[2]: 1 -2023/07/23-15:52:25.365174 268726 Options.max_bytes_for_level_multiplier_addtl[3]: 1 -2023/07/23-15:52:25.365175 268726 Options.max_bytes_for_level_multiplier_addtl[4]: 1 -2023/07/23-15:52:25.365175 268726 Options.max_bytes_for_level_multiplier_addtl[5]: 1 -2023/07/23-15:52:25.365176 268726 Options.max_bytes_for_level_multiplier_addtl[6]: 1 -2023/07/23-15:52:25.365177 268726 Options.max_sequential_skip_in_iterations: 8 -2023/07/23-15:52:25.365178 268726 Options.max_compaction_bytes: 1677721600 -2023/07/23-15:52:25.365179 268726 Options.ignore_max_compaction_bytes_for_input: true -2023/07/23-15:52:25.365180 268726 Options.arena_block_size: 1048576 -2023/07/23-15:52:25.365180 268726 Options.soft_pending_compaction_bytes_limit: 68719476736 -2023/07/23-15:52:25.365181 268726 Options.hard_pending_compaction_bytes_limit: 274877906944 -2023/07/23-15:52:25.365182 268726 Options.disable_auto_compactions: 1 -2023/07/23-15:52:25.365184 268726 Options.compaction_style: kCompactionStyleLevel -2023/07/23-15:52:25.365185 268726 Options.compaction_pri: kMinOverlappingRatio -2023/07/23-15:52:25.365186 268726 Options.compaction_options_universal.size_ratio: 1 -2023/07/23-15:52:25.365187 268726 Options.compaction_options_universal.min_merge_width: 2 -2023/07/23-15:52:25.365188 268726 Options.compaction_options_universal.max_merge_width: 4294967295 -2023/07/23-15:52:25.365189 268726 Options.compaction_options_universal.max_size_amplification_percent: 200 -2023/07/23-15:52:25.365190 268726 Options.compaction_options_universal.compression_size_percent: -1 -2023/07/23-15:52:25.365191 268726 Options.compaction_options_universal.stop_style: kCompactionStopStyleTotalSize -2023/07/23-15:52:25.365191 268726 Options.compaction_options_fifo.max_table_files_size: 1073741824 -2023/07/23-15:52:25.365192 268726 Options.compaction_options_fifo.allow_compaction: 0 -2023/07/23-15:52:25.365200 268726 Options.table_properties_collectors: -2023/07/23-15:52:25.365201 268726 Options.inplace_update_support: 0 -2023/07/23-15:52:25.365202 268726 Options.inplace_update_num_locks: 10000 -2023/07/23-15:52:25.365203 268726 Options.memtable_prefix_bloom_size_ratio: 0.000000 -2023/07/23-15:52:25.365204 268726 Options.memtable_whole_key_filtering: 0 -2023/07/23-15:52:25.365205 268726 Options.memtable_huge_page_size: 0 -2023/07/23-15:52:25.365206 268726 Options.bloom_locality: 0 -2023/07/23-15:52:25.365209 268726 Options.max_successive_merges: 0 -2023/07/23-15:52:25.365210 268726 Options.optimize_filters_for_hits: 0 -2023/07/23-15:52:25.365210 268726 Options.paranoid_file_checks: 0 -2023/07/23-15:52:25.365212 268726 Options.force_consistency_checks: 1 -2023/07/23-15:52:25.365212 268726 Options.report_bg_io_stats: 0 -2023/07/23-15:52:25.365213 268726 Options.ttl: 2592000 -2023/07/23-15:52:25.365214 268726 
Options.periodic_compaction_seconds: 0 -2023/07/23-15:52:25.365215 268726 Options.preclude_last_level_data_seconds: 0 -2023/07/23-15:52:25.365216 268726 Options.preserve_internal_time_seconds: 0 -2023/07/23-15:52:25.365216 268726 Options.enable_blob_files: false -2023/07/23-15:52:25.365217 268726 Options.min_blob_size: 0 -2023/07/23-15:52:25.365218 268726 Options.blob_file_size: 268435456 -2023/07/23-15:52:25.365219 268726 Options.blob_compression_type: NoCompression -2023/07/23-15:52:25.365220 268726 Options.enable_blob_garbage_collection: false -2023/07/23-15:52:25.365221 268726 Options.blob_garbage_collection_age_cutoff: 0.250000 -2023/07/23-15:52:25.365222 268726 Options.blob_garbage_collection_force_threshold: 1.000000 -2023/07/23-15:52:25.365223 268726 Options.blob_compaction_readahead_size: 0 -2023/07/23-15:52:25.365224 268726 Options.blob_file_starting_level: 0 -2023/07/23-15:52:25.365224 268726 Options.experimental_mempurge_threshold: 0.000000 -2023/07/23-15:52:25.366096 268726 [db/version_set.cc:5713] Recovered from manifest file:path/for/rocksdb/MANIFEST-000001 succeeded,manifest_file_number is 1, next_file_number is 3, last_sequence is 0, log_number is 0,prev_log_number is 0,max_column_family is 0,min_log_number_to_keep is 0 -2023/07/23-15:52:25.366102 268726 [db/version_set.cc:5722] Column family [default] (ID 0), log number is 0 -2023/07/23-15:52:25.366138 268726 [db/db_impl/db_impl_open.cc:537] DB ID: fda2968c-dc40-46ae-93c7-6c66e47096c0 -2023/07/23-15:52:25.366245 268726 [db/version_set.cc:5180] Creating manifest 5 -2023/07/23-15:52:25.388349 268726 [db/db_impl/db_impl_open.cc:1977] SstFileManager instance 0x55cd839a1fa0 -2023/07/23-15:52:25.388375 268726 DB pointer 0x55cd839a4680 -2023/07/23-15:52:25.388438 268726 [WARN] [utilities/transactions/pessimistic_transaction_db.cc:262] Transaction write_policy is 0 -2023/07/23-15:52:25.388575 268747 [db/db_impl/db_impl.cc:1085] ------- DUMPING STATS ------- -2023/07/23-15:52:25.388584 268747 [db/db_impl/db_impl.cc:1086] -** DB Stats ** -Uptime(secs): 0.0 total, 0.0 interval -Cumulative writes: 0 writes, 0 keys, 0 commit groups, 0.0 writes per commit group, ingest: 0.00 GB, 0.00 MB/s -Cumulative WAL: 0 writes, 0 syncs, 0.00 writes per sync, written: 0.00 GB, 0.00 MB/s -Cumulative stall: 00:00:0.000 H:M:S, 0.0 percent -Interval writes: 0 writes, 0 keys, 0 commit groups, 0.0 writes per commit group, ingest: 0.00 MB, 0.00 MB/s -Interval WAL: 0 writes, 0 syncs, 0.00 writes per sync, written: 0.00 GB, 0.00 MB/s -Interval stall: 00:00:0.000 H:M:S, 0.0 percent -Write Stall (count): write-buffer-manager-limit-stops: 0, -** Compaction Stats [default] ** -Level Files Size Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) CompMergeCPU(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop Rblob(GB) Wblob(GB) ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Sum 0/0 0.00 KB 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00 0.00 0 0.000 0 0 0.0 0.0 - Int 0/0 0.00 KB 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.00 0.00 0 0.000 0 0 0.0 0.0 - -** Compaction Stats [default] ** -Priority Files Size Score Read(GB) Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) Comp(sec) CompMergeCPU(sec) Comp(cnt) Avg(sec) KeyIn KeyDrop Rblob(GB) Wblob(GB) 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - -Blob file count: 0, total size: 0.0 GB, garbage size: 0.0 GB, space amp: 0.0 - -Uptime(secs): 0.0 total, 0.0 interval -Flush(GB): cumulative 0.000, interval 0.000 -AddFile(GB): cumulative 0.000, interval 0.000 -AddFile(Total Files): cumulative 0, interval 0 -AddFile(L0 Files): cumulative 0, interval 0 -AddFile(Keys): cumulative 0, interval 0 -Cumulative compaction: 0.00 GB write, 0.00 MB/s write, 0.00 GB read, 0.00 MB/s read, 0.0 seconds -Interval compaction: 0.00 GB write, 0.00 MB/s write, 0.00 GB read, 0.00 MB/s read, 0.0 seconds -Write Stall (count): cf-l0-file-count-limit-delays-with-ongoing-compaction: 0, cf-l0-file-count-limit-stops-with-ongoing-compaction: 0, l0-file-count-limit-delays: 0, l0-file-count-limit-stops: 0, memtable-limit-delays: 0, memtable-limit-stops: 0, pending-compaction-bytes-delays: 0, pending-compaction-bytes-stops: 0, total-delays: 0, total-stops: 0, Block cache LRUCache@0x55cd8399dff0#268726 capacity: 8.00 MB usage: 0.08 KB table_size: 256 occupancy: 87 collections: 1 last_copies: 0 last_secs: 2.9e-05 secs_since: 0 -Block cache entry stats(count,size,portion): Misc(1,0.00 KB,0%) - -** File Read Latency Histogram By Level [default] ** -2023/07/23-15:52:25.401583 268726 [db/db_impl/db_impl.cc:1187] SetOptions() on column family [default], inputs: -2023/07/23-15:52:25.401592 268726 [db/db_impl/db_impl.cc:1190] disable_auto_compactions: false -2023/07/23-15:52:25.401593 268726 [db/db_impl/db_impl.cc:1194] [default] SetOptions() succeeded -2023/07/23-15:52:25.401594 268726 [options/cf_options.cc:1004] write_buffer_size: 67108864 -2023/07/23-15:52:25.401595 268726 [options/cf_options.cc:1006] max_write_buffer_number: 2 -2023/07/23-15:52:25.401595 268726 [options/cf_options.cc:1009] arena_block_size: 1048576 -2023/07/23-15:52:25.401596 268726 [options/cf_options.cc:1011] memtable_prefix_bloom_ratio: 0.000000 -2023/07/23-15:52:25.401599 268726 [options/cf_options.cc:1013] memtable_whole_key_filtering: 0 -2023/07/23-15:52:25.401599 268726 [options/cf_options.cc:1016] memtable_huge_page_size: 0 -2023/07/23-15:52:25.401600 268726 [options/cf_options.cc:1019] max_successive_merges: 0 -2023/07/23-15:52:25.401600 268726 [options/cf_options.cc:1022] inplace_update_num_locks: 10000 -2023/07/23-15:52:25.401601 268726 [options/cf_options.cc:1026] prefix_extractor: nullptr -2023/07/23-15:52:25.401602 268726 [options/cf_options.cc:1028] disable_auto_compactions: 0 -2023/07/23-15:52:25.401602 268726 [options/cf_options.cc:1030] soft_pending_compaction_bytes_limit: 68719476736 -2023/07/23-15:52:25.401603 268726 [options/cf_options.cc:1032] hard_pending_compaction_bytes_limit: 274877906944 -2023/07/23-15:52:25.401603 268726 [options/cf_options.cc:1034] level0_file_num_compaction_trigger: 4 -2023/07/23-15:52:25.401603 268726 [options/cf_options.cc:1036] level0_slowdown_writes_trigger: 20 -2023/07/23-15:52:25.401604 268726 [options/cf_options.cc:1038] level0_stop_writes_trigger: 36 -2023/07/23-15:52:25.401605 268726 [options/cf_options.cc:1040] max_compaction_bytes: 1677721600 -2023/07/23-15:52:25.401605 268726 [options/cf_options.cc:1042] ignore_max_compaction_bytes_for_input: true -2023/07/23-15:52:25.401606 268726 [options/cf_options.cc:1044] target_file_size_base: 67108864 -2023/07/23-15:52:25.401606 268726 [options/cf_options.cc:1046] target_file_size_multiplier: 1 
-2023/07/23-15:52:25.401607 268726 [options/cf_options.cc:1048] max_bytes_for_level_base: 268435456 -2023/07/23-15:52:25.401607 268726 [options/cf_options.cc:1050] max_bytes_for_level_multiplier: 10.000000 -2023/07/23-15:52:25.401609 268726 [options/cf_options.cc:1052] ttl: 2592000 -2023/07/23-15:52:25.401609 268726 [options/cf_options.cc:1054] periodic_compaction_seconds: 0 -2023/07/23-15:52:25.401611 268726 [options/cf_options.cc:1068] max_bytes_for_level_multiplier_additional: 1, 1, 1, 1, 1, 1, 1 -2023/07/23-15:52:25.401612 268726 [options/cf_options.cc:1070] max_sequential_skip_in_iterations: 8 -2023/07/23-15:52:25.401612 268726 [options/cf_options.cc:1072] check_flush_compaction_key_order: 1 -2023/07/23-15:52:25.401613 268726 [options/cf_options.cc:1074] paranoid_file_checks: 0 -2023/07/23-15:52:25.401613 268726 [options/cf_options.cc:1076] report_bg_io_stats: 0 -2023/07/23-15:52:25.401614 268726 [options/cf_options.cc:1078] compression: 1 -2023/07/23-15:52:25.401614 268726 [options/cf_options.cc:1081] experimental_mempurge_threshold: 0.000000 -2023/07/23-15:52:25.401615 268726 [options/cf_options.cc:1085] compaction_options_universal.size_ratio : 1 -2023/07/23-15:52:25.401615 268726 [options/cf_options.cc:1087] compaction_options_universal.min_merge_width : 2 -2023/07/23-15:52:25.401616 268726 [options/cf_options.cc:1089] compaction_options_universal.max_merge_width : -1 -2023/07/23-15:52:25.401616 268726 [options/cf_options.cc:1092] compaction_options_universal.max_size_amplification_percent : 200 -2023/07/23-15:52:25.401617 268726 [options/cf_options.cc:1095] compaction_options_universal.compression_size_percent : -1 -2023/07/23-15:52:25.401617 268726 [options/cf_options.cc:1097] compaction_options_universal.stop_style : 1 -2023/07/23-15:52:25.401618 268726 [options/cf_options.cc:1100] compaction_options_universal.allow_trivial_move : 0 -2023/07/23-15:52:25.401618 268726 [options/cf_options.cc:1102] compaction_options_universal.incremental : 0 -2023/07/23-15:52:25.401619 268726 [options/cf_options.cc:1106] compaction_options_fifo.max_table_files_size : 1073741824 -2023/07/23-15:52:25.401619 268726 [options/cf_options.cc:1108] compaction_options_fifo.allow_compaction : 0 -2023/07/23-15:52:25.401620 268726 [options/cf_options.cc:1112] enable_blob_files: false -2023/07/23-15:52:25.401620 268726 [options/cf_options.cc:1114] min_blob_size: 0 -2023/07/23-15:52:25.401621 268726 [options/cf_options.cc:1116] blob_file_size: 268435456 -2023/07/23-15:52:25.401622 268726 [options/cf_options.cc:1118] blob_compression_type: NoCompression -2023/07/23-15:52:25.401622 268726 [options/cf_options.cc:1120] enable_blob_garbage_collection: false -2023/07/23-15:52:25.401623 268726 [options/cf_options.cc:1122] blob_garbage_collection_age_cutoff: 0.250000 -2023/07/23-15:52:25.401623 268726 [options/cf_options.cc:1124] blob_garbage_collection_force_threshold: 1.000000 -2023/07/23-15:52:25.401624 268726 [options/cf_options.cc:1126] blob_compaction_readahead_size: 0 -2023/07/23-15:52:25.401629 268726 [options/cf_options.cc:1128] blob_file_starting_level: 0 -2023/07/23-15:52:25.401630 268726 [options/cf_options.cc:1132] prepopulate_blob_cache: disable -2023/07/23-15:52:25.401631 268726 [options/cf_options.cc:1134] last_level_temperature: 0 -2023/07/23-15:52:25.402034 268726 [db/db_impl/db_impl.cc:490] Shutdown: canceling all background work -2023/07/23-15:52:25.402266 268726 [db/db_impl/db_impl.cc:692] Shutdown complete diff --git a/smirk/path/for/rocksdb/MANIFEST-000005 
b/smirk/path/for/rocksdb/MANIFEST-000005 deleted file mode 100644 index d0fb94bd2934a563da72946c03fd8c5c7cf0f6f1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 79 zcmZS8)^KKEU<~`Yer`cxQDRAc(HCZ(C>91r eCI%LUKRRkZ*%%l(8JO8v81~k5W Date: Sun, 23 Jul 2023 22:05:58 +0100 Subject: [PATCH 10/15] collect benchmark --- smirk/Cargo.toml | 2 +- .../benches/{insert.rs => tree_benchmark.rs} | 21 +++++++++++++++++-- smirk/src/lib.rs | 17 --------------- 3 files changed, 20 insertions(+), 20 deletions(-) rename smirk/benches/{insert.rs => tree_benchmark.rs} (58%) diff --git a/smirk/Cargo.toml b/smirk/Cargo.toml index f3f9e736..861e74fa 100644 --- a/smirk/Cargo.toml +++ b/smirk/Cargo.toml @@ -26,5 +26,5 @@ rand = "0.8" rand_chacha = "0.3" [[bench]] -name = "insert" +name = "tree_benchmark" harness = false diff --git a/smirk/benches/insert.rs b/smirk/benches/tree_benchmark.rs similarity index 58% rename from smirk/benches/insert.rs rename to smirk/benches/tree_benchmark.rs index 52b033c4..49f5bf32 100644 --- a/smirk/benches/insert.rs +++ b/smirk/benches/tree_benchmark.rs @@ -3,7 +3,7 @@ use std::hint::black_box; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use rand::{Rng, SeedableRng}; use rand_chacha::ChaChaRng; -use smirk::smirk; +use smirk::{smirk, MerkleTree}; pub fn insert_benchmark(c: &mut Criterion) { let mut rng = ChaChaRng::from_seed([0; 32]); @@ -25,9 +25,26 @@ pub fn insert_benchmark(c: &mut Criterion) { ); } +pub fn collect_benchmark(c: &mut Criterion) { + let mut rng = ChaChaRng::from_seed([0; 32]); + let mut nums = vec![0; 1000]; + rng.fill(nums.as_mut_slice()); + + c.bench_with_input( + BenchmarkId::new("insert", "1k random"), + &nums.as_slice(), + |bencher, nums| { + bencher.iter(|| { + let tree: MerkleTree<_, _> = nums.iter().copied().map(|i| (i, i)).collect(); + black_box(tree); + }); + }, + ); +} + criterion_group! { name = benches; config = Criterion::default().sample_size(10); - targets = insert_benchmark + targets = insert_benchmark, collect_benchmark } criterion_main!(benches); diff --git a/smirk/src/lib.rs b/smirk/src/lib.rs index 99796cc1..eaba7be2 100644 --- a/smirk/src/lib.rs +++ b/smirk/src/lib.rs @@ -16,26 +16,9 @@ pub mod hash; pub mod storage; mod tree; -use std::time::Instant; pub use tree::{batch, key_value_hash, visitor::Visitor, MerkleTree, TreeNode}; #[cfg(test)] mod testing; -#[test] -fn foo() { - use rand::{Rng, SeedableRng}; - - let mut rng = rand_chacha::ChaChaRng::from_seed([0; 32]); - let mut nums = vec![0; 1000]; - rng.fill(nums.as_mut_slice()); - - let instant = Instant::now(); - let mut tree = smirk! {}; - for i in nums { - tree.insert(i, i); - } - - println!("{}", instant.elapsed().as_millis()); -} From 0bd3f8b22842a0d693c9037926bfd5a2a096063b Mon Sep 17 00:00:00 2001 From: Cameron Date: Mon, 24 Jul 2023 15:46:49 +0100 Subject: [PATCH 11/15] fmt --- gateway/src/lib.rs | 7 ++++--- smirk/benches/tree_benchmark.rs | 29 ++++++++++++++++++++++++++--- smirk/src/hash/hashable.rs | 1 - smirk/src/lib.rs | 1 - smirk/src/tree/mod.rs | 4 +++- smirk/src/tree/tests.rs | 29 +++++++++++++++++++++++++++-- 6 files changed, 60 insertions(+), 11 deletions(-) diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs index 04513c1a..e8d77197 100644 --- a/gateway/src/lib.rs +++ b/gateway/src/lib.rs @@ -305,10 +305,10 @@ fn reference_records( .ok_or(GatewayUserError::CollectionRecordIdNotFound)? 
.as_str() .ok_or(GatewayUserError::RecordIdNotString)?; - + let foreign_collection_id = collection_namespace.to_string() + "/" + &fr.collection; - + Ok( serde_json::json!({ "id": id, "collectionId": foreign_collection_id }), ) @@ -695,7 +695,8 @@ impl Gateway { instance = serde_json::to_string(&instance_json).unwrap_or_default(), args = serde_json::to_string(&args).unwrap_or_default(), auth = serde_json::to_string(&auth).unwrap_or_default(), - output = serde_json::to_string(&output.as_ref().map_err(|e| e.to_string())).unwrap_or_default(), + output = serde_json::to_string(&output.as_ref().map_err(|e| e.to_string())) + .unwrap_or_default(), "function output" ); diff --git a/smirk/benches/tree_benchmark.rs b/smirk/benches/tree_benchmark.rs index 49f5bf32..4837e1da 100644 --- a/smirk/benches/tree_benchmark.rs +++ b/smirk/benches/tree_benchmark.rs @@ -3,7 +3,10 @@ use std::hint::black_box; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; use rand::{Rng, SeedableRng}; use rand_chacha::ChaChaRng; -use smirk::{smirk, MerkleTree}; +use smirk::{ + batch::{Batch, Operation}, + smirk, MerkleTree, +}; pub fn insert_benchmark(c: &mut Criterion) { let mut rng = ChaChaRng::from_seed([0; 32]); @@ -31,7 +34,7 @@ pub fn collect_benchmark(c: &mut Criterion) { rng.fill(nums.as_mut_slice()); c.bench_with_input( - BenchmarkId::new("insert", "1k random"), + BenchmarkId::new("collect", "1k random"), &nums.as_slice(), |bencher, nums| { bencher.iter(|| { @@ -42,9 +45,29 @@ pub fn collect_benchmark(c: &mut Criterion) { ); } +pub fn batch_insert_benchmark(c: &mut Criterion) { + let mut rng = ChaChaRng::from_seed([0; 32]); + let mut nums = vec![0; 1000]; + rng.fill(nums.as_mut_slice()); + + let batch = Batch::from_operations(nums.into_iter().map(|i| Operation::Insert(i, i)).collect()); + + c.bench_with_input( + BenchmarkId::new("batch insert", "1k random"), + &batch, + |bencher, batch| { + bencher.iter(|| { + let mut tree = smirk! {}; + tree.apply(batch.clone()); + black_box(tree); + }); + }, + ); +} + criterion_group! 
{ name = benches; config = Criterion::default().sample_size(10); - targets = insert_benchmark, collect_benchmark + targets = insert_benchmark, collect_benchmark, batch_insert_benchmark } criterion_main!(benches); diff --git a/smirk/src/hash/hashable.rs b/smirk/src/hash/hashable.rs index cdaac6ce..3373d529 100644 --- a/smirk/src/hash/hashable.rs +++ b/smirk/src/hash/hashable.rs @@ -21,7 +21,6 @@ pub trait Hashable { fn hash(&self) -> Digest; } - impl Hashable for &T where T: Hashable, diff --git a/smirk/src/lib.rs b/smirk/src/lib.rs index eaba7be2..542707a3 100644 --- a/smirk/src/lib.rs +++ b/smirk/src/lib.rs @@ -21,4 +21,3 @@ pub use tree::{batch, key_value_hash, visitor::Visitor, MerkleTree, TreeNode}; #[cfg(test)] mod testing; - diff --git a/smirk/src/tree/mod.rs b/smirk/src/tree/mod.rs index a09d97e6..26d7a0ca 100644 --- a/smirk/src/tree/mod.rs +++ b/smirk/src/tree/mod.rs @@ -154,7 +154,9 @@ impl MerkleTree { let Some(mut node) = node else { return Box::new(TreeNode::new(key, value, None, None)) }; match key.cmp(&node.key) { - Ordering::Equal => return node, + Ordering::Equal => { + node.value = value; + } Ordering::Less => { node.left = Some(Self::insert_node(node.left.take(), key, value)); } diff --git a/smirk/src/tree/tests.rs b/smirk/src/tree/tests.rs index 01a0de51..d812d92d 100644 --- a/smirk/src/tree/tests.rs +++ b/smirk/src/tree/tests.rs @@ -1,6 +1,9 @@ use test_strategy::proptest; -use crate::{hash::{Hashable, Digest}, smirk, MerkleTree}; +use crate::{ + hash::{Digest, Hashable}, + smirk, MerkleTree, +}; #[test] fn simple_example() { @@ -26,7 +29,7 @@ fn insert_already_exists() { tree.insert(1, "world"); - assert_eq!(*tree.get(&1).unwrap(), "hello"); + assert_eq!(*tree.get(&1).unwrap(), "world"); } #[test] @@ -64,3 +67,25 @@ fn hash_of_leaf_is_correct() { assert_eq!(hash, expected); } + +#[test] +fn stays_balanced_in_order_inserts() { + let values = (0..1000).map(|i| (i, i)).collect(); + stays_balanced(values); +} + +#[proptest] +fn tree_stays_balanced(values: Vec<(i32, i32)>) { + stays_balanced(values); +} + +fn stays_balanced(values: Vec<(i32, i32)>) { + let mut tree = smirk! 
{}; + + for (key, value) in values { + tree.insert(key, value); + let balance = tree.inner.as_ref().unwrap().balance_factor(); + assert!(balance <= 1); + assert!(balance >= -1); + } +} From f8375e3e87ef816afda5acea689a1f6920f0bb88 Mon Sep 17 00:00:00 2001 From: Cameron Date: Tue, 25 Jul 2023 12:05:27 +0100 Subject: [PATCH 12/15] add docs to CI --- .github/workflows/test.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4afd90d2..2366f11b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,6 +4,7 @@ name: Test env: RUSTFLAGS: "-Dwarnings --cfg tracing_unstable" + RUSTDOCFLAGS: "-Dwarnings" jobs: cargo-build: @@ -157,3 +158,19 @@ jobs: - name: Run Clippy run: cargo clippy --all-targets --all-features + + docs: + name: Build smirk docs + runs-on: ubuntu-latest-16-cores + needs: cargo-build + steps: + - uses: actions/checkout@v3 + + - uses: Swatinem/rust-cache@v2 + + - name: Install protoc + run: | + sudo apt-get install -y protobuf-compiler + + - name: Run Clippy + run: cargo doc --all-features -psmirk From 362d2670bd3d9d99ae49184dd1f1bdaa9b4cd411 Mon Sep 17 00:00:00 2001 From: Cameron Date: Tue, 25 Jul 2023 12:38:09 +0100 Subject: [PATCH 13/15] update dockerfile to copy smirk --- docker/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index 5ce203a1..3d7ade9e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -46,6 +46,7 @@ COPY polybase ./polybase/ COPY indexer ./indexer/ COPY gateway ./gateway/ COPY solid ./solid/ +COPY smirk ./smirk/ RUN --mount=type=cache,target=/usr/local/cargo/registry \ cargo build $(if [ "$RELEASE" = "1" ]; then echo "--release"; fi) From 4a5f28871a4834e7e22f51922c1ec243d0403009 Mon Sep 17 00:00:00 2001 From: Cameron Date: Tue, 25 Jul 2023 12:45:30 +0100 Subject: [PATCH 14/15] update dockerfile to copy smirk again --- docker/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index 3d7ade9e..395d06fe 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -26,6 +26,7 @@ COPY polybase ./polybase/ COPY indexer ./indexer/ COPY gateway ./gateway/ COPY solid ./solid/ +COPY smirk ./smirk/ RUN cargo chef prepare --recipe-path /recipe.json From 689694092c2d89ee11b9cc65d648a956e25e44bf Mon Sep 17 00:00:00 2001 From: Cameron Date: Tue, 25 Jul 2023 14:13:03 +0100 Subject: [PATCH 15/15] add hashable impl for merkletree --- smirk/src/lib.rs | 2 +- smirk/src/tree/mod.rs | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/smirk/src/lib.rs b/smirk/src/lib.rs index 542707a3..af531a38 100644 --- a/smirk/src/lib.rs +++ b/smirk/src/lib.rs @@ -2,7 +2,7 @@ #![warn(clippy::pedantic)] #![deny(missing_docs)] #![deny(unsafe_code)] -#![deny(clippy::integer_arithmetic)] // explicitly choose wrapping/saturating/checked +#![deny(clippy::arithmetic_side_effects)] // explicitly choose wrapping/saturating/checked #![allow( clippy::module_name_repetitions, clippy::match_bool, // overly restrictive style lint diff --git a/smirk/src/tree/mod.rs b/smirk/src/tree/mod.rs index 26d7a0ca..dbc40b9e 100644 --- a/smirk/src/tree/mod.rs +++ b/smirk/src/tree/mod.rs @@ -94,6 +94,16 @@ impl PartialEq for MerkleTree { } } +impl Hashable for MerkleTree +where + K: Hashable, + V: Hashable, +{ + fn hash(&self) -> Digest { + self.root_hash() + } +} + impl Default for MerkleTree { fn default() -> Self { Self::new() @@ -146,6 +156,7 @@ impl MerkleTree { self.inner = 
Some(Self::insert_node(self.inner.take(), key, value)); } + #[allow(clippy::unnecessary_box_returns)] fn insert_node(node: Option<Box<TreeNode<K, V>>>, key: K, value: V) -> Box<TreeNode<K, V>> where K: Hashable + Ord,