diff --git a/ledger/src/shred.rs b/ledger/src/shred.rs
index 5462797a05b59f..0e01eebcae36bb 100644
--- a/ledger/src/shred.rs
+++ b/ledger/src/shred.rs
@@ -77,6 +77,7 @@ use {
 mod common;
 mod legacy;
+mod merkle;
 mod shred_code;
 mod shred_data;
 mod stats;
@@ -129,6 +130,8 @@ pub enum Error {
     InvalidDataSize { size: u16, payload: usize },
     #[error("Invalid erasure shard index: {0:?}")]
     InvalidErasureShardIndex(/*headers:*/ Box<dyn std::fmt::Debug>),
+    #[error("Invalid merkle proof")]
+    InvalidMerkleProof,
     #[error("Invalid num coding shreds: {0}")]
     InvalidNumCodingShreds(u16),
     #[error("Invalid parent_offset: {parent_offset}, slot: {slot}")]
@@ -137,12 +140,16 @@ pub enum Error {
     InvalidParentSlot { slot: Slot, parent_slot: Slot },
     #[error("Invalid payload size: {0}")]
     InvalidPayloadSize(/*payload size:*/ usize),
+    #[error("Invalid proof size: {0}")]
+    InvalidProofSize(/*proof_size:*/ u8),
     #[error("Invalid shred flags: {0}")]
     InvalidShredFlags(u8),
     #[error("Invalid shred type")]
     InvalidShredType,
     #[error("Invalid shred variant")]
     InvalidShredVariant,
+    #[error(transparent)]
+    IoError(#[from] std::io::Error),
 }
 
 #[repr(u8)]
@@ -171,6 +178,9 @@ pub enum ShredType {
 enum ShredVariant {
     LegacyCode, // 0b0101_1010
     LegacyData, // 0b1010_0101
+    // proof_size is the number of proof entries in the merkle tree branch.
+    MerkleCode(/*proof_size:*/ u8), // 0b0100_????
+    MerkleData(/*proof_size:*/ u8), // 0b1000_????
 }
 
 /// A common header that is present in data and code shred headers
@@ -325,6 +335,14 @@ impl Shred {
                 let shred = legacy::ShredData::from_payload(shred)?;
                 Self::from(ShredData::from(shred))
             }
+            ShredVariant::MerkleCode(_) => {
+                let shred = merkle::ShredCode::from_payload(shred)?;
+                Self::from(ShredCode::from(shred))
+            }
+            ShredVariant::MerkleData(_) => {
+                let shred = merkle::ShredData::from_payload(shred)?;
+                Self::from(ShredData::from(shred))
+            }
         })
     }
 
@@ -557,6 +575,12 @@ pub mod layout {
     pub(crate) fn get_signed_message_range(shred: &[u8]) -> Option<Range<usize>> {
         let range = match get_shred_variant(shred).ok()? {
             ShredVariant::LegacyCode | ShredVariant::LegacyData => legacy::SIGNED_MESSAGE_RANGE,
+            ShredVariant::MerkleCode(proof_size) => {
+                merkle::ShredCode::get_signed_message_range(proof_size)?
+            }
+            ShredVariant::MerkleData(proof_size) => {
+                merkle::ShredData::get_signed_message_range(proof_size)?
+            }
         };
         (shred.len() <= range.end).then(|| range)
     }
@@ -593,6 +617,8 @@ impl From<ShredVariant> for ShredType {
         match shred_variant {
             ShredVariant::LegacyCode => ShredType::Code,
             ShredVariant::LegacyData => ShredType::Data,
+            ShredVariant::MerkleCode(_) => ShredType::Code,
+            ShredVariant::MerkleData(_) => ShredType::Data,
         }
     }
 }
@@ -602,6 +628,8 @@ impl From<ShredVariant> for u8 {
         match shred_variant {
             ShredVariant::LegacyCode => u8::from(ShredType::Code),
             ShredVariant::LegacyData => u8::from(ShredType::Data),
+            ShredVariant::MerkleCode(proof_size) => proof_size | 0x40,
+            ShredVariant::MerkleData(proof_size) => proof_size | 0x80,
         }
     }
 }
@@ -614,7 +642,11 @@ impl TryFrom<u8> for ShredVariant {
         } else if shred_variant == u8::from(ShredType::Data) {
             Ok(ShredVariant::LegacyData)
         } else {
-            Err(Error::InvalidShredVariant)
+            match shred_variant & 0xF0 {
+                0x40 => Ok(ShredVariant::MerkleCode(shred_variant & 0x0F)),
+                0x80 => Ok(ShredVariant::MerkleData(shred_variant & 0x0F)),
+                _ => Err(Error::InvalidShredVariant),
+            }
         }
     }
 }
@@ -673,7 +705,7 @@ pub fn max_entries_per_n_shred(
     num_shreds: u64,
     shred_data_size: Option<usize>,
 ) -> u64 {
-    let data_buffer_size = ShredData::capacity().unwrap();
+    let data_buffer_size = ShredData::capacity(/*merkle_proof_size:*/ None).unwrap();
     let shred_data_size = shred_data_size.unwrap_or(data_buffer_size) as u64;
     let vec_size = bincode::serialized_size(&vec![entry]).unwrap();
     let entry_size = bincode::serialized_size(entry).unwrap();
@@ -786,7 +818,7 @@ mod tests {
         );
         assert_eq!(
             SIZE_OF_SHRED_VARIANT,
-            bincode::serialized_size(&ShredVariant::LegacyCode).unwrap() as usize
+            bincode::serialized_size(&ShredVariant::MerkleCode(15)).unwrap() as usize
         );
         assert_eq!(
             SIZE_OF_SHRED_SLOT,
@@ -988,6 +1020,74 @@ mod tests {
             bincode::deserialize::<ShredVariant>(&[0b1010_0101]),
             Ok(ShredVariant::LegacyData)
         );
+        // Merkle coding shred.
+        assert_eq!(u8::from(ShredVariant::MerkleCode(5)), 0b0100_0101);
+        assert_eq!(
+            ShredType::from(ShredVariant::MerkleCode(5)),
+            ShredType::Code
+        );
+        assert_matches!(
+            ShredVariant::try_from(0b0100_0101),
+            Ok(ShredVariant::MerkleCode(5))
+        );
+        let buf = bincode::serialize(&ShredVariant::MerkleCode(5)).unwrap();
+        assert_eq!(buf, vec![0b0100_0101]);
+        assert_matches!(
+            bincode::deserialize::<ShredVariant>(&[0b0100_0101]),
+            Ok(ShredVariant::MerkleCode(5))
+        );
+        for proof_size in 0..=15u8 {
+            let byte = proof_size | 0b0100_0000;
+            assert_eq!(u8::from(ShredVariant::MerkleCode(proof_size)), byte);
+            assert_eq!(
+                ShredType::from(ShredVariant::MerkleCode(proof_size)),
+                ShredType::Code
+            );
+            assert_eq!(
+                ShredVariant::try_from(byte).unwrap(),
+                ShredVariant::MerkleCode(proof_size)
+            );
+            let buf = bincode::serialize(&ShredVariant::MerkleCode(proof_size)).unwrap();
+            assert_eq!(buf, vec![byte]);
+            assert_eq!(
+                bincode::deserialize::<ShredVariant>(&[byte]).unwrap(),
+                ShredVariant::MerkleCode(proof_size)
+            );
+        }
+        // Merkle data shred.
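+        // 0b1000_1010: high nibble 0x8 selects MerkleData; the low nibble
+        // carries proof_size = 10, mirroring the TryFrom<u8> impl above.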
+        assert_eq!(u8::from(ShredVariant::MerkleData(10)), 0b1000_1010);
+        assert_eq!(
+            ShredType::from(ShredVariant::MerkleData(10)),
+            ShredType::Data
+        );
+        assert_matches!(
+            ShredVariant::try_from(0b1000_1010),
+            Ok(ShredVariant::MerkleData(10))
+        );
+        let buf = bincode::serialize(&ShredVariant::MerkleData(10)).unwrap();
+        assert_eq!(buf, vec![0b1000_1010]);
+        assert_matches!(
+            bincode::deserialize::<ShredVariant>(&[0b1000_1010]),
+            Ok(ShredVariant::MerkleData(10))
+        );
+        for proof_size in 0..=15u8 {
+            let byte = proof_size | 0b1000_0000;
+            assert_eq!(u8::from(ShredVariant::MerkleData(proof_size)), byte);
+            assert_eq!(
+                ShredType::from(ShredVariant::MerkleData(proof_size)),
+                ShredType::Data
+            );
+            assert_eq!(
+                ShredVariant::try_from(byte).unwrap(),
+                ShredVariant::MerkleData(proof_size)
+            );
+            let buf = bincode::serialize(&ShredVariant::MerkleData(proof_size)).unwrap();
+            assert_eq!(buf, vec![byte]);
+            assert_eq!(
+                bincode::deserialize::<ShredVariant>(&[byte]).unwrap(),
+                ShredVariant::MerkleData(proof_size)
+            );
+        }
     }
 
     #[test]
diff --git a/ledger/src/shred/common.rs b/ledger/src/shred/common.rs
index 910f7ecc63db01..3478001a0cbff5 100644
--- a/ledger/src/shred/common.rs
+++ b/ledger/src/shred/common.rs
@@ -4,6 +4,7 @@ macro_rules! dispatch {
         $vis fn $name(&self $(, $arg:$ty)?) $(-> $out)? {
             match self {
                 Self::Legacy(shred) => shred.$name($($arg, )?),
+                Self::Merkle(shred) => shred.$name($($arg, )?),
             }
         }
     };
@@ -12,6 +13,7 @@
         $vis fn $name(self $(, $arg:$ty)?) $(-> $out)? {
             match self {
                 Self::Legacy(shred) => shred.$name($($arg, )?),
+                Self::Merkle(shred) => shred.$name($($arg, )?),
             }
         }
     };
@@ -20,6 +22,7 @@
         $vis fn $name(&mut self $(, $arg:$ty)?) $(-> $out)? {
             match self {
                 Self::Legacy(shred) => shred.$name($($arg, )?),
+                Self::Merkle(shred) => shred.$name($($arg, )?),
             }
         }
     }
diff --git a/ledger/src/shred/merkle.rs b/ledger/src/shred/merkle.rs
new file mode 100644
index 00000000000000..7eb3bf2d69fe16
--- /dev/null
+++ b/ledger/src/shred/merkle.rs
@@ -0,0 +1,528 @@
+use {
+    crate::shred::{
+        common::impl_shred_common,
+        shred_code, shred_data,
+        traits::{Shred, ShredCode as ShredCodeTrait, ShredData as ShredDataTrait},
+        CodingShredHeader, DataShredHeader, Error, ShredCommonHeader, ShredFlags, ShredVariant,
+        SIZE_OF_CODING_SHRED_HEADERS, SIZE_OF_COMMON_SHRED_HEADER, SIZE_OF_DATA_SHRED_HEADERS,
+        SIZE_OF_SIGNATURE,
+    },
+    solana_perf::packet::deserialize_from_with_limit,
+    solana_sdk::{
+        clock::Slot,
+        hash::{hashv, Hash},
+        signature::Signature,
+    },
+    static_assertions::const_assert_eq,
+    std::{
+        io::{Cursor, Seek, SeekFrom},
+        iter::repeat_with,
+        ops::Range,
+    },
+};
+
+const_assert_eq!(SIZE_OF_MERKLE_ROOT, 20);
+const SIZE_OF_MERKLE_ROOT: usize = std::mem::size_of::<MerkleRoot>();
+const_assert_eq!(SIZE_OF_MERKLE_PROOF_ENTRY, 20);
+const SIZE_OF_MERKLE_PROOF_ENTRY: usize = std::mem::size_of::<MerkleProofEntry>();
+const_assert_eq!(ShredData::SIZE_OF_PAYLOAD, 1203);
+
+// Defense against second preimage attack:
+// https://en.wikipedia.org/wiki/Merkle_tree#Second_preimage_attack
+const MERKLE_HASH_PREFIX_LEAF: &[u8] = &[0x00];
+const MERKLE_HASH_PREFIX_NODE: &[u8] = &[0x01];
+
+type MerkleRoot = MerkleProofEntry;
+type MerkleProofEntry = [u8; 20];
+
+// Layout: {common, data} headers | data buffer | merkle branch
+// The slice past signature and before merkle branch is erasure coded.
+// Same slice is hashed to generate merkle tree.
+// The root of merkle tree is signed.
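+// The merkle branch is the 20-byte root followed by proof_size 20-byte proof
+// entries, so capacity(proof_size) below shrinks by SIZE_OF_MERKLE_PROOF_ENTRY
+// for every level added to the merkle tree.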
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ShredData {
+    common_header: ShredCommonHeader,
+    data_header: DataShredHeader,
+    merkle_branch: MerkleBranch,
+    payload: Vec<u8>,
+}
+
+// Layout: {common, coding} headers | erasure coded shard | merkle branch
+// The slice past signature and before merkle branch is hashed to generate
+// merkle tree. The root of merkle tree is signed.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ShredCode {
+    common_header: ShredCommonHeader,
+    coding_header: CodingShredHeader,
+    merkle_branch: MerkleBranch,
+    payload: Vec<u8>,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct MerkleBranch {
+    root: MerkleRoot,
+    proof: Vec<MerkleProofEntry>,
+}
+
+impl ShredData {
+    // proof_size is the number of proof entries in the merkle tree branch.
+    fn proof_size(&self) -> Result<u8, Error> {
+        match self.common_header.shred_variant {
+            ShredVariant::MerkleData(proof_size) => Ok(proof_size),
+            _ => Err(Error::InvalidShredVariant),
+        }
+    }
+
+    // Maximum size of ledger data that can be embedded in a data-shred.
+    // Also equal to:
+    //   ShredCode::size_of_erasure_encoded_slice(proof_size).unwrap()
+    //       - SIZE_OF_DATA_SHRED_HEADERS
+    //       + SIZE_OF_SIGNATURE
+    pub(super) fn capacity(proof_size: u8) -> Result<usize, Error> {
+        Self::SIZE_OF_PAYLOAD
+            .checked_sub(
+                SIZE_OF_DATA_SHRED_HEADERS
+                    + SIZE_OF_MERKLE_ROOT
+                    + usize::from(proof_size) * SIZE_OF_MERKLE_PROOF_ENTRY,
+            )
+            .ok_or(Error::InvalidProofSize(proof_size))
+    }
+
+    pub(super) fn get_signed_message_range(proof_size: u8) -> Option<Range<usize>> {
+        let data_buffer_size = Self::capacity(proof_size).ok()?;
+        let offset = SIZE_OF_DATA_SHRED_HEADERS + data_buffer_size;
+        Some(offset..offset + SIZE_OF_MERKLE_ROOT)
+    }
+
+    fn merkle_tree_node(&self) -> Result<Hash, Error> {
+        let chunk = self.erasure_shard_as_slice()?;
+        Ok(hashv(&[MERKLE_HASH_PREFIX_LEAF, chunk]))
+    }
+
+    fn verify_merkle_proof(&self) -> Result<bool, Error> {
+        let node = self.merkle_tree_node()?;
+        let index = self.erasure_shard_index()?;
+        Ok(verify_merkle_proof(index, node, &self.merkle_branch))
+    }
+}
+
+impl ShredCode {
+    // proof_size is the number of proof entries in the merkle tree branch.
+    fn proof_size(&self) -> Result<u8, Error> {
+        match self.common_header.shred_variant {
+            ShredVariant::MerkleCode(proof_size) => Ok(proof_size),
+            _ => Err(Error::InvalidShredVariant),
+        }
+    }
+
+    // Size of the chunk of payload which will be erasure coded.
+    fn size_of_erasure_encoded_slice(proof_size: u8) -> Result<usize, Error> {
+        // Merkle branch is generated and signed after coding shreds are
+        // generated. Coding shred headers cannot be erasure coded either.
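+        // E.g. with ShredCode::SIZE_OF_PAYLOAD == 1228 and proof_size == 6,
+        // the shard is 1228 - 89 - 20 - 120 == 999 bytes, where 89 is
+        // SIZE_OF_CODING_SHRED_HEADERS (illustrative arithmetic only).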
+        Self::SIZE_OF_PAYLOAD
+            .checked_sub(
+                SIZE_OF_CODING_SHRED_HEADERS
+                    + SIZE_OF_MERKLE_ROOT
+                    + SIZE_OF_MERKLE_PROOF_ENTRY * usize::from(proof_size),
+            )
+            .ok_or(Error::InvalidProofSize(proof_size))
+    }
+
+    fn merkle_tree_node(&self) -> Result<Hash, Error> {
+        let proof_size = self.proof_size()?;
+        let shard_size = Self::size_of_erasure_encoded_slice(proof_size)?;
+        let chunk = self
+            .payload
+            .get(SIZE_OF_SIGNATURE..SIZE_OF_CODING_SHRED_HEADERS + shard_size)
+            .ok_or(Error::InvalidPayloadSize(self.payload.len()))?;
+        Ok(hashv(&[MERKLE_HASH_PREFIX_LEAF, chunk]))
+    }
+
+    fn verify_merkle_proof(&self) -> Result<bool, Error> {
+        let node = self.merkle_tree_node()?;
+        let index = self.erasure_shard_index()?;
+        Ok(verify_merkle_proof(index, node, &self.merkle_branch))
+    }
+
+    pub(super) fn get_signed_message_range(proof_size: u8) -> Option<Range<usize>> {
+        let offset =
+            SIZE_OF_CODING_SHRED_HEADERS + Self::size_of_erasure_encoded_slice(proof_size).ok()?;
+        Some(offset..offset + SIZE_OF_MERKLE_ROOT)
+    }
+
+    pub(super) fn erasure_mismatch(&self, other: &ShredCode) -> bool {
+        shred_code::erasure_mismatch(self, other)
+            || self.merkle_branch.root != other.merkle_branch.root
+            || self.common_header.signature != other.common_header.signature
+    }
+}
+
+impl Shred for ShredData {
+    impl_shred_common!();
+
+    // Also equal to:
+    //   SIZE_OF_DATA_SHRED_HEADERS
+    //       + ShredData::capacity(proof_size).unwrap()
+    //       + SIZE_OF_MERKLE_ROOT
+    //       + usize::from(proof_size) * SIZE_OF_MERKLE_PROOF_ENTRY
+    const SIZE_OF_PAYLOAD: usize =
+        ShredCode::SIZE_OF_PAYLOAD - SIZE_OF_CODING_SHRED_HEADERS + SIZE_OF_SIGNATURE;
+
+    fn from_payload(mut payload: Vec<u8>) -> Result<Self, Error> {
+        if payload.len() < Self::SIZE_OF_PAYLOAD {
+            return Err(Error::InvalidPayloadSize(payload.len()));
+        }
+        payload.truncate(Self::SIZE_OF_PAYLOAD);
+        let mut cursor = Cursor::new(&payload[..]);
+        let common_header: ShredCommonHeader = deserialize_from_with_limit(&mut cursor)?;
+        let proof_size = match common_header.shred_variant {
+            ShredVariant::MerkleData(proof_size) => proof_size,
+            _ => return Err(Error::InvalidShredVariant),
+        };
+        let data_header = deserialize_from_with_limit(&mut cursor)?;
+        // Skip data buffer.
+        let data_buffer_size = Self::capacity(proof_size)?;
+        let data_buffer_size = i64::try_from(data_buffer_size).unwrap();
+        cursor.seek(SeekFrom::Current(data_buffer_size))?;
+        // Deserialize merkle branch.
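+        // The branch trails the data buffer: a 20-byte root followed by
+        // proof_size fixed-size proof entries, each read with the same
+        // bounded deserializer.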
+        let root = deserialize_from_with_limit(&mut cursor)?;
+        let proof = repeat_with(|| deserialize_from_with_limit(&mut cursor))
+            .take(usize::from(proof_size))
+            .collect::<Result<_, _>>()?;
+        let merkle_branch = MerkleBranch { root, proof };
+        let shred = Self {
+            common_header,
+            data_header,
+            merkle_branch,
+            payload,
+        };
+        shred.sanitize().map(|_| shred)
+    }
+
+    fn erasure_shard_index(&self) -> Result<usize, Error> {
+        shred_data::erasure_shard_index(self).ok_or_else(|| {
+            let headers = Box::new((self.common_header, self.data_header));
+            Error::InvalidErasureShardIndex(headers)
+        })
+    }
+
+    fn erasure_shard(self) -> Result<Vec<u8>, Error> {
+        if self.payload.len() != Self::SIZE_OF_PAYLOAD {
+            return Err(Error::InvalidPayloadSize(self.payload.len()));
+        }
+        let proof_size = self.proof_size()?;
+        let data_buffer_size = Self::capacity(proof_size)?;
+        let mut shard = self.payload;
+        shard.truncate(SIZE_OF_DATA_SHRED_HEADERS + data_buffer_size);
+        shard.drain(0..SIZE_OF_SIGNATURE);
+        Ok(shard)
+    }
+
+    fn erasure_shard_as_slice(&self) -> Result<&[u8], Error> {
+        if self.payload.len() != Self::SIZE_OF_PAYLOAD {
+            return Err(Error::InvalidPayloadSize(self.payload.len()));
+        }
+        let proof_size = self.proof_size()?;
+        let data_buffer_size = Self::capacity(proof_size)?;
+        self.payload
+            .get(SIZE_OF_SIGNATURE..SIZE_OF_DATA_SHRED_HEADERS + data_buffer_size)
+            .ok_or(Error::InvalidPayloadSize(self.payload.len()))
+    }
+
+    fn sanitize(&self) -> Result<(), Error> {
+        match self.common_header.shred_variant {
+            ShredVariant::MerkleData(proof_size) => {
+                if self.merkle_branch.proof.len() != usize::from(proof_size) {
+                    return Err(Error::InvalidProofSize(proof_size));
+                }
+            }
+            _ => return Err(Error::InvalidShredVariant),
+        }
+        if !self.verify_merkle_proof()? {
+            return Err(Error::InvalidMerkleProof);
+        }
+        shred_data::sanitize(self)
+    }
+
+    fn signed_message(&self) -> &[u8] {
+        self.merkle_branch.root.as_ref()
+    }
+}
+
+impl Shred for ShredCode {
+    impl_shred_common!();
+    const SIZE_OF_PAYLOAD: usize = shred_code::ShredCode::SIZE_OF_PAYLOAD;
+
+    fn from_payload(mut payload: Vec<u8>) -> Result<Self, Error> {
+        let mut cursor = Cursor::new(&payload[..]);
+        let common_header: ShredCommonHeader = deserialize_from_with_limit(&mut cursor)?;
+        let proof_size = match common_header.shred_variant {
+            ShredVariant::MerkleCode(proof_size) => proof_size,
+            _ => return Err(Error::InvalidShredVariant),
+        };
+        let coding_header = deserialize_from_with_limit(&mut cursor)?;
+        // Skip erasure code shard.
+        let shard_size = Self::size_of_erasure_encoded_slice(proof_size)?;
+        let shard_size = i64::try_from(shard_size).unwrap();
+        cursor.seek(SeekFrom::Current(shard_size))?;
+        // Deserialize merkle branch.
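+        // Same trailing layout as data shreds: root first, then proof_size
+        // proof entries.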
+        let root = deserialize_from_with_limit(&mut cursor)?;
+        let proof = repeat_with(|| deserialize_from_with_limit(&mut cursor))
+            .take(usize::from(proof_size))
+            .collect::<Result<_, _>>()?;
+        let merkle_branch = MerkleBranch { root, proof };
+        // see: https://github.com/solana-labs/solana/pull/10109
+        payload.truncate(Self::SIZE_OF_PAYLOAD);
+        let shred = Self {
+            common_header,
+            coding_header,
+            merkle_branch,
+            payload,
+        };
+        shred.sanitize().map(|_| shred)
+    }
+
+    fn erasure_shard_index(&self) -> Result<usize, Error> {
+        shred_code::erasure_shard_index(self).ok_or_else(|| {
+            let headers = Box::new((self.common_header, self.coding_header));
+            Error::InvalidErasureShardIndex(headers)
+        })
+    }
+
+    fn erasure_shard(self) -> Result<Vec<u8>, Error> {
+        if self.payload.len() != Self::SIZE_OF_PAYLOAD {
+            return Err(Error::InvalidPayloadSize(self.payload.len()));
+        }
+        let proof_size = self.proof_size()?;
+        let shard_size = Self::size_of_erasure_encoded_slice(proof_size)?;
+        let mut shard = self.payload;
+        shard.drain(..SIZE_OF_CODING_SHRED_HEADERS);
+        shard.truncate(shard_size);
+        Ok(shard)
+    }
+
+    fn erasure_shard_as_slice(&self) -> Result<&[u8], Error> {
+        if self.payload.len() != Self::SIZE_OF_PAYLOAD {
+            return Err(Error::InvalidPayloadSize(self.payload.len()));
+        }
+        let proof_size = self.proof_size()?;
+        let shard_size = Self::size_of_erasure_encoded_slice(proof_size)?;
+        self.payload
+            .get(SIZE_OF_CODING_SHRED_HEADERS..SIZE_OF_CODING_SHRED_HEADERS + shard_size)
+            .ok_or(Error::InvalidPayloadSize(self.payload.len()))
+    }
+
+    fn sanitize(&self) -> Result<(), Error> {
+        match self.common_header.shred_variant {
+            ShredVariant::MerkleCode(proof_size) => {
+                if self.merkle_branch.proof.len() != usize::from(proof_size) {
+                    return Err(Error::InvalidProofSize(proof_size));
+                }
+            }
+            _ => return Err(Error::InvalidShredVariant),
+        }
+        if !self.verify_merkle_proof()? {
+            return Err(Error::InvalidMerkleProof);
+        }
+        shred_code::sanitize(self)
+    }
+
+    fn signed_message(&self) -> &[u8] {
+        self.merkle_branch.root.as_ref()
+    }
+}
+
+impl ShredDataTrait for ShredData {
+    #[inline]
+    fn data_header(&self) -> &DataShredHeader {
+        &self.data_header
+    }
+
+    fn data(&self) -> Result<&[u8], Error> {
+        let proof_size = self.proof_size()?;
+        let data_buffer_size = Self::capacity(proof_size)?;
+        let size = usize::from(self.data_header.size);
+        if size > self.payload.len()
+            || size < SIZE_OF_DATA_SHRED_HEADERS
+            || size > SIZE_OF_DATA_SHRED_HEADERS + data_buffer_size
+        {
+            return Err(Error::InvalidDataSize {
+                size: self.data_header.size,
+                payload: self.payload.len(),
+            });
+        }
+        Ok(&self.payload[SIZE_OF_DATA_SHRED_HEADERS..size])
+    }
+
+    // Only for tests.
+    fn set_last_in_slot(&mut self) {
+        self.data_header.flags |= ShredFlags::LAST_SHRED_IN_SLOT;
+        let buffer = &mut self.payload[SIZE_OF_COMMON_SHRED_HEADER..];
+        bincode::serialize_into(buffer, &self.data_header).unwrap();
+    }
+}
+
+impl ShredCodeTrait for ShredCode {
+    #[inline]
+    fn coding_header(&self) -> &CodingShredHeader {
+        &self.coding_header
+    }
+}
+
+// Obtains parent's hash by joining two sibling nodes in merkle tree.
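+// Both inputs are truncated to SIZE_OF_MERKLE_PROOF_ENTRY (20) bytes before
+// hashing, so every internal node commits only to 20-byte prefixes.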
+fn join_nodes<S: AsRef<[u8]>, T: AsRef<[u8]>>(node: S, other: T) -> Hash {
+    let node = &node.as_ref()[..SIZE_OF_MERKLE_PROOF_ENTRY];
+    let other = &other.as_ref()[..SIZE_OF_MERKLE_PROOF_ENTRY];
+    hashv(&[MERKLE_HASH_PREFIX_NODE, node, other])
+}
+
+fn verify_merkle_proof(index: usize, node: Hash, merkle_branch: &MerkleBranch) -> bool {
+    let proof = merkle_branch.proof.iter();
+    let (index, root) = proof.fold((index, node), |(index, node), other| {
+        let parent = if index % 2 == 0 {
+            join_nodes(node, other)
+        } else {
+            join_nodes(other, node)
+        };
+        (index >> 1, parent)
+    });
+    let root = &root.as_ref()[..SIZE_OF_MERKLE_ROOT];
+    (index, root) == (0usize, &merkle_branch.root[..])
+}
+
+#[cfg(test)]
+fn make_merkle_tree(mut nodes: Vec<Hash>) -> Vec<Hash> {
+    let mut size = nodes.len();
+    while size > 1 {
+        let offset = nodes.len() - size;
+        for index in (offset..offset + size).step_by(2) {
+            let node = &nodes[index];
+            let other = &nodes[(index + 1).min(offset + size - 1)];
+            let parent = join_nodes(node, other);
+            nodes.push(parent);
+        }
+        size = nodes.len() - offset - size;
+    }
+    nodes
+}
+
+#[cfg(test)]
+fn make_merkle_branch(
+    mut index: usize, // leaf index ~ shred's erasure shard index.
+    mut size: usize,  // number of leaves ~ erasure batch size.
+    tree: &[Hash],
+) -> Option<MerkleBranch> {
+    if index >= size {
+        return None;
+    }
+    let mut offset = 0;
+    let mut proof = Vec::<MerkleProofEntry>::new();
+    while size > 1 {
+        let node = tree.get(offset + (index ^ 1).min(size - 1))?;
+        let entry = &node.as_ref()[..SIZE_OF_MERKLE_PROOF_ENTRY];
+        proof.push(MerkleProofEntry::try_from(entry).unwrap());
+        offset += size;
+        size = (size + 1) >> 1;
+        index >>= 1;
+    }
+    if offset + 1 != tree.len() {
+        return None;
+    }
+    let root = &tree.last()?.as_ref()[..SIZE_OF_MERKLE_ROOT];
+    let root = MerkleRoot::try_from(root).unwrap();
+    Some(MerkleBranch { root, proof })
+}
+
+#[cfg(test)]
+mod test {
+    use {super::*, rand::Rng, std::iter::repeat_with};
+
+    // Total size of a data shred including headers and merkle branch.
+    fn shred_data_size_of_payload(proof_size: u8) -> usize {
+        SIZE_OF_DATA_SHRED_HEADERS
+            + ShredData::capacity(proof_size).unwrap()
+            + SIZE_OF_MERKLE_ROOT
+            + usize::from(proof_size) * SIZE_OF_MERKLE_PROOF_ENTRY
+    }
+
+    // Merkle branch is generated and signed after coding shreds are generated.
+    // All payload excluding merkle branch and the signature are erasure coded.
+    // Therefore the data capacity is equal to erasure encoded shard size minus
+    // size of erasure encoded header.
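+    // E.g. proof_size == 6 yields 999 - (88 - 64) == 975 bytes of capacity,
+    // assuming SIZE_OF_DATA_SHRED_HEADERS == 88 (illustrative arithmetic).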
+    fn shred_data_capacity(proof_size: u8) -> usize {
+        const SIZE_OF_ERASURE_ENCODED_HEADER: usize =
+            SIZE_OF_DATA_SHRED_HEADERS - SIZE_OF_SIGNATURE;
+        ShredCode::size_of_erasure_encoded_slice(proof_size).unwrap()
+            - SIZE_OF_ERASURE_ENCODED_HEADER
+    }
+
+    fn shred_data_size_of_erasure_encoded_slice(proof_size: u8) -> usize {
+        ShredData::SIZE_OF_PAYLOAD
+            - SIZE_OF_SIGNATURE
+            - SIZE_OF_MERKLE_ROOT
+            - usize::from(proof_size) * SIZE_OF_MERKLE_PROOF_ENTRY
+    }
+
+    #[test]
+    fn test_shred_data_size_of_payload() {
+        for proof_size in 0..0x15 {
+            assert_eq!(
+                ShredData::SIZE_OF_PAYLOAD,
+                shred_data_size_of_payload(proof_size)
+            );
+        }
+    }
+
+    #[test]
+    fn test_shred_data_capacity() {
+        for proof_size in 0..0x15 {
+            assert_eq!(
+                ShredData::capacity(proof_size).unwrap(),
+                shred_data_capacity(proof_size)
+            );
+        }
+    }
+
+    #[test]
+    fn test_size_of_erasure_encoded_slice() {
+        for proof_size in 0..0x15 {
+            assert_eq!(
+                ShredCode::size_of_erasure_encoded_slice(proof_size).unwrap(),
+                shred_data_size_of_erasure_encoded_slice(proof_size),
+            );
+        }
+    }
+
+    #[test]
+    fn test_merkle_proof_entry_from_hash() {
+        let mut rng = rand::thread_rng();
+        let bytes: [u8; 32] = rng.gen();
+        let hash = Hash::from(bytes);
+        let entry = &hash.as_ref()[..SIZE_OF_MERKLE_PROOF_ENTRY];
+        let entry = MerkleProofEntry::try_from(entry).unwrap();
+        assert_eq!(entry, &bytes[..SIZE_OF_MERKLE_PROOF_ENTRY]);
+    }
+
+    fn run_merkle_tree_round_trip(size: usize) {
+        let mut rng = rand::thread_rng();
+        let nodes = repeat_with(|| rng.gen::<[u8; 32]>()).map(Hash::from);
+        let nodes: Vec<_> = nodes.take(size).collect();
+        let tree = make_merkle_tree(nodes.clone());
+        for index in 0..size {
+            let branch = make_merkle_branch(index, size, &tree).unwrap();
+            let root = &tree.last().unwrap().as_ref()[..SIZE_OF_MERKLE_ROOT];
+            assert_eq!(&branch.root, root);
+            assert!(verify_merkle_proof(index, nodes[index], &branch));
+            for i in (0..size).filter(|&i| i != index) {
+                assert!(!verify_merkle_proof(i, nodes[i], &branch));
+            }
+        }
+    }
+
+    #[test]
+    fn test_merkle_tree_round_trip() {
+        for size in [1, 2, 3, 4, 5, 6, 7, 8, 9, 19, 37, 64, 79] {
+            run_merkle_tree_round_trip(size);
+        }
+    }
+}
diff --git a/ledger/src/shred/shred_code.rs b/ledger/src/shred/shred_code.rs
index 77a079cff46732..884c058482f6b8 100644
--- a/ledger/src/shred/shred_code.rs
+++ b/ledger/src/shred/shred_code.rs
@@ -1,7 +1,7 @@
 use {
     crate::shred::{
         common::dispatch,
-        legacy,
+        legacy, merkle,
         traits::{Shred, ShredCode as ShredCodeTrait},
         CodingShredHeader, Error, ShredCommonHeader, MAX_DATA_SHREDS_PER_FEC_BLOCK, SIZE_OF_NONCE,
     },
@@ -14,6 +14,7 @@ const_assert_eq!(ShredCode::SIZE_OF_PAYLOAD, 1228);
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum ShredCode {
     Legacy(legacy::ShredCode),
+    Merkle(merkle::ShredCode),
 }
 
 impl ShredCode {
@@ -70,6 +71,8 @@ impl ShredCode {
     pub(super) fn erasure_mismatch(&self, other: &ShredCode) -> bool {
         match (self, other) {
             (Self::Legacy(shred), Self::Legacy(other)) => erasure_mismatch(shred, other),
+            (Self::Merkle(shred), Self::Merkle(other)) => shred.erasure_mismatch(other),
+            _ => true,
         }
     }
 }
@@ -80,6 +83,12 @@ impl From<legacy::ShredCode> for ShredCode {
     fn from(shred: legacy::ShredCode) -> Self {
         Self::Legacy(shred)
     }
 }
 
+impl From<merkle::ShredCode> for ShredCode {
+    fn from(shred: merkle::ShredCode) -> Self {
+        Self::Merkle(shred)
+    }
+}
+
 #[inline]
 pub(super) fn erasure_shard_index<T: ShredCodeTrait>(shred: &T) -> Option<usize> {
     // Assert that the last shred index in the erasure set does not
diff --git a/ledger/src/shred/shred_data.rs b/ledger/src/shred/shred_data.rs
index ca91bfa85c35a9..47728fa9af37d6 100644
--- a/ledger/src/shred/shred_data.rs
+++ b/ledger/src/shred/shred_data.rs
@@ -2,7 +2,7 @@ use {
     crate::shred::{
         self,
         common::dispatch,
-        legacy,
+        legacy, merkle,
         traits::{Shred as _, ShredData as ShredDataTrait},
         DataShredHeader, Error, ShredCommonHeader, ShredFlags, ShredVariant,
         MAX_DATA_SHREDS_PER_SLOT,
@@ -13,6 +13,7 @@ use {
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum ShredData {
     Legacy(legacy::ShredData),
+    Merkle(merkle::ShredData),
 }
 
 impl ShredData {
@@ -77,20 +78,32 @@ impl ShredData {
     pub(super) fn bytes_to_store(&self) -> &[u8] {
         match self {
             Self::Legacy(shred) => shred.bytes_to_store(),
+            Self::Merkle(shred) => shred.payload(),
         }
     }
 
     // Possibly zero pads bytes stored in blockstore.
     pub(crate) fn resize_stored_shred(shred: Vec<u8>) -> Result<Vec<u8>, Error> {
         match shred::layout::get_shred_variant(&shred)? {
-            ShredVariant::LegacyCode => Err(Error::InvalidShredType),
+            ShredVariant::LegacyCode | ShredVariant::MerkleCode(_) => Err(Error::InvalidShredType),
+            ShredVariant::MerkleData(_) => {
+                if shred.len() != merkle::ShredData::SIZE_OF_PAYLOAD {
+                    return Err(Error::InvalidPayloadSize(shred.len()));
+                }
+                Ok(shred)
+            }
             ShredVariant::LegacyData => legacy::ShredData::resize_stored_shred(shred),
         }
     }
 
     // Maximum size of ledger data that can be embedded in a data-shred.
-    pub(crate) fn capacity() -> Result<usize, Error> {
-        Ok(legacy::ShredData::CAPACITY)
+    // merkle_proof_size is the number of proof entries in the merkle tree
+    // branch. None indicates a legacy data-shred.
+    pub(crate) fn capacity(merkle_proof_size: Option<u8>) -> Result<usize, Error> {
+        match merkle_proof_size {
+            None => Ok(legacy::ShredData::CAPACITY),
+            Some(proof_size) => merkle::ShredData::capacity(proof_size),
+        }
     }
 }
 
@@ -100,6 +113,12 @@ impl From<legacy::ShredData> for ShredData {
     }
 }
 
+impl From<merkle::ShredData> for ShredData {
+    fn from(shred: merkle::ShredData) -> Self {
+        Self::Merkle(shred)
+    }
+}
+
 #[inline]
 pub(super) fn erasure_shard_index<T: ShredDataTrait>(shred: &T) -> Option<usize> {
     let fec_set_index = shred.common_header().fec_set_index;
diff --git a/ledger/src/shredder.rs b/ledger/src/shredder.rs
index 0fbb6262ea99ca..0f5ddb6598a3be 100644
--- a/ledger/src/shredder.rs
+++ b/ledger/src/shredder.rs
@@ -109,7 +109,7 @@ impl Shredder {
         serialize_time.stop();
 
         let mut gen_data_time = Measure::start("shred_gen_data_time");
-        let data_buffer_size = ShredData::capacity().unwrap();
+        let data_buffer_size = ShredData::capacity(/*merkle_proof_size:*/ None).unwrap();
         // Integer division to ensure we have enough shreds to fit all the data
         let num_shreds = (serialized_shreds.len() + data_buffer_size - 1) / data_buffer_size;
         let last_shred_index = next_shred_index + num_shreds as u32 - 1;
@@ -341,7 +341,7 @@ impl Shredder {
             // For backward compatibility. This is needed when the data shred
            // payload is None, so that deserializing to Vec<u8> results in
            // an empty vector.
-            let data_buffer_size = ShredData::capacity().unwrap();
+            let data_buffer_size = ShredData::capacity(/*merkle_proof_size:*/ None).unwrap();
             Ok(vec![0u8; data_buffer_size])
         } else {
             Ok(data)
@@ -405,7 +405,7 @@ mod tests {
         let size = serialized_size(&entries).unwrap() as usize;
         // Integer division to ensure we have enough shreds to fit all the data
-        let data_buffer_size = ShredData::capacity().unwrap();
+        let data_buffer_size = ShredData::capacity(/*merkle_proof_size:*/ None).unwrap();
         let num_expected_data_shreds = (size + data_buffer_size - 1) / data_buffer_size;
         let num_expected_coding_shreds = (2 * MAX_DATA_SHREDS_PER_FEC_BLOCK as usize)
             .saturating_sub(num_expected_data_shreds)
@@ -575,7 +575,7 @@ mod tests {
         let keypair = Arc::new(Keypair::new());
         let shredder = Shredder::new(slot, slot - 5, 0, 0).unwrap();
         // Create enough entries to make > 1 shred
-        let data_buffer_size = ShredData::capacity().unwrap();
+        let data_buffer_size = ShredData::capacity(/*merkle_proof_size:*/ None).unwrap();
         let num_entries = max_ticks_per_n_shreds(1, Some(data_buffer_size)) + 1;
         let entries: Vec<_> = (0..num_entries)
             .map(|_| {
@@ -624,7 +624,7 @@ mod tests {
         let entry = Entry::new(&Hash::default(), 1, vec![tx0]);
 
         let num_data_shreds: usize = 5;
-        let data_buffer_size = ShredData::capacity().unwrap();
+        let data_buffer_size = ShredData::capacity(/*merkle_proof_size:*/ None).unwrap();
         let num_entries =
             max_entries_per_n_shred(&entry, num_data_shreds as u64, Some(data_buffer_size));
         let entries: Vec<_> = (0..num_entries)