From 23efe497f8bb2a88dc1cee4806782c480ee227fd Mon Sep 17 00:00:00 2001 From: Simon Warta Date: Tue, 2 Feb 2021 08:26:09 +0100 Subject: [PATCH 1/5] Add section encoding --- packages/vm/src/imports.rs | 63 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/packages/vm/src/imports.rs b/packages/vm/src/imports.rs index 3303bb7e6c..c3e9feee1c 100644 --- a/packages/vm/src/imports.rs +++ b/packages/vm/src/imports.rs @@ -308,6 +308,33 @@ fn do_next( write_to_contract::(env, &out_data) } +/// Encodes multiple sections of data into one vector. +/// +/// Each section is suffixed by a section length encoded as big endian uint32. +/// Using suffixes instead of prefixes allows reading sections in reverse order, +/// such that the first element does not need to be re-allocated if the contract's +/// data structure supports truncation (such as a Rust vector). +/// +/// The resulting data looks like this: +/// +/// ```ignore +/// section1 || section1_len || section2 || section2_len || section3 || section3_len || … +/// ``` +#[allow(dead_code)] +fn encode_sections(sections: &[Vec<u8>]) -> VmResult<Vec<u8>> { + let mut out_len: usize = sections.iter().map(|section| section.len()).sum(); + out_len += 4 * sections.len(); + let mut out_data = Vec::with_capacity(out_len); + for section in sections { + let section_len = to_u32(section.len())?.to_be_bytes(); + out_data.extend(section); + out_data.extend_from_slice(&section_len); + } + debug_assert_eq!(out_data.len(), out_len); + debug_assert_eq!(out_data.capacity(), out_len); + Ok(out_data) +} + #[cfg(test)] mod tests { use super::*; @@ -1164,4 +1191,40 @@ mod tests { e => panic!("Unexpected error: {:?}", e), } } + + #[test] + fn encode_sections_works_for_empty_sections() { + let enc = encode_sections(&[]).unwrap(); + assert_eq!(enc, b"" as &[u8]); + let enc = encode_sections(&[vec![]]).unwrap(); + assert_eq!(enc, b"\0\0\0\0" as &[u8]); + let enc = encode_sections(&[vec![], vec![]]).unwrap(); + assert_eq!(enc, 
b"\0\0\0\0\0\0\0\0" as &[u8]); + let enc = encode_sections(&[vec![], vec![], vec![]]).unwrap(); + assert_eq!(enc, b"\0\0\0\0\0\0\0\0\0\0\0\0" as &[u8]); + } + + #[test] + fn encode_sections_works_for_one_element() { + let enc = encode_sections(&[]).unwrap(); + assert_eq!(enc, b"" as &[u8]); + let enc = encode_sections(&[vec![0xAA]]).unwrap(); + assert_eq!(enc, b"\xAA\0\0\0\x01" as &[u8]); + let enc = encode_sections(&[vec![0xAA, 0xBB]]).unwrap(); + assert_eq!(enc, b"\xAA\xBB\0\0\0\x02" as &[u8]); + let enc = encode_sections(&[vec![0x9D; 277]]).unwrap(); + assert_eq!(enc, b"\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\0\0\x01\x15" as &[u8]); + } + + #[test] + fn encode_sections_works_for_multiple_elements() { + let enc = encode_sections(&[vec![0xAA]]).unwrap(); + assert_eq!(enc, b"\xAA\0\0\0\x01" as &[u8]); + let enc = encode_sections(&[vec![0xAA], vec![0xDE, 0xDE]]).unwrap(); + assert_eq!(enc, 
b"\xAA\0\0\0\x01\xDE\xDE\0\0\0\x02" as &[u8]); + let enc = encode_sections(&[vec![0xAA], vec![0xDE, 0xDE], vec![]]).unwrap(); + assert_eq!(enc, b"\xAA\0\0\0\x01\xDE\xDE\0\0\0\x02\0\0\0\0" as &[u8]); + let enc = encode_sections(&[vec![0xAA], vec![0xDE, 0xDE], vec![], vec![0xFF; 19]]).unwrap(); + assert_eq!(enc, b"\xAA\0\0\0\x01\xDE\xDE\0\0\0\x02\0\0\0\0\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\0\0\0\x13" as &[u8]); + } } From f93200f231c170386fb5f7ae7daf9087eddc476e Mon Sep 17 00:00:00 2001 From: Simon Warta Date: Tue, 2 Feb 2021 10:05:39 +0100 Subject: [PATCH 2/5] Add sections decoder --- packages/std/src/lib.rs | 1 + packages/std/src/sections.rs | 64 ++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 packages/std/src/sections.rs diff --git a/packages/std/src/lib.rs b/packages/std/src/lib.rs index 84c2094e8a..ccf2279aa5 100644 --- a/packages/std/src/lib.rs +++ b/packages/std/src/lib.rs @@ -15,6 +15,7 @@ mod iterator; mod math; mod query; mod results; +mod sections; mod serde; mod storage; mod traits; diff --git a/packages/std/src/sections.rs b/packages/std/src/sections.rs new file mode 100644 index 0000000000..f63ac76b66 --- /dev/null +++ b/packages/std/src/sections.rs @@ -0,0 +1,64 @@ +/// A sections decoder for the special case of two elements +#[allow(dead_code)] // used in Wasm and tests only +pub fn decode_sections2(data: Vec) -> (Vec, Vec) { + let section2_len: usize = if data.len() >= 4 { + u32::from_be_bytes([ + data[data.len() - 4], + data[data.len() - 3], + data[data.len() - 2], + data[data.len() - 1], + ]) as usize + } else { + panic!("Cannot read section2 length"); + }; + + let section1_len_end = data.len() - 4 - section2_len; + let section1_len: usize = if section1_len_end >= 4 { + u32::from_be_bytes([ + data[section1_len_end - 4], + data[section1_len_end - 3], + data[section1_len_end - 2], + data[section1_len_end - 1], + ]) as usize + } else { + panic!("Cannot read section1 
length"); + }; + + if data.len() != 4 + section1_len + 4 + section2_len { + panic!( + "Invalid data length: {}, {}, {}", + data.len(), + section1_len, + section2_len + ); + } + + let mut first = data; + let mut second = first.split_off(section1_len_end); + second.truncate(section2_len); + first.truncate(section1_len); + (first, second) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn decode_sections2_works() { + let data = b"\xAA\0\0\0\x01\xBB\xCC\0\0\0\x02".to_vec(); + assert_eq!(decode_sections2(data), (vec![0xAA], vec![0xBB, 0xCC])); + + let data = b"\xDE\xEF\x62\0\0\0\x03\0\0\0\0".to_vec(); + assert_eq!(decode_sections2(data), (vec![0xDE, 0xEF, 0x62], vec![])); + + let data = b"\0\0\0\0\xDE\xEF\x62\0\0\0\x03".to_vec(); + assert_eq!(decode_sections2(data), (vec![], vec![0xDE, 0xEF, 0x62])); + + let data = b"\0\0\0\0\0\0\0\0".to_vec(); + assert_eq!(decode_sections2(data), (vec![], vec![])); + + let data = b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\0\0\0\x13\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9
D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\0\0\x01\x15".to_vec(); + assert_eq!(decode_sections2(data), (vec![0xFF; 19], vec![0x9D; 277])); + } +} From 6bdb305bf2149cdf3fb0920295ffb0285150e9fc Mon Sep 17 00:00:00 2001 From: Simon Warta Date: Tue, 2 Feb 2021 10:26:39 +0100 Subject: [PATCH 3/5] Use sections encoding for db_next --- CHANGELOG.md | 3 +++ packages/std/src/imports.rs | 23 +++++++---------------- packages/vm/src/imports.rs | 14 ++++---------- 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65bf51b857..942f21a987 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,9 @@ and this project adheres to - all: The `query` enpoint is now optional. It is still highly recommended to expose it an almost any use case though. +- all: Change the encoding of the key/value region of the `db_next` import to a + more generic encoding that supports an arbitrary number of sections. This + encoding can then be reused for other multi value regions. - cosmwasm-std: Remove `from_address` from `BankMsg::Send`, as it always sends from the contract address, and this is consistent with other `CosmosMsg` variants. 
diff --git a/packages/std/src/imports.rs b/packages/std/src/imports.rs index 8222058ad7..e7665775bd 100644 --- a/packages/std/src/imports.rs +++ b/packages/std/src/imports.rs @@ -5,6 +5,7 @@ use crate::binary::Binary; use crate::errors::{StdError, StdResult, SystemError}; use crate::memory::{alloc, build_region, consume_region, Region}; use crate::results::SystemResult; +use crate::sections::decode_sections2; use crate::serde::from_slice; use crate::traits::{Api, Querier, QuerierResult, Storage}; #[cfg(feature = "iterator")] @@ -120,23 +121,13 @@ impl Iterator for ExternalIterator { fn next(&mut self) -> Option { let next_result = unsafe { db_next(self.iterator_id) }; let kv_region_ptr = next_result as *mut Region; - let mut kv = unsafe { consume_region(kv_region_ptr) }; - - // The KV region uses the format value || key || keylen, where keylen is a fixed size big endian u32 value - let keylen = u32::from_be_bytes([ - kv[kv.len() - 4], - kv[kv.len() - 3], - kv[kv.len() - 2], - kv[kv.len() - 1], - ]) as usize; - if keylen == 0 { - return None; + let kv = unsafe { consume_region(kv_region_ptr) }; + let (key, value) = decode_sections2(kv); + if key.len() == 0 { + None + } else { + Some((key, value)) } - - kv.truncate(kv.len() - 4); - let key = kv.split_off(kv.len() - keylen); - let value = kv; - Some((key, value)) } } diff --git a/packages/vm/src/imports.rs b/packages/vm/src/imports.rs index c3e9feee1c..852fad424c 100644 --- a/packages/vm/src/imports.rs +++ b/packages/vm/src/imports.rs @@ -298,13 +298,7 @@ fn do_next( // Empty key will later be treated as _no more element_. 
let (key, value) = result?.unwrap_or_else(|| (Vec::::new(), Vec::::new())); - // Build value || key || keylen - let keylen_bytes = to_u32(key.len())?.to_be_bytes(); - let mut out_data = value; - out_data.reserve(key.len() + 4); - out_data.extend(key); - out_data.extend_from_slice(&keylen_bytes); - + let out_data = encode_sections(&[key, value])?; write_to_contract::(env, &out_data) } @@ -1158,19 +1152,19 @@ mod tests { let kv_region_ptr = do_next::(&env, id).unwrap(); assert_eq!( force_read(&env, kv_region_ptr), - [VALUE1, KEY1, b"\0\0\0\x03"].concat() + [KEY1, b"\0\0\0\x03", VALUE1, b"\0\0\0\x06"].concat() ); // Entry 2 let kv_region_ptr = do_next::(&env, id).unwrap(); assert_eq!( force_read(&env, kv_region_ptr), - [VALUE2, KEY2, b"\0\0\0\x04"].concat() + [KEY2, b"\0\0\0\x04", VALUE2, b"\0\0\0\x05"].concat() ); // End let kv_region_ptr = do_next::(&env, id).unwrap(); - assert_eq!(force_read(&env, kv_region_ptr), b"\0\0\0\0"); + assert_eq!(force_read(&env, kv_region_ptr), b"\0\0\0\0\0\0\0\0"); // API makes no guarantees for value_ptr in this case } From 15e4c09980403d9363b79be1201c6d1f95db5d9d Mon Sep 17 00:00:00 2001 From: Simon Warta Date: Tue, 2 Feb 2021 18:29:20 +0100 Subject: [PATCH 4/5] Add test decode_sections2_preserved_first_vector --- packages/std/src/sections.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/packages/std/src/sections.rs b/packages/std/src/sections.rs index f63ac76b66..f91978f149 100644 --- a/packages/std/src/sections.rs +++ b/packages/std/src/sections.rs @@ -61,4 +61,20 @@ mod tests { let data = 
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\0\0\0\x13\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\0\0\x01\x15".to_vec(); assert_eq!(decode_sections2(data), (vec![0xFF; 19], vec![0x9D; 277])); } + + #[test] + fn decode_sections2_preserved_first_vector() { + let original = b"\xAA\0\0\0\x01\xBB\xCC\0\0\0\x02".to_vec(); + let original_capacity = original.capacity(); + let original_ptr = original.as_ptr(); + let (first, second) = decode_sections2(original); + + // This is not copied + assert_eq!(first.capacity(), original_capacity); + assert_eq!(first.as_ptr(), original_ptr); + + // This is a copy + assert_ne!(second.capacity(), original_capacity); + assert_ne!(second.as_ptr(), original_ptr); + } } From 6a4eafdf5522d47c4bed441b2a2c96dfb60806f3 Mon Sep 17 00:00:00 2001 From: Simon Warta Date: Tue, 2 Feb 2021 18:30:20 +0100 Subject: [PATCH 5/5] Pull out split_tail --- 
 packages/std/src/sections.rs | 51 ++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/packages/std/src/sections.rs b/packages/std/src/sections.rs index f91978f149..962e6bcbb0 100644 --- a/packages/std/src/sections.rs +++ b/packages/std/src/sections.rs @@ -1,7 +1,21 @@ /// A sections decoder for the special case of two elements #[allow(dead_code)] // used in Wasm and tests only pub fn decode_sections2(data: Vec<u8>) -> (Vec<u8>, Vec<u8>) { - let section2_len: usize = if data.len() >= 4 { + let (rest, second) = split_tail(data); + let (_, first) = split_tail(rest); + (first, second) +} + +/// Splits data into the last section ("tail") and the rest. +/// The tail's length information is cut off, such that it is ready to use. +/// The rest is basically unparsed and contains the lengths of the remaining sections. +/// +/// While the tail is copied into a new vector, the rest is only truncated such that +/// no re-allocation is necessary. +/// +/// If `data` contains one section only, `data` is moved into the tail entirely. +fn split_tail(data: Vec<u8>) -> (Vec<u8>, Vec<u8>) { + let tail_len: usize = if data.len() >= 4 { u32::from_be_bytes([ data[data.len() - 4], data[data.len() - 3], @@ -9,35 +23,20 @@ pub fn decode_sections2(data: Vec<u8>) -> (Vec<u8>, Vec<u8>) { data[data.len() - 1], ]) as usize } else { - panic!("Cannot read section2 length"); + panic!("Cannot read section length"); }; + let rest_len_end = data.len() - 4 - tail_len; - let section1_len_end = data.len() - 4 - section2_len; - let section1_len: usize = if section1_len_end >= 4 { - u32::from_be_bytes([ - data[section1_len_end - 4], - data[section1_len_end - 3], - data[section1_len_end - 2], - data[section1_len_end - 1], - ]) as usize + let (rest, mut tail) = if rest_len_end == 0 { + // i.e. 
all data is the tail + (Vec::new(), data) } else { - panic!("Cannot read section1 length"); + let mut rest = data; + let tail = rest.split_off(rest_len_end); + (rest, tail) }; - - if data.len() != 4 + section1_len + 4 + section2_len { - panic!( - "Invalid data length: {}, {}, {}", - data.len(), - section1_len, - section2_len - ); - } - - let mut first = data; - let mut second = first.split_off(section1_len_end); - second.truncate(section2_len); - first.truncate(section1_len); - (first, second) + tail.truncate(tail_len); // cut off length + (rest, tail) } #[cfg(test)]