Skip to content

Create sections encoding for multi-value regions #760

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Feb 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ and this project adheres to

- all: The `query` endpoint is now optional. It is still highly recommended to
expose it in almost any use case though.
- all: Change the encoding of the key/value region of the `db_next` import to a
more generic encoding that supports an arbitrary number of sections. This
encoding can then be reused for other multi value regions.
- cosmwasm-std: Remove `from_address` from `BankMsg::Send`, as it always sends
from the contract address, and this is consistent with other `CosmosMsg`
variants.
Expand Down
23 changes: 7 additions & 16 deletions packages/std/src/imports.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::binary::Binary;
use crate::errors::{StdError, StdResult, SystemError};
use crate::memory::{alloc, build_region, consume_region, Region};
use crate::results::SystemResult;
use crate::sections::decode_sections2;
use crate::serde::from_slice;
use crate::traits::{Api, Querier, QuerierResult, Storage};
#[cfg(feature = "iterator")]
Expand Down Expand Up @@ -120,23 +121,13 @@ impl Iterator for ExternalIterator {
fn next(&mut self) -> Option<Self::Item> {
let next_result = unsafe { db_next(self.iterator_id) };
let kv_region_ptr = next_result as *mut Region;
let mut kv = unsafe { consume_region(kv_region_ptr) };

// The KV region uses the format value || key || keylen, where keylen is a fixed size big endian u32 value
let keylen = u32::from_be_bytes([
kv[kv.len() - 4],
kv[kv.len() - 3],
kv[kv.len() - 2],
kv[kv.len() - 1],
]) as usize;
if keylen == 0 {
return None;
let kv = unsafe { consume_region(kv_region_ptr) };
let (key, value) = decode_sections2(kv);
if key.len() == 0 {
None
} else {
Some((key, value))
}

kv.truncate(kv.len() - 4);
let key = kv.split_off(kv.len() - keylen);
let value = kv;
Some((key, value))
}
}

Expand Down
1 change: 1 addition & 0 deletions packages/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ mod iterator;
mod math;
mod query;
mod results;
mod sections;
mod serde;
mod storage;
mod traits;
Expand Down
79 changes: 79 additions & 0 deletions packages/std/src/sections.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/// Decodes a sections-encoded buffer for the special case of exactly two sections.
///
/// Returns `(first, second)`. Sections are peeled off from the back of `data`,
/// so `second` is extracted before `first`. Whatever bytes remain in front of
/// the first section are discarded.
///
/// # Panics
///
/// Panics when `split_tail` cannot read a section length, e.g. because `data`
/// holds fewer than two sections.
#[allow(dead_code)] // used in Wasm and tests only
pub fn decode_sections2(data: Vec<u8>) -> (Vec<u8>, Vec<u8>) {
    // Every section carries its length as a suffix, hence the back-to-front order.
    let (remainder, second) = split_tail(data);
    let first = split_tail(remainder).1;
    (first, second)
}

/// Splits data into the last section ("tail") and the rest.
/// The tail's length information is cut off, such that it is ready to use.
/// The rest is basically unparsed and contains the lengths of the remaining sections.
///
/// While the tail is copied into a new vector, the rest is only truncated such that
/// no re-allocation is necessary.
///
/// If `data` contains one section only, `data` is moved into the tail entirely.
///
/// # Panics
///
/// Panics with "Cannot read section length" if `data` is shorter than the 4 byte
/// length suffix, and with "Section length exceeds available data" if the encoded
/// length is larger than the number of bytes in front of the suffix.
fn split_tail(data: Vec<u8>) -> (Vec<u8>, Vec<u8>) {
    // Section lengths are stored as big endian u32 suffixes. Four bytes (instead of
    // e.g. two) are used so that no assumption on the data length has to be made.
    const LEN_SIZE: usize = 4;

    let len_start = match data.len().checked_sub(LEN_SIZE) {
        Some(pos) => pos,
        None => panic!("Cannot read section length"),
    };
    let mut len_bytes = [0u8; LEN_SIZE];
    len_bytes.copy_from_slice(&data[len_start..]);
    let tail_len = u32::from_be_bytes(len_bytes) as usize;

    // Checked subtraction: a corrupt length must not wrap around in release builds.
    let rest_len_end = match len_start.checked_sub(tail_len) {
        Some(pos) => pos,
        None => panic!("Section length exceeds available data"),
    };

    let (rest, mut tail) = if rest_len_end == 0 {
        // i.e. all data is the tail. `Vec::new()` does not allocate until elements
        // are pushed, so this case needs no allocation and no special handling
        // by the caller.
        (Vec::new(), data)
    } else {
        let mut rest = data;
        let tail = rest.split_off(rest_len_end);
        (rest, tail)
    };
    tail.truncate(tail_len); // cut off length
    (rest, tail)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks decoding of two sections for regular, empty and larger payloads.
    #[test]
    fn decode_sections2_works() {
        // 19 bytes of 0xFF followed by 277 bytes of 0x9D, each with its u32 BE length suffix.
        let long_case = [
            vec![0xFF; 19],
            vec![0x00, 0x00, 0x00, 0x13],
            vec![0x9D; 277],
            vec![0x00, 0x00, 0x01, 0x15],
        ]
        .concat();

        // (encoded input, expected first section, expected second section)
        let cases: Vec<(Vec<u8>, Vec<u8>, Vec<u8>)> = vec![
            (
                b"\xAA\0\0\0\x01\xBB\xCC\0\0\0\x02".to_vec(),
                vec![0xAA],
                vec![0xBB, 0xCC],
            ),
            (
                b"\xDE\xEF\x62\0\0\0\x03\0\0\0\0".to_vec(),
                vec![0xDE, 0xEF, 0x62],
                vec![],
            ),
            (
                b"\0\0\0\0\xDE\xEF\x62\0\0\0\x03".to_vec(),
                vec![],
                vec![0xDE, 0xEF, 0x62],
            ),
            (b"\0\0\0\0\0\0\0\0".to_vec(), vec![], vec![]),
            (long_case, vec![0xFF; 19], vec![0x9D; 277]),
        ];

        for (encoded, first, second) in cases {
            assert_eq!(decode_sections2(encoded), (first, second));
        }
    }

    /// Checks that the first section reuses the input allocation while the
    /// second section lives in its own one.
    #[test]
    fn decode_sections2_preserved_first_vector() {
        let input = b"\xAA\0\0\0\x01\xBB\xCC\0\0\0\x02".to_vec();
        let (input_ptr, input_capacity) = (input.as_ptr(), input.capacity());

        let (first, second) = decode_sections2(input);

        // The first section is the original vector, merely truncated
        assert_eq!(first.capacity(), input_capacity);
        assert_eq!(first.as_ptr(), input_ptr);

        // The second section was split off into a separate vector
        assert_ne!(second.capacity(), input_capacity);
        assert_ne!(second.as_ptr(), input_ptr);
    }
}
77 changes: 67 additions & 10 deletions packages/vm/src/imports.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,16 +298,37 @@ fn do_next<A: Api, S: Storage, Q: Querier>(
// Empty key will later be treated as _no more element_.
let (key, value) = result?.unwrap_or_else(|| (Vec::<u8>::new(), Vec::<u8>::new()));

// Build value || key || keylen
let keylen_bytes = to_u32(key.len())?.to_be_bytes();
let mut out_data = value;
out_data.reserve(key.len() + 4);
out_data.extend(key);
out_data.extend_from_slice(&keylen_bytes);

let out_data = encode_sections(&[key, value])?;
write_to_contract::<A, S, Q>(env, &out_data)
}

/// Encodes multiple sections of data into one vector.
///
/// Each section is suffixed by a section length encoded as big endian uint32.
/// Using suffixes instead of prefixes allows reading sections in reverse order,
/// such that the first element does not need to be re-allocated if the contract's
/// data structure supports truncation (such as a Rust vector).
///
/// The resulting data looks like this:
///
/// ```ignore
/// section1 || section1_len || section2 || section2_len || section3 || section3_len || …
/// ```
///
/// # Errors
///
/// Propagates the error of `to_u32` when a section's length cannot be converted.
#[allow(dead_code)]
fn encode_sections(sections: &[Vec<u8>]) -> VmResult<Vec<u8>> {
    // Every section contributes its payload plus a 4 byte length suffix.
    let out_len: usize = sections.iter().map(|section| section.len() + 4).sum();
    let mut out_data: Vec<u8> = Vec::with_capacity(out_len);
    // `Vec::with_capacity` may reserve more than requested, so remember what we
    // actually got in order to detect re-allocations below.
    let initial_capacity = out_data.capacity();
    for section in sections {
        let section_len = to_u32(section.len())?.to_be_bytes();
        out_data.extend_from_slice(section);
        out_data.extend_from_slice(&section_len);
    }
    debug_assert_eq!(out_data.len(), out_len);
    // The buffer was sized upfront; writing must not have grown it.
    debug_assert_eq!(out_data.capacity(), initial_capacity);
    Ok(out_data)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -1131,19 +1152,19 @@ mod tests {
let kv_region_ptr = do_next::<MA, MS, MQ>(&env, id).unwrap();
assert_eq!(
force_read(&env, kv_region_ptr),
[VALUE1, KEY1, b"\0\0\0\x03"].concat()
[KEY1, b"\0\0\0\x03", VALUE1, b"\0\0\0\x06"].concat()
);

// Entry 2
let kv_region_ptr = do_next::<MA, MS, MQ>(&env, id).unwrap();
assert_eq!(
force_read(&env, kv_region_ptr),
[VALUE2, KEY2, b"\0\0\0\x04"].concat()
[KEY2, b"\0\0\0\x04", VALUE2, b"\0\0\0\x05"].concat()
);

// End
let kv_region_ptr = do_next::<MA, MS, MQ>(&env, id).unwrap();
assert_eq!(force_read(&env, kv_region_ptr), b"\0\0\0\0");
assert_eq!(force_read(&env, kv_region_ptr), b"\0\0\0\0\0\0\0\0");
// API makes no guarantees for value_ptr in this case
}

Expand All @@ -1164,4 +1185,40 @@ mod tests {
e => panic!("Unexpected error: {:?}", e),
}
}

/// Zero to three empty sections encode to one all-zero 4 byte length suffix each.
#[test]
fn encode_sections_works_for_empty_sections() {
    for count in 0..=3 {
        let sections = vec![Vec::<u8>::new(); count];
        let enc = encode_sections(&sections).unwrap();
        assert_eq!(enc, vec![0u8; 4 * count]);
    }
}

/// A single section is encoded as its payload followed by its u32 BE length.
#[test]
fn encode_sections_works_for_one_element() {
    let no_sections: &[Vec<u8>] = &[];
    assert_eq!(encode_sections(no_sections).unwrap(), Vec::<u8>::new());

    // (section payload, expected encoding)
    let cases: Vec<(Vec<u8>, Vec<u8>)> = vec![
        (vec![0xAA], b"\xAA\0\0\0\x01".to_vec()),
        (vec![0xAA, 0xBB], b"\xAA\xBB\0\0\0\x02".to_vec()),
        (
            vec![0x9D; 277],
            // 277 payload bytes followed by 277 as big endian u32
            [vec![0x9D; 277], vec![0x00, 0x00, 0x01, 0x15]].concat(),
        ),
    ];
    for (section, expected) in cases {
        let enc = encode_sections(&[section]).unwrap();
        assert_eq!(enc, expected);
    }
}

/// Encoding the first `k` sections yields the concatenation of the first `k`
/// individually encoded sections, for `k` from one to four.
#[test]
fn encode_sections_works_for_multiple_elements() {
    let sections: Vec<Vec<u8>> = vec![vec![0xAA], vec![0xDE, 0xDE], vec![], vec![0xFF; 19]];
    // Expected encoding of each section on its own: payload || u32 BE length
    let encoded_parts: Vec<Vec<u8>> = vec![
        b"\xAA\0\0\0\x01".to_vec(),
        b"\xDE\xDE\0\0\0\x02".to_vec(),
        b"\0\0\0\0".to_vec(),
        [vec![0xFF; 19], vec![0x00, 0x00, 0x00, 0x13]].concat(),
    ];

    for count in 1..=sections.len() {
        let enc = encode_sections(&sections[..count]).unwrap();
        assert_eq!(enc, encoded_parts[..count].concat());
    }
}
}