CosmWasm · webmaster128 · Feb 2, 2021 · Feb 2, 2021 · Feb 2, 2021 · Feb 2, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -42,6 +42,9 @@ and this project adheres to
 
 - all: The `query` enpoint is now optional. It is still highly recommended to
   expose it an almost any use case though.
+- all: Change the encoding of the key/value region of the `db_next` import to a
+  more generic encoding that supports an arbitrary number of sections. This
+  encoding can then be reused for other multi-value regions.
 - cosmwasm-std: Remove `from_address` from `BankMsg::Send`, as it always sends
   from the contract address, and this is consistent with other `CosmosMsg`
   variants.

diff --git a/packages/std/src/imports.rs b/packages/std/src/imports.rs
@@ -5,6 +5,7 @@ use crate::binary::Binary;
 use crate::errors::{StdError, StdResult, SystemError};
 use crate::memory::{alloc, build_region, consume_region, Region};
 use crate::results::SystemResult;
+use crate::sections::decode_sections2;
 use crate::serde::from_slice;
 use crate::traits::{Api, Querier, QuerierResult, Storage};
 #[cfg(feature = "iterator")]
@@ -120,23 +121,13 @@ impl Iterator for ExternalIterator {
     fn next(&mut self) -> Option<Self::Item> {
         let next_result = unsafe { db_next(self.iterator_id) };
         let kv_region_ptr = next_result as *mut Region;
-        let mut kv = unsafe { consume_region(kv_region_ptr) };
-
-        // The KV region uses the format value || key || keylen, where keylen is a fixed size big endian u32 value
-        let keylen = u32::from_be_bytes([
-            kv[kv.len() - 4],
-            kv[kv.len() - 3],
-            kv[kv.len() - 2],
-            kv[kv.len() - 1],
-        ]) as usize;
-        if keylen == 0 {
-            return None;
+        let kv = unsafe { consume_region(kv_region_ptr) };
+        let (key, value) = decode_sections2(kv);
+        if key.len() == 0 {
+            None
+        } else {
+            Some((key, value))
         }
-
-        kv.truncate(kv.len() - 4);
-        let key = kv.split_off(kv.len() - keylen);
-        let value = kv;
-        Some((key, value))
     }
 }
 

diff --git a/packages/std/src/lib.rs b/packages/std/src/lib.rs
@@ -15,6 +15,7 @@ mod iterator;
 mod math;
 mod query;
 mod results;
+mod sections;
 mod serde;
 mod storage;
 mod traits;

diff --git a/packages/std/src/sections.rs b/packages/std/src/sections.rs
@@ -0,0 +1,79 @@
+/// A sections decoder for the special case of two elements
+#[allow(dead_code)] // used in Wasm and tests only
+pub fn decode_sections2(data: Vec<u8>) -> (Vec<u8>, Vec<u8>) {
+    let (rest, second) = split_tail(data);
+    let (_, first) = split_tail(rest);
+    (first, second)
+}
+
+/// Splits data into the last section ("tail") and the rest.
+/// The tail's length information is cut off, such that it is ready to use.
+/// The rest is left unparsed and contains the lengths of the remaining sections.
+///
+/// While the tail is copied into a new vector, the rest is only truncated such that
+/// no re-allocation is necessary.
+///
+/// If `data` contains one section only, `data` is moved into the tail entirely.
+fn split_tail(data: Vec<u8>) -> (Vec<u8>, Vec<u8>) {
+    let tail_len: usize = if data.len() >= 4 {
+        u32::from_be_bytes([
+            data[data.len() - 4],
+            data[data.len() - 3],
+            data[data.len() - 2],
+            data[data.len() - 1],
+        ]) as usize
+    } else {
+        panic!("Cannot read section length");
+    };
+    let rest_len_end = data.len() - 4 - tail_len;
+
+    let (rest, mut tail) = if rest_len_end == 0 {
+        // i.e. all data is the tail
+        (Vec::new(), data)
+    } else {
+        let mut rest = data;
+        let tail = rest.split_off(rest_len_end);
+        (rest, tail)
+    };
+    tail.truncate(tail_len); // cut off length
+    (rest, tail)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn decode_sections2_works() {
+        let data = b"\xAA\0\0\0\x01\xBB\xCC\0\0\0\x02".to_vec();
+        assert_eq!(decode_sections2(data), (vec![0xAA], vec![0xBB, 0xCC]));
+
+        let data = b"\xDE\xEF\x62\0\0\0\x03\0\0\0\0".to_vec();
+        assert_eq!(decode_sections2(data), (vec![0xDE, 0xEF, 0x62], vec![]));
+
+        let data = b"\0\0\0\0\xDE\xEF\x62\0\0\0\x03".to_vec();
+        assert_eq!(decode_sections2(data), (vec![], vec![0xDE, 0xEF, 0x62]));
+
+        let data = b"\0\0\0\0\0\0\0\0".to_vec();
+        assert_eq!(decode_sections2(data), (vec![], vec![]));
+
+        let data = b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\0\0\0\x13\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\0\0\x01\x15".to_vec();
+        assert_eq!(decode_sections2(data), (vec![0xFF; 19], vec![0x9D; 277]));
+    }
+
+    #[test]
+    fn decode_sections2_preserved_first_vector() {
+        let original = b"\xAA\0\0\0\x01\xBB\xCC\0\0\0\x02".to_vec();
+        let original_capacity = original.capacity();
+        let original_ptr = original.as_ptr();
+        let (first, second) = decode_sections2(original);
+
+        // This is not copied
+        assert_eq!(first.capacity(), original_capacity);
+        assert_eq!(first.as_ptr(), original_ptr);
+
+        // This is a copy
+        assert_ne!(second.capacity(), original_capacity);
+        assert_ne!(second.as_ptr(), original_ptr);
+    }
+}
diff --git a/packages/vm/src/imports.rs b/packages/vm/src/imports.rs
@@ -298,16 +298,37 @@ fn do_next<A: Api, S: Storage, Q: Querier>(
     // Empty key will later be treated as _no more element_.
     let (key, value) = result?.unwrap_or_else(|| (Vec::<u8>::new(), Vec::<u8>::new()));
 
-    // Build value || key || keylen
-    let keylen_bytes = to_u32(key.len())?.to_be_bytes();
-    let mut out_data = value;
-    out_data.reserve(key.len() + 4);
-    out_data.extend(key);
-    out_data.extend_from_slice(&keylen_bytes);
-
+    let out_data = encode_sections(&[key, value])?;
     write_to_contract::<A, S, Q>(env, &out_data)
 }
 
+/// Encodes multiple sections of data into one vector.
+///
+/// Each section is suffixed by a section length encoded as a big-endian uint32.
+/// Using suffixes instead of prefixes allows reading sections in reverse order,
+/// such that the first element does not need to be re-allocated if the contract's
+/// data structure supports truncation (such as a Rust vector).
+///
+/// The resulting data looks like this:
+///
+/// ```ignore
+/// section1 || section1_len || section2 || section2_len || section3 || section3_len || …
+/// ```
+#[allow(dead_code)]
+fn encode_sections(sections: &[Vec<u8>]) -> VmResult<Vec<u8>> {
+    let mut out_len: usize = sections.iter().map(|section| section.len()).sum();
+    out_len += 4 * sections.len();
+    let mut out_data = Vec::with_capacity(out_len);
+    for section in sections {
+        let section_len = to_u32(section.len())?.to_be_bytes();
+        out_data.extend(section);
+        out_data.extend_from_slice(&section_len);
+    }
+    debug_assert_eq!(out_data.len(), out_len);
+    debug_assert_eq!(out_data.capacity(), out_len);
+    Ok(out_data)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -1131,19 +1152,19 @@ mod tests {
         let kv_region_ptr = do_next::<MA, MS, MQ>(&env, id).unwrap();
         assert_eq!(
             force_read(&env, kv_region_ptr),
-            [VALUE1, KEY1, b"\0\0\0\x03"].concat()
+            [KEY1, b"\0\0\0\x03", VALUE1, b"\0\0\0\x06"].concat()
         );
 
         // Entry 2
         let kv_region_ptr = do_next::<MA, MS, MQ>(&env, id).unwrap();
         assert_eq!(
             force_read(&env, kv_region_ptr),
-            [VALUE2, KEY2, b"\0\0\0\x04"].concat()
+            [KEY2, b"\0\0\0\x04", VALUE2, b"\0\0\0\x05"].concat()
         );
 
         // End
         let kv_region_ptr = do_next::<MA, MS, MQ>(&env, id).unwrap();
-        assert_eq!(force_read(&env, kv_region_ptr), b"\0\0\0\0");
+        assert_eq!(force_read(&env, kv_region_ptr), b"\0\0\0\0\0\0\0\0");
         // API makes no guarantees for value_ptr in this case
     }
 
@@ -1164,4 +1185,40 @@ mod tests {
             e => panic!("Unexpected error: {:?}", e),
         }
     }
+
+    #[test]
+    fn encode_sections_works_for_empty_sections() {
+        let enc = encode_sections(&[]).unwrap();
+        assert_eq!(enc, b"" as &[u8]);
+        let enc = encode_sections(&[vec![]]).unwrap();
+        assert_eq!(enc, b"\0\0\0\0" as &[u8]);
+        let enc = encode_sections(&[vec![], vec![]]).unwrap();
+        assert_eq!(enc, b"\0\0\0\0\0\0\0\0" as &[u8]);
+        let enc = encode_sections(&[vec![], vec![], vec![]]).unwrap();
+        assert_eq!(enc, b"\0\0\0\0\0\0\0\0\0\0\0\0" as &[u8]);
+    }
+
+    #[test]
+    fn encode_sections_works_for_one_element() {
+        let enc = encode_sections(&[]).unwrap();
+        assert_eq!(enc, b"" as &[u8]);
+        let enc = encode_sections(&[vec![0xAA]]).unwrap();
+        assert_eq!(enc, b"\xAA\0\0\0\x01" as &[u8]);
+        let enc = encode_sections(&[vec![0xAA, 0xBB]]).unwrap();
+        assert_eq!(enc, b"\xAA\xBB\0\0\0\x02" as &[u8]);
+        let enc = encode_sections(&[vec![0x9D; 277]]).unwrap();
+        assert_eq!(enc, b"\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\x9D\0\0\x01\x15" as &[u8]);
+    }
+
+    #[test]
+    fn encode_sections_works_for_multiple_elements() {
+        let enc = encode_sections(&[vec![0xAA]]).unwrap();
+        assert_eq!(enc, b"\xAA\0\0\0\x01" as &[u8]);
+        let enc = encode_sections(&[vec![0xAA], vec![0xDE, 0xDE]]).unwrap();
+        assert_eq!(enc, b"\xAA\0\0\0\x01\xDE\xDE\0\0\0\x02" as &[u8]);
+        let enc = encode_sections(&[vec![0xAA], vec![0xDE, 0xDE], vec![]]).unwrap();
+        assert_eq!(enc, b"\xAA\0\0\0\x01\xDE\xDE\0\0\0\x02\0\0\0\0" as &[u8]);
+        let enc = encode_sections(&[vec![0xAA], vec![0xDE, 0xDE], vec![], vec![0xFF; 19]]).unwrap();
+        assert_eq!(enc, b"\xAA\0\0\0\x01\xDE\xDE\0\0\0\x02\0\0\0\0\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\0\0\0\x13" as &[u8]);
+    }
 }