Skip to content

Commit c81e878

Browse files
committed
Support VariableList longer than 2**31 on 32-bit architectures
* Improves handling large VariableList for 32-bit architectures - loudly crash vs. silently overflow (and produce wrong results) * Adds feature to enable capping typenum to usize conversion to usize::MAX * Tests + github actions
1 parent 4fef53f commit c81e878

File tree

6 files changed

+396
-36
lines changed

6 files changed

+396
-36
lines changed

.github/workflows/test-suite.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,28 @@ jobs:
3131
run: rustup update stable
3232
- name: Run tests
3333
run: cargo test --release
34+
cross-test-i686:
35+
name: cross test i686-unknown-linux-gnu
36+
runs-on: ubuntu-latest
37+
steps:
38+
- uses: actions/checkout@v3
39+
- name: Install cross
40+
run: cargo install cross --git https://github.com/cross-rs/cross
41+
- name: Add i686-unknown-linux-gnu target
42+
run: rustup target add i686-unknown-linux-gnu
43+
- name: Run cross test for i686-unknown-linux-gnu
44+
run: cross test --target i686-unknown-linux-gnu
45+
cross-test-i686-overflow:
46+
name: cross test i686-unknown-linux-gnu (typenum overflow feature)
47+
runs-on: ubuntu-latest
48+
steps:
49+
- uses: actions/checkout@v3
50+
- name: Install cross
51+
run: cargo install cross --git https://github.com/cross-rs/cross
52+
- name: Add i686-unknown-linux-gnu target
53+
run: rustup target add i686-unknown-linux-gnu
54+
- name: Run cross test for i686-unknown-linux-gnu with cap-typenum-to-usize-overflow
55+
run: cross test --target i686-unknown-linux-gnu --features cap-typenum-to-usize-overflow
3456
coverage:
3557
name: cargo-tarpaulin
3658
runs-on: ubuntu-latest

Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,16 @@ typenum = "1.12.0"
2020
smallvec = "1.8.0"
2121
arbitrary = { version = "1.0", features = ["derive"], optional = true }
2222
itertools = "0.13.0"
23+
ethereum_hashing = {version = "0.7.0", optional = true}
2324

2425
[dev-dependencies]
2526
serde_json = "1.0.0"
2627
tree_hash_derive = "0.10.0"
28+
ethereum_hashing = {version = "0.7.0"}
29+
30+
[target.i686-unknown-linux-gnu]
31+
rustflags = ["-C", "target-feature=+sse2"]
32+
33+
[features]
34+
# Use with great care - see the comment in typenum_helpers
35+
cap-typenum-to-usize-overflow=["dep:ethereum_hashing"]

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
mod fixed_vector;
4242
pub mod serde_utils;
4343
mod tree_hash;
44+
mod typenum_helpers;
4445
mod variable_list;
4546

4647
pub use fixed_vector::FixedVector;

src/tree_hash.rs

Lines changed: 92 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,107 @@
1+
use crate::typenum_helpers::to_usize;
12
use tree_hash::{Hash256, MerkleHasher, TreeHash, TreeHashType};
23
use typenum::Unsigned;
34

4-
/// A helper function providing common functionality between the `TreeHash` implementations for
5-
/// `FixedVector` and `VariableList`.
6-
pub fn vec_tree_hash_root<T, N>(vec: &[T]) -> Hash256
7-
where
8-
T: TreeHash,
9-
N: Unsigned,
10-
{
5+
pub fn packing_factor<T: TreeHash>() -> usize {
116
match T::tree_hash_type() {
12-
TreeHashType::Basic => {
13-
let mut hasher = MerkleHasher::with_leaves(
14-
(N::to_usize() + T::tree_hash_packing_factor() - 1) / T::tree_hash_packing_factor(),
15-
);
7+
TreeHashType::Basic => T::tree_hash_packing_factor(),
8+
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => 1,
9+
}
10+
}
11+
12+
mod default_impl {
13+
use super::*;
14+
/// A helper function providing common functionality between the `TreeHash` implementations for
15+
/// `FixedVector` and `VariableList`.
16+
pub fn vec_tree_hash_root<T, N>(vec: &[T]) -> Hash256
17+
where
18+
T: TreeHash,
19+
N: Unsigned,
20+
{
21+
match T::tree_hash_type() {
22+
TreeHashType::Basic => {
23+
let mut hasher = MerkleHasher::with_leaves(
24+
(to_usize::<N>() + T::tree_hash_packing_factor() - 1)
25+
/ T::tree_hash_packing_factor(),
26+
);
27+
28+
for item in vec {
29+
hasher
30+
.write(&item.tree_hash_packed_encoding())
31+
.expect("ssz_types variable vec should not contain more elements than max");
32+
}
1633

17-
for item in vec {
1834
hasher
19-
.write(&item.tree_hash_packed_encoding())
20-
.expect("ssz_types variable vec should not contain more elements than max");
35+
.finish()
36+
.expect("ssz_types variable vec should not have a remaining buffer")
2137
}
38+
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
39+
let mut hasher = MerkleHasher::with_leaves(N::to_usize());
2240

23-
hasher
24-
.finish()
25-
.expect("ssz_types variable vec should not have a remaining buffer")
26-
}
27-
TreeHashType::Container | TreeHashType::List | TreeHashType::Vector => {
28-
let mut hasher = MerkleHasher::with_leaves(N::to_usize());
41+
for item in vec {
42+
hasher
43+
.write(item.tree_hash_root().as_slice())
44+
.expect("ssz_types vec should not contain more elements than max");
45+
}
2946

30-
for item in vec {
3147
hasher
32-
.write(item.tree_hash_root().as_slice())
33-
.expect("ssz_types vec should not contain more elements than max");
48+
.finish()
49+
.expect("ssz_types vec should not have a remaining buffer")
3450
}
51+
}
52+
}
53+
}
54+
55+
#[cfg(feature = "cap-typenum-to-usize-overflow")]
56+
mod arch_32x_workaround {
57+
use super::*;
58+
use ethereum_hashing::{hash32_concat, ZERO_HASHES};
59+
use tree_hash::{Hash256, TreeHash};
60+
use typenum::Unsigned;
61+
62+
type MaxDepth = typenum::U536870912;
3563

36-
hasher
37-
.finish()
38-
.expect("ssz_types vec should not have a remaining buffer")
64+
fn pad_to_depth<Current: Unsigned, Target: Unsigned>(
65+
hash: Hash256,
66+
target_depth: usize,
67+
current_depth: usize,
68+
) -> Hash256 {
69+
let mut curhash: [u8; 32] = hash.0;
70+
for depth in current_depth..target_depth {
71+
curhash = hash32_concat(&curhash, ZERO_HASHES[depth].as_slice());
72+
}
73+
curhash.into()
74+
}
75+
76+
fn target_tree_depth<T: TreeHash, N: Unsigned>() -> usize {
77+
let packing_factor = packing_factor::<T>();
78+
let packing_factor_log2 = packing_factor.next_power_of_two().ilog2() as usize;
79+
let tree_depth = N::to_u64().next_power_of_two().ilog2() as usize;
80+
tree_depth - packing_factor_log2
81+
}
82+
83+
pub fn vec_tree_hash_root<T: TreeHash, N: Unsigned>(vec: &[T]) -> Hash256 {
84+
if N::to_u64() <= MaxDepth::to_u64() {
85+
default_impl::vec_tree_hash_root::<T, N>(vec)
86+
} else {
87+
let main_tree_hash = default_impl::vec_tree_hash_root::<T, MaxDepth>(vec);
88+
89+
let target_depth = target_tree_depth::<T, N>();
90+
let current_depth = target_tree_depth::<T, MaxDepth>();
91+
92+
pad_to_depth::<MaxDepth, N>(main_tree_hash, target_depth, current_depth)
3993
}
4094
}
4195
}
96+
97+
#[cfg(any(
98+
target_pointer_width = "64",
99+
not(feature = "cap-typenum-to-usize-overflow")
100+
))]
101+
pub use default_impl::vec_tree_hash_root;
102+
103+
#[cfg(all(
104+
not(target_pointer_width = "64"),
105+
feature = "cap-typenum-to-usize-overflow"
106+
))]
107+
pub use arch_32x_workaround::vec_tree_hash_root;

src/typenum_helpers.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
use typenum::Unsigned;
2+
3+
// On x64, all typenums always fit usize
4+
#[cfg(target_pointer_width = "64")]
5+
pub fn to_usize<N: Unsigned>() -> usize {
6+
N::to_usize()
7+
}
8+
9+
/// Converts the type-level number `N` to `usize` on targets whose pointers are not 64 bits wide.
///
/// On such targets large typenums do not fit `usize` (on 32-bit targets, anything starting from
/// 2**32), so the `usize` value is compared against the `u64` value to detect overflow.
///
/// # Panics
///
/// Panics on overflow unless the `cap-typenum-to-usize-overflow` feature is enabled, in which
/// case `usize::MAX` is returned instead.
#[cfg(not(target_pointer_width = "64"))]
pub fn to_usize<N: Unsigned>() -> usize {
    let wide = N::to_u64();
    let narrow = N::to_usize();

    if narrow as u64 == wide {
        // The usize and u64 representations agree, so N fits usize and no overflow happened.
        narrow
    } else if cfg!(feature = "cap-typenum-to-usize-overflow") {
        // Option 2. Use usize::MAX - this allows working with VariableLists "virtually larger"
        // than usize, provided the actual number of elements does not exceed usize::MAX.
        //
        // One example is the Ethereum BeaconChain.validators field, which is a
        // VariableList<..., 2**40>, while the actual number of validators is far less than 2**32.
        //
        // This option still seems sound: if the number of elements actually surpasses
        // usize::MAX, the machine running this will OOM/segfault/otherwise violently crash the
        // program, which is nearly equivalent to a panic.
        //
        // Still, this is a double-edged sword; only enable it if you can guarantee that none of
        // the VariableLists used in your program will have more than usize::MAX elements on the
        // architecture with the smallest usize it will ever run on.
        usize::MAX
    } else {
        // Option 1. Loudly panic with as informative a message as possible.
        panic!(
            "Overflow converting typenum U{} to usize (usize::MAX={})",
            wide,
            usize::MAX
        )
    }
}

0 commit comments

Comments
 (0)