Skip to content

Commit 5c3ff26

Browse files
committed
Merge #289: type inference: fix bug in union-bound algorithm
f821e22 fuzz: add ability to generate Simplicity construct nodes, test for encode/decode roundtrip (Andrew Poelstra) bd3fe4e test: add unit tests which demonstrate problems fixed by the last commit (Andrew Poelstra) 75dd206 types: fix bug in "halving" variant of the union-bound algorithm (Andrew Poelstra) 78288bc types: cut off display after 10000 nodes (Andrew Poelstra) Pull request description: We have implemented the "halving" variant of the union-bound algorithm, but my code to do so was wrong in a way that sometimes causes types to "come ununified", i.e. a single type bound is split into two, with different instances in the program more-or-less randomly assigned to each side of the split. This causes all sorts of weird behavior. Fixes #286 ACKs for top commit: uncomputable: utACK f821e22 Tree-SHA512: aa6c1f36e9c26170111bae6222cc8de2e468f1f06752739a89897d807fd212995f69b68a4d31c5c1373b344df9bbec23967e447350cb8ce75656f868d9e73a6c
2 parents 23c3ffb + f821e22 commit 5c3ff26

File tree

8 files changed

+344
-17
lines changed

8 files changed

+344
-17
lines changed

.github/workflows/fuzz.yml

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ c_rust_merkle,
2323
decode_natural,
2424
decode_program,
2525
parse_human,
26+
regression_286,
2627
regression_value,
2728
]
2829
steps:

fuzz/Cargo.toml

+7
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@ test = false
6767
doc = false
6868
bench = false
6969

70+
[[bin]]
71+
name = "regression_286"
72+
path = "fuzz_targets/regression_286.rs"
73+
test = false
74+
doc = false
75+
bench = false
76+
7077
[[bin]]
7178
name = "regression_value"
7279
path = "fuzz_targets/regression_value.rs"

fuzz/fuzz_lib/lib.rs

+12
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ use simplicity::types::Final as FinalTy;
77
use simplicity::{BitIter, Value};
88
use std::sync::Arc;
99

10+
mod program;
11+
12+
pub use program::ProgramControl;
13+
1014
/// A wrapper around a buffer which has utilities for extracting various
1115
/// Simplicity types.
1216
#[derive(Clone)]
@@ -16,6 +20,7 @@ pub struct Extractor<'f> {
1620
bit_len: usize,
1721
}
1822

23+
// More impls in the other modules.
1924
impl<'f> Extractor<'f> {
2025
/// Wrap the buffer in an extractor.
2126
pub fn new(data: &'f [u8]) -> Self {
@@ -37,6 +42,13 @@ impl<'f> Extractor<'f> {
3742
}
3843
}
3944

45+
/// Attempt to yield a u16 from the fuzzer.
46+
///
47+
/// Internally, extracts in big-endian.
48+
pub fn extract_u16(&mut self) -> Option<u16> {
49+
Some((u16::from(self.extract_u8()?) << 8) + u16::from(self.extract_u8()?))
50+
}
51+
4052
/// Attempt to yield a single bit from the fuzzer.
4153
pub fn extract_bit(&mut self) -> Option<bool> {
4254
if self.bit_len == 0 {

fuzz/fuzz_lib/program.rs

+183
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
// SPDX-License-Identifier: CC0-1.0
2+
3+
use std::sync::Arc;
4+
5+
use super::Extractor;
6+
use simplicity::node::{
7+
CoreConstructible as _, DisconnectConstructible as _, JetConstructible as _,
8+
WitnessConstructible as _,
9+
};
10+
use simplicity::types;
11+
use simplicity::{jet::Core, Cmr, ConstructNode, FailEntropy};
12+
13+
/// Structure controlling the kind of programs generated by the fuzzer.
14+
pub struct ProgramControl {
15+
/// Whether to attempt to insert "type bombs" into the generated program.
16+
///
17+
/// Importantly, type bombs may have 2^n nodes for fairly large n, and will
18+
/// not respect `max_nodes`. So if you are trying to generate small programs
19+
/// you should not enable this.
20+
pub enable_type_bomb: bool,
21+
/// Whether to attempt to insert disconnect nodes into the generated program.
22+
pub enable_disconnect: bool,
23+
/// Whether to attempt to insert witness nodes into the generated program.
24+
pub enable_witness: bool,
25+
/// Whether to attempt to insert fail nodes into the generated program.
26+
pub enable_fail: bool,
27+
/// Whether to attempt to insert assertl and assertr nodes into the generated program.
28+
pub enable_asserts: bool,
29+
/// Maximum number of nodes a generated program may have. This limit may not
30+
/// be exactly enforced. If it is `None`, no limit is enforced.
31+
pub max_nodes: Option<usize>,
32+
}
33+
34+
impl ProgramControl {
35+
fn from_u16(u: u16) -> Self {
36+
ProgramControl {
37+
enable_type_bomb: u & 0x8000 == 0x8000,
38+
enable_disconnect: u & 0x4000 == 0x4000,
39+
enable_witness: u & 0x2000 == 0x2000,
40+
enable_fail: u & 0x1000 == 0x1000,
41+
enable_asserts: u & 0x0800 == 0x0800,
42+
max_nodes: Some(5 * usize::from(u & 0x07ff)),
43+
}
44+
}
45+
}
46+
47+
impl Extractor<'_> {
48+
pub fn extract_core_construct_node(
49+
&mut self,
50+
force_control: Option<ProgramControl>,
51+
) -> Option<Arc<ConstructNode<Core>>> {
52+
type ArcNode = Arc<ConstructNode<Core>>;
53+
54+
let ctx = types::Context::new();
55+
let mut stack: Vec<ArcNode> = vec![];
56+
57+
let program_control =
58+
force_control.unwrap_or(ProgramControl::from_u16(self.extract_u16()?));
59+
60+
let mut count = 0usize;
61+
for _ in 0..program_control.max_nodes.unwrap_or(usize::MAX) {
62+
let control = self.extract_u8()?;
63+
if program_control.enable_type_bomb && control & 0x80 == 0x80 {
64+
let mut ret = stack.pop()?;
65+
// Special-case: type bomb. Iterate x -> pair(x, x) on the top stack
66+
// item up to 128 times. Its CPU cost and target type will blow up
67+
// by a factor 2^128. If its target type has nonzero size this should
68+
// fail to construct; if it's 0 we should be able to construct it but
69+
// the bit machine should reject it.
70+
for _ in 0..control & 0x7f {
71+
// FIXME should we refuse to make the type-bomb if `ret` contains any
72+
// witness or disconnect nodes? In this case the encoding of our
73+
// CommitNode won't round-trip, since we're force-sharing both children
74+
// of this `pair` but when decoding `CommitNode` we reject anything that
75+
// shares witnesses or disconnects, which at commit-time we treat as
76+
// being unique and never shared.
77+
ret = ArcNode::pair(&ret, &ret).unwrap();
78+
}
79+
stack.push(ret);
80+
} else {
81+
match control {
82+
// Return whatever we've got (note that this will "waste" everything else
83+
// on the stack)
84+
0 => {
85+
if stack.len() == 1 {
86+
return stack.pop();
87+
} else {
88+
return None;
89+
}
90+
}
91+
// 1 through 63
92+
1 => stack.push(ArcNode::unit(&ctx)),
93+
2 => stack.push(ArcNode::iden(&ctx)),
94+
3 => {
95+
use simplicity::dag::DagLike as _;
96+
97+
let val = self.extract_value_direct()?;
98+
if program_control.max_nodes.is_some() {
99+
for _ in val.as_ref().pre_order_iter::<simplicity::dag::NoSharing>() {
100+
count = count.checked_add(1)?;
101+
}
102+
}
103+
if let Some(max) = program_control.max_nodes {
104+
if val.compact_len() > max {
105+
return None;
106+
}
107+
}
108+
stack.push(ArcNode::scribe(&ctx, &val));
109+
}
110+
4 if program_control.enable_witness => stack.push(ArcNode::witness(&ctx, None)),
111+
5 => {
112+
let child = stack.pop()?;
113+
stack.push(ArcNode::injl(&child));
114+
}
115+
6 => {
116+
let child = stack.pop()?;
117+
stack.push(ArcNode::injr(&child));
118+
}
119+
7 => {
120+
let child = stack.pop()?;
121+
stack.push(ArcNode::drop_(&child));
122+
}
123+
8 => {
124+
let child = stack.pop()?;
125+
stack.push(ArcNode::take(&child));
126+
}
127+
9 => {
128+
let child = stack.pop()?;
129+
let cmr_u8 = self.extract_u8()?;
130+
let cmr = Cmr::from_byte_array([cmr_u8; 32]);
131+
stack.push(ArcNode::assertl(&child, cmr).ok()?);
132+
}
133+
10 => {
134+
let child = stack.pop()?;
135+
let cmr_u8 = self.extract_u8()?;
136+
let cmr = Cmr::from_byte_array([cmr_u8; 32]);
137+
stack.push(ArcNode::assertr(cmr, &child).ok()?);
138+
}
139+
11 if program_control.enable_fail => {
140+
let fail_u8 = self.extract_u8()?;
141+
let fail = FailEntropy::from_byte_array([fail_u8; 64]);
142+
stack.push(ArcNode::fail(&ctx, fail));
143+
}
144+
12 => {
145+
let rchild = stack.pop()?;
146+
let lchild = stack.pop()?;
147+
stack.push(ArcNode::pair(&lchild, &rchild).ok()?);
148+
}
149+
13 => {
150+
let rchild = stack.pop()?;
151+
let lchild = stack.pop()?;
152+
stack.push(ArcNode::case(&lchild, &rchild).ok()?);
153+
}
154+
14 => {
155+
let rchild = stack.pop()?;
156+
let lchild = stack.pop()?;
157+
stack.push(ArcNode::comp(&lchild, &rchild).ok()?);
158+
}
159+
15 if program_control.enable_disconnect => {
160+
let child = stack.pop()?;
161+
stack.push(ArcNode::disconnect(&child, &None).ok()?);
162+
}
163+
// We assume that the above cases did not cover 64-255, so that if we
164+
// right-shift by 6 we can get all 4 values.
165+
_ => {
166+
let extra_bits = usize::from(control >> 6);
167+
let idx = (extra_bits << 8) + usize::from(self.extract_u8()?);
168+
stack.push(ArcNode::jet(&ctx, Core::ALL[idx % Core::ALL.len()]));
169+
}
170+
}
171+
}
172+
173+
if let Some(max) = program_control.max_nodes {
174+
count = count.checked_add(1)?;
175+
if count > max {
176+
return None;
177+
}
178+
}
179+
}
180+
181+
None
182+
}
183+
}

fuzz/fuzz_targets/regression_286.rs

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// SPDX-License-Identifier: CC0-1.0
2+
3+
#![cfg_attr(fuzzing, no_main)]
4+
5+
#[cfg(any(fuzzing, test))]
6+
fn do_test(data: &[u8]) {
7+
use simplicity::{jet::Core, BitIter, CommitNode};
8+
9+
let mut extractor = simplicity_fuzz::Extractor::new(data);
10+
11+
let construct =
12+
match extractor.extract_core_construct_node(Some(simplicity_fuzz::ProgramControl {
13+
enable_type_bomb: false,
14+
enable_disconnect: false,
15+
enable_witness: false,
16+
enable_fail: false,
17+
enable_asserts: true,
18+
max_nodes: Some(25),
19+
})) {
20+
Some(x) => x,
21+
None => return,
22+
};
23+
//println!("constructed {construct}");
24+
let finalized = match construct.finalize_types() {
25+
Ok(x) => x,
26+
Err(_) => return,
27+
};
28+
//println!("finalized {finalized}");
29+
let prog = finalized.encode_to_vec();
30+
//println!("{}", simplicity::bitcoin::hex::DisplayHex::as_hex(&prog));
31+
let prog = BitIter::from(prog);
32+
let decode = CommitNode::<Core>::decode(prog).unwrap();
33+
assert_eq!(
34+
finalized, decode,
35+
"Constructed committed LHS; encoded and decoded to get RHS",
36+
);
37+
}
38+
39+
#[cfg(fuzzing)]
40+
libfuzzer_sys::fuzz_target!(|data| do_test(data));
41+
42+
#[cfg(not(fuzzing))]
43+
fn main() {}
44+
45+
#[cfg(test)]
46+
mod tests {
47+
use base64::Engine;
48+
49+
#[test]
50+
fn duplicate_crash() {
51+
let data = base64::prelude::BASE64_STANDARD
52+
.decode("Cg==")
53+
.expect("base64 should be valid");
54+
super::do_test(&data);
55+
}
56+
}

src/node/construct.rs

+70
Original file line numberDiff line numberDiff line change
@@ -517,4 +517,74 @@ mod tests {
517517
.cmr()
518518
);
519519
}
520+
521+
#[test]
522+
fn regression_286_1() {
523+
// This is the smallest pure Simplicity program I was able to find that exhibits the bad
524+
// behavior seen in https://github.com/BlockstreamResearch/rust-simplicity/issues/286
525+
let ctx = types::Context::new();
526+
let cmr = Cmr::from_byte_array([0xde; 32]);
527+
528+
let u0 = Arc::<ConstructNode<Core>>::unit(&ctx);
529+
let i1 = Arc::<ConstructNode<Core>>::injl(&u0);
530+
let i2 = Arc::<ConstructNode<Core>>::injr(&i1);
531+
let i3 = Arc::<ConstructNode<Core>>::injr(&i2);
532+
let i4 = Arc::<ConstructNode<Core>>::injl(&i3);
533+
let u5 = Arc::<ConstructNode<Core>>::unit(&ctx);
534+
let i6 = Arc::<ConstructNode<Core>>::injl(&u5);
535+
let i7 = Arc::<ConstructNode<Core>>::injr(&i6);
536+
let p8 = Arc::<ConstructNode<Core>>::pair(&i4, &i7).unwrap();
537+
let u9 = Arc::<ConstructNode<Core>>::unit(&ctx);
538+
let a10 = Arc::<ConstructNode<Core>>::assertr(cmr, &u9).unwrap();
539+
let u11 = Arc::<ConstructNode<Core>>::unit(&ctx);
540+
let a12 = Arc::<ConstructNode<Core>>::assertr(cmr, &u11).unwrap();
541+
let a13 = Arc::<ConstructNode<Core>>::assertl(&a12, cmr).unwrap();
542+
let c14 = Arc::<ConstructNode<Core>>::case(&a10, &a13).unwrap();
543+
let c15 = Arc::<ConstructNode<Core>>::comp(&p8, &c14).unwrap();
544+
545+
let finalized: Arc<CommitNode<_>> = c15.finalize_types().unwrap();
546+
let prog = finalized.encode_to_vec();
547+
// In #286 we are encoding correctly...
548+
assert_eq!(
549+
hex::DisplayHex::as_hex(&prog).to_string(),
550+
"dc920a28812b6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f243090e00b10e00680",
551+
);
552+
553+
let prog = BitIter::from(prog);
554+
let decode = CommitNode::<Core>::decode(prog).unwrap();
555+
556+
// ...but then during decoding we read the program incorrectly and this assertion fails.
557+
assert_eq!(finalized, decode);
558+
}
559+
560+
#[test]
561+
fn regression_286_2() {
562+
use crate::types::Error as TypeError;
563+
use crate::Error;
564+
565+
// This one is smaller because it starts with a witness node which has a large type.
566+
// This is a bit easier to grok but can't be serialized as a complete/valid program
567+
// without providing the witness data, which limits its ability to share with the
568+
// other libraries.
569+
//
570+
// It also exhibits the bug earlier than the other one -- it *should* just fail to
571+
// typecheck and not be constructible. So we can't get an encoding of it.
572+
let ctx = types::Context::new();
573+
574+
let w0 = Arc::<ConstructNode<Core>>::witness(&ctx, None);
575+
let i1 = Arc::<ConstructNode<Core>>::iden(&ctx);
576+
let d2 = Arc::<ConstructNode<Core>>::drop_(&i1);
577+
let i3 = Arc::<ConstructNode<Core>>::iden(&ctx);
578+
let i4 = Arc::<ConstructNode<Core>>::iden(&ctx);
579+
let t5 = Arc::<ConstructNode<Core>>::take(&i4);
580+
let ca6 = Arc::<ConstructNode<Core>>::case(&i3, &t5).unwrap();
581+
let ca7 = Arc::<ConstructNode<Core>>::case(&d2, &ca6).unwrap();
582+
let c8 = Arc::<ConstructNode<Core>>::comp(&w0, &ca7).unwrap();
583+
let u9 = Arc::<ConstructNode<Core>>::unit(&ctx);
584+
let c10 = Arc::<ConstructNode<Core>>::comp(&c8, &u9).unwrap();
585+
586+
// In #286 we incorrectly succeed finalizing the types, and then encode a bad program.
587+
let err = c10.finalize_types().unwrap_err();
588+
assert!(matches!(err, Error::Type(TypeError::OccursCheck { .. })));
589+
}
520590
}

src/types/mod.rs

+9
Original file line numberDiff line numberDiff line change
@@ -368,12 +368,17 @@ impl Type {
368368
}
369369

370370
const MAX_DISPLAY_DEPTH: usize = 64;
371+
const MAX_DISPLAY_LENGTH: usize = 10000;
371372

372373
impl fmt::Debug for Type {
373374
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
374375
for data in (&self.ctx, self.inner.bound.root())
375376
.verbose_pre_order_iter::<NoSharing>(Some(MAX_DISPLAY_DEPTH))
376377
{
378+
if data.index > MAX_DISPLAY_LENGTH {
379+
write!(f, "... [truncated type after {} nodes]", MAX_DISPLAY_LENGTH)?;
380+
return Ok(());
381+
}
377382
if data.depth == MAX_DISPLAY_DEPTH {
378383
if data.n_children_yielded == 0 {
379384
f.write_str("...")?;
@@ -407,6 +412,10 @@ impl fmt::Display for Type {
407412
for data in (&self.ctx, self.inner.bound.root())
408413
.verbose_pre_order_iter::<NoSharing>(Some(MAX_DISPLAY_DEPTH))
409414
{
415+
if data.index > MAX_DISPLAY_LENGTH {
416+
write!(f, "... [truncated type after {} nodes]", MAX_DISPLAY_LENGTH)?;
417+
return Ok(());
418+
}
410419
if data.depth == MAX_DISPLAY_DEPTH {
411420
if data.n_children_yielded == 0 {
412421
f.write_str("...")?;

0 commit comments

Comments
 (0)