Skip to content

type inference: fix bug in union-bound algorithm #289

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/fuzz.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ c_rust_merkle,
decode_natural,
decode_program,
parse_human,
regression_286,
regression_value,
]
steps:
Expand Down
7 changes: 7 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ test = false
doc = false
bench = false

[[bin]]
name = "regression_286"
path = "fuzz_targets/regression_286.rs"
test = false
doc = false
bench = false

[[bin]]
name = "regression_value"
path = "fuzz_targets/regression_value.rs"
Expand Down
12 changes: 12 additions & 0 deletions fuzz/fuzz_lib/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ use simplicity::types::Final as FinalTy;
use simplicity::{BitIter, Value};
use std::sync::Arc;

mod program;

pub use program::ProgramControl;

/// A wrapper around a buffer which has utilities for extracting various
/// Simplicity types.
#[derive(Clone)]
Expand All @@ -16,6 +20,7 @@ pub struct Extractor<'f> {
bit_len: usize,
}

// More impls in the other modules.
impl<'f> Extractor<'f> {
/// Wrap the buffer in an extractor.
pub fn new(data: &'f [u8]) -> Self {
Expand All @@ -37,6 +42,13 @@ impl<'f> Extractor<'f> {
}
}

/// Attempt to yield a u16 from the fuzzer.
///
/// Internally, extracts in big-endian.
pub fn extract_u16(&mut self) -> Option<u16> {
    let hi = self.extract_u8()?;
    let lo = self.extract_u8()?;
    Some(u16::from_be_bytes([hi, lo]))
}

/// Attempt to yield a single bit from the fuzzer.
pub fn extract_bit(&mut self) -> Option<bool> {
if self.bit_len == 0 {
Expand Down
183 changes: 183 additions & 0 deletions fuzz/fuzz_lib/program.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
// SPDX-License-Identifier: CC0-1.0

use std::sync::Arc;

use super::Extractor;
use simplicity::node::{
CoreConstructible as _, DisconnectConstructible as _, JetConstructible as _,
WitnessConstructible as _,
};
use simplicity::types;
use simplicity::{jet::Core, Cmr, ConstructNode, FailEntropy};

/// Structure controlling the kind of programs generated by the fuzzer.
///
/// Can be constructed directly (to force a particular configuration, as the
/// regression targets do) or decoded from two fuzzer-provided bytes; see
/// `ProgramControl::from_u16`.
pub struct ProgramControl {
    /// Whether to attempt to insert "type bombs" into the generated program.
    ///
    /// Importantly, type bombs may have 2^n nodes for fairly large n, and will
    /// not respect `max_nodes`. So if you are trying to generate small programs
    /// you should not enable this.
    pub enable_type_bomb: bool,
    /// Whether to attempt to insert disconnect nodes into the generated program.
    pub enable_disconnect: bool,
    /// Whether to attempt to insert witness nodes into the generated program.
    pub enable_witness: bool,
    /// Whether to attempt to insert fail nodes into the generated program.
    pub enable_fail: bool,
    /// Whether to attempt to insert assertl and assertr nodes into the generated program.
    pub enable_asserts: bool,
    /// Maximum number of nodes a generated program may have. This limit may not
    /// be exactly enforced. If it is `None`, no limit is enforced.
    pub max_nodes: Option<usize>,
}

impl ProgramControl {
    /// Decode a `ProgramControl` from 16 fuzzer-provided bits.
    ///
    /// The top five bits toggle the optional node kinds; the low eleven bits,
    /// scaled by 5, bound the number of nodes in the generated program.
    fn from_u16(u: u16) -> Self {
        let flag = |mask: u16| u & mask != 0;
        ProgramControl {
            enable_type_bomb: flag(0x8000),
            enable_disconnect: flag(0x4000),
            enable_witness: flag(0x2000),
            enable_fail: flag(0x1000),
            enable_asserts: flag(0x0800),
            max_nodes: Some(usize::from(u & 0x07ff) * 5),
        }
    }
}

impl Extractor<'_> {
    /// Attempt to extract a complete core-jet Simplicity program from the fuzzer.
    ///
    /// If `force_control` is provided it is used as-is; otherwise a
    /// [`ProgramControl`] is decoded from the next two fuzzer bytes.
    ///
    /// Returns `None` when the fuzzer data runs out, the node budget in
    /// `max_nodes` is exceeded, or the requested combination of nodes fails
    /// to construct (e.g. an ill-typed `comp`).
    pub fn extract_core_construct_node(
        &mut self,
        force_control: Option<ProgramControl>,
    ) -> Option<Arc<ConstructNode<Core>>> {
        type ArcNode = Arc<ConstructNode<Core>>;

        let ctx = types::Context::new();
        let mut stack: Vec<ArcNode> = vec![];

        // Only read control bytes from the fuzzer when the caller did not
        // force a control. (`Option::unwrap_or` evaluates its argument
        // eagerly, which would consume -- and with `?`, possibly abort on --
        // two bytes of fuzzer data even when `force_control` is `Some`.)
        let program_control = match force_control {
            Some(control) => control,
            None => ProgramControl::from_u16(self.extract_u16()?),
        };

        let mut count = 0usize;
        for _ in 0..program_control.max_nodes.unwrap_or(usize::MAX) {
            let control = self.extract_u8()?;
            if program_control.enable_type_bomb && control & 0x80 == 0x80 {
                let mut ret = stack.pop()?;
                // Special-case: type bomb. Iterate x -> pair(x, x) on the top stack
                // item up to 128 times. Its CPU cost and target type will blow up
                // by a factor 2^128. If its target type has nonzero size this should
                // fail to construct; if it's 0 we should be able to construct it but
                // the bit machine should reject it.
                for _ in 0..control & 0x7f {
                    // FIXME should we refuse to make the type-bomb if `ret` contains any
                    // witness or disconnect nodes? In this case the encoding of our
                    // CommitNode won't round-trip, since we're force-sharing both children
                    // of this `pair` but when decoding `CommitNode` we reject anything that
                    // shares witnesses or disconnects, which at commit-time we treat as
                    // being unique and never shared.
                    ret = ArcNode::pair(&ret, &ret).unwrap();
                }
                stack.push(ret);
            } else {
                match control {
                    // Return whatever we've got (note that this will "waste" everything else
                    // on the stack)
                    0 => {
                        if stack.len() == 1 {
                            return stack.pop();
                        } else {
                            return None;
                        }
                    }
                    // 1 through 63
                    1 => stack.push(ArcNode::unit(&ctx)),
                    2 => stack.push(ArcNode::iden(&ctx)),
                    3 => {
                        use simplicity::dag::DagLike as _;

                        let val = self.extract_value_direct()?;
                        // Scribes expand into one node per value-tree node, so
                        // charge them against the node budget individually.
                        if program_control.max_nodes.is_some() {
                            for _ in val.as_ref().pre_order_iter::<simplicity::dag::NoSharing>() {
                                count = count.checked_add(1)?;
                            }
                        }
                        if let Some(max) = program_control.max_nodes {
                            if val.compact_len() > max {
                                return None;
                            }
                        }
                        stack.push(ArcNode::scribe(&ctx, &val));
                    }
                    4 if program_control.enable_witness => stack.push(ArcNode::witness(&ctx, None)),
                    5 => {
                        let child = stack.pop()?;
                        stack.push(ArcNode::injl(&child));
                    }
                    6 => {
                        let child = stack.pop()?;
                        stack.push(ArcNode::injr(&child));
                    }
                    7 => {
                        let child = stack.pop()?;
                        stack.push(ArcNode::drop_(&child));
                    }
                    8 => {
                        let child = stack.pop()?;
                        stack.push(ArcNode::take(&child));
                    }
                    // Asserts are gated on `enable_asserts`, matching how the
                    // other optional node kinds (witness/fail/disconnect) are
                    // gated on their respective flags.
                    9 if program_control.enable_asserts => {
                        let child = stack.pop()?;
                        let cmr_u8 = self.extract_u8()?;
                        let cmr = Cmr::from_byte_array([cmr_u8; 32]);
                        stack.push(ArcNode::assertl(&child, cmr).ok()?);
                    }
                    10 if program_control.enable_asserts => {
                        let child = stack.pop()?;
                        let cmr_u8 = self.extract_u8()?;
                        let cmr = Cmr::from_byte_array([cmr_u8; 32]);
                        stack.push(ArcNode::assertr(cmr, &child).ok()?);
                    }
                    11 if program_control.enable_fail => {
                        let fail_u8 = self.extract_u8()?;
                        let fail = FailEntropy::from_byte_array([fail_u8; 64]);
                        stack.push(ArcNode::fail(&ctx, fail));
                    }
                    12 => {
                        let rchild = stack.pop()?;
                        let lchild = stack.pop()?;
                        stack.push(ArcNode::pair(&lchild, &rchild).ok()?);
                    }
                    13 => {
                        let rchild = stack.pop()?;
                        let lchild = stack.pop()?;
                        stack.push(ArcNode::case(&lchild, &rchild).ok()?);
                    }
                    14 => {
                        let rchild = stack.pop()?;
                        let lchild = stack.pop()?;
                        stack.push(ArcNode::comp(&lchild, &rchild).ok()?);
                    }
                    15 if program_control.enable_disconnect => {
                        let child = stack.pop()?;
                        stack.push(ArcNode::disconnect(&child, &None).ok()?);
                    }
                    // We assume that the above cases did not cover 64-255, so that if we
                    // right-shift by 6 we can get all 4 values.
                    _ => {
                        let extra_bits = usize::from(control >> 6);
                        let idx = (extra_bits << 8) + usize::from(self.extract_u8()?);
                        stack.push(ArcNode::jet(&ctx, Core::ALL[idx % Core::ALL.len()]));
                    }
                }
            }

            if let Some(max) = program_control.max_nodes {
                count = count.checked_add(1)?;
                if count > max {
                    return None;
                }
            }
        }

        None
    }
}
56 changes: 56 additions & 0 deletions fuzz/fuzz_targets/regression_286.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// SPDX-License-Identifier: CC0-1.0

#![cfg_attr(fuzzing, no_main)]

#[cfg(any(fuzzing, test))]
fn do_test(data: &[u8]) {
    use simplicity::{jet::Core, BitIter, CommitNode};

    // Restrict the generator to small programs with asserts enabled and
    // witness/disconnect/fail/type-bomb nodes disabled.
    let control = simplicity_fuzz::ProgramControl {
        enable_type_bomb: false,
        enable_disconnect: false,
        enable_witness: false,
        enable_fail: false,
        enable_asserts: true,
        max_nodes: Some(25),
    };

    let mut extractor = simplicity_fuzz::Extractor::new(data);
    let construct = match extractor.extract_core_construct_node(Some(control)) {
        Some(node) => node,
        None => return,
    };
    let finalized = match construct.finalize_types() {
        Ok(node) => node,
        Err(_) => return,
    };

    // Encode the committed program and decode it back; the round trip must
    // reproduce exactly the program we committed to.
    let encoded = finalized.encode_to_vec();
    let decoded = CommitNode::<Core>::decode(BitIter::from(encoded)).unwrap();
    assert_eq!(
        finalized, decoded,
        "Constructed committed LHS; encoded and decoded to get RHS",
    );
}

// Under `cargo fuzz` the libfuzzer harness drives `do_test` directly.
#[cfg(fuzzing)]
libfuzzer_sys::fuzz_target!(|data| do_test(data));

// Outside fuzzing builds, provide a no-op entry point so the binary compiles.
#[cfg(not(fuzzing))]
fn main() {}

#[cfg(test)]
mod tests {
    use base64::Engine;

    /// Replay the stored fuzzer input for issue #286 as a regression test.
    #[test]
    fn duplicate_crash() {
        let engine = &base64::prelude::BASE64_STANDARD;
        let data = engine.decode("Cg==").expect("base64 should be valid");
        super::do_test(&data);
    }
}
70 changes: 70 additions & 0 deletions src/node/construct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -517,4 +517,74 @@ mod tests {
.cmr()
);
}

#[test]
fn regression_286_1() {
    // This is the smallest pure Simplicity program I was able to find that exhibits the bad
    // behavior seen in https://github.com/BlockstreamResearch/rust-simplicity/issues/286
    let ctx = types::Context::new();
    let cmr = Cmr::from_byte_array([0xde; 32]);

    // Left half of the pair: injl(injr(injr(injl(unit)))).
    let u0 = Arc::<ConstructNode<Core>>::unit(&ctx);
    let i1 = Arc::<ConstructNode<Core>>::injl(&u0);
    let i2 = Arc::<ConstructNode<Core>>::injr(&i1);
    let i3 = Arc::<ConstructNode<Core>>::injr(&i2);
    let i4 = Arc::<ConstructNode<Core>>::injl(&i3);
    // Right half of the pair: injr(injl(unit)).
    let u5 = Arc::<ConstructNode<Core>>::unit(&ctx);
    let i6 = Arc::<ConstructNode<Core>>::injl(&u5);
    let i7 = Arc::<ConstructNode<Core>>::injr(&i6);
    let p8 = Arc::<ConstructNode<Core>>::pair(&i4, &i7).unwrap();
    // A case combinator over asserts, fed by the pair above via comp.
    let u9 = Arc::<ConstructNode<Core>>::unit(&ctx);
    let a10 = Arc::<ConstructNode<Core>>::assertr(cmr, &u9).unwrap();
    let u11 = Arc::<ConstructNode<Core>>::unit(&ctx);
    let a12 = Arc::<ConstructNode<Core>>::assertr(cmr, &u11).unwrap();
    let a13 = Arc::<ConstructNode<Core>>::assertl(&a12, cmr).unwrap();
    let c14 = Arc::<ConstructNode<Core>>::case(&a10, &a13).unwrap();
    let c15 = Arc::<ConstructNode<Core>>::comp(&p8, &c14).unwrap();

    let finalized: Arc<CommitNode<_>> = c15.finalize_types().unwrap();
    let prog = finalized.encode_to_vec();
    // In #286 we are encoding correctly...
    assert_eq!(
        hex::DisplayHex::as_hex(&prog).to_string(),
        "dc920a28812b6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f243090e00b10e00680",
    );

    let prog = BitIter::from(prog);
    let decode = CommitNode::<Core>::decode(prog).unwrap();

    // ...but then during decoding we read the program incorrectly and this assertion fails.
    assert_eq!(finalized, decode);
}

#[test]
fn regression_286_2() {
    use crate::types::Error as TypeError;
    use crate::Error;

    // This one is smaller because it starts with a witness node which has a large type.
    // This is a bit easier to grok but can't be serialized as a complete/valid program
    // without providing the witness data, which limits its ability to share with the
    // other libraries.
    //
    // It also exhibits the bug earlier than the other one -- it *should* just fail to
    // typecheck and not be constructible. So we can't get an encoding of it.
    let ctx = types::Context::new();

    // comp (comp witness (case (drop iden) (case iden (take iden)))) unit
    let w0 = Arc::<ConstructNode<Core>>::witness(&ctx, None);
    let i1 = Arc::<ConstructNode<Core>>::iden(&ctx);
    let d2 = Arc::<ConstructNode<Core>>::drop_(&i1);
    let i3 = Arc::<ConstructNode<Core>>::iden(&ctx);
    let i4 = Arc::<ConstructNode<Core>>::iden(&ctx);
    let t5 = Arc::<ConstructNode<Core>>::take(&i4);
    let ca6 = Arc::<ConstructNode<Core>>::case(&i3, &t5).unwrap();
    let ca7 = Arc::<ConstructNode<Core>>::case(&d2, &ca6).unwrap();
    let c8 = Arc::<ConstructNode<Core>>::comp(&w0, &ca7).unwrap();
    let u9 = Arc::<ConstructNode<Core>>::unit(&ctx);
    let c10 = Arc::<ConstructNode<Core>>::comp(&c8, &u9).unwrap();

    // In #286 we incorrectly succeed finalizing the types, and then encode a bad program.
    let err = c10.finalize_types().unwrap_err();
    assert!(matches!(err, Error::Type(TypeError::OccursCheck { .. })));
}
}
9 changes: 9 additions & 0 deletions src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -368,12 +368,17 @@ impl Type {
}

// Maximum recursion depth when rendering a type with `Debug`/`Display`;
// passed to `verbose_pre_order_iter` as its depth bound.
const MAX_DISPLAY_DEPTH: usize = 64;
// Maximum number of type-DAG nodes rendered before `Debug`/`Display`
// output is truncated with a "[truncated type ...]" marker.
const MAX_DISPLAY_LENGTH: usize = 10000;

impl fmt::Debug for Type {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for data in (&self.ctx, self.inner.bound.root())
.verbose_pre_order_iter::<NoSharing>(Some(MAX_DISPLAY_DEPTH))
{
if data.index > MAX_DISPLAY_LENGTH {
write!(f, "... [truncated type after {} nodes]", MAX_DISPLAY_LENGTH)?;
return Ok(());
}
if data.depth == MAX_DISPLAY_DEPTH {
if data.n_children_yielded == 0 {
f.write_str("...")?;
Expand Down Expand Up @@ -407,6 +412,10 @@ impl fmt::Display for Type {
for data in (&self.ctx, self.inner.bound.root())
.verbose_pre_order_iter::<NoSharing>(Some(MAX_DISPLAY_DEPTH))
{
if data.index > MAX_DISPLAY_LENGTH {
write!(f, "... [truncated type after {} nodes]", MAX_DISPLAY_LENGTH)?;
return Ok(());
}
if data.depth == MAX_DISPLAY_DEPTH {
if data.n_children_yielded == 0 {
f.write_str("...")?;
Expand Down
Loading