diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 26f3343b..ef5f7f83 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -23,6 +23,7 @@ c_rust_merkle, decode_natural, decode_program, parse_human, +regression_286, regression_value, ] steps: diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index bd16d8bd..880390a5 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -67,6 +67,13 @@ test = false doc = false bench = false +[[bin]] +name = "regression_286" +path = "fuzz_targets/regression_286.rs" +test = false +doc = false +bench = false + [[bin]] name = "regression_value" path = "fuzz_targets/regression_value.rs" diff --git a/fuzz/fuzz_lib/lib.rs b/fuzz/fuzz_lib/lib.rs index 5d616657..d3772c85 100644 --- a/fuzz/fuzz_lib/lib.rs +++ b/fuzz/fuzz_lib/lib.rs @@ -7,6 +7,10 @@ use simplicity::types::Final as FinalTy; use simplicity::{BitIter, Value}; use std::sync::Arc; +mod program; + +pub use program::ProgramControl; + /// A wrapper around a buffer which has utilities for extracting various /// Simplicity types. #[derive(Clone)] @@ -16,6 +20,7 @@ pub struct Extractor<'f> { bit_len: usize, } +// More impls in the other modules. impl<'f> Extractor<'f> { /// Wrap the buffer in an extractor. pub fn new(data: &'f [u8]) -> Self { @@ -37,6 +42,13 @@ impl<'f> Extractor<'f> { } } + /// Attempt to yield a u16 from the fuzzer. + /// + /// Internally, extracts in big-endian. + pub fn extract_u16(&mut self) -> Option { + Some((u16::from(self.extract_u8()?) << 8) + u16::from(self.extract_u8()?)) + } + /// Attempt to yield a single bit from the fuzzer. pub fn extract_bit(&mut self) -> Option { if self.bit_len == 0 { diff --git a/fuzz/fuzz_lib/program.rs b/fuzz/fuzz_lib/program.rs new file mode 100644 index 00000000..b8d8e7f9 --- /dev/null +++ b/fuzz/fuzz_lib/program.rs @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: CC0-1.0 + +use std::sync::Arc; + +use super::Extractor; +use simplicity::node::{ + CoreConstructible as _, DisconnectConstructible as _, JetConstructible as _, + WitnessConstructible as _, +}; +use simplicity::types; +use simplicity::{jet::Core, Cmr, ConstructNode, FailEntropy}; + +/// Structure controlling the kind of programs generated by the fuzzer. +pub struct ProgramControl { + /// Whether to attempt to insert "type bombs" into the generated program. + /// + /// Importantly, type bombs may have 2^n nodes for fairly large n, and will + /// not respect `max_nodes`. So if you are trying to generate small programs + /// you should not enable this. + pub enable_type_bomb: bool, + /// Whether to attempt to insert disconnect nodes into the generated program. + pub enable_disconnect: bool, + /// Whether to attempt to insert witness nodes into the generated program. + pub enable_witness: bool, + /// Whether to attempt to insert fail nodes into the generated program. + pub enable_fail: bool, + /// Whether to attempt to insert assertl and assertr nodes into the generated program. + pub enable_asserts: bool, + /// Maximum number of nodes a generated program may have. This limit may not + /// be exactly enforced. If it is `None`, no limit is enforced. + pub max_nodes: Option, +} + +impl ProgramControl { + fn from_u16(u: u16) -> Self { + ProgramControl { + enable_type_bomb: u & 0x8000 == 0x8000, + enable_disconnect: u & 0x4000 == 0x4000, + enable_witness: u & 0x2000 == 0x2000, + enable_fail: u & 0x1000 == 0x1000, + enable_asserts: u & 0x0800 == 0x0800, + max_nodes: Some(5 * usize::from(u & 0x07ff)), + } + } +} + +impl Extractor<'_> { + pub fn extract_core_construct_node( + &mut self, + force_control: Option, + ) -> Option>> { + type ArcNode = Arc>; + + let ctx = types::Context::new(); + let mut stack: Vec = vec![]; + + let program_control = + force_control.unwrap_or(ProgramControl::from_u16(self.extract_u16()?)); + + let mut count = 0usize; + for _ in 0..program_control.max_nodes.unwrap_or(usize::MAX) { + let control = self.extract_u8()?; + if program_control.enable_type_bomb && control & 0x80 == 0x80 { + let mut ret = stack.pop()?; + // Special-case: type bomb. Iterate x -> pair(x, x) on the top stack + // item up to 128 times. Its CPU cost and target type will blow up + // by a factor 2^128. If its target type has nonzero size this should + // fail to construct; if it's 0 we should be able to construct it but + // the bit machine should reject it. + for _ in 0..control & 0x7f { + // FIXME should we refuse to make the type-bomb if `ret` contains any + // witness or disconnect nodes? In this case the encoding of our + // CommitNode won't round-trip, since we're force-sharing both children + // of this `pair` but when decoding `CommitNode` we reject anything that + // shares witnesses or disconnects, which at commit-time we treat as + // being unique and never shared. + ret = ArcNode::pair(&ret, &ret).unwrap(); + } + stack.push(ret); + } else { + match control { + // Return whatever we've got (note that this will "waste" everything else + // on the stack) + 0 => { + if stack.len() == 1 { + return stack.pop(); + } else { + return None; + } + } + // 1 through 63 + 1 => stack.push(ArcNode::unit(&ctx)), + 2 => stack.push(ArcNode::iden(&ctx)), + 3 => { + use simplicity::dag::DagLike as _; + + let val = self.extract_value_direct()?; + if program_control.max_nodes.is_some() { + for _ in val.as_ref().pre_order_iter::() { + count = count.checked_add(1)?; + } + } + if let Some(max) = program_control.max_nodes { + if val.compact_len() > max { + return None; + } + } + stack.push(ArcNode::scribe(&ctx, &val)); + } + 4 if program_control.enable_witness => stack.push(ArcNode::witness(&ctx, None)), + 5 => { + let child = stack.pop()?; + stack.push(ArcNode::injl(&child)); + } + 6 => { + let child = stack.pop()?; + stack.push(ArcNode::injr(&child)); + } + 7 => { + let child = stack.pop()?; + stack.push(ArcNode::drop_(&child)); + } + 8 => { + let child = stack.pop()?; + stack.push(ArcNode::take(&child)); + } + 9 => { + let child = stack.pop()?; + let cmr_u8 = self.extract_u8()?; + let cmr = Cmr::from_byte_array([cmr_u8; 32]); + stack.push(ArcNode::assertl(&child, cmr).ok()?); + } + 10 => { + let child = stack.pop()?; + let cmr_u8 = self.extract_u8()?; + let cmr = Cmr::from_byte_array([cmr_u8; 32]); + stack.push(ArcNode::assertr(cmr, &child).ok()?); + } + 11 if program_control.enable_fail => { + let fail_u8 = self.extract_u8()?; + let fail = FailEntropy::from_byte_array([fail_u8; 64]); + stack.push(ArcNode::fail(&ctx, fail)); + } + 12 => { + let rchild = stack.pop()?; + let lchild = stack.pop()?; + stack.push(ArcNode::pair(&lchild, &rchild).ok()?); + } + 13 => { + let rchild = stack.pop()?; + let lchild = stack.pop()?; + stack.push(ArcNode::case(&lchild, &rchild).ok()?); + } + 14 => { + let rchild = stack.pop()?; + let lchild = stack.pop()?; + stack.push(ArcNode::comp(&lchild, &rchild).ok()?); + } + 15 if program_control.enable_disconnect => { + let child = stack.pop()?; + stack.push(ArcNode::disconnect(&child, &None).ok()?); + } + // We assume that the above cases did not cover 64-255, so that if we + // right-shift by 6 we can get all 4 values. + _ => { + let extra_bits = usize::from(control >> 6); + let idx = (extra_bits << 8) + usize::from(self.extract_u8()?); + stack.push(ArcNode::jet(&ctx, Core::ALL[idx % Core::ALL.len()])); + } + } + } + + if let Some(max) = program_control.max_nodes { + count = count.checked_add(1)?; + if count > max { + return None; + } + } + } + + None + } +} diff --git a/fuzz/fuzz_targets/regression_286.rs b/fuzz/fuzz_targets/regression_286.rs new file mode 100644 index 00000000..7ef04cc9 --- /dev/null +++ b/fuzz/fuzz_targets/regression_286.rs @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: CC0-1.0 + +#![cfg_attr(fuzzing, no_main)] + +#[cfg(any(fuzzing, test))] +fn do_test(data: &[u8]) { + use simplicity::{jet::Core, BitIter, CommitNode}; + + let mut extractor = simplicity_fuzz::Extractor::new(data); + + let construct = + match extractor.extract_core_construct_node(Some(simplicity_fuzz::ProgramControl { + enable_type_bomb: false, + enable_disconnect: false, + enable_witness: false, + enable_fail: false, + enable_asserts: true, + max_nodes: Some(25), + })) { + Some(x) => x, + None => return, + }; + //println!("constructed {construct}"); + let finalized = match construct.finalize_types() { + Ok(x) => x, + Err(_) => return, + }; + //println!("finalized {finalized}"); + let prog = finalized.encode_to_vec(); + //println!("{}", simplicity::bitcoin::hex::DisplayHex::as_hex(&prog)); + let prog = BitIter::from(prog); + let decode = CommitNode::::decode(prog).unwrap(); + assert_eq!( + finalized, decode, + "Constructed committed LHS; encoded and decoded to get RHS", + ); +} + +#[cfg(fuzzing)] +libfuzzer_sys::fuzz_target!(|data| do_test(data)); + +#[cfg(not(fuzzing))] +fn main() {} + +#[cfg(test)] +mod tests { + use base64::Engine; + + #[test] + fn duplicate_crash() { + let data = base64::prelude::BASE64_STANDARD + .decode("Cg==") + .expect("base64 should be valid"); + super::do_test(&data); + } +} diff --git a/src/node/construct.rs b/src/node/construct.rs index ec7ef937..e44b7be6 100644 --- a/src/node/construct.rs +++ b/src/node/construct.rs @@ -517,4 +517,74 @@ mod tests { .cmr() ); } + + #[test] + fn regression_286_1() { + // This is the smallest pure Simplicity program I was able to find that exhibits the bad + // behavior seen in https://github.com/BlockstreamResearch/rust-simplicity/issues/286 + let ctx = types::Context::new(); + let cmr = Cmr::from_byte_array([0xde; 32]); + + let u0 = Arc::>::unit(&ctx); + let i1 = Arc::>::injl(&u0); + let i2 = Arc::>::injr(&i1); + let i3 = Arc::>::injr(&i2); + let i4 = Arc::>::injl(&i3); + let u5 = Arc::>::unit(&ctx); + let i6 = Arc::>::injl(&u5); + let i7 = Arc::>::injr(&i6); + let p8 = Arc::>::pair(&i4, &i7).unwrap(); + let u9 = Arc::>::unit(&ctx); + let a10 = Arc::>::assertr(cmr, &u9).unwrap(); + let u11 = Arc::>::unit(&ctx); + let a12 = Arc::>::assertr(cmr, &u11).unwrap(); + let a13 = Arc::>::assertl(&a12, cmr).unwrap(); + let c14 = Arc::>::case(&a10, &a13).unwrap(); + let c15 = Arc::>::comp(&p8, &c14).unwrap(); + + let finalized: Arc> = c15.finalize_types().unwrap(); + let prog = finalized.encode_to_vec(); + // In #286 we are encoding correctly... + assert_eq!( + hex::DisplayHex::as_hex(&prog).to_string(), + "dc920a28812b6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f6f243090e00b10e00680", + ); + + let prog = BitIter::from(prog); + let decode = CommitNode::::decode(prog).unwrap(); + + // ...but then during decoding we read the program incorrectly and this assertion fails. + assert_eq!(finalized, decode); + } + + #[test] + fn regression_286_2() { + use crate::types::Error as TypeError; + use crate::Error; + + // This one is smaller because it starts with a witness node which has a large type. + // This is a bit easier to grok but can't be serialized as a complete/valid program + // without providing the witness data, which limits its ability to share with the + // other libraries. + // + // It also exhibits the bug earlier than the other one -- it *should* just fail to + // typecheck and not be constructible. So we can't get an encoding of it. + let ctx = types::Context::new(); + + let w0 = Arc::>::witness(&ctx, None); + let i1 = Arc::>::iden(&ctx); + let d2 = Arc::>::drop_(&i1); + let i3 = Arc::>::iden(&ctx); + let i4 = Arc::>::iden(&ctx); + let t5 = Arc::>::take(&i4); + let ca6 = Arc::>::case(&i3, &t5).unwrap(); + let ca7 = Arc::>::case(&d2, &ca6).unwrap(); + let c8 = Arc::>::comp(&w0, &ca7).unwrap(); + let u9 = Arc::>::unit(&ctx); + let c10 = Arc::>::comp(&c8, &u9).unwrap(); + + // In #286 we incorrectly succeed finalizing the types, and then encode a bad program. + let err = c10.finalize_types().unwrap_err(); + assert!(matches!(err, Error::Type(TypeError::OccursCheck { .. }))); + } } diff --git a/src/types/mod.rs b/src/types/mod.rs index 75d93c36..81778e64 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -368,12 +368,17 @@ impl Type { } const MAX_DISPLAY_DEPTH: usize = 64; +const MAX_DISPLAY_LENGTH: usize = 10000; impl fmt::Debug for Type { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { for data in (&self.ctx, self.inner.bound.root()) .verbose_pre_order_iter::(Some(MAX_DISPLAY_DEPTH)) { + if data.index > MAX_DISPLAY_LENGTH { + write!(f, "... [truncated type after {} nodes]", MAX_DISPLAY_LENGTH)?; + return Ok(()); + } if data.depth == MAX_DISPLAY_DEPTH { if data.n_children_yielded == 0 { f.write_str("...")?; @@ -407,6 +412,10 @@ impl fmt::Display for Type { for data in (&self.ctx, self.inner.bound.root()) .verbose_pre_order_iter::(Some(MAX_DISPLAY_DEPTH)) { + if data.index > MAX_DISPLAY_LENGTH { + write!(f, "... [truncated type after {} nodes]", MAX_DISPLAY_LENGTH)?; + return Ok(()); + } if data.depth == MAX_DISPLAY_DEPTH { if data.n_children_yielded == 0 { f.write_str("...")?; diff --git a/src/types/union_bound.rs b/src/types/union_bound.rs index f47438d9..df0dd97d 100644 --- a/src/types/union_bound.rs +++ b/src/types/union_bound.rs @@ -93,15 +93,6 @@ impl UbData { } } -impl UbData { - fn shallow_clone(&self) -> Self { - match self { - UbData::Root(x) => UbData::Root(x.shallow_clone()), - UbData::EqualTo(eq) => UbData::EqualTo(eq.shallow_clone()), - } - } -} - impl UbElement { /// Turns an existing piece of data into a singleton union-bound set. pub fn new(data: T) -> Self { @@ -148,7 +139,7 @@ impl UbElement { } }; - let mut parent_inner_lock = parent.inner.lock().unwrap(); + let parent_inner_lock = parent.inner.lock().unwrap(); // If the parent has a parent, remove the intermediate link. (This is // the "halving" variant of union-bound.) match parent_inner_lock.data { @@ -156,13 +147,11 @@ impl UbElement { UbData::Root(..) => return parent.shallow_clone(), UbData::EqualTo(ref grandparent) => { let grandparent = grandparent.shallow_clone(); - // Okay to lock both grandparent and parent at once because the - // algorithm guarantees there are no cycles. - let gp_inner_lock = grandparent.inner.lock().unwrap(); - parent_inner_lock.data = gp_inner_lock.data.shallow_clone(); - drop(gp_inner_lock); - drop(parent_inner_lock); - x = grandparent.shallow_clone(); + // The previous `x_inner_lock` was dropped above, so this will not deadlock. + let mut x_inner_lock = x.inner.lock().unwrap(); + x_inner_lock.data = UbData::EqualTo(grandparent.shallow_clone()); + drop(x_inner_lock); + x = grandparent; } } }