diff --git a/crates/cli-support/Cargo.toml b/crates/cli-support/Cargo.toml index 2c6eb736edf..b3fbd68bb7f 100644 --- a/crates/cli-support/Cargo.toml +++ b/crates/cli-support/Cargo.toml @@ -28,3 +28,6 @@ wasm-bindgen-wasm-interpreter = { path = "../wasm-interpreter", version = '=0.2. wit-text = "0.8.0" wit-walrus = "0.6.0" wit-validator = "0.2.0" + +[dev-dependencies] +wat = "1.0" diff --git a/crates/cli-support/src/descriptor.rs b/crates/cli-support/src/descriptor.rs index a3b73a2221a..4bd515ec87d 100644 --- a/crates/cli-support/src/descriptor.rs +++ b/crates/cli-support/src/descriptor.rs @@ -108,14 +108,17 @@ pub enum VectorKind { } impl Descriptor { - pub fn decode(mut data: &[u32]) -> Descriptor { - let descriptor = Descriptor::_decode(&mut data, false); - assert!(data.is_empty(), "remaining data {:?}", data); - descriptor + /// Decodes `data` into a descriptor, or returns `None` if it's invalid. + pub fn decode(mut data: &[u32]) -> Option { + let descriptor = Descriptor::_decode(&mut data, false)?; + if !data.is_empty() { + return None; + } + Some(descriptor) } - fn _decode(data: &mut &[u32], clamped: bool) -> Descriptor { - match get(data) { + fn _decode(data: &mut &[u32], clamped: bool) -> Option { + Some(match get(data)? { I8 => Descriptor::I8, I16 => Descriptor::I16, I32 => Descriptor::I32, @@ -128,31 +131,31 @@ impl Descriptor { F32 => Descriptor::F32, F64 => Descriptor::F64, BOOLEAN => Descriptor::Boolean, - FUNCTION => Descriptor::Function(Box::new(Function::decode(data))), - CLOSURE => Descriptor::Closure(Box::new(Closure::decode(data))), - REF => Descriptor::Ref(Box::new(Descriptor::_decode(data, clamped))), - REFMUT => Descriptor::RefMut(Box::new(Descriptor::_decode(data, clamped))), - SLICE => Descriptor::Slice(Box::new(Descriptor::_decode(data, clamped))), - VECTOR => Descriptor::Vector(Box::new(Descriptor::_decode(data, clamped))), - OPTIONAL => Descriptor::Option(Box::new(Descriptor::_decode(data, clamped))), - RESULT => Descriptor::Result(Box::new(Descriptor::_decode(data, clamped))), + FUNCTION => Descriptor::Function(Box::new(Function::decode(data)?)), + CLOSURE => Descriptor::Closure(Box::new(Closure::decode(data)?)), + REF => Descriptor::Ref(Box::new(Descriptor::_decode(data, clamped)?)), + REFMUT => Descriptor::RefMut(Box::new(Descriptor::_decode(data, clamped)?)), + SLICE => Descriptor::Slice(Box::new(Descriptor::_decode(data, clamped)?)), + VECTOR => Descriptor::Vector(Box::new(Descriptor::_decode(data, clamped)?)), + OPTIONAL => Descriptor::Option(Box::new(Descriptor::_decode(data, clamped)?)), + RESULT => Descriptor::Result(Box::new(Descriptor::_decode(data, clamped)?)), CACHED_STRING => Descriptor::CachedString, STRING => Descriptor::String, EXTERNREF => Descriptor::Externref, - ENUM => Descriptor::Enum { hole: get(data) }, + ENUM => Descriptor::Enum { hole: get(data)? }, RUST_STRUCT => { - let name = get_string(data); + let name = get_string(data)?; Descriptor::RustStruct(name) } NAMED_EXTERNREF => { - let name = get_string(data); + let name = get_string(data)?; Descriptor::NamedExternref(name) } CHAR => Descriptor::Char, UNIT => Descriptor::Unit, - CLAMPED => Descriptor::_decode(data, true), - other => panic!("unknown descriptor: {}", other), - } + CLAMPED => Descriptor::_decode(data, true)?, + _ => return None, + }) } pub fn unwrap_function(self) -> Function { @@ -204,45 +207,48 @@ impl Descriptor { } } -fn get(a: &mut &[u32]) -> u32 { - let ret = a[0]; +fn get(a: &mut &[u32]) -> Option { + let ret = *a.get(0)?; *a = &a[1..]; - ret + Some(ret) } -fn get_string(data: &mut &[u32]) -> String { - (0..get(data)) - .map(|_| char::from_u32(get(data)).unwrap()) +fn get_string(data: &mut &[u32]) -> Option { + (0..get(data)?) + .map(|_| char::from_u32(get(data)?)) .collect() } impl Closure { - fn decode(data: &mut &[u32]) -> Closure { - let shim_idx = get(data); - let dtor_idx = get(data); - let mutable = get(data) == REFMUT; - assert_eq!(get(data), FUNCTION); - Closure { + fn decode(data: &mut &[u32]) -> Option { + let shim_idx = get(data)?; + let dtor_idx = get(data)?; + let mutable = get(data)? == REFMUT; + if get(data)? != FUNCTION { + return None; + } + + Some(Closure { shim_idx, dtor_idx, mutable, - function: Function::decode(data), - } + function: Function::decode(data)?, + }) } } impl Function { - fn decode(data: &mut &[u32]) -> Function { - let shim_idx = get(data); - let arguments = (0..get(data)) + fn decode(data: &mut &[u32]) -> Option { + let shim_idx = get(data)?; + let arguments = (0..get(data)?) .map(|_| Descriptor::_decode(data, false)) - .collect::>(); - Function { + .collect::>>()?; + Some(Function { arguments, shim_idx, - ret: Descriptor::_decode(data, false), - inner_ret: Some(Descriptor::_decode(data, false)), - } + ret: Descriptor::_decode(data, false)?, + inner_ret: Some(Descriptor::_decode(data, false)?), + }) } } diff --git a/crates/cli-support/src/descriptors.rs b/crates/cli-support/src/descriptors.rs index 36e81092a3b..d8aa2963387 100644 --- a/crates/cli-support/src/descriptors.rs +++ b/crates/cli-support/src/descriptors.rs @@ -14,9 +14,10 @@ use crate::descriptor::{Closure, Descriptor}; use anyhow::Error; use std::borrow::Cow; use std::collections::{HashMap, HashSet}; +use std::fmt::Write; use walrus::ImportId; use walrus::{CustomSection, FunctionId, Module, TypedCustomSectionId}; -use wasm_bindgen_wasm_interpreter::Interpreter; +use wasm_bindgen_wasm_interpreter::{Interpreter, Skip}; #[derive(Default, Debug)] pub struct WasmBindgenDescriptorsSection { @@ -40,6 +41,31 @@ pub fn execute(module: &mut Module) -> Result anyhow::Error { + let mut msg = format!("invalid descriptor: {descriptor:?}"); + + if !skipped.is_empty() { + writeln!( + msg, + "\n\n\ + Some functions containing unsupported instructions were skipped while\n\ + intepreting the wasm module, which may have caused this:" + ) + .unwrap(); + + for skip in skipped { + writeln!(msg, " {skip}").unwrap(); + } + } + + anyhow::Error::msg(msg) +} + impl WasmBindgenDescriptorsSection { fn execute_exports( &mut self, @@ -56,9 +82,10 @@ impl WasmBindgenDescriptorsSection { walrus::ExportItem::Function(id) => id, _ => panic!("{} export not a function", export.name), }; - if let Some(d) = interpreter.interpret_descriptor(id, module) { + if let Some((d, skipped)) = interpreter.interpret_descriptor(id, module) { let name = &export.name[prefix.len()..]; - let descriptor = Descriptor::decode(d); + let descriptor = + Descriptor::decode(d).ok_or_else(|| descriptor_error(d, &skipped))?; self.descriptors.insert(name.to_string(), descriptor); } to_remove.push(export.id()); @@ -96,10 +123,14 @@ impl WasmBindgenDescriptorsSection { }; dfs_in_order(&mut find, local, local.entry_block()); if find.found { - let descriptor = interpreter + let (descriptor, skipped) = interpreter .interpret_closure_descriptor(id, module, &mut element_removal_list) .unwrap(); - func_to_descriptor.insert(id, Descriptor::decode(descriptor)); + func_to_descriptor.insert( + id, + Descriptor::decode(descriptor) + .ok_or_else(|| descriptor_error(descriptor, &skipped))?, + ); } } @@ -179,3 +210,47 @@ impl CustomSection for WasmBindgenDescriptorsSection { panic!("shouldn't emit custom sections just yet"); } } + +#[cfg(test)] +mod tests { + use super::WasmBindgenDescriptorsSectionId; + + fn execute(wat: &str) -> anyhow::Result { + let wasm = wat::parse_str(wat).unwrap(); + let mut module = walrus::Module::from_buffer(&wasm).unwrap(); + super::execute(&mut module) + } + + #[test] + fn unsupported_instruction() { + let result = execute( + r#" + (module + (import "__wbindgen_placeholder__" "__wbindgen_describe" + (func $__wbindgen_describe (param i32))) + + (func $bar + ;; We don't support `block`, so this will cause an error. + block + end + + ;; Which means this descriptor won't go through. + i32.const 0 + call $__wbindgen_describe + ) + + (func $foo + ;; Call into a nested function `bar`, since an unsupported instruction in the + ;; root function we're calling panics immediately. + call $bar + ) + + (export "__wbindgen_describe_foo" (func $foo)) + ) + "#, + ); + let err = result.unwrap_err(); + assert!(err.to_string().contains("invalid descriptor: []")); + assert!(err.to_string().contains("Block")); + } +} diff --git a/crates/wasm-interpreter/src/lib.rs b/crates/wasm-interpreter/src/lib.rs index 1b875492d77..57902af1bd2 100644 --- a/crates/wasm-interpreter/src/lib.rs +++ b/crates/wasm-interpreter/src/lib.rs @@ -19,6 +19,8 @@ #![deny(missing_docs)] use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::{self, Display, Formatter}; +use std::mem; use walrus::ir::Instr; use walrus::{ElementId, FunctionId, LocalId, Module, TableId}; @@ -57,6 +59,13 @@ pub struct Interpreter { // stores the last table index argument, used for finding a different // descriptor. descriptor_table_idx: Option, + + /// Unsupported instructions that we've come across and skipped. + /// + /// We store these to provide a more helpful error message if one of these + /// happened to be necessary and led to an invalid descriptor, by + /// suggesting that they might be the cause. + skipped: Vec, } impl Interpreter { @@ -128,15 +137,23 @@ impl Interpreter { /// /// Returns `Some` if `func` was found in the `module` and `None` if it was /// not found in the `module`. - pub fn interpret_descriptor(&mut self, id: FunctionId, module: &Module) -> Option<&[u32]> { + /// + /// As well as the descriptor, a `Vec` is returned, which is a list + /// of functions that were skipped over due to unsupported instructions. If + /// the descriptor is invalid, those are likely the cause. + pub fn interpret_descriptor( + &mut self, + id: FunctionId, + module: &Module, + ) -> Option<(&[u32], Vec)> { self.descriptor.truncate(0); // We should have a blank wasm and LLVM stack at both the start and end // of the call. assert_eq!(self.sp, self.mem.len() as i32); - self.call(id, module, &[]); + self.call(id, module, &[]).expect("unsupported instruction"); assert_eq!(self.sp, self.mem.len() as i32); - Some(&self.descriptor) + Some((&self.descriptor, mem::take(&mut self.skipped))) } /// Interprets a "closure descriptor", figuring out the signature of the @@ -160,7 +177,7 @@ impl Interpreter { id: FunctionId, module: &Module, entry_removal_list: &mut HashSet<(ElementId, usize)>, - ) -> Option<&[u32]> { + ) -> Option<(&[u32], Vec)> { // Call the `id` function. This is an internal `#[inline(never)]` // whose code is completely controlled by the `wasm-bindgen` crate, so // it should take some arguments (the number of arguments depends on the @@ -183,7 +200,8 @@ impl Interpreter { ); let args = vec![0; num_params]; - self.call(id, module, &args); + self.call(id, module, &args) + .expect("unsupported instruction"); let descriptor_table_idx = self .descriptor_table_idx .take() @@ -213,7 +231,12 @@ impl Interpreter { self.functions } - fn call(&mut self, id: FunctionId, module: &Module, args: &[i32]) -> Option { + fn call( + &mut self, + id: FunctionId, + module: &Module, + args: &[i32], + ) -> Result, Instr> { let func = module.funcs.get(id); log::debug!("starting a call of {:?} {:?}", id, func.name); log::debug!("arguments {:?}", args); @@ -238,12 +261,12 @@ impl Interpreter { } for (instr, _) in block.instrs.iter() { - frame.eval(instr); + frame.eval(instr)?; if frame.done { break; } } - self.scratch.last().cloned() + Ok(self.scratch.last().cloned()) } } @@ -255,7 +278,10 @@ struct Frame<'a> { } impl Frame<'_> { - fn eval(&mut self, instr: &Instr) { + /// Evaluate a stack frame. + /// + /// Returns an `Err` if an unsupported instruction is encountered. + fn eval(&mut self, instr: &Instr) -> Result<(), Instr> { use walrus::ir::*; let stack = &mut self.interp.scratch; @@ -263,7 +289,7 @@ impl Frame<'_> { match instr { Instr::Const(c) => match c.value { Value::I32(n) => stack.push(n), - _ => panic!("non-i32 constant"), + _ => return Err(instr.clone()), }, Instr::LocalGet(e) => stack.push(self.locals.get(&e.local).cloned().unwrap_or(0)), Instr::LocalSet(e) => { @@ -286,7 +312,7 @@ impl Frame<'_> { stack.push(match e.op { BinaryOp::I32Sub => lhs - rhs, BinaryOp::I32Add => lhs + rhs, - op => panic!("invalid binary op {:?}", op), + _ => return Err(instr.clone()), }); } @@ -347,7 +373,27 @@ impl Frame<'_> { let args = (0..ty.params().len()) .map(|_| stack.pop().unwrap()) .collect::>(); - self.interp.call(e.func, self.module, &args); + + if let Err(instr) = self.interp.call(e.func, self.module, &args) { + let name = self.module.funcs.get(e.func).name.clone(); + + // If an unsupported instruction appears in a nested function, it's most likely + // some runtime-initialization call injected by the compiler that we don't care + // about, like `__wasm_call_ctors` on the WASI target. + // + // So, skip the rest of the function and move on. + log::debug!( + "unsupported instruction {instr:?}, skipping function{name}", + name = if let Some(ref name) = name { + format!(" `{name}`") + } else { + "".to_owned() + } + ); + + // Add it to the list of skipped functions. + self.interp.skipped.push(Skip { name, instr }); + } } } @@ -360,7 +406,30 @@ impl Frame<'_> { // Note that LLVM may change over time to generate new // instructions in debug mode, and we'll have to react to those // sorts of changes as they arise. - s => panic!("unknown instruction {:?}", s), + _ => return Err(instr.clone()), + } + + Ok(()) + } +} + +/// A function that was skipped due to an unsupported instruction, and the +/// instruction that caused it. +#[derive(Debug, Clone)] +pub struct Skip { + /// The name of the function that was skipped. + pub name: Option, + + /// The unsupported instruction that caused it to be skipped. + pub instr: Instr, +} + +impl Display for Skip { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + if let Some(ref name) = self.name { + write!(f, "skipped `{name}` due to {:?}", self.instr) + } else { + write!(f, "skipped a function due to {:?}", self.instr) } } } diff --git a/crates/wasm-interpreter/tests/smoke.rs b/crates/wasm-interpreter/tests/smoke.rs index 33b2575e420..dba443a269a 100644 --- a/crates/wasm-interpreter/tests/smoke.rs +++ b/crates/wasm-interpreter/tests/smoke.rs @@ -14,7 +14,11 @@ fn interpret(wat: &str, name: &str, result: Option<&[u32]>) { }) .next() .unwrap(); - assert_eq!(i.interpret_descriptor(id, &module), result); + assert_eq!( + i.interpret_descriptor(id, &module) + .map(|(descriptor, _)| descriptor), + result + ); } #[test] @@ -223,3 +227,57 @@ fn calling_functions() { "#; interpret(wat, "foo", Some(&[0])); } + +#[test] +fn skip_unsupported() { + let wat = r#" + (module + (import "__wbindgen_placeholder__" "__wbindgen_describe" + (func $__wbindgen_describe (param i32))) + + (func $bar + ;; We don't support blocks. + block + end + + ;; Then attempt to add to the descriptor to make sure we aren't getting here. + i32.const 1 + call $__wbindgen_describe + ) + + (func $foo + ;; Call $bar, which contains unsupported instructions and should be skipped. + call $bar + + ;; And then test that this descriptor still goes through. + i32.const 0 + call $__wbindgen_describe + ) + + (export "foo" (func $foo)) + ) + "#; + interpret(wat, "foo", Some(&[0])); +} + +#[test] +#[should_panic] +fn skip_unsupported_root() { + let wat = r#" + (module + (import "__wbindgen_placeholder__" "__wbindgen_describe" + (func $__wbindgen_describe (param i32))) + + (func $foo + ;; Execute an invalid instruction in the root function we're calling. + ;; This causes an immediate panic, since we can't just skip the whole function + ;; we're getting a descriptor from. + block + end + ) + + (export "foo" (func $foo)) + ) + "#; + interpret(wat, "foo", Some(&[0])); +}