From 31d63e89e39a88729bdf744f2563b9b335b4152d Mon Sep 17 00:00:00 2001 From: Liam Murphy Date: Sat, 13 Aug 2022 12:04:40 +1000 Subject: [PATCH] Skip over unsupported instructions instead of panicking Fixes #2969 This changes `wasm-bindgen-wasm-interpreter` to ignore a function rather than panicking if it contains an unsupported instruction. This works around some runtime glue that gets added to our descriptor functions on the WASI target by more-or-less ignoring them. I also put some work into making sure a helpful error message is given if the actual describe function contains unsupported instructions, by keeping track of the instructions that caused us to skip and including them in the error message if the descriptor is invalid. --- crates/cli-support/Cargo.toml | 3 + crates/cli-support/src/descriptor.rs | 90 ++++++++++++------------ crates/cli-support/src/descriptors.rs | 85 +++++++++++++++++++++-- crates/wasm-interpreter/src/lib.rs | 95 ++++++++++++++++++++++---- crates/wasm-interpreter/tests/smoke.rs | 60 +++++++++++++++- 5 files changed, 272 insertions(+), 61 deletions(-) diff --git a/crates/cli-support/Cargo.toml b/crates/cli-support/Cargo.toml index 2c6eb736edf..b3fbd68bb7f 100644 --- a/crates/cli-support/Cargo.toml +++ b/crates/cli-support/Cargo.toml @@ -28,3 +28,6 @@ wasm-bindgen-wasm-interpreter = { path = "../wasm-interpreter", version = '=0.2. wit-text = "0.8.0" wit-walrus = "0.6.0" wit-validator = "0.2.0" + +[dev-dependencies] +wat = "1.0" diff --git a/crates/cli-support/src/descriptor.rs b/crates/cli-support/src/descriptor.rs index a3b73a2221a..4bd515ec87d 100644 --- a/crates/cli-support/src/descriptor.rs +++ b/crates/cli-support/src/descriptor.rs @@ -108,14 +108,17 @@ pub enum VectorKind { } impl Descriptor { - pub fn decode(mut data: &[u32]) -> Descriptor { - let descriptor = Descriptor::_decode(&mut data, false); - assert!(data.is_empty(), "remaining data {:?}", data); - descriptor + /// Decodes `data` into a descriptor, or returns `None` if it's invalid. + pub fn decode(mut data: &[u32]) -> Option { + let descriptor = Descriptor::_decode(&mut data, false)?; + if !data.is_empty() { + return None; + } + Some(descriptor) } - fn _decode(data: &mut &[u32], clamped: bool) -> Descriptor { - match get(data) { + fn _decode(data: &mut &[u32], clamped: bool) -> Option { + Some(match get(data)? { I8 => Descriptor::I8, I16 => Descriptor::I16, I32 => Descriptor::I32, @@ -128,31 +131,31 @@ impl Descriptor { F32 => Descriptor::F32, F64 => Descriptor::F64, BOOLEAN => Descriptor::Boolean, - FUNCTION => Descriptor::Function(Box::new(Function::decode(data))), - CLOSURE => Descriptor::Closure(Box::new(Closure::decode(data))), - REF => Descriptor::Ref(Box::new(Descriptor::_decode(data, clamped))), - REFMUT => Descriptor::RefMut(Box::new(Descriptor::_decode(data, clamped))), - SLICE => Descriptor::Slice(Box::new(Descriptor::_decode(data, clamped))), - VECTOR => Descriptor::Vector(Box::new(Descriptor::_decode(data, clamped))), - OPTIONAL => Descriptor::Option(Box::new(Descriptor::_decode(data, clamped))), - RESULT => Descriptor::Result(Box::new(Descriptor::_decode(data, clamped))), + FUNCTION => Descriptor::Function(Box::new(Function::decode(data)?)), + CLOSURE => Descriptor::Closure(Box::new(Closure::decode(data)?)), + REF => Descriptor::Ref(Box::new(Descriptor::_decode(data, clamped)?)), + REFMUT => Descriptor::RefMut(Box::new(Descriptor::_decode(data, clamped)?)), + SLICE => Descriptor::Slice(Box::new(Descriptor::_decode(data, clamped)?)), + VECTOR => Descriptor::Vector(Box::new(Descriptor::_decode(data, clamped)?)), + OPTIONAL => Descriptor::Option(Box::new(Descriptor::_decode(data, clamped)?)), + RESULT => Descriptor::Result(Box::new(Descriptor::_decode(data, clamped)?)), CACHED_STRING => Descriptor::CachedString, STRING => Descriptor::String, EXTERNREF => Descriptor::Externref, - ENUM => Descriptor::Enum { hole: get(data) }, + ENUM => Descriptor::Enum { hole: get(data)? }, RUST_STRUCT => { - let name = get_string(data); + let name = get_string(data)?; Descriptor::RustStruct(name) } NAMED_EXTERNREF => { - let name = get_string(data); + let name = get_string(data)?; Descriptor::NamedExternref(name) } CHAR => Descriptor::Char, UNIT => Descriptor::Unit, - CLAMPED => Descriptor::_decode(data, true), - other => panic!("unknown descriptor: {}", other), - } + CLAMPED => Descriptor::_decode(data, true)?, + _ => return None, + }) } pub fn unwrap_function(self) -> Function { @@ -204,45 +207,48 @@ impl Descriptor { } } -fn get(a: &mut &[u32]) -> u32 { - let ret = a[0]; +fn get(a: &mut &[u32]) -> Option { + let ret = *a.get(0)?; *a = &a[1..]; - ret + Some(ret) } -fn get_string(data: &mut &[u32]) -> String { - (0..get(data)) - .map(|_| char::from_u32(get(data)).unwrap()) +fn get_string(data: &mut &[u32]) -> Option { + (0..get(data)?) + .map(|_| char::from_u32(get(data)?)) .collect() } impl Closure { - fn decode(data: &mut &[u32]) -> Closure { - let shim_idx = get(data); - let dtor_idx = get(data); - let mutable = get(data) == REFMUT; - assert_eq!(get(data), FUNCTION); - Closure { + fn decode(data: &mut &[u32]) -> Option { + let shim_idx = get(data)?; + let dtor_idx = get(data)?; + let mutable = get(data)? == REFMUT; + if get(data)? != FUNCTION { + return None; + } + + Some(Closure { shim_idx, dtor_idx, mutable, - function: Function::decode(data), - } + function: Function::decode(data)?, + }) } } impl Function { - fn decode(data: &mut &[u32]) -> Function { - let shim_idx = get(data); - let arguments = (0..get(data)) + fn decode(data: &mut &[u32]) -> Option { + let shim_idx = get(data)?; + let arguments = (0..get(data)?) .map(|_| Descriptor::_decode(data, false)) - .collect::>(); - Function { + .collect::>>()?; + Some(Function { arguments, shim_idx, - ret: Descriptor::_decode(data, false), - inner_ret: Some(Descriptor::_decode(data, false)), - } + ret: Descriptor::_decode(data, false)?, + inner_ret: Some(Descriptor::_decode(data, false)?), + }) } } diff --git a/crates/cli-support/src/descriptors.rs b/crates/cli-support/src/descriptors.rs index 36e81092a3b..d8aa2963387 100644 --- a/crates/cli-support/src/descriptors.rs +++ b/crates/cli-support/src/descriptors.rs @@ -14,9 +14,10 @@ use crate::descriptor::{Closure, Descriptor}; use anyhow::Error; use std::borrow::Cow; use std::collections::{HashMap, HashSet}; +use std::fmt::Write; use walrus::ImportId; use walrus::{CustomSection, FunctionId, Module, TypedCustomSectionId}; -use wasm_bindgen_wasm_interpreter::Interpreter; +use wasm_bindgen_wasm_interpreter::{Interpreter, Skip}; #[derive(Default, Debug)] pub struct WasmBindgenDescriptorsSection { @@ -40,6 +41,31 @@ pub fn execute(module: &mut Module) -> Result anyhow::Error { + let mut msg = format!("invalid descriptor: {descriptor:?}"); + + if !skipped.is_empty() { + writeln!( + msg, + "\n\n\ + Some functions containing unsupported instructions were skipped while\n\ + intepreting the wasm module, which may have caused this:" + ) + .unwrap(); + + for skip in skipped { + writeln!(msg, " {skip}").unwrap(); + } + } + + anyhow::Error::msg(msg) +} + impl WasmBindgenDescriptorsSection { fn execute_exports( &mut self, @@ -56,9 +82,10 @@ impl WasmBindgenDescriptorsSection { walrus::ExportItem::Function(id) => id, _ => panic!("{} export not a function", export.name), }; - if let Some(d) = interpreter.interpret_descriptor(id, module) { + if let Some((d, skipped)) = interpreter.interpret_descriptor(id, module) { let name = &export.name[prefix.len()..]; - let descriptor = Descriptor::decode(d); + let descriptor = + Descriptor::decode(d).ok_or_else(|| descriptor_error(d, &skipped))?; self.descriptors.insert(name.to_string(), descriptor); } to_remove.push(export.id()); @@ -96,10 +123,14 @@ impl WasmBindgenDescriptorsSection { }; dfs_in_order(&mut find, local, local.entry_block()); if find.found { - let descriptor = interpreter + let (descriptor, skipped) = interpreter .interpret_closure_descriptor(id, module, &mut element_removal_list) .unwrap(); - func_to_descriptor.insert(id, Descriptor::decode(descriptor)); + func_to_descriptor.insert( + id, + Descriptor::decode(descriptor) + .ok_or_else(|| descriptor_error(descriptor, &skipped))?, + ); } } @@ -179,3 +210,47 @@ impl CustomSection for WasmBindgenDescriptorsSection { panic!("shouldn't emit custom sections just yet"); } } + +#[cfg(test)] +mod tests { + use super::WasmBindgenDescriptorsSectionId; + + fn execute(wat: &str) -> anyhow::Result { + let wasm = wat::parse_str(wat).unwrap(); + let mut module = walrus::Module::from_buffer(&wasm).unwrap(); + super::execute(&mut module) + } + + #[test] + fn unsupported_instruction() { + let result = execute( + r#" + (module + (import "__wbindgen_placeholder__" "__wbindgen_describe" + (func $__wbindgen_describe (param i32))) + + (func $bar + ;; We don't support `block`, so this will cause an error. + block + end + + ;; Which means this descriptor won't go through. + i32.const 0 + call $__wbindgen_describe + ) + + (func $foo + ;; Call into a nested function `bar`, since an unsupported instruction in the + ;; root function we're calling panics immediately. + call $bar + ) + + (export "__wbindgen_describe_foo" (func $foo)) + ) + "#, + ); + let err = result.unwrap_err(); + assert!(err.to_string().contains("invalid descriptor: []")); + assert!(err.to_string().contains("Block")); + } +} diff --git a/crates/wasm-interpreter/src/lib.rs b/crates/wasm-interpreter/src/lib.rs index 1b875492d77..57902af1bd2 100644 --- a/crates/wasm-interpreter/src/lib.rs +++ b/crates/wasm-interpreter/src/lib.rs @@ -19,6 +19,8 @@ #![deny(missing_docs)] use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::{self, Display, Formatter}; +use std::mem; use walrus::ir::Instr; use walrus::{ElementId, FunctionId, LocalId, Module, TableId}; @@ -57,6 +59,13 @@ pub struct Interpreter { // stores the last table index argument, used for finding a different // descriptor. descriptor_table_idx: Option, + + /// Unsupported instructions that we've come across and skipped. + /// + /// We store these to provide a more helpful error message if one of these + /// happened to be necessary and led to an invalid descriptor, by + /// suggesting that they might be the cause. + skipped: Vec, } impl Interpreter { @@ -128,15 +137,23 @@ impl Interpreter { /// /// Returns `Some` if `func` was found in the `module` and `None` if it was /// not found in the `module`. - pub fn interpret_descriptor(&mut self, id: FunctionId, module: &Module) -> Option<&[u32]> { + /// + /// As well as the descriptor, a `Vec` is returned, which is a list + /// of functions that were skipped over due to unsupported instructions. If + /// the descriptor is invalid, those are likely the cause. + pub fn interpret_descriptor( + &mut self, + id: FunctionId, + module: &Module, + ) -> Option<(&[u32], Vec)> { self.descriptor.truncate(0); // We should have a blank wasm and LLVM stack at both the start and end // of the call. assert_eq!(self.sp, self.mem.len() as i32); - self.call(id, module, &[]); + self.call(id, module, &[]).expect("unsupported instruction"); assert_eq!(self.sp, self.mem.len() as i32); - Some(&self.descriptor) + Some((&self.descriptor, mem::take(&mut self.skipped))) } /// Interprets a "closure descriptor", figuring out the signature of the @@ -160,7 +177,7 @@ impl Interpreter { id: FunctionId, module: &Module, entry_removal_list: &mut HashSet<(ElementId, usize)>, - ) -> Option<&[u32]> { + ) -> Option<(&[u32], Vec)> { // Call the `id` function. This is an internal `#[inline(never)]` // whose code is completely controlled by the `wasm-bindgen` crate, so // it should take some arguments (the number of arguments depends on the @@ -183,7 +200,8 @@ impl Interpreter { ); let args = vec![0; num_params]; - self.call(id, module, &args); + self.call(id, module, &args) + .expect("unsupported instruction"); let descriptor_table_idx = self .descriptor_table_idx .take() @@ -213,7 +231,12 @@ impl Interpreter { self.functions } - fn call(&mut self, id: FunctionId, module: &Module, args: &[i32]) -> Option { + fn call( + &mut self, + id: FunctionId, + module: &Module, + args: &[i32], + ) -> Result, Instr> { let func = module.funcs.get(id); log::debug!("starting a call of {:?} {:?}", id, func.name); log::debug!("arguments {:?}", args); @@ -238,12 +261,12 @@ impl Interpreter { } for (instr, _) in block.instrs.iter() { - frame.eval(instr); + frame.eval(instr)?; if frame.done { break; } } - self.scratch.last().cloned() + Ok(self.scratch.last().cloned()) } } @@ -255,7 +278,10 @@ struct Frame<'a> { } impl Frame<'_> { - fn eval(&mut self, instr: &Instr) { + /// Evaluate a stack frame. + /// + /// Returns an `Err` if an unsupported instruction is encountered. + fn eval(&mut self, instr: &Instr) -> Result<(), Instr> { use walrus::ir::*; let stack = &mut self.interp.scratch; @@ -263,7 +289,7 @@ impl Frame<'_> { match instr { Instr::Const(c) => match c.value { Value::I32(n) => stack.push(n), - _ => panic!("non-i32 constant"), + _ => return Err(instr.clone()), }, Instr::LocalGet(e) => stack.push(self.locals.get(&e.local).cloned().unwrap_or(0)), Instr::LocalSet(e) => { @@ -286,7 +312,7 @@ impl Frame<'_> { stack.push(match e.op { BinaryOp::I32Sub => lhs - rhs, BinaryOp::I32Add => lhs + rhs, - op => panic!("invalid binary op {:?}", op), + _ => return Err(instr.clone()), }); } @@ -347,7 +373,27 @@ impl Frame<'_> { let args = (0..ty.params().len()) .map(|_| stack.pop().unwrap()) .collect::>(); - self.interp.call(e.func, self.module, &args); + + if let Err(instr) = self.interp.call(e.func, self.module, &args) { + let name = self.module.funcs.get(e.func).name.clone(); + + // If an unsupported instruction appears in a nested function, it's most likely + // some runtime-initialization call injected by the compiler that we don't care + // about, like `__wasm_call_ctors` on the WASI target. + // + // So, skip the rest of the function and move on. + log::debug!( + "unsupported instruction {instr:?}, skipping function{name}", + name = if let Some(ref name) = name { + format!(" `{name}`") + } else { + "".to_owned() + } + ); + + // Add it to the list of skipped functions. + self.interp.skipped.push(Skip { name, instr }); + } } } @@ -360,7 +406,30 @@ impl Frame<'_> { // Note that LLVM may change over time to generate new // instructions in debug mode, and we'll have to react to those // sorts of changes as they arise. - s => panic!("unknown instruction {:?}", s), + _ => return Err(instr.clone()), + } + + Ok(()) + } +} + +/// A function that was skipped due to an unsupported instruction, and the +/// instruction that caused it. +#[derive(Debug, Clone)] +pub struct Skip { + /// The name of the function that was skipped. + pub name: Option, + + /// The unsupported instruction that caused it to be skipped. + pub instr: Instr, +} + +impl Display for Skip { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + if let Some(ref name) = self.name { + write!(f, "skipped `{name}` due to {:?}", self.instr) + } else { + write!(f, "skipped a function due to {:?}", self.instr) } } } diff --git a/crates/wasm-interpreter/tests/smoke.rs b/crates/wasm-interpreter/tests/smoke.rs index 33b2575e420..dba443a269a 100644 --- a/crates/wasm-interpreter/tests/smoke.rs +++ b/crates/wasm-interpreter/tests/smoke.rs @@ -14,7 +14,11 @@ fn interpret(wat: &str, name: &str, result: Option<&[u32]>) { }) .next() .unwrap(); - assert_eq!(i.interpret_descriptor(id, &module), result); + assert_eq!( + i.interpret_descriptor(id, &module) + .map(|(descriptor, _)| descriptor), + result + ); } #[test] @@ -223,3 +227,57 @@ fn calling_functions() { "#; interpret(wat, "foo", Some(&[0])); } + +#[test] +fn skip_unsupported() { + let wat = r#" + (module + (import "__wbindgen_placeholder__" "__wbindgen_describe" + (func $__wbindgen_describe (param i32))) + + (func $bar + ;; We don't support blocks. + block + end + + ;; Then attempt to add to the descriptor to make sure we aren't getting here. + i32.const 1 + call $__wbindgen_describe + ) + + (func $foo + ;; Call $bar, which contains unsupported instructions and should be skipped. + call $bar + + ;; And then test that this descriptor still goes through. + i32.const 0 + call $__wbindgen_describe + ) + + (export "foo" (func $foo)) + ) + "#; + interpret(wat, "foo", Some(&[0])); +} + +#[test] +#[should_panic] +fn skip_unsupported_root() { + let wat = r#" + (module + (import "__wbindgen_placeholder__" "__wbindgen_describe" + (func $__wbindgen_describe (param i32))) + + (func $foo + ;; Execute an invalid instruction in the root function we're calling. + ;; This causes an immediate panic, since we can't just skip the whole function + ;; we're getting a descriptor from. + block + end + ) + + (export "foo" (func $foo)) + ) + "#; + interpret(wat, "foo", Some(&[0])); +}