From 7ce74a69622b2c6e45bb42da4e67908b43b57808 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Mon, 27 Oct 2025 15:12:11 -0700 Subject: [PATCH] Add poison state to sandbox to prevent misuse Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_host/src/error.rs | 104 ++++++ .../src/sandbox/initialized_multi_use.rs | 325 +++++++++++++++++- src/hyperlight_host/tests/integration_test.rs | 34 ++ .../tests/sandbox_host_tests.rs | 10 +- src/tests/rust_guests/dummyguest/Cargo.lock | 4 +- src/tests/rust_guests/simpleguest/Cargo.lock | 4 +- src/tests/rust_guests/witguest/Cargo.lock | 12 +- 7 files changed, 479 insertions(+), 14 deletions(-) diff --git a/src/hyperlight_host/src/error.rs b/src/hyperlight_host/src/error.rs index 99e46b7c9..9851b9f61 100644 --- a/src/hyperlight_host/src/error.rs +++ b/src/hyperlight_host/src/error.rs @@ -191,6 +191,27 @@ pub enum HyperlightError { #[error("Failure processing PE File {0:?}")] PEFileProcessingFailure(#[from] goblin::error::Error), + /// The sandbox becomes **poisoned** when the guest is not run to completion, leaving it in + /// an inconsistent state that could compromise memory safety, data integrity, or security. + /// + /// ### When Does Poisoning Occur? + /// + /// Poisoning happens when guest execution is interrupted before normal completion: + /// + /// - **Guest panics or aborts** - When a guest function panics, crashes, or calls `abort()`, + /// the normal cleanup and unwinding process is interrupted + /// - **Invalid memory access** - Attempts to read/write/execute memory outside allowed regions + /// - **Stack overflow** - Guest exhausts its stack space during execution + /// - **Heap exhaustion** - Guest runs out of heap memory + /// - **Host-initiated cancellation** - Calling [`InterruptHandle::kill()`] to forcefully + /// terminate an in-progress guest function + /// + /// ## Recovery + /// + /// Use [`crate::MultiUseSandbox::restore()`] to recover from a poisoned sandbox. + #[error("The sandbox was poisoned")] + PoisonedSandbox, + /// Raw pointer is less than base address #[error("Raw pointer ({0:?}) was less than the base address ({1})")] RawPointerLessThanBaseAddress(RawPtr, u64), @@ -286,6 +307,89 @@ impl From>> for HyperlightError { } } +impl HyperlightError { + /// Internal helper to determines if the given error has potential to poison the sandbox. + /// + /// Errors that poison the sandbox are those that can leave the sandbox in an inconsistent + /// state where memory, resources, or data structures may be corrupted or leaked. Usually + /// due to the guest not running to completion. + /// + /// If this method returns `true`, the sandbox will be poisoned and all further operations + /// will fail until the sandbox is restored from a non-poisoned snapshot using + /// [`crate::MultiUseSandbox::restore()`]. + pub(crate) fn is_poison_error(&self) -> bool { + // wildcard _ or matches! not used here purposefully to ensure that new error variants + // are explicitly considered for poisoning behavior. + match self { + // These errors poison the sandbox because they can leave it in an inconsistent state due + // to the guest not running to completion. + HyperlightError::GuestAborted(_, _) + | HyperlightError::ExecutionCanceledByHost() + | HyperlightError::PoisonedSandbox + | HyperlightError::ExecutionAccessViolation(_) + | HyperlightError::StackOverflow() + | HyperlightError::MemoryAccessViolation(_, _, _) => true, + + // All other errors do not poison the sandbox. + HyperlightError::AnyhowError(_) + | HyperlightError::BoundsCheckFailed(_, _) + | HyperlightError::CheckedAddOverflow(_, _) + | HyperlightError::CStringConversionError(_) + | HyperlightError::Error(_) + | HyperlightError::FailedToGetValueFromParameter() + | HyperlightError::FieldIsMissingInGuestLogData(_) + | HyperlightError::GuestError(_, _) + | HyperlightError::GuestExecutionHungOnHostFunctionCall() + | HyperlightError::GuestFunctionCallAlreadyInProgress() + | HyperlightError::GuestInterfaceUnsupportedType(_) + | HyperlightError::GuestOffsetIsInvalid(_) + | HyperlightError::HostFunctionNotFound(_) + | HyperlightError::IOError(_) + | HyperlightError::IntConversionFailure(_) + | HyperlightError::InvalidFlatBuffer(_) + | HyperlightError::JsonConversionFailure(_) + | HyperlightError::LockAttemptFailed(_) + | HyperlightError::MemoryAllocationFailed(_) + | HyperlightError::MemoryProtectionFailed(_) + | HyperlightError::MemoryRequestTooBig(_, _) + | HyperlightError::MetricNotFound(_) + | HyperlightError::MmapFailed(_) + | HyperlightError::MprotectFailed(_) + | HyperlightError::NoHypervisorFound() + | HyperlightError::NoMemorySnapshot + | HyperlightError::ParameterValueConversionFailure(_, _) + | HyperlightError::PEFileProcessingFailure(_) + | HyperlightError::RawPointerLessThanBaseAddress(_, _) + | HyperlightError::RefCellBorrowFailed(_) + | HyperlightError::RefCellMutBorrowFailed(_) + | HyperlightError::ReturnValueConversionFailure(_, _) + | HyperlightError::SnapshotSandboxMismatch + | HyperlightError::SystemTimeError(_) + | HyperlightError::TryFromSliceError(_) + | HyperlightError::UnexpectedNoOfArguments(_, _) + | HyperlightError::UnexpectedParameterValueType(_, _) + | HyperlightError::UnexpectedReturnValueType(_, _) + | HyperlightError::UTF8StringConversionFailure(_) + | HyperlightError::VectorCapacityIncorrect(_, _, _) => false, + + #[cfg(target_os = "windows")] + HyperlightError::CrossBeamReceiveError(_) => false, + #[cfg(target_os = "windows")] + HyperlightError::CrossBeamSendError(_) => false, + #[cfg(target_os = "windows")] + HyperlightError::WindowsAPIError(_) => false, + #[cfg(target_os = "linux")] + HyperlightError::VmmSysError(_) => false, + #[cfg(kvm)] + HyperlightError::KVMError(_) => false, + #[cfg(mshv)] + HyperlightError::MSHVError(_) => false, + #[cfg(gdb)] + HyperlightError::TranslateGuestAddress(_) => false, + } + } +} + /// Creates a `HyperlightError::Error` from a string literal or format string #[macro_export] macro_rules! new_error { diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 765a1a898..9a2256bf7 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -56,9 +56,45 @@ static SANDBOX_ID_COUNTER: AtomicU64 = AtomicU64::new(0); /// /// Guest functions can be called repeatedly while maintaining state between calls. /// The sandbox supports creating snapshots and restoring to previous states. +/// +/// ## Sandbox Poisoning +/// +/// The sandbox becomes **poisoned** when the guest is not run to completion, leaving it in +/// an inconsistent state that could compromise memory safety, data integrity, or security. +/// +/// ### When Does Poisoning Occur? +/// +/// Poisoning happens when guest execution is interrupted before normal completion: +/// +/// - **Guest panics or aborts** - When a guest function panics, crashes, or calls `abort()`, +/// the normal cleanup and unwinding process is interrupted +/// - **Invalid memory access** - Attempts to read/write/execute memory outside allowed regions +/// - **Stack overflow** - Guest exhausts its stack space during execution +/// - **Heap exhaustion** - Guest runs out of heap memory +/// - **Host-initiated cancellation** - Calling [`InterruptHandle::kill()`] to forcefully +/// terminate an in-progress guest function +/// +/// ### Why This Is Unsafe +/// +/// When guest execution doesn't complete normally, critical cleanup operations are skipped: +/// +/// - **Memory leaks** - Heap allocations remain unreachable as the call stack is unwound +/// - **Corrupted allocator state** - Memory allocator metadata (free lists, heap headers) +/// left inconsistent +/// - **Locked resources** - Mutexes or other synchronization primitives remain locked +/// - **Partial state updates** - Data structures left half-modified (corrupted linked lists, +/// inconsistent hash tables, etc.) +/// +/// ### Recovery +/// +/// Use [`restore()`](Self::restore) with a snapshot taken before poisoning occurred. +/// This is the **only safe way** to recover - it completely replaces all memory state, +/// eliminating any inconsistencies. See [`restore()`](Self::restore) for details. pub struct MultiUseSandbox { /// Unique identifier for this sandbox instance id: u64, + /// Whether this sandbox is poisoned + poisoned: bool, // We need to keep a reference to the host functions, even if the compiler marks it as unused. The compiler cannot detect our dynamic usages of the host function in `HyperlightFunction::call`. pub(super) _host_funcs: Arc>, pub(crate) mem_mgr: SandboxMemoryManager, @@ -87,6 +123,7 @@ impl MultiUseSandbox { ) -> MultiUseSandbox { Self { id: SANDBOX_ID_COUNTER.fetch_add(1, Ordering::Relaxed), + poisoned: false, _host_funcs: host_funcs, mem_mgr: mgr, vm, @@ -102,6 +139,11 @@ impl MultiUseSandbox { /// The snapshot is tied to this specific sandbox instance and can only be /// restored to the same sandbox it was created from. /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is currently poisoned. Snapshots can only be taken from non-poisoned sandboxes. + /// /// # Examples /// /// ```no_run @@ -122,6 +164,10 @@ impl MultiUseSandbox { /// ``` #[instrument(err(Debug), skip_all, parent = Span::current())] pub fn snapshot(&mut self) -> Result { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } + if let Some(snapshot) = &self.snapshot { return Ok(snapshot.clone()); } @@ -140,6 +186,22 @@ impl MultiUseSandbox { /// Attempting to restore a snapshot from a different sandbox will return /// a [`SnapshotSandboxMismatch`](crate::HyperlightError::SnapshotSandboxMismatch) error. /// + /// ## Poison State Recovery + /// + /// This method automatically clears any poison state when successful. This is safe because: + /// - Snapshots can only be taken from non-poisoned sandboxes + /// - Restoration completely replaces all memory state, eliminating any inconsistencies + /// caused by incomplete guest execution + /// + /// ### What Gets Fixed During Restore + /// + /// When a poisoned sandbox is restored, the memory state is completely reset: + /// - **Leaked heap memory** - All allocations from interrupted execution are discarded + /// - **Corrupted allocator metadata** - Free lists and heap headers restored to consistent state + /// - **Locked mutexes** - All lock state is reset + /// - **Partial updates** - Data structures restored to their pre-execution state + /// + /// /// # Examples /// /// ```no_run @@ -165,6 +227,35 @@ impl MultiUseSandbox { /// # Ok(()) /// # } /// ``` + /// + /// ## Recovering from Poison + /// + /// ```no_run + /// # use hyperlight_host::{MultiUseSandbox, UninitializedSandbox, GuestBinary, HyperlightError}; + /// # fn example() -> Result<(), Box> { + /// let mut sandbox: MultiUseSandbox = UninitializedSandbox::new( + /// GuestBinary::FilePath("guest.bin".into()), + /// None + /// )?.evolve()?; + /// + /// // Take snapshot before potentially poisoning operation + /// let snapshot = sandbox.snapshot()?; + /// + /// // This might poison the sandbox (guest not run to completion) + /// let result = sandbox.call::<()>("guest_panic", ()); + /// if result.is_err() { + /// if sandbox.poisoned() { + /// // Restore from snapshot to clear poison + /// sandbox.restore(&snapshot)?; + /// assert!(!sandbox.poisoned()); + /// + /// // Sandbox is now usable again + /// sandbox.call::("Echo", "hello".to_string())?; + /// } + /// } + /// # Ok(()) + /// # } + /// ``` #[instrument(err(Debug), skip_all, parent = Span::current())] pub fn restore(&mut self, snapshot: &Snapshot) -> Result<()> { if let Some(snap) = &self.snapshot @@ -197,6 +288,17 @@ impl MultiUseSandbox { // The restored snapshot is now our most current snapshot self.snapshot = Some(snapshot.clone()); + // Clear poison state when successfully restoring from snapshot. + // + // # Safety: + // This is safe because: + // 1. Snapshots can only be taken from non-poisoned sandboxes (verified at snapshot creation) + // 2. Restoration completely replaces all memory state, eliminating: + // - All leaked heap allocations (memory is restored to snapshot state) + // - All corrupted data structures (overwritten with consistent snapshot data) + // - All inconsistent global state (reset to snapshot values) + self.poisoned = false; + Ok(()) } @@ -204,6 +306,11 @@ impl MultiUseSandbox { /// /// Changes made to the sandbox during execution are *not* persisted. /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is currently poisoned. Use [`restore()`](Self::restore) to recover from a poisoned state. + /// /// # Examples /// /// ```no_run @@ -242,6 +349,9 @@ impl MultiUseSandbox { func_name: &str, args: impl ParameterTuple, ) -> Result { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } let snapshot = self.snapshot()?; let res = self.call(func_name, args); self.restore(&snapshot)?; @@ -252,6 +362,22 @@ impl MultiUseSandbox { /// /// Changes made to the sandbox during execution are persisted. /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is already poisoned before the call. Use [`restore()`](Self::restore) to recover from + /// a poisoned state. + /// + /// ## Sandbox Poisoning + /// + /// If this method returns an error, the sandbox may be poisoned if the guest was not run + /// to completion (due to panic, abort, memory violation, stack/heap exhaustion, or forced + /// termination). Use [`poisoned()`](Self::poisoned) to check the poison state and + /// [`restore()`](Self::restore) to recover if needed. + /// + /// If this method returns `Ok`, the sandbox is guaranteed to **not** be poisoned - the guest + /// function completed successfully and the sandbox state is consistent. + /// /// # Examples /// /// ```no_run @@ -279,12 +405,44 @@ impl MultiUseSandbox { /// # Ok(()) /// # } /// ``` + /// + /// ## Handling Potential Poisoning + /// + /// ```no_run + /// # use hyperlight_host::{MultiUseSandbox, UninitializedSandbox, GuestBinary}; + /// # fn example() -> Result<(), Box> { + /// let mut sandbox: MultiUseSandbox = UninitializedSandbox::new( + /// GuestBinary::FilePath("guest.bin".into()), + /// None + /// )?.evolve()?; + /// + /// // Take snapshot before risky operation + /// let snapshot = sandbox.snapshot()?; + /// + /// // Call potentially unsafe guest function + /// let result = sandbox.call::("RiskyOperation", "input".to_string()); + /// + /// // Check if the call failed and poisoned the sandbox + /// if let Err(e) = result { + /// eprintln!("Guest function failed: {}", e); + /// + /// if sandbox.poisoned() { + /// eprintln!("Sandbox was poisoned, restoring from snapshot"); + /// sandbox.restore(&snapshot)?; + /// } + /// } + /// # Ok(()) + /// # } + /// ``` #[instrument(err(Debug), skip(self, args), parent = Span::current())] pub fn call( &mut self, func_name: &str, args: impl ParameterTuple, ) -> Result { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } // Reset snapshot since we are mutating the sandbox state self.snapshot = None; maybe_time_and_emit_guest_call(func_name, || { @@ -303,12 +461,20 @@ impl MultiUseSandbox { /// (typically page-aligned). The `region_type` field is ignored as guest /// page table entries are not created. /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is currently poisoned. Use [`restore()`](Self::restore) to recover from a poisoned state. + /// /// # Safety /// /// The caller must ensure the host memory region remains valid and unmodified /// for the lifetime of `self`. #[instrument(err(Debug), skip(self, rgn), parent = Span::current())] pub unsafe fn map_region(&mut self, rgn: &MemoryRegion) -> Result<()> { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } if rgn.flags.contains(MemoryRegionFlags::STACK_GUARD) { // Stack guard pages are an internal implementation detail // (which really should be moved into the guest) @@ -330,8 +496,16 @@ impl MultiUseSandbox { /// Map the contents of a file into the guest at a particular address /// /// Returns the length of the mapping in bytes. + /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is currently poisoned. Use [`restore()`](Self::restore) to recover from a poisoned state. #[instrument(err(Debug), skip(self, _fp, _guest_base), parent = Span::current())] pub fn map_file_cow(&mut self, _fp: &Path, _guest_base: u64) -> Result { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } #[cfg(windows)] log_then_return!("mmap'ing a file into the guest is not yet supported on Windows"); #[cfg(unix)] @@ -369,6 +543,11 @@ impl MultiUseSandbox { /// Calls a guest function with type-erased parameters and return values. /// /// This function is used for fuzz testing parameter and return type handling. + /// + /// ## Poisoned Sandbox + /// + /// This method will return [`crate::HyperlightError::PoisonedSandbox`] if the sandbox + /// is currently poisoned. Use [`restore()`](Self::restore) to recover from a poisoned state. #[cfg(feature = "fuzzing")] #[instrument(err(Debug), skip(self, args), parent = Span::current())] pub fn call_type_erased_guest_function_by_name( @@ -377,6 +556,9 @@ impl MultiUseSandbox { ret_type: ReturnType, args: Vec, ) -> Result { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } // Reset snapshot since we are mutating the sandbox state self.snapshot = None; maybe_time_and_emit_guest_call(func_name, || { @@ -390,6 +572,9 @@ impl MultiUseSandbox { return_type: ReturnType, args: Vec, ) -> Result { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } let res = (|| { let estimated_capacity = estimate_flatbuffer_capacity(function_name, &args); @@ -437,8 +622,11 @@ impl MultiUseSandbox { // - the serialized guest function result is zeroed out by us (the host) during deserialization, see `get_guest_function_call_result` // - any serialized host function call are zeroed out by us (the host) during deserialization, see `get_host_function_call` // - any serialized host function result is zeroed out by the guest during deserialization, see `get_host_return_value` - if res.is_err() { + if let Err(e) = &res { self.mem_mgr.clear_io_buffers(); + + // Determine if we should poison the sandbox. + self.poisoned |= e.is_poison_error(); } res } @@ -475,6 +663,7 @@ impl MultiUseSandbox { pub fn interrupt_handle(&self) -> Arc { self.vm.interrupt_handle() } + /// Generate a crash dump of the current state of the VM underlying this sandbox. /// /// Creates an ELF core dump file that can be used for debugging. The dump @@ -512,10 +701,49 @@ impl MultiUseSandbox { /// #[cfg(crashdump)] #[instrument(err(Debug), skip_all, parent = Span::current())] - pub fn generate_crashdump(&self) -> Result<()> { crate::hypervisor::crashdump::generate_crashdump(self.vm.as_ref()) } + + /// Returns whether the sandbox is currently poisoned. + /// + /// A poisoned sandbox is in an inconsistent state due to the guest not running to completion. + /// All operations will be rejected until the sandbox is restored from a non-poisoned snapshot. + /// + /// ## Causes of Poisoning + /// + /// The sandbox becomes poisoned when guest execution is interrupted: + /// - **Panics/Aborts** - Guest code panics or calls `abort()` + /// - **Invalid Memory Access** - Read/write/execute violations + /// - **Stack Overflow** - Guest exhausts stack space + /// - **Heap Exhaustion** - Guest runs out of heap memory + /// - **Forced Termination** - [`InterruptHandle::kill()`] called during execution + /// + /// ## Recovery + /// + /// To clear the poison state, use [`restore()`](Self::restore) with a snapshot + /// that was taken before the sandbox became poisoned. + /// + /// # Examples + /// + /// ```no_run + /// # use hyperlight_host::{MultiUseSandbox, UninitializedSandbox, GuestBinary}; + /// # fn example() -> Result<(), Box> { + /// let mut sandbox: MultiUseSandbox = UninitializedSandbox::new( + /// GuestBinary::FilePath("guest.bin".into()), + /// None + /// )?.evolve()?; + /// + /// // Check if sandbox is poisoned + /// if sandbox.poisoned() { + /// println!("Sandbox is poisoned and needs attention"); + /// } + /// # Ok(()) + /// # } + /// ``` + pub fn poisoned(&self) -> bool { + self.poisoned + } } impl Callable for MultiUseSandbox { @@ -524,6 +752,9 @@ impl Callable for MultiUseSandbox { func_name: &str, args: impl ParameterTuple, ) -> Result { + if self.poisoned { + return Err(crate::HyperlightError::PoisonedSandbox); + } self.call(func_name, args) } } @@ -551,6 +782,96 @@ mod tests { use crate::sandbox::SandboxConfiguration; use crate::{GuestBinary, HyperlightError, MultiUseSandbox, Result, UninitializedSandbox}; + #[test] + fn poison() { + let mut sbox: MultiUseSandbox = { + let path = simple_guest_as_string().unwrap(); + let u_sbox = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u_sbox.evolve() + } + .unwrap(); + let snapshot = sbox.snapshot().unwrap(); + + // poison on purpose + let res = sbox + .call::<()>("guest_panic", "hello".to_string()) + .unwrap_err(); + assert!( + matches!(res, HyperlightError::GuestAborted(code, context) if code == ErrorCode::UnknownError as u8 && context.contains("hello")) + ); + assert!(sbox.poisoned()); + + // guest calls should fail when poisoned + let res = sbox + .call::<()>("guest_panic", "hello2".to_string()) + .unwrap_err(); + assert!(matches!(res, HyperlightError::PoisonedSandbox)); + + // snapshot should fail when poisoned + if let Err(e) = sbox.snapshot() { + assert!(sbox.poisoned()); + assert!(matches!(e, HyperlightError::PoisonedSandbox)); + } else { + panic!("Snapshot should fail"); + } + + // map_region should fail when poisoned + #[cfg(target_os = "linux")] + { + let map_mem = allocate_guest_memory(); + let guest_base = 0x0; + let region = region_for_memory(&map_mem, guest_base, MemoryRegionFlags::READ); + let res = unsafe { sbox.map_region(®ion) }.unwrap_err(); + assert!(matches!(res, HyperlightError::PoisonedSandbox)); + } + + // map_file_cow should fail when poisoned + #[cfg(target_os = "linux")] + { + let temp_file = std::env::temp_dir().join("test_poison_map_file.bin"); + let res = sbox.map_file_cow(&temp_file, 0x0).unwrap_err(); + assert!(matches!(res, HyperlightError::PoisonedSandbox)); + std::fs::remove_file(&temp_file).ok(); // Clean up + } + + // call_guest_function_by_name (deprecated) should fail when poisoned + #[allow(deprecated)] + let res = sbox + .call_guest_function_by_name::("Echo", "test".to_string()) + .unwrap_err(); + assert!(matches!(res, HyperlightError::PoisonedSandbox)); + + // restore to non-poisoned snapshot should work and clear poison + sbox.restore(&snapshot).unwrap(); + assert!(!sbox.poisoned()); + + // guest calls should work again after restore + let res = sbox.call::("Echo", "hello2".to_string()).unwrap(); + assert_eq!(res, "hello2".to_string()); + assert!(!sbox.poisoned()); + + // re-poison on purpose + let res = sbox + .call::<()>("guest_panic", "hello".to_string()) + .unwrap_err(); + assert!( + matches!(res, HyperlightError::GuestAborted(code, context) if code == ErrorCode::UnknownError as u8 && context.contains("hello")) + ); + assert!(sbox.poisoned()); + + // restore to non-poisoned snapshot should work again + sbox.restore(&snapshot).unwrap(); + assert!(!sbox.poisoned()); + + // guest calls should work again + let res = sbox.call::("Echo", "hello3".to_string()).unwrap(); + assert_eq!(res, "hello3".to_string()); + assert!(!sbox.poisoned()); + + // snapshot should work again + let _ = sbox.snapshot().unwrap(); + } + /// Make sure input/output buffers are properly reset after guest call (with host call) #[test] fn host_func_error() { diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index b252aa947..11b138e7b 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -52,6 +52,7 @@ fn interrupt_host_call() { usbox.register("Spin", spin).unwrap(); let mut sandbox: MultiUseSandbox = usbox.evolve().unwrap(); + let snapshot = sandbox.snapshot().unwrap(); let interrupt_handle = sandbox.interrupt_handle(); assert!(!interrupt_handle.dropped()); // not yet dropped @@ -64,6 +65,11 @@ fn interrupt_host_call() { let result = sandbox.call::("CallHostSpin", ()).unwrap_err(); assert!(matches!(result, HyperlightError::ExecutionCanceledByHost())); + assert!(sandbox.poisoned()); + + // Restore from snapshot to clear poison + sandbox.restore(&snapshot).unwrap(); + assert!(!sandbox.poisoned()); thread.join().unwrap(); } @@ -72,6 +78,7 @@ fn interrupt_host_call() { #[test] fn interrupt_in_progress_guest_call() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); + let snapshot = sbox1.snapshot().unwrap(); let barrier = Arc::new(Barrier::new(2)); let barrier2 = barrier.clone(); let interrupt_handle = sbox1.interrupt_handle(); @@ -88,6 +95,11 @@ fn interrupt_in_progress_guest_call() { let res = sbox1.call::("Spin", ()).unwrap_err(); assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + assert!(sbox1.poisoned()); + + // Restore from snapshot to clear poison + sbox1.restore(&snapshot).unwrap(); + assert!(!sbox1.poisoned()); barrier.wait(); // Make sure we can still call guest functions after the VM was interrupted @@ -103,6 +115,7 @@ fn interrupt_in_progress_guest_call() { #[test] fn interrupt_guest_call_in_advance() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); + let snapshot = sbox1.snapshot().unwrap(); let barrier = Arc::new(Barrier::new(2)); let barrier2 = barrier.clone(); let interrupt_handle = sbox1.interrupt_handle(); @@ -119,6 +132,11 @@ fn interrupt_guest_call_in_advance() { barrier.wait(); // wait until `kill()` is called before starting the guest call let res = sbox1.call::("Spin", ()).unwrap_err(); assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + assert!(sbox1.poisoned()); + + // Restore from snapshot to clear poison + sbox1.restore(&snapshot).unwrap(); + assert!(!sbox1.poisoned()); // Make sure we can still call guest functions after the VM was interrupted sbox1.call::("Echo", "hello".to_string()).unwrap(); @@ -142,6 +160,7 @@ fn interrupt_guest_call_in_advance() { fn interrupt_same_thread() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); let mut sbox2: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); + let snapshot2 = sbox2.snapshot().unwrap(); let mut sbox3: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); let barrier = Arc::new(Barrier::new(2)); @@ -172,6 +191,9 @@ fn interrupt_same_thread() { } _ => panic!("Unexpected return"), }; + if sbox2.poisoned() { + sbox2.restore(&snapshot2).unwrap(); + } sbox3 .call::("Echo", "hello".to_string()) .expect("Only sandbox 2 is allowed to be interrupted"); @@ -184,6 +206,7 @@ fn interrupt_same_thread() { fn interrupt_same_thread_no_barrier() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); let mut sbox2: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); + let snapshot2 = sbox2.snapshot().unwrap(); let mut sbox3: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); let barrier = Arc::new(Barrier::new(2)); @@ -216,6 +239,9 @@ fn interrupt_same_thread_no_barrier() { } _ => panic!("Unexpected return"), }; + if sbox2.poisoned() { + sbox2.restore(&snapshot2).unwrap(); + } sbox3 .call::("Echo", "hello".to_string()) .expect("Only sandbox 2 is allowed to be interrupted"); @@ -229,6 +255,7 @@ fn interrupt_same_thread_no_barrier() { #[test] fn interrupt_moved_sandbox() { let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); + let snapshot1 = sbox1.snapshot().unwrap(); let mut sbox2: MultiUseSandbox = new_uninit_rust().unwrap().evolve().unwrap(); let interrupt_handle = sbox1.interrupt_handle(); @@ -241,6 +268,9 @@ fn interrupt_moved_sandbox() { barrier2.wait(); let res = sbox1.call::("Spin", ()).unwrap_err(); assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + assert!(sbox1.poisoned()); + sbox1.restore(&snapshot1).unwrap(); + assert!(!sbox1.poisoned()); }); let thread2 = thread::spawn(move || { @@ -279,6 +309,7 @@ fn interrupt_custom_signal_no_and_retry_delay() { .evolve() .unwrap(); + let snapshot1 = sbox1.snapshot().unwrap(); let interrupt_handle = sbox1.interrupt_handle(); assert!(!interrupt_handle.dropped()); // not yet dropped @@ -295,8 +326,11 @@ fn interrupt_custom_signal_no_and_retry_delay() { for _ in 0..NUM_ITERS { let res = sbox1.call::("Spin", ()).unwrap_err(); assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + assert!(sbox1.poisoned()); // immediately reenter another guest function call after having being cancelled, // so that the vcpu is running again before the interruptor-thread has a chance to see that the vcpu is not running + sbox1.restore(&snapshot1).unwrap(); + assert!(!sbox1.poisoned()); } thread.join().expect("Thread should finish"); } diff --git a/src/hyperlight_host/tests/sandbox_host_tests.rs b/src/hyperlight_host/tests/sandbox_host_tests.rs index cfcc0e6c6..a79ae638c 100644 --- a/src/hyperlight_host/tests/sandbox_host_tests.rs +++ b/src/hyperlight_host/tests/sandbox_host_tests.rs @@ -351,6 +351,7 @@ fn host_function_error() -> Result<()> { // call guest function that calls host function let mut init_sandbox: MultiUseSandbox = sandbox.evolve()?; let msg = "Hello world"; + let snapshot = init_sandbox.snapshot()?; for _ in 0..1000 { let res = init_sandbox @@ -358,9 +359,14 @@ fn host_function_error() -> Result<()> { .unwrap_err(); assert!( matches!(&res, HyperlightError::GuestError(_, msg) if msg == "Host function error!") // rust guest - || matches!(&res, HyperlightError::GuestAborted(_, msg) if msg.contains("Host function error!")) // c guest - || matches!(&res, HyperlightError::StackOverflow()) // c guest. TODO fix this. C guest leaks when host func returns error guest panics. + || matches!(&res, HyperlightError::GuestAborted(_, msg) if msg.contains("Host function error!")), // c guest + "expected something but got {}", + res ); + // C guest panics in rust guest lib when host function returns error, which will poison the sandbox + if init_sandbox.poisoned() { + init_sandbox.restore(&snapshot)?; + } } } Ok(()) diff --git a/src/tests/rust_guests/dummyguest/Cargo.lock b/src/tests/rust_guests/dummyguest/Cargo.lock index 8844a2bb2..febe57d6c 100644 --- a/src/tests/rust_guests/dummyguest/Cargo.lock +++ b/src/tests/rust_guests/dummyguest/Cargo.lock @@ -85,9 +85,9 @@ dependencies = [ [[package]] name = "heapless" -version = "0.8.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +checksum = "b1edcd5a338e64688fbdcb7531a846cfd3476a54784dcb918a0844682bc7ada5" dependencies = [ "hash32", "serde", diff --git a/src/tests/rust_guests/simpleguest/Cargo.lock b/src/tests/rust_guests/simpleguest/Cargo.lock index 5edd3cc30..a9ce6e53d 100644 --- a/src/tests/rust_guests/simpleguest/Cargo.lock +++ b/src/tests/rust_guests/simpleguest/Cargo.lock @@ -77,9 +77,9 @@ dependencies = [ [[package]] name = "heapless" -version = "0.8.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +checksum = "b1edcd5a338e64688fbdcb7531a846cfd3476a54784dcb918a0844682bc7ada5" dependencies = [ "hash32", "serde", diff --git a/src/tests/rust_guests/witguest/Cargo.lock b/src/tests/rust_guests/witguest/Cargo.lock index 6172a003c..91a34d2ce 100644 --- a/src/tests/rust_guests/witguest/Cargo.lock +++ b/src/tests/rust_guests/witguest/Cargo.lock @@ -193,9 +193,9 @@ dependencies = [ [[package]] name = "heapless" -version = "0.8.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +checksum = "b1edcd5a338e64688fbdcb7531a846cfd3476a54784dcb918a0844682bc7ada5" dependencies = [ "hash32", "serde", @@ -396,9 +396,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.102" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e0f6df8eaa422d97d72edcd152e1451618fed47fabbdbd5a8864167b1d4aff7" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] @@ -532,9 +532,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "syn" -version = "2.0.107" +version = "2.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" dependencies = [ "proc-macro2", "quote",