Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fast trace collection & use it for instrumenting guest memory operations #103

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
56 changes: 56 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion src/hyperlight_host/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ workspace = true

[dependencies]
goblin = { version = "0.9" }
framehop = { version = "0.13.1", optional = true }
fallible-iterator = { version = "0.3.0", optional = true }
rand = { version = "0.8.5" }
cfg-if = { version = "1.0.0" }
libc = { version = "0.2.167" }
Expand Down Expand Up @@ -127,7 +129,7 @@ crashdump = ["dep:tempfile"] # Dumps the VM state to a file on unexpected errors
trace_guest = []
# This feature enables unwinding the guest stack from the host, in
# order to produce stack traces for debugging or profiling.
unwind_guest = [ "trace_guest" ]
unwind_guest = [ "trace_guest", "dep:framehop", "dep:fallible-iterator" ]
kvm = ["dep:kvm-bindings", "dep:kvm-ioctls"]
mshv = ["dep:mshv-bindings", "dep:mshv-ioctls"]
inprocess = []
Expand Down
5 changes: 4 additions & 1 deletion src/hyperlight_host/src/hypervisor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,10 @@ pub(crate) mod tests {
SandboxConfiguration::DEFAULT_MAX_WAIT_FOR_CANCELLATION as u64,
),
#[cfg(feature = "trace_guest")]
trace_info: crate::sandbox::TraceInfo::new()?,
trace_info: crate::sandbox::TraceInfo::new(
#[cfg(feature = "unwind_guest")]
Arc::new(crate::mem::exe::DummyUnwindInfo {}),
)?,
};

let mut hv_handler = HypervisorHandler::new(hv_handler_config);
Expand Down
110 changes: 108 additions & 2 deletions src/hyperlight_host/src/mem/elf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

#[cfg(feature = "unwind_guest")]
use std::sync::Arc;

#[cfg(target_arch = "aarch64")]
use goblin::elf::reloc::{R_AARCH64_NONE, R_AARCH64_RELATIVE};
#[cfg(target_arch = "x86_64")]
Expand All @@ -23,13 +26,85 @@ use goblin::elf64::program_header::PT_LOAD;

use crate::{log_then_return, new_error, Result};

/// An ELF section header whose name has already been resolved to an owned
/// string.
///
/// Instances are built in `ElfInfo::new` from the parsed section headers.
#[cfg(feature = "unwind_guest")]
struct ResolvedSectionHeader {
// Section name, looked up in the section-header string table.
name: String,
// Copied from the header's `sh_addr` field.
addr: u64,
// Copied from the header's `sh_offset` field.
offset: u64,
// Copied from the header's `sh_size` field.
size: u64,
}

/// Parsed view of a guest ELF image, built by `ElfInfo::new`.
pub(crate) struct ElfInfo {
// Owned copy of the raw bytes the image was parsed from.
payload: Vec<u8>,
// Program headers taken from the parsed ELF.
phdrs: ProgramHeaders,
// Section headers with resolved names; only present when the
// `unwind_guest` feature is enabled.
#[cfg(feature = "unwind_guest")]
shdrs: Vec<ResolvedSectionHeader>,
// Entry point taken from the parsed ELF.
entry: u64,
// Relocation entries collected in `new`.
// NOTE(review): how they are gathered is not visible in this excerpt.
relocs: Vec<Reloc>,
}

/// Information kept about a loaded ELF guest so that its stack can be
/// unwound from the host.
///
/// Returned (wrapped in an `Arc`) from `ElfInfo::load_at` when the
/// `unwind_guest` feature is enabled.
#[cfg(feature = "unwind_guest")]
struct UnwindInfo {
// Raw ELF file bytes, moved out of the `ElfInfo` that was loaded.
payload: Vec<u8>,
// Address that was passed to `load_at`.
load_addr: u64,
// Result of `ElfInfo::get_va_size()` for the loaded image.
va_size: u64,
// Result of `ElfInfo::get_base_va()` for the loaded image.
base_svma: u64,
// Section headers with resolved names, moved out of the `ElfInfo`.
shdrs: Vec<ResolvedSectionHeader>,
}

#[cfg(feature = "unwind_guest")]
impl super::exe::UnwindInfo for UnwindInfo {
    /// Describes the loaded guest image as a `framehop::Module`.
    ///
    /// The module is named `"guest"`, covers the address range
    /// `load_addr..load_addr + va_size`, uses `load_addr` as its base
    /// address, and resolves section queries through the
    /// `ModuleSectionInfo` implementation for `&UnwindInfo`.
    fn as_module(&self) -> framehop::Module<Vec<u8>> {
        framehop::Module::new(
            // TODO: plumb through a name from from_file if this
            // came from a file
            "guest".to_string(),
            self.load_addr..self.load_addr + self.va_size,
            self.load_addr,
            self,
        )
    }

    /// Returns the BLAKE3 hash of the raw ELF payload.
    fn hash(&self) -> blake3::Hash {
        blake3::hash(&self.payload)
    }
}

#[cfg(feature = "unwind_guest")]
impl UnwindInfo {
    /// Looks up the section header whose name is exactly `name`.
    ///
    /// Returns `None` when no section carries that name.
    ///
    /// The comparison is deliberately exact. A prefix comparison would let
    /// a query for `.eh_frame` resolve to `.eh_frame_hdr` (or `.text` to
    /// `.text.something`) whenever the longer-named section happens to
    /// appear first in the section header table.
    fn resolved_section_header(&self, name: &[u8]) -> Option<&ResolvedSectionHeader> {
        self.shdrs.iter().find(|sh| sh.name.as_bytes() == name)
    }
}

#[cfg(feature = "unwind_guest")]
impl framehop::ModuleSectionInfo<Vec<u8>> for &UnwindInfo {
    /// Base address recorded for the image when it was loaded.
    fn base_svma(&self) -> u64 {
        self.base_svma
    }

    /// Address range `addr..addr + size` of the section called `name`.
    ///
    /// Returns `None` if there is no such section, or if the end of the
    /// range would overflow a `u64` (a malformed header).
    fn section_svma_range(&mut self, name: &[u8]) -> Option<std::ops::Range<u64>> {
        let shdr = self.resolved_section_header(name)?;
        let end = shdr.addr.checked_add(shdr.size)?;
        Some(shdr.addr..end)
    }

    /// Copy of the file bytes backing the section called `name`.
    ///
    /// Returns `None` if there is no such section, if the header's
    /// offset/size do not lie inside the file payload (so a bad header
    /// cannot cause a slicing panic), or for `.eh_frame` when a
    /// `.debug_frame` section is present (see below).
    fn section_data(&mut self, name: &[u8]) -> Option<Vec<u8>> {
        if name == b".eh_frame" && self.resolved_section_header(b".debug_frame").is_some() {
            /* Rustc does not always emit enough information for stack
             * unwinding in .eh_frame, presumably because we use panic =
             * abort in the guest. Framehop defaults to ignoring
             * .debug_frame if .eh_frame exists, but we want the opposite
             * behaviour here, since .debug_frame will actually contain
             * frame information whereas .eh_frame often doesn't because
             * of the aforementioned behaviour. Consequently, we hack
             * around this by pretending that .eh_frame doesn't exist if
             * .debug_frame does. */
            return None;
        }
        let shdr = self.resolved_section_header(name)?;
        // Guard against overflow and out-of-range headers instead of
        // indexing the payload directly.
        let end = shdr.offset.checked_add(shdr.size)?;
        self.payload
            .get(shdr.offset as usize..end as usize)
            .map(|bytes| bytes.to_vec())
    }
}

impl ElfInfo {
pub(crate) fn new(bytes: &[u8]) -> Result<Self> {
let elf = Elf::parse(bytes)?;
Expand All @@ -44,6 +119,19 @@ impl ElfInfo {
Ok(ElfInfo {
payload: bytes.to_vec(),
phdrs: elf.program_headers,
#[cfg(feature = "unwind_guest")]
shdrs: elf
.section_headers
.iter()
.filter_map(|sh| {
Some(ResolvedSectionHeader {
name: elf.shdr_strtab.get_at(sh.sh_name)?.to_string(),
addr: sh.sh_addr,
offset: sh.sh_offset,
size: sh.sh_size,
})
})
.collect(),
entry: elf.entry,
relocs,
})
Expand All @@ -68,7 +156,11 @@ impl ElfInfo {
.unwrap(); // guaranteed not to panic because of the check in new()
(max_phdr.p_vaddr + max_phdr.p_memsz - self.get_base_va()) as usize
}
pub(crate) fn load_at(self, load_addr: usize, target: &mut [u8]) -> Result<()> {
pub(crate) fn load_at(
self,
load_addr: usize,
target: &mut [u8],
) -> Result<super::exe::LoadInfo> {
let base_va = self.get_base_va();
for phdr in self.phdrs.iter().filter(|phdr| phdr.p_type == PT_LOAD) {
let start_va = (phdr.p_vaddr - base_va) as usize;
Expand Down Expand Up @@ -108,6 +200,20 @@ impl ElfInfo {
}
}
}
Ok(())
cfg_if::cfg_if! {
if #[cfg(feature = "unwind_guest")] {
let va_size = self.get_va_size() as u64;
let base_svma = self.get_base_va();
Ok(Arc::new(UnwindInfo {
payload: self.payload,
load_addr: load_addr as u64,
va_size,
base_svma,
shdrs: self.shdrs,
}))
} else {
Ok(())
}
}
}
}
51 changes: 46 additions & 5 deletions src/hyperlight_host/src/mem/exe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ limitations under the License.

use std::fs::File;
use std::io::Read;
#[cfg(feature = "unwind_guest")]
use std::sync::Arc;
use std::vec::Vec;

use super::elf::ElfInfo;
Expand All @@ -40,6 +42,41 @@ pub enum ExeInfo {
const DEFAULT_ELF_STACK_RESERVE: u64 = 65536;
const DEFAULT_ELF_HEAP_RESERVE: u64 = 131072;

/// Unwinding metadata for a loaded guest binary.
///
/// Implementors must be `Send + Sync`; the `LoadInfo` alias in this file
/// hands them out as `Arc<dyn UnwindInfo>`.
#[cfg(feature = "unwind_guest")]
pub(crate) trait UnwindInfo: Send + Sync {
// Builds a `framehop::Module` describing the binary.
fn as_module(&self) -> framehop::Module<Vec<u8>>;
// Returns a BLAKE3 hash identifying the binary.
fn hash(&self) -> blake3::Hash;
}

/// Placeholder `UnwindInfo` that carries no data.
///
/// `ExeInfo::load` returns it for PE binaries.
#[cfg(feature = "unwind_guest")]
pub(crate) struct DummyUnwindInfo {}
#[cfg(feature = "unwind_guest")]
impl UnwindInfo for DummyUnwindInfo {
    /// Produces a module named `"unsupported"` with an empty address range
    /// and a base address of zero.
    fn as_module(&self) -> framehop::Module<Vec<u8>> {
        let module_name = String::from("unsupported");
        let empty_range: std::ops::Range<u64> = 0..0;
        let base_address: u64 = 0;
        framehop::Module::new(module_name, empty_range, base_address, self)
    }

    /// Returns a hash made of 32 zero bytes.
    fn hash(&self) -> blake3::Hash {
        let zeroes = [0u8; 32];
        blake3::Hash::from_bytes(zeroes)
    }
}
#[cfg(feature = "unwind_guest")]
impl<D> framehop::ModuleSectionInfo<D> for &DummyUnwindInfo {
    /// The dummy module always reports a base address of zero.
    fn base_svma(&self) -> u64 {
        0
    }

    /// No section is ever reported, whatever name is requested.
    fn section_svma_range(&mut self, _section: &[u8]) -> Option<std::ops::Range<u64>> {
        None
    }

    /// No section data is ever available, whatever name is requested.
    fn section_data(&mut self, _section: &[u8]) -> Option<D> {
        None
    }
}

// Value returned by `ExeInfo::load` (and `ElfInfo::load_at`): shared
// unwind information when the `unwind_guest` feature is enabled...
#[cfg(feature = "unwind_guest")]
pub(crate) type LoadInfo = Arc<dyn UnwindInfo>;
// ...and the unit type otherwise, so callers get nothing extra.
#[cfg(not(feature = "unwind_guest"))]
pub(crate) type LoadInfo = ();

impl ExeInfo {
pub fn from_file(path: &str) -> Result<Self> {
let mut file = File::open(path)?;
Expand Down Expand Up @@ -80,17 +117,21 @@ impl ExeInfo {
// copying into target, but the PE loader chooses to apply
// relocations in its owned representation of the PE contents,
// which requires it to be &mut.
pub fn load(self, load_addr: usize, target: &mut [u8]) -> Result<()> {
pub fn load(self, load_addr: usize, target: &mut [u8]) -> Result<LoadInfo> {
match self {
ExeInfo::PE(mut pe) => {
let patches = pe.get_exe_relocation_patches(load_addr)?;
pe.apply_relocation_patches(patches)?;
target[0..pe.payload.len()].copy_from_slice(&pe.payload);
cfg_if::cfg_if! {
if #[cfg(feature = "unwind_guest")] {
Ok(Arc::new(DummyUnwindInfo {}))
} else {
Ok(())
}
}
}
ExeInfo::Elf(elf) => {
elf.load_at(load_addr, target)?;
}
ExeInfo::Elf(elf) => elf.load_at(load_addr, target),
}
Ok(())
}
}
2 changes: 1 addition & 1 deletion src/hyperlight_host/src/mem/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -658,7 +658,7 @@ impl SandboxMemoryLayout {

/// Get the guest address of the code section in the sandbox
#[instrument(skip_all, parent = Span::current(), level= "Trace")]
pub(super) fn get_guest_code_address(&self) -> usize {
pub(crate) fn get_guest_code_address(&self) -> usize {
Self::BASE_ADDRESS + self.guest_code_offset
}

Expand Down
Loading