Skip to content

Commit a196a11

Browse files
committed
Add SIMD support
Signed-off-by: Ludvig Liljenberg <[email protected]>
1 parent c725c8b commit a196a11

File tree

18 files changed

+1014
-213
lines changed

18 files changed

+1014
-213
lines changed

.vscode/settings.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"Cargo.toml",
44
// guest crates for testing, not part of the workspace
55
"src/tests/rust_guests/simpleguest/Cargo.toml",
6+
"src/tests/rust_guests/simdguest/Cargo.toml",
67
"src/tests/rust_guests/callbackguest/Cargo.toml"
78
]
89
}

Cargo.lock

Lines changed: 0 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ exclude = [
2222
"src/tests/rust_guests/dummyguest",
2323
"src/tests/rust_guests/simpleguest",
2424
"src/tests/rust_guests/witguest",
25+
"src/tests/rust_guests/simdguest",
2526
]
2627

2728
[workspace.package]

Justfile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ simpleguest_source := "src/tests/rust_guests/simpleguest/target/x86_64-unknown-n
1313
dummyguest_source := "src/tests/rust_guests/dummyguest/target/x86_64-unknown-none"
1414
callbackguest_source := "src/tests/rust_guests/callbackguest/target/x86_64-unknown-none"
1515
witguest_source := "src/tests/rust_guests/witguest/target/x86_64-unknown-none"
16+
simdguest_source := "src/tests/rust_guests/simdguest/target/x86_64-unknown-none"
1617
rust_guests_bin_dir := "src/tests/rust_guests/bin"
1718

1819
################
@@ -43,12 +44,14 @@ build-rust-guests target=default-target: (witguest-wit)
4344
cd src/tests/rust_guests/simpleguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}
4445
cd src/tests/rust_guests/dummyguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}
4546
cd src/tests/rust_guests/witguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}
47+
cd src/tests/rust_guests/simdguest && cargo build --profile={{ if target == "debug" { "dev" } else { target } }}
4648

4749
@move-rust-guests target=default-target:
4850
cp {{ callbackguest_source }}/{{ target }}/callbackguest* {{ rust_guests_bin_dir }}/{{ target }}/
4951
cp {{ simpleguest_source }}/{{ target }}/simpleguest* {{ rust_guests_bin_dir }}/{{ target }}/
5052
cp {{ dummyguest_source }}/{{ target }}/dummyguest* {{ rust_guests_bin_dir }}/{{ target }}/
5153
cp {{ witguest_source }}/{{ target }}/witguest* {{ rust_guests_bin_dir }}/{{ target }}/
54+
cp {{ simdguest_source }}/{{ target }}/simdguest* {{ rust_guests_bin_dir }}/{{ target }}/
5255

5356
build-and-move-rust-guests: (build-rust-guests "debug") (move-rust-guests "debug") (build-rust-guests "release") (move-rust-guests "release")
5457
build-and-move-c-guests: (build-c-guests "debug") (move-c-guests "debug") (build-c-guests "release") (move-c-guests "release")
@@ -61,6 +64,7 @@ clean-rust:
6164
cd src/tests/rust_guests/dummyguest && cargo clean
6265
cd src/tests/rust_guests/callbackguest && cargo clean
6366
cd src/tests/rust_guests/witguest && cargo clean
67+
cd src/tests/rust_guests/simdguest && cargo clean
6468
cd src/tests/rust_guests/witguest && rm -f interface.wasm
6569
git clean -fdx src/tests/c_guests/bin src/tests/rust_guests/bin
6670

@@ -149,6 +153,7 @@ fmt-check:
149153
cargo +nightly fmt --manifest-path src/tests/rust_guests/simpleguest/Cargo.toml -- --check
150154
cargo +nightly fmt --manifest-path src/tests/rust_guests/dummyguest/Cargo.toml -- --check
151155
cargo +nightly fmt --manifest-path src/tests/rust_guests/witguest/Cargo.toml -- --check
156+
cargo +nightly fmt --manifest-path src/tests/rust_guests/simdguest/Cargo.toml -- --check
152157
cargo +nightly fmt --manifest-path src/hyperlight_guest_capi/Cargo.toml -- --check
153158

154159
check-license-headers:
@@ -160,6 +165,7 @@ fmt-apply:
160165
cargo +nightly fmt --manifest-path src/tests/rust_guests/simpleguest/Cargo.toml
161166
cargo +nightly fmt --manifest-path src/tests/rust_guests/dummyguest/Cargo.toml
162167
cargo +nightly fmt --manifest-path src/tests/rust_guests/witguest/Cargo.toml
168+
cargo +nightly fmt --manifest-path src/tests/rust_guests/simdguest/Cargo.toml
163169
cargo +nightly fmt --manifest-path src/hyperlight_guest_capi/Cargo.toml
164170

165171
clippy target=default-target: (witguest-wit)
@@ -169,6 +175,7 @@ clippy-guests target=default-target: (witguest-wit)
169175
cd src/tests/rust_guests/simpleguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings
170176
cd src/tests/rust_guests/callbackguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings
171177
cd src/tests/rust_guests/witguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings
178+
cd src/tests/rust_guests/simdguest && cargo clippy --profile={{ if target == "debug" { "dev" } else { target } }} -- -D warnings
172179

173180
clippy-apply-fix-unix:
174181
cargo clippy --fix --all

count_simd_instructions.sh

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#!/bin/bash

# Count SIMD instructions in an ELF binary, grouped by instruction-set extension.
# Usage: ./count_simd_instructions.sh <binary_file>
#
# Classification is a heuristic based on the mnemonics that `objdump -d` prints:
#   * each count is the number of disassembly lines matching that category;
#   * VEX-encoded integer ops (vpaddd, ...) are reported as AVX2 even when they
#     operate on 128-bit registers;
#   * AVX-512 is detected from zmm/mask-register operands and EVEX decorations,
#     so EVEX instructions that use none of them are not recognised;
#   * a line may belong to several categories (e.g. `vaddps` on zmm registers is
#     both AVX and AVX-512), so the total is computed separately and counts each
#     line once.

if [ $# -eq 0 ]; then
    echo "Usage: $0 <binary_file>"
    exit 1
fi

BINARY="$1"

if [ ! -f "$BINARY" ]; then
    echo "Error: File '$BINARY' not found"
    exit 1
fi

echo "Analyzing SIMD instructions in: $BINARY"
echo "========================================"

# Disassemble the binary; test the assignment directly so the exit status of
# objdump is what decides success.
if ! DISASM=$(objdump -d "$BINARY" 2>/dev/null); then
    echo "Error: Failed to disassemble binary. Make sure it's a valid ELF file."
    exit 1
fi

# Print the number of disassembly lines matching the extended regex in $1.
# `grep -c` prints 0 (and exits non-zero) when nothing matches, which is fine
# here because only its output is used.
count_matches() {
    grep -c -i -E -- "$1" <<< "$DISASM"
}

# Scalar and packed floating-point mnemonics shared by the legacy SSE encoding
# and (with a `v` prefix) the AVX encoding.
FP_OPS='movss|movsd|addss|addsd|subss|subsd|mulss|mulsd|divss|divsd|sqrtss|sqrtsd|maxss|maxsd|minss|minsd|cmpss|cmpsd|ucomiss|ucomisd|comiss|comisd'
FP_OPS="$FP_OPS"'|movaps|movapd|movups|movupd|movlps|movlpd|movhps|movhpd|movlhps|movhlps|unpcklps|unpcklpd|unpckhps|unpckhpd'
FP_OPS="$FP_OPS"'|addps|addpd|subps|subpd|mulps|mulpd|divps|divpd|sqrtps|sqrtpd|maxps|maxpd|minps|minpd|cmpps|cmppd'
FP_OPS="$FP_OPS"'|andps|andpd|andnps|andnpd|orps|orpd|xorps|xorpd|shufps|shufpd'

SSE_RE="\b(${FP_OPS})\b"

# movq/movd are also ordinary general-purpose moves in AT&T syntax, so they are
# only counted when an xmm register is involved.
SSE2_RE='\b(movdqa|movdqu|paddb|paddw|paddd|paddq|psubb|psubw|psubd|psubq|pmullw|pmuludq|pand|pandn|por|pxor|psllw|pslld|psllq|psrlw|psrld|psrlq|psraw|psrad|packsswb|packssdw|packuswb|punpckhbw|punpckhwd|punpckhdq|punpckhqdq|punpcklbw|punpcklwd|punpckldq|punpcklqdq|pcmpeqb|pcmpeqw|pcmpeqd|pcmpgtb|pcmpgtw|pcmpgtd|pmaxub|pmaxsw|pminub|pminsw|psadbw|pavgb|pavgw)\b|\bmov[dq]\b.*xmm[0-9]'

SSE3_RE='\b(addsubpd|addsubps|haddpd|haddps|hsubpd|hsubps|movddup|movshdup|movsldup|lddqu)\b'

SSSE3_RE='\b(pabsb|pabsw|pabsd|palignr|phaddb|phaddw|phaddd|phaddsw|phsubb|phsubw|phsubd|phsubsw|pmaddubsw|pmulhrsw|pshufb|psignb|psignw|psignd)\b'

SSE41_RE='\b(blendpd|blendps|blendvpd|blendvps|dppd|dpps|extractps|insertps|movntdqa|mpsadbw|packusdw|pblendvb|pblendw|pcmpeqq|pextrb|pextrd|pextrq|pextrw|phminposuw|pinsrb|pinsrd|pinsrq|pmaxsb|pmaxsd|pmaxud|pmaxuw|pminsb|pminsd|pminud|pminuw|pmovsxbw|pmovsxbd|pmovsxbq|pmovsxwd|pmovsxwq|pmovsxdq|pmovzxbw|pmovzxbd|pmovzxbq|pmovzxwd|pmovzxwq|pmovzxdq|pmuldq|pmulld|ptest|roundpd|roundps|roundsd|roundss)\b'

SSE42_RE='\b(crc32|pcmpestri|pcmpestrm|pcmpistri|pcmpistrm|pcmpgtq)\b'

# Full mnemonics only: stems such as "broadcast" or "permute" followed by \b
# can never match real instructions like vbroadcastss or vpermilps.
AVX_RE="\bv(${FP_OPS}|blendps|blendpd|blendvps|blendvpd|dpps|dppd|roundps|roundpd|roundss|roundsd|insertf128|extractf128|broadcast(ss|sd|f128)|permil(ps|pd)|perm2f128|maskmov(ps|pd)|test(ps|pd)|zeroupper|zeroall)\b"

# Integer stems are followed by [a-z0-9]* so that the element-size suffix
# (vpaddd, vpcmpeqb, vpsllvq, ...) is accepted.
AVX2_RE='\bv((pabs|padd|psub|pmul|pand|por|pxor|psll|psrl|psra|pack|punpck|pcmp|pmax|pmin|psad|pavg|pblend|pbroadcast|pgather|gather|pmovsx|pmovzx|pmovmskb|psign|pshuf|palign|pmadd|phsub|phadd|pmaskmov)[a-z0-9]*|perm(d|q|ps|pd|2i128)|inserti128|extracti128|broadcasti128)\b'

# zmm registers, opmask registers, or EVEX-only operand decorations.
AVX512_RE='\b(zmm[0-9]+|k[0-7])\b|\{(evex|%?k[0-7]|z|1to[0-9]+|(r[nduz]-)?sae)\}'

# Count different instruction sets
SSE_COUNT=$(count_matches "$SSE_RE")
SSE2_COUNT=$(count_matches "$SSE2_RE")
SSE3_COUNT=$(count_matches "$SSE3_RE")
SSSE3_COUNT=$(count_matches "$SSSE3_RE")
SSE41_COUNT=$(count_matches "$SSE41_RE")
SSE42_COUNT=$(count_matches "$SSE42_RE")
AVX_COUNT=$(count_matches "$AVX_RE")
AVX2_COUNT=$(count_matches "$AVX2_RE")
AVX512_COUNT=$(count_matches "$AVX512_RE")

echo "SSE instructions: $SSE_COUNT"
echo "SSE2 instructions: $SSE2_COUNT"
echo "SSE3 instructions: $SSE3_COUNT"
echo "SSSE3 instructions: $SSSE3_COUNT"
echo "SSE4.1 instructions: $SSE41_COUNT"
echo "SSE4.2 instructions: $SSE42_COUNT"
echo "AVX instructions: $AVX_COUNT"
echo "AVX2 instructions: $AVX2_COUNT"
echo "AVX-512 instructions: $AVX512_COUNT"
echo "========================================"

# Match against the union of all categories so that a line belonging to more
# than one category is only counted once in the total.
TOTAL=$(count_matches "$SSE_RE|$SSE2_RE|$SSE3_RE|$SSSE3_RE|$SSE41_RE|$SSE42_RE|$AVX_RE|$AVX2_RE|$AVX512_RE")
echo "Total SIMD instructions: $TOTAL"

src/hyperlight_guest/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,5 @@ Provides only the essential building blocks for interacting with the host enviro
1313

1414
[dependencies]
1515
anyhow = { version = "1.0.98", default-features = false }
16-
serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
16+
# serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
1717
hyperlight-common = { workspace = true }

src/hyperlight_guest/src/error.rs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ limitations under the License.
1717
use alloc::format;
1818
use alloc::string::String;
1919

20+
use anyhow;
2021
use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode;
21-
use {anyhow, serde_json};
2222

2323
pub type Result<T> = core::result::Result<T, HyperlightGuestError>;
2424

@@ -42,12 +42,3 @@ impl From<anyhow::Error> for HyperlightGuestError {
4242
}
4343
}
4444
}
45-
46-
impl From<serde_json::Error> for HyperlightGuestError {
47-
fn from(error: serde_json::Error) -> Self {
48-
Self {
49-
kind: ErrorCode::GuestError,
50-
message: format!("Error: {:?}", error),
51-
}
52-
}
53-
}

src/hyperlight_host/src/hypervisor/kvm.rs

Lines changed: 118 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use std::sync::Arc;
2121
use std::sync::Mutex;
2222
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
2323

24-
use kvm_bindings::{KVM_MEM_READONLY, kvm_fpu, kvm_regs, kvm_userspace_memory_region};
24+
use kvm_bindings::{KVM_MEM_READONLY, kvm_fpu, kvm_regs, kvm_userspace_memory_region, kvm_xcrs};
2525
use kvm_ioctls::Cap::UserMemory;
2626
use kvm_ioctls::{Kvm, VcpuExit, VcpuFd, VmFd};
2727
use log::LevelFilter;
@@ -37,8 +37,8 @@ use super::handlers::DbgMemAccessHandlerWrapper;
3737
use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper};
3838
#[cfg(feature = "init-paging")]
3939
use super::{
40-
CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE,
41-
EFER_LMA, EFER_LME, EFER_NX, EFER_SCE,
40+
CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT,
41+
CR4_OSXSAVE, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, XCR0_AVX, XCR0_SSE, XCR0_X87,
4242
};
4343
use super::{HyperlightExit, Hypervisor, InterruptHandle, LinuxInterruptHandle, VirtualCPU};
4444
#[cfg(gdb)]
@@ -336,6 +336,10 @@ impl KVMDriver {
336336
})?;
337337

338338
let mut vcpu_fd = vm_fd.create_vcpu(0)?;
339+
let now = std::time::SystemTime::now();
340+
Self::setup_cpuid(&kvm, &mut vcpu_fd)?;
341+
let elapsed = now.elapsed().unwrap();
342+
println!("CPUID setup took: {:?}", elapsed);
339343
Self::setup_initial_sregs(&mut vcpu_fd, pml4_addr)?;
340344

341345
#[cfg(gdb)]
@@ -409,7 +413,7 @@ impl KVMDriver {
409413
cfg_if::cfg_if! {
410414
if #[cfg(feature = "init-paging")] {
411415
sregs.cr3 = _pml4_addr;
412-
sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT;
416+
sregs.cr4 = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT | CR4_OSXSAVE;
413417
sregs.cr0 = CR0_PE | CR0_MP | CR0_ET | CR0_NE | CR0_AM | CR0_PG | CR0_WP;
414418
sregs.efer = EFER_LME | EFER_LMA | EFER_SCE | EFER_NX;
415419
sregs.cs.l = 1; // required for 64-bit mode
@@ -419,6 +423,116 @@ impl KVMDriver {
419423
}
420424
}
421425
vcpu_fd.set_sregs(&sregs)?;
426+
427+
// Setup XCR0 (Extended Control Register 0) to enable SIMD features
428+
// This is required for AVX and other SIMD instruction support
429+
// Only set XCR0 if the init-paging feature is enabled
430+
cfg_if::cfg_if! {
431+
if #[cfg(feature = "init-paging")] {
432+
// Create a properly initialized kvm_xcrs structure
433+
let mut xcrs: kvm_xcrs = unsafe { std::mem::zeroed() };
434+
435+
// Set XCR0 to enable x87 FPU (required), SSE, and AVX
436+
// XCR0 bit 0 (x87) must always be set for any XSAVE features
437+
xcrs.xcrs[0].xcr = 0; // XCR0 register number
438+
xcrs.xcrs[0].value = XCR0_X87 | XCR0_SSE | XCR0_AVX;
439+
xcrs.nr_xcrs = 1;
440+
441+
println!("Setting XCRs: XCR0={:#x}, nr_xcrs={}", xcrs.xcrs[0].value, xcrs.nr_xcrs);
442+
443+
match vcpu_fd.set_xcrs(&xcrs) {
444+
Ok(_) => {
445+
println!("Successfully set XCR0 to enable SIMD features: {:#x}", xcrs.xcrs[0].value);
446+
},
447+
Err(e) => {
448+
println!("Failed to set XCRs (XCR0) for SIMD support: {:?}", e);
449+
}
450+
}
451+
}
452+
}
453+
454+
Ok(())
455+
}
456+
457+
/// Setup the CPUID for the vCPU to enable SIMD features.
458+
/// This is done by just mirroring the host's CPUID in the guest.
459+
#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")]
460+
fn setup_cpuid(kvm: &Kvm, vcpu_fd: &mut VcpuFd) -> Result<()> {
461+
// Get the supported CPUID from the host machine
462+
let cpuid = kvm.get_supported_cpuid(kvm_bindings::KVM_MAX_CPUID_ENTRIES)?;
463+
464+
let entries = cpuid.as_slice();
465+
466+
// https://en.wikipedia.org/wiki/CPUID
467+
// sse: EAX=1, EDX bit 25
468+
assert!(
469+
entries
470+
.get(1)
471+
.map(|entry| entry.edx & (1 << 25) != 0)
472+
.unwrap_or(false),
473+
"SSE support not detected on the host machine"
474+
);
475+
// sse2 is EAX=1, EDX bit 26
476+
assert!(
477+
entries
478+
.get(1)
479+
.map(|entry| entry.edx & (1 << 26) != 0)
480+
.unwrap_or(false),
481+
"SSE2 support not detected on the host machine"
482+
);
483+
// sse3 is EAX=1, ECX bit 0
484+
assert!(
485+
entries
486+
.get(1)
487+
.map(|entry| entry.ecx & (1 << 0) != 0)
488+
.unwrap_or(false),
489+
"SSE3 support not detected on the host machine"
490+
);
491+
// ssse3 is EAX=1, ECX bit 9
492+
assert!(
493+
entries
494+
.get(1)
495+
.map(|entry| entry.ecx & (1 << 9) != 0)
496+
.unwrap_or(false),
497+
"SSSE3 support not detected on the host machine"
498+
);
499+
// sse4.1 is EAX=1, ECX bit 19
500+
assert!(
501+
entries
502+
.get(1)
503+
.map(|entry| entry.ecx & (1 << 19) != 0)
504+
.unwrap_or(false),
505+
"SSE4.1 support not detected on the host machine"
506+
);
507+
// sse4.2 is EAX=1, ECX bit 20
508+
assert!(
509+
entries
510+
.get(1)
511+
.map(|entry| entry.ecx & (1 << 20) != 0)
512+
.unwrap_or(false),
513+
"SSE4.2 support not detected on the host machine"
514+
);
515+
// avx is EAX=1, ECX bit 28
516+
assert!(
517+
entries
518+
.get(1)
519+
.map(|entry| entry.ecx & (1 << 28) != 0)
520+
.unwrap_or(false),
521+
"AVX support not detected on the host machine"
522+
);
523+
// avx2 is EAX=7, EBX bit 5
524+
assert!(
525+
entries
526+
.get(7)
527+
.map(|entry| entry.ebx & (1 << 5) != 0)
528+
.unwrap_or(false),
529+
"AVX2 support not detected on the host machine"
530+
);
531+
532+
// Set the CPUID for the guest's vCPU to be the same as the host's
533+
vcpu_fd.set_cpuid2(&cpuid)?;
534+
println!("CPUID set successfully for SIMD support");
535+
422536
Ok(())
423537
}
424538
}

src/hyperlight_host/src/hypervisor/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ cfg_if::cfg_if! {
8080
pub(crate) const CR4_PAE: u64 = 1 << 5;
8181
pub(crate) const CR4_OSFXSR: u64 = 1 << 9;
8282
pub(crate) const CR4_OSXMMEXCPT: u64 = 1 << 10;
83+
pub(crate) const CR4_OSXSAVE: u64 = 1 << 18;
8384
pub(crate) const CR0_PE: u64 = 1;
8485
pub(crate) const CR0_MP: u64 = 1 << 1;
8586
pub(crate) const CR0_ET: u64 = 1 << 4;
@@ -91,6 +92,11 @@ cfg_if::cfg_if! {
9192
pub(crate) const EFER_LMA: u64 = 1 << 10;
9293
pub(crate) const EFER_SCE: u64 = 1;
9394
pub(crate) const EFER_NX: u64 = 1 << 11;
95+
96+
// XCR0 (Extended Control Register 0) bits for XSAVE features
97+
pub(crate) const XCR0_X87: u64 = 1 << 0; // x87 FPU state
98+
pub(crate) const XCR0_SSE: u64 = 1 << 1; // SSE state (XMM registers)
99+
pub(crate) const XCR0_AVX: u64 = 1 << 2; // AVX state (YMM registers)
94100
}
95101
}
96102

0 commit comments

Comments
 (0)