diff --git a/benches/bench.rs b/benches/bench.rs index defdb128..69a752b8 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -144,7 +144,7 @@ fn bench_many_chunks_avx512(b: &mut Bencher) { } #[bench] -#[cfg(blake3_neon)] +#[cfg(blake3_neon_ffi)] fn bench_many_chunks_neon(b: &mut Bencher) { bench_many_chunks_fn(b, Platform::neon().unwrap()); } @@ -215,7 +215,7 @@ fn bench_many_parents_avx512(b: &mut Bencher) { } #[bench] -#[cfg(blake3_neon)] +#[cfg(blake3_neon_ffi)] fn bench_many_parents_neon(b: &mut Bencher) { bench_many_parents_fn(b, Platform::neon().unwrap()); } diff --git a/build.rs b/build.rs index 01b692fd..6a6f20fa 100644 --- a/build.rs +++ b/build.rs @@ -236,6 +236,7 @@ fn build_avx512_c_intrinsics() { // This is required on 32-bit x86 targets, since the assembly // implementation doesn't support those. println!("cargo:rustc-cfg=blake3_avx512_ffi"); + println!("cargo:rustc-cfg=blake3_avx512_ffi_intrinsics"); let mut build = new_build(); build.file("c/blake3_avx512.c"); if is_windows_msvc() { @@ -256,6 +257,7 @@ fn build_avx512_assembly() { // only supports x86_64. 
assert!(is_x86_64()); println!("cargo:rustc-cfg=blake3_avx512_ffi"); + println!("cargo:rustc-cfg=blake3_avx512_ffi_assembly"); let mut build = new_build(); let mut is_msvc = false; if is_windows_target() { @@ -279,6 +281,7 @@ fn build_avx512_assembly() { } fn build_neon_c_intrinsics() { + println!("cargo:rustc-cfg=blake3_neon_ffi"); let mut build = new_build(); // Note that blake3_neon.c normally depends on the blake3_portable.c // for the single-instance compression function, but we expose @@ -310,7 +313,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> { "blake3_avx2_ffi", "blake3_avx2_rust", "blake3_avx512_ffi", - "blake3_neon", + "blake3_avx512_ffi_assembly", + "blake3_avx512_ffi_intrinsics", + "blake3_neon_ffi", "blake3_wasm32_simd", ]; for cfg_name in all_cfgs { @@ -353,7 +358,6 @@ fn main() -> Result<(), Box<dyn std::error::Error>> { if (is_arm() && is_neon()) || (!is_no_neon() && !is_pure() && is_aarch64() && is_little_endian()) { - println!("cargo:rustc-cfg=blake3_neon"); build_neon_c_intrinsics(); } diff --git a/src/ffi_avx2.rs b/src/ffi_avx2.rs index 43bf1504..360cb653 100644 --- a/src/ffi_avx2.rs +++ b/src/ffi_avx2.rs @@ -35,6 +35,10 @@ pub unsafe fn hash_many( } pub mod ffi { + #[cfg_attr( + blake3_avx2_ffi, + link(name = "blake3_sse2_sse41_avx2_assembly", kind = "static") + )] extern "C" { pub fn blake3_hash_many_avx2( inputs: *const *const u8, diff --git a/src/ffi_avx512.rs b/src/ffi_avx512.rs index e648edaf..e09277ce 100644 --- a/src/ffi_avx512.rs +++ b/src/ffi_avx512.rs @@ -97,6 +97,14 @@ pub unsafe fn xof_many( } pub mod ffi { + #[cfg_attr( + blake3_avx512_ffi_assembly, + link(name = "blake3_avx512_assembly", kind = "static") + )] + #[cfg_attr( + blake3_avx512_ffi_intrinsics, + link(name = "blake3_avx512_intrinsics", kind = "static") + )] extern "C" { pub fn blake3_compress_in_place_avx512( cv: *mut u32, diff --git a/src/ffi_neon.rs b/src/ffi_neon.rs index 54d07a4d..b8493dae 100644 --- a/src/ffi_neon.rs +++ b/src/ffi_neon.rs @@ -53,6 +53,7 @@ pub extern "C" fn 
blake3_compress_in_place_portable( } pub mod ffi { + #[cfg_attr(blake3_neon_ffi, link(name = "blake3_neon", kind = "static"))] extern "C" { pub fn blake3_hash_many_neon( inputs: *const *const u8, diff --git a/src/ffi_sse2.rs b/src/ffi_sse2.rs index 8dafd698..8796c02c 100644 --- a/src/ffi_sse2.rs +++ b/src/ffi_sse2.rs @@ -73,6 +73,10 @@ pub unsafe fn hash_many( } pub mod ffi { + #[cfg_attr( + blake3_sse2_ffi, + link(name = "blake3_sse2_sse41_avx2_assembly", kind = "static") + )] extern "C" { pub fn blake3_compress_in_place_sse2( cv: *mut u32, diff --git a/src/ffi_sse41.rs b/src/ffi_sse41.rs index f851ca15..05d39550 100644 --- a/src/ffi_sse41.rs +++ b/src/ffi_sse41.rs @@ -73,6 +73,10 @@ pub unsafe fn hash_many( } pub mod ffi { + #[cfg_attr( + blake3_sse41_ffi, + link(name = "blake3_sse2_sse41_avx2_assembly", kind = "static") + )] extern "C" { pub fn blake3_compress_in_place_sse41( cv: *mut u32, diff --git a/src/lib.rs b/src/lib.rs index 92bdbdb9..c935e6f0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -114,7 +114,7 @@ mod avx2; #[cfg(blake3_avx512_ffi)] #[path = "ffi_avx512.rs"] mod avx512; -#[cfg(blake3_neon)] +#[cfg(blake3_neon_ffi)] #[path = "ffi_neon.rs"] mod neon; mod portable; diff --git a/src/platform.rs b/src/platform.rs index 51b3b7b1..14cfd1b3 100644 --- a/src/platform.rs +++ b/src/platform.rs @@ -10,7 +10,7 @@ cfg_if::cfg_if! { pub const MAX_SIMD_DEGREE: usize = 8; } } - } else if #[cfg(blake3_neon)] { + } else if #[cfg(blake3_neon_ffi)] { pub const MAX_SIMD_DEGREE: usize = 4; } else if #[cfg(blake3_wasm32_simd)] { pub const MAX_SIMD_DEGREE: usize = 4; @@ -32,7 +32,7 @@ cfg_if::cfg_if! 
{ pub const MAX_SIMD_DEGREE_OR_2: usize = 8; } } - } else if #[cfg(blake3_neon)] { + } else if #[cfg(blake3_neon_ffi)] { pub const MAX_SIMD_DEGREE_OR_2: usize = 4; } else if #[cfg(blake3_wasm32_simd)] { pub const MAX_SIMD_DEGREE_OR_2: usize = 4; @@ -53,7 +53,7 @@ pub enum Platform { #[cfg(blake3_avx512_ffi)] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] AVX512, - #[cfg(blake3_neon)] + #[cfg(blake3_neon_ffi)] NEON, #[cfg(blake3_wasm32_simd)] #[allow(non_camel_case_types)] @@ -88,7 +88,7 @@ impl Platform { } // We don't use dynamic feature detection for NEON. If the "neon" // feature is on, NEON is assumed to be supported. - #[cfg(blake3_neon)] + #[cfg(blake3_neon_ffi)] { return Platform::NEON; } @@ -111,7 +111,7 @@ impl Platform { #[cfg(blake3_avx512_ffi)] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX512 => 16, - #[cfg(blake3_neon)] + #[cfg(blake3_neon_ffi)] Platform::NEON => 4, #[cfg(blake3_wasm32_simd)] Platform::WASM32_SIMD => 4, @@ -147,7 +147,7 @@ impl Platform { crate::avx512::compress_in_place(cv, block, block_len, counter, flags) }, // No NEON compress_in_place() implementation yet. - #[cfg(blake3_neon)] + #[cfg(blake3_neon_ffi)] Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags), #[cfg(blake3_wasm32_simd)] Platform::WASM32_SIMD => { @@ -183,7 +183,7 @@ impl Platform { crate::avx512::compress_xof(cv, block, block_len, counter, flags) }, // No NEON compress_xof() implementation yet. - #[cfg(blake3_neon)] + #[cfg(blake3_neon_ffi)] Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags), #[cfg(blake3_wasm32_simd)] Platform::WASM32_SIMD => { @@ -282,7 +282,7 @@ impl Platform { ) }, // Assumed to be safe if the "neon" feature is on. 
- #[cfg(blake3_neon)] + #[cfg(blake3_neon_ffi)] Platform::NEON => unsafe { crate::neon::hash_many( inputs, @@ -390,7 +390,7 @@ impl Platform { } } - #[cfg(blake3_neon)] + #[cfg(blake3_neon_ffi)] pub fn neon() -> Option<Self> { // Assumed to be safe if the "neon" feature is on. Some(Self::NEON)