From 2a940510e8003480dec2d11729d0f207607a3e04 Mon Sep 17 00:00:00 2001 From: sayantn Date: Thu, 17 Apr 2025 05:46:35 +0530 Subject: [PATCH 1/5] Add checks for void pointer types to ensure consistency --- crates/stdarch-verify/tests/x86-intel.rs | 94 +++++++++++++++++++----- 1 file changed, 74 insertions(+), 20 deletions(-) diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs index 76170922f7..cb772ac882 100644 --- a/crates/stdarch-verify/tests/x86-intel.rs +++ b/crates/stdarch-verify/tests/x86-intel.rs @@ -67,7 +67,7 @@ static TUPLE: Type = Type::Tuple; static CPUID: Type = Type::CpuidResult; static NEVER: Type = Type::Never; -#[derive(Debug)] +#[derive(Debug, PartialEq, Copy, Clone)] enum Type { PrimFloat(u8), PrimSigned(u8), @@ -520,7 +520,7 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { // Make sure we've got the right return type. if let Some(t) = rust.ret { - equate(t, &intel.return_.type_, "", rust.name, false)?; + equate(t, &intel.return_.type_, "", intel, false)?; } else if !intel.return_.type_.is_empty() && intel.return_.type_ != "void" { bail!( "{} returns `{}` with intel, void in rust", @@ -542,7 +542,7 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { } for (i, (a, b)) in intel.parameters.iter().zip(rust.arguments).enumerate() { let is_const = rust.required_const.contains(&i); - equate(b, &a.type_, &a.etype, &intel.name, is_const)?; + equate(b, &a.type_, &a.etype, &intel, is_const)?; } } @@ -655,11 +655,59 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { Ok(()) } +fn pointed_type(intrinsic: &Intrinsic) -> Result<Type, String> { Ok( if intrinsic.tech == "AMX" || intrinsic .cpuid .iter() .any(|cpuid| matches!(&**cpuid, "KEYLOCKER" | "KEYLOCKER_WIDE" | "XSAVE" | "FXSR")) { // AMX, KEYLOCKER and XSAVE intrinsics should take `*u8` U8 } else if intrinsic.name == "_mm_clflush" { // Just a false match in the following logic U8 } else if ["_mm_storeu_si", "_mm_loadu_si"] .iter() .any(|x| intrinsic.name.starts_with(x)) { // These have already been stabilized, so cannot be changed anymore U8 } else if intrinsic.name.ends_with("i8") { I8 } else if intrinsic.name.ends_with("i16") { I16 } else if intrinsic.name.ends_with("i32") { I32 } else if intrinsic.name.ends_with("i64") { I64 } else if intrinsic.name.ends_with("i128") { M128I } else if intrinsic.name.ends_with("i256") { M256I } else if intrinsic.name.ends_with("i512") { M512I } else if intrinsic.name.ends_with("h") { F16 } else if intrinsic.name.ends_with("s") { F32 } else if intrinsic.name.ends_with("d") { F64 } else { bail!( "Don't know what type of *void to use for {}", intrinsic.name ); }, ) } fn equate( t: &Type, intel: &str, etype: &str, - intrinsic: &str, + intrinsic: &Intrinsic, is_const: bool, ) -> Result<(), String> { // Make pointer adjacent to the type: float * foo => float* foo @@ -676,7 +724,7 @@ fn equate( if etype == "IMM" || intel == "constexpr int" { // The _bittest intrinsics claim to only accept immediates but actually // accept run-time values as well.
- if !is_const && !intrinsic.starts_with("_bittest") { + if !is_const && !intrinsic.name.starts_with("_bittest") { bail!("argument required to be const but isn't"); } } else { @@ -723,7 +771,16 @@ fn equate( (&Type::MMASK16, "__mmask16") => {} (&Type::MMASK8, "__mmask8") => {} - (&Type::MutPtr(_), "void*") => {} + (&Type::MutPtr(_type), "void*") | (&Type::ConstPtr(_type), "void const*") => { + let pointed_type = pointed_type(intrinsic)?; + if _type != &pointed_type { + bail!( + "incorrect void pointer type {_type:?} in {}, should be pointer to {pointed_type:?}", + intrinsic.name, + ); + } + } + (&Type::MutPtr(&Type::PrimFloat(32)), "float*") => {} (&Type::MutPtr(&Type::PrimFloat(64)), "double*") => {} (&Type::MutPtr(&Type::PrimSigned(8)), "char*") => {} @@ -752,7 +809,6 @@ fn equate( (&Type::MutPtr(&Type::M512I), "__m512i*") => {} (&Type::MutPtr(&Type::M512D), "__m512d*") => {} - (&Type::ConstPtr(_), "void const*") => {} (&Type::ConstPtr(&Type::PrimFloat(16)), "_Float16 const*") => {} (&Type::ConstPtr(&Type::PrimFloat(32)), "float const*") => {} (&Type::ConstPtr(&Type::PrimFloat(64)), "double const*") => {} @@ -792,34 +848,32 @@ fn equate( // This is a macro (?) in C which seems to mutate its arguments, but // that means that we're taking pointers to arguments in rust // as we're not exposing it as a macro. - (&Type::MutPtr(&Type::M128), "__m128") if intrinsic == "_MM_TRANSPOSE4_PS" => {} + (&Type::MutPtr(&Type::M128), "__m128") if intrinsic.name == "_MM_TRANSPOSE4_PS" => {} // The _rdtsc intrinsic uses a __int64 return type, but this is a bug in // the intrinsics guide: https://github.com/rust-lang/stdarch/issues/559 // We have manually fixed the bug by changing the return type to `u64`. - (&Type::PrimUnsigned(64), "__int64") if intrinsic == "_rdtsc" => {} + (&Type::PrimUnsigned(64), "__int64") if intrinsic.name == "_rdtsc" => {} // The _bittest and _bittest64 intrinsics takes a mutable pointer in the // intrinsics guide even though it never writes through the pointer: - (&Type::ConstPtr(&Type::PrimSigned(32)), "__int32*") if intrinsic == "_bittest" => {} - (&Type::ConstPtr(&Type::PrimSigned(64)), "__int64*") if intrinsic == "_bittest64" => {} + (&Type::ConstPtr(&Type::PrimSigned(32)), "__int32*") if intrinsic.name == "_bittest" => {} + (&Type::ConstPtr(&Type::PrimSigned(64)), "__int64*") if intrinsic.name == "_bittest64" => {} // The _xrstor, _fxrstor, _xrstor64, _fxrstor64 intrinsics take a // mutable pointer in the intrinsics guide even though they never write // through the pointer: (&Type::ConstPtr(&Type::PrimUnsigned(8)), "void*") - if intrinsic == "_xrstor" - || intrinsic == "_xrstor64" - || intrinsic == "_fxrstor" - || intrinsic == "_fxrstor64" => {} + if matches!( + &*intrinsic.name, + "_xrstor" | "_xrstor64" | "_fxrstor" | "_fxrstor64" + ) => {} // The _mm_stream_load_si128 intrinsic take a mutable pointer in the intrinsics // guide even though they never write through the pointer - (&Type::ConstPtr(&Type::M128I), "void*") if intrinsic == "_mm_stream_load_si128" => {} + (&Type::ConstPtr(&Type::M128I), "void*") if intrinsic.name == "_mm_stream_load_si128" => {} _ => bail!( - "failed to equate: `{}` and {:?} for {}", - intel, - t, - intrinsic + "failed to equate: `{intel}` and {t:?} for {}", + intrinsic.name ), } Ok(()) From cef1ecba3d867bf7dbb88eb9c3e6d12f07547725 Mon Sep 17 00:00:00 2001 From: sayantn Date: Thu, 17 Apr 2025 06:03:43 +0530 Subject: [PATCH 2/5] Change void* type for 3 intrinsics - `_mm512_load_si512` - `_mm512_loadu_si512` - `_mm512_stream_si512` --- 
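Notes: with the patch 1 check in place, the name suffix now pins down the pointee type, so `_mm512_load_si512`/`_mm512_loadu_si512`/`_mm512_stream_si512` (suffix `i512`) must use `*const __m512i`/`*mut __m512i` rather than `*const i32`/`*mut i32`. Callers that hold an `i32` pointer only need a pointer cast. A minimal sketch of the migration (hypothetical caller code, assuming a nightly toolchain with the unstable `stdarch_x86_avx512` feature enabled):

    use core::arch::x86_64::{__m512i, _mm512_loadu_si512};

    #[target_feature(enable = "avx512f")]
    unsafe fn load_words(words: &[i32; 16]) -> __m512i {
        // Before this patch: _mm512_loadu_si512(words.as_ptr())
        _mm512_loadu_si512(words.as_ptr().cast::<__m512i>())
    }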
crates/core_arch/src/x86/avx512f.rs | 14 +++++++------- crates/core_arch/src/x86/gfni.rs | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index f279c170f9..523a226a89 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -29696,7 +29696,7 @@ pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovntdq))] #[allow(clippy::cast_ptr_alignment)] -pub unsafe fn _mm512_stream_si512(mem_addr: *mut i32, a: __m512i) { +pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) { crate::arch::asm!( vps!("vmovntdq", ",{a}"), p = in(reg) mem_addr, @@ -34435,8 +34435,8 @@ pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32 -pub unsafe fn _mm512_loadu_si512(mem_addr: *const i32) -> __m512i { - ptr::read_unaligned(mem_addr as *const __m512i) +pub unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i { + ptr::read_unaligned(mem_addr) } /// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary. @@ -34509,8 +34509,8 @@ pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32 -pub unsafe fn _mm512_load_si512(mem_addr: *const i32) -> __m512i { - ptr::read(mem_addr as *const __m512i) +pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i { + ptr::read(mem_addr) } /// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated. 
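The aligned variant still requires the 64-byte boundary; only the pointee type changes. A minimal sketch of producing a suitably aligned buffer (the `Aligned64` wrapper is a hypothetical stand-in for the `Align` helper used in the tests below, and assumes AVX-512F is available at runtime):

    #[repr(C, align(64))]
    struct Aligned64 {
        data: [i32; 16],
    }

    let buf = Aligned64 { data: [0; 16] };
    // The array sits at offset 0 of an align(64) struct, so the cast
    // pointer is 64-byte aligned as _mm512_load_si512 requires.
    let v = unsafe { _mm512_load_si512(buf.data.as_ptr().cast()) };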
@@ -57231,7 +57231,7 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_loadu_si512() { let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50]; - let p = a.as_ptr(); + let p = a.as_ptr().cast(); let r = _mm512_loadu_si512(black_box(p)); let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50); assert_eq_m512i(r, e); @@ -57254,7 +57254,7 @@ mod tests { let a = Align { data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50], }; - let p = (a.data).as_ptr(); + let p = (a.data).as_ptr().cast(); let r = _mm512_load_si512(black_box(p)); let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50); assert_eq_m512i(r, e); diff --git a/crates/core_arch/src/x86/gfni.rs b/crates/core_arch/src/x86/gfni.rs index 42387e3d21..4ba1e62009 100644 --- a/crates/core_arch/src/x86/gfni.rs +++ b/crates/core_arch/src/x86/gfni.rs @@ -867,7 +867,7 @@ mod tests { #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] unsafe fn load_m512i_word<T>(data: &[T], word_index: usize) -> __m512i { let byte_offset = word_index * 64 / size_of::<T>(); - let pointer = data.as_ptr().add(byte_offset) as *const i32; + let pointer = data.as_ptr().add(byte_offset) as *const _; _mm512_loadu_si512(black_box(pointer)) } From 41cde2b80caa254e1dc342cce8e09f810b4ec00a Mon Sep 17 00:00:00 2001 From: sayantn Date: Thu, 17 Apr 2025 05:53:54 +0530 Subject: [PATCH 3/5] Change void* type for `compressstore` intrinsics --- crates/core_arch/src/x86/avx512f.rs | 72 ++++++++++++------------ crates/core_arch/src/x86/avx512vbmi2.rs | 50 ++++++++--------- 2 files changed, 57 insertions(+), 65 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 523a226a89..8f2b08b7c7 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -18605,7 +18605,7 @@ pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] -pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask16, a: __m512i) { +pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) { vcompressstored(base_addr as *mut _, a.as_i32x16(), k) } @@ -18616,7 +18616,7 @@ pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask16, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] -pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m256i) { +pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) { vcompressstored256(base_addr as *mut _, a.as_i32x8(), k) } @@ -18627,7 +18627,7 @@ pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressd))] -pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m128i) { +pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) { vcompressstored128(base_addr as *mut _, a.as_i32x4(), k) } @@ -18638,7 +18638,7 @@ pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: #[target_feature(enable = "avx512f")] #[unstable(feature =
"stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] -pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m512i) { +pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) { vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k) } @@ -18649,7 +18649,7 @@ pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] -pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m256i) { +pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) { vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k) } @@ -18660,7 +18660,7 @@ pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressq))] -pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m128i) { +pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) { vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k) } @@ -18671,7 +18671,7 @@ pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] -pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask16, a: __m512) { +pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) { vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k) } @@ -18682,7 +18682,7 @@ pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask16, a: #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] -pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m256) { +pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) { vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k) } @@ -18693,7 +18693,7 @@ pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompressps))] -pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m128) { +pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) { vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k) } @@ -18704,7 +18704,7 @@ pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompresspd))] -pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m512d) { +pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) { vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k) } @@ -18715,7 +18715,7 @@ pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, 
assert_instr(vcompresspd))] -pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m256d) { +pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) { vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k) } @@ -18726,7 +18726,7 @@ pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vcompresspd))] -pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m128d) { +pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) { vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k) } @@ -56718,9 +56718,9 @@ mod tests { unsafe fn test_mm512_mask_compressstoreu_epi32() { let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let mut r = [0_i32; 16]; - _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a); + _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_i32; 16]); - _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1111000011001010, a); + _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a); assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]); } @@ -56728,9 +56728,9 @@ mod tests { unsafe fn test_mm256_mask_compressstoreu_epi32() { let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); let mut r = [0_i32; 8]; - _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a); + _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_i32; 8]); - _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b11001010, a); + _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a); assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]); } @@ -56738,9 +56738,9 @@ mod tests { unsafe fn test_mm_mask_compressstoreu_epi32() { let a = _mm_setr_epi32(1, 2, 3, 4); let mut r = [0_i32; 4]; - _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a); + _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_i32; 4]); - _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1011, a); + _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a); assert_eq!(&r, &[1, 2, 4, 0]); } @@ -56748,9 +56748,9 @@ mod tests { unsafe fn test_mm512_mask_compressstoreu_epi64() { let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); let mut r = [0_i64; 8]; - _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a); + _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_i64; 8]); - _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b11001010, a); + _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a); assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]); } @@ -56758,9 +56758,9 @@ mod tests { unsafe fn test_mm256_mask_compressstoreu_epi64() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let mut r = [0_i64; 4]; - _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a); + _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_i64; 4]); - _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b1011, a); + _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a); assert_eq!(&r, &[1, 2, 4, 0]); } @@ -56768,9 +56768,9 @@ mod tests { unsafe fn test_mm_mask_compressstoreu_epi64() { let a = _mm_setr_epi64x(1, 2); let mut r = [0_i64; 2]; - _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a); + _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a); 
assert_eq!(&r, &[0_i64; 2]); - _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b10, a); + _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a); assert_eq!(&r, &[2, 0]); } @@ -56781,9 +56781,9 @@ mod tests { 13_f32, 14_f32, 15_f32, 16_f32, ); let mut r = [0_f32; 16]; - _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a); + _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_f32; 16]); - _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1111000011001010, a); + _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a); assert_eq!( &r, &[ @@ -56797,9 +56797,9 @@ mod tests { unsafe fn test_mm256_mask_compressstoreu_ps() { let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32); let mut r = [0_f32; 8]; - _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a); + _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_f32; 8]); - _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b11001010, a); + _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a); assert_eq!( &r, &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32] @@ -56810,9 +56810,9 @@ mod tests { unsafe fn test_mm_mask_compressstoreu_ps() { let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32); let mut r = [0.; 4]; - _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a); + _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0.; 4]); - _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1011, a); + _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a); assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]); } @@ -56820,9 +56820,9 @@ mod tests { unsafe fn test_mm512_mask_compressstoreu_pd() { let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); let mut r = [0.; 8]; - _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a); + _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0.; 8]); - _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b11001010, a); + _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a); assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]); } @@ -56830,9 +56830,9 @@ mod tests { unsafe fn test_mm256_mask_compressstoreu_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let mut r = [0.; 4]; - _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a); + _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0.; 4]); - _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b1011, a); + _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a); assert_eq!(&r, &[1., 2., 4., 0.]); } @@ -56840,9 +56840,9 @@ mod tests { unsafe fn test_mm_mask_compressstoreu_pd() { let a = _mm_setr_pd(1., 2.); let mut r = [0.; 2]; - _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a); + _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0.; 2]); - _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b10, a); + _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a); assert_eq!(&r, &[2., 0.]); } diff --git a/crates/core_arch/src/x86/avx512vbmi2.rs b/crates/core_arch/src/x86/avx512vbmi2.rs index 97c7986c17..7fc22985d5 100644 --- a/crates/core_arch/src/x86/avx512vbmi2.rs +++ b/crates/core_arch/src/x86/avx512vbmi2.rs @@ -169,7 +169,7 @@ pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> _ #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] -pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask32, a: 
__m512i) { +pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask32, a: __m512i) { vcompressstorew(base_addr as *mut _, a.as_i16x32(), k) } @@ -180,7 +180,7 @@ pub unsafe fn _mm512_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask32, #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] -pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask16, a: __m256i) { +pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask16, a: __m256i) { vcompressstorew256(base_addr as *mut _, a.as_i16x16(), k) } @@ -191,7 +191,7 @@ pub unsafe fn _mm256_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask16, #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressw))] -pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask8, a: __m128i) { +pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut i16, k: __mmask8, a: __m128i) { vcompressstorew128(base_addr as *mut _, a.as_i16x8(), k) } @@ -202,8 +202,8 @@ pub unsafe fn _mm_mask_compressstoreu_epi16(base_addr: *mut u8, k: __mmask8, a: #[target_feature(enable = "avx512vbmi2")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] -pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask64, a: __m512i) { - vcompressstoreb(base_addr as *mut _, a.as_i8x64(), k) +pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask64, a: __m512i) { + vcompressstoreb(base_addr, a.as_i8x64(), k) } /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -213,8 +213,8 @@ pub unsafe fn _mm512_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask64, #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] -pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask32, a: __m256i) { - vcompressstoreb256(base_addr as *mut _, a.as_i8x32(), k) +pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask32, a: __m256i) { + vcompressstoreb256(base_addr, a.as_i8x32(), k) } /// Contiguously store the active 8-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -224,8 +224,8 @@ pub unsafe fn _mm256_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask32, #[target_feature(enable = "avx512vbmi2,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpcompressb))] -pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut u8, k: __mmask16, a: __m128i) { - vcompressstoreb128(base_addr as *mut _, a.as_i8x16(), k) +pub unsafe fn _mm_mask_compressstoreu_epi8(base_addr: *mut i8, k: __mmask16, a: __m128i) { + vcompressstoreb128(base_addr, a.as_i8x16(), k) } /// Contiguously store the active 16-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src. 
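These compress stores write only the selected elements, packed contiguously starting at base_addr, so with the element-typed signatures a plain array pointer now works without a cast. A small sketch (hypothetical usage, mirroring the tests below):

    let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
    let mut out = [0i16; 8];
    // Mask 0b11110000 selects elements 4..8 (values 5, 6, 7, 8); they are
    // packed to the front of `out` and the tail of the buffer is untouched.
    unsafe { _mm_mask_compressstoreu_epi16(out.as_mut_ptr(), 0b11110000, a) };
    assert_eq!(out, [5, 6, 7, 8, 0, 0, 0, 0]);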
@@ -3853,13 +3853,9 @@ mod tests { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, ); let mut r = [0_i16; 32]; - _mm512_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a); + _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_i16; 32]); - _mm512_mask_compressstoreu_epi16( - r.as_mut_ptr() as *mut _, - 0b11110000_11001010_11111111_00000000, - a, - ); + _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a); assert_eq!( &r, &[ @@ -3873,9 +3869,9 @@ mod tests { unsafe fn test_mm256_mask_compressstoreu_epi16() { let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); let mut r = [0_i16; 16]; - _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a); + _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_i16; 16]); - _mm256_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a); + _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a); assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]); } @@ -3883,9 +3879,9 @@ mod tests { unsafe fn test_mm_mask_compressstoreu_epi16() { let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1); let mut r = [0_i16; 8]; - _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0, a); + _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_i16; 8]); - _mm_mask_compressstoreu_epi16(r.as_mut_ptr() as *mut _, 0b11110000, a); + _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a); assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]); } @@ -3897,10 +3893,10 @@ mod tests { 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, ); let mut r = [0_i8; 64]; - _mm512_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a); + _mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_i8; 64]); _mm512_mask_compressstoreu_epi8( - r.as_mut_ptr() as *mut _, + r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111, a, ); @@ -3921,13 +3917,9 @@ mod tests { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, ); let mut r = [0_i8; 32]; - _mm256_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a); + _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_i8; 32]); - _mm256_mask_compressstoreu_epi8( - r.as_mut_ptr() as *mut _, - 0b11110000_11001010_11111111_00000000, - a, - ); + _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a); assert_eq!( &r, &[ @@ -3941,9 +3933,9 @@ mod tests { unsafe fn test_mm_mask_compressstoreu_epi8() { let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); let mut r = [0_i8; 16]; - _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0, a); + _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a); assert_eq!(&r, &[0_i8; 16]); - _mm_mask_compressstoreu_epi8(r.as_mut_ptr() as *mut _, 0b11110000_11001010, a); + _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a); assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]); } } From 3d5931391648de9693bb5c200f6239598342c600 Mon Sep 17 00:00:00 2001 From: sayantn Date: Thu, 17 Apr 2025 05:59:40 +0530 Subject: [PATCH 4/5] Change void* type for `cvt_storeu` intrinsics --- crates/core_arch/src/x86/avx512f.rs | 126 ++++++++++++------------- crates/core_arch/src/x86_64/avx512f.rs | 36 +++---- 2 files changed, 81 insertions(+), 81 deletions(-) diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 8f2b08b7c7..a7ce04b1df 100644 --- a/crates/core_arch/src/x86/avx512f.rs 
+++ b/crates/core_arch/src/x86/avx512f.rs @@ -33841,8 +33841,8 @@ pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) { - vpmovdwmem(mem_addr, a.as_i32x16(), k); +pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) { + vpmovdwmem(mem_addr.cast(), a.as_i32x16(), k); } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -33852,8 +33852,8 @@ pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) { - vpmovdwmem256(mem_addr, a.as_i32x8(), k); +pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), k); } /// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -33863,8 +33863,8 @@ pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovdw))] -pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) { - vpmovdwmem128(mem_addr, a.as_i32x4(), k); +pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), k); } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -33874,8 +33874,8 @@ pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) { - vpmovsdwmem(mem_addr, a.as_i32x16(), k); +pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) { + vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), k); } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
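Note that only the public signatures change here: the underlying LLVM intrinsic bindings (vpmovdwmem and friends) keep their previous pointer type, and each wrapper adapts with a single mem_addr.cast() at the call boundary, which is a no-op at runtime.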
@@ -33885,8 +33885,8 @@ pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) { - vpmovsdwmem256(mem_addr, a.as_i32x8(), k); +pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), k); } /// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -33896,8 +33896,8 @@ pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsdw))] -pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) { - vpmovsdwmem128(mem_addr, a.as_i32x4(), k); +pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), k); } /// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -33907,8 +33907,8 @@ pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) { - vpmovusdwmem(mem_addr, a.as_i32x16(), k); +pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) { + vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), k); } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -33918,8 +33918,8 @@ pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask1 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) { - vpmovusdwmem256(mem_addr, a.as_i32x8(), k); +pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), k); } /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
@@ -33929,8 +33929,8 @@ pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusdw))] -pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) { - vpmovusdwmem128(mem_addr, a.as_i32x4(), k); +pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), k); } /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34039,8 +34039,8 @@ pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) { - vpmovqwmem(mem_addr, a.as_i64x8(), k); +pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) { + vpmovqwmem(mem_addr.cast(), a.as_i64x8(), k); } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34050,8 +34050,8 @@ pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) { - vpmovqwmem256(mem_addr, a.as_i64x4(), k); +pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), k); } /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34061,8 +34061,8 @@ pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqw))] -pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) { - vpmovqwmem128(mem_addr, a.as_i64x2(), k); +pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), k); } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
@@ -34072,8 +34072,8 @@ pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) { - vpmovsqwmem(mem_addr, a.as_i64x8(), k); +pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) { + vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), k); } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34083,8 +34083,8 @@ pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) { - vpmovsqwmem256(mem_addr, a.as_i64x4(), k); +pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), k); } /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34094,8 +34094,8 @@ pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqw))] -pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) { - vpmovsqwmem128(mem_addr, a.as_i64x2(), k); +pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), k); } /// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34105,8 +34105,8 @@ pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) { - vpmovusqwmem(mem_addr, a.as_i64x8(), k); +pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) { + vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), k); } /// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
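As elsewhere in this series, these masked stores write only the active converted elements; the rest of the destination buffer is not written, which is why the tests can pre-fill the buffer and compare it whole afterwards.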
@@ -34116,8 +34116,8 @@ pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) { - vpmovusqwmem256(mem_addr, a.as_i64x4(), k); +pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) { + vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), k); } /// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34127,8 +34127,8 @@ pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqw))] -pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) { - vpmovusqwmem128(mem_addr, a.as_i64x2(), k); +pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) { + vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), k); } /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34237,8 +34237,8 @@ pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) { - vpmovqdmem(mem_addr, a.as_i64x8(), k); +pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) { + vpmovqdmem(mem_addr.cast(), a.as_i64x8(), k); } ///Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34248,8 +34248,8 @@ pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) { - vpmovqdmem256(mem_addr, a.as_i64x4(), k); +pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) { + vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), k); } ///Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
@@ -34259,8 +34259,8 @@ pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovqd))] -pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) { - vpmovqdmem128(mem_addr, a.as_i64x2(), k); +pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) { + vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), k); } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34270,8 +34270,8 @@ pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) { - vpmovsqdmem(mem_addr, a.as_i64x8(), k); +pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) { + vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), k); } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34281,8 +34281,8 @@ pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) { - vpmovsqdmem256(mem_addr, a.as_i64x4(), k); +pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) { + vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), k); } /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34292,8 +34292,8 @@ pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovsqd))] -pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) { - vpmovsqdmem128(mem_addr, a.as_i64x2(), k); +pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) { + vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), k); } /// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
@@ -34303,8 +34303,8 @@ pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) { - vpmovusqdmem(mem_addr, a.as_i64x8(), k); +pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) { + vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), k); } /// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34314,8 +34314,8 @@ pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) { - vpmovusqdmem256(mem_addr, a.as_i64x4(), k); +pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) { + vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), k); } /// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. @@ -34325,8 +34325,8 @@ pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8 #[target_feature(enable = "avx512f,avx512vl")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vpmovusqd))] -pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) { - vpmovusqdmem128(mem_addr, a.as_i64x2(), k); +pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) { + vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), k); } /// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. 
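With the element-typed signature the common case needs no cast at all. A short sketch (hypothetical usage, matching the first test below; all 16 lanes active):

    let a = _mm512_set1_epi32(9);
    let mut out = [0i16; 16];
    // Truncating conversion of 16 x i32 to 16 x i16, stored unaligned.
    unsafe { _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr(), 0b11111111_11111111, a) };
    assert_eq!(out, [9i16; 16]);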
@@ -57013,7 +57013,7 @@ mod tests {
     unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
         let a = _mm512_set1_epi32(9);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
         let e = _mm256_set1_epi16(9);
         assert_eq_m256i(r, e);
     }
@@ -57022,7 +57022,7 @@ mod tests {
     unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
         let a = _mm256_set1_epi32(9);
         let mut r = _mm_undefined_si128();
-        _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set1_epi16(9);
         assert_eq_m128i(r, e);
     }
@@ -57031,7 +57031,7 @@ mod tests {
     unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
         let a = _mm_set1_epi32(9);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
         assert_eq_m128i(r, e);
     }
@@ -57040,7 +57040,7 @@ mod tests {
     unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
         let a = _mm512_set1_epi32(i32::MAX);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
         let e = _mm256_set1_epi16(i16::MAX);
         assert_eq_m256i(r, e);
     }
@@ -57049,7 +57049,7 @@ mod tests {
     unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
         let a = _mm256_set1_epi32(i32::MAX);
         let mut r = _mm_undefined_si128();
-        _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set1_epi16(i16::MAX);
         assert_eq_m128i(r, e);
     }
@@ -57058,7 +57058,7 @@ mod tests {
     unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
         let a = _mm_set1_epi32(i32::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
         assert_eq_m128i(r, e);
     }
@@ -57067,7 +57067,7 @@ mod tests {
     unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
         let a = _mm512_set1_epi32(i32::MAX);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
         let e = _mm256_set1_epi16(u16::MAX as i16);
         assert_eq_m256i(r, e);
     }
@@ -57076,7 +57076,7 @@ mod tests {
     unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
         let a = _mm256_set1_epi32(i32::MAX);
         let mut r = _mm_undefined_si128();
-        _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set1_epi16(u16::MAX as i16);
         assert_eq_m128i(r, e);
     }
@@ -57085,7 +57085,7 @@ mod tests {
     unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
         let a = _mm_set1_epi32(i32::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set_epi16(
             0,
             0,
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index 946b900a2b..d0d73488c1 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -12191,7 +12191,7 @@ mod tests {
     unsafe fn test_mm512_mask_cvtepi64_storeu_epi16() {
         let a = _mm512_set1_epi64(9);
         let mut r = _mm_undefined_si128();
-        _mm512_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm512_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set1_epi16(9);
         assert_eq_m128i(r, e);
     }
@@ -12200,7 +12200,7 @@ mod tests {
     unsafe fn test_mm256_mask_cvtepi64_storeu_epi16() {
         let a = _mm256_set1_epi64x(9);
         let mut r = _mm_set1_epi16(0);
-        _mm256_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm256_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
         assert_eq_m128i(r, e);
     }
@@ -12209,7 +12209,7 @@ mod tests {
     unsafe fn test_mm_mask_cvtepi64_storeu_epi16() {
         let a = _mm_set1_epi64x(9);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 9, 9);
         assert_eq_m128i(r, e);
     }
@@ -12218,7 +12218,7 @@ mod tests {
     unsafe fn test_mm512_mask_cvtsepi64_storeu_epi16() {
         let a = _mm512_set1_epi64(i64::MAX);
         let mut r = _mm_undefined_si128();
-        _mm512_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm512_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set1_epi16(i16::MAX);
         assert_eq_m128i(r, e);
     }
@@ -12227,7 +12227,7 @@ mod tests {
     unsafe fn test_mm256_mask_cvtsepi64_storeu_epi16() {
         let a = _mm256_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm256_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm256_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
         assert_eq_m128i(r, e);
     }
@@ -12236,7 +12236,7 @@ mod tests {
     unsafe fn test_mm_mask_cvtsepi64_storeu_epi16() {
         let a = _mm_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX);
         assert_eq_m128i(r, e);
     }
@@ -12245,7 +12245,7 @@ mod tests {
     unsafe fn test_mm512_mask_cvtusepi64_storeu_epi16() {
         let a = _mm512_set1_epi64(i64::MAX);
         let mut r = _mm_undefined_si128();
-        _mm512_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm512_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set1_epi16(u16::MAX as i16);
         assert_eq_m128i(r, e);
     }
@@ -12254,7 +12254,7 @@ mod tests {
     unsafe fn test_mm256_mask_cvtusepi64_storeu_epi16() {
         let a = _mm256_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm256_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm256_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set_epi16(
             0,
             0,
@@ -12272,7 +12272,7 @@ mod tests {
     unsafe fn test_mm_mask_cvtusepi64_storeu_epi16() {
         let a = _mm_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
         let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16);
         assert_eq_m128i(r, e);
     }
@@ -12392,7 +12392,7 @@ mod tests {
     unsafe fn test_mm512_mask_cvtepi64_storeu_epi32() {
         let a = _mm512_set1_epi64(9);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm512_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
         let e = _mm256_set1_epi32(9);
         assert_eq_m256i(r, e);
     }
@@ -12401,7 +12401,7 @@ mod tests {
     unsafe fn test_mm256_mask_cvtepi64_storeu_epi32() {
         let a = _mm256_set1_epi64x(9);
         let mut r = _mm_set1_epi32(0);
-        _mm256_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm256_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
         let e = _mm_set_epi32(9, 9, 9, 9);
         assert_eq_m128i(r, e);
     }
@@ -12410,7 +12410,7 @@ mod tests {
     unsafe fn test_mm_mask_cvtepi64_storeu_epi32() {
         let a = _mm_set1_epi64x(9);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
         let e = _mm_set_epi32(0, 0, 9, 9);
         assert_eq_m128i(r, e);
     }
@@ -12419,7 +12419,7 @@ mod tests {
     unsafe fn test_mm512_mask_cvtsepi64_storeu_epi32() {
         let a = _mm512_set1_epi64(i64::MAX);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm512_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
         let e = _mm256_set1_epi32(i32::MAX);
         assert_eq_m256i(r, e);
     }
@@ -12428,7 +12428,7 @@ mod tests {
     unsafe fn test_mm256_mask_cvtsepi64_storeu_epi32() {
         let a = _mm256_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi32(0);
-        _mm256_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00001111, a);
+        _mm256_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00001111, a);
         let e = _mm_set1_epi32(i32::MAX);
         assert_eq_m128i(r, e);
     }
@@ -12437,7 +12437,7 @@ mod tests {
     unsafe fn test_mm_mask_cvtsepi64_storeu_epi32() {
         let a = _mm_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00000011, a);
+        _mm_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00000011, a);
         let e = _mm_set_epi32(0, 0, i32::MAX, i32::MAX);
         assert_eq_m128i(r, e);
     }
@@ -12446,7 +12446,7 @@ mod tests {
     unsafe fn test_mm512_mask_cvtusepi64_storeu_epi32() {
         let a = _mm512_set1_epi64(i64::MAX);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        _mm512_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
         let e = _mm256_set1_epi32(u32::MAX as i32);
         assert_eq_m256i(r, e);
     }
@@ -12455,7 +12455,7 @@ mod tests {
     unsafe fn test_mm256_mask_cvtusepi64_storeu_epi32() {
         let a = _mm256_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi32(0);
-        _mm256_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00001111, a);
+        _mm256_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00001111, a);
         let e = _mm_set1_epi32(u32::MAX as i32);
         assert_eq_m128i(r, e);
     }
@@ -12464,7 +12464,7 @@ mod tests {
     unsafe fn test_mm_mask_cvtusepi64_storeu_epi32() {
         let a = _mm_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00000011, a);
+        _mm_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00000011, a);
         let e = _mm_set_epi32(0, 0, u32::MAX as i32, u32::MAX as i32);
         assert_eq_m128i(r, e);
     }

From b1fa61feff1025c5b105b30f906a9da84d7be017 Mon Sep 17 00:00:00 2001
From: sayantn
Date: Thu, 17 Apr 2025 06:05:42 +0530
Subject: [PATCH 5/5] Change void* type for `gather`/`scatter` intrinsics

---
 crates/core_arch/src/x86/avx512f.rs    | 222 +++++++++++++------------
 crates/core_arch/src/x86_64/avx512f.rs | 162 +++++++++----------
 2 files changed, 198 insertions(+), 186 deletions(-)

diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index a7ce04b1df..8671c0094b 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -16515,7 +16515,10 @@ pub fn _mm512_setr_epi64(
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(offsets: __m256i, slice: *const u8) -> __m512d {
+pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
+    offsets: __m256i,
+    slice: *const f64,
+) -> __m512d {
     static_assert_imm8_scale!(SCALE);
     let zero = f64x8::ZERO;
     let neg_one = -1;
@@ -16537,7 +16540,7 @@ pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
     src: __m512d,
     mask: __mmask8,
     offsets: __m256i,
-    slice: *const u8,
+    slice: *const f64,
 ) -> __m512d {
     static_assert_imm8_scale!(SCALE);
     let src = src.as_f64x8();
@@ -16555,7 +16558,10 @@ pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512d {
+pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
+    offsets: __m512i,
+    slice: *const f64,
+) -> __m512d {
     static_assert_imm8_scale!(SCALE);
     let zero = f64x8::ZERO;
     let neg_one = -1;
@@ -16577,7 +16583,7 @@ pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
     src: __m512d,
     mask: __mmask8,
     offsets: __m512i,
-    slice: *const u8,
+    slice: *const f64,
 ) -> __m512d {
     static_assert_imm8_scale!(SCALE);
     let src = src.as_f64x8();
@@ -16595,7 +16601,7 @@ pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m256 {
+pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
     static_assert_imm8_scale!(SCALE);
     let zero = f32x8::ZERO;
     let neg_one = -1;
@@ -16617,7 +16623,7 @@ pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
     src: __m256,
     mask: __mmask8,
     offsets: __m512i,
-    slice: *const u8,
+    slice: *const f32,
 ) -> __m256 {
     static_assert_imm8_scale!(SCALE);
     let src = src.as_f32x8();
@@ -16635,7 +16641,7 @@ pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 #[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512 {
+pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
     static_assert_imm8_scale!(SCALE);
     let zero = f32x16::ZERO;
     let neg_one = -1;
@@ -16657,7 +16663,7 @@ pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
     src: __m512,
     mask: __mmask16,
     offsets: __m512i,
-    slice: *const u8,
+    slice: *const f32,
 ) -> __m512 {
     static_assert_imm8_scale!(SCALE);
     let src = src.as_f32x16();
@@ -16677,7 +16683,7 @@ pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
 #[rustc_legacy_const_generics(2)]
 pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
     offsets: __m512i,
-    slice: *const u8,
+    slice: *const i32,
 ) -> __m512i {
     static_assert_imm8_scale!(SCALE);
     let zero = i32x16::ZERO;
@@ -16700,7 +16706,7 @@ pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
     src: __m512i,
     mask: __mmask16,
     offsets: __m512i,
-    slice: *const u8,
+    slice: *const i32,
 ) -> __m512i {
     static_assert_imm8_scale!(SCALE);
     let src = src.as_i32x16();
@@ -16721,7 +16727,7 @@ pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
 #[rustc_legacy_const_generics(2)]
 pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
     offsets: __m256i,
-    slice: *const u8,
+    slice: *const i64,
 ) -> __m512i {
     static_assert_imm8_scale!(SCALE);
     let zero = i64x8::ZERO;
@@ -16744,7 +16750,7 @@ pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
     src: __m512i,
     mask: __mmask8,
     offsets: __m256i,
-    slice: *const u8,
+    slice: *const i64,
 ) -> __m512i {
     static_assert_imm8_scale!(SCALE);
     let src = src.as_i64x8();
@@ -16765,7 +16771,7 @@ pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
 #[rustc_legacy_const_generics(2)]
 pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
     offsets: __m512i,
-    slice: *const u8,
+    slice: *const i64,
 ) -> __m512i {
     static_assert_imm8_scale!(SCALE);
     let zero = i64x8::ZERO;
@@ -16788,7 +16794,7 @@ pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
     src: __m512i,
     mask: __mmask8,
     offsets: __m512i,
-    slice: *const u8,
+    slice: *const i64,
 ) -> __m512i {
     static_assert_imm8_scale!(SCALE);
     let src = src.as_i64x8();
@@ -16809,7 +16815,7 @@ pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
 #[rustc_legacy_const_generics(2)]
 pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
     offsets: __m512i,
-    slice: *const u8,
+    slice: *const i32,
 ) -> __m256i {
     static_assert_imm8_scale!(SCALE);
     let zeros = i32x8::ZERO;
@@ -16832,7 +16838,7 @@ pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
     src: __m256i,
     mask: __mmask8,
     offsets: __m512i,
-    slice: *const u8,
+    slice: *const i32,
 ) -> __m256i {
     static_assert_imm8_scale!(SCALE);
     let src = src.as_i32x8();
@@ -16852,7 +16858,7 @@ pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut f64,
     offsets: __m256i,
     src: __m512d,
 ) {
@@ -16873,7 +16879,7 @@ pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
 #[rustc_legacy_const_generics(4)]
 pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut f64,
     mask: __mmask8,
     offsets: __m256i,
     src: __m512d,
@@ -16894,7 +16900,7 @@ pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut f64,
     offsets: __m512i,
     src: __m512d,
 ) {
@@ -16915,7 +16921,7 @@ pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
 #[rustc_legacy_const_generics(4)]
 pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut f64,
     mask: __mmask8,
     offsets: __m512i,
     src: __m512d,
@@ -16936,7 +16942,7 @@ pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut f32,
     offsets: __m512i,
     src: __m512,
 ) {
@@ -16957,7 +16963,7 @@ pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
 #[rustc_legacy_const_generics(4)]
 pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut f32,
     mask: __mmask16,
     offsets: __m512i,
     src: __m512,
@@ -16978,7 +16984,7 @@ pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut f32,
     offsets: __m512i,
     src: __m256,
 ) {
@@ -16999,7 +17005,7 @@ pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
 #[rustc_legacy_const_generics(4)]
 pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut f32,
     mask: __mmask8,
     offsets: __m512i,
     src: __m256,
@@ -17020,7 +17026,7 @@ pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut i64,
     offsets: __m256i,
     src: __m512i,
 ) {
@@ -17041,7 +17047,7 @@ pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
 #[rustc_legacy_const_generics(4)]
 pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut i64,
     mask: __mmask8,
     offsets: __m256i,
     src: __m512i,
@@ -17063,7 +17069,7 @@ pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut i64,
     offsets: __m512i,
     src: __m512i,
 ) {
@@ -17084,7 +17090,7 @@ pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
 #[rustc_legacy_const_generics(4)]
 pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut i64,
     mask: __mmask8,
     offsets: __m512i,
     src: __m512i,
@@ -17106,7 +17112,7 @@ pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut i32,
     offsets: __m512i,
     src: __m512i,
 ) {
@@ -17127,7 +17133,7 @@ pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
 #[rustc_legacy_const_generics(4)]
 pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut i32,
     mask: __mmask16,
     offsets: __m512i,
     src: __m512i,
@@ -17149,7 +17155,7 @@ pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut i32,
     offsets: __m512i,
     src: __m256i,
 ) {
@@ -17170,7 +17176,7 @@ pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
 #[rustc_legacy_const_generics(4)]
 pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut i32,
     mask: __mmask8,
     offsets: __m512i,
     src: __m256i,
@@ -17194,9 +17200,9 @@ pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
     vindex: __m512i,
-    base_addr: *const u8,
+    base_addr: *const i64,
 ) -> __m512i {
-    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
+    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
 }
 
 /// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
@@ -17213,9 +17219,9 @@ pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
     src: __m512i,
     k: __mmask8,
     vindex: __m512i,
-    base_addr: *const u8,
+    base_addr: *const i64,
 ) -> __m512i {
-    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
+    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
 }
 
 /// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
@@ -17229,9 +17235,9 @@ pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
     vindex: __m512i,
-    base_addr: *const u8,
+    base_addr: *const f64,
 ) -> __m512d {
-    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
+    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
 }
 
 /// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
@@ -17248,9 +17254,9 @@ pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
     src: __m512d,
     k: __mmask8,
     vindex: __m512i,
-    base_addr: *const u8,
+    base_addr: *const f64,
 ) -> __m512d {
-    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
+    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
 }
 
 /// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
@@ -17263,11 +17269,11 @@ pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i64,
     vindex: __m512i,
     a: __m512i,
 ) {
-    _mm512_i32scatter_epi64::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
+    _mm512_i32scatter_epi64::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
 }
 
 /// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
@@ -17281,12 +17287,12 @@ pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i64,
     k: __mmask8,
     vindex: __m512i,
     a: __m512i,
 ) {
-    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
+    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
 }
 
 /// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
@@ -17299,11 +17305,11 @@ pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f64,
     vindex: __m512i,
     a: __m512d,
 ) {
-    _mm512_i32scatter_pd::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
+    _mm512_i32scatter_pd::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
 }
 
 /// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
@@ -17317,12 +17323,12 @@ pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f64,
     k: __mmask8,
     vindex: __m512i,
     a: __m512d,
 ) {
-    _mm512_mask_i32scatter_pd::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
+    _mm512_mask_i32scatter_pd::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
 }
 
 /// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
@@ -17335,7 +17341,7 @@ pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i32,
     vindex: __m256i,
     a: __m256i,
 ) {
@@ -17354,7 +17360,7 @@ pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i32,
     k: __mmask8,
     vindex: __m256i,
     a: __m256i,
@@ -17372,7 +17378,7 @@ pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
-    slice: *mut u8,
+    slice: *mut i64,
     offsets: __m128i,
     src: __m256i,
 ) {
@@ -17394,7 +17400,7 @@ pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i64,
     k: __mmask8,
     vindex: __m128i,
     a: __m256i,
@@ -17413,7 +17419,7 @@ pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f64,
     vindex: __m128i,
     a: __m256d,
 ) {
@@ -17432,7 +17438,7 @@ pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f64,
     k: __mmask8,
     vindex: __m128i,
     a: __m256d,
@@ -17451,7 +17457,7 @@ pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f32,
     vindex: __m256i,
     a: __m256,
 ) {
@@ -17470,7 +17476,7 @@ pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f32,
     k: __mmask8,
     vindex: __m256i,
     a: __m256,
@@ -17489,7 +17495,7 @@ pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i32,
     vindex: __m256i,
     a: __m128i,
 ) {
@@ -17508,7 +17514,7 @@ pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i32,
     k: __mmask8,
     vindex: __m256i,
     a: __m128i,
@@ -17527,7 +17533,7 @@ pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i64,
     vindex: __m256i,
     a: __m256i,
 ) {
@@ -17546,7 +17552,7 @@ pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i64,
     k: __mmask8,
     vindex: __m256i,
     a: __m256i,
@@ -17565,7 +17571,7 @@ pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f64,
     vindex: __m256i,
     a: __m256d,
 ) {
@@ -17584,7 +17590,7 @@ pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f64,
     k: __mmask8,
     vindex: __m256i,
     a: __m256d,
@@ -17603,7 +17609,7 @@ pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f32,
     vindex: __m256i,
     a: __m128,
 ) {
@@ -17622,7 +17628,7 @@ pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f32,
     k: __mmask8,
     vindex: __m256i,
     a: __m128,
@@ -17645,7 +17651,7 @@ pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
     src: __m256i,
     k: __mmask8,
     vindex: __m256i,
-    base_addr: *const u8,
+    base_addr: *const i32,
 ) -> __m256i {
     static_assert_imm8_scale!(SCALE);
     transmute(vpgatherdd_256(
@@ -17671,7 +17677,7 @@ pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
     src: __m256i,
     k: __mmask8,
     vindex: __m128i,
-    base_addr: *const u8,
+    base_addr: *const i64,
 ) -> __m256i {
     static_assert_imm8_scale!(SCALE);
     transmute(vpgatherdq_256(
@@ -17697,7 +17703,7 @@ pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
     src: __m256d,
     k: __mmask8,
     vindex: __m128i,
-    base_addr: *const u8,
+    base_addr: *const f64,
 ) -> __m256d {
     static_assert_imm8_scale!(SCALE);
     transmute(vgatherdpd_256(
@@ -17723,7 +17729,7 @@ pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
     src: __m256,
     k: __mmask8,
     vindex: __m256i,
-    base_addr: *const u8,
+    base_addr: *const f32,
 ) -> __m256 {
     static_assert_imm8_scale!(SCALE);
     transmute(vgatherdps_256(
@@ -17749,7 +17755,7 @@ pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
     src: __m128i,
     k: __mmask8,
     vindex: __m256i,
-    base_addr: *const u8,
+    base_addr: *const i32,
 ) -> __m128i {
     static_assert_imm8_scale!(SCALE);
     transmute(vpgatherqd_256(
@@ -17775,7 +17781,7 @@ pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
     src: __m256i,
     k: __mmask8,
     vindex: __m256i,
-    base_addr: *const u8,
+    base_addr: *const i64,
 ) -> __m256i {
     static_assert_imm8_scale!(SCALE);
     transmute(vpgatherqq_256(
@@ -17801,7 +17807,7 @@ pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
     src: __m256d,
     k: __mmask8,
     vindex: __m256i,
-    base_addr: *const u8,
+    base_addr: *const f64,
 ) -> __m256d {
     static_assert_imm8_scale!(SCALE);
     transmute(vgatherqpd_256(
@@ -17827,7 +17833,7 @@ pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
     src: __m128,
     k: __mmask8,
     vindex: __m256i,
-    base_addr: *const u8,
+    base_addr: *const f32,
 ) -> __m128 {
     static_assert_imm8_scale!(SCALE);
     transmute(vgatherqps_256(
@@ -17849,7 +17855,7 @@ pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i32,
     vindex: __m128i,
     a: __m128i,
 ) {
@@ -17868,7 +17874,7 @@ pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i32,
     k: __mmask8,
     vindex: __m128i,
     a: __m128i,
@@ -17887,7 +17893,7 @@ pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i64,
     vindex: __m128i,
     a: __m128i,
 ) {
@@ -17906,7 +17912,7 @@ pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i64,
     k: __mmask8,
     vindex: __m128i,
     a: __m128i,
@@ -17924,7 +17930,11 @@ pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
+pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
+    base_addr: *mut f64,
+    vindex: __m128i,
+    a: __m128d,
+) {
     static_assert_imm8_scale!(SCALE);
     vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
 }
@@ -17940,7 +17950,7 @@ pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f64,
     k: __mmask8,
     vindex: __m128i,
     a: __m128d,
@@ -17958,7 +17968,7 @@ pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
+pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
     static_assert_imm8_scale!(SCALE);
     vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
 }
@@ -17974,7 +17984,7 @@ pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f32,
     k: __mmask8,
     vindex: __m128i,
     a: __m128,
@@ -17993,7 +18003,7 @@ pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i32,
     vindex: __m128i,
     a: __m128i,
 ) {
@@ -18012,7 +18022,7 @@ pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i32,
     k: __mmask8,
     vindex: __m128i,
     a: __m128i,
@@ -18031,7 +18041,7 @@ pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i64,
     vindex: __m128i,
     a: __m128i,
 ) {
@@ -18050,7 +18060,7 @@ pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut i64,
     k: __mmask8,
     vindex: __m128i,
     a: __m128i,
@@ -18068,7 +18078,11 @@ pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
+pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
+    base_addr: *mut f64,
+    vindex: __m128i,
+    a: __m128d,
+) {
     static_assert_imm8_scale!(SCALE);
     vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
 }
@@ -18084,7 +18098,7 @@ pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f64,
     k: __mmask8,
     vindex: __m128i,
     a: __m128d,
@@ -18102,7 +18116,7 @@ pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
+pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
     static_assert_imm8_scale!(SCALE);
     vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
 }
@@ -18117,7 +18131,7 @@ pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __
 #[rustc_legacy_const_generics(4)]
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
-    base_addr: *mut u8,
+    base_addr: *mut f32,
     k: __mmask8,
     vindex: __m128i,
     a: __m128,
@@ -18140,7 +18154,7 @@ pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
     src: __m128i,
     k: __mmask8,
     vindex: __m128i,
-    base_addr: *const u8,
+    base_addr: *const i32,
 ) -> __m128i {
     static_assert_imm8_scale!(SCALE);
     transmute(vpgatherdd_128(
@@ -18166,7 +18180,7 @@ pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
     src: __m128i,
     k: __mmask8,
     vindex: __m128i,
-    base_addr: *const u8,
+    base_addr: *const i64,
 ) -> __m128i {
     static_assert_imm8_scale!(SCALE);
     transmute(vpgatherdq_128(
@@ -18192,7 +18206,7 @@ pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
     src: __m128d,
     k: __mmask8,
     vindex: __m128i,
-    base_addr: *const u8,
+    base_addr: *const f64,
 ) -> __m128d {
     static_assert_imm8_scale!(SCALE);
     transmute(vgatherdpd_128(
@@ -18218,7 +18232,7 @@ pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
     src: __m128,
     k: __mmask8,
     vindex: __m128i,
-    base_addr: *const u8,
+    base_addr: *const f32,
 ) -> __m128 {
     static_assert_imm8_scale!(SCALE);
     transmute(vgatherdps_128(
@@ -18244,7 +18258,7 @@ pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
     src: __m128i,
     k: __mmask8,
     vindex: __m128i,
-    base_addr: *const u8,
+    base_addr: *const i32,
 ) -> __m128i {
     static_assert_imm8_scale!(SCALE);
     transmute(vpgatherqd_128(
@@ -18270,7 +18284,7 @@ pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
     src: __m128i,
     k: __mmask8,
     vindex: __m128i,
-    base_addr: *const u8,
+    base_addr: *const i64,
 ) -> __m128i {
     static_assert_imm8_scale!(SCALE);
     transmute(vpgatherqq_128(
@@ -18296,7 +18310,7 @@ pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
     src: __m128d,
     k: __mmask8,
     vindex: __m128i,
-    base_addr: *const u8,
+    base_addr: *const f64,
 ) -> __m128d {
     static_assert_imm8_scale!(SCALE);
     transmute(vgatherqpd_128(
@@ -18322,7 +18336,7 @@ pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
     src: __m128,
     k: __mmask8,
     vindex: __m128i,
-    base_addr: *const u8,
+    base_addr: *const f32,
 ) -> __m128 {
     static_assert_imm8_scale!(SCALE);
     transmute(vgatherqps_128(
@@ -49637,7 +49651,7 @@ mod tests {
         #[rustfmt::skip]
         let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                       120, 128, 136, 144, 152, 160, 168, 176);
-        let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr() as *const u8);
+        let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr());
        #[rustfmt::skip]
         assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
                                          120., 128., 136., 144., 152., 160., 168., 176.));
@@ -49652,7 +49666,7 @@ mod tests {
         let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                       120, 128, 136, 144, 152, 160, 168, 176);
         // A multiplier of 4 is word-addressing
-        let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr() as *const u8);
+        let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr());
         #[rustfmt::skip]
         assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
                                          2., 128., 2., 144., 2., 160., 2., 176.));
@@ -49665,7 +49679,7 @@ mod tests {
         #[rustfmt::skip]
         let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                       120, 128, 136, 144, 152, 160, 168, 176);
-        let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr() as *const u8);
+        let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr());
         #[rustfmt::skip]
         assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                              120, 128, 136, 144, 152, 160, 168, 176));
@@ -49680,7 +49694,7 @@ mod tests {
             0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
         );
         // A multiplier of 4 is word-addressing
-        let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr() as *const u8);
+        let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr());
         assert_eq_m512i(
             r,
             _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
@@ -49697,7 +49711,7 @@ mod tests {
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
         // A multiplier of 4 is word-addressing
-        _mm512_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, index, src);
+        _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
         let mut expected = [0f32; 256];
         for i in 0..16 {
             expected[i * 16] = (i + 1) as f32;
@@ -49716,7 +49730,7 @@ mod tests {
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
         // A multiplier of 4 is word-addressing
-        _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+        _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
         let mut expected = [0f32; 256];
         for i in 0..8 {
             expected[i * 32 + 16] = 2. * (i + 1) as f32;
@@ -49733,7 +49747,7 @@ mod tests {
                                       128, 144, 160, 176, 192, 208, 224, 240);
         let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         // A multiplier of 4 is word-addressing
-        _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, index, src);
+        _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
         let mut expected = [0i32; 256];
         for i in 0..16 {
             expected[i * 16] = (i + 1) as i32;
@@ -49750,7 +49764,7 @@ mod tests {
                                       128, 144, 160, 176, 192, 208, 224, 240);
         let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         // A multiplier of 4 is word-addressing
-        _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+        _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
         let mut expected = [0i32; 256];
         for i in 0..8 {
             expected[i * 32 + 16] = 2 * (i + 1) as i32;
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index d0d73488c1..c6f84fd965 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -7441,7 +7441,7 @@ mod tests {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i32gather_pd::<8>(index, arr.as_ptr() as *const u8);
+        let r = _mm512_i32gather_pd::<8>(index, arr.as_ptr());
         assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.));
     }
 
@@ -7452,7 +7452,7 @@ mod tests {
         let mask = 0b10101010;
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 8 is word-addressing
-        let r = _mm512_mask_i32gather_pd::<8>(src, mask, index, arr.as_ptr() as *const u8);
+        let r = _mm512_mask_i32gather_pd::<8>(src, mask, index, arr.as_ptr());
         assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.));
     }
 
@@ -7461,7 +7461,7 @@ mod tests {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i64gather_pd::<8>(index, arr.as_ptr() as *const u8);
+        let r = _mm512_i64gather_pd::<8>(index, arr.as_ptr());
         assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.));
     }
 
@@ -7472,7 +7472,7 @@ mod tests {
         let mask = 0b10101010;
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 8 is word-addressing
-        let r = _mm512_mask_i64gather_pd::<8>(src, mask, index, arr.as_ptr() as *const u8);
+        let r = _mm512_mask_i64gather_pd::<8>(src, mask, index, arr.as_ptr());
         assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.));
     }
 
@@ -7482,7 +7482,7 @@ mod tests {
         // A multiplier of 4 is word-addressing
         #[rustfmt::skip]
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i64gather_ps::<4>(index, arr.as_ptr() as *const u8);
+        let r = _mm512_i64gather_ps::<4>(index, arr.as_ptr());
         assert_eq_m256(r, _mm256_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.));
     }
 
@@ -7494,7 +7494,7 @@ mod tests {
         #[rustfmt::skip]
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 4 is word-addressing
-        let r = _mm512_mask_i64gather_ps::<4>(src, mask, index, arr.as_ptr() as *const u8);
+        let r = _mm512_mask_i64gather_ps::<4>(src, mask, index, arr.as_ptr());
         assert_eq_m256(r, _mm256_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.));
     }
 
@@ -7506,7 +7506,7 @@ mod tests {
         }
         // A multiplier of 8 is word-addressing
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i32gather_epi64::<8>(index, arr.as_ptr() as *const u8);
+        let r = _mm512_i32gather_epi64::<8>(index, arr.as_ptr());
         assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112));
     }
 
@@ -7520,7 +7520,7 @@ mod tests {
         let mask = 0b10101010;
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 8 is word-addressing
-        let r = _mm512_mask_i32gather_epi64::<8>(src, mask, index, arr.as_ptr() as *const u8);
+        let r = _mm512_mask_i32gather_epi64::<8>(src, mask, index, arr.as_ptr());
         assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112));
     }
 
@@ -7532,7 +7532,7 @@ mod tests {
         }
         // A multiplier of 8 is word-addressing
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i64gather_epi64::<8>(index, arr.as_ptr() as *const u8);
+        let r = _mm512_i64gather_epi64::<8>(index, arr.as_ptr());
         assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112));
     }
 
@@ -7546,7 +7546,7 @@ mod tests {
         let mask = 0b10101010;
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 8 is word-addressing
-        let r = _mm512_mask_i64gather_epi64::<8>(src, mask, index, arr.as_ptr() as *const u8);
+        let r = _mm512_mask_i64gather_epi64::<8>(src, mask, index, arr.as_ptr());
         assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112));
     }
 
@@ -7558,7 +7558,7 @@ mod tests {
         }
         // A multiplier of 8 is word-addressing
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i64gather_epi32::<8>(index, arr.as_ptr() as *const u8);
+        let r = _mm512_i64gather_epi32::<8>(index, arr.as_ptr() as *const i32);
         assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112));
     }
 
@@ -7572,7 +7572,7 @@ mod tests {
         let mask = 0b10101010;
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 8 is word-addressing
-        let r = _mm512_mask_i64gather_epi32::<8>(src, mask, index, arr.as_ptr() as *const u8);
+        let r = _mm512_mask_i64gather_epi32::<8>(src, mask, index, arr.as_ptr() as *const i32);
         assert_eq_m256i(r, _mm256_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112));
     }
 
@@ -7582,7 +7582,7 @@ mod tests {
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 8 is word-addressing
-        _mm512_i32scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, index, src);
+        _mm512_i32scatter_pd::<8>(arr.as_mut_ptr(), index, src);
         let mut expected = [0f64; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as f64;
@@ -7597,7 +7597,7 @@ mod tests {
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 8 is word-addressing
-        _mm512_mask_i32scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+        _mm512_mask_i32scatter_pd::<8>(arr.as_mut_ptr(), mask, index, src);
         let mut expected = [0f64; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2. * (i + 1) as f64;
@@ -7611,7 +7611,7 @@ mod tests {
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 8 is word-addressing
-        _mm512_i64scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, index, src);
+        _mm512_i64scatter_pd::<8>(arr.as_mut_ptr(), index, src);
         let mut expected = [0f64; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as f64;
@@ -7626,7 +7626,7 @@ mod tests {
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 8 is word-addressing
-        _mm512_mask_i64scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+        _mm512_mask_i64scatter_pd::<8>(arr.as_mut_ptr(), mask, index, src);
         let mut expected = [0f64; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2. * (i + 1) as f64;
@@ -7640,7 +7640,7 @@ mod tests {
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 4 is word-addressing
-        _mm512_i64scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, index, src);
+        _mm512_i64scatter_ps::<4>(arr.as_mut_ptr(), index, src);
         let mut expected = [0f32; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as f32;
@@ -7655,7 +7655,7 @@ mod tests {
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 4 is word-addressing
-        _mm512_mask_i64scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+        _mm512_mask_i64scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
         let mut expected = [0f32; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2. * (i + 1) as f32;
@@ -7669,7 +7669,7 @@ mod tests {
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 8 is word-addressing
-        _mm512_i32scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, index, src);
+        _mm512_i32scatter_epi64::<8>(arr.as_mut_ptr(), index, src);
         let mut expected = [0i64; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as i64;
@@ -7684,7 +7684,7 @@ mod tests {
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 8 is word-addressing
-        _mm512_mask_i32scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+        _mm512_mask_i32scatter_epi64::<8>(arr.as_mut_ptr(), mask, index, src);
         let mut expected = [0i64; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2 * (i + 1) as i64;
@@ -7698,7 +7698,7 @@ mod tests {
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 8 is word-addressing
-        _mm512_i64scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, index, src);
+        _mm512_i64scatter_epi64::<8>(arr.as_mut_ptr(), index, src);
         let mut expected = [0i64; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as i64;
@@ -7713,7 +7713,7 @@ mod tests {
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 8 is word-addressing
-        _mm512_mask_i64scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+        _mm512_mask_i64scatter_epi64::<8>(arr.as_mut_ptr(), mask, index, src);
         let mut expected = [0i64; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2 * (i + 1) as i64;
@@ -7727,7 +7727,7 @@ mod tests {
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 4 is word-addressing
-        _mm512_i64scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, index, src);
+        _mm512_i64scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
         let mut expected = [0i32; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as i32;
@@ -7742,7 +7742,7 @@ mod tests {
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 4 is word-addressing
-        _mm512_mask_i64scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+        _mm512_mask_i64scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
         let mut expected = [0i32; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2 * (i + 1) as i32;
@@ -7754,7 +7754,7 @@ mod tests {
     unsafe fn test_mm512_i32logather_epi64() {
         let base_addr: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
-        let r = _mm512_i32logather_epi64::<8>(vindex, base_addr.as_ptr().cast());
+        let r = _mm512_i32logather_epi64::<8>(vindex, base_addr.as_ptr());
         let expected = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
         assert_eq_m512i(expected, r);
     }
@@ -7764,8 +7764,7 @@ mod tests {
         let base_addr: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
         let src = _mm512_setr_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
-        let r =
-            _mm512_mask_i32logather_epi64::<8>(src, 0b01010101, vindex, base_addr.as_ptr().cast());
+        let r = _mm512_mask_i32logather_epi64::<8>(src, 0b01010101, vindex, base_addr.as_ptr());
         let expected = _mm512_setr_epi64(2, 10, 4, 12, 6, 14, 8, 16);
         assert_eq_m512i(expected, r);
     }
@@ -7774,7 +7773,7 @@ mod tests {
     unsafe fn test_mm512_i32logather_pd() {
         let base_addr: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
-        let r = _mm512_i32logather_pd::<8>(vindex, base_addr.as_ptr().cast());
+        let r = _mm512_i32logather_pd::<8>(vindex, base_addr.as_ptr());
         let expected = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
         assert_eq_m512d(expected, r);
     }
@@ -7784,7 +7783,7 @@ mod tests {
         let base_addr: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
         let src = _mm512_setr_pd(9., 10., 11., 12., 13., 14., 15., 16.);
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
-        let r = _mm512_mask_i32logather_pd::<8>(src, 0b01010101, vindex, base_addr.as_ptr().cast());
+        let r = _mm512_mask_i32logather_pd::<8>(src, 0b01010101, vindex, base_addr.as_ptr());
         let expected = _mm512_setr_pd(2., 10., 4., 12., 6., 14., 8., 16.);
         assert_eq_m512d(expected, r);
     }
@@ -7794,7 +7793,7 @@ mod tests {
         let mut base_addr: [i64; 8] = [0; 8];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
         let src = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
-        _mm512_i32loscatter_epi64::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm512_i32loscatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1, 2, 3, 4, 5, 6, 7, 8];
         assert_eq!(expected, base_addr);
     }
@@ -7804,7 +7803,7 @@ mod tests {
         let mut base_addr: [i64; 8] = [0; 8];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
         let src = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
-        _mm512_mask_i32loscatter_epi64::<8>(base_addr.as_mut_ptr().cast(), 0b01010101, vindex, src);
+        _mm512_mask_i32loscatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
         let expected = [0, 2, 0, 4, 0, 6, 0, 8];
         assert_eq!(expected, base_addr);
     }
@@ -7814,7 +7813,7 @@ mod tests {
         let mut base_addr: [f64; 8] = [0.; 8];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
         let src = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
-        _mm512_i32loscatter_pd::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm512_i32loscatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1., 2., 3., 4., 5., 6., 7., 8.];
         assert_eq!(expected, base_addr);
     }
@@ -7824,7 +7823,7 @@ mod tests {
         let mut base_addr: [f64; 8] = [0.; 8];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
         let src = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
-        _mm512_mask_i32loscatter_pd::<8>(base_addr.as_mut_ptr().cast(), 0b01010101, vindex, src);
+        _mm512_mask_i32loscatter_pd::<8>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
         let expected = [0., 2., 0., 4., 0., 6., 0., 8.];
         assert_eq!(expected, base_addr);
     }
@@ -7834,7 +7833,7 @@ mod tests {
         let base_addr: [i32; 4] = [1, 2, 3, 4];
         let src = _mm_setr_epi32(5, 6, 7, 8);
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
-        let r = _mm_mmask_i32gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+        let r = _mm_mmask_i32gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr());
         let expected = _mm_setr_epi32(2, 6, 4, 8);
         assert_eq_m128i(expected, r);
     }
@@ -7844,7 +7843,7 @@ mod tests {
         let base_addr: [i64; 2] = [1, 2];
         let src = _mm_setr_epi64x(5, 6);
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
-        let r = _mm_mmask_i32gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr().cast());
+        let r = _mm_mmask_i32gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr());
         let expected = _mm_setr_epi64x(2, 6);
         assert_eq_m128i(expected, r);
     }
@@ -7854,7 +7853,7 @@ mod tests {
         let base_addr: [f64; 2] = [1., 2.];
         let src = _mm_setr_pd(5., 6.);
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
-        let r = _mm_mmask_i32gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr().cast());
+        let r = _mm_mmask_i32gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr());
         let expected = _mm_setr_pd(2., 6.);
         assert_eq_m128d(expected, r);
     }
@@ -7864,7 +7863,7 @@ mod tests {
         let base_addr: [f32; 4] = [1., 2., 3., 4.];
         let src = _mm_setr_ps(5., 6., 7., 8.);
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
-        let r = _mm_mmask_i32gather_ps::<4>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+        let r = _mm_mmask_i32gather_ps::<4>(src, 0b0101, vindex, base_addr.as_ptr());
         let expected = _mm_setr_ps(2., 6., 4., 8.);
         assert_eq_m128(expected, r);
     }
@@ -7874,7 +7873,7 @@ mod tests {
         let base_addr: [i32; 2] = [1, 2];
         let src = _mm_setr_epi32(5, 6, 7, 8);
         let vindex = _mm_setr_epi64x(1, 0);
-        let r = _mm_mmask_i64gather_epi32::<4>(src, 0b01, vindex, base_addr.as_ptr().cast());
+        let r = _mm_mmask_i64gather_epi32::<4>(src, 0b01, vindex, base_addr.as_ptr());
         let expected = _mm_setr_epi32(2, 6, 0, 0);
         assert_eq_m128i(expected, r);
     }
@@ -7884,7 +7883,7 @@ mod tests {
         let base_addr: [i64; 2] = [1, 2];
         let src = _mm_setr_epi64x(5, 6);
         let vindex = _mm_setr_epi64x(1, 0);
-        let r = _mm_mmask_i64gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr().cast());
+        let r = _mm_mmask_i64gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr());
         let expected = _mm_setr_epi64x(2, 6);
         assert_eq_m128i(expected, r);
     }
@@ -7894,7 +7893,7 @@ mod tests {
         let base_addr: [f64; 2] = [1., 2.];
         let src = _mm_setr_pd(5., 6.);
         let vindex = _mm_setr_epi64x(1, 0);
-        let r = _mm_mmask_i64gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr().cast());
+        let r = _mm_mmask_i64gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr());
         let expected = _mm_setr_pd(2., 6.);
         assert_eq_m128d(expected, r);
     }
@@ -7904,7 +7903,7 @@ mod tests {
         let base_addr: [f32; 2] = [1., 2.];
         let src = _mm_setr_ps(5., 6., 7., 8.);
         let vindex = _mm_setr_epi64x(1, 0);
-        let r = _mm_mmask_i64gather_ps::<4>(src, 0b01, vindex, base_addr.as_ptr().cast());
+        let r = _mm_mmask_i64gather_ps::<4>(src, 0b01, vindex, base_addr.as_ptr());
         let expected = _mm_setr_ps(2., 6., 0., 0.);
         assert_eq_m128(expected, r);
     }
@@ -7914,8 +7913,7 @@ mod tests {
         let base_addr: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
         let src = _mm256_setr_epi32(9, 10, 11, 12, 13, 14, 15, 16);
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
-        let r =
-            _mm256_mmask_i32gather_epi32::<4>(src, 0b01010101, vindex, base_addr.as_ptr().cast());
+        let r = _mm256_mmask_i32gather_epi32::<4>(src, 0b01010101, vindex, base_addr.as_ptr());
         let expected = _mm256_setr_epi32(2, 10, 4, 12, 6, 14, 8, 16);
         assert_eq_m256i(expected, r);
     }
@@ -7925,7 +7923,7 @@ mod tests {
         let base_addr: [i64; 4] = [1, 2, 3, 4];
         let src = _mm256_setr_epi64x(9, 10, 11, 12);
         let vindex = _mm_setr_epi32(1, 2, 3, 4);
-        let r = _mm256_mmask_i32gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+        let r = _mm256_mmask_i32gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr());
        let expected = _mm256_setr_epi64x(2, 10, 4, 12);
         assert_eq_m256i(expected, r);
     }
@@ -7935,7 +7933,7 @@ mod tests {
         let base_addr: [f64; 4] = [1., 2., 3., 4.];
         let src = _mm256_setr_pd(9., 10., 11., 12.);
         let vindex = _mm_setr_epi32(1, 2, 3, 4);
-        let r = _mm256_mmask_i32gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+        let r = _mm256_mmask_i32gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr());
         let expected = _mm256_setr_pd(2., 10., 4., 12.);
         assert_eq_m256d(expected, r);
     }
@@ -7945,7 +7943,7 @@ mod tests {
         let base_addr: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
         let src = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
-        let r = _mm256_mmask_i32gather_ps::<4>(src, 0b01010101, vindex, base_addr.as_ptr().cast());
+        let r = _mm256_mmask_i32gather_ps::<4>(src, 0b01010101, vindex, base_addr.as_ptr());
         let expected = _mm256_setr_ps(2., 10., 4., 12., 6., 14., 8., 16.);
         assert_eq_m256(expected, r);
     }
@@ -7955,7 +7953,7 @@ mod tests {
         let base_addr: [i32; 4] = [1, 2, 3, 4];
         let src = _mm_setr_epi32(9, 10, 11, 12);
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
-        let r = _mm256_mmask_i64gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+        let r = _mm256_mmask_i64gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr());
         let expected = _mm_setr_epi32(2, 10, 4, 12);
         assert_eq_m128i(expected, r);
     }
@@ -7965,7 +7963,7 @@ mod tests {
         let base_addr: [i64; 4] = [1, 2, 3, 4];
         let src = _mm256_setr_epi64x(9, 10, 11, 12);
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
-        let r = _mm256_mmask_i64gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+        let r = _mm256_mmask_i64gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr());
         let expected = _mm256_setr_epi64x(2, 10, 4, 12);
         assert_eq_m256i(expected, r);
     }
@@ -7975,7 +7973,7 @@ mod tests {
         let base_addr: [f64; 4] = [1., 2., 3., 4.];
         let src = _mm256_setr_pd(9., 10., 11., 12.);
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
-        let r = _mm256_mmask_i64gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+        let r = _mm256_mmask_i64gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr());
         let expected = _mm256_setr_pd(2., 10., 4., 12.);
         assert_eq_m256d(expected, r);
     }
@@ -7985,7 +7983,7 @@ mod tests {
         let base_addr: [f32; 4] = [1., 2., 3., 4.];
         let src = _mm_setr_ps(9., 10., 11., 12.);
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
-        let r = _mm256_mmask_i64gather_ps::<4>(src, 0b0101, vindex, base_addr.as_ptr().cast());
+        let r = _mm256_mmask_i64gather_ps::<4>(src, 0b0101, vindex, base_addr.as_ptr());
         let expected = _mm_setr_ps(2., 10., 4., 12.);
         assert_eq_m128(expected, r);
     }
@@ -7995,7 +7993,7 @@ mod tests {
         let mut base_addr: [i32; 4] = [0; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm_setr_epi32(2, 3, 4, 1);
-        _mm_i32scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1, 2, 3, 4];
         assert_eq!(expected, base_addr);
     }
@@ -8005,7 +8003,7 @@ mod tests {
         let mut base_addr: [i32; 4] = [0; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm_setr_epi32(2, 3, 4, 1);
-        _mm_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+        _mm_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
         let expected = [0, 2, 0, 4];
         assert_eq!(expected, base_addr);
     }
@@ -8015,7 +8013,7 @@ mod tests {
         let mut base_addr: [i64; 2] = [0; 2];
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
         let src = _mm_setr_epi64x(2, 1);
-        _mm_i32scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1, 2];
         assert_eq!(expected, base_addr);
     }
@@ -8025,7 +8023,7 @@ mod tests {
         let mut base_addr: [i64; 2] = [0; 2];
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
         let src = _mm_setr_epi64x(2, 1);
-        _mm_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+        _mm_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
         let expected = [0, 2];
         assert_eq!(expected, base_addr);
     }
@@ -8035,7 +8033,7 @@ mod tests {
         let mut base_addr: [f64; 2] = [0.; 2];
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
         let src = _mm_setr_pd(2., 1.);
-        _mm_i32scatter_pd::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm_i32scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1., 2.];
         assert_eq!(expected, base_addr);
     }
@@ -8045,7 +8043,7 @@ mod tests {
         let mut base_addr: [f64; 2] = [0.; 2];
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
         let src = _mm_setr_pd(2., 1.);
-        _mm_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+        _mm_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
         let expected = [0., 2.];
         assert_eq!(expected, base_addr);
     }
@@ -8055,7 +8053,7 @@ mod tests {
         let mut base_addr: [f32; 4] = [0.; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm_setr_ps(2., 3., 4., 1.);
-        _mm_i32scatter_ps::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm_i32scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1., 2., 3., 4.];
         assert_eq!(expected, base_addr);
     }
@@ -8065,7 +8063,7 @@ mod tests {
         let mut base_addr: [f32; 4] = [0.; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm_setr_ps(2., 3., 4., 1.);
-        _mm_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+        _mm_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
         let expected = [0., 2., 0., 4.];
         assert_eq!(expected, base_addr);
     }
@@ -8075,7 +8073,7 @@ mod tests {
         let mut base_addr: [i32; 2] = [0; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_epi32(2, 1, -1, -1);
-        _mm_i64scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1, 2];
         assert_eq!(expected, base_addr);
     }
@@ -8085,7 +8083,7 @@ mod tests {
         let mut base_addr: [i32; 2] = [0; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_epi32(2, 1, -1, -1);
-        _mm_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+        _mm_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b01, vindex, src);
         let expected = [0, 2];
         assert_eq!(expected, base_addr);
     }
@@ -8095,7 +8093,7 @@ mod tests {
         let mut base_addr: [i64; 2] = [0; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_epi64x(2, 1);
-        _mm_i64scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1, 2];
         assert_eq!(expected, base_addr);
     }
@@ -8105,7 +8103,7 @@ mod tests {
         let mut base_addr: [i64; 2] = [0; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_epi64x(2, 1);
-        _mm_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+        _mm_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
         let expected = [0, 2];
         assert_eq!(expected, base_addr);
     }
@@ -8115,7 +8113,7 @@ mod tests {
         let mut base_addr: [f64; 2] = [0.; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_pd(2., 1.);
-        _mm_i64scatter_pd::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm_i64scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1., 2.];
         assert_eq!(expected, base_addr);
     }
@@ -8125,7 +8123,7 @@ mod tests {
         let mut base_addr: [f64; 2] = [0.; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_pd(2., 1.);
-        _mm_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+        _mm_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
         let expected = [0., 2.];
         assert_eq!(expected, base_addr);
     }
@@ -8135,7 +8133,7 @@ mod tests {
         let mut base_addr: [f32; 2] = [0.; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_ps(2., 1., -1., -1.);
-        _mm_i64scatter_ps::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm_i64scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1., 2.];
         assert_eq!(expected, base_addr);
     }
@@ -8145,7 +8143,7 @@ mod tests {
         let mut base_addr: [f32; 2] = [0.; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_ps(2., 1., -1., -1.);
-        _mm_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr().cast(), 0b01, vindex, src);
+        _mm_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr(), 0b01, vindex, src);
         let expected = [0., 2.];
         assert_eq!(expected, base_addr);
     }
@@ -8155,7 +8153,7 @@ mod tests {
         let mut base_addr: [i32; 8] = [0; 8];
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
         let src = _mm256_setr_epi32(2, 3, 4, 5, 6, 7, 8, 1);
-        _mm256_i32scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm256_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1, 2, 3, 4, 5, 6, 7, 8];
         assert_eq!(expected, base_addr);
     }
@@ -8165,7 +8163,7 @@ mod tests {
         let mut base_addr: [i32; 8] = [0; 8];
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
         let src = _mm256_setr_epi32(2, 3, 4, 5, 6, 7, 8, 1);
-        _mm256_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), 0b01010101, vindex, src);
+        _mm256_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
         let expected = [0, 2, 0, 4, 0, 6, 0, 8];
         assert_eq!(expected, base_addr);
     }
@@ -8175,7 +8173,7 @@ mod tests {
         let mut base_addr: [i64; 4] = [0; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm256_setr_epi64x(2, 3, 4, 1);
-        _mm256_i32scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm256_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1, 2, 3, 4];
         assert_eq!(expected, base_addr);
     }
@@ -8185,7 +8183,7 @@ mod tests {
         let mut base_addr: [i64; 4] = [0; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm256_setr_epi64x(2, 3, 4, 1);
-        _mm256_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+        _mm256_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
         let expected = [0, 2, 0, 4];
         assert_eq!(expected, base_addr);
     }
@@ -8195,7 +8193,7 @@ mod tests {
         let mut base_addr: [f64; 4] = [0.; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm256_setr_pd(2., 3., 4., 1.);
-        _mm256_i32scatter_pd::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm256_i32scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1., 2., 3., 4.];
         assert_eq!(expected, base_addr);
     }
@@ -8205,7 +8203,7 @@ mod tests {
         let mut base_addr: [f64; 4] = [0.; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm256_setr_pd(2., 3., 4., 1.);
-        _mm256_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+        _mm256_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
         let expected = [0., 2., 0., 4.];
         assert_eq!(expected, base_addr);
     }
@@ -8215,7 +8213,7 @@ mod tests {
         let mut base_addr: [f32; 8] = [0.; 8];
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
         let src = _mm256_setr_ps(2., 3., 4., 5., 6., 7., 8., 1.);
-        _mm256_i32scatter_ps::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm256_i32scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1., 2., 3., 4., 5., 6., 7., 8.];
         assert_eq!(expected, base_addr);
     }
@@ -8225,7 +8223,7 @@ mod tests {
         let mut base_addr: [f32; 8] = [0.; 8];
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
         let src = _mm256_setr_ps(2., 3., 4., 5., 6., 7., 8., 1.);
-        _mm256_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr().cast(), 0b01010101, vindex, src);
+        _mm256_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
         let expected = [0., 2., 0., 4., 0., 6., 0., 8.];
         assert_eq!(expected, base_addr);
     }
@@ -8235,7 +8233,7 @@ mod tests {
         let mut base_addr: [i32; 4] = [0; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm_setr_epi32(2, 3, 4, 1);
-        _mm256_i64scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm256_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1, 2, 3, 4];
         assert_eq!(expected, base_addr);
     }
@@ -8245,7 +8243,7 @@ mod tests {
         let mut base_addr: [i32; 4] = [0; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm_setr_epi32(2, 3, 4, 1);
-        _mm256_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+        _mm256_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
         let expected = [0, 2, 0, 4];
         assert_eq!(expected, base_addr);
     }
@@ -8255,7 +8253,7 @@ mod tests {
         let mut base_addr: [i64; 4] = [0; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm256_setr_epi64x(2, 3, 4, 1);
-        _mm256_i64scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm256_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1, 2, 3, 4];
         assert_eq!(expected, base_addr);
     }
@@ -8265,7 +8263,7 @@ mod tests {
         let mut base_addr: [i64; 4] = [0; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm256_setr_epi64x(2, 3, 4, 1);
-        _mm256_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+        _mm256_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
         let expected = [0, 2, 0, 4];
         assert_eq!(expected, base_addr);
     }
@@ -8275,7 +8273,7 @@ mod tests {
         let mut base_addr: [f64; 4] = [0.; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm256_setr_pd(2., 3., 4., 1.);
-        _mm256_i64scatter_pd::<8>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm256_i64scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1., 2., 3., 4.];
         assert_eq!(expected, base_addr);
     }
@@ -8285,7 +8283,7 @@ mod tests {
         let mut base_addr: [f64; 4] = [0.; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm256_setr_pd(2., 3., 4., 1.);
-        _mm256_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+        _mm256_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
         let expected = [0., 2., 0., 4.];
         assert_eq!(expected, base_addr);
     }
@@ -8295,7 +8293,7 @@ mod tests {
         let mut base_addr: [f32; 4] = [0.; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm_setr_ps(2., 3., 4., 1.);
-        _mm256_i64scatter_ps::<4>(base_addr.as_mut_ptr().cast(), vindex, src);
+        _mm256_i64scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
         let expected = [1., 2., 3., 4.];
         assert_eq!(expected, base_addr);
     }
@@ -8305,7 +8303,7 @@ mod tests {
         let mut base_addr: [f32; 4] = [0.; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm_setr_ps(2., 3., 4., 1.);
-        _mm256_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr().cast(), 0b0101, vindex, src);
+        _mm256_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
let expected = [0., 2., 0., 4.]; assert_eq!(expected, base_addr); }