Skip to content

Commit 7e2cdc6

Browse files
asomers authored and Amanieu committed
Implement _mm256_i32scatter_epi64 from AVX512VL
1 parent 461aad2 commit 7e2cdc6

File tree

3 files changed

+37
-1
lines changed

3 files changed

+37
-1
lines changed

crates/core_arch/avx512f.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1519,7 +1519,7 @@
15191519
* [x] [`_mm512_mask_i32scatter_epi64`]
15201520
* [_] [`_mm_i32scatter_epi64`]//need i1
15211521
* [_] [`_mm_mask_i32scatter_epi64`] //need i1
1522-
* [_] [`_mm256_i32scatter_epi64`] //need i1
1522+
* [x] [`_mm256_i32scatter_epi64`]
15231523
* [_] [`_mm256_mask_i32scatter_epi64`] //need i1
15241524
* [x] [`_mm512_i32scatter_ps`]
15251525
* [x] [`_mm512_mask_i32scatter_ps`]

crates/core_arch/src/x86/avx512f.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15757,6 +15757,26 @@ pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
1575715757
vpscatterdq(slice, mask, offsets, src, SCALE);
1575815758
}
1575915759

15760+
/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// The operand names above follow Intel's description; in this signature `a` is `src`, `base_addr` is `slice`, `vindex` is `offsets` and `scale` is the `SCALE` const generic. This is the unmasked form: the mask handed to the underlying intrinsic is `-1` (all bits set).
15761+
///
15762+
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
15763+
#[inline]
15764+
#[target_feature(enable = "avx512f,avx512vl")]
15765+
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
15766+
#[rustc_legacy_const_generics(3)]
15767+
pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
15768+
slice: *mut u8,
15769+
offsets: __m128i,
15770+
src: __m256i,
15771+
) {
15772+
static_assert_imm8_scale!(SCALE); // presumably rejects a SCALE other than 1, 2, 4 or 8 at compile time -- confirm against the macro
15773+
let src = src.as_i64x4(); // view the 256-bit source as four 64-bit lanes
15774+
let neg_one = -1; // mask argument with every bit set
15775+
let slice = slice as *mut i8; // the extern binding takes `*mut i8`
15776+
let offsets = offsets.as_i32x4(); // view the 128-bit index vector as four 32-bit lanes
15777+
vpscatterdq256(slice, neg_one, offsets, src, SCALE);
15778+
}
15779+
1576015780
/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
1576115781
///
1576215782
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
@@ -38307,6 +38327,8 @@ extern "C" {
3830738327
fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
3830838328
#[link_name = "llvm.x86.avx512.scatter.dpq.512"]
3830938329
fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
38330+
// Binding called by `_mm256_i32scatter_epi64`: takes four 32-bit offsets (`i32x4`) and four 64-bit source elements (`i64x4`).
#[link_name = "llvm.x86.avx512.scattersiv4.di"]
38331+
fn vpscatterdq256(slice: *mut i8, mask: i8, offsets: i32x4, src: i64x4, scale: i32);
3831038332

3831138333
#[link_name = "llvm.x86.avx512.scatter.dpi.512"]
3831238334
fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);

crates/core_arch/src/x86_64/avx512f.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7551,6 +7551,20 @@ mod tests {
75517551
assert_eq!(&arr[..], &expected[..],);
75527552
}
75537553

7554+
// Scatters the values 1..=4 into a zeroed 64-element `i64` array at element indices 0, 16, 32 and 48, then compares the whole array against the expected contents.
#[simd_test(enable = "avx512f,avx512vl")]
7555+
unsafe fn test_mm256_i32scatter_epi64() {
7556+
let mut arr = [0i64; 64];
7557+
let index = _mm_setr_epi32(0, 16, 32, 48);
7558+
let src = _mm256_setr_epi64x(1, 2, 3, 4);
7559+
// With SCALE = 8 each index is multiplied by 8, the size of an `i64` in bytes, so the indices count `i64` elements rather than bytes
7560+
_mm256_i32scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, index, src);
7561+
let mut expected = [0i64; 64];
7562+
for i in 0..4 {
7563+
expected[i * 16] = (i + 1) as i64; // slot 16*i receives the i-th source value (1, 2, 3, 4)
7564+
}
7565+
assert_eq!(&arr[..], &expected[..],); // every other slot must still be zero
7566+
}
7567+
75547568
#[simd_test(enable = "avx512f")]
75557569
unsafe fn test_mm512_i64scatter_epi64() {
75567570
let mut arr = [0i64; 128];

0 commit comments

Comments
 (0)