@@ -16156,6 +16156,126 @@ pub unsafe fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
transmute(vcompresspd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
}

+ /// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_epi32)
+ #[inline]
+ #[target_feature(enable = "avx512f")]
+ #[cfg_attr(test, assert_instr(vpcompressd))]
+ pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask16, a: __m512i) {
+ vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
+ }
+
+ /// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_compressstoreu_epi32)
+ #[inline]
+ #[target_feature(enable = "avx512f,avx512vl")]
+ #[cfg_attr(test, assert_instr(vpcompressd))]
+ pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m256i) {
+ vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
+ }
+
+ /// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_compressstoreu_epi32)
+ #[inline]
+ #[target_feature(enable = "avx512f,avx512vl")]
+ #[cfg_attr(test, assert_instr(vpcompressd))]
+ pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m128i) {
+ vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
+ }
+
+ /// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_epi64)
+ #[inline]
+ #[target_feature(enable = "avx512f")]
+ #[cfg_attr(test, assert_instr(vpcompressq))]
+ pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m512i) {
+ vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
+ }
+
+ /// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_compressstoreu_epi64)
+ #[inline]
+ #[target_feature(enable = "avx512f,avx512vl")]
+ #[cfg_attr(test, assert_instr(vpcompressq))]
+ pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m256i) {
+ vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
+ }
+
+ /// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_compressstoreu_epi64)
+ #[inline]
+ #[target_feature(enable = "avx512f,avx512vl")]
+ #[cfg_attr(test, assert_instr(vpcompressq))]
+ pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m128i) {
+ vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
+ }
+
+ /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_ps)
+ #[inline]
+ #[target_feature(enable = "avx512f")]
+ #[cfg_attr(test, assert_instr(vcompressps))]
+ pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask16, a: __m512) {
+ vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
+ }
+
+ /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_compressstoreu_ps)
+ #[inline]
+ #[target_feature(enable = "avx512f,avx512vl")]
+ #[cfg_attr(test, assert_instr(vcompressps))]
+ pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m256) {
+ vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
+ }
+
+ /// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_compressstoreu_ps)
+ #[inline]
+ #[target_feature(enable = "avx512f,avx512vl")]
+ #[cfg_attr(test, assert_instr(vcompressps))]
+ pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m128) {
+ vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
+ }
+
+ /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_compressstoreu_pd)
+ #[inline]
+ #[target_feature(enable = "avx512f")]
+ #[cfg_attr(test, assert_instr(vcompresspd))]
+ pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m512d) {
+ vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
+ }
+
+ /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_compressstoreu_pd)
+ #[inline]
+ #[target_feature(enable = "avx512f,avx512vl")]
+ #[cfg_attr(test, assert_instr(vcompresspd))]
+ pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m256d) {
+ vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
+ }
+
+ /// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+ ///
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_compressstoreu_pd)
+ #[inline]
+ #[target_feature(enable = "avx512f,avx512vl")]
+ #[cfg_attr(test, assert_instr(vcompresspd))]
+ pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m128d) {
+ vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
+ }
+
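The new compress-store intrinsics above pair naturally with the AVX-512 mask-producing comparisons for stream compaction: build a writemask, then store only the selected lanes contiguously. The sketch below is illustrative only and not part of this diff; the helper name, the fixed-size output buffer, and the commented-out runtime dispatch are assumptions.

// Hypothetical helper: pack the strictly positive lanes of `a` to the front of
// `out` and return how many elements were written. The caller must have
// verified AVX-512F support at run time before calling this.
#[target_feature(enable = "avx512f")]
unsafe fn compact_positive(a: __m512i, out: &mut [i32; 16]) -> usize {
    // Lanes greater than zero become set bits in the writemask.
    let k: __mmask16 = _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512());
    // Store only the selected lanes, contiguously, starting at out[0].
    _mm512_mask_compressstoreu_epi32(out.as_mut_ptr() as *mut u8, k, a);
    k.count_ones() as usize
}

// Possible call site with runtime dispatch:
// if is_x86_feature_detected!("avx512f") {
//     let written = unsafe { compact_positive(v, &mut buf) };
// }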
/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expand_epi32&expand=2316)
@@ -38007,6 +38127,34 @@ extern "C" {
#[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

+ #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
+ fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
+ #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
+ fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
+ fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);
+
+ #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
+ fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
+ fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
+ fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);
+
+ #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
+ fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
+ #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
+ fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
+ fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);
+
+ #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
+ fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
+ fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
+ fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);
+
#[link_name = "llvm.x86.avx512.mask.expand.d.512"]
fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.expand.d.256"]
@@ -51357,6 +51505,138 @@ mod tests {
assert_eq_m128(r, e);
}

+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_compressstoreu_epi32() {
+ let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let mut r = [0_i32; 16];
+ _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0_i32; 16]);
+ _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
+ assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_compressstoreu_epi32() {
+ let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let mut r = [0_i32; 8];
+ _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0_i32; 8]);
+ _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b11001010, a);
+ assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_compressstoreu_epi32() {
+ let a = _mm_setr_epi32(1, 2, 3, 4);
+ let mut r = [0_i32; 4];
+ _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0_i32; 4]);
+ _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1011, a);
+ assert_eq!(&r, &[1, 2, 4, 0]);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_compressstoreu_epi64() {
+ let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let mut r = [0_i64; 8];
+ _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0_i64; 8]);
+ _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b11001010, a);
+ assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_compressstoreu_epi64() {
+ let a = _mm256_setr_epi64x(1, 2, 3, 4);
+ let mut r = [0_i64; 4];
+ _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0_i64; 4]);
+ _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b1011, a);
+ assert_eq!(&r, &[1, 2, 4, 0]);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_compressstoreu_epi64() {
+ let a = _mm_setr_epi64x(1, 2);
+ let mut r = [0_i64; 2];
+ _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0_i64; 2]);
+ _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b10, a);
+ assert_eq!(&r, &[2, 0]);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_compressstoreu_ps() {
+ let a = _mm512_setr_ps(
+ 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
+ 13_f32, 14_f32, 15_f32, 16_f32,
+ );
+ let mut r = [0_f32; 16];
+ _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0_f32; 16]);
+ _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
+ assert_eq!(
+ &r,
+ &[
+ 2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
+ 0_f32, 0_f32, 0_f32, 0_f32, 0_f32
+ ]
+ );
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_compressstoreu_ps() {
+ let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
+ let mut r = [0_f32; 8];
+ _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0_f32; 8]);
+ _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b11001010, a);
+ assert_eq!(
+ &r,
+ &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
+ );
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_compressstoreu_ps() {
+ let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
+ let mut r = [0.; 4];
+ _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0.; 4]);
+ _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1011, a);
+ assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_compressstoreu_pd() {
+ let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let mut r = [0.; 8];
+ _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0.; 8]);
+ _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b11001010, a);
+ assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_compressstoreu_pd() {
+ let a = _mm256_setr_pd(1., 2., 3., 4.);
+ let mut r = [0.; 4];
+ _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0.; 4]);
+ _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b1011, a);
+ assert_eq!(&r, &[1., 2., 4., 0.]);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_compressstoreu_pd() {
+ let a = _mm_setr_pd(1., 2.);
+ let mut r = [0.; 2];
+ _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
+ assert_eq!(&r, &[0.; 2]);
+ _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b10, a);
+ assert_eq!(&r, &[2., 0.]);
+ }
+
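The tests above always hand the intrinsics a buffer sized for the full vector, which is the simple conservative pattern: a compress store writes exactly k.count_ones() elements, so the destination only needs room for that many, but reserving space for every lane avoids reasoning about the mask. A hedged sketch of the same pattern with a growable buffer follows; the helper name and the Vec-based bookkeeping are illustrative assumptions, not part of this patch.

// Hypothetical helper: append the lanes of `a` selected by `k` to a Vec<f64>,
// reserving worst-case space before the unaligned compress store.
#[target_feature(enable = "avx512f")]
unsafe fn push_selected(dst: &mut Vec<f64>, k: __mmask8, a: __m512d) {
    let n = k.count_ones() as usize; // number of lanes the store will write
    dst.reserve(8); // worst case: all eight lanes selected
    let len = dst.len();
    _mm512_mask_compressstoreu_pd(dst.as_mut_ptr().add(len) as *mut u8, k, a);
    dst.set_len(len + n); // only the compressed elements were initialized
}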
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_expand_epi32() {
let src = _mm512_set1_epi32(200);