@@ -365,6 +365,131 @@ pub unsafe fn _mm512_maskz_dpbf16_ps(
365
365
transmute ( simd_select_bitmask ( k, rst, zero) )
366
366
}
367
367
368
+ /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
369
+ /// floating-point elements, and store the results in dst.
370
+ ///
371
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpbh_ps)
372
+ #[ inline]
373
+ #[ target_feature( enable = "avx512bf16,avx512f" ) ]
374
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
375
+ pub unsafe fn _mm512_cvtpbh_ps ( a : __m256bh ) -> __m512 {
376
+ _mm512_castsi512_ps ( _mm512_slli_epi32 :: < 16 > ( _mm512_cvtepi16_epi32 ( transmute ( a) ) ) )
377
+ }
378
+
379
+ /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
380
+ /// floating-point elements, and store the results in dst using writemask k (elements are copied
381
+ /// from src when the corresponding mask bit is not set).
382
+ ///
383
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpbh_ps)
384
+ #[ inline]
385
+ #[ target_feature( enable = "avx512bf16,avx512f" ) ]
386
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
387
+ pub unsafe fn _mm512_mask_cvtpbh_ps ( src : __m512 , k : __mmask16 , a : __m256bh ) -> __m512 {
388
+ let cvt = _mm512_cvtpbh_ps ( a) ;
389
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x16 ( ) , src. as_f32x16 ( ) ) )
390
+ }
391
+
392
+ /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
393
+ /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out
394
+ /// when the corresponding mask bit is not set).
395
+ ///
396
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpbh_ps)
397
+ #[ inline]
398
+ #[ target_feature( enable = "avx512bf16,avx512f" ) ]
399
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
400
+ pub unsafe fn _mm512_maskz_cvtpbh_ps ( k : __mmask16 , a : __m256bh ) -> __m512 {
401
+ let cvt = _mm512_cvtpbh_ps ( a) ;
402
+ let zero = _mm512_setzero_ps ( ) ;
403
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x16 ( ) , zero. as_f32x16 ( ) ) )
404
+ }
405
+
406
+ /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
407
+ /// floating-point elements, and store the results in dst.
408
+ ///
409
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpbh_ps)
410
+ #[ inline]
411
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
412
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
413
+ pub unsafe fn _mm256_cvtpbh_ps ( a : __m128bh ) -> __m256 {
414
+ _mm256_castsi256_ps ( _mm256_slli_epi32 :: < 16 > ( _mm256_cvtepi16_epi32 ( transmute ( a) ) ) )
415
+ }
416
+
417
+ /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
418
+ /// floating-point elements, and store the results in dst using writemask k (elements are copied
419
+ /// from src when the corresponding mask bit is not set).
420
+ ///
421
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpbh_ps)
422
+ #[ inline]
423
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
424
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
425
+ pub unsafe fn _mm256_mask_cvtpbh_ps ( src : __m256 , k : __mmask8 , a : __m128bh ) -> __m256 {
426
+ let cvt = _mm256_cvtpbh_ps ( a) ;
427
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x8 ( ) , src. as_f32x8 ( ) ) )
428
+ }
429
+
430
+ /// Converts packed BF16 (16-bit) floating-point elements in a to packed single-precision (32-bit)
431
+ /// floating-point elements, and store the results in dst using zeromask k (elements are zeroed out
432
+ /// when the corresponding mask bit is not set).
433
+ ///
434
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpbh_ps)
435
+ #[ inline]
436
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
437
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
438
+ pub unsafe fn _mm256_maskz_cvtpbh_ps ( k : __mmask8 , a : __m128bh ) -> __m256 {
439
+ let cvt = _mm256_cvtpbh_ps ( a) ;
440
+ let zero = _mm256_setzero_ps ( ) ;
441
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x8 ( ) , zero. as_f32x8 ( ) ) )
442
+ }
443
+
444
+ /// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point
445
+ /// elements, and store the results in dst.
446
+ ///
447
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpbh_ps)
448
+ #[ inline]
449
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
450
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
451
+ pub unsafe fn _mm_cvtpbh_ps ( a : __m128bh ) -> __m128 {
452
+ _mm_castsi128_ps ( _mm_slli_epi32 :: < 16 > ( _mm_cvtepi16_epi32 ( transmute ( a) ) ) )
453
+ }
454
+
455
+ /// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point
456
+ /// elements, and store the results in dst using writemask k (elements are copied from src when the corresponding
457
+ /// mask bit is not set).
458
+ ///
459
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpbh_ps)
460
+ #[ inline]
461
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
462
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
463
+ pub unsafe fn _mm_mask_cvtpbh_ps ( src : __m128 , k : __mmask8 , a : __m128bh ) -> __m128 {
464
+ let cvt = _mm_cvtpbh_ps ( a) ;
465
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x4 ( ) , src. as_f32x4 ( ) ) )
466
+ }
467
+
468
+ /// Converts packed BF16 (16-bit) floating-point elements in a to single-precision (32-bit) floating-point
469
+ /// elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding
470
+ /// mask bit is not set).
471
+ ///
472
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpbh_ps)
473
+ #[ inline]
474
+ #[ target_feature( enable = "avx512bf16,avx512vl" ) ]
475
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
476
+ pub unsafe fn _mm_maskz_cvtpbh_ps ( k : __mmask8 , a : __m128bh ) -> __m128 {
477
+ let cvt = _mm_cvtpbh_ps ( a) ;
478
+ let zero = _mm_setzero_ps ( ) ;
479
+ transmute ( simd_select_bitmask ( k, cvt. as_f32x4 ( ) , zero. as_f32x4 ( ) ) )
480
+ }
481
+
482
+ /// Converts a single BF16 (16-bit) floating-point element in a to a single-precision (32-bit) floating-point
483
+ /// element, and store the result in dst.
484
+ ///
485
+ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsbh_ss)
486
+ #[ inline]
487
+ #[ target_feature( enable = "avx512bf16,avx512f" ) ]
488
+ #[ unstable( feature = "stdarch_x86_avx512" , issue = "111137" ) ]
489
+ pub unsafe fn _mm_cvtsbh_ss ( a : u16 ) -> f32 {
490
+ f32:: from_bits ( ( a as u32 ) << 16 )
491
+ }
492
+
368
493
#[ cfg( test) ]
369
494
mod tests {
370
495
use crate :: { core_arch:: x86:: * , mem:: transmute} ;
@@ -1592,4 +1717,123 @@ mod tests {
1592
1717
] ;
1593
1718
assert_eq ! ( result, expected_result) ;
1594
1719
}
1720
+
1721
+ const BF16_ONE : u16 = 0b0_01111111_0000000 ;
1722
+ const BF16_TWO : u16 = 0b0_10000000_0000000 ;
1723
+ const BF16_THREE : u16 = 0b0_10000000_1000000 ;
1724
+ const BF16_FOUR : u16 = 0b0_10000001_0000000 ;
1725
+ const BF16_FIVE : u16 = 0b0_10000001_0100000 ;
1726
+ const BF16_SIX : u16 = 0b0_10000001_1000000 ;
1727
+ const BF16_SEVEN : u16 = 0b0_10000001_1100000 ;
1728
+ const BF16_EIGHT : u16 = 0b0_10000010_0000000 ;
1729
+
1730
+ #[ simd_test( enable = "avx512bf16" ) ]
1731
+ unsafe fn test_mm512_cvtpbh_ps ( ) {
1732
+ let a = __m256bh (
1733
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1734
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1735
+ ) ;
1736
+ let r = _mm512_cvtpbh_ps ( a) ;
1737
+ let e = _mm512_setr_ps (
1738
+ 1.0 , 2.0 , 3.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 , 1.0 , 2.0 , 3.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 ,
1739
+ ) ;
1740
+ assert_eq_m512 ( r, e) ;
1741
+ }
1742
+
1743
+ #[ simd_test( enable = "avx512bf16" ) ]
1744
+ unsafe fn test_mm512_mask_cvtpbh_ps ( ) {
1745
+ let a = __m256bh (
1746
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1747
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1748
+ ) ;
1749
+ let src = _mm512_setr_ps (
1750
+ 9. , 10. , 11. , 12. , 13. , 14. , 15. , 16. , 9. , 10. , 11. , 12. , 13. , 14. , 15. , 16. ,
1751
+ ) ;
1752
+ let k = 0b1010_1010_1010_1010 ;
1753
+ let r = _mm512_mask_cvtpbh_ps ( src, k, a) ;
1754
+ let e = _mm512_setr_ps (
1755
+ 9. , 2. , 11. , 4. , 13. , 6. , 15. , 8. , 9. , 2. , 11. , 4. , 13. , 6. , 15. , 8. ,
1756
+ ) ;
1757
+ assert_eq_m512 ( r, e) ;
1758
+ }
1759
+
1760
+ #[ simd_test( enable = "avx512bf16" ) ]
1761
+ unsafe fn test_mm512_maskz_cvtpbh_ps ( ) {
1762
+ let a = __m256bh (
1763
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1764
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1765
+ ) ;
1766
+ let k = 0b1010_1010_1010_1010 ;
1767
+ let r = _mm512_maskz_cvtpbh_ps ( k, a) ;
1768
+ let e = _mm512_setr_ps (
1769
+ 0. , 2. , 0. , 4. , 0. , 6. , 0. , 8. , 0. , 2. , 0. , 4. , 0. , 6. , 0. , 8. ,
1770
+ ) ;
1771
+ assert_eq_m512 ( r, e) ;
1772
+ }
1773
+
1774
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1775
+ unsafe fn test_mm256_cvtpbh_ps ( ) {
1776
+ let a = __m128bh (
1777
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1778
+ ) ;
1779
+ let r = _mm256_cvtpbh_ps ( a) ;
1780
+ let e = _mm256_setr_ps ( 1.0 , 2.0 , 3.0 , 4.0 , 5.0 , 6.0 , 7.0 , 8.0 ) ;
1781
+ assert_eq_m256 ( r, e) ;
1782
+ }
1783
+
1784
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1785
+ unsafe fn test_mm256_mask_cvtpbh_ps ( ) {
1786
+ let a = __m128bh (
1787
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1788
+ ) ;
1789
+ let src = _mm256_setr_ps ( 9. , 10. , 11. , 12. , 13. , 14. , 15. , 16. ) ;
1790
+ let k = 0b1010_1010 ;
1791
+ let r = _mm256_mask_cvtpbh_ps ( src, k, a) ;
1792
+ let e = _mm256_setr_ps ( 9. , 2. , 11. , 4. , 13. , 6. , 15. , 8. ) ;
1793
+ assert_eq_m256 ( r, e) ;
1794
+ }
1795
+
1796
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1797
+ unsafe fn test_mm256_maskz_cvtpbh_ps ( ) {
1798
+ let a = __m128bh (
1799
+ BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , BF16_FIVE , BF16_SIX , BF16_SEVEN , BF16_EIGHT ,
1800
+ ) ;
1801
+ let k = 0b1010_1010 ;
1802
+ let r = _mm256_maskz_cvtpbh_ps ( k, a) ;
1803
+ let e = _mm256_setr_ps ( 0. , 2. , 0. , 4. , 0. , 6. , 0. , 8. ) ;
1804
+ assert_eq_m256 ( r, e) ;
1805
+ }
1806
+
1807
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1808
+ unsafe fn test_mm_cvtpbh_ps ( ) {
1809
+ let a = __m128bh ( BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , 0 , 0 , 0 , 0 ) ;
1810
+ let r = _mm_cvtpbh_ps ( a) ;
1811
+ let e = _mm_setr_ps ( 1.0 , 2.0 , 3.0 , 4.0 ) ;
1812
+ assert_eq_m128 ( r, e) ;
1813
+ }
1814
+
1815
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1816
+ unsafe fn test_mm_mask_cvtpbh_ps ( ) {
1817
+ let a = __m128bh ( BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , 0 , 0 , 0 , 0 ) ;
1818
+ let src = _mm_setr_ps ( 9. , 10. , 11. , 12. ) ;
1819
+ let k = 0b1010 ;
1820
+ let r = _mm_mask_cvtpbh_ps ( src, k, a) ;
1821
+ let e = _mm_setr_ps ( 9. , 2. , 11. , 4. ) ;
1822
+ assert_eq_m128 ( r, e) ;
1823
+ }
1824
+
1825
+ #[ simd_test( enable = "avx512bf16,avx512vl" ) ]
1826
+ unsafe fn test_mm_maskz_cvtpbh_ps ( ) {
1827
+ let a = __m128bh ( BF16_ONE , BF16_TWO , BF16_THREE , BF16_FOUR , 0 , 0 , 0 , 0 ) ;
1828
+ let k = 0b1010 ;
1829
+ let r = _mm_maskz_cvtpbh_ps ( k, a) ;
1830
+ let e = _mm_setr_ps ( 0. , 2. , 0. , 4. ) ;
1831
+ assert_eq_m128 ( r, e) ;
1832
+ }
1833
+
1834
+ #[ simd_test( enable = "avx512bf16" ) ]
1835
+ unsafe fn test_mm_cvtsbh_ss ( ) {
1836
+ let r = _mm_cvtsbh_ss ( BF16_ONE ) ;
1837
+ assert_eq ! ( r, 1. ) ;
1838
+ }
1595
1839
}
0 commit comments