@@ -100,6 +100,8 @@ unsafe extern "unadjusted" {
100
100
#[ link_name = "llvm.s390.verimh" ] fn verimh ( a : vector_signed_short , b : vector_signed_short , c : vector_signed_short , d : i32 ) -> vector_signed_short ;
101
101
#[ link_name = "llvm.s390.verimf" ] fn verimf ( a : vector_signed_int , b : vector_signed_int , c : vector_signed_int , d : i32 ) -> vector_signed_int ;
102
102
#[ link_name = "llvm.s390.verimg" ] fn verimg ( a : vector_signed_long_long , b : vector_signed_long_long , c : vector_signed_long_long , d : i32 ) -> vector_signed_long_long ;
103
+
104
+ #[ link_name = "llvm.s390.vperm" ] fn vperm ( a : vector_signed_char , b : vector_signed_char , c : vector_unsigned_char ) -> vector_signed_char ;
103
105
}
104
106
105
107
// NOTE(review): `impl_from!` is defined outside this view; presumably it
// generates conversions for each listed SIMD helper type — confirm.
impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -1260,6 +1262,43 @@ mod sealed {
1260
1262
vector_signed_int, vmrlf, vmrhf,
1261
1263
vector_signed_long_long, vmrlg, vmrhg
1262
1264
}
1265
+
1266
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
1267
+ pub trait VectorPerm {
1268
+ unsafe fn vec_perm ( self , other : Self , c : vector_unsigned_char ) -> Self ;
1269
+ }
1270
+
1271
+ macro_rules! impl_merge {
1272
+ ( $( $ty: ident) ,* ) => {
1273
+ $(
1274
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
1275
+ impl VectorPerm for $ty {
1276
+ #[ inline]
1277
+ #[ target_feature( enable = "vector" ) ]
1278
+ unsafe fn vec_perm( self , other: Self , c: vector_unsigned_char) -> Self {
1279
+ transmute( vperm( transmute( self ) , transmute( other) , c) )
1280
+ }
1281
+ }
1282
+ ) *
1283
+ }
1284
+ }
1285
+
1286
+ impl_merge ! {
1287
+ vector_signed_char,
1288
+ vector_signed_short,
1289
+ vector_signed_int,
1290
+ vector_signed_long_long,
1291
+ vector_unsigned_char,
1292
+ vector_unsigned_short,
1293
+ vector_unsigned_int,
1294
+ vector_unsigned_long_long,
1295
+ vector_bool_char,
1296
+ vector_bool_short,
1297
+ vector_bool_int,
1298
+ vector_bool_long_long,
1299
+ vector_float,
1300
+ vector_double
1301
+ }
1263
1302
}
1264
1303
1265
1304
/// Vector element-wise addition.
@@ -1810,6 +1849,16 @@ pub unsafe fn vec_genmasks_64<const L: u8, const H: u8>() -> vector_unsigned_lon
1810
1849
vector_unsigned_long_long ( const { [ genmasks ( u64:: BITS , L , H ) ; 2 ] } )
1811
1850
}
1812
1851
1852
+ /// Returns a vector that contains some elements of two vectors, in the order specified by a third vector.
1853
+ /// Each byte of the result is selected by using the least significant 5 bits of the corresponding byte of c as an index into the concatenated bytes of a and b.
1854
+ /// Note: The vector generate mask built-in function [`vec_genmask`] could help generate the mask c.
1855
+ #[ inline]
1856
+ #[ target_feature( enable = "vector" ) ]
1857
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
1858
+ pub unsafe fn vec_perm < T : sealed:: VectorPerm > ( a : T , b : T , c : vector_unsigned_char ) -> T {
1859
+ a. vec_perm ( b, c)
1860
+ }
1861
+
1813
1862
#[ cfg( test) ]
1814
1863
mod tests {
1815
1864
use super :: * ;
@@ -2227,4 +2276,94 @@ mod tests {
2227
2276
[ 0x00000000 , 0x11111111 , 0x22222222 , 0x33333333 ] ,
2228
2277
[ 0xCCCCCCCC , 0x22222222 , 0xDDDDDDDD , 0x33333333 ]
2229
2278
}
2279
+
2280
// Generates one `#[simd_test]` for `vec_perm`.
//
// Arguments: the test name, the helper type used to build and compare lanes
// (`$shorttype`), the vector type passed to `vec_perm` (`$longtype`), then
// four lane lists: first operand, second operand, 16 selector bytes, and the
// expected result.
macro_rules! test_vec_perm {
    {
        $name:ident,
        $shorttype:ident, $longtype:ident,
        [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]
    } => {
        #[simd_test(enable = "vector")]
        unsafe fn $name() {
            let expected = $shorttype::new($($d),+);

            let lhs: $longtype = transmute($shorttype::new($($a),+));
            let rhs: $longtype = transmute($shorttype::new($($b),+));
            let selector: vector_unsigned_char = transmute(u8x16::new($($c),+));

            let actual: $shorttype = transmute(vec_perm(lhs, rhs, selector));
            assert_eq!(expected, actual);
        }
    }
}
2296
+
2297
+ test_vec_perm ! { test_vec_perm_u8x16,
2298
+ u8x16, vector_unsigned_char,
2299
+ [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ] ,
2300
+ [ 100 , 101 , 102 , 103 , 104 , 105 , 106 , 107 , 108 , 109 , 110 , 111 , 112 , 113 , 114 , 115 ] ,
2301
+ [ 0x00 , 0x01 , 0x10 , 0x11 , 0x02 , 0x03 , 0x12 , 0x13 ,
2302
+ 0x04 , 0x05 , 0x14 , 0x15 , 0x06 , 0x07 , 0x16 , 0x17 ] ,
2303
+ [ 0 , 1 , 100 , 101 , 2 , 3 , 102 , 103 , 4 , 5 , 104 , 105 , 6 , 7 , 106 , 107 ] }
2304
+ test_vec_perm ! { test_vec_perm_i8x16,
2305
+ i8x16, vector_signed_char,
2306
+ [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ] ,
2307
+ [ 100 , 101 , 102 , 103 , 104 , 105 , 106 , 107 , 108 , 109 , 110 , 111 , 112 , 113 , 114 , 115 ] ,
2308
+ [ 0x00 , 0x01 , 0x10 , 0x11 , 0x02 , 0x03 , 0x12 , 0x13 ,
2309
+ 0x04 , 0x05 , 0x14 , 0x15 , 0x06 , 0x07 , 0x16 , 0x17 ] ,
2310
+ [ 0 , 1 , 100 , 101 , 2 , 3 , 102 , 103 , 4 , 5 , 104 , 105 , 6 , 7 , 106 , 107 ] }
2311
+
2312
+ test_vec_perm ! { test_vec_perm_m8x16,
2313
+ m8x16, vector_bool_char,
2314
+ [ false , false , false , false , false , false , false , false , false , false , false , false , false , false , false , false ] ,
2315
+ [ true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true ] ,
2316
+ [ 0x00 , 0x01 , 0x10 , 0x11 , 0x02 , 0x03 , 0x12 , 0x13 ,
2317
+ 0x04 , 0x05 , 0x14 , 0x15 , 0x06 , 0x07 , 0x16 , 0x17 ] ,
2318
+ [ false , false , true , true , false , false , true , true , false , false , true , true , false , false , true , true ] }
2319
+ test_vec_perm ! { test_vec_perm_u16x8,
2320
+ u16x8, vector_unsigned_short,
2321
+ [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] ,
2322
+ [ 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 ] ,
2323
+ [ 0x00 , 0x01 , 0x10 , 0x11 , 0x02 , 0x03 , 0x12 , 0x13 ,
2324
+ 0x04 , 0x05 , 0x14 , 0x15 , 0x06 , 0x07 , 0x16 , 0x17 ] ,
2325
+ [ 0 , 10 , 1 , 11 , 2 , 12 , 3 , 13 ] }
2326
+ test_vec_perm ! { test_vec_perm_i16x8,
2327
+ i16x8, vector_signed_short,
2328
+ [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] ,
2329
+ [ 10 , 11 , 12 , 13 , 14 , 15 , 16 , 17 ] ,
2330
+ [ 0x00 , 0x01 , 0x10 , 0x11 , 0x02 , 0x03 , 0x12 , 0x13 ,
2331
+ 0x04 , 0x05 , 0x14 , 0x15 , 0x06 , 0x07 , 0x16 , 0x17 ] ,
2332
+ [ 0 , 10 , 1 , 11 , 2 , 12 , 3 , 13 ] }
2333
+ test_vec_perm ! { test_vec_perm_m16x8,
2334
+ m16x8, vector_bool_short,
2335
+ [ false , false , false , false , false , false , false , false ] ,
2336
+ [ true , true , true , true , true , true , true , true ] ,
2337
+ [ 0x00 , 0x01 , 0x10 , 0x11 , 0x02 , 0x03 , 0x12 , 0x13 ,
2338
+ 0x04 , 0x05 , 0x14 , 0x15 , 0x06 , 0x07 , 0x16 , 0x17 ] ,
2339
+ [ false , true , false , true , false , true , false , true ] }
2340
+
2341
+ test_vec_perm ! { test_vec_perm_u32x4,
2342
+ u32x4, vector_unsigned_int,
2343
+ [ 0 , 1 , 2 , 3 ] ,
2344
+ [ 10 , 11 , 12 , 13 ] ,
2345
+ [ 0x00 , 0x01 , 0x02 , 0x03 , 0x10 , 0x11 , 0x12 , 0x13 ,
2346
+ 0x04 , 0x05 , 0x06 , 0x07 , 0x14 , 0x15 , 0x16 , 0x17 ] ,
2347
+ [ 0 , 10 , 1 , 11 ] }
2348
+ test_vec_perm ! { test_vec_perm_i32x4,
2349
+ i32x4, vector_signed_int,
2350
+ [ 0 , 1 , 2 , 3 ] ,
2351
+ [ 10 , 11 , 12 , 13 ] ,
2352
+ [ 0x00 , 0x01 , 0x02 , 0x03 , 0x10 , 0x11 , 0x12 , 0x13 ,
2353
+ 0x04 , 0x05 , 0x06 , 0x07 , 0x14 , 0x15 , 0x16 , 0x17 ] ,
2354
+ [ 0 , 10 , 1 , 11 ] }
2355
+ test_vec_perm ! { test_vec_perm_m32x4,
2356
+ m32x4, vector_bool_int,
2357
+ [ false , false , false , false ] ,
2358
+ [ true , true , true , true ] ,
2359
+ [ 0x00 , 0x01 , 0x02 , 0x03 , 0x10 , 0x11 , 0x12 , 0x13 ,
2360
+ 0x04 , 0x05 , 0x06 , 0x07 , 0x14 , 0x15 , 0x16 , 0x17 ] ,
2361
+ [ false , true , false , true ] }
2362
+ test_vec_perm ! { test_vec_perm_f32x4,
2363
+ f32x4, vector_float,
2364
+ [ 0.0 , 1.0 , 2.0 , 3.0 ] ,
2365
+ [ 1.0 , 1.1 , 1.2 , 1.3 ] ,
2366
+ [ 0x00 , 0x01 , 0x02 , 0x03 , 0x10 , 0x11 , 0x12 , 0x13 ,
2367
+ 0x04 , 0x05 , 0x06 , 0x07 , 0x14 , 0x15 , 0x16 , 0x17 ] ,
2368
+ [ 0.0 , 1.0 , 1.0 , 1.1 ] }
2230
2369
}
0 commit comments