Skip to content

Commit 6f940af

Browse files
committed
add vec_perm
1 parent cf05f82 commit 6f940af

File tree

1 file changed

+139
-0
lines changed

1 file changed

+139
-0
lines changed

crates/core_arch/src/s390x/vector.rs

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ unsafe extern "unadjusted" {
100100
#[link_name = "llvm.s390.verimh"] fn verimh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short, d: i32) -> vector_signed_short;
101101
#[link_name = "llvm.s390.verimf"] fn verimf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int, d: i32) -> vector_signed_int;
102102
#[link_name = "llvm.s390.verimg"] fn verimg(a: vector_signed_long_long, b: vector_signed_long_long, c: vector_signed_long_long, d: i32) -> vector_signed_long_long;
103+
104+
#[link_name = "llvm.s390.vperm"] fn vperm(a: vector_signed_char, b: vector_signed_char, c: vector_unsigned_char) -> vector_signed_char;
103105
}
104106

105107
impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -1260,6 +1262,43 @@ mod sealed {
12601262
vector_signed_int, vmrlf, vmrhf,
12611263
vector_signed_long_long, vmrlg, vmrhg
12621264
}
1265+
1266+
/// Per-type backend for the public `vec_perm` function, which is generic over
/// `T: sealed::VectorPerm` and simply forwards to this trait's method.
#[unstable(feature = "stdarch_s390x", issue = "135681")]
1267+
pub trait VectorPerm {
1268+
/// Builds a vector of the same type from bytes of `self` and `other`, chosen
/// by the byte selector `c`. The implementations in this module lower this to
/// the `vperm` intrinsic.
unsafe fn vec_perm(self, other: Self, c: vector_unsigned_char) -> Self;
1269+
}
1270+
1271+
// Generates a `VectorPerm` impl for every vector type in the list.
//
// The `vperm` intrinsic is declared on `vector_signed_char` operands (with a
// `vector_unsigned_char` selector), so both inputs are reinterpreted as that
// type and the result is reinterpreted back to `$ty`. `transmute` only
// compiles between types of equal size, so each listed type is checked at
// compile time.
macro_rules! impl_vec_perm {
1272+
    ($($ty:ident),*) => {
1273+
        $(
1274+
            #[unstable(feature = "stdarch_s390x", issue = "135681")]
1275+
            impl VectorPerm for $ty {
1276+
                #[inline]
1277+
                #[target_feature(enable = "vector")]
1278+
                unsafe fn vec_perm(self, other: Self, c: vector_unsigned_char) -> Self {
1279+
                    // Byte-wise permute; the element type is irrelevant to `vperm`.
                    transmute(vperm(transmute(self), transmute(other), c))
1280+
                }
1281+
            }
1282+
        )*
1283+
    }
1284+
}
1285+
1286+
impl_vec_perm! {
1287+
    vector_signed_char,
1288+
    vector_signed_short,
1289+
    vector_signed_int,
1290+
    vector_signed_long_long,
1291+
    vector_unsigned_char,
1292+
    vector_unsigned_short,
1293+
    vector_unsigned_int,
1294+
    vector_unsigned_long_long,
1295+
    vector_bool_char,
1296+
    vector_bool_short,
1297+
    vector_bool_int,
1298+
    vector_bool_long_long,
1299+
    vector_float,
1300+
    vector_double
1301+
}
12631302
}
12641303

12651304
/// Vector element-wise addition.
@@ -1810,6 +1849,16 @@ pub unsafe fn vec_genmasks_64<const L: u8, const H: u8>() -> vector_unsigned_lon
18101849
vector_unsigned_long_long(const { [genmasks(u64::BITS, L, H); 2] })
18111850
}
18121851

1852+
/// Builds a vector from selected bytes of `a` and `b`, arranged as directed by `c`.
1853+
/// For every result byte, the low 5 bits of the matching byte of `c` are used as an
/// index into the bytes of `a` followed by the bytes of `b`.
1854+
/// Note: the vector generate mask built-in function [`vec_genmask`] may be useful for building `c`.
///
/// # Safety
///
/// This function is compiled with the `vector` target feature enabled; the caller
/// must ensure that feature is available on the executing CPU.
1855+
#[inline]
1856+
#[target_feature(enable = "vector")]
1857+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
1858+
pub unsafe fn vec_perm<T: sealed::VectorPerm>(a: T, b: T, c: vector_unsigned_char) -> T {
1859+
    // Dispatch to the per-type implementation through the sealed trait.
    <T as sealed::VectorPerm>::vec_perm(a, b, c)
1860+
}
1861+
18131862
#[cfg(test)]
18141863
mod tests {
18151864
use super::*;
@@ -2227,4 +2276,94 @@ mod tests {
22272276
[0x00000000, 0x11111111, 0x22222222, 0x33333333],
22282277
[0xCCCCCCCC, 0x22222222, 0xDDDDDDDD, 0x33333333]
22292278
}
2279+
2280+
// Declares one `vec_perm` test case.
//
// Arguments, in order: test name, the lane type used to spell out the values,
// the vector type handed to `vec_perm`, then four bracketed lists: first
// operand, second operand, 16 selector bytes, and the expected result.
macro_rules! test_vec_perm {
2281+
    {$test_fn:ident,
2282+
     $lanes:ident, $vector:ident,
2283+
     [$($lhs:expr),+], [$($rhs:expr),+], [$($sel:expr),+], [$($want:expr),+]} => {
2284+
        #[simd_test(enable = "vector")]
2285+
        unsafe fn $test_fn() {
2286+
            // Inputs are written as lane values and reinterpreted as the vector type.
            let first: $vector = transmute($lanes::new($($lhs),+));
2287+
            let second: $vector = transmute($lanes::new($($rhs),+));
2288+
            let selector: vector_unsigned_char = transmute(u8x16::new($($sel),+));
2289+
            let expected = $lanes::new($($want),+);
2290+
2291+
            // Reinterpret the result back to lanes so it can be compared.
            let actual: $lanes = transmute(vec_perm(first, second, selector));
2292+
            assert_eq!(expected, actual);
2293+
        }
2294+
    }
2295+
}
2296+
2297+
// Selector convention in the cases below: per the `vec_perm` docs, the low 5
// bits of each selector byte index the 32 bytes of `a` followed by `b`, so
// 0x00..=0x0F pick bytes of the first operand and 0x10..=0x1F pick bytes of
// the second.
//
// 8-bit lanes: the selector alternates between two bytes of `a` and two bytes of `b`.
test_vec_perm! {test_vec_perm_u8x16,
2298+
u8x16, vector_unsigned_char,
2299+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
2300+
[100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115],
2301+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2302+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2303+
[0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]}
2304+
test_vec_perm! {test_vec_perm_i8x16,
2305+
i8x16, vector_signed_char,
2306+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
2307+
[100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115],
2308+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2309+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2310+
[0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]}
2311+
2312+
test_vec_perm! {test_vec_perm_m8x16,
2313+
m8x16, vector_bool_char,
2314+
[false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false],
2315+
[true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true],
2316+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2317+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2318+
[false, false, true, true, false, false, true, true, false, false, true, true, false, false, true, true]}
// 16-bit lanes: the same selector now picks whole two-byte elements, so the
// expected result interleaves elements of `a` and `b`.
2319+
test_vec_perm! {test_vec_perm_u16x8,
2320+
u16x8, vector_unsigned_short,
2321+
[0, 1, 2, 3, 4, 5, 6, 7],
2322+
[10, 11, 12, 13, 14, 15, 16, 17],
2323+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2324+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2325+
[0, 10, 1, 11, 2, 12, 3, 13]}
2326+
test_vec_perm! {test_vec_perm_i16x8,
2327+
i16x8, vector_signed_short,
2328+
[0, 1, 2, 3, 4, 5, 6, 7],
2329+
[10, 11, 12, 13, 14, 15, 16, 17],
2330+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2331+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2332+
[0, 10, 1, 11, 2, 12, 3, 13]}
2333+
test_vec_perm! {test_vec_perm_m16x8,
2334+
m16x8, vector_bool_short,
2335+
[false, false, false, false, false, false, false, false],
2336+
[true, true, true, true, true, true, true, true],
2337+
[0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13,
2338+
0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17],
2339+
[false, true, false, true, false, true, false, true]}
2340+
2341+
// 32-bit lanes: the selector is regrouped into runs of four bytes so that it
// picks whole four-byte elements, alternating between `a` and `b`.
test_vec_perm! {test_vec_perm_u32x4,
2342+
u32x4, vector_unsigned_int,
2343+
[0, 1, 2, 3],
2344+
[10, 11, 12, 13],
2345+
[0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
2346+
0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17],
2347+
[0, 10, 1, 11]}
2348+
test_vec_perm! {test_vec_perm_i32x4,
2349+
i32x4, vector_signed_int,
2350+
[0, 1, 2, 3],
2351+
[10, 11, 12, 13],
2352+
[0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
2353+
0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17],
2354+
[0, 10, 1, 11]}
2355+
test_vec_perm! {test_vec_perm_m32x4,
2356+
m32x4, vector_bool_int,
2357+
[false, false, false, false],
2358+
[true, true, true, true],
2359+
[0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
2360+
0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17],
2361+
[false, true, false, true]}
2362+
// The second operand uses values disjoint from the first so that every
// expected lane identifies which operand (and which element) it came from.
// With overlapping values (e.g. both operands containing 1.0) a result with
// swapped lanes could still compare equal.
test_vec_perm! {test_vec_perm_f32x4,
2363+
f32x4, vector_float,
2364+
[0.0, 1.0, 2.0, 3.0],
2365+
[10.0, 11.0, 12.0, 13.0],
2366+
[0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13,
2367+
0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17],
2368+
[0.0, 10.0, 1.0, 11.0]}
22302369
}

0 commit comments

Comments
 (0)