
Commit cf1dbfa

wasm: Lower alignment of all loads/stores (#1175)
This changes the wasm simd intrinsics which deal with memory to match clang, where they are all emitted with an alignment of 1. This is expected to have no performance impact, since wasm engines generally ignore alignment; it is just a hint. It also increases safety slightly when used from Rust: previously, passing an unaligned pointer could result in UB on the LLVM side, so the intrinsics are now usable in slightly more situations than before.

It's expected that if higher alignment is desired, programs will not use these intrinsics but rather their component parts. For example, instead of `v128_load` you'd just load through the pointer itself (and loading from a pointer in Rust automatically assumes correct alignment). For `v128_load64_splat` you'd do a load followed by a splat operation, which LLVM should optimize into a `v128.load64_splat` instruction with the desired alignment. LLVM doesn't yet fully support some of these optimizations (such as forming `v128.load16_lane` from its component parts), but that's expected to be a temporary issue. Additionally, there is currently no way to configure the alignment of operations that can't be decomposed into component parts (such as `i64x2_load_extend_u32x2`), but we can cross that bridge if anyone ever needs the alignment configured there.
1 parent b9aeb7e commit cf1dbfa
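As a rough sketch (not part of this commit) of the decomposed alternative described in the message above, assuming the public `core::arch::wasm32` API and hypothetical helper names `load_aligned` and `load64_splat_aligned`, a program that wants the compiler to keep assuming natural alignment could write:

    use core::arch::wasm32::*;

    // Plain dereference instead of `v128_load`: the load keeps Rust's natural
    // 16-byte alignment assumption for `v128`.
    #[target_feature(enable = "simd128")]
    unsafe fn load_aligned(p: *const v128) -> v128 {
        *p
    }

    // Ordinary load followed by a splat; LLVM should fuse this into a
    // `v128.load64_splat` with the natural 8-byte alignment.
    #[target_feature(enable = "simd128")]
    unsafe fn load64_splat_aligned(p: *const u64) -> v128 {
        u64x2_splat(*p)
    }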

File tree

1 file changed: +30 −11 lines

crates/core_arch/src/wasm32/simd128.rs (+30 −11)
@@ -277,13 +277,23 @@ extern "C" {
     fn llvm_f64x2_promote_low_f32x4(x: simd::f32x4) -> simd::f64x2;
 }
 
+#[repr(packed)]
+#[derive(Copy)]
+struct Unaligned<T>(T);
+
+impl<T: Copy> Clone for Unaligned<T> {
+    fn clone(&self) -> Unaligned<T> {
+        *self
+    }
+}
+
 /// Loads a `v128` vector from the given heap address.
 #[inline]
 #[cfg_attr(test, assert_instr(v128.load))]
 #[target_feature(enable = "simd128")]
 #[doc(alias("v128.load"))]
 pub unsafe fn v128_load(m: *const v128) -> v128 {
-    *m
+    (*(m as *const Unaligned<v128>)).0
 }
 
 /// Load eight 8-bit integers and sign extend each one to a 16-bit lane
@@ -292,7 +302,8 @@ pub unsafe fn v128_load(m: *const v128) -> v128 {
 #[target_feature(enable = "simd128")]
 #[doc(alias("v128.load8x8_s"))]
 pub unsafe fn i16x8_load_extend_i8x8(m: *const i8) -> v128 {
-    transmute(simd_cast::<_, simd::i16x8>(*(m as *const simd::i8x8)))
+    let m = *(m as *const Unaligned<simd::i8x8>);
+    transmute(simd_cast::<_, simd::i16x8>(m.0))
 }
 
 /// Load eight 8-bit integers and zero extend each one to a 16-bit lane
@@ -301,7 +312,8 @@ pub unsafe fn i16x8_load_extend_i8x8(m: *const i8) -> v128 {
 #[target_feature(enable = "simd128")]
 #[doc(alias("v128.load8x8_u"))]
 pub unsafe fn i16x8_load_extend_u8x8(m: *const u8) -> v128 {
-    transmute(simd_cast::<_, simd::u16x8>(*(m as *const simd::u8x8)))
+    let m = *(m as *const Unaligned<simd::u8x8>);
+    transmute(simd_cast::<_, simd::u16x8>(m.0))
 }
 
 pub use i16x8_load_extend_u8x8 as u16x8_load_extend_u8x8;
@@ -312,7 +324,8 @@ pub use i16x8_load_extend_u8x8 as u16x8_load_extend_u8x8;
 #[target_feature(enable = "simd128")]
 #[doc(alias("v128.load16x4_s"))]
 pub unsafe fn i32x4_load_extend_i16x4(m: *const i16) -> v128 {
-    transmute(simd_cast::<_, simd::i32x4>(*(m as *const simd::i16x4)))
+    let m = *(m as *const Unaligned<simd::i16x4>);
+    transmute(simd_cast::<_, simd::i32x4>(m.0))
 }
 
 /// Load four 16-bit integers and zero extend each one to a 32-bit lane
@@ -321,7 +334,8 @@ pub unsafe fn i32x4_load_extend_i16x4(m: *const i16) -> v128 {
 #[target_feature(enable = "simd128")]
 #[doc(alias("v128.load16x4_u"))]
 pub unsafe fn i32x4_load_extend_u16x4(m: *const u16) -> v128 {
-    transmute(simd_cast::<_, simd::u32x4>(*(m as *const simd::u16x4)))
+    let m = *(m as *const Unaligned<simd::u16x4>);
+    transmute(simd_cast::<_, simd::u32x4>(m.0))
 }
 
 pub use i32x4_load_extend_u16x4 as u32x4_load_extend_u16x4;
@@ -332,7 +346,8 @@ pub use i32x4_load_extend_u16x4 as u32x4_load_extend_u16x4;
 #[target_feature(enable = "simd128")]
 #[doc(alias("v128.load32x2_s"))]
 pub unsafe fn i64x2_load_extend_i32x2(m: *const i32) -> v128 {
-    transmute(simd_cast::<_, simd::i64x2>(*(m as *const simd::i32x2)))
+    let m = *(m as *const Unaligned<simd::i32x2>);
+    transmute(simd_cast::<_, simd::i64x2>(m.0))
 }
 
 /// Load two 32-bit integers and zero extend each one to a 64-bit lane
@@ -341,7 +356,8 @@ pub unsafe fn i64x2_load_extend_i32x2(m: *const i32) -> v128 {
 #[target_feature(enable = "simd128")]
 #[doc(alias("v128.load32x2_u"))]
 pub unsafe fn i64x2_load_extend_u32x2(m: *const u32) -> v128 {
-    transmute(simd_cast::<_, simd::u64x2>(*(m as *const simd::u32x2)))
+    let m = *(m as *const Unaligned<simd::u32x2>);
+    transmute(simd_cast::<_, simd::u64x2>(m.0))
 }
 
 pub use i64x2_load_extend_u32x2 as u64x2_load_extend_u32x2;
@@ -361,7 +377,8 @@ pub unsafe fn v128_load8_splat(m: *const u8) -> v128 {
 #[target_feature(enable = "simd128")]
 #[doc(alias("v128.load16_splat"))]
 pub unsafe fn v128_load16_splat(m: *const u16) -> v128 {
-    transmute(simd::u16x8::splat(*m))
+    let m = ptr::read_unaligned(m);
+    transmute(simd::u16x8::splat(m))
 }
 
 /// Load a single element and splat to all lanes of a v128 vector.
@@ -370,7 +387,8 @@ pub unsafe fn v128_load16_splat(m: *const u16) -> v128 {
 #[target_feature(enable = "simd128")]
 #[doc(alias("v128.load32_splat"))]
 pub unsafe fn v128_load32_splat(m: *const u32) -> v128 {
-    transmute(simd::u32x4::splat(*m))
+    let m = ptr::read_unaligned(m);
+    transmute(simd::u32x4::splat(m))
 }
 
 /// Load a single element and splat to all lanes of a v128 vector.
@@ -379,7 +397,8 @@ pub unsafe fn v128_load32_splat(m: *const u32) -> v128 {
 #[target_feature(enable = "simd128")]
 #[doc(alias("v128.load64_splat"))]
 pub unsafe fn v128_load64_splat(m: *const u64) -> v128 {
-    transmute(simd::u64x2::splat(*m))
+    let m = ptr::read_unaligned(m);
+    transmute(simd::u64x2::splat(m))
 }
 
 /// Load a 32-bit element into the low bits of the vector and sets all other
@@ -408,7 +427,7 @@ pub unsafe fn v128_load64_zero(m: *const u64) -> v128 {
 #[target_feature(enable = "simd128")]
 #[doc(alias("v128.store"))]
 pub unsafe fn v128_store(m: *mut v128, a: v128) {
-    *m = a;
+    *(m as *mut Unaligned<v128>) = Unaligned(a);
 }
 
 /// Loads an 8-bit value from `m` and sets lane `L` of `v` to that value.
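Conversely (again not part of the commit), a minimal sketch of the usage this change is meant to permit, using a hypothetical `load_unaligned` helper and the public `core::arch::wasm32` API: loading a `v128` from an arbitrary, possibly unaligned byte offset into a buffer should no longer risk UB from the alignment hint.

    use core::arch::wasm32::*;

    // Reads 16 bytes starting at `offset`; the pointer passed to `v128_load`
    // need not be 16-byte aligned now that the intrinsic is emitted with an
    // alignment of 1.
    #[target_feature(enable = "simd128")]
    unsafe fn load_unaligned(buf: &[u8], offset: usize) -> v128 {
        assert!(offset + 16 <= buf.len());
        v128_load(buf.as_ptr().add(offset) as *const v128)
    }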
