Skip to content

Commit 0e2ee37

Browse files
authored
[Big Endian] Add and use readLE/writeLE helpers (WebAssembly#8470)
According to godbolt.org these functions optimize to a simple `*(T *)ptr` in many cases while ensuring that memory alignment requirements and endianess does not effect the behavior. The unroll pragma is needed for GCC to properly optimize the code. The approach of using bit shifts is also used in multiple other parts of binaryen (eg. `WasmBinaryReader::getInt16`) but usage of the helper function doesn't seem to be that easy there. Helps WebAssembly#2983
1 parent 2579fa3 commit 0e2ee37

File tree

8 files changed

+77
-55
lines changed

8 files changed

+77
-55
lines changed

src/literal.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <array>
2121
#include <iostream>
2222

23+
#include "support/bits.h"
2324
#include "support/hash.h"
2425
#include "support/name.h"
2526
#include "support/small_vector.h"
@@ -831,7 +832,8 @@ template<> struct hash<wasm::Literal> {
831832
return digest;
832833
case wasm::Type::v128:
833834
uint64_t chunks[2];
834-
memcpy(&chunks, a.getv128Ptr(), 16);
835+
chunks[0] = wasm::Bits::readLE<uint64_t>(a.getv128Ptr());
836+
chunks[1] = wasm::Bits::readLE<uint64_t>(&a.getv128Ptr()[8]);
835837
wasm::rehash(digest, chunks[0]);
836838
wasm::rehash(digest, chunks[1]);
837839
return digest;

src/shell-interface.h

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "interpreter/exception.h"
2626
#include "ir/module-utils.h"
2727
#include "shared-constants.h"
28+
#include "support/bits.h"
2829
#include "support/name.h"
2930
#include "support/utilities.h"
3031
#include "wasm-interpreter.h"
@@ -33,20 +34,9 @@
3334
namespace wasm {
3435

3536
struct ShellExternalInterface : ModuleRunner::ExternalInterface {
36-
// The underlying memory can be accessed through unaligned pointers which
37-
// isn't well-behaved in C++. WebAssembly nonetheless expects it to behave
38-
// properly. Avoid emitting unaligned load/store by checking for alignment
39-
// explicitly, and performing memcpy if unaligned.
40-
//
41-
// The allocated memory tries to have the same alignment as the memory being
42-
// simulated.
4337
class Memory {
4438
// Use char because it doesn't run afoul of aliasing rules.
4539
std::vector<char> memory;
46-
template<typename T> static bool aligned(const char* address) {
47-
static_assert(!(sizeof(T) & (sizeof(T) - 1)), "must be a power of 2");
48-
return 0 == (reinterpret_cast<uintptr_t>(address) & (sizeof(T) - 1));
49-
}
5040

5141
public:
5242
Memory() = default;
@@ -65,20 +55,10 @@ struct ShellExternalInterface : ModuleRunner::ExternalInterface {
6555
}
6656
}
6757
template<typename T> void set(size_t address, T value) {
68-
if (aligned<T>(&memory[address])) {
69-
*reinterpret_cast<T*>(&memory[address]) = value;
70-
} else {
71-
std::memcpy(&memory[address], &value, sizeof(T));
72-
}
58+
Bits::writeLE<T>(value, &memory[address]);
7359
}
7460
template<typename T> T get(size_t address) {
75-
if (aligned<T>(&memory[address])) {
76-
return *reinterpret_cast<T*>(&memory[address]);
77-
} else {
78-
T loaded;
79-
std::memcpy(&loaded, &memory[address], sizeof(T));
80-
return loaded;
81-
}
61+
return Bits::readLE<T>(&memory[address]);
8262
}
8363
};
8464

src/support/bits.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717
#ifndef wasm_support_bits_h
1818
#define wasm_support_bits_h
1919

20+
#include <array>
2021
#include <climits>
2122
#include <cstdint>
23+
#include <cstring>
2224
#include <type_traits>
2325

2426
/*
@@ -94,6 +96,56 @@ template<typename T, typename U> inline static T rotateRight(T val, U count) {
9496
uint32_t log2(uint32_t v);
9597
uint32_t pow2(uint32_t v);
9698

99+
template<
100+
typename T,
101+
typename std::enable_if<
102+
std::is_same<T, typename std::array<uint8_t, std::tuple_size<T>::value>>::
103+
value,
104+
bool>::type = true>
105+
void writeLE(T val, void* ptr) {
106+
memcpy(ptr, val.data(), sizeof(T));
107+
}
108+
109+
template<typename T,
110+
typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
111+
void writeLE(T val, void* ptr) {
112+
auto v = typename std::conditional<std::is_signed<T>::value,
113+
typename std::make_unsigned<T>::type,
114+
T>::type(val);
115+
unsigned char* buf = reinterpret_cast<unsigned char*>(ptr);
116+
#pragma GCC unroll 10
117+
for (size_t i = 0; i < sizeof(T); ++i) {
118+
buf[i] = v >> (CHAR_BIT * i);
119+
}
120+
}
121+
122+
template<
123+
typename T,
124+
typename std::enable_if<
125+
std::is_same<T, typename std::array<uint8_t, std::tuple_size<T>::value>>::
126+
value,
127+
bool>::type = true>
128+
T readLE(const void* ptr) {
129+
T v;
130+
memcpy(v.data(), ptr, sizeof(T));
131+
return v;
132+
}
133+
134+
template<typename T,
135+
typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
136+
T readLE(const void* ptr) {
137+
using TU = typename std::conditional<std::is_signed<T>::value,
138+
typename std::make_unsigned<T>::type,
139+
T>::type;
140+
TU v = 0;
141+
const unsigned char* buf = reinterpret_cast<const unsigned char*>(ptr);
142+
#pragma GCC unroll 10
143+
for (size_t i = 0; i < sizeof(T); ++i) {
144+
v += (TU)buf[i] << (CHAR_BIT * i);
145+
}
146+
return v;
147+
}
148+
97149
} // namespace wasm::Bits
98150

99151
#endif // wasm_support_bits_h

src/tools/wasm-ctor-eval.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "ir/memory-utils.h"
3333
#include "ir/names.h"
3434
#include "pass.h"
35+
#include "support/bits.h"
3536
#include "support/colors.h"
3637
#include "support/file.h"
3738
#include "support/insert_ordered.h"
@@ -501,15 +502,11 @@ struct CtorEvalExternalInterface : EvallingModuleRunner::ExternalInterface {
501502
}
502503

503504
template<typename T> void doStore(Address address, T value, Name memoryName) {
504-
// Use memcpy to avoid UB if unaligned.
505-
memcpy(getMemory(address, memoryName, sizeof(T)), &value, sizeof(T));
505+
Bits::writeLE<T>(value, getMemory(address, memoryName, sizeof(T)));
506506
}
507507

508508
template<typename T> T doLoad(Address address, Name memoryName) {
509-
// Use memcpy to avoid UB if unaligned.
510-
T ret;
511-
memcpy(&ret, getMemory(address, memoryName, sizeof(T)), sizeof(T));
512-
return ret;
509+
return Bits::readLE<T>(getMemory(address, memoryName, sizeof(T)));
513510
}
514511

515512
// Clear the state of the operation of applying the interpreter's runtime

src/tools/wasm-fuzz-lattices.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "analysis/reaching-definitions-transfer-function.h"
3737
#include "analysis/transfer-function.h"
3838

39+
#include "support/bits.h"
3940
#include "support/command-line.h"
4041
#include "tools/fuzzing.h"
4142
#include "tools/fuzzing/random.h"
@@ -995,7 +996,7 @@ struct Fuzzer {
995996
// Fewer bytes are needed to generate three random lattices.
996997
std::vector<char> funcBytes(128);
997998
for (size_t i = 0; i < funcBytes.size(); i += sizeof(uint64_t)) {
998-
*(uint64_t*)(funcBytes.data() + i) = getFuncRand();
999+
Bits::writeLE<uint64_t>(getFuncRand(), funcBytes.data() + i);
9991000
}
10001001

10011002
Random rand(std::move(funcBytes));
@@ -1030,7 +1031,7 @@ struct Fuzzer {
10301031
// 4kb of random bytes should be enough for anyone!
10311032
std::vector<char> bytes(4096);
10321033
for (size_t i = 0; i < bytes.size(); i += sizeof(uint64_t)) {
1033-
*(uint64_t*)(bytes.data() + i) = getRand();
1034+
Bits::writeLE<uint64_t>(getRand(), bytes.data() + i);
10341035
}
10351036

10361037
Module testModule;

src/tools/wasm-fuzz-types.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <string>
2121
#include <variant>
2222

23+
#include "support/bits.h"
2324
#include "support/command-line.h"
2425
#include "tools/fuzzing/heap-types.h"
2526
#include "tools/fuzzing/random.h"
@@ -68,7 +69,7 @@ void Fuzzer::run(uint64_t seed) {
6869
// 4kb of random bytes should be enough for anyone!
6970
std::vector<char> bytes(4096);
7071
for (size_t i = 0; i < bytes.size(); i += sizeof(uint64_t)) {
71-
*(uint64_t*)(bytes.data() + i) = getRand();
72+
Bits::writeLE<uint64_t>(getRand(), bytes.data() + i);
7273
}
7374
rand = Random(std::move(bytes));
7475

src/wasm-interpreter.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2840,14 +2840,12 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
28402840
case Field::NotPacked:
28412841
return Literal::makeFromMemory(p, field.type);
28422842
case Field::i8: {
2843-
int8_t i;
2844-
memcpy(&i, p, sizeof(i));
2845-
return truncateForPacking(Literal(int32_t(i)), field);
2843+
return truncateForPacking(Literal(int32_t(Bits::readLE<int8_t>(p))),
2844+
field);
28462845
}
28472846
case Field::i16: {
2848-
int16_t i;
2849-
memcpy(&i, p, sizeof(i));
2850-
return truncateForPacking(Literal(int32_t(i)), field);
2847+
return truncateForPacking(Literal(int32_t(Bits::readLE<int16_t>(p))),
2848+
field);
28512849
}
28522850
case Field::WaitQueue: {
28532851
WASM_UNREACHABLE("waitqueue not implemented");

src/wasm/literal.cpp

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,7 @@ static void extractBytes(uint8_t (&dest)[16], const LaneArray<Lanes>& lanes) {
241241
for (size_t lane_index = 0; lane_index < Lanes; ++lane_index) {
242242
uint8_t bits[16];
243243
lanes[lane_index].getBits(bits);
244-
LaneT lane;
245-
memcpy(&lane, bits, sizeof(lane));
244+
LaneT lane = Bits::readLE<LaneT>(bits);
246245
for (size_t offset = 0; offset < lane_width; ++offset) {
247246
bytes.at(lane_index * lane_width + offset) =
248247
uint8_t(lane >> (8 * offset));
@@ -317,24 +316,16 @@ Literal Literal::makeFromMemory(void* p, Type type) {
317316
assert(type.isNumber());
318317
switch (type.getBasic()) {
319318
case Type::i32: {
320-
int32_t i;
321-
memcpy(&i, p, sizeof(i));
322-
return Literal(i);
319+
return Literal(Bits::readLE<int32_t>(p));
323320
}
324321
case Type::i64: {
325-
int64_t i;
326-
memcpy(&i, p, sizeof(i));
327-
return Literal(i);
322+
return Literal(Bits::readLE<int64_t>(p));
328323
}
329324
case Type::f32: {
330-
int32_t i;
331-
memcpy(&i, p, sizeof(i));
332-
return Literal(bit_cast<float>(i));
325+
return Literal(bit_cast<float>(Bits::readLE<int32_t>(p)));
333326
}
334327
case Type::f64: {
335-
int64_t i;
336-
memcpy(&i, p, sizeof(i));
337-
return Literal(bit_cast<double>(i));
328+
return Literal(bit_cast<double>(Bits::readLE<int64_t>(p)));
338329
}
339330
case Type::v128: {
340331
uint8_t bytes[16];
@@ -461,11 +452,11 @@ void Literal::getBits(uint8_t (&buf)[16]) const {
461452
switch (type.getBasic()) {
462453
case Type::i32:
463454
case Type::f32:
464-
memcpy(buf, &i32, sizeof(i32));
455+
Bits::writeLE<int32_t>(i32, buf);
465456
break;
466457
case Type::i64:
467458
case Type::f64:
468-
memcpy(buf, &i64, sizeof(i64));
459+
Bits::writeLE<int64_t>(i64, buf);
469460
break;
470461
case Type::v128:
471462
memcpy(buf, &v128, sizeof(v128));

0 commit comments

Comments
 (0)