diff --git a/include/biscuit/assembler.hpp b/include/biscuit/assembler.hpp index da3a797..c1e7451 100644 --- a/include/biscuit/assembler.hpp +++ b/include/biscuit/assembler.hpp @@ -178,6 +178,7 @@ class Assembler { void LH(GPR rd, int32_t imm, GPR rs) noexcept; void LHU(GPR rd, int32_t imm, GPR rs) noexcept; void LI(GPR rd, uint32_t imm) noexcept; + void LI64(GPR rd, uint64_t imm) noexcept; void LUI(GPR rd, uint32_t imm) noexcept; void LW(GPR rd, int32_t imm, GPR rs) noexcept; diff --git a/src/assembler.cpp b/src/assembler.cpp index bddfb3c..04976f2 100644 --- a/src/assembler.cpp +++ b/src/assembler.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include "assembler_util.hpp" @@ -303,30 +304,66 @@ void Assembler::LHU(GPR rd, int32_t imm, GPR rs) noexcept { } void Assembler::LI(GPR rd, uint32_t imm) noexcept { - const auto lower = imm & 0xFFF; - const auto upper = (imm & 0xFFFFF000) >> 12; - const auto simm = static_cast(imm); - - // If the immediate can fit within 12 bits, we only need to emit an ADDI. - if (IsValidSigned12BitImm(simm)) { - ADDI(rd, x0, static_cast(lower)); - } else { - const bool needs_increment = (lower & 0x800) != 0; - const auto upper_imm = needs_increment ? upper + 1 : upper; - - // Note that we add 1 to the upper portion of the immediate if the lower - // immediate's most significant bit is set. This is necessary, as ADDI - // sign-extends its 12-bit immediate before performing addition. - // - // In the event of the sign-extension, this means that we'll be adding - // an equivalent of "lower - 4096" to the upper immediate. - // - // We add 1 to the upper part of the immediate. the upper part's least - // significant bit is bit 12. Adding 1 to this bit is equivalent to adding - // 4096, which counteracts the sign-extension, preserving the value. + // Depending on imm, the following instructions are emitted. 
+ // hi20 == 0 -> ADDI + // lo12 == 0 && hi20 != 0 -> LUI + // otherwise -> LUI+ADDI + + // Add 0x800 to cancel out the sign extension of ADDI. + const auto hi20 = (imm + 0x800) >> 12 & 0xFFFFF; + const auto lo12 = static_cast(imm) & 0xFFF; + GPR rs1 = zero; + + if (hi20 != 0) { + LUI(rd, hi20); + rs1 = rd; + } + + if (lo12 != 0 || hi20 == 0) { + ADDI(rd, rs1, lo12); + } +} + +void Assembler::LI64(GPR rd, uint64_t imm) noexcept { + // For 64-bit imm, a sequence of up to 8 instructions (i.e. LUI+ADDIW+SLLI+ + // ADDI+SLLI+ADDI+SLLI+ADDI) is emitted. + // In the following, imm is processed from LSB to MSB while instruction emission + // is performed from MSB to LSB by calling LI64() recursively. In each recursion, + // the lowest 12 bits are removed from imm and the optimal shift amount is + // calculated. Then, the remaining part of imm is processed recursively and + // an LUI/ADDIW sequence is emitted as soon as it fits into 32 bits. + + if (static_cast(static_cast(imm << 32) >> 32) == imm) { + // Depending on imm, the following instructions are emitted. + // hi20 == 0 -> ADDIW + // lo12 == 0 && hi20 != 0 -> LUI + // otherwise -> LUI+ADDIW + + // Add 0x800 to cancel out the sign extension of ADDIW. + const auto hi20 = (static_cast(imm) + 0x800) >> 12 & 0xFFFFF; + const auto lo12 = static_cast(imm) & 0xFFF; + GPR rs1 = zero; + + if (hi20 != 0) { + LUI(rd, hi20); + rs1 = rd; + } + + if (lo12 != 0 || hi20 == 0) { + ADDIW(rd, rs1, lo12); + } + return; + } - LUI(rd, upper_imm); - ADDI(rd, rd, static_cast(lower)); + const auto lo12 = static_cast(static_cast(imm << 52) >> 52); + // Add 0x800 to cancel out the sign extension of ADDI. 
+ uint64_t hi52 = (imm + 0x800) >> 12; + const uint32_t shift = 12 + static_cast(std::countr_zero(hi52)); + hi52 = static_cast((static_cast(hi52 >> (shift - 12)) << shift) >> shift); + LI64(rd, hi52); + SLLI64(rd, rd, shift); + if (lo12 != 0) { + ADDI(rd, rd, lo12); } } diff --git a/tests/src/assembler_rv32i_tests.cpp b/tests/src/assembler_rv32i_tests.cpp index d4d931a..10e9a72 100644 --- a/tests/src/assembler_rv32i_tests.cpp +++ b/tests/src/assembler_rv32i_tests.cpp @@ -339,16 +339,65 @@ TEST_CASE("LI", "[rv32i]") { REQUIRE(vals[1] == val_2); }; - // Immediates that fit within -2048 to 2047 should only emit an ADDI + ///////// Single ADDI cases + + as.LI(x1, 0); + // addi x1, x0, 0 + compare_vals(0x00000093, 0x00000000); + as.RewindBuffer(); + vals = {}; + as.LI(x1, -1); + // addi x1, x0, -1 compare_vals(0xFFF00093, 0x00000000); + as.RewindBuffer(); + vals = {}; + as.LI(x1, 42); + // addi x1, x0, 42 + compare_vals(0x02A00093, 0x000000000); as.RewindBuffer(); vals = {}; - // Immediates larger than the above should generate both a LUI followed by an ADDI + as.LI(x1, 0x7ff); + // addi x1, x0, 2047 + compare_vals(0x7FF00093, 0x00000000); + as.RewindBuffer(); + vals = {}; + + ///////// Single LUI cases + + as.LI(x1, 0x2A000); + // lui x1, 42 + compare_vals(0x0002A0B7, 0x00000000); + as.RewindBuffer(); + vals = {}; + + as.LI(x1, ~0xFFF); + // lui x1, -1 + compare_vals(0xFFFFF0B7, 0x00000000); + as.RewindBuffer(); + vals = {}; + + as.LI(x1, INT32_MIN); + // lui x1, -524288 + compare_vals(0x800000B7, 0x00000000); + as.RewindBuffer(); + vals = {}; + + ///////// Full LUI+ADDI cases + as.LI(x1, 0x11111111); + // lui x1, 69905 + // addi x1, x1, 273 compare_vals(0x111110B7, 0x11108093); + as.RewindBuffer(); + vals = {}; + + as.LI(x1, INT32_MAX); + // lui x1, -524288 + // addi x1, x1, -1 + compare_vals(0x800000B7, 0xFFF08093); } TEST_CASE("LUI", "[rv32i]") { diff --git a/tests/src/assembler_rv64i_tests.cpp b/tests/src/assembler_rv64i_tests.cpp index 019b34c..dee7838 100644 --- 
a/tests/src/assembler_rv64i_tests.cpp +++ b/tests/src/assembler_rv64i_tests.cpp @@ -1,5 +1,6 @@ #include +#include #include using namespace biscuit; @@ -76,6 +77,149 @@ TEST_CASE("LD", "[rv64i]") { REQUIRE(value == 0xFFFFB783); } +TEST_CASE("LI64", "[rv64i]") { + // Up to 8 instructions can be generated + std::array vals{}; + Assembler as(reinterpret_cast(vals.data()), sizeof(vals)); + + const auto compare_vals = [&vals](const Args&... args) { + static_assert(sizeof...(args) <= vals.size()); + + size_t i = 0; + for (const auto arg : {args...}) { + REQUIRE(vals[i] == arg); + i++; + } + }; + + ///////// Single ADDIW cases + + as.LI64(x1, 0); + // addiw x1, x0, 0 + compare_vals(0x0000009BU, 0x00000000U); + as.RewindBuffer(); + vals = {}; + + as.LI64(x1, -1); + // addiw x1, x0, -1 + compare_vals(0xFFF0009BU, 0x00000000U); + as.RewindBuffer(); + vals = {}; + + as.LI64(x1, 42); + // addiw x1, x0, 42 + compare_vals(0x02A0009BU, 0x000000000U); + as.RewindBuffer(); + vals = {}; + + as.LI64(x1, 0x7ff); + // addiw x1, x0, 2047 + compare_vals(0x7FF0009BU, 0x00000000U); + as.RewindBuffer(); + vals = {}; + + ///////// Single LUI cases + + as.LI64(x1, 0x2A000); + // lui x1, 42 + compare_vals(0x0002A0B7U, 0x00000000U); + as.RewindBuffer(); + vals = {}; + + as.LI64(x1, ~0xFFF); + // lui x1, -1 + compare_vals(0xFFFFF0B7U, 0x00000000U); + as.RewindBuffer(); + vals = {}; + + as.LI64(x1, INT32_MIN); + // lui x1, -524288 + compare_vals(0x800000B7U, 0x00000000U); + as.RewindBuffer(); + vals = {}; + + ///////// LUI+ADDIW cases + + as.LI64(x1, 0x11111111); + // lui x1, 69905 + // addiw x1, x1, 273 + compare_vals(0x111110B7U, 0x1110809BU, 0x00000000U); + as.RewindBuffer(); + vals = {}; + + as.LI64(x1, INT32_MAX); + // lui x1, -524288 + // addiw x1, x1, -1 + compare_vals(0x800000B7U, 0xFFF0809BU, 0x00000000U); + as.RewindBuffer(); + vals = {}; + + ///////// ADDIW+SLLI cases + + as.LI64(x1, 0x7FF0000000ULL); + // addiw x1, x0, 2047 + // slli x1, x1, 28 + compare_vals(0x7FF0009BU, 0x01C09093U, 
0x000000000U); + as.RewindBuffer(); + vals = {}; + + as.LI64(x1, 0xABC00000ULL); + // addiw x1, x0, 687 + // slli x1, x1, 22 + compare_vals(0x2AF0009BU, 0x01609093U, 0x000000000U); + as.RewindBuffer(); + vals = {}; + + ///////// LUI+ADDIW+SLLI cases + + as.LI64(x1, 0x7FFFFFFF0000ULL); + // lui x1, -524288 + // addiw x1, x1, -1 + // slli x1, x1, 16 + compare_vals(0x800000B7U, 0xFFF0809BU, 0x01009093U, 0x000000000U); + as.RewindBuffer(); + vals = {}; + + ///////// LUI+ADDIW+SLLI+ADDI cases + + as.LI64(x1, 0x7FFFFFFF0123); + // lui x1, -524288 + // addiw x1, x1, -1 + // slli x1, x1, 16 + // addi x1, x1, 291 + compare_vals(0x800000B7U, 0xfff0809BU, 0x01009093U, 0x12308093U, + 0x000000000U); + as.RewindBuffer(); + vals = {}; + + ///////// ADDIW+SLLI+ADDI+SLLI+ADDI cases + + as.LI64(x1, 0x8000000080000001ULL); + // addiw x1, x0, -1 + // slli x1, x1, 32 + // addi x1, x1, 1 + // slli x1, x1, 31 + // addi x1, x1, 1 + compare_vals(0xFFF0009BU, 0x02009093U, 0x00108093U, 0x01F09093U, + 0x00108093U, 0x000000000U); + as.RewindBuffer(); + vals = {}; + + ///////// Full LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI cases + + as.LI64(x1, 0x80808000808080F1ULL); + // lui x1, -16 + // addiw x1, x1, 257 + // slli x1, x1, 16 + // addi x1, x1, 1 + // slli x1, x1, 16 + // addi x1, x1, 257 + // slli x1, x1, 15 + // addi x1, x1, 241 + compare_vals(0xFFFF00B7U, 0x1010809BU, 0x01009093U, 0x00108093U, + 0x01009093U, 0x10108093U, 0x00F09093U, 0x0F108093U); +} + TEST_CASE("SD", "[rv64i]") { uint32_t value = 0; Assembler as(reinterpret_cast(&value), sizeof(value));