Merge pull request #11 from ksco/li

assembler: Refine LI() and add LI64() to the assembler
lioncash · Jan 14, 2024 · 2cfdc89 · 2cfdc89
2 parents 91a2da7 + 94f5200
commit 2cfdc89
Show file tree

Hide file tree

Showing 4 changed files with 256 additions and 25 deletions.
diff --git a/include/biscuit/assembler.hpp b/include/biscuit/assembler.hpp
@@ -178,6 +178,7 @@ class Assembler {
     void LH(GPR rd, int32_t imm, GPR rs) noexcept;
     void LHU(GPR rd, int32_t imm, GPR rs) noexcept;
     void LI(GPR rd, uint32_t imm) noexcept;
+    void LI64(GPR rd, uint64_t imm) noexcept;
     void LUI(GPR rd, uint32_t imm) noexcept;
     void LW(GPR rd, int32_t imm, GPR rs) noexcept;
 

diff --git a/src/assembler.cpp b/src/assembler.cpp
@@ -1,6 +1,7 @@
 #include <biscuit/assert.hpp>
 #include <biscuit/assembler.hpp>
 
+#include <bit>
 #include <cstring>
 
 #include "assembler_util.hpp"
@@ -303,30 +304,66 @@ void Assembler::LHU(GPR rd, int32_t imm, GPR rs) noexcept {
 }
 
 void Assembler::LI(GPR rd, uint32_t imm) noexcept {
-    const auto lower = imm & 0xFFF;
-    const auto upper = (imm & 0xFFFFF000) >> 12;
-    const auto simm = static_cast<int32_t>(imm);
-
-    // If the immediate can fit within 12 bits, we only need to emit an ADDI.
-    if (IsValidSigned12BitImm(simm)) {
-        ADDI(rd, x0, static_cast<int32_t>(lower));
-    } else {
-        const bool needs_increment = (lower & 0x800) != 0;
-        const auto upper_imm = needs_increment ? upper + 1 : upper;
-
-        // Note that we add 1 to the upper portion of the immediate if the lower
-        // immediate's most significant bit is set. This is necessary, as ADDI
-        // sign-extends its 12-bit immediate before performing addition.
-        //
-        // In the event of the sign-extension, this means that we'll be adding
-        // an equivalent of "lower - 4096" to the upper immediate.
-        //
-        // We add 1 to the upper part of the immediate. the upper part's least
-        // significant bit is bit 12. Adding 1 to this bit is equivalent to adding
-        // 4096, which counteracts the sign-extension, preserving the value.
+    // Depending on imm, the following instructions are emitted.
+    // hi20 == 0              -> ADDI
+    // lo12 == 0 && hi20 != 0 -> LUI
+    // otherwise              -> LUI+ADDI
+
+    // Add 0x800 to cancel out the sign extension of ADDI's 12-bit immediate.
+    const auto hi20 = (imm + 0x800) >> 12 & 0xFFFFF;
+    const auto lo12 = static_cast<int32_t>(imm) & 0xFFF;
+    GPR rs1 = zero;
+
+    if (hi20 != 0) {
+        LUI(rd, hi20);
+        rs1 = rd;
+    }
+
+    if (lo12 != 0 || hi20 == 0) {
+        ADDI(rd, rs1, lo12);
+    }
+}
+
+void Assembler::LI64(GPR rd, uint64_t imm) noexcept {
+    // For 64-bit imm, a sequence of up to 8 instructions (i.e. LUI+ADDIW+SLLI+
+    // ADDI+SLLI+ADDI+SLLI+ADDI) is emitted.
+    // In the following, imm is processed from LSB to MSB while instruction emission
+    // is performed from MSB to LSB by calling LI64() recursively. In each recursion,
+    // the lowest 12 bits are removed from imm and the optimal shift amount is
+    // calculated. Then, the remaining part of imm is processed recursively until
+    // it fits into a sign-extended 32-bit value, which LUI and/or ADDIW can load.
+
+    if (static_cast<uint64_t>(static_cast<int64_t>(imm << 32) >> 32) == imm) {
+        // Depending on imm, the following instructions are emitted.
+        // hi20 == 0              -> ADDIW
+        // lo12 == 0 && hi20 != 0 -> LUI
+        // otherwise              -> LUI+ADDIW
+
+        // Add 0x800 to cancel out the sign extension of ADDIW's 12-bit immediate.
+        const auto hi20 = (static_cast<uint32_t>(imm) + 0x800) >> 12 & 0xFFFFF;
+        const auto lo12 = static_cast<int32_t>(imm) & 0xFFF;
+        GPR rs1 = zero;
+
+        if (hi20 != 0) {
+            LUI(rd, hi20);
+            rs1 = rd;
+        }
+
+        if (lo12 != 0 || hi20 == 0) {
+            ADDIW(rd, rs1, lo12);
+        }
+        return;
+    }
 
-        LUI(rd, upper_imm);
-        ADDI(rd, rd, static_cast<int32_t>(lower));
+    const auto lo12 = static_cast<int32_t>(static_cast<int64_t>(imm << 52) >> 52);
+    // Add 0x800 to cancel out the sign extension of ADDI's 12-bit immediate.
+    uint64_t hi52 = (imm + 0x800) >> 12;
+    const uint32_t shift = 12 + static_cast<uint32_t>(std::countr_zero(hi52));
+    hi52 = static_cast<uint64_t>((static_cast<int64_t>(hi52 >> (shift - 12)) << shift) >> shift);
+    LI64(rd, hi52);
+    SLLI64(rd, rd, shift);
+    if (lo12 != 0) {
+        ADDI(rd, rd, lo12);
     }
 }
 

diff --git a/tests/src/assembler_rv32i_tests.cpp b/tests/src/assembler_rv32i_tests.cpp
@@ -339,16 +339,65 @@ TEST_CASE("LI", "[rv32i]") {
         REQUIRE(vals[1] == val_2);
     };
 
-    // Immediates that fit within -2048 to 2047 should only emit an ADDI
+    ///////// Single ADDI cases
+
+    as.LI(x1, 0);
+    // addi x1, x0, 0
+    compare_vals(0x00000093, 0x00000000);
+    as.RewindBuffer();
+    vals = {};
+
     as.LI(x1, -1);
+    // addi x1, x0, -1
     compare_vals(0xFFF00093, 0x00000000);
+    as.RewindBuffer();
+    vals = {};
 
+    as.LI(x1, 42);
+    // addi x1, x0, 42
+    compare_vals(0x02A00093, 0x000000000);
     as.RewindBuffer();
     vals = {};
 
-    // Immediates larger than the above should generate both a LUI followed by an ADDI
+    as.LI(x1, 0x7ff);
+    // addi x1, x0, 2047
+    compare_vals(0x7FF00093, 0x00000000);
+    as.RewindBuffer();
+    vals = {};
+
+    ///////// Single LUI cases
+
+    as.LI(x1, 0x2A000);
+    // lui x1, 42
+    compare_vals(0x0002A0B7, 0x00000000);
+    as.RewindBuffer();
+    vals = {};
+
+    as.LI(x1, ~0xFFF);
+    // lui x1, -1
+    compare_vals(0xFFFFF0B7, 0x00000000);
+    as.RewindBuffer();
+    vals = {};
+
+    as.LI(x1, INT32_MIN);
+    // lui x1, -524288
+    compare_vals(0x800000B7, 0x00000000);
+    as.RewindBuffer();
+    vals = {};
+
+    ///////// Full LUI+ADDI cases
+
     as.LI(x1, 0x11111111);
+    // lui x1, 69905
+    // addi x1, x1, 273
     compare_vals(0x111110B7, 0x11108093);
+    as.RewindBuffer();
+    vals = {};
+
+    as.LI(x1, INT32_MAX);
+    // lui x1, -524288
+    // addi x1, x1, -1
+    compare_vals(0x800000B7, 0xFFF08093);
 }
 
 TEST_CASE("LUI", "[rv32i]") {

diff --git a/tests/src/assembler_rv64i_tests.cpp b/tests/src/assembler_rv64i_tests.cpp
@@ -1,5 +1,6 @@
 #include <catch/catch.hpp>
 
+#include <array>
 #include <biscuit/assembler.hpp>
 
 using namespace biscuit;
@@ -76,6 +77,149 @@ TEST_CASE("LD", "[rv64i]") {
     REQUIRE(value == 0xFFFFB783);
 }
 
+TEST_CASE("LI64", "[rv64i]") {
+    // Up to 8 instructions can be generated
+    std::array<uint32_t, 8> vals{};
+    Assembler as(reinterpret_cast<uint8_t*>(vals.data()), sizeof(vals));
+
+    const auto compare_vals = [&vals]<typename... Args>(const Args&... args) {
+        static_assert(sizeof...(args) <= vals.size());
+
+        size_t i = 0;
+        for (const auto arg : {args...}) {
+            REQUIRE(vals[i] == arg);
+            i++;
+        }
+    };
+
+    ///////// Single ADDIW cases
+
+    as.LI64(x1, 0);
+    // addiw x1, x0, 0
+    compare_vals(0x0000009BU, 0x00000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    as.LI64(x1, -1);
+    // addiw x1, x0, -1
+    compare_vals(0xFFF0009BU, 0x00000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    as.LI64(x1, 42);
+    // addiw x1, x0, 42
+    compare_vals(0x02A0009BU, 0x000000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    as.LI64(x1, 0x7ff);
+    // addiw x1, x0, 2047
+    compare_vals(0x7FF0009BU, 0x00000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    ///////// Single LUI cases
+
+    as.LI64(x1, 0x2A000);
+    // lui x1, 42
+    compare_vals(0x0002A0B7U, 0x00000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    as.LI64(x1, ~0xFFF);
+    // lui x1, -1
+    compare_vals(0xFFFFF0B7U, 0x00000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    as.LI64(x1, INT32_MIN);
+    // lui x1, -524288
+    compare_vals(0x800000B7U, 0x00000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    ///////// LUI+ADDIW cases
+
+    as.LI64(x1, 0x11111111);
+    // lui x1, 69905
+    // addiw x1, x1, 273
+    compare_vals(0x111110B7U, 0x1110809BU, 0x00000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    as.LI64(x1, INT32_MAX);
+    // lui x1, -524288
+    // addiw x1, x1, -1
+    compare_vals(0x800000B7U, 0xFFF0809BU, 0x00000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    ///////// ADDIW+SLLI cases
+
+    as.LI64(x1, 0x7FF0000000ULL);
+    // addiw x1, x0, 2047
+    // slli x1, x1, 28
+    compare_vals(0x7FF0009BU, 0x01C09093U, 0x000000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    as.LI64(x1, 0xABC00000ULL);
+    // addiw x1, x0, 687
+    // slli x1, x1, 22
+    compare_vals(0x2AF0009BU, 0x01609093U, 0x000000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    ///////// LUI+ADDIW+SLLI cases
+
+    as.LI64(x1, 0x7FFFFFFF0000ULL);
+    // lui x1, -524288
+    // addiw x1, x1, -1
+    // slli x1, x1, 16
+    compare_vals(0x800000B7U, 0xFFF0809BU, 0x01009093U, 0x000000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    ///////// LUI+ADDIW+SLLI+ADDI cases
+
+    as.LI64(x1, 0x7FFFFFFF0123);
+    // lui x1, -524288
+    // addiw x1, x1, -1
+    // slli x1, x1, 16
+    // addi x1, x1, 291
+    compare_vals(0x800000B7U, 0xfff0809BU, 0x01009093U, 0x12308093U,
+                 0x000000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    ///////// ADDIW+SLLI+ADDI+SLLI+ADDI cases
+
+    as.LI64(x1, 0x8000000080000001ULL);
+    // addiw x1, x0, -1
+    // slli x1, x1, 32
+    // addi x1, x1, 1
+    // slli x1, x1, 31
+    // addi x1, x1, 1
+    compare_vals(0xFFF0009BU, 0x02009093U, 0x00108093U, 0x01F09093U,
+                 0x00108093U, 0x000000000U);
+    as.RewindBuffer();
+    vals = {};
+
+    ///////// Full LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI cases
+
+    as.LI64(x1, 0x80808000808080F1ULL);
+    // lui x1, -16
+    // addiw x1, x1, 257
+    // slli x1, x1, 16
+    // addi x1, x1, 1
+    // slli x1, x1, 16
+    // addi x1, x1, 257
+    // slli x1, x1, 15
+    // addi x1, x1, 241
+    compare_vals(0xFFFF00B7U, 0x1010809BU, 0x01009093U, 0x00108093U,
+                 0x01009093U, 0x10108093U, 0x00F09093U, 0x0F108093U);
+}
+
 TEST_CASE("SD", "[rv64i]") {
     uint32_t value = 0;
     Assembler as(reinterpret_cast<uint8_t*>(&value), sizeof(value));