Skip to content

Commit

Permalink
Merge pull request #11 from ksco/li
Browse files Browse the repository at this point in the history
assembler: Refine LI() and add LI64() to the assembler
  • Loading branch information
lioncash authored Jan 14, 2024
2 parents 91a2da7 + 94f5200 commit 2cfdc89
Show file tree
Hide file tree
Showing 4 changed files with 256 additions and 25 deletions.
1 change: 1 addition & 0 deletions include/biscuit/assembler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ class Assembler {
void LH(GPR rd, int32_t imm, GPR rs) noexcept;
void LHU(GPR rd, int32_t imm, GPR rs) noexcept;
void LI(GPR rd, uint32_t imm) noexcept;
void LI64(GPR rd, uint64_t imm) noexcept;
void LUI(GPR rd, uint32_t imm) noexcept;
void LW(GPR rd, int32_t imm, GPR rs) noexcept;

Expand Down
83 changes: 60 additions & 23 deletions src/assembler.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <biscuit/assert.hpp>
#include <biscuit/assembler.hpp>

#include <bit>
#include <cstring>

#include "assembler_util.hpp"
Expand Down Expand Up @@ -303,30 +304,66 @@ void Assembler::LHU(GPR rd, int32_t imm, GPR rs) noexcept {
}

// Loads the 32-bit immediate `imm` into `rd` using the shortest LUI/ADDI
// sequence that produces it.
//
// rd  - destination register.
// imm - 32-bit value to materialize.
void Assembler::LI(GPR rd, uint32_t imm) noexcept {
    // Depending on imm, the following instructions are emitted.
    //   hi20 == 0              -> ADDI
    //   lo12 == 0 && hi20 != 0 -> LUI
    //   otherwise              -> LUI+ADDI

    // ADDI sign-extends its 12-bit immediate, so whenever bit 11 of imm is set
    // the ADDI effectively adds "lo12 - 4096". Adding 0x800 before taking the
    // upper 20 bits bumps hi20 by one in exactly that case, which adds the
    // 4096 back and preserves the value.
    const auto hi20 = ((imm + 0x800) >> 12) & 0xFFFFF;
    const auto lo12 = static_cast<int32_t>(imm) & 0xFFF;

    // Source register of the ADDI: x0 when no LUI was needed, otherwise rd.
    GPR rs1 = zero;

    if (hi20 != 0) {
        LUI(rd, hi20);
        rs1 = rd;
    }

    // The ADDI can be skipped only when the LUI already produced the full
    // value. When hi20 == 0 it must always be emitted (even for imm == 0),
    // since it is the only instruction that writes rd.
    if (lo12 != 0 || hi20 == 0) {
        ADDI(rd, rs1, lo12);
    }
}

// Loads the 64-bit immediate `imm` into `rd`.
//
// rd  - destination register.
// imm - 64-bit value to materialize.
void Assembler::LI64(GPR rd, uint64_t imm) noexcept {
    // For 64-bit imm, a sequence of up to 8 instructions (i.e. LUI+ADDIW+SLLI+
    // ADDI+SLLI+ADDI+SLLI+ADDI) is emitted.
    // In the following, imm is processed from LSB to MSB while instruction emission
    // is performed from MSB to LSB by calling LI64() recursively. In each recursion,
    // the lowest 12 bits are removed from imm and the optimal shift amount is
    // calculated. Then, the remaining part of imm is processed recursively; the
    // recursion ends as soon as the remainder fits into a sign-extended 32-bit
    // value, which is handled by the LUI/ADDIW path below.

    // Base case: imm equals the sign-extension of its low 32 bits.
    if (static_cast<uint64_t>(static_cast<int64_t>(imm << 32) >> 32) == imm) {
        // Depending on imm, the following instructions are emitted.
        //   hi20 == 0              -> ADDIW
        //   lo12 == 0 && hi20 != 0 -> LUI
        //   otherwise              -> LUI+ADDIW

        // Add 0x800 to cancel out the signed extension of ADDIW.
        const auto hi20 = (static_cast<uint32_t>(imm) + 0x800) >> 12 & 0xFFFFF;
        const auto lo12 = static_cast<int32_t>(imm) & 0xFFF;

        // Source register of the ADDIW: x0 when no LUI was needed, otherwise rd.
        GPR rs1 = zero;

        if (hi20 != 0) {
            LUI(rd, hi20);
            rs1 = rd;
        }

        // When hi20 == 0 the ADDIW is the only instruction writing rd, so it
        // is emitted even if lo12 is zero.
        if (lo12 != 0 || hi20 == 0) {
            ADDIW(rd, rs1, lo12);
        }
        return;
    }

    // Sign-extended low 12 bits; added back by the trailing ADDI.
    const auto lo12 = static_cast<int32_t>(static_cast<int64_t>(imm << 52) >> 52);
    // Add 0x800 to cancel out the signed extension of ADDI.
    uint64_t hi52 = (imm + 0x800) >> 12;
    // Fold the trailing zero bits of the upper part into the shift so that they
    // do not have to be materialized. hi52 is non-zero here: every imm for
    // which it would be zero fits in 12 signed bits and took the branch above.
    const uint32_t shift = 12 + static_cast<uint32_t>(std::countr_zero(hi52));
    // Drop the trailing zeros and sign-extend what is left from bit
    // (63 - shift), so the recursive call sees the value that, shifted left by
    // `shift`, reproduces the upper part of imm.
    hi52 = static_cast<uint64_t>((static_cast<int64_t>(hi52 >> (shift - 12)) << shift) >> shift);
    LI64(rd, hi52);
    SLLI64(rd, rd, shift);
    if (lo12 != 0) {
        ADDI(rd, rd, lo12);
    }
}

Expand Down
53 changes: 51 additions & 2 deletions tests/src/assembler_rv32i_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,16 +339,65 @@ TEST_CASE("LI", "[rv32i]") {
REQUIRE(vals[1] == val_2);
};

// Immediates that fit within -2048 to 2047 should only emit an ADDI
///////// Single ADDI cases

as.LI(x1, 0);
// addi x1, x0, 0
compare_vals(0x00000093, 0x00000000);
as.RewindBuffer();
vals = {};

as.LI(x1, -1);
// addi x1, x0, -1
compare_vals(0xFFF00093, 0x00000000);
as.RewindBuffer();
vals = {};

as.LI(x1, 42);
// addi x1, x0, 42
compare_vals(0x02A00093, 0x000000000);
as.RewindBuffer();
vals = {};

// 0x7FF (2047) is the largest immediate that still fits in a single ADDI; anything larger needs a LUI (optionally followed by an ADDI)
as.LI(x1, 0x7ff);
// addi x1, x0, 2047
compare_vals(0x7FF00093, 0x00000000);
as.RewindBuffer();
vals = {};

///////// Single LUI cases

as.LI(x1, 0x2A000);
// lui x1, 42
compare_vals(0x0002A0B7, 0x00000000);
as.RewindBuffer();
vals = {};

as.LI(x1, ~0xFFF);
// lui x1, -1
compare_vals(0xFFFFF0B7, 0x00000000);
as.RewindBuffer();
vals = {};

as.LI(x1, INT32_MIN);
// lui x1, -524288
compare_vals(0x800000B7, 0x00000000);
as.RewindBuffer();
vals = {};

///////// Full LUI+ADDI cases

as.LI(x1, 0x11111111);
// lui x1, 69905
// addi x1, x1, 273
compare_vals(0x111110B7, 0x11108093);
as.RewindBuffer();
vals = {};

as.LI(x1, INT32_MAX);
// lui x1, -524288
// addi x1, x1, -1
compare_vals(0x800000B7, 0xFFF08093);
}

TEST_CASE("LUI", "[rv32i]") {
Expand Down
144 changes: 144 additions & 0 deletions tests/src/assembler_rv64i_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <catch/catch.hpp>

#include <array>
#include <biscuit/assembler.hpp>

using namespace biscuit;
Expand Down Expand Up @@ -76,6 +77,149 @@ TEST_CASE("LD", "[rv64i]") {
REQUIRE(value == 0xFFFFB783);
}

TEST_CASE("LI64", "[rv64i]") {
    // The longest sequence LI64 emits is eight instructions, so the buffer
    // holds exactly that many words.
    constexpr size_t max_instructions = 8;
    std::array<uint32_t, max_instructions> vals{};
    Assembler as(reinterpret_cast<uint8_t*>(vals.data()), sizeof(vals));

    // Emits LI64(x1, imm) and requires the leading words of the buffer to
    // match `words` in order. A trailing zero word in `words` checks that
    // nothing was emitted past the expected sequence.
    const auto expect = [&]<typename... Words>(uint64_t imm, const Words... words) {
        static_assert(sizeof...(Words) <= max_instructions);

        as.LI64(x1, imm);

        size_t index = 0;
        for (const auto word : {words...}) {
            REQUIRE(vals[index] == word);
            ++index;
        }
    };

    // Rewinds the assembler and zeroes the buffer between cases.
    const auto start_over = [&] {
        as.RewindBuffer();
        vals = {};
    };

    // ---- Single ADDIW ----

    // addiw x1, x0, 0
    expect(0, 0x0000009BU, 0x00000000U);
    start_over();

    // addiw x1, x0, -1
    expect(-1, 0xFFF0009BU, 0x00000000U);
    start_over();

    // addiw x1, x0, 42
    expect(42, 0x02A0009BU, 0x00000000U);
    start_over();

    // addiw x1, x0, 2047
    expect(0x7ff, 0x7FF0009BU, 0x00000000U);
    start_over();

    // ---- Single LUI ----

    // lui x1, 42
    expect(0x2A000, 0x0002A0B7U, 0x00000000U);
    start_over();

    // lui x1, -1
    expect(~0xFFF, 0xFFFFF0B7U, 0x00000000U);
    start_over();

    // lui x1, -524288
    expect(INT32_MIN, 0x800000B7U, 0x00000000U);
    start_over();

    // ---- LUI+ADDIW ----

    // lui   x1, 69905
    // addiw x1, x1, 273
    expect(0x11111111, 0x111110B7U, 0x1110809BU, 0x00000000U);
    start_over();

    // lui   x1, -524288
    // addiw x1, x1, -1
    expect(INT32_MAX, 0x800000B7U, 0xFFF0809BU, 0x00000000U);
    start_over();

    // ---- ADDIW+SLLI ----

    // addiw x1, x0, 2047
    // slli  x1, x1, 28
    expect(0x7FF0000000ULL, 0x7FF0009BU, 0x01C09093U, 0x00000000U);
    start_over();

    // addiw x1, x0, 687
    // slli  x1, x1, 22
    expect(0xABC00000ULL, 0x2AF0009BU, 0x01609093U, 0x00000000U);
    start_over();

    // ---- LUI+ADDIW+SLLI ----

    // lui   x1, -524288
    // addiw x1, x1, -1
    // slli  x1, x1, 16
    expect(0x7FFFFFFF0000ULL, 0x800000B7U, 0xFFF0809BU, 0x01009093U, 0x00000000U);
    start_over();

    // ---- LUI+ADDIW+SLLI+ADDI ----

    // lui   x1, -524288
    // addiw x1, x1, -1
    // slli  x1, x1, 16
    // addi  x1, x1, 291
    expect(0x7FFFFFFF0123, 0x800000B7U, 0xFFF0809BU, 0x01009093U, 0x12308093U,
           0x00000000U);
    start_over();

    // ---- ADDIW+SLLI+ADDI+SLLI+ADDI ----

    // addiw x1, x0, -1
    // slli  x1, x1, 32
    // addi  x1, x1, 1
    // slli  x1, x1, 31
    // addi  x1, x1, 1
    expect(0x8000000080000001ULL, 0xFFF0009BU, 0x02009093U, 0x00108093U, 0x01F09093U,
           0x00108093U, 0x00000000U);
    start_over();

    // ---- Full LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI ----

    // lui   x1, -16
    // addiw x1, x1, 257
    // slli  x1, x1, 16
    // addi  x1, x1, 1
    // slli  x1, x1, 16
    // addi  x1, x1, 257
    // slli  x1, x1, 15
    // addi  x1, x1, 241
    expect(0x80808000808080F1ULL, 0xFFFF00B7U, 0x1010809BU, 0x01009093U, 0x00108093U,
           0x01009093U, 0x10108093U, 0x00F09093U, 0x0F108093U);
}

TEST_CASE("SD", "[rv64i]") {
uint32_t value = 0;
Assembler as(reinterpret_cast<uint8_t*>(&value), sizeof(value));
Expand Down

0 comments on commit 2cfdc89

Please sign in to comment.