Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JitArm64: Hardcode fewer registers in EmitBackpatchRoutine #13256

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Next Next commit
JitArm64: Check GPRs/FPRs to push inside EmitBackpatchRoutine
Preparation for the next commit, which will make EmitBackpatchRoutine
allocate registers on its own. Because the register allocation will
change during the call to EmitBackpatchRoutine, the set of GPRs/FPRs to
push can't be computed prior to the call, so let's compute them during
the call instead.
JosJuice committed Dec 28, 2024
commit 9ab275122989c47cc0096739d03ff105c2d207d1
8 changes: 4 additions & 4 deletions Source/Core/Core/PowerPC/JitArm64/Jit.h
Original file line number Diff line number Diff line change
@@ -273,11 +273,11 @@ class JitArm64 : public JitBase, public Arm64Gen::ARM64CodeBlock, public CommonA
// !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
// !emitting_routine && mode == Auto && jo.fastmem: X30
//
// Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push
// may be clobbered if mode != AlwaysFastAccess.
// If there are any other registers that the caller doesn't mind being overwritten,
// these can be indicated in scratch_gprs and scratch_fprs.
void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
Arm64Gen::ARM64Reg addr, BitSet32 scratch_gprs = BitSet32(0),
BitSet32 scratch_fprs = BitSet32(0), bool emitting_routine = false);

// Loadstore routines
void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
7 changes: 6 additions & 1 deletion Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
Original file line number Diff line number Diff line change
@@ -54,7 +54,7 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx)
}

void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr,
BitSet32 gprs_to_push, BitSet32 fprs_to_push,
BitSet32 scratch_gprs, BitSet32 scratch_fprs,
bool emitting_routine)
{
const u32 access_size = BackPatchInfo::GetFlagSize(flags);
@@ -65,6 +65,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess;
const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess;

const BitSet32 gprs_to_push =
(emitting_routine ? CALLER_SAVED_GPRS : gpr.GetCallerSavedUsed()) & ~scratch_gprs;
const BitSet32 fprs_to_push =
(emitting_routine ? BitSet32(0xFFFFFFFF) : fpr.GetCallerSavedUsed()) & ~scratch_fprs;

bool in_far_code = false;
const u8* fast_access_start = GetCodePtr();
std::optional<FixupBranch> slow_access_fixup;
91 changes: 47 additions & 44 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
Original file line number Diff line number Diff line change
@@ -123,14 +123,14 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
MOV(gpr.R(addr), addr_reg);
}

BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;
scratch_gprs[DecodeReg(dest_reg)] = true;

u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@@ -140,22 +140,23 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, scratch_gprs,
scratch_fprs);
}
else if (mmio_address)
{
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
regs_in_use[DecodeReg(dest_reg)] = 0;
MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use,
fprs_in_use, dest_reg, mmio_address, flags);
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
scratch_gprs[DecodeReg(dest_reg)] = true;
MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
gpr.GetCallerSavedUsed() & ~scratch_gprs,
fpr.GetCallerSavedUsed() & ~scratch_fprs, dest_reg, mmio_address, flags);
addr_reg_set = false;
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, scratch_gprs, scratch_fprs);
}

gpr.BindToRegister(dest, false, true);
@@ -271,13 +272,13 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
MOV(gpr.R(dest), addr_reg);
}

BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@@ -313,22 +314,24 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, scratch_gprs,
scratch_fprs);
}
else if (mmio_address)
{
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
regs_in_use[DecodeReg(ARM64Reg::W30)] = 0;
regs_in_use[DecodeReg(RS)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
// RS used to be cleared from regs_in_use here, so in the inverted scratch mask its bit must be set.
scratch_gprs[DecodeReg(RS)] = true;
MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
regs_in_use, fprs_in_use, RS, mmio_address, flags);
gpr.GetCallerSavedUsed() & ~scratch_gprs,
fpr.GetCallerSavedUsed() & ~scratch_fprs, RS, mmio_address, flags);
addr_reg_set = false;
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, scratch_gprs, scratch_fprs);
}

if (update && !early_update)
@@ -592,16 +595,16 @@ void JitArm64::lmw(UGeckoInstruction inst)
else if (i != d)
ADDI2R(addr_reg, addr_base_reg, (i - d) * 4);

BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(addr_reg)] = 0;
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = 0;
scratch_gprs[DecodeReg(dest_reg)] = true;

EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg),
scratch_gprs, scratch_fprs);

gpr.BindToRegister(i, false, true);
ASSERT(dest_reg == gpr.R(i));
@@ -710,15 +713,15 @@ void JitArm64::stmw(UGeckoInstruction inst)
else if (i != s)
ADDI2R(addr_reg, addr_base_reg, (i - s) * 4);

BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
regs_in_use[DecodeReg(addr_reg)] = 0;
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), scratch_gprs,
scratch_fprs);

// To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores
// after this instruction, flush registers that would be flushed after this instruction anyway.
@@ -1044,14 +1047,14 @@ void JitArm64::dcbz(UGeckoInstruction inst)
}
}

BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
gprs_to_push[DecodeReg(ARM64Reg::W1)] = 0;
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (!jo.fastmem)
gprs_to_push[DecodeReg(ARM64Reg::W0)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1,
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
EncodeRegTo64(addr_reg), scratch_gprs, scratch_fprs);

if (using_dcbz_hack)
SetJumpTarget(end_dcbz_hack);
39 changes: 20 additions & 19 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
Original file line number Diff line number Diff line change
@@ -164,23 +164,24 @@ void JitArm64::lfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg);
}

BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
scratch_gprs[DecodeReg(ARM64Reg::Q0)] = true;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = 0;
scratch_fprs[DecodeReg(VD)] = true;

if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
{
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, scratch_gprs,
scratch_fprs);
}
else
{
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, scratch_gprs, scratch_fprs);
}

const ARM64Reg VD_again = fpr.RW(inst.FD, type, true);
@@ -367,14 +368,14 @@ void JitArm64::stfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg);
}

BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W1)] = 0;
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W2)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = 0;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = 0;
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true;

if (is_immediate)
{
@@ -402,20 +403,20 @@ void JitArm64::stfXX(UGeckoInstruction inst)
else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, scratch_gprs,
scratch_fprs);
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, scratch_gprs,
scratch_fprs);
}
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, scratch_gprs, scratch_fprs);
}

if (update && !early_update)
32 changes: 15 additions & 17 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
Original file line number Diff line number Diff line change
@@ -79,24 +79,23 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)

if (js.assumeNoPairedQuantize)
{
BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;

// Wipe the registers we are using as temporaries
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (jo.memcheck || !jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VS)] = 0;
scratch_fprs[DecodeReg(VS)] = true;

u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
flags |= BackPatchInfo::FLAG_PAIR;

EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), scratch_gprs,
scratch_fprs);
}
else
{
@@ -239,22 +238,21 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)

if (js.assumeNoPairedQuantize)
{
BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;

// Wipe the registers we are using as temporaries
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
flags |= BackPatchInfo::FLAG_PAIR;

EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), scratch_gprs,
scratch_fprs);
}
else
{
Loading