Skip to content

Commit

Permalink
JitArm64: Get rid of one MOV from lmw/stmw
Browse files Browse the repository at this point in the history
This is possible now that EmitBackpatchRoutine lets the caller choose
the addr register.

Note: The removed MOV is actually traded for one added MOV in farcode.
  • Loading branch information
JosJuice committed Dec 30, 2024
1 parent c3b6e67 commit c88c6f5
Showing 1 changed file with 58 additions and 23 deletions.
81 changes: 58 additions & 23 deletions Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,21 +519,26 @@ void JitArm64::lmw(UGeckoInstruction inst)
// MMU games make use of a >= d despite this being invalid according to the PEM.
// If a >= d occurs, we must make sure to not re-read rA after starting doing the loads.
ARM64Reg addr_reg = ARM64Reg::W1;
Arm64RegCache::ScopedARM64Reg addr_base_reg;
bool a_is_addr_base_reg = false;
if (!a)
MOVI2R(addr_reg, offset);
{
addr_base_reg = gpr.GetScopedReg();
MOVI2R(addr_base_reg, offset);
}
else if (gpr.IsImm(a))
MOVI2R(addr_reg, gpr.GetImm(a) + offset);
{
addr_base_reg = gpr.GetScopedReg();
MOVI2R(addr_base_reg, gpr.GetImm(a) + offset);
}
else if (a < d && offset + (31 - d) * 4 < 0x1000)
{
a_is_addr_base_reg = true;
}
else
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);

Arm64RegCache::ScopedARM64Reg addr_base_reg;
if (!a_is_addr_base_reg)
{
addr_base_reg = gpr.GetScopedReg();
MOV(addr_base_reg, addr_reg);
ADDI2R(addr_base_reg, gpr.R(a), offset, addr_base_reg);
}

BitSet32 gprs_to_discard{};
Expand Down Expand Up @@ -576,20 +581,32 @@ void JitArm64::lmw(UGeckoInstruction inst)
{
gpr.BindToRegister(i, false, false);
ARM64Reg dest_reg = gpr.R(i);
ARM64Reg current_iteration_addr_reg = addr_reg;

if (a_is_addr_base_reg)
ADDI2R(addr_reg, gpr.R(a), offset + (i - d) * 4);
else if (i != d)
ADDI2R(addr_reg, addr_base_reg, (i - d) * 4);
{
const u32 current_iteration_offset = offset + (i - d) * 4;
if (current_iteration_offset != 0)
ADDI2R(addr_reg, gpr.R(a), current_iteration_offset);
else
current_iteration_addr_reg = gpr.R(a);
}
else
{
if (i != d)
ADDI2R(addr_reg, addr_base_reg, (i - d) * 4);
else
current_iteration_addr_reg = addr_base_reg;
}

BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, addr_reg, scratch_gprs,
scratch_fprs);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, current_iteration_addr_reg,
scratch_gprs, scratch_fprs);

gpr.BindToRegister(i, false, true);
ASSERT(dest_reg == gpr.R(i));
Expand Down Expand Up @@ -633,21 +650,26 @@ void JitArm64::stmw(UGeckoInstruction inst)
gpr.Lock(ARM64Reg::W2, ARM64Reg::W30);

ARM64Reg addr_reg = ARM64Reg::W2;
Arm64RegCache::ScopedARM64Reg addr_base_reg;
bool a_is_addr_base_reg = false;
if (!a)
MOVI2R(addr_reg, offset);
{
addr_base_reg = gpr.GetScopedReg();
MOVI2R(addr_base_reg, offset);
}
else if (gpr.IsImm(a))
MOVI2R(addr_reg, gpr.GetImm(a) + offset);
{
addr_base_reg = gpr.GetScopedReg();
MOVI2R(addr_base_reg, gpr.GetImm(a) + offset);
}
else if (offset + (31 - s) * 4 < 0x1000)
{
a_is_addr_base_reg = true;
}
else
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);

Arm64GPRCache::ScopedARM64Reg addr_base_reg;
if (!a_is_addr_base_reg)
{
addr_base_reg = gpr.GetScopedReg();
MOV(addr_base_reg, addr_reg);
ADDI2R(addr_base_reg, gpr.R(a), offset, addr_base_reg);
}

BitSet32 gprs_to_discard{};
Expand Down Expand Up @@ -690,17 +712,30 @@ void JitArm64::stmw(UGeckoInstruction inst)
for (u32 i = s; i < 32; i++)
{
ARM64Reg src_reg = gpr.R(i);
ARM64Reg current_iteration_addr_reg = addr_reg;

if (a_is_addr_base_reg)
ADDI2R(addr_reg, gpr.R(a), offset + (i - s) * 4);
else if (i != s)
ADDI2R(addr_reg, addr_base_reg, (i - s) * 4);
{
const u32 current_iteration_offset = offset + (i - s) * 4;
if (current_iteration_offset != 0)
ADDI2R(addr_reg, gpr.R(a), current_iteration_offset);
else
current_iteration_addr_reg = gpr.R(a);
}
else
{
if (i != s)
ADDI2R(addr_reg, addr_base_reg, (i - s) * 4);
else
current_iteration_addr_reg = addr_base_reg;
}

BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(addr_reg)] = true;

EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, addr_reg, scratch_gprs, scratch_fprs);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, current_iteration_addr_reg,
scratch_gprs, scratch_fprs);

// To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores
// after this instruction, flush registers that would be flushed after this instruction anyway.
Expand Down

0 comments on commit c88c6f5

Please sign in to comment.