Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jit: Flush registers used in memory breakpoint conditions #13245

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 32 additions & 4 deletions Source/Core/Core/PowerPC/BreakPoints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,9 +306,13 @@ void MemChecks::Add(TMemCheck memory_check)
{
m_mem_checks.emplace_back(std::move(memory_check));
}
// If this is the first one, clear the JIT cache so it can switch to
// watchpoint-compatible code.
if (!had_any)

const bool registers_changed = UpdateRegistersUsedInConditions();

// If this is the first memcheck, clear the JIT cache so it can switch to watchpoint-compatible
// code. Or, if the memcheck's condition wants to read from a new register,
// clear the JIT cache to make the slow memory access code flush that register.
if (!had_any || registers_changed)
m_system.GetJitInterface().ClearCache(guard);
m_system.GetMMU().DBATUpdated();
}
Expand Down Expand Up @@ -336,7 +340,10 @@ bool MemChecks::Remove(u32 address)

const Core::CPUThreadGuard guard(m_system);
m_mem_checks.erase(iter);
if (!HasAny())

const bool registers_changed = UpdateRegistersUsedInConditions();

if (registers_changed || !HasAny())
m_system.GetJitInterface().ClearCache(guard);
m_system.GetMMU().DBATUpdated();
return true;
Expand Down Expand Up @@ -379,6 +386,27 @@ bool MemChecks::OverlapsMemcheck(u32 address, u32 length) const
});
}

bool MemChecks::UpdateRegistersUsedInConditions()
{
BitSet32 gprs_used, fprs_used;
for (TMemCheck& mem_check : m_mem_checks)
{
if (mem_check.condition)
{
gprs_used |= mem_check.condition->GetGPRsUsed();
fprs_used |= mem_check.condition->GetFPRsUsed();
}
}

const bool registers_changed =
gprs_used != m_gprs_used_in_conditions || fprs_used != m_fprs_used_in_conditions;

m_gprs_used_in_conditions = gprs_used;
m_fprs_used_in_conditions = fprs_used;

return registers_changed;
}

bool TMemCheck::Action(Core::System& system, u64 value, u32 addr, bool write, size_t size, u32 pc)
{
if (!is_enabled)
Expand Down
9 changes: 9 additions & 0 deletions Source/Core/Core/PowerPC/BreakPoints.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <string>
#include <vector>

#include "Common/BitSet.h"
#include "Common/CommonTypes.h"
#include "Core/PowerPC/Expression.h"

Expand Down Expand Up @@ -127,7 +128,15 @@ class MemChecks
void Clear();
bool HasAny() const { return !m_mem_checks.empty(); }

BitSet32 GetGPRsUsedInConditions() { return m_gprs_used_in_conditions; }
BitSet32 GetFPRsUsedInConditions() { return m_fprs_used_in_conditions; }

private:
// Returns whether any change was made
bool UpdateRegistersUsedInConditions();

TMemChecks m_mem_checks;
Core::System& m_system;
BitSet32 m_gprs_used_in_conditions;
BitSet32 m_fprs_used_in_conditions;
};
19 changes: 19 additions & 0 deletions Source/Core/Core/PowerPC/Expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -495,3 +495,22 @@ std::string Expression::GetText() const
{
return m_text;
}

void Expression::ComputeRegistersUsed()
{
if (m_has_computed_registers_used)
return;

for (const VarBinding& bind : m_binds)
{
switch (bind.type)
{
case VarBindingType::GPR:
m_gprs_used[bind.index] = true;
break;
case VarBindingType::FPR:
m_fprs_used[bind.index] = true;
break;
}
}
}
20 changes: 20 additions & 0 deletions Source/Core/Core/PowerPC/Expression.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include <string_view>
#include <vector>

#include "Common/BitSet.h"

struct expr;
struct expr_var_list;

Expand Down Expand Up @@ -41,6 +43,18 @@ class Expression

std::string GetText() const;

BitSet32 GetGPRsUsed()
{
ComputeRegistersUsed();
return m_gprs_used;
}

BitSet32 GetFPRsUsed()
{
ComputeRegistersUsed();
return m_fprs_used;
}

private:
enum class SynchronizeDirection
{
Expand Down Expand Up @@ -69,10 +83,16 @@ class Expression
void SynchronizeBindings(Core::System& system, SynchronizeDirection dir) const;
void Reporting(const double result) const;

void ComputeRegistersUsed();

std::string m_text;
ExprPointer m_expr;
ExprVarListPointer m_vars;
std::vector<VarBinding> m_binds;

BitSet32 m_gprs_used;
BitSet32 m_fprs_used;
bool m_has_computed_registers_used = false;
};

inline bool EvaluateCondition(Core::System& system, const std::optional<Expression>& condition)
Expand Down
26 changes: 26 additions & 0 deletions Source/Core/Core/PowerPC/Jit64/Jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,32 @@ void Jit64::IntializeSpeculativeConstants()
}
}

void Jit64::FlushPCBeforeSlowAccess()
{
// PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
// interrupt checks, and printing accurate PC locations in debug logs.
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
}

void Jit64::FlushRegistersBeforeSlowAccess()
{
// Register values can be used by memory watchpoint conditions.
MemChecks& mem_checks = m_system.GetPowerPC().GetMemChecks();
if (mem_checks.HasAny())
{
BitSet32 gprs = mem_checks.GetGPRsUsedInConditions();
BitSet32 fprs = mem_checks.GetFPRsUsedInConditions();
if (gprs || fprs)
{
RCForkGuard gpr_guard = gpr.Fork();
RCForkGuard fpr_guard = fpr.Fork();

gpr.Flush(gprs);
fpr.Flush(fprs);
}
}
}

bool Jit64::HandleFunctionHooking(u32 address)
{
const auto result = HLE::TryReplaceFunction(m_ppc_symbol_db, address, PowerPC::CoreMode::JIT);
Expand Down
3 changes: 3 additions & 0 deletions Source/Core/Core/PowerPC/Jit64/Jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ class Jit64 : public JitBase, public QuantizedMemoryRoutines

void IntializeSpeculativeConstants();

void FlushPCBeforeSlowAccess();
void FlushRegistersBeforeSlowAccess();

JitBlockCache* GetBlockCache() override { return &blocks; }
void Trace();

Expand Down
14 changes: 13 additions & 1 deletion Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ void Jit64::lXXx(UGeckoInstruction inst)
PanicAlertFmt("Invalid instruction");
}

FlushRegistersBeforeSlowAccess();

// PowerPC has no 8-bit sign extended load, but x86 does, so merge extsb with the load if we find
// it.
if (CanMergeNextInstructions(1) && accessSize == 8 && js.op[1].inst.OPCD == 31 &&
Expand Down Expand Up @@ -439,6 +441,8 @@ void Jit64::dcbz(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;

FlushRegistersBeforeSlowAccess();

{
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
RCOpArg Rb = gpr.Use(b, RCMode::Read);
Expand Down Expand Up @@ -477,7 +481,7 @@ void Jit64::dcbz(UGeckoInstruction inst)
SwitchToFarCode();
SetJumpTarget(slow);
}
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
FlushPCBeforeSlowAccess();
BitSet32 registersInUse = CallerSavedRegistersInUse();
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
ABI_CallFunctionPR(PowerPC::ClearDCacheLineFromJit, &m_mmu, RSCRATCH);
Expand Down Expand Up @@ -524,6 +528,8 @@ void Jit64::stX(UGeckoInstruction inst)
return;
}

FlushRegistersBeforeSlowAccess();

// If we already know the address of the write
if (!a || gpr.IsImm(a))
{
Expand Down Expand Up @@ -602,6 +608,8 @@ void Jit64::stXx(UGeckoInstruction inst)
break;
}

FlushRegistersBeforeSlowAccess();

const bool does_clobber = WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse);

RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
Expand Down Expand Up @@ -634,6 +642,8 @@ void Jit64::lmw(UGeckoInstruction inst)

int a = inst.RA, d = inst.RD;

FlushRegistersBeforeSlowAccess();

// TODO: This doesn't handle rollback on DSI correctly
{
RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
Expand All @@ -657,6 +667,8 @@ void Jit64::stmw(UGeckoInstruction inst)

int a = inst.RA, d = inst.RD;

FlushRegistersBeforeSlowAccess();

// TODO: This doesn't handle rollback on DSI correctly
for (int i = d; i < 32; i++)
{
Expand Down
6 changes: 6 additions & 0 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStoreFloating.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ void Jit64::lfXXX(UGeckoInstruction inst)

FALLBACK_IF(!indexed && !a);

FlushRegistersBeforeSlowAccess();

s32 offset = 0;
RCOpArg addr = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
RegCache::Realize(addr);
Expand Down Expand Up @@ -103,6 +105,8 @@ void Jit64::stfXXX(UGeckoInstruction inst)

FALLBACK_IF(update && jo.memcheck && indexed && a == b);

FlushRegistersBeforeSlowAccess();

if (single)
{
if (js.fpr_is_store_safe[s] && js.op->fprIsSingle[s])
Expand Down Expand Up @@ -196,6 +200,8 @@ void Jit64::stfiwx(UGeckoInstruction inst)
int a = inst.RA;
int b = inst.RB;

FlushRegistersBeforeSlowAccess();

RCOpArg Ra = a ? gpr.Use(a, RCMode::Read) : RCOpArg::Imm32(0);
RCOpArg Rb = gpr.Use(b, RCMode::Read);
RCOpArg Rs = fpr.Use(s, RCMode::Read);
Expand Down
16 changes: 10 additions & 6 deletions Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
int w = indexed ? inst.Wx : inst.W;
FALLBACK_IF(!a);

FlushRegistersBeforeSlowAccess();

RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCOpArg Ra = update ? gpr.Bind(a, RCMode::ReadWrite) : gpr.Use(a, RCMode::Read);
RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
Expand Down Expand Up @@ -69,8 +71,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
}
else
{
// Stash PC in case asm routine needs to call into C++
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
FlushPCBeforeSlowAccess();

// We know what GQR is here, so we can load RSCRATCH2 and call into the store method directly
// with just the scale bits.
MOV(32, R(RSCRATCH2), Imm32(gqrValue & 0x3F00));
Expand All @@ -83,8 +85,8 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
}
else
{
// Stash PC in case asm routine needs to call into C++
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
FlushPCBeforeSlowAccess();

// Some games (e.g. Dirt 2) incorrectly set the unused bits which breaks the lookup table code.
// Hence, we need to mask out the unused bits. The layout of the GQR register is
// UU[SCALE]UUUUU[TYPE] where SCALE is 6 bits and TYPE is 3 bits, so we have to AND with
Expand Down Expand Up @@ -124,6 +126,8 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
int w = indexed ? inst.Wx : inst.W;
FALLBACK_IF(!a);

FlushRegistersBeforeSlowAccess();

RCX64Reg scratch_guard = gpr.Scratch(RSCRATCH_EXTRA);
RCX64Reg Ra = gpr.Bind(a, update ? RCMode::ReadWrite : RCMode::Read);
RCOpArg Rb = indexed ? gpr.Use(b, RCMode::Read) : RCOpArg::Imm32((u32)offset);
Expand All @@ -144,8 +148,8 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
}
else
{
// Stash PC in case asm routine needs to call into C++
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
FlushPCBeforeSlowAccess();

// Get the high part of the GQR register
OpArg gqr = PPCSTATE_SPR(SPR_GQR0 + i);
gqr.AddMemOffset(2);
Expand Down
28 changes: 8 additions & 20 deletions Source/Core/Core/PowerPC/Jit64Common/EmuCodeBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,15 +386,10 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg& opAddress,
SetJumpTarget(slow);
}

// PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
// interrupt checks, and printing accurate PC locations in debug logs.
//
// In the case of Jit64AsmCommon routines, we don't know the PC here,
// so the caller has to store the PC themselves.
// In the case of Jit64AsmCommon routines, the state we want to store here
// isn't known at JIT time, so the caller has to store it themselves.
if (!(flags & SAFE_LOADSTORE_NO_UPDATE_PC))
{
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
}
m_jit.FlushPCBeforeSlowAccess();

size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
Expand Down Expand Up @@ -457,8 +452,7 @@ void EmuCodeBlock::SafeLoadToRegImmediate(X64Reg reg_value, u32 address, int acc
return;
}

// Helps external systems know which instruction triggered the read.
MOV(32, PPCSTATE(pc), Imm32(m_jit.js.compilerPC));
m_jit.FlushPCBeforeSlowAccess();

// Fall back to general-case code.
ABI_PushRegistersAndAdjustStack(registersInUse, 0);
Expand Down Expand Up @@ -560,15 +554,10 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
SetJumpTarget(slow);
}

// PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
// interrupt checks, and printing accurate PC locations in debug logs.
//
// In the case of Jit64AsmCommon routines, we don't know the PC here,
// so the caller has to store the PC themselves.
// In the case of Jit64AsmCommon routines, the state we want to store here
// isn't known at JIT time, so the caller has to store it themselves.
if (!(flags & SAFE_LOADSTORE_NO_UPDATE_PC))
{
MOV(32, PPCSTATE(pc), Imm32(js.compilerPC));
}
m_jit.FlushPCBeforeSlowAccess();

size_t rsp_alignment = (flags & SAFE_LOADSTORE_NO_PROLOG) ? 8 : 0;
ABI_PushRegistersAndAdjustStack(registersInUse, rsp_alignment);
Expand Down Expand Up @@ -663,8 +652,7 @@ bool EmuCodeBlock::WriteToConstAddress(int accessSize, OpArg arg, u32 address,
}
else
{
// Helps external systems know which instruction triggered the write
MOV(32, PPCSTATE(pc), Imm32(m_jit.js.compilerPC));
m_jit.FlushPCBeforeSlowAccess();

ABI_PushRegistersAndAdjustStack(registersInUse, 0);
switch (accessSize)
Expand Down
Loading