Skip to content

Commit

Permalink
Review
Browse files Browse the repository at this point in the history
  • Loading branch information
Sonicadvance1 committed Jan 22, 2025
1 parent 46dca8a commit 38dbfe2
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 38 deletions.
1 change: 1 addition & 0 deletions FEXCore/include/FEXCore/Debug/InternalThreadState.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ struct InternalThreadState : public FEXCore::Allocator::FEXAllocOperators {

std::shared_mutex ObjectCacheRefCounter {};

// This pointer is owned by the frontend.
FEXCore::Profiler::ThreadStats* ThreadStats {};

///< Data pointer for exclusive use by the frontend
Expand Down
5 changes: 3 additions & 2 deletions FEXCore/include/FEXCore/Utils/Profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ struct ThreadStatsHeader {
uint8_t _pad[2];
char fex_version[48];
std::atomic<uint32_t> Head;
std::atomic<uint64_t> Size;
std::atomic<uint32_t> Size;
uint32_t Pad;
};

struct ThreadStats {
Expand All @@ -46,7 +47,7 @@ struct ThreadStats {

#ifdef _M_ARM_64
/**
* @brief Get the raw cycle counter which is synchronizing.
* @brief Get the raw cycle counter with synchronizing isb.
*
* `CNTVCTSS_EL0` also does the same thing, but requires the FEAT_ECV feature.
*/
Expand Down
8 changes: 4 additions & 4 deletions Source/Common/Profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ void StatAllocBase::SaveHeader(FEXCore::Profiler::AppType AppType) {
Head->Size.store(CurrentSize, std::memory_order_relaxed);
Head->Version = FEXCore::Profiler::STATS_VERSION;

constexpr std::array<char, std::char_traits<char>::length(GIT_DESCRIBE_STRING) + 1> GitString = {GIT_DESCRIBE_STRING};
std::string_view GitString = GIT_DESCRIBE_STRING;
strncpy(Head->fex_version, GitString.data(), std::min(GitString.size(), sizeof(Head->fex_version)));
Head->app_type = AppType;

Expand All @@ -26,7 +26,7 @@ void StatAllocBase::SaveHeader(FEXCore::Profiler::AppType AppType) {
bool StatAllocBase::AllocateMoreSlots() {
const auto OriginalSlotCount = TotalSlotsFromSize();

uint64_t NewSize = AllocateMoreSlots(CurrentSize * 2);
uint32_t NewSize = FrontendAllocateSlots(CurrentSize * 2);

if (NewSize == CurrentSize) {
return false;
Expand All @@ -39,7 +39,7 @@ bool StatAllocBase::AllocateMoreSlots() {
return true;
}

FEXCore::Profiler::ThreadStats* StatAllocBase::AllocateBaseSlot(uint32_t TID) {
FEXCore::Profiler::ThreadStats* StatAllocBase::AllocateSlot(uint32_t TID) {
if (!RemainingSlots) {
if (!AllocateMoreSlots()) {
return nullptr;
Expand Down Expand Up @@ -76,7 +76,7 @@ FEXCore::Profiler::ThreadStats* StatAllocBase::AllocateBaseSlot(uint32_t TID) {
return AllocatedSlot;
}

void StatAllocBase::DeallocateBaseSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) {
void StatAllocBase::DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) {
if (!AllocatedSlot) {
return;
}
Expand Down
24 changes: 13 additions & 11 deletions Source/Common/Profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,47 +20,49 @@ static inline void memory_barrier() {
#else
static inline void memory_barrier() {
// Intentionally empty.
// x86 is strongly memory ordered for regular loads and stores, so no barrier is needed.
}
#endif

namespace FEX::Profiler {
class StatAllocBase {
public:
virtual ~StatAllocBase() = default;

protected:
FEXCore::Profiler::ThreadStats* AllocateBaseSlot(uint32_t TID);
void DeallocateBaseSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot);
FEXCore::Profiler::ThreadStats* AllocateSlot(uint32_t TID);
void DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot);

uint32_t OffsetFromStat(FEXCore::Profiler::ThreadStats* Stat) const {
return reinterpret_cast<uint64_t>(Stat) - reinterpret_cast<uint64_t>(Base);
}
size_t TotalSlotsFromSize() const {
uint32_t TotalSlotsFromSize() const {
return (CurrentSize - sizeof(FEXCore::Profiler::ThreadStatsHeader)) / sizeof(FEXCore::Profiler::ThreadStats) - 1;
}
size_t SlotIndexFromOffset(uint32_t Offset) {
uint32_t TotalSlotsFromSize(uint32_t Size) const {
return (Size - sizeof(FEXCore::Profiler::ThreadStatsHeader)) / sizeof(FEXCore::Profiler::ThreadStats) - 1;
}

uint32_t SlotIndexFromOffset(uint32_t Offset) {
return (Offset - sizeof(FEXCore::Profiler::ThreadStatsHeader)) / sizeof(FEXCore::Profiler::ThreadStats);
}

void SaveHeader(FEXCore::Profiler::AppType AppType);

void* Base;
size_t CurrentSize {};
uint32_t CurrentSize {};
FEXCore::Profiler::ThreadStatsHeader* Head {};
FEXCore::Profiler::ThreadStats* Stats;
FEXCore::Profiler::ThreadStats* StatTail {};
uint64_t RemainingSlots;
uint32_t RemainingSlots;

// Limited to 4MB which should be a few hundred threads of tracking capability.
// I (Sonicadvance1) wanted to reserve 128MB of VA space because it's cheap, but ran into a bug when running WINE.
// WINE allocates [0x7fff'fe00'0000, 0x7fff'ffff'0000) which /consistently/ overlaps with FEX's sigaltstack.
// This only occurs when this stat allocation size is large as the top-down allocation pushes the alt-stack further.
// Additionally, only occurs on 48-bit VA systems, as mmap on lesser VA will fail regardless.
// TODO: Bump allocation size up once FEXCore's allocator can first use the 128TB of blocked VA space on 48-bit systems.
constexpr static size_t MAX_STATS_SIZE = 4 * 1024 * 1024;
constexpr static uint32_t MAX_STATS_SIZE = 4 * 1024 * 1024;

private:
virtual uint64_t AllocateMoreSlots(uint64_t NewSize) = 0;
virtual uint32_t FrontendAllocateSlots(uint32_t NewSize) = 0;
bool AllocateMoreSlots();
};

Expand Down
29 changes: 13 additions & 16 deletions Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@ void ThreadManager::StatAlloc::Initialize() {
goto err;
}

// 128MB ought to be enough for anyone.
// Reserve a region of MAX_STATS_SIZE so we can grow the allocation buffer.
// Number of thread slots when ThreadStatsHeader == 64 bytes and ThreadStats == 40 bytes:
// 1 page: 99 slots
// 1 MB: 26211 slots
// 128 MB: 3355440 slots
Base = ::mmap(nullptr, MAX_STATS_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
if (Base == MAP_FAILED) {
LogMan::Msg::EFmt("[StatAlloc] mmap base failed");
Expand All @@ -61,9 +65,10 @@ void ThreadManager::StatAlloc::Initialize() {
close(fd);
}

uint64_t ThreadManager::StatAlloc::AllocateMoreSlots(uint64_t NewSize) {
uint32_t ThreadManager::StatAlloc::FrontendAllocateSlots(uint32_t NewSize) {
if (CurrentSize == MAX_STATS_SIZE) {
// Nope.
// Allocator has reached maximum slots. We can't allocate any more.
// New threads won't get stats.
return CurrentSize;
}
NewSize = std::max(MAX_STATS_SIZE, NewSize);
Expand All @@ -86,14 +91,6 @@ uint64_t ThreadManager::StatAlloc::AllocateMoreSlots(uint64_t NewSize) {
LogMan::Msg::EFmt("[StatAlloc] allocate more mmap shm failed");
goto err;
}

// TODO: Just a sanity check.
const char* SharedTest = (const char*)Base;
for (size_t i = CurrentSize; i < NewSize; ++i) {
if (SharedTest[i] != 0) {
LogMan::Msg::EFmt("truncate and map shared resulted in not zero'd memory!");
}
}
}

err:
Expand All @@ -103,7 +100,7 @@ uint64_t ThreadManager::StatAlloc::AllocateMoreSlots(uint64_t NewSize) {

FEXCore::Profiler::ThreadStats* ThreadManager::StatAlloc::AllocateSlot(uint32_t TID) {
std::scoped_lock lk(StatMutex);
return AllocateBaseSlot(TID);
return StatAllocBase::AllocateSlot(TID);
}

void ThreadManager::StatAlloc::DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) {
Expand All @@ -112,7 +109,7 @@ void ThreadManager::StatAlloc::DeallocateSlot(FEXCore::Profiler::ThreadStats* Al
}

std::scoped_lock lk(StatMutex);
DeallocateBaseSlot(AllocatedSlot);
StatAllocBase::DeallocateSlot(AllocatedSlot);
}

void ThreadManager::StatAlloc::CleanupForExit() {
Expand All @@ -138,8 +135,8 @@ void ThreadManager::StatAlloc::UnlockAfterFork(FEXCore::Core::InternalThreadStat

StatMutex.StealAndDropActiveLocks();

// shm_memory tied to this process is now not owned by this process.
// Replace the shm region! Otherwise this process will keep reporting time in the original parent thread's stats region!
// shm_memory ownership is retained by the parent process, so the child must replace it with its own.
// Otherwise this process will keep reporting in the original parent thread's stats region.
munmap(Base, MAX_STATS_SIZE);
Base = nullptr;
CurrentSize = 0;
Expand Down Expand Up @@ -380,7 +377,7 @@ void ThreadManager::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThre
// This function is called after fork
// We need to clean up some of the thread data that is dead
for (auto& DeadThread : Threads) {
// This is not owned by the child after fork.
// The fork parent retains ownership of ThreadStats
DeadThread->Thread->ThreadStats = nullptr;

if (DeadThread->Thread == LiveThread) {
Expand Down
2 changes: 1 addition & 1 deletion Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class ThreadManager final {
private:
void Initialize();

uint64_t AllocateMoreSlots(uint64_t NewSize) override;
uint32_t FrontendAllocateSlots(uint32_t NewSize) override;
FEX_CONFIG_OPT(ProfileStats, PROFILESTATS);
FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);

Expand Down
2 changes: 1 addition & 1 deletion Source/Windows/Common/Profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ __attribute__((naked)) uint64_t linux_getpid() {
: "r0", "r8");
}

uint64_t StatAlloc::AllocateMoreSlots(uint64_t NewSize) {
uint32_t StatAlloc::FrontendAllocateSlots(uint32_t NewSize) {
LogMan::Msg::DFmt("Ran out of slots. Can't allocate more");
return CurrentSize;
}
Expand Down
6 changes: 3 additions & 3 deletions Source/Windows/Common/Profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,19 @@ class StatAlloc final : public FEX::Profiler::StatAllocBase {
virtual ~StatAlloc();

FEXCore::Profiler::ThreadStats* AllocateSlot(uint32_t TID) {
return AllocateBaseSlot(TID);
return StatAllocBase::AllocateSlot(TID);
}

void DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) {
if (!AllocatedSlot) {
return;
}

DeallocateBaseSlot(AllocatedSlot);
StatAllocBase::DeallocateSlot(AllocatedSlot);
}

private:
uint64_t AllocateMoreSlots(uint64_t NewSize) override;
uint32_t FrontendAllocateSlots(uint32_t NewSize) override;
};

} // namespace FEX::Windows

0 comments on commit 38dbfe2

Please sign in to comment.