Skip to content

Commit

Permalink
Add support for ARM counters
Browse files Browse the repository at this point in the history
Adding support for ARM counters via a third-party lib.
The main target platform is Android.
  • Loading branch information
tabikati authored and baldurk committed May 20, 2020
1 parent 6a2415f commit f1bd727
Show file tree
Hide file tree
Showing 57 changed files with 10,086 additions and 1 deletion.
6 changes: 6 additions & 0 deletions qrenderdoc/Windows/Dialogs/PerformanceCounterSelection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ enum class CounterFamily
Intel,
NVIDIA,
VulkanExtended,
ARM,
};

CounterFamily GetCounterFamily(GPUCounter counter)
Expand All @@ -71,6 +72,10 @@ CounterFamily GetCounterFamily(GPUCounter counter)
{
return CounterFamily::VulkanExtended;
}
else if(IsARMCounter(counter))
{
return CounterFamily::ARM;
}

return CounterFamily::Generic;
}
Expand All @@ -84,6 +89,7 @@ QString ToString(CounterFamily family)
case CounterFamily::Intel: return lit("Intel");
case CounterFamily::NVIDIA: return lit("NVIDIA");
case CounterFamily::VulkanExtended: return lit("Vulkan Extended");
case CounterFamily::ARM: return lit("ARM");
case CounterFamily::Unknown: return lit("Unknown");
}

Expand Down
4 changes: 4 additions & 0 deletions qrenderdoc/Windows/PerformanceCounterViewer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ QTableWidgetItem *PerformanceCounterViewer::MakeCounterResultItem(const CounterR

case CounterUnit::Absolute:
case CounterUnit::Ratio: break;

case CounterUnit::Hertz: returnValue += lit(" Hz"); break;
case CounterUnit::Volt: returnValue += lit(" V"); break;
case CounterUnit::Celsius: returnValue += lit(" °C"); break;
}

return new CustomSortedTableItem(returnValue, SortValue(result, description));
Expand Down
11 changes: 11 additions & 0 deletions renderdoc.sln
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NV", "renderdoc\driver\ihv\
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Intel", "renderdoc\driver\ihv\intel\Intel.vcxproj", "{7FCB5FC5-1DBD-4DA6-83A0-6BA4E945BDA5}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ARM", "renderdoc\driver\ihv\arm\ARM.vcxproj", "{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Development|x64 = Development|x64
Expand Down Expand Up @@ -283,6 +285,14 @@ Global
{7FCB5FC5-1DBD-4DA6-83A0-6BA4E945BDA5}.Release|x64.Build.0 = Release|x64
{7FCB5FC5-1DBD-4DA6-83A0-6BA4E945BDA5}.Release|x86.ActiveCfg = Release|Win32
{7FCB5FC5-1DBD-4DA6-83A0-6BA4E945BDA5}.Release|x86.Build.0 = Release|Win32
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Development|x64.ActiveCfg = Development|x64
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Development|x64.Build.0 = Development|x64
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Development|x86.ActiveCfg = Development|Win32
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Development|x86.Build.0 = Development|Win32
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Release|x64.ActiveCfg = Release|x64
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Release|x64.Build.0 = Release|x64
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Release|x86.ActiveCfg = Release|Win32
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -318,5 +328,6 @@ Global
{37955C79-D91D-423F-8C6C-8F5BCF4F28D4} = {B5A783D9-AEB9-420D-8E77-D4D930F8D88C}
{40349AD9-5558-4DF4-84E2-11934DE90A11} = {4DA2F3E3-9A65-45DD-A69B-82C7757D4904}
{7FCB5FC5-1DBD-4DA6-83A0-6BA4E945BDA5} = {4DA2F3E3-9A65-45DD-A69B-82C7757D4904}
{F9CCE6CA-0CA3-4A22-9C7B-881369955E62} = {4DA2F3E3-9A65-45DD-A69B-82C7757D4904}
EndGlobalSection
EndGlobal
6 changes: 6 additions & 0 deletions renderdoc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,12 @@ if(ENABLE_GL OR ENABLE_GLES)
list(APPEND renderdoc_objects $<TARGET_OBJECTS:rdoc_intel>)
endif()

# pull in the ARM folder for perf query: whenever GL or GLES support is enabled, add the
# driver/ihv/arm subdirectory to the build and append the object files of its rdoc_arm
# target to the renderdoc_objects list
if(ENABLE_GL OR ENABLE_GLES)
add_subdirectory(driver/ihv/arm)
list(APPEND renderdoc_objects $<TARGET_OBJECTS:rdoc_arm>)
endif()

add_library(rdoc OBJECT ${sources})
target_compile_definitions(rdoc ${RDOC_DEFINITIONS})
target_include_directories(rdoc ${RDOC_INCLUDES})
Expand Down
11 changes: 11 additions & 0 deletions renderdoc/android/android.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -558,11 +558,22 @@ struct AndroidRemoteServer : public RemoteServer
{
ResetAndroidSettings();

// enable profiling to measure hardware counters
Android::adbExecCommand(m_deviceID, "shell setprop security.perf_harden 0");

LazilyStartLogcatThread();

return RemoteServer::OpenCapture(proxyid, filename, opts, progress);
}

// Closes a capture on the Android device: first writes security.perf_harden back to 1 over adb
// (turning profiling off again), then defers to the base RemoteServer implementation.
virtual void CloseCapture(IReplayController *rend) override
{
  // disable profiling
  const char *hardenCommand = "shell setprop security.perf_harden 1";
  Android::adbExecCommand(m_deviceID, hardenCommand);

  RemoteServer::CloseCapture(rend);
}

virtual rdcstr GetHomeFolder() override { return ""; }
virtual rdcarray<PathEntry> ListFolder(const char *path) override
{
Expand Down
19 changes: 18 additions & 1 deletion renderdoc/api/replay/replay_enums.h
Original file line number Diff line number Diff line change
Expand Up @@ -3229,7 +3229,10 @@ enum class GPUCounter : uint32_t
FirstVulkanExtended = 4000000,
LastNvidia = FirstVulkanExtended - 1,

LastVulkanExtended = 5000000,
FirstARM = 5000000,
LastVulkanExtended = FirstARM - 1,

LastARM = 6000000,
};

ITERABLE_OPERATORS(GPUCounter);
Expand Down Expand Up @@ -3290,6 +3293,17 @@ inline constexpr bool IsVulkanExtendedCounter(GPUCounter c)
return c >= GPUCounter::FirstVulkanExtended && c <= GPUCounter::LastVulkanExtended;
}

DOCUMENT(R"(Check whether or not this is an ARM private counter.
:param GPUCounter c: The counter.
:return: ``True`` if it is an ARM private counter, ``False`` if it's not.
:rtype: ``bool``
)");
inline constexpr bool IsARMCounter(GPUCounter c)
{
  // membership test on the ARM range, inclusive at both ends: FirstARM <= c <= LastARM
  return GPUCounter::FirstARM <= c && c <= GPUCounter::LastARM;
}

DOCUMENT(R"(The unit that GPU counter data is returned in.
.. data:: Absolute
Expand Down Expand Up @@ -3324,6 +3338,9 @@ enum class CounterUnit : uint32_t
Ratio,
Bytes,
Cycles,
Hertz,
Volt,
Celsius
};

DECLARE_REFLECTION_ENUM(CounterUnit);
Expand Down
99 changes: 99 additions & 0 deletions renderdoc/driver/gl/gl_counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <algorithm>
#include <iterator>
#include "driver/ihv/amd/amd_counters.h"
#include "driver/ihv/arm/arm_counters.h"
#include "driver/ihv/intel/intel_gl_counters.h"
#include "gl_driver.h"
#include "gl_replay.h"
Expand Down Expand Up @@ -65,6 +66,11 @@ rdcarray<GPUCounter> GLReplay::EnumerateCounters()
ret.append(m_pIntelCounters->GetPublicCounterIds());
}

if(m_pARMCounters)
{
ret.append(m_pARMCounters->GetPublicCounterIds());
}

return ret;
}

Expand Down Expand Up @@ -96,6 +102,11 @@ CounterDescription GLReplay::DescribeCounter(GPUCounter counterID)
}
}

if(IsARMCounter(counterID) && m_pARMCounters)
{
return m_pARMCounters->GetCounterDescription(counterID);
}

// FFBA5548-FBF8-405D-BA18-F0329DA370A0
desc.uuid.words[0] = 0xFFBA5548;
desc.uuid.words[1] = 0xFBF8405D;
Expand Down Expand Up @@ -461,6 +472,84 @@ rdcarray<CounterResult> GLReplay::FetchCountersIntel(const rdcarray<GPUCounter>
return ret;
}

void GLReplay::FillTimersARM(uint32_t *eventStartID, uint32_t *sampleIndex,
rdcarray<uint32_t> *eventIDs, const DrawcallDescription &drawnode)
{
if(drawnode.children.empty())
return;

for(size_t i = 0; i < drawnode.children.size(); i++)
{
const DrawcallDescription &d = drawnode.children[i];

FillTimersARM(eventStartID, sampleIndex, eventIDs, drawnode.children[i]);

if(d.events.empty())
continue;

eventIDs->push_back(d.eventId);

m_pDriver->ReplayLog(*eventStartID, d.eventId, eReplay_WithoutDraw);

m_pARMCounters->BeginSample(d.eventId);

m_pDriver->ReplayLog(*eventStartID, d.eventId, eReplay_OnlyDraw);

// wait for the GPU to process all commands
GLsync sync = GL.glFenceSync(eGL_SYNC_GPU_COMMANDS_COMPLETE, 0);
GL.glClientWaitSync(sync, eGL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);

m_pARMCounters->EndSample();

GL.glDeleteSync(sync);

*eventStartID = d.eventId + 1;
++*sampleIndex;
}
}

// Gathers results for the given ARM counters. All counters are disabled and then only the
// requested ones are enabled; the draw tree is then walked once per pass reported by the ARM
// counter instance, with marker replay switched off for the duration. The event IDs recorded by
// the final pass are handed to GetCounterData() together with the requested counters.
//
// Every entry of counters is expected to satisfy IsARMCounter().
rdcarray<CounterResult> GLReplay::FetchCountersARM(const rdcarray<GPUCounter> &counters)
{
  m_pARMCounters->DisableAllCounters();

  // switch on exactly the counters that were asked for
  for(size_t i = 0; i < counters.size(); ++i)
  {
    // Internal-only entry point: a failure here means the caller passed a counter that does not
    // belong to the ARM range
    RDCASSERT(IsARMCounter(counters[i]));
    m_pARMCounters->EnableCounter(counters[i]);
  }

  const uint32_t passCount = m_pARMCounters->GetPassCount();

  // declared outside the loop because the list from the last pass is needed afterwards
  rdcarray<uint32_t> eventIDs;

  m_pDriver->ReplayMarkers(false);

  for(uint32_t pass = 0; pass < passCount; ++pass)
  {
    m_pARMCounters->BeginPass(pass);

    // each pass walks the whole draw tree again from the beginning
    uint32_t eventStartID = 0;
    uint32_t sampleIndex = 0;
    eventIDs.clear();

    FillTimersARM(&eventStartID, &sampleIndex, &eventIDs, m_pDriver->GetRootDraw());

    m_pARMCounters->EndPass();
  }

  m_pDriver->ReplayMarkers(true);

  return m_pARMCounters->GetCounterData(eventIDs, counters);
}

rdcarray<CounterResult> GLReplay::FetchCounters(const rdcarray<GPUCounter> &allCounters)
{
rdcarray<CounterResult> ret;
Expand Down Expand Up @@ -503,6 +592,16 @@ rdcarray<CounterResult> GLReplay::FetchCounters(const rdcarray<GPUCounter> &allC
}
}

if(m_pARMCounters)
{
rdcarray<GPUCounter> armCounters;
std::copy_if(allCounters.begin(), allCounters.end(), std::back_inserter(armCounters),
[](const GPUCounter &c) { return IsARMCounter(c); });

if(!armCounters.empty())
ret = FetchCountersARM(armCounters);
}

if(counters.empty())
{
return ret;
Expand Down
18 changes: 18 additions & 0 deletions renderdoc/driver/gl/gl_replay.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "gl_replay.h"
#include "core/settings.h"
#include "driver/ihv/amd/amd_counters.h"
#include "driver/ihv/arm/arm_counters.h"
#include "driver/ihv/intel/intel_gl_counters.h"
#include "maths/matrix.h"
#include "serialise/rdcfile.h"
Expand Down Expand Up @@ -67,6 +68,7 @@ void GLReplay::Shutdown()
{
SAFE_DELETE(m_pAMDCounters);
SAFE_DELETE(m_pIntelCounters);
SAFE_DELETE(m_pARMCounters);

DeleteDebugData();

Expand Down Expand Up @@ -235,6 +237,7 @@ void GLReplay::SetReplayData(GLWindowingData data)
{
AMDCounters *countersAMD = NULL;
IntelGlCounters *countersIntel = NULL;
ARMCounters *countersARM = NULL;

bool isMesa = false;

Expand Down Expand Up @@ -283,6 +286,11 @@ void GLReplay::SetReplayData(GLWindowingData data)
RDCLOG("AMD GPU detected - trying to initialise AMD counters");
countersAMD = new AMDCounters();
}
else if(m_DriverInfo.vendor == GPUVendor::ARM)
{
RDCLOG("ARM Mali GPU detected - trying to initialise ARM counters");
countersARM = new ARMCounters();
}
else
{
RDCLOG("%s GPU detected - no counters available", ToStr(m_DriverInfo.vendor).c_str());
Expand All @@ -308,6 +316,16 @@ void GLReplay::SetReplayData(GLWindowingData data)
delete countersIntel;
m_pIntelCounters = NULL;
}

if(countersARM && countersARM->Init())
{
m_pARMCounters = countersARM;
}
else
{
delete countersARM;
m_pARMCounters = NULL;
}
}
}

Expand Down
9 changes: 9 additions & 0 deletions renderdoc/driver/gl/gl_replay.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "gl_common.h"

class AMDCounters;
class ARMCounters;
class IntelGlCounters;
class WrappedOpenGL;
struct GLCounterContext;
Expand Down Expand Up @@ -465,4 +466,12 @@ class GLReplay : public IReplayDriver
const DrawcallDescription &drawnode);

rdcarray<CounterResult> FetchCountersIntel(const rdcarray<GPUCounter> &counters);

// ARM counter instance
ARMCounters *m_pARMCounters = NULL;

void FillTimersARM(uint32_t *eventStartID, uint32_t *sampleIndex, rdcarray<uint32_t> *eventIDs,
const DrawcallDescription &drawnode);

rdcarray<CounterResult> FetchCountersARM(const rdcarray<GPUCounter> &counters);
};
Loading

0 comments on commit f1bd727

Please sign in to comment.