From ac074e9aa7a98e6a4413bb22637c89aa888baa6e Mon Sep 17 00:00:00 2001 From: dzhdan Date: Mon, 23 Dec 2024 17:25:57 +0800 Subject: [PATCH] v1.156: HIGHLIGHTS: Better annotations for profiling tools: - "(Begin/End)Annotation" start/end a named range - "Annotation" emit a named simultaneous event - "nriBeginAnnotation", "nriEndAnnotation" and "nriAnnotation" annotate CPU timeline - "CmdBeginAnnotation", "CmdEndAnnotation" and "CmdAnnotation" annotate GPU timeline (but most of the tools show them on the CPU timeline too) - hooked up "PIX Event Runtime" BREAKING CHANGE: - "nriEvent" renamed to "nriAnnotation" DETAILS: - Core: follow up improvements for v1.155 - Core: added "CmdAnnotation" to emit a named simultaneous event on the GPU timeline - VK: force color.a = 1 for annotations to match PIX behavior - D3D11: clarify that colorization in annotations is not supported - D3D12: try to load dynamically "WinPixEventRuntime.dll" as the only way to colorize annotations --- Include/NRI.h | 16 +++++++++------- Resources/Version.h | 2 +- Source/Creation/Creation.cpp | 2 +- Source/D3D11/CommandBufferD3D11.h | 1 + Source/D3D11/CommandBufferD3D11.hpp | 23 +++++++++++++++-------- Source/D3D11/CommandBufferEmuD3D11.h | 1 + Source/D3D11/CommandBufferEmuD3D11.hpp | 20 ++++++++++++++++++++ Source/D3D11/ImplD3D11.cpp | 10 ++++++++++ Source/D3D11/SharedD3D11.h | 2 ++ Source/D3D12/CommandBufferD3D12.h | 1 + Source/D3D12/CommandBufferD3D12.hpp | 17 +++++++++++++++-- Source/D3D12/DeviceD3D12.h | 3 +-- Source/D3D12/DeviceD3D12.hpp | 3 ++- Source/D3D12/ImplD3D12.cpp | 5 +++++ Source/NONE/ImplNONE.cpp | 4 ++++ Source/Shared/D3DExt.h | 23 +++++++++++++++++++---- Source/Shared/D3DExt.hpp | 14 ++++++++++++++ Source/VK/CommandBufferVK.h | 1 + Source/VK/CommandBufferVK.hpp | 15 ++++++++++++++- Source/VK/DeviceVK.hpp | 1 + Source/VK/DispatchTable.h | 1 + Source/VK/ImplVK.cpp | 5 +++++ Source/Validation/CommandBufferVal.h | 1 + Source/Validation/CommandBufferVal.hpp | 6 ++++++ 
Source/Validation/ImplVal.cpp | 5 +++++ 25 files changed, 155 insertions(+), 27 deletions(-) diff --git a/Include/NRI.h b/Include/NRI.h index 0da41a17..933f6887 100644 --- a/Include/NRI.h +++ b/Include/NRI.h @@ -24,8 +24,8 @@ Non-goals: #pragma once #define NRI_VERSION_MAJOR 1 -#define NRI_VERSION_MINOR 155 -#define NRI_VERSION_DATE "20 December 2024" +#define NRI_VERSION_MINOR 156 +#define NRI_VERSION_DATE "23 December 2024" #include "NRIDescs.h" @@ -35,10 +35,9 @@ NriNamespaceBegin NRI_API Nri(Result) NRI_CALL nriGetInterface(const NriRef(Device) device, const char* interfaceName, size_t interfaceSize, void* interfacePtr); // Annotations for profiling tools: host (via NVTX) -// BGRA color can be constructed via "NriBgra" macro or "BGRA_UNUSED" constant NRI_API void NRI_CALL nriBeginAnnotation(const char* name, uint32_t bgra); // start a named range NRI_API void NRI_CALL nriEndAnnotation(); // end the last opened range -NRI_API void NRI_CALL nriEvent(const char* name, uint32_t bgra); // emit a simultaneous event +NRI_API void NRI_CALL nriAnnotation(const char* name, uint32_t bgra); // emit a named simultaneous event NRI_API void NRI_CALL nriSetThreadName(const char* name); // assign a name to the current thread NriStruct(CoreInterface) { @@ -173,9 +172,12 @@ NriStruct(CoreInterface) { void (NRI_CALL *CmdEndQuery) (NriRef(CommandBuffer) commandBuffer, NriRef(QueryPool) queryPool, uint32_t offset); void (NRI_CALL *CmdCopyQueries) (NriRef(CommandBuffer) commandBuffer, const NriRef(QueryPool) queryPool, uint32_t offset, uint32_t num, NriRef(Buffer) dstBuffer, uint64_t dstOffset); - // Annotations for profiling tools: device - void (NRI_CALL *CmdBeginAnnotation) (NriRef(CommandBuffer) commandBuffer, const char* name, uint32_t bgra); - void (NRI_CALL *CmdEndAnnotation) (NriRef(CommandBuffer) commandBuffer); + // Annotations for profiling tools: device (most of tools show them on the CPU timeline too) + // D3D11: no colors + // D3D12: no colors if 
"WinPixEventRuntime.dll" is not nearby + void (NRI_CALL *CmdBeginAnnotation) (NriRef(CommandBuffer) commandBuffer, const char* name, uint32_t bgra); // start a named range + void (NRI_CALL *CmdEndAnnotation) (NriRef(CommandBuffer) commandBuffer); // end the last opened range + void (NRI_CALL *CmdAnnotation) (NriRef(CommandBuffer) commandBuffer, const char* name, uint32_t bgra); // emit a named simultaneous event // } } Nri(Result) (NRI_CALL *EndCommandBuffer) (NriRef(CommandBuffer) commandBuffer); diff --git a/Resources/Version.h b/Resources/Version.h index 2d6b74b4..4268ba1c 100644 --- a/Resources/Version.h +++ b/Resources/Version.h @@ -4,7 +4,7 @@ #define STR(x) STR_HELPER(x) #define VERSION_MAJOR 1 -#define VERSION_MINOR 155 +#define VERSION_MINOR 156 #define VERSION_BUILD 0 #define VERSION_REVISION 0 diff --git a/Source/Creation/Creation.cpp b/Source/Creation/Creation.cpp index 0622e84f..1d81fbb7 100644 --- a/Source/Creation/Creation.cpp +++ b/Source/Creation/Creation.cpp @@ -125,7 +125,7 @@ NRI_API void NRI_CALL nriEndAnnotation() { #endif } -NRI_API void NRI_CALL nriEvent(const char* name, uint32_t bgra) { +NRI_API void NRI_CALL nriAnnotation(const char* name, uint32_t bgra) { MaybeUnused(name, bgra); #if NRI_USE_NVTX diff --git a/Source/D3D11/CommandBufferD3D11.h b/Source/D3D11/CommandBufferD3D11.h index f3096340..f117d735 100644 --- a/Source/D3D11/CommandBufferD3D11.h +++ b/Source/D3D11/CommandBufferD3D11.h @@ -85,6 +85,7 @@ struct CommandBufferD3D11 final : public CommandBufferHelper { void CopyQueries(const QueryPool& queryPool, uint32_t offset, uint32_t num, Buffer& dstBuffer, uint64_t dstOffset); void BeginAnnotation(const char* name, uint32_t bgra); void EndAnnotation(); + void Annotation(const char* name, uint32_t bgra); private: DeviceD3D11& m_Device; diff --git a/Source/D3D11/CommandBufferD3D11.hpp b/Source/D3D11/CommandBufferD3D11.hpp index 0b32c494..30931260 100644 --- a/Source/D3D11/CommandBufferD3D11.hpp +++ b/Source/D3D11/CommandBufferD3D11.hpp 
@@ -573,21 +573,28 @@ NRI_INLINE void CommandBufferD3D11::CopyQueries(const QueryPool& queryPool, uint } NRI_INLINE void CommandBufferD3D11::BeginAnnotation(const char* name, uint32_t bgra) { - /* - // TODO: unfortunately, just a few tools support "BeginEventInt" +#if USE_ANNOTATION_INT if (m_Version >= 2) PIXBeginEvent(m_DeferredContext, bgra, name); else - */ - PIXBeginEvent(m_Annotation, bgra, name); +#endif + PIXBeginEvent(m_Annotation, bgra, name); } NRI_INLINE void CommandBufferD3D11::EndAnnotation() { - /* - // TODO: unfortunately, just a few tools support "BeginEventInt" +#if USE_ANNOTATION_INT if (m_Version >= 2) PIXEndEvent(m_DeferredContext); else - */ - PIXEndEvent(m_Annotation); +#endif + PIXEndEvent(m_Annotation); +} + +NRI_INLINE void CommandBufferD3D11::Annotation(const char* name, uint32_t bgra) { +#if USE_ANNOTATION_INT + if (m_Version >= 2) + PIXSetMarker(m_DeferredContext, bgra, name); + else +#endif + PIXSetMarker(m_Annotation, bgra, name); } diff --git a/Source/D3D11/CommandBufferEmuD3D11.h b/Source/D3D11/CommandBufferEmuD3D11.h index 2ba5ddd0..f575abd6 100644 --- a/Source/D3D11/CommandBufferEmuD3D11.h +++ b/Source/D3D11/CommandBufferEmuD3D11.h @@ -76,6 +76,7 @@ struct CommandBufferEmuD3D11 final : public CommandBufferHelper { void CopyQueries(const QueryPool& queryPool, uint32_t offset, uint32_t num, Buffer& dstBuffer, uint64_t dstOffset); void BeginAnnotation(const char* name, uint32_t bgra); void EndAnnotation(); + void Annotation(const char* name, uint32_t bgra); private: DeviceD3D11& m_Device; diff --git a/Source/D3D11/CommandBufferEmuD3D11.hpp b/Source/D3D11/CommandBufferEmuD3D11.hpp index d7727332..31e3f5b6 100644 --- a/Source/D3D11/CommandBufferEmuD3D11.hpp +++ b/Source/D3D11/CommandBufferEmuD3D11.hpp @@ -38,6 +38,7 @@ enum OpCode : uint32_t { COPY_QUERIES, BEGIN_ANNOTATION, END_ANNOTATION, + ANNOTATION, UNKNOWN }; @@ -469,6 +470,16 @@ void CommandBufferEmuD3D11::Submit() { case END_ANNOTATION: { commandBuffer.EndAnnotation(); } break; 
+ case ANNOTATION: { + uint32_t len; + const char* name; + Read(m_PushBuffer, i, name, len); + + uint32_t bgra; + Read(m_PushBuffer, i, bgra); + + commandBuffer.Annotation(name, bgra); + } break; } } } @@ -731,3 +742,12 @@ NRI_INLINE void CommandBufferEmuD3D11::BeginAnnotation(const char* name, uint32_ NRI_INLINE void CommandBufferEmuD3D11::EndAnnotation() { Push(m_PushBuffer, END_ANNOTATION); } + +NRI_INLINE void CommandBufferEmuD3D11::Annotation(const char* name, uint32_t bgra) { + uint32_t len = (uint32_t)std::strlen(name) + 1; + + Push(m_PushBuffer, ANNOTATION); + Push(m_PushBuffer, name, len); + Push(m_PushBuffer, bgra); +} + diff --git a/Source/D3D11/ImplD3D11.cpp b/Source/D3D11/ImplD3D11.cpp index 77a8c846..6f482354 100644 --- a/Source/D3D11/ImplD3D11.cpp +++ b/Source/D3D11/ImplD3D11.cpp @@ -423,6 +423,10 @@ static void NRI_CALL CmdEndAnnotation(CommandBuffer& commandBuffer) { ((CommandBufferD3D11&)commandBuffer).EndAnnotation(); } +static void NRI_CALL CmdAnnotation(CommandBuffer& commandBuffer, const char* name, uint32_t bgra) { + ((CommandBufferD3D11&)commandBuffer).Annotation(name, bgra); +} + static void NRI_CALL CmdClearStorageBuffer(CommandBuffer& commandBuffer, const ClearStorageBufferDesc& clearDesc) { ((CommandBufferD3D11&)commandBuffer).ClearStorageBuffer(clearDesc); } @@ -600,6 +604,10 @@ static void NRI_CALL EmuCmdEndAnnotation(CommandBuffer& commandBuffer) { ((CommandBufferEmuD3D11&)commandBuffer).EndAnnotation(); } +static void NRI_CALL EmuCmdAnnotation(CommandBuffer& commandBuffer, const char* name, uint32_t bgra) { + ((CommandBufferEmuD3D11&)commandBuffer).Annotation(name, bgra); +} + static void NRI_CALL EmuCmdClearStorageBuffer(CommandBuffer& commandBuffer, const ClearStorageBufferDesc& clearDesc) { ((CommandBufferEmuD3D11&)commandBuffer).ClearStorageBuffer(clearDesc); } @@ -832,6 +840,7 @@ Result DeviceD3D11::FillFunctionTable(CoreInterface& table) const { table.CmdCopyQueries = ::EmuCmdCopyQueries; table.CmdBeginAnnotation = 
::EmuCmdBeginAnnotation; table.CmdEndAnnotation = ::EmuCmdEndAnnotation; + table.CmdAnnotation = ::EmuCmdAnnotation; table.EndCommandBuffer = ::EmuEndCommandBuffer; } else { table.GetCommandBufferNativeObject = ::GetCommandBufferNativeObject; @@ -876,6 +885,7 @@ Result DeviceD3D11::FillFunctionTable(CoreInterface& table) const { table.CmdCopyQueries = ::CmdCopyQueries; table.CmdBeginAnnotation = ::CmdBeginAnnotation; table.CmdEndAnnotation = ::CmdEndAnnotation; + table.CmdAnnotation = ::CmdAnnotation; table.EndCommandBuffer = ::EndCommandBuffer; } diff --git a/Source/D3D11/SharedD3D11.h b/Source/D3D11/SharedD3D11.h index 8d366597..c459081a 100644 --- a/Source/D3D11/SharedD3D11.h +++ b/Source/D3D11/SharedD3D11.h @@ -6,6 +6,8 @@ #include "SharedExternal.h" +#define USE_ANNOTATION_INT 0 // unfortunately, just a few tools support "BeginEventInt" and "SetMarkerInt" + struct AGSContext; struct ID3D11DeviceContext4; diff --git a/Source/D3D12/CommandBufferD3D12.h b/Source/D3D12/CommandBufferD3D12.h index 1758a4e4..1979b632 100644 --- a/Source/D3D12/CommandBufferD3D12.h +++ b/Source/D3D12/CommandBufferD3D12.h @@ -95,6 +95,7 @@ struct CommandBufferD3D12 { void CopyQueries(const QueryPool& queryPool, uint32_t offset, uint32_t num, Buffer& buffer, uint64_t alignedBufferOffset); void BeginAnnotation(const char* name, uint32_t bgra); void EndAnnotation(); + void Annotation(const char* name, uint32_t bgra); void BuildTopLevelAccelerationStructure(uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, AccelerationStructureBuildBits flags, AccelerationStructure& dst, Buffer& scratch, uint64_t scratchOffset); void BuildBottomLevelAccelerationStructure(uint32_t geometryObjectNum, const GeometryObject* geometryObjects, AccelerationStructureBuildBits flags, AccelerationStructure& dst, Buffer& scratch, uint64_t scratchOffset); void UpdateTopLevelAccelerationStructure(uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, AccelerationStructureBuildBits flags, 
AccelerationStructure& dst, const AccelerationStructure& src, Buffer& scratch, uint64_t scratchOffset); diff --git a/Source/D3D12/CommandBufferD3D12.hpp b/Source/D3D12/CommandBufferD3D12.hpp index 3dd9df63..b78c7c07 100644 --- a/Source/D3D12/CommandBufferD3D12.hpp +++ b/Source/D3D12/CommandBufferD3D12.hpp @@ -933,11 +933,24 @@ NRI_INLINE void CommandBufferD3D12::CopyQueries(const QueryPool& queryPool, uint } NRI_INLINE void CommandBufferD3D12::BeginAnnotation(const char* name, uint32_t bgra) { - PIXBeginEvent(m_GraphicsCommandList, bgra, name); + if (m_Device.GetExt()->m_Pix.BeginEventOnCommandList) + m_Device.GetExt()->m_Pix.BeginEventOnCommandList(m_GraphicsCommandList, bgra, name); + else + PIXBeginEvent(m_GraphicsCommandList, bgra, name); } NRI_INLINE void CommandBufferD3D12::EndAnnotation() { - PIXEndEvent(m_GraphicsCommandList); + if (m_Device.GetExt()->m_Pix.EndEventOnCommandList) + m_Device.GetExt()->m_Pix.EndEventOnCommandList(m_GraphicsCommandList); + else + PIXEndEvent(m_GraphicsCommandList); +} + +NRI_INLINE void CommandBufferD3D12::Annotation(const char* name, uint32_t bgra) { + if (m_Device.GetExt()->m_Pix.SetMarkerOnCommandList) + m_Device.GetExt()->m_Pix.SetMarkerOnCommandList(m_GraphicsCommandList, bgra, name); + else + PIXSetMarker(m_GraphicsCommandList, bgra, name); } NRI_INLINE void CommandBufferD3D12::BuildTopLevelAccelerationStructure(uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, AccelerationStructureBuildBits flags, AccelerationStructure& dst, Buffer& scratch, uint64_t scratchOffset) { diff --git a/Source/D3D12/DeviceD3D12.h b/Source/D3D12/DeviceD3D12.h index dfe1a29f..f5b1dd54 100644 --- a/Source/D3D12/DeviceD3D12.h +++ b/Source/D3D12/DeviceD3D12.h @@ -19,7 +19,6 @@ namespace nri { struct CommandQueueD3D12; -constexpr size_t DESCRIPTOR_HEAP_TYPE_NUM = D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES; constexpr uint32_t DESCRIPTORS_BATCH_SIZE = 1024; struct DeviceD3D12 final : public DeviceBase { @@ -146,7 +145,7 @@ struct 
DeviceD3D12 final : public DeviceBase { DeviceDesc m_Desc = {}; uint8_t m_Version = 0; bool m_IsWrapped = false; - std::array<Lock, DESCRIPTOR_HEAP_TYPE_NUM> m_FreeDescriptorLocks; + std::array<Lock, D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES> m_FreeDescriptorLocks; Lock m_DescriptorHeapLock; Lock m_QueueLock; }; diff --git a/Source/D3D12/DeviceD3D12.hpp b/Source/D3D12/DeviceD3D12.hpp index 1ba41b22..d799aba5 100644 --- a/Source/D3D12/DeviceD3D12.hpp +++ b/Source/D3D12/DeviceD3D12.hpp @@ -58,7 +58,7 @@ DeviceD3D12::DeviceD3D12(const CallbackInterface& callbacks, StdAllocator(GetStdAllocator())); + m_FreeDescriptors.resize(D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES, Vector(GetStdAllocator())); m_AllocationCallbacks.pPrivateData = &GetStdAllocator(); m_AllocationCallbacks.pAllocate = vmaAllocate; m_AllocationCallbacks.pFree = vmaFree; @@ -118,6 +118,7 @@ Result DeviceD3D12::Create(const DeviceCreationDesc& deviceCreationDesc, const D m_Desc.adapterDesc.vendor = GetVendorFromID(desc.VendorId); // Extensions + m_Ext.InitializePixExt(); if (m_Desc.adapterDesc.vendor == Vendor::NVIDIA) m_Ext.InitializeNVExt(this, deviceCreationD3D12Desc.isNVAPILoaded, deviceCreationD3D12Desc.d3d12Device != nullptr); else if (m_Desc.adapterDesc.vendor == Vendor::AMD) diff --git a/Source/D3D12/ImplD3D12.cpp b/Source/D3D12/ImplD3D12.cpp index c35e4ddd..093dea93 100644 --- a/Source/D3D12/ImplD3D12.cpp +++ b/Source/D3D12/ImplD3D12.cpp @@ -259,6 +259,10 @@ static void NRI_CALL CmdEndAnnotation(CommandBuffer& commandBuffer) { ((CommandBufferD3D12&)commandBuffer).EndAnnotation(); } +static void NRI_CALL CmdAnnotation(CommandBuffer& commandBuffer, const char* name, uint32_t bgra) { + ((CommandBufferD3D12&)commandBuffer).Annotation(name, bgra); +} + static void NRI_CALL CmdClearStorageBuffer(CommandBuffer& commandBuffer, const ClearStorageBufferDesc& clearDesc) { ((CommandBufferD3D12&)commandBuffer).ClearStorageBuffer(clearDesc); } @@ -634,6 +638,7 @@ Result DeviceD3D12::FillFunctionTable(CoreInterface& table) const { table.CmdCopyQueries = ::CmdCopyQueries; table.CmdBeginAnnotation
= ::CmdBeginAnnotation; table.CmdEndAnnotation = ::CmdEndAnnotation; + table.CmdAnnotation = ::CmdAnnotation; table.EndCommandBuffer = ::EndCommandBuffer; table.ResetQueries = ::ResetQueries; table.QueueSubmit = ::QueueSubmit; diff --git a/Source/NONE/ImplNONE.cpp b/Source/NONE/ImplNONE.cpp index 971ea086..690d320e 100644 --- a/Source/NONE/ImplNONE.cpp +++ b/Source/NONE/ImplNONE.cpp @@ -518,6 +518,9 @@ static void NRI_CALL CmdBeginAnnotation(CommandBuffer&, const char*, uint32_t) { static void NRI_CALL CmdEndAnnotation(CommandBuffer&) { } +static void NRI_CALL CmdAnnotation(CommandBuffer&, const char*, uint32_t) { +} + static Result NRI_CALL EndCommandBuffer(CommandBuffer&) { return Result::SUCCESS; } @@ -701,6 +704,7 @@ Result DeviceNONE::FillFunctionTable(CoreInterface& table) const { table.CmdCopyQueries = ::CmdCopyQueries; table.CmdBeginAnnotation = ::CmdBeginAnnotation; table.CmdEndAnnotation = ::CmdEndAnnotation; + table.CmdAnnotation = ::CmdAnnotation; table.EndCommandBuffer = ::EndCommandBuffer; table.ResetQueries = ::ResetQueries; table.QueueSubmit = ::QueueSubmit; diff --git a/Source/Shared/D3DExt.h b/Source/Shared/D3DExt.h index b0ce131d..2d26d23c 100644 --- a/Source/Shared/D3DExt.h +++ b/Source/Shared/D3DExt.h @@ -20,6 +20,20 @@ struct AGSFunctionTable { AGS_DRIVEREXTENSIONSDX12_DESTROYDEVICE DestroyDeviceD3D12; }; +# if defined(__d3d11_h__) +typedef void ID3D12GraphicsCommandList; +# endif + +typedef HRESULT(WINAPI* PIX_BEGINEVENTONCOMMANDLIST)(ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString); +typedef HRESULT(WINAPI* PIX_ENDEVENTONCOMMANDLIST)(ID3D12GraphicsCommandList* commandList); +typedef HRESULT(WINAPI* PIX_SETMARKERONCOMMANDLIST)(ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString); + +struct PixFunctionTable { + PIX_BEGINEVENTONCOMMANDLIST BeginEventOnCommandList; + PIX_ENDEVENTONCOMMANDLIST EndEventOnCommandList; + PIX_SETMARKERONCOMMANDLIST SetMarkerOnCommandList; +}; + struct Ext { 
~Ext(); @@ -33,6 +47,7 @@ struct Ext { void InitializeNVExt(const nri::DeviceBase* deviceBase, bool isNVAPILoadedInApp, bool isImported); void InitializeAMDExt(const nri::DeviceBase* deviceBase, AGSContext* agsContext, bool isImported); + void InitializePixExt(); // D3D11 # if defined(__d3d11_h__) @@ -40,16 +55,16 @@ struct Ext { void EndUAVOverlap(ID3D11DeviceContext* deviceContext) const; void WaitForDrain(ID3D11DeviceContext* deviceContext, uint32_t flags) const; void SetDepthBounds(ID3D11DeviceContext* deviceContext, float minBound, float maxBound) const; - void DrawIndirect( - ID3D11DeviceContext* deviceContext, ID3D11Buffer* buffer, uint64_t offset, uint32_t drawNum, uint32_t stride, ID3D11Buffer* countBuffer, uint32_t countBufferOffset) const; - void DrawIndexedIndirect( - ID3D11DeviceContext* deviceContext, ID3D11Buffer* buffer, uint64_t offset, uint32_t drawNum, uint32_t stride, ID3D11Buffer* countBuffer, uint32_t countBufferOffset) const; + void DrawIndirect(ID3D11DeviceContext* deviceContext, ID3D11Buffer* buffer, uint64_t offset, uint32_t drawNum, uint32_t stride, ID3D11Buffer* countBuffer, uint32_t countBufferOffset) const; + void DrawIndexedIndirect(ID3D11DeviceContext* deviceContext, ID3D11Buffer* buffer, uint64_t offset, uint32_t drawNum, uint32_t stride, ID3D11Buffer* countBuffer, uint32_t countBufferOffset) const; # endif const nri::DeviceBase* m_DeviceBase = nullptr; AGSContext* m_AGSContext = nullptr; AGSFunctionTable m_AGS = {}; Library* m_AGSLibrary = nullptr; + PixFunctionTable m_Pix = {}; + Library* m_PixLibrary = nullptr; bool m_IsNvAPIAvailable = false; bool m_IsImported = false; }; diff --git a/Source/Shared/D3DExt.hpp b/Source/Shared/D3DExt.hpp index ca979825..8c9a3289 100644 --- a/Source/Shared/D3DExt.hpp +++ b/Source/Shared/D3DExt.hpp @@ -14,6 +14,11 @@ Ext::~Ext() { UnloadSharedLibrary(*m_AGSLibrary); m_AGSLibrary = nullptr; } + + if (m_PixLibrary) { + UnloadSharedLibrary(*m_PixLibrary); + m_PixLibrary = nullptr; + } } void 
Ext::InitializeNVExt(const nri::DeviceBase* deviceBase, bool isNVAPILoadedInApp, bool isImported) { @@ -92,6 +97,15 @@ void Ext::InitializeAMDExt(const nri::DeviceBase* deviceBase, AGSContext* agsCon m_AGSContext = agsContext; } +void Ext::InitializePixExt() { + m_PixLibrary = LoadSharedLibrary("WinPixEventRuntime.dll"); + if (m_PixLibrary) { + m_Pix.BeginEventOnCommandList = (PIX_BEGINEVENTONCOMMANDLIST)GetSharedLibraryFunction(*m_PixLibrary, "PIXBeginEventOnCommandList"); + m_Pix.EndEventOnCommandList = (PIX_ENDEVENTONCOMMANDLIST)GetSharedLibraryFunction(*m_PixLibrary, "PIXEndEventOnCommandList"); + m_Pix.SetMarkerOnCommandList = (PIX_SETMARKERONCOMMANDLIST)GetSharedLibraryFunction(*m_PixLibrary, "PIXSetMarkerOnCommandList"); + } +} + // D3D11 # if defined(__d3d11_h__) diff --git a/Source/VK/CommandBufferVK.h b/Source/VK/CommandBufferVK.h index 26a32737..14e4360b 100644 --- a/Source/VK/CommandBufferVK.h +++ b/Source/VK/CommandBufferVK.h @@ -65,6 +65,7 @@ struct CommandBufferVK { void EndQuery(QueryPool& queryPool, uint32_t offset); void BeginAnnotation(const char* name, uint32_t bgra); void EndAnnotation(); + void Annotation(const char* name, uint32_t bgra); void ClearStorageBuffer(const ClearStorageBufferDesc& clearDesc); void ClearStorageTexture(const ClearStorageTextureDesc& clearDesc); void CopyBuffer(Buffer& dstBuffer, uint64_t dstOffset, const Buffer& srcBuffer, uint64_t srcOffset, uint64_t size); diff --git a/Source/VK/CommandBufferVK.hpp b/Source/VK/CommandBufferVK.hpp index 6608cc84..142e4bfe 100644 --- a/Source/VK/CommandBufferVK.hpp +++ b/Source/VK/CommandBufferVK.hpp @@ -838,7 +838,7 @@ NRI_INLINE void CommandBufferVK::BeginAnnotation(const char* name, uint32_t bgra info.color[0] = ((bgra >> 16) & 0xFF) / 255.0f; info.color[1] = ((bgra >> 8) & 0xFF) / 255.0f; info.color[2] = ((bgra >> 0) & 0xFF) / 255.0f; - info.color[3] = ((bgra >> 24) & 0xFF) / 255.0f; + info.color[3] = 1.0f; // PIX sets alpha to 1 const auto& vk = m_Device.GetDispatchTable(); if 
(vk.CmdBeginDebugUtilsLabelEXT) @@ -851,6 +851,19 @@ NRI_INLINE void CommandBufferVK::EndAnnotation() { vk.CmdEndDebugUtilsLabelEXT(m_Handle); } +NRI_INLINE void CommandBufferVK::Annotation(const char* name, uint32_t bgra) { + VkDebugUtilsLabelEXT info = {VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT}; + info.pLabelName = name; + info.color[0] = ((bgra >> 16) & 0xFF) / 255.0f; + info.color[1] = ((bgra >> 8) & 0xFF) / 255.0f; + info.color[2] = ((bgra >> 0) & 0xFF) / 255.0f; + info.color[3] = 1.0f; // PIX sets alpha to 1 + + const auto& vk = m_Device.GetDispatchTable(); + if (vk.CmdInsertDebugUtilsLabelEXT) + vk.CmdInsertDebugUtilsLabelEXT(m_Handle, &info); +} + NRI_INLINE void CommandBufferVK::BuildTopLevelAccelerationStructure(uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, AccelerationStructureBuildBits flags, AccelerationStructure& dst, Buffer& scratch, uint64_t scratchOffset) { static_assert(sizeof(VkAccelerationStructureInstanceKHR) == sizeof(GeometryObjectInstance), "Mismatched sizeof"); diff --git a/Source/VK/DeviceVK.hpp b/Source/VK/DeviceVK.hpp index 416d8974..a50aafa5 100644 --- a/Source/VK/DeviceVK.hpp +++ b/Source/VK/DeviceVK.hpp @@ -1500,6 +1500,7 @@ Result DeviceVK::ResolveInstanceDispatchTable(const Vector& desired GET_INSTANCE_PROC(SetDebugUtilsObjectNameEXT); GET_INSTANCE_PROC(CmdBeginDebugUtilsLabelEXT); GET_INSTANCE_PROC(CmdEndDebugUtilsLabelEXT); + GET_INSTANCE_PROC(CmdInsertDebugUtilsLabelEXT); } if (IsExtensionSupported(VK_KHR_SURFACE_EXTENSION_NAME, desiredInstanceExts)) { diff --git a/Source/VK/DispatchTable.h b/Source/VK/DispatchTable.h index 658781c0..76b7273c 100644 --- a/Source/VK/DispatchTable.h +++ b/Source/VK/DispatchTable.h @@ -51,6 +51,7 @@ struct DispatchTable { VULKAN_FUNCTION(SetDebugUtilsObjectNameEXT); VULKAN_FUNCTION(CmdBeginDebugUtilsLabelEXT); VULKAN_FUNCTION(CmdEndDebugUtilsLabelEXT); + VULKAN_FUNCTION(CmdInsertDebugUtilsLabelEXT); // Device VULKAN_FUNCTION(CreateBuffer); diff --git a/Source/VK/ImplVK.cpp 
b/Source/VK/ImplVK.cpp index e40cd355..9127c544 100644 --- a/Source/VK/ImplVK.cpp +++ b/Source/VK/ImplVK.cpp @@ -242,6 +242,10 @@ static void NRI_CALL CmdEndAnnotation(CommandBuffer& commandBuffer) { ((CommandBufferVK&)commandBuffer).EndAnnotation(); } +static void NRI_CALL CmdAnnotation(CommandBuffer& commandBuffer, const char* name, uint32_t bgra) { + ((CommandBufferVK&)commandBuffer).Annotation(name, bgra); +} + static void NRI_CALL CmdClearStorageBuffer(CommandBuffer& commandBuffer, const ClearStorageBufferDesc& clearDesc) { ((CommandBufferVK&)commandBuffer).ClearStorageBuffer(clearDesc); } @@ -622,6 +626,7 @@ Result DeviceVK::FillFunctionTable(CoreInterface& table) const { table.CmdCopyQueries = ::CmdCopyQueries; table.CmdBeginAnnotation = ::CmdBeginAnnotation; table.CmdEndAnnotation = ::CmdEndAnnotation; + table.CmdAnnotation = ::CmdAnnotation; table.EndCommandBuffer = ::EndCommandBuffer; table.ResetQueries = ::ResetQueries; table.QueueSubmit = ::QueueSubmit; diff --git a/Source/Validation/CommandBufferVal.h b/Source/Validation/CommandBufferVal.h index 320e3ca6..f28e96f9 100644 --- a/Source/Validation/CommandBufferVal.h +++ b/Source/Validation/CommandBufferVal.h @@ -73,6 +73,7 @@ struct CommandBufferVal : public DeviceObjectVal { void ResetQueries(QueryPool& queryPool, uint32_t offset, uint32_t num); void BeginAnnotation(const char* name, uint32_t bgra); void EndAnnotation(); + void Annotation(const char* name, uint32_t bgra); void BuildTopLevelAccelerationStructure(uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, AccelerationStructureBuildBits flags, AccelerationStructure& dst, Buffer& scratch, uint64_t scratchOffset); void BuildBottomLevelAccelerationStructure(uint32_t geometryObjectNum, const GeometryObject* geometryObjects, AccelerationStructureBuildBits flags, AccelerationStructure& dst, Buffer& scratch, uint64_t scratchOffset); void UpdateTopLevelAccelerationStructure(uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, 
AccelerationStructureBuildBits flags, AccelerationStructure& dst, const AccelerationStructure& src, Buffer& scratch, uint64_t scratchOffset); diff --git a/Source/Validation/CommandBufferVal.hpp b/Source/Validation/CommandBufferVal.hpp index a22e9268..862726a5 100644 --- a/Source/Validation/CommandBufferVal.hpp +++ b/Source/Validation/CommandBufferVal.hpp @@ -519,6 +519,12 @@ NRI_INLINE void CommandBufferVal::EndAnnotation() { m_AnnotationStack--; } +NRI_INLINE void CommandBufferVal::Annotation(const char* name, uint32_t bgra) { + RETURN_ON_FAILURE(&m_Device, m_IsRecordingStarted, ReturnVoid(), "the command buffer must be in the recording state"); + + GetCoreInterface().CmdAnnotation(*GetImpl(), name, bgra); +} + NRI_INLINE void CommandBufferVal::BuildTopLevelAccelerationStructure(uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, AccelerationStructureBuildBits flags, AccelerationStructure& dst, Buffer& scratch, uint64_t scratchOffset) { RETURN_ON_FAILURE(&m_Device, m_IsRecordingStarted, ReturnVoid(), "the command buffer must be in the recording state"); RETURN_ON_FAILURE(&m_Device, !m_IsRenderPass, ReturnVoid(), "must be called outside of 'CmdBeginRendering/CmdEndRendering'"); diff --git a/Source/Validation/ImplVal.cpp b/Source/Validation/ImplVal.cpp index 6d947456..8f572ef5 100644 --- a/Source/Validation/ImplVal.cpp +++ b/Source/Validation/ImplVal.cpp @@ -218,6 +218,10 @@ static void NRI_CALL CmdEndAnnotation(CommandBuffer& commandBuffer) { ((CommandBufferVal&)commandBuffer).EndAnnotation(); } +static void NRI_CALL CmdAnnotation(CommandBuffer& commandBuffer, const char* name, uint32_t bgra) { + ((CommandBufferVal&)commandBuffer).Annotation(name, bgra); +} + static void NRI_CALL CmdClearStorageBuffer(CommandBuffer& commandBuffer, const ClearStorageBufferDesc& clearDesc) { ((CommandBufferVal&)commandBuffer).ClearStorageBuffer(clearDesc); } @@ -629,6 +633,7 @@ Result DeviceVal::FillFunctionTable(CoreInterface& table) const { table.CmdCopyQueries = 
::CmdCopyQueries; table.CmdBeginAnnotation = ::CmdBeginAnnotation; table.CmdEndAnnotation = ::CmdEndAnnotation; + table.CmdAnnotation = ::CmdAnnotation; table.EndCommandBuffer = ::EndCommandBuffer; table.ResetQueries = ::ResetQueries; table.QueueSubmit = ::QueueSubmit;