From 919412b61114f737a7ac984cd909fd31bced3953 Mon Sep 17 00:00:00 2001 From: dzhdan Date: Wed, 20 Mar 2024 14:04:23 +0800 Subject: [PATCH] v1.125: HIGHLIGHTS: - added "NRILowLatency" extension offering latency reduction via REFLEX (D3D and VK supported) - added "NRIStreamer" extension offering convenient constant, buffer and texture data streaming functionality - added COPY timestamps support - explicit "waitable" SwapChain (it improves behavior in multi-threaded environments, making it possible to "wait for present" in one place and "acquire next image" in another) - bug fixes and improvements BREAKING CHANGES: - "SwapChainPresent" => "QueuePresent" to emphasize that it's a queue command - "QueueWait" and "QueueSignal" merged (again!) into "QueueSubmit": it maps better onto "VK_KHR_synchronization2" and is needed for the "low latency" extension DETAILS: - Core: "QueueWait" and "QueueSignal" merged (again!) into "QueueSubmit" - Core: exposed "QueryType::TIMESTAMP_COPY_QUEUE" needed for timestamps issued in the COPY queue - Core: slightly reworked "DeviceDesc", explicitly exposed NRI extension support status - SwapChain: added "SwapChainDesc::waitable" swapchain support for all APIs - SwapChain: explicitly exposed "WaitForPresent", which implicitly improves multi-threading behavior, making it possible to "wait" in one place and "acquire" in another - SwapChain: added "SwapChainDesc::allowLowLatency" - SwapChain: added "SwapChainDesc::queuedFrameNum" (use 0 for auto mode) - Wrappers: minor tweaks - D3D12: hooked up NVAPI and AMDAGS - D3D12: updated Agility SDK - VK: all CORE functions get queried by CORE or KHR names to improve compatibility with VK 1.2 - VK: VK_KHR_swapchain is an optional extension now - VK: command queue gathering improved by introducing a score system - VK: removed EXT suffixes from VK entities, which have been promoted to VK 1.2 - Validation: fixed a rare SwapChain-related memory leak - Validation: hooked up Streamer - bug fixes and improvements - improved .clang-format --- 
.clang-format | 4 +- CMakeLists.txt | 17 +- Include/Extensions/NRIHelper.h | 51 +- Include/Extensions/NRILowLatency.h | 63 +++ Include/Extensions/NRIStreamer.h | 67 +++ Include/Extensions/NRISwapChain.h | 36 +- Include/Extensions/NRIWrapperD3D12.h | 2 + Include/Extensions/NRIWrapperVK.h | 1 - Include/NRI.h | 10 +- Include/NRIDescs.h | 39 +- Resources/Version.h | 2 +- Source/Creation/Creation.cpp | 23 +- Source/D3D11/BufferD3D11.cpp | 6 +- Source/D3D11/BufferD3D11.h | 2 +- Source/D3D11/BufferD3D11.hpp | 2 +- Source/D3D11/CommandAllocatorD3D11.cpp | 6 +- Source/D3D11/CommandAllocatorD3D11.h | 2 +- Source/D3D11/CommandAllocatorD3D11.hpp | 2 +- Source/D3D11/CommandBufferD3D11.h | 2 +- Source/D3D11/CommandBufferD3D11.hpp | 2 +- Source/D3D11/CommandBufferEmuD3D11.h | 2 +- Source/D3D11/CommandQueueD3D11.cpp | 19 +- Source/D3D11/CommandQueueD3D11.h | 2 +- Source/D3D11/CommandQueueD3D11.hpp | 14 +- Source/D3D11/DescriptorD3D11.cpp | 6 +- Source/D3D11/DescriptorD3D11.h | 2 +- Source/D3D11/DescriptorD3D11.hpp | 2 +- Source/D3D11/DescriptorPoolD3D11.h | 2 +- Source/D3D11/DescriptorPoolD3D11.hpp | 2 +- Source/D3D11/DescriptorSetD3D11.h | 2 +- Source/D3D11/DescriptorSetD3D11.hpp | 2 +- Source/D3D11/DeviceD3D11.cpp | 63 ++- Source/D3D11/DeviceD3D11.h | 11 +- Source/D3D11/DeviceD3D11.hpp | 121 +++- Source/D3D11/FenceD3D11.cpp | 10 +- Source/D3D11/FenceD3D11.h | 6 +- Source/D3D11/FenceD3D11.hpp | 10 +- Source/D3D11/MemoryD3D11.h | 2 +- Source/D3D11/PipelineD3D11.h | 4 +- Source/D3D11/PipelineLayoutD3D11.h | 6 +- Source/D3D11/QueryPoolD3D11.cpp | 4 +- Source/D3D11/QueryPoolD3D11.h | 2 +- Source/D3D11/QueryPoolD3D11.hpp | 2 +- Source/D3D11/SharedD3D11.h | 12 +- Source/D3D11/SwapChainD3D11.cpp | 109 +++- Source/D3D11/SwapChainD3D11.h | 13 +- Source/D3D11/SwapChainD3D11.hpp | 29 +- Source/D3D11/TextureD3D11.cpp | 15 - Source/D3D11/TextureD3D11.h | 7 +- Source/D3D11/TextureD3D11.hpp | 2 +- Source/D3D12/AccelerationStructureD3D12.cpp | 3 +- Source/D3D12/AccelerationStructureD3D12.h | 2 
+- Source/D3D12/AccelerationStructureD3D12.hpp | 2 +- Source/D3D12/BufferD3D12.h | 2 +- Source/D3D12/BufferD3D12.hpp | 2 +- Source/D3D12/CommandAllocatorD3D12.h | 2 +- Source/D3D12/CommandAllocatorD3D12.hpp | 2 +- Source/D3D12/CommandBufferD3D12.h | 6 +- Source/D3D12/CommandBufferD3D12.hpp | 5 +- Source/D3D12/CommandQueueD3D12.cpp | 19 +- Source/D3D12/CommandQueueD3D12.h | 2 +- Source/D3D12/CommandQueueD3D12.hpp | 14 +- Source/D3D12/DescriptorD3D12.h | 4 +- Source/D3D12/DescriptorD3D12.hpp | 2 +- Source/D3D12/DescriptorPoolD3D12.h | 2 +- Source/D3D12/DescriptorPoolD3D12.hpp | 2 +- Source/D3D12/DescriptorSetD3D12.cpp | 4 +- Source/D3D12/DescriptorSetD3D12.h | 2 +- Source/D3D12/DescriptorSetD3D12.hpp | 2 +- Source/D3D12/DeviceD3D12.cpp | 80 ++- Source/D3D12/DeviceD3D12.h | 17 +- Source/D3D12/DeviceD3D12.hpp | 148 +++-- Source/D3D12/FenceD3D12.cpp | 6 +- Source/D3D12/FenceD3D12.h | 2 +- Source/D3D12/FenceD3D12.hpp | 10 +- Source/D3D12/MemoryD3D12.h | 2 +- Source/D3D12/PipelineD3D12.h | 4 +- Source/D3D12/PipelineLayoutD3D12.cpp | 10 +- Source/D3D12/PipelineLayoutD3D12.h | 4 +- Source/D3D12/QueryPoolD3D12.cpp | 7 +- Source/D3D12/QueryPoolD3D12.h | 2 +- Source/D3D12/QueryPoolD3D12.hpp | 2 +- Source/D3D12/SharedD3D12.h | 7 + Source/D3D12/SwapChainD3D12.cpp | 107 +++- Source/D3D12/SwapChainD3D12.h | 13 +- Source/D3D12/SwapChainD3D12.hpp | 29 +- Source/D3D12/TextureD3D12.cpp | 17 - Source/D3D12/TextureD3D12.h | 7 +- Source/D3D12/TextureD3D12.hpp | 2 +- .../D3D11Extensions.h => Shared/D3DExt.h} | 19 +- .../D3D11Extensions.cpp => Shared/D3DExt.hpp} | 98 ++-- Source/Shared/DeviceBase.h | 57 +- Source/Shared/HelperDataUpload.cpp | 213 ++++--- Source/Shared/HelperDataUpload.h | 15 +- Source/Shared/HelperDeviceMemoryAllocator.cpp | 27 +- Source/Shared/HelperDeviceMemoryAllocator.h | 5 +- Source/Shared/HelperWaitIdle.cpp | 28 +- Source/Shared/HelperWaitIdle.h | 13 +- Source/Shared/Lock.h | 4 +- Source/Shared/SharedExternal.cpp | 29 + Source/Shared/SharedExternal.h | 96 +++- 
Source/Shared/StdAllocator.h | 2 +- Source/Shared/Streamer.cpp | 246 ++++++++ Source/Shared/Streamer.h | 69 +++ Source/VK/AccelerationStructureVK.h | 4 +- Source/VK/BufferVK.h | 2 +- Source/VK/BufferVK.hpp | 2 +- Source/VK/CommandAllocatorVK.h | 2 +- Source/VK/CommandAllocatorVK.hpp | 2 +- Source/VK/CommandBufferVK.cpp | 59 -- Source/VK/CommandBufferVK.h | 4 +- Source/VK/CommandBufferVK.hpp | 5 +- Source/VK/CommandQueueVK.cpp | 48 +- Source/VK/CommandQueueVK.h | 8 +- Source/VK/CommandQueueVK.hpp | 14 +- Source/VK/ConversionVK.h | 61 +- Source/VK/DescriptorPoolVK.cpp | 20 +- Source/VK/DescriptorPoolVK.h | 2 +- Source/VK/DescriptorPoolVK.hpp | 2 +- Source/VK/DescriptorSetVK.cpp | 2 +- Source/VK/DescriptorSetVK.h | 2 +- Source/VK/DescriptorSetVK.hpp | 2 +- Source/VK/DescriptorVK.cpp | 2 +- Source/VK/DescriptorVK.h | 4 +- Source/VK/DescriptorVK.hpp | 2 +- Source/VK/DeviceVK.cpp | 517 +++++++++-------- Source/VK/DeviceVK.h | 21 +- Source/VK/DeviceVK.hpp | 87 ++- Source/VK/DispatchTable.h | 70 ++- Source/VK/FenceVK.cpp | 48 +- Source/VK/FenceVK.h | 10 +- Source/VK/FenceVK.hpp | 10 +- Source/VK/MemoryVK.h | 2 +- Source/VK/PipelineLayoutVK.cpp | 162 +++--- Source/VK/PipelineLayoutVK.h | 17 +- Source/VK/PipelineVK.h | 4 +- Source/VK/QueryPoolVK.cpp | 2 +- Source/VK/QueryPoolVK.h | 2 +- Source/VK/QueryPoolVK.hpp | 2 +- Source/VK/SwapChainVK.cpp | 534 +++++++++++------- Source/VK/SwapChainVK.h | 33 +- Source/VK/SwapChainVK.hpp | 29 +- Source/VK/TextureVK.cpp | 19 +- Source/VK/TextureVK.h | 7 +- Source/VK/TextureVK.hpp | 2 +- .../Validation/AccelerationStructureVal.cpp | 16 +- Source/Validation/AccelerationStructureVal.h | 7 +- Source/Validation/BufferVal.h | 2 +- Source/Validation/BufferVal.hpp | 2 +- Source/Validation/CommandBufferVal.cpp | 20 +- Source/Validation/CommandBufferVal.h | 13 +- Source/Validation/CommandBufferVal.hpp | 5 +- Source/Validation/CommandQueueVal.cpp | 34 +- Source/Validation/CommandQueueVal.h | 8 +- Source/Validation/CommandQueueVal.hpp | 14 +- 
Source/Validation/DescriptorPoolVal.h | 14 +- Source/Validation/DescriptorPoolVal.hpp | 2 +- Source/Validation/DescriptorSetVal.h | 2 +- Source/Validation/DescriptorSetVal.hpp | 2 +- Source/Validation/DescriptorVal.cpp | 16 +- Source/Validation/DescriptorVal.h | 2 +- Source/Validation/DescriptorVal.hpp | 2 +- Source/Validation/DeviceVal.cpp | 26 +- Source/Validation/DeviceVal.h | 57 +- Source/Validation/DeviceVal.hpp | 276 +++++++-- Source/Validation/FenceVal.cpp | 8 - Source/Validation/FenceVal.h | 2 - Source/Validation/FenceVal.hpp | 10 +- Source/Validation/MemoryVal.cpp | 4 +- Source/Validation/MemoryVal.h | 2 +- Source/Validation/PipelineLayoutVal.cpp | 12 +- Source/Validation/PipelineLayoutVal.h | 2 +- Source/Validation/PipelineVal.cpp | 16 +- Source/Validation/PipelineVal.h | 3 +- Source/Validation/QueryPoolVal.cpp | 7 +- Source/Validation/QueryPoolVal.h | 2 +- Source/Validation/QueryPoolVal.hpp | 2 +- Source/Validation/SharedVal.h | 38 +- Source/Validation/SwapChainVal.cpp | 51 +- Source/Validation/SwapChainVal.h | 17 +- Source/Validation/SwapChainVal.hpp | 29 +- Source/Validation/TextureVal.h | 2 +- Source/Validation/TextureVal.hpp | 2 +- 183 files changed, 3244 insertions(+), 1610 deletions(-) create mode 100644 Include/Extensions/NRILowLatency.h create mode 100644 Include/Extensions/NRIStreamer.h rename Source/{D3D11/D3D11Extensions.h => Shared/D3DExt.h} (90%) rename Source/{D3D11/D3D11Extensions.cpp => Shared/D3DExt.hpp} (77%) create mode 100644 Source/Shared/Streamer.cpp create mode 100644 Source/Shared/Streamer.h diff --git a/.clang-format b/.clang-format index 57fa2b61..540d61e2 100644 --- a/.clang-format +++ b/.clang-format @@ -1,6 +1,6 @@ --- BasedOnStyle: Google -AccessModifierOffset: -2 +AccessModifierOffset: -4 AlignAfterOpenBracket: DontAlign AlignArrayOfStructures: None AlignConsecutiveAssignments: @@ -77,7 +77,7 @@ BreakBeforeBraces: Attach BreakBeforeConceptDeclarations: Always # BreakBeforeInlineASMColon: OnlyMultiline 
BreakBeforeTernaryOperators: true -BreakConstructorInitializers: BeforeColon +BreakConstructorInitializers: AfterColon BreakInheritanceList: BeforeColon BreakStringLiterals: true ColumnLimit: 180 diff --git a/CMakeLists.txt b/CMakeLists.txt index df7cafcd..77355d3b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ option (NRI_ENABLE_WAYLAND_SUPPORT "Enable 'wayland' support" ON) # Options: D3D12 specific option (NRI_ENABLE_AGILITY_SDK_SUPPORT "Enable Agility SDK suupport to unlock access to recent D3D12 features" OFF) set (NRI_AGILITY_SDK_PATH "C:/AgilitySDK" CACHE STRING "Path to a directory containing Agility SDK (contents of '.nupkg/build/native/')") -set (NRI_AGILITY_SDK_VERSION "711" CACHE STRING "Agility SDK version") +set (NRI_AGILITY_SDK_VERSION "613" CACHE STRING "Agility SDK version") set (NRI_AGILITY_SDK_DIR "AgilitySDK" CACHE STRING "Directory where Agility SDK binaries will be copied to relative to 'CMAKE_RUNTIME_OUTPUT_DIRECTORY'") # Is submodule? @@ -137,7 +137,7 @@ if (WIN32 AND NRI_ENABLE_D3D11_SUPPORT) target_include_directories (NRI_D3D11 PRIVATE "Include" "Source/Shared" "External") target_compile_definitions (NRI_D3D11 PRIVATE ${COMPILE_DEFINITIONS}) target_compile_options (NRI_D3D11 PRIVATE ${COMPILE_OPTIONS}) - target_link_libraries (NRI_D3D11 PRIVATE NRI_Shared ${INPUT_LIBS_D3D11} ${INPUT_LIB_DXGUID} ${INPUT_LIB_NVAPI}) + target_link_libraries (NRI_D3D11 PRIVATE NRI_Shared ${INPUT_LIBS_D3D11} ${INPUT_LIB_NVAPI} ${INPUT_LIB_DXGUID}) set_property (TARGET NRI_D3D11 PROPERTY FOLDER ${PROJECT_NAME}) endif () @@ -147,13 +147,20 @@ if (WIN32 AND NRI_ENABLE_D3D12_SUPPORT) set (COMPILE_DEFINITIONS ${COMPILE_DEFINITIONS} NRI_USE_D3D12=1) set (INPUT_LIBS_D3D12 ${INPUT_LIB_D3D12} ${INPUT_LIB_DXGI}) + find_library (INPUT_LIB_NVAPI NAMES nvapi64 nvapi PATHS "External/nvapi/${NVAPI_BIN_ARCHITECTURE}" REQUIRED) + find_library (INPUT_LIB_AGS NAMES amd_ags_${BIN_ARCHITECTURE} PATHS "External/amdags/ags_lib/lib" REQUIRED) + file (GLOB 
D3D12_SOURCE "Source/D3D12/*.cpp" "Source/D3D12/*.h" "Source/D3D12/*.hpp") source_group ("" FILES ${D3D12_SOURCE}) - add_library (NRI_D3D12 STATIC ${D3D12_SOURCE}) - target_include_directories (NRI_D3D12 PRIVATE "Include" "Source/Shared") + file (GLOB D3D12_NVAPI "External/nvapi/*.h") + source_group ("External/nvapi" FILES ${D3D12_NVAPI}) + file (GLOB D3D12_AMDAGS "External/amdags/ags_lib/inc/*.h") + source_group ("External/amdags" FILES ${D3D12_AMDAGS}) + add_library (NRI_D3D12 STATIC ${D3D12_SOURCE} ${D3D12_NVAPI} ${D3D12_AMDAGS}) + target_include_directories (NRI_D3D12 PRIVATE "Include" "Source/Shared" "External") target_compile_definitions (NRI_D3D12 PRIVATE ${COMPILE_DEFINITIONS}) target_compile_options (NRI_D3D12 PRIVATE ${COMPILE_OPTIONS}) - target_link_libraries (NRI_D3D12 PRIVATE NRI_Shared ${INPUT_LIBS_D3D12}) + target_link_libraries (NRI_D3D12 PRIVATE NRI_Shared ${INPUT_LIBS_D3D12} ${INPUT_LIB_NVAPI}) set_property (TARGET NRI_D3D12 PROPERTY FOLDER ${PROJECT_NAME}) if (NRI_ENABLE_AGILITY_SDK_SUPPORT) diff --git a/Include/Extensions/NRIHelper.h b/Include/Extensions/NRIHelper.h index d3832f4d..89e75ea5 100644 --- a/Include/Extensions/NRIHelper.h +++ b/Include/Extensions/NRIHelper.h @@ -47,10 +47,14 @@ NRI_STRUCT(ResourceGroupDesc) NRI_STRUCT(HelperInterface) { + // Optimized memory allocation for a group of resources uint32_t (NRI_CALL *CalculateAllocationNumber)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(ResourceGroupDesc) resourceGroupDesc); NRI_NAME(Result) (NRI_CALL *AllocateAndBindMemory)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(ResourceGroupDesc) resourceGroupDesc, NRI_NAME(Memory)** allocations); - NRI_NAME(Result) (NRI_CALL *UploadData)(NRI_NAME_REF(CommandQueue) commandQueue, const NRI_NAME(TextureUploadDesc)* textureUploadDescs, uint32_t textureUploadDescNum, - const NRI_NAME(BufferUploadDesc)* bufferUploadDescs, uint32_t bufferUploadDescNum); + + // Populate resources with data (not for streaming data) + NRI_NAME(Result) (NRI_CALL 
*UploadData)(NRI_NAME_REF(CommandQueue) commandQueue, const NRI_NAME(TextureUploadDesc)* textureUploadDescs, uint32_t textureUploadDescNum, const NRI_NAME(BufferUploadDesc)* bufferUploadDescs, uint32_t bufferUploadDescNum); + + // WFI NRI_NAME(Result) (NRI_CALL *WaitForIdle)(NRI_NAME_REF(CommandQueue) commandQueue); }; @@ -97,7 +101,11 @@ static inline NRI_NAME(Format) NRI_FUNC_NAME(GetSupportedDepthFormat)(const NRI_ return NRI_ENUM_MEMBER(Format, UNKNOWN); } -static inline NRI_NAME(TextureDesc) NRI_FUNC_NAME(Texture1D)(NRI_NAME(Format) format, uint16_t width, NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(1), NRI_NAME(Dim_t) arraySize NRI_DEFAULT_VALUE(1), +// "TextureDesc" constructors +static inline NRI_NAME(TextureDesc) NRI_FUNC_NAME(Texture1D)(NRI_NAME(Format) format, + uint16_t width, + NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(1), + NRI_NAME(Dim_t) arraySize NRI_DEFAULT_VALUE(1), NRI_NAME(TextureUsageBits) usageMask NRI_DEFAULT_VALUE(NRI_ENUM_MEMBER(TextureUsageBits, SHADER_RESOURCE))) { NRI_NAME(TextureDesc) textureDesc = NRI_ZERO_INIT; @@ -114,8 +122,13 @@ static inline NRI_NAME(TextureDesc) NRI_FUNC_NAME(Texture1D)(NRI_NAME(Format) fo return textureDesc; } -static inline NRI_NAME(TextureDesc) NRI_FUNC_NAME(Texture2D)(NRI_NAME(Format) format, NRI_NAME(Dim_t) width, NRI_NAME(Dim_t) height, NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(1), NRI_NAME(Dim_t) arraySize NRI_DEFAULT_VALUE(1), - NRI_NAME(TextureUsageBits) usageMask NRI_DEFAULT_VALUE(NRI_ENUM_MEMBER(TextureUsageBits, SHADER_RESOURCE)), NRI_NAME(Sample_t) sampleNum NRI_DEFAULT_VALUE(1)) +static inline NRI_NAME(TextureDesc) NRI_FUNC_NAME(Texture2D)(NRI_NAME(Format) format, + NRI_NAME(Dim_t) width, + NRI_NAME(Dim_t) height, + NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(1), + NRI_NAME(Dim_t) arraySize NRI_DEFAULT_VALUE(1), + NRI_NAME(TextureUsageBits) usageMask NRI_DEFAULT_VALUE(NRI_ENUM_MEMBER(TextureUsageBits, SHADER_RESOURCE)), + NRI_NAME(Sample_t) sampleNum NRI_DEFAULT_VALUE(1)) { NRI_NAME(TextureDesc) 
textureDesc = NRI_ZERO_INIT; textureDesc.type = NRI_ENUM_MEMBER(TextureType, TEXTURE_2D); @@ -131,7 +144,11 @@ static inline NRI_NAME(TextureDesc) NRI_FUNC_NAME(Texture2D)(NRI_NAME(Format) fo return textureDesc; } -static inline NRI_NAME(TextureDesc) NRI_FUNC_NAME(Texture3D)(NRI_NAME(Format) format, NRI_NAME(Dim_t) width, NRI_NAME(Dim_t) height, uint16_t depth, NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(1), +static inline NRI_NAME(TextureDesc) NRI_FUNC_NAME(Texture3D)(NRI_NAME(Format) format, + NRI_NAME(Dim_t) width, + NRI_NAME(Dim_t) height, + uint16_t depth, + NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(1), NRI_NAME(TextureUsageBits) usageMask NRI_DEFAULT_VALUE(NRI_ENUM_MEMBER(TextureUsageBits, SHADER_RESOURCE))) { NRI_NAME(TextureDesc) textureDesc = NRI_ZERO_INIT; @@ -148,8 +165,13 @@ static inline NRI_NAME(TextureDesc) NRI_FUNC_NAME(Texture3D)(NRI_NAME(Format) fo return textureDesc; } -static inline NRI_NAME(TextureBarrierDesc) NRI_FUNC_NAME(TextureBarrier)(NRI_NAME(Texture)* texture, NRI_NAME(AccessLayoutStage) before, NRI_NAME(AccessLayoutStage) after, - NRI_NAME(Mip_t) mipOffset NRI_DEFAULT_VALUE(0), NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(NRI_NAME(REMAINING_MIP_LEVELS)), NRI_NAME(Dim_t) arrayOffset NRI_DEFAULT_VALUE(0), +// "TextureBarrierDesc" constructors +static inline NRI_NAME(TextureBarrierDesc) NRI_FUNC_NAME(TextureBarrier)(NRI_NAME(Texture)* texture, + NRI_NAME(AccessLayoutStage) before, + NRI_NAME(AccessLayoutStage) after, + NRI_NAME(Mip_t) mipOffset NRI_DEFAULT_VALUE(0), + NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(NRI_NAME(REMAINING_MIP_LEVELS)), + NRI_NAME(Dim_t) arrayOffset NRI_DEFAULT_VALUE(0), NRI_NAME(Dim_t) arraySize NRI_DEFAULT_VALUE(NRI_NAME(REMAINING_ARRAY_LAYERS))) { NRI_NAME(TextureBarrierDesc) textureBarrierDesc = NRI_ZERO_INIT; @@ -164,8 +186,11 @@ static inline NRI_NAME(TextureBarrierDesc) NRI_FUNC_NAME(TextureBarrier)(NRI_NAM return textureBarrierDesc; } -static inline NRI_NAME(TextureBarrierDesc) 
NRI_FUNC_NAME(TextureBarrierFromUnknown)(NRI_NAME(Texture)* texture, NRI_NAME(AccessLayoutStage) after, - NRI_NAME(Mip_t) mipOffset NRI_DEFAULT_VALUE(0), NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(NRI_NAME(REMAINING_MIP_LEVELS)), NRI_NAME(Dim_t) arrayOffset NRI_DEFAULT_VALUE(0), +static inline NRI_NAME(TextureBarrierDesc) NRI_FUNC_NAME(TextureBarrierFromUnknown)(NRI_NAME(Texture)* texture, + NRI_NAME(AccessLayoutStage) after, + NRI_NAME(Mip_t) mipOffset NRI_DEFAULT_VALUE(0), + NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(NRI_NAME(REMAINING_MIP_LEVELS)), + NRI_NAME(Dim_t) arrayOffset NRI_DEFAULT_VALUE(0), NRI_NAME(Dim_t) arraySize NRI_DEFAULT_VALUE(NRI_NAME(REMAINING_ARRAY_LAYERS))) { NRI_NAME(TextureBarrierDesc) textureBarrierDesc = NRI_ZERO_INIT; @@ -182,8 +207,10 @@ static inline NRI_NAME(TextureBarrierDesc) NRI_FUNC_NAME(TextureBarrierFromUnkno return textureBarrierDesc; } -static inline NRI_NAME(TextureBarrierDesc) NRI_FUNC_NAME(TextureBarrierFromState)(NRI_NAME_REF(TextureBarrierDesc) prevState, NRI_NAME(AccessLayoutStage) after, - NRI_NAME(Mip_t) mipOffset NRI_DEFAULT_VALUE(0), NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(NRI_NAME(REMAINING_MIP_LEVELS))) +static inline NRI_NAME(TextureBarrierDesc) NRI_FUNC_NAME(TextureBarrierFromState)(NRI_NAME_REF(TextureBarrierDesc) prevState, + NRI_NAME(AccessLayoutStage) after, + NRI_NAME(Mip_t) mipOffset NRI_DEFAULT_VALUE(0), + NRI_NAME(Mip_t) mipNum NRI_DEFAULT_VALUE(NRI_NAME(REMAINING_MIP_LEVELS))) { NRI_REF_ACCESS(prevState)->mipOffset = mipOffset; NRI_REF_ACCESS(prevState)->mipNum = mipNum; diff --git a/Include/Extensions/NRILowLatency.h b/Include/Extensions/NRILowLatency.h new file mode 100644 index 00000000..50fcbc8b --- /dev/null +++ b/Include/Extensions/NRILowLatency.h @@ -0,0 +1,63 @@ +// © 2024 NVIDIA Corporation + +#pragma once + +NRI_NAMESPACE_BEGIN + +NRI_FORWARD_STRUCT(SwapChain); +NRI_FORWARD_STRUCT(CommandQueue); + +// us = microseconds + +NRI_ENUM +( + LatencyMarker, uint8_t, + + // Should be called: + 
SIMULATION_START = 0, // at the start of the simulation execution each frame, but after the call to "LatencySleep" + SIMULATION_END = 1, // at the end of the simulation execution each frame + RENDER_SUBMIT_START = 2, // at the beginning of the render submission execution each frame (must not span into asynchronous rendering) + RENDER_SUBMIT_END = 3, // at the end of the render submission execution each frame + INPUT_SAMPLE = 6, // just before the application gathers input data, but between SIMULATION_START and SIMULATION_END (yes, 6!) + + MAX_NUM +); + +NRI_STRUCT(LatencySleepMode) +{ + uint32_t minIntervalUs; // minimum allowed frame interval (0 - no frame rate limit) + bool lowLatencyMode; // low latency mode enablement + bool lowLatencyBoost; // hint to increase performance to provide additional latency savings at a cost of increased power consumption +}; + +NRI_STRUCT(LatencyReport) +{ + // The time stamp written: + uint64_t inputSampleTimeUs; // when "SetLatencyMarker(INPUT_SAMPLE)" is called + uint64_t simulationStartTimeUs; // when "SetLatencyMarker(SIMULATION_START)" is called + uint64_t simulationEndTimeUs; // when "SetLatencyMarker(SIMULATION_END)" is called + uint64_t renderSubmitStartTimeUs; // when "SetLatencyMarker(RENDER_SUBMIT_START)" is called + uint64_t renderSubmitEndTimeUs; // when "SetLatencyMarker(RENDER_SUBMIT_END)" is called + uint64_t presentStartTimeUs; // right before "Present" + uint64_t presentEndTimeUs; // right after "Present" + uint64_t driverStartTimeUs; // when the first "QueueSubmitTrackable" is called + uint64_t driverEndTimeUs; // when the final "QueueSubmitTrackable" hands off from the driver + uint64_t osRenderQueueStartTimeUs; + uint64_t osRenderQueueEndTimeUs; + uint64_t gpuRenderStartTimeUs; // when the first submission reaches the GPU + uint64_t gpuRenderEndTimeUs; // when the final submission finishes on the GPU +}; + +// Multi-swapchain is supported only by VK +NRI_STRUCT(LowLatencyInterface) +{ + NRI_NAME(Result) 
(NRI_CALL *SetLatencySleepMode)(NRI_NAME_REF(SwapChain) swapChain, const NRI_NAME_REF(LatencySleepMode) latencySleepMode); + NRI_NAME(Result) (NRI_CALL *SetLatencyMarker)(NRI_NAME_REF(SwapChain) swapChain, NRI_NAME(LatencyMarker) latencyMarker); + NRI_NAME(Result) (NRI_CALL *LatencySleep)(NRI_NAME_REF(SwapChain) swapChain); // call once before INPUT_SAMPLE + NRI_NAME(Result) (NRI_CALL *GetLatencyReport)(const NRI_NAME_REF(SwapChain) swapChain, NRI_NAME_REF(LatencyReport) latencyReport); + + // This function must be used in "low latency" mode instead of "QueueSubmit" + void (NRI_CALL *QueueSubmitTrackable)(NRI_NAME_REF(CommandQueue) commandQueue, const NRI_NAME_REF(QueueSubmitDesc) queueSubmitDesc, const NRI_NAME_REF(SwapChain) swapChain); +}; + +NRI_NAMESPACE_END diff --git a/Include/Extensions/NRIStreamer.h b/Include/Extensions/NRIStreamer.h new file mode 100644 index 00000000..97e0f19e --- /dev/null +++ b/Include/Extensions/NRIStreamer.h @@ -0,0 +1,67 @@ +// © 2024 NVIDIA Corporation + +#pragma once + +NRI_NAMESPACE_BEGIN + +NRI_FORWARD_STRUCT(Streamer); + +NRI_STRUCT(StreamerDesc) +{ + // Statically allocated ring-buffer for dynamic constants (optional) + NRI_NAME(MemoryLocation) constantBufferMemoryLocation; // UPLOAD or DEVICE_UPLOAD + uint64_t constantBufferSize; + + // Dynamically (re)allocated ring-buffer for copying and rendering (mandatory) + NRI_NAME(MemoryLocation) dynamicBufferMemoryLocation; // UPLOAD or DEVICE_UPLOAD + NRI_NAME(BufferUsageBits) dynamicBufferUsageBits; + uint32_t frameInFlightNum; +}; + +NRI_STRUCT(BufferUpdateRequestDesc) +{ + // Data to upload + const void* data; // pointer must be valid until "CopyStreamerUpdateRequests" call + uint64_t dataSize; + + // Destination (optional, ignored for constants) + NRI_NAME(Buffer)* dstBuffer; + uint64_t dstBufferOffset; +}; + +NRI_STRUCT(TextureUpdateRequestDesc) +{ + // Data to upload + const void* data; // pointer must be valid until "CopyStreamerUpdateRequests" call + uint32_t dataRowPitch; + 
uint32_t dataSlicePitch; + + // Destination (mandatory) + NRI_NAME(Texture)* dstTexture; + NRI_NAME(TextureRegionDesc) dstRegionDesc; +}; + +NRI_STRUCT(StreamerInterface) +{ + NRI_NAME(Result) (NRI_CALL *CreateStreamer)(NRI_NAME_REF(Device) device, const NRI_NAME_REF(StreamerDesc) streamerDesc, NRI_NAME_REF(Streamer*) streamer); + void (NRI_CALL *DestroyStreamer)(NRI_NAME_REF(Streamer) streamer); + + // Get internal buffers + NRI_NAME(Buffer*) (NRI_CALL *GetStreamerConstantBuffer)(NRI_NAME_REF(Streamer) streamer); // Never changes + NRI_NAME(Buffer*) (NRI_CALL *GetStreamerDynamicBuffer)(NRI_NAME_REF(Streamer) streamer); // Valid only after "CopyStreamerUpdateRequests" + + // Add an update request. Return the offset in the ring buffer and don't invoke any work + uint64_t (NRI_CALL *AddStreamerBufferUpdateRequest)(NRI_NAME_REF(Streamer) streamer, const NRI_NAME_REF(BufferUpdateRequestDesc) bufferUpdateRequestDesc); + uint64_t (NRI_CALL *AddStreamerTextureUpdateRequest)(NRI_NAME_REF(Streamer) streamer, const NRI_NAME_REF(TextureUpdateRequestDesc) textureUpdateRequestDesc); + + // (HOST) Copy data and get the offset in the dedicated ring buffer (for dynamic constant buffers) + uint32_t (NRI_CALL *UpdateStreamerConstantBuffer)(NRI_NAME_REF(Streamer) streamer, const void* data, uint32_t dataSize); + + // (HOST) Copy gathered requests to the internal buffer, potentially a new one if the capacity exceeded. Must be called once per frame + NRI_NAME(Result) (NRI_CALL *CopyStreamerUpdateRequests)(NRI_NAME_REF(Streamer) streamer); + + // (DEVICE) Copy data to destinations (if any), barriers are externally controlled. 
Must be called after "CopyStreamerUpdateRequests" + void (NRI_CALL *CmdUploadStreamerUpdateRequests)(NRI_NAME_REF(CommandBuffer) commandBuffer, NRI_NAME_REF(Streamer) streamer); +}; + +NRI_NAMESPACE_END \ No newline at end of file diff --git a/Include/Extensions/NRISwapChain.h b/Include/Extensions/NRISwapChain.h index ca3474fe..92f380a6 100644 --- a/Include/Extensions/NRISwapChain.h +++ b/Include/Extensions/NRISwapChain.h @@ -6,6 +6,8 @@ NRI_NAMESPACE_BEGIN NRI_FORWARD_STRUCT(SwapChain); +static const uint32_t NRI_CONST_NAME(OUT_OF_DATE) = (uint32_t)(-1); // VK only: swap chain is out of date + // Color space: // - BT.709 - LDR https://en.wikipedia.org/wiki/Rec._709 // - BT.2020 - HDR https://en.wikipedia.org/wiki/Rec._2020 @@ -58,7 +60,9 @@ NRI_UNION(Window) NRI_NAME(WaylandWindow) wayland; }; -// SwapChain buffers will be created as "color attachment" resources +// SwapChain textures will be created as "color attachment" resources +// queuedFrameNum = 0 - auto-selection between 1 (for waitable) or 2 (otherwise) +// queuedFrameNum = 2 - recommended if the GPU frame time is less than the desired frame time, but the sum of 2 frames is greater NRI_STRUCT(SwapChainDesc) { NRI_NAME(Window) window; @@ -67,7 +71,10 @@ NRI_STRUCT(SwapChainDesc) NRI_NAME(Dim_t) height; uint8_t textureNum; NRI_NAME(SwapChainFormat) format; - uint8_t verticalSyncInterval; + uint8_t verticalSyncInterval; // 0 - vsync off + uint8_t queuedFrameNum; // aka "max frame latency", aka "number of frames in flight" (mostly for D3D11) + bool waitable; // allows to use "WaitForPresent", which helps to reduce latency + bool allowLowLatency; // unlocks "NRILowLatency" functionality (requires "isLowLatencySupported") }; NRI_STRUCT(ChromaticityCoords) @@ -75,24 +82,24 @@ NRI_STRUCT(ChromaticityCoords) float x, y; // [0; 1] }; -// Describes color settings and capabilities of the closest display -// nit = cd/m2 -// SDR = standard dynamic range -// LDR = low dynamic range (in many cases LDR == SDR) -// HDR = 
high dynamic range, assumes G2084: +// Describes color settings and capabilities of the closest display: +// - Luminance provided in nits (cd/m2) +// - SDR = standard dynamic range +// - LDR = low dynamic range (in many cases LDR == SDR) +// - HDR = high dynamic range, assumes G2084: // - BT709_G10_16BIT: HDR gets enabled and applied implicitly if Windows HDR is enabled // - BT2020_G2084_10BIT: HDR requires explicit color conversions and enabled HDR in Windows -// "SDR scale in HDR mode" = sdrLuminance / 80 +// - "SDR scale in HDR mode" = sdrLuminance / 80 NRI_STRUCT(DisplayDesc) { NRI_NAME(ChromaticityCoords) redPrimary; NRI_NAME(ChromaticityCoords) greenPrimary; NRI_NAME(ChromaticityCoords) bluePrimary; NRI_NAME(ChromaticityCoords) whitePoint; - float minLuminance; // nits - float maxLuminance; // nits - float maxFullFrameLuminance; // nits - float sdrLuminance; // nits + float minLuminance; + float maxLuminance; + float maxFullFrameLuminance; + float sdrLuminance; bool isHDR; }; @@ -102,8 +109,9 @@ NRI_STRUCT(SwapChainInterface) void (NRI_CALL *DestroySwapChain)(NRI_NAME_REF(SwapChain) swapChain); void (NRI_CALL *SetSwapChainDebugName)(NRI_NAME_REF(SwapChain) swapChain, const char* name); NRI_NAME(Texture)* const* (NRI_CALL *GetSwapChainTextures)(const NRI_NAME_REF(SwapChain) swapChain, uint32_t NRI_REF textureNum); - uint32_t (NRI_CALL *AcquireNextSwapChainTexture)(NRI_NAME_REF(SwapChain) swapChain); // IMPORTANT: return OUT_OF_DATE index to indicate "out of date" swap chain status (VK only) - NRI_NAME(Result) (NRI_CALL *SwapChainPresent)(NRI_NAME_REF(SwapChain) swapChain); + uint32_t (NRI_CALL *AcquireNextSwapChainTexture)(NRI_NAME_REF(SwapChain) swapChain); // can return OUT_OF_DATE (VK only) + NRI_NAME(Result) (NRI_CALL *WaitForPresent)(NRI_NAME_REF(SwapChain) swapChain); // call once right before input sampling (must be called starting from the 1st frame) + NRI_NAME(Result) (NRI_CALL *QueuePresent)(NRI_NAME_REF(SwapChain) swapChain); NRI_NAME(Result) 
(NRI_CALL *GetDisplayDesc)(NRI_NAME_REF(SwapChain) swapChain, NRI_NAME_REF(DisplayDesc) displayDesc); // returns FAILURE if window is outside of all monitors }; diff --git a/Include/Extensions/NRIWrapperD3D12.h b/Include/Extensions/NRIWrapperD3D12.h index 5b1a4996..79318852 100644 --- a/Include/Extensions/NRIWrapperD3D12.h +++ b/Include/Extensions/NRIWrapperD3D12.h @@ -21,9 +21,11 @@ NRI_STRUCT(DeviceCreationD3D12Desc) ID3D12CommandQueue* d3d12GraphicsQueue; ID3D12CommandQueue* d3d12ComputeQueue; ID3D12CommandQueue* d3d12CopyQueue; + AGSContext* agsContext; // can be NULL NRI_NAME(CallbackInterface) callbackInterface; NRI_NAME(MemoryAllocatorInterface) memoryAllocatorInterface; bool enableNRIValidation; + bool isNVAPILoaded; // At least NVAPI requires calling "NvAPI_Initialize" in DLL/EXE where the device is created in addition to NRI }; NRI_STRUCT(CommandBufferD3D12Desc) diff --git a/Include/Extensions/NRIWrapperVK.h b/Include/Extensions/NRIWrapperVK.h index 0b4e6dec..f35c7a66 100644 --- a/Include/Extensions/NRIWrapperVK.h +++ b/Include/Extensions/NRIWrapperVK.h @@ -34,7 +34,6 @@ NRI_STRUCT(DeviceCreationVKDesc) NRIVkInstance vkInstance; NRIVkDevice vkDevice; NRIVkPhysicalDevice vkPhysicalDevice; - uint32_t deviceGroupSize; const uint32_t* queueFamilyIndices; uint32_t queueFamilyIndexNum; const char* vulkanLoaderPath; diff --git a/Include/NRI.h b/Include/NRI.h index 076abd29..0970b742 100644 --- a/Include/NRI.h +++ b/Include/NRI.h @@ -25,8 +25,8 @@ Non-goals: #include #define NRI_VERSION_MAJOR 1 -#define NRI_VERSION_MINOR 124 -#define NRI_VERSION_DATE "1 March 2024" +#define NRI_VERSION_MINOR 125 +#define NRI_VERSION_DATE "19 March 2024" #ifdef _WIN32 #define NRI_CALL __fastcall @@ -123,7 +123,7 @@ NRI_STRUCT(CoreInterface) void (NRI_CALL *CmdSetScissors)(NRI_NAME_REF(CommandBuffer) commandBuffer, const NRI_NAME(Rect)* rects, uint32_t rectNum); // Mandatory state, if enabled (can be set only once) - void (NRI_CALL 
*CmdSetStencilReference)(NRI_NAME_REF(CommandBuffer) commandBuffer, uint8_t frontRef, uint8_t backRef); + void (NRI_CALL *CmdSetStencilReference)(NRI_NAME_REF(CommandBuffer) commandBuffer, uint8_t frontRef, uint8_t backRef); // "backRef" requires "isIndependentFrontAndBackStencilReferenceAndMasksSupported" void (NRI_CALL *CmdSetDepthBounds)(NRI_NAME_REF(CommandBuffer) commandBuffer, float boundsMin, float boundsMax); void (NRI_CALL *CmdSetBlendConstants)(NRI_NAME_REF(CommandBuffer) commandBuffer, const NRI_NAME_REF(Color32f) color); @@ -168,11 +168,9 @@ NRI_STRUCT(CoreInterface) // Synchronization uint64_t (NRI_CALL *GetFenceValue)(NRI_NAME_REF(Fence) fence); - void (NRI_CALL *QueueSignal)(NRI_NAME_REF(CommandQueue) commandQueue, NRI_NAME_REF(Fence) fence, uint64_t value); - void (NRI_CALL *QueueWait)(NRI_NAME_REF(CommandQueue) commandQueue, NRI_NAME_REF(Fence) fence, uint64_t value); void (NRI_CALL *Wait)(NRI_NAME_REF(Fence) fence, uint64_t value); - // Work submission + // Work submission (with queue synchronization) void (NRI_CALL *QueueSubmit)(NRI_NAME_REF(CommandQueue) commandQueue, const NRI_NAME_REF(QueueSubmitDesc) queueSubmitDesc); // Descriptor set diff --git a/Include/NRIDescs.h b/Include/NRIDescs.h index d7246b7a..da8d9eee 100644 --- a/Include/NRIDescs.h +++ b/Include/NRIDescs.h @@ -37,7 +37,6 @@ static const uint32_t NRI_CONST_NAME(ONE_VIEWPORT) = 0; // only for "viewportNum static const bool NRI_CONST_NAME(VARIABLE_DESCRIPTOR_NUM) = true; static const bool NRI_CONST_NAME(DESCRIPTOR_ARRAY) = true; static const bool NRI_CONST_NAME(PARTIALLY_BOUND) = true; -static const uint32_t NRI_CONST_NAME(OUT_OF_DATE) = (uint32_t)(-1); // VK only: swap chain is out of date //=============================================================================================================================== // Common @@ -302,7 +301,7 @@ NRI_STRUCT(DepthStencil) }; NRI_STRUCT(SamplePosition) -{ +{ int8_t x, y; // [-8; 7] }; @@ -937,7 +936,7 @@ 
NRI_STRUCT(DepthAttachmentDesc) NRI_STRUCT(StencilAttachmentDesc) { NRI_NAME(StencilDesc) front; - NRI_NAME(StencilDesc) back; + NRI_NAME(StencilDesc) back; // "back.writeMask" requires "isIndependentFrontAndBackStencilReferenceAndMasksSupported" }; NRI_STRUCT(OutputMergerDesc) @@ -1188,11 +1187,22 @@ NRI_STRUCT(TextureDataLayoutDesc) uint32_t slicePitch; }; -// Submit work to queue +// Work submission +NRI_STRUCT(FenceSubmitDesc) +{ + NRI_NAME(Fence)* fence; + uint64_t value; + NRI_NAME(StageBits) stages; +}; + NRI_STRUCT(QueueSubmitDesc) { + const NRI_NAME(FenceSubmitDesc)* waitFences; + uint32_t waitFenceNum; const NRI_NAME(CommandBuffer)* const* commandBuffers; uint32_t commandBufferNum; + const NRI_NAME(FenceSubmitDesc)* signalFences; + uint32_t signalFenceNum; }; // Memory @@ -1293,6 +1303,7 @@ NRI_ENUM QueryType, uint8_t, TIMESTAMP, + TIMESTAMP_COPY_QUEUE, // requires "isCopyQueueTimestampSupported" OCCLUSION, PIPELINE_STATISTICS, ACCELERATION_STRUCTURE_COMPACTED_SIZE, @@ -1486,23 +1497,27 @@ NRI_STRUCT(DeviceDesc) uint32_t combinedClipAndCullDistanceMaxNum; uint8_t conservativeRasterTier; - // Features support + // Features + uint32_t isComputeQueueSupported : 1; + uint32_t isCopyQueueSupported : 1; uint32_t isTextureFilterMinMaxSupported : 1; uint32_t isLogicOpSupported : 1; uint32_t isDepthBoundsTestSupported : 1; uint32_t isProgrammableSampleLocationsSupported : 1; - uint32_t isComputeQueueSupported : 1; - uint32_t isCopyQueueSupported : 1; - uint32_t isCopyQueueTimestampSupported : 1; uint32_t isRegisterAliasingSupported : 1; uint32_t isFloat16Supported : 1; - uint32_t isRaytracingSupported : 1; + uint32_t isIndependentFrontAndBackStencilReferenceAndMasksSupported : 1; + uint32_t isLineSmoothingSupported : 1; + uint32_t isCopyQueueTimestampSupported : 1; uint32_t isDispatchRaysIndirectSupported : 1; - uint32_t isMeshShaderSupported : 1; uint32_t isDrawMeshTasksIndirectSupported : 1; uint32_t isMeshShaderPipelineStatsSupported : 1; - uint32_t 
isIndependentFrontAndBackStencilReferenceAndMasksSupported : 1; // if not supported: only front face ref and masks are used - uint32_t isLineSmoothingSupported : 1; + + // Extensions (unexposed are always supported) + uint32_t isSwapChainSupported : 1; // NRISwapChain + uint32_t isRayTracingSupported : 1; // NRIRayTracing + uint32_t isMeshShaderSupported : 1; // NRIMeshShader + uint32_t isLowLatencySupported : 1; // NRILowLatency }; #pragma endregion diff --git a/Resources/Version.h b/Resources/Version.h index c76cc774..78e86f13 100644 --- a/Resources/Version.h +++ b/Resources/Version.h @@ -12,7 +12,7 @@ Versioning rules: */ #define VERSION_MAJOR 1 -#define VERSION_MINOR 124 +#define VERSION_MINOR 125 #define VERSION_BUILD 0 #define VERSION_REVISION 0 diff --git a/Source/Creation/Creation.cpp b/Source/Creation/Creation.cpp index 6bbce02a..687258f5 100644 --- a/Source/Creation/Creation.cpp +++ b/Source/Creation/Creation.cpp @@ -2,9 +2,6 @@ #include "SharedExternal.h" -#define NRI_STRINGIFY_(token) #token -#define NRI_STRINGIFY(token) NRI_STRINGIFY_(token) - using namespace nri; #if NRI_USE_D3D11 @@ -38,10 +35,10 @@ NRI_API Result NRI_CALL nriGetInterface(const Device& device, const char* interf realInterfaceSize = sizeof(CoreInterface); if (realInterfaceSize == interfaceSize) result = deviceBase.FillFunctionTable(*(CoreInterface*)interfacePtr); - } else if (hash == Hash(NRI_STRINGIFY(nri::SwapChainInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(SwapChainInterface)))) { - realInterfaceSize = sizeof(SwapChainInterface); + } else if (hash == Hash(NRI_STRINGIFY(nri::HelperInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(HelperInterface)))) { + realInterfaceSize = sizeof(HelperInterface); if (realInterfaceSize == interfaceSize) - result = deviceBase.FillFunctionTable(*(SwapChainInterface*)interfacePtr); + result = deviceBase.FillFunctionTable(*(HelperInterface*)interfacePtr); } else if (hash == Hash(NRI_STRINGIFY(nri::WrapperD3D11Interface)) || hash == 
Hash(NRI_STRINGIFY(NRI_NAME_C(WrapperD3D11Interface)))) { realInterfaceSize = sizeof(WrapperD3D11Interface); if (realInterfaceSize == interfaceSize) @@ -54,6 +51,10 @@ NRI_API Result NRI_CALL nriGetInterface(const Device& device, const char* interf realInterfaceSize = sizeof(WrapperVKInterface); if (realInterfaceSize == interfaceSize) result = deviceBase.FillFunctionTable(*(WrapperVKInterface*)interfacePtr); + } else if (hash == Hash(NRI_STRINGIFY(nri::SwapChainInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(SwapChainInterface)))) { + realInterfaceSize = sizeof(SwapChainInterface); + if (realInterfaceSize == interfaceSize) + result = deviceBase.FillFunctionTable(*(SwapChainInterface*)interfacePtr); } else if (hash == Hash(NRI_STRINGIFY(nri::RayTracingInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(RayTracingInterface)))) { realInterfaceSize = sizeof(RayTracingInterface); if (realInterfaceSize == interfaceSize) @@ -62,10 +63,14 @@ NRI_API Result NRI_CALL nriGetInterface(const Device& device, const char* interf realInterfaceSize = sizeof(MeshShaderInterface); if (realInterfaceSize == interfaceSize) result = deviceBase.FillFunctionTable(*(MeshShaderInterface*)interfacePtr); - } else if (hash == Hash(NRI_STRINGIFY(nri::HelperInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(HelperInterface)))) { - realInterfaceSize = sizeof(HelperInterface); + } else if (hash == Hash(NRI_STRINGIFY(nri::LowLatencyInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(LowLatencyInterface)))) { + realInterfaceSize = sizeof(LowLatencyInterface); if (realInterfaceSize == interfaceSize) - result = deviceBase.FillFunctionTable(*(HelperInterface*)interfacePtr); + result = deviceBase.FillFunctionTable(*(LowLatencyInterface*)interfacePtr); + } else if (hash == Hash(NRI_STRINGIFY(nri::StreamerInterface)) || hash == Hash(NRI_STRINGIFY(NRI_NAME_C(StreamerInterface)))) { + realInterfaceSize = sizeof(StreamerInterface); + if (realInterfaceSize == interfaceSize) + result = 
deviceBase.FillFunctionTable(*(StreamerInterface*)interfacePtr); } if (result == Result::INVALID_ARGUMENT) diff --git a/Source/D3D11/BufferD3D11.cpp b/Source/D3D11/BufferD3D11.cpp index a3313535..d374fdfe 100644 --- a/Source/D3D11/BufferD3D11.cpp +++ b/Source/D3D11/BufferD3D11.cpp @@ -10,8 +10,7 @@ using namespace nri; BufferD3D11::~BufferD3D11() { - if (m_ReadbackTexture) - Deallocate(m_Device.GetStdAllocator(), m_ReadbackTexture); + Deallocate(m_Device.GetStdAllocator(), m_ReadbackTexture); } Result BufferD3D11::Create(const MemoryD3D11& memory) { @@ -207,8 +206,7 @@ TextureD3D11& BufferD3D11::RecreateReadbackTexture(const TextureD3D11& srcTextur else if (srcRegionDesc.height == 1) textureDesc.type = TextureType::TEXTURE_1D; - if (m_ReadbackTexture) - Deallocate(m_Device.GetStdAllocator(), m_ReadbackTexture); + Deallocate(m_Device.GetStdAllocator(), m_ReadbackTexture); m_ReadbackTexture = Allocate(m_Device.GetStdAllocator(), m_Device, textureDesc); m_ReadbackTexture->Create(nullptr); diff --git a/Source/D3D11/BufferD3D11.h b/Source/D3D11/BufferD3D11.h index d1a801f0..daa842df 100644 --- a/Source/D3D11/BufferD3D11.h +++ b/Source/D3D11/BufferD3D11.h @@ -63,7 +63,7 @@ struct BufferD3D11 { void* Map(uint64_t offset, uint64_t size); void Unmap(); - private: +private: DeviceD3D11& m_Device; ComPtr m_Buffer; TextureD3D11* m_ReadbackTexture = nullptr; diff --git a/Source/D3D11/BufferD3D11.hpp b/Source/D3D11/BufferD3D11.hpp index e32d7e9a..d2190ae3 100644 --- a/Source/D3D11/BufferD3D11.hpp +++ b/Source/D3D11/BufferD3D11.hpp @@ -27,4 +27,4 @@ static void NRI_CALL UnmapBuffer(Buffer& buffer) { #pragma endregion -Define_Core_Buffer_PartiallyFillFunctionTable(D3D11) +Define_Core_Buffer_PartiallyFillFunctionTable(D3D11); diff --git a/Source/D3D11/CommandAllocatorD3D11.cpp b/Source/D3D11/CommandAllocatorD3D11.cpp index b04e0bed..26428e54 100644 --- a/Source/D3D11/CommandAllocatorD3D11.cpp +++ b/Source/D3D11/CommandAllocatorD3D11.cpp @@ -21,11 +21,11 @@ Result 
CreateCommandBuffer(DeviceD3D11& device, ID3D11DeviceContext* precreatedC else impl = Allocate(device.GetStdAllocator(), device); - const nri::Result result = ((CommandBufferHelper*)impl)->Create(precreatedContext); + const Result result = ((CommandBufferHelper*)impl)->Create(precreatedContext); - if (result == nri::Result::SUCCESS) { + if (result == Result::SUCCESS) { commandBuffer = (CommandBuffer*)impl; - return nri::Result::SUCCESS; + return Result::SUCCESS; } if (isImmediate) diff --git a/Source/D3D11/CommandAllocatorD3D11.h b/Source/D3D11/CommandAllocatorD3D11.h index a1afb7e0..e53bcfee 100644 --- a/Source/D3D11/CommandAllocatorD3D11.h +++ b/Source/D3D11/CommandAllocatorD3D11.h @@ -30,7 +30,7 @@ struct CommandAllocatorD3D11 { Result CreateCommandBuffer(CommandBuffer*& commandBuffer); - private: +private: DeviceD3D11& m_Device; }; diff --git a/Source/D3D11/CommandAllocatorD3D11.hpp b/Source/D3D11/CommandAllocatorD3D11.hpp index ba059517..07775b5e 100644 --- a/Source/D3D11/CommandAllocatorD3D11.hpp +++ b/Source/D3D11/CommandAllocatorD3D11.hpp @@ -16,4 +16,4 @@ static void NRI_CALL ResetCommandAllocator(CommandAllocator& commandAllocator) { #pragma endregion -Define_Core_CommandAllocator_PartiallyFillFunctionTable(D3D11) \ No newline at end of file +Define_Core_CommandAllocator_PartiallyFillFunctionTable(D3D11); \ No newline at end of file diff --git a/Source/D3D11/CommandBufferD3D11.h b/Source/D3D11/CommandBufferD3D11.h index bf44827e..af59e4f6 100644 --- a/Source/D3D11/CommandBufferD3D11.h +++ b/Source/D3D11/CommandBufferD3D11.h @@ -75,7 +75,7 @@ struct CommandBufferD3D11 final : public CommandBufferHelper { void BeginAnnotation(const char* name); void EndAnnotation(); - private: +private: DeviceD3D11& m_Device; ComPtr m_DeferredContext; // can be immediate to redirect data from emulation ComPtr m_CommandList; diff --git a/Source/D3D11/CommandBufferD3D11.hpp b/Source/D3D11/CommandBufferD3D11.hpp index 9c5c0f05..6bfeaed1 100644 --- 
a/Source/D3D11/CommandBufferD3D11.hpp +++ b/Source/D3D11/CommandBufferD3D11.hpp @@ -174,4 +174,4 @@ static void* NRI_CALL GetCommandBufferNativeObject(const CommandBuffer& commandB #pragma endregion -Define_Core_CommandBuffer_PartiallyFillFunctionTable(D3D11) +Define_Core_CommandBuffer_PartiallyFillFunctionTable(D3D11); diff --git a/Source/D3D11/CommandBufferEmuD3D11.h b/Source/D3D11/CommandBufferEmuD3D11.h index 0943d9b4..b624fed9 100644 --- a/Source/D3D11/CommandBufferEmuD3D11.h +++ b/Source/D3D11/CommandBufferEmuD3D11.h @@ -72,7 +72,7 @@ struct CommandBufferEmuD3D11 final : public CommandBufferHelper { void BeginAnnotation(const char* name); void EndAnnotation(); - private: +private: DeviceD3D11& m_Device; PushBuffer m_PushBuffer; }; diff --git a/Source/D3D11/CommandQueueD3D11.cpp b/Source/D3D11/CommandQueueD3D11.cpp index 9bab9677..edadeedc 100644 --- a/Source/D3D11/CommandQueueD3D11.cpp +++ b/Source/D3D11/CommandQueueD3D11.cpp @@ -3,6 +3,9 @@ #include "SharedD3D11.h" #include "CommandQueueD3D11.h" +#include "FenceD3D11.h" +#include "HelperDataUpload.h" +#include "HelperWaitIdle.h" using namespace nri; @@ -11,10 +14,22 @@ using namespace nri; //================================================================================================================ inline void CommandQueueD3D11::Submit(const QueueSubmitDesc& queueSubmitDesc) { + for (uint32_t i = 0; i < queueSubmitDesc.waitFenceNum; i++) { + const FenceSubmitDesc& fenceSubmitDesc = queueSubmitDesc.waitFences[i]; + FenceD3D11* fence = (FenceD3D11*)fenceSubmitDesc.fence; + fence->QueueWait(fenceSubmitDesc.value); + } + for (uint32_t i = 0; i < queueSubmitDesc.commandBufferNum; i++) { CommandBufferHelper* commandBuffer = (CommandBufferHelper*)queueSubmitDesc.commandBuffers[i]; commandBuffer->Submit(); } + + for (uint32_t i = 0; i < queueSubmitDesc.signalFenceNum; i++) { + const FenceSubmitDesc& fenceSubmitDesc = queueSubmitDesc.signalFences[i]; + FenceD3D11* fence = (FenceD3D11*)fenceSubmitDesc.fence; + 
fence->QueueSignal(fenceSubmitDesc.value); + } } inline Result CommandQueueD3D11::UploadData( @@ -25,9 +40,7 @@ inline Result CommandQueueD3D11::UploadData( } inline Result CommandQueueD3D11::WaitForIdle() { - HelperWaitIdle helperWaitIdle(m_Device.GetCoreInterface(), (Device&)m_Device, (CommandQueue&)*this); - - return helperWaitIdle.WaitIdle(); + return WaitIdle(m_Device.GetCoreInterface(), (Device&)m_Device, (CommandQueue&)*this); } #include "CommandQueueD3D11.hpp" diff --git a/Source/D3D11/CommandQueueD3D11.h b/Source/D3D11/CommandQueueD3D11.h index 19fb32bf..15fbdd16 100644 --- a/Source/D3D11/CommandQueueD3D11.h +++ b/Source/D3D11/CommandQueueD3D11.h @@ -29,7 +29,7 @@ struct CommandQueueD3D11 { Result UploadData(const TextureUploadDesc* textureUploadDescs, uint32_t textureUploadDescNum, const BufferUploadDesc* bufferUploadDescs, uint32_t bufferUploadDescNum); Result WaitForIdle(); - private: +private: DeviceD3D11& m_Device; }; diff --git a/Source/D3D11/CommandQueueD3D11.hpp b/Source/D3D11/CommandQueueD3D11.hpp index a1029291..4bb7adfc 100644 --- a/Source/D3D11/CommandQueueD3D11.hpp +++ b/Source/D3D11/CommandQueueD3D11.hpp @@ -28,4 +28,16 @@ static Result NRI_CALL WaitForIdle(CommandQueue& commandQueue) { #pragma endregion -Define_Core_CommandQueue_PartiallyFillFunctionTable(D3D11) Define_Helper_CommandQueue_PartiallyFillFunctionTable(D3D11) +#pragma region[ Low latency ] + +static void NRI_CALL QueueSubmitTrackable(CommandQueue& commandQueue, const QueueSubmitDesc& workSubmissionDesc, const SwapChain& swapChain) { + MaybeUnused(swapChain); + + ((CommandQueueD3D11&)commandQueue).Submit(workSubmissionDesc); +} + +#pragma endregion + +Define_Core_CommandQueue_PartiallyFillFunctionTable(D3D11); +Define_Helper_CommandQueue_PartiallyFillFunctionTable(D3D11); +Define_LowLatency_CommandQueue_PartiallyFillFunctionTable(D3D11); diff --git a/Source/D3D11/DescriptorD3D11.cpp b/Source/D3D11/DescriptorD3D11.cpp index b801219e..fc2261ba 100644 --- 
a/Source/D3D11/DescriptorD3D11.cpp +++ b/Source/D3D11/DescriptorD3D11.cpp @@ -8,7 +8,7 @@ using namespace nri; -inline D3D11_TEXTURE_ADDRESS_MODE GetD3D11AdressMode(nri::AddressMode mode) { +inline D3D11_TEXTURE_ADDRESS_MODE GetD3D11AdressMode(AddressMode mode) { return (D3D11_TEXTURE_ADDRESS_MODE)(D3D11_TEXTURE_ADDRESS_WRAP + (uint32_t)mode); } @@ -477,8 +477,8 @@ DescriptorD3D11::DescriptorD3D11(DeviceD3D11& device, ID3D11DepthStencilView* de m_Type = DescriptorTypeDX11::NO_SHADER_VISIBLE; } -DescriptorD3D11::DescriptorD3D11(DeviceD3D11& device, ID3D11Buffer* constantBuffer, uint32_t elementOffset, uint32_t elementNum) - : m_Descriptor(constantBuffer), m_ElementOffset(elementOffset), m_ElementNum(elementNum), m_Device(device) { +DescriptorD3D11::DescriptorD3D11(DeviceD3D11& device, ID3D11Buffer* constantBuffer, uint32_t elementOffset, uint32_t elementNum) : + m_Descriptor(constantBuffer), m_ElementOffset(elementOffset), m_ElementNum(elementNum), m_Device(device) { m_Type = DescriptorTypeDX11::CONSTANT; } diff --git a/Source/D3D11/DescriptorD3D11.h b/Source/D3D11/DescriptorD3D11.h index b8baf792..6c21b2ad 100644 --- a/Source/D3D11/DescriptorD3D11.h +++ b/Source/D3D11/DescriptorD3D11.h @@ -74,7 +74,7 @@ struct DescriptorD3D11 { SET_D3D_DEBUG_OBJECT_NAME(m_Descriptor, name); } - private: +private: DeviceD3D11& m_Device; ComPtr m_Descriptor; SubresourceInfo m_SubresourceInfo = {}; diff --git a/Source/D3D11/DescriptorD3D11.hpp b/Source/D3D11/DescriptorD3D11.hpp index 8cb4bae5..dc54e902 100644 --- a/Source/D3D11/DescriptorD3D11.hpp +++ b/Source/D3D11/DescriptorD3D11.hpp @@ -15,4 +15,4 @@ static uint64_t NRI_CALL GetDescriptorNativeObject(const Descriptor& descriptor) #pragma endregion -Define_Core_Descriptor_PartiallyFillFunctionTable(D3D11) \ No newline at end of file +Define_Core_Descriptor_PartiallyFillFunctionTable(D3D11); \ No newline at end of file diff --git a/Source/D3D11/DescriptorPoolD3D11.h b/Source/D3D11/DescriptorPoolD3D11.h index 72fffb30..3e092780 100644 
--- a/Source/D3D11/DescriptorPoolD3D11.h +++ b/Source/D3D11/DescriptorPoolD3D11.h @@ -37,7 +37,7 @@ struct DescriptorPoolD3D11 { Result AllocateDescriptorSets( const PipelineLayout& pipelineLayout, uint32_t setIndexInPipelineLayout, DescriptorSet** descriptorSets, uint32_t instanceNum, uint32_t variableDescriptorNum); - private: +private: DeviceD3D11& m_Device; Vector m_DescriptorSets; Vector m_DescriptorPool; diff --git a/Source/D3D11/DescriptorPoolD3D11.hpp b/Source/D3D11/DescriptorPoolD3D11.hpp index c94b5d6a..af890e4d 100644 --- a/Source/D3D11/DescriptorPoolD3D11.hpp +++ b/Source/D3D11/DescriptorPoolD3D11.hpp @@ -17,4 +17,4 @@ static void NRI_CALL ResetDescriptorPool(DescriptorPool& descriptorPool) { #pragma endregion -Define_Core_DescriptorPool_PartiallyFillFunctionTable(D3D11) +Define_Core_DescriptorPool_PartiallyFillFunctionTable(D3D11); diff --git a/Source/D3D11/DescriptorSetD3D11.h b/Source/D3D11/DescriptorSetD3D11.h index 32882059..d8ab8b9e 100644 --- a/Source/D3D11/DescriptorSetD3D11.h +++ b/Source/D3D11/DescriptorSetD3D11.h @@ -38,7 +38,7 @@ struct DescriptorSetD3D11 { void UpdateDynamicConstantBuffers(uint32_t baseBuffer, uint32_t bufferNum, const Descriptor* const* descriptors); void Copy(const DescriptorSetCopyDesc& descriptorSetCopyDesc); - private: +private: Vector m_Ranges; const DescriptorD3D11** m_Descriptors = nullptr; uint32_t m_DynamicConstantBuffersNum = 0; diff --git a/Source/D3D11/DescriptorSetD3D11.hpp b/Source/D3D11/DescriptorSetD3D11.hpp index a2e1fbc4..831d74a3 100644 --- a/Source/D3D11/DescriptorSetD3D11.hpp +++ b/Source/D3D11/DescriptorSetD3D11.hpp @@ -20,4 +20,4 @@ static void NRI_CALL CopyDescriptorSet(DescriptorSet& descriptorSet, const Descr #pragma endregion -Define_Core_DescriptorSet_PartiallyFillFunctionTable(D3D11) +Define_Core_DescriptorSet_PartiallyFillFunctionTable(D3D11); diff --git a/Source/D3D11/DeviceD3D11.cpp b/Source/D3D11/DeviceD3D11.cpp index 83663c3e..918adfb9 100644 --- a/Source/D3D11/DeviceD3D11.cpp +++ 
b/Source/D3D11/DeviceD3D11.cpp @@ -9,10 +9,12 @@ #include "DescriptorPoolD3D11.h" #include "DeviceD3D11.h" #include "FenceD3D11.h" +#include "HelperDeviceMemoryAllocator.h" #include "MemoryD3D11.h" #include "PipelineD3D11.h" #include "PipelineLayoutD3D11.h" #include "QueryPoolD3D11.h" +#include "Streamer.h" #include "SwapChainD3D11.h" #include "TextureD3D11.h" @@ -45,11 +47,11 @@ Result CreateDeviceD3D11(const DeviceCreationDesc& deviceCreationDesc, DeviceBas StdAllocator allocator(deviceCreationDesc.memoryAllocatorInterface); DeviceD3D11* implementation = Allocate(allocator, deviceCreationDesc.callbackInterface, allocator); - const nri::Result result = implementation->Create(deviceCreationDesc, nullptr, nullptr, false); + Result result = implementation->Create(deviceCreationDesc, nullptr, nullptr, false); - if (result == nri::Result::SUCCESS) { + if (result == Result::SUCCESS) { device = (DeviceBase*)implementation; - return nri::Result::SUCCESS; + return Result::SUCCESS; } Deallocate(allocator, implementation); @@ -66,12 +68,11 @@ Result CreateDeviceD3D11(const DeviceCreationD3D11Desc& deviceCreationD3D11Desc, StdAllocator allocator(deviceCreationDesc.memoryAllocatorInterface); DeviceD3D11* implementation = Allocate(allocator, deviceCreationDesc.callbackInterface, allocator); - const nri::Result result = - implementation->Create(deviceCreationDesc, deviceCreationD3D11Desc.d3d11Device, deviceCreationD3D11Desc.agsContext, deviceCreationD3D11Desc.isNVAPILoaded); + Result result = implementation->Create(deviceCreationDesc, deviceCreationD3D11Desc.d3d11Device, deviceCreationD3D11Desc.agsContext, deviceCreationD3D11Desc.isNVAPILoaded); - if (result == nri::Result::SUCCESS) { + if (result == Result::SUCCESS) { device = (DeviceBase*)implementation; - return nri::Result::SUCCESS; + return Result::SUCCESS; } Deallocate(allocator, implementation); @@ -210,7 +211,7 @@ Result DeviceD3D11::Create(const DeviceCreationDesc& deviceCreationDesc, ID3D11D // Other FillDesc(params); 
- for (uint32_t i = 0; i < COMMAND_QUEUE_TYPE_NUM; i++) + for (uint32_t i = 0; i < (uint32_t)CommandQueueType::MAX_NUM; i++) m_CommandQueues.emplace_back(*this); return FillFunctionTable(m_CoreInterface); @@ -370,24 +371,22 @@ void DeviceD3D11::FillDesc(const AGSDX11ReturnedParams& params) { m_Desc.clipDistanceMaxNum = D3D11_CLIP_OR_CULL_DISTANCE_COUNT; m_Desc.cullDistanceMaxNum = D3D11_CLIP_OR_CULL_DISTANCE_COUNT; m_Desc.combinedClipAndCullDistanceMaxNum = D3D11_CLIP_OR_CULL_DISTANCE_COUNT; - m_Desc.rayTracingShaderGroupIdentifierSize = 0; - m_Desc.rayTracingShaderTableAligment = 0; - m_Desc.rayTracingShaderTableMaxStride = 0; - m_Desc.rayTracingShaderRecursionMaxDepth = 0; - m_Desc.rayTracingGeometryObjectMaxNum = 0; m_Desc.conservativeRasterTier = (uint8_t)options2.ConservativeRasterizationTier; m_Desc.isTextureFilterMinMaxSupported = options1.MinMaxFiltering != 0; m_Desc.isLogicOpSupported = options.OutputMergerLogicOp != 0; m_Desc.isDepthBoundsTestSupported = params.extensionsSupported.depthBoundsDeferredContexts; - m_Desc.isProgrammableSampleLocationsSupported = m_Desc.adapterDesc.vendor == Vendor::NVIDIA; + m_Desc.isProgrammableSampleLocationsSupported = m_Ext.HasNVAPI(); m_Desc.isLineSmoothingSupported = true; + + m_Desc.isSwapChainSupported = HasOutput(); + m_Desc.isLowLatencySupported = m_Ext.HasNVAPI(); } template Result DeviceD3D11::CreateImplementation(Interface*& entity, const Args&... 
args) { Implementation* implementation = Allocate(GetStdAllocator(), *this); - const Result result = implementation->Create(args...); + Result result = implementation->Create(args...); if (result == Result::SUCCESS) { entity = (Interface*)implementation; @@ -476,11 +475,11 @@ inline Result DeviceD3D11::CreateDescriptor(const SamplerDesc& samplerDesc, Desc inline Result DeviceD3D11::CreatePipelineLayout(const PipelineLayoutDesc& pipelineLayoutDesc, PipelineLayout*& pipelineLayout) { PipelineLayoutD3D11* implementation = Allocate(GetStdAllocator(), *this); - const nri::Result res = implementation->Create(pipelineLayoutDesc); + Result res = implementation->Create(pipelineLayoutDesc); - if (res == nri::Result::SUCCESS) { + if (res == Result::SUCCESS) { pipelineLayout = (PipelineLayout*)implementation; - return nri::Result::SUCCESS; + return Result::SUCCESS; } Deallocate(GetStdAllocator(), implementation); @@ -490,11 +489,11 @@ inline Result DeviceD3D11::CreatePipelineLayout(const PipelineLayoutDesc& pipeli inline Result DeviceD3D11::CreatePipeline(const GraphicsPipelineDesc& graphicsPipelineDesc, Pipeline*& pipeline) { PipelineD3D11* implementation = Allocate(GetStdAllocator(), *this); - const nri::Result res = implementation->Create(graphicsPipelineDesc); + Result res = implementation->Create(graphicsPipelineDesc); - if (res == nri::Result::SUCCESS) { + if (res == Result::SUCCESS) { pipeline = (Pipeline*)implementation; - return nri::Result::SUCCESS; + return Result::SUCCESS; } Deallocate(GetStdAllocator(), implementation); @@ -504,11 +503,11 @@ inline Result DeviceD3D11::CreatePipeline(const GraphicsPipelineDesc& graphicsPi inline Result DeviceD3D11::CreatePipeline(const ComputePipelineDesc& computePipelineDesc, Pipeline*& pipeline) { PipelineD3D11* implementation = Allocate(GetStdAllocator(), *this); - const nri::Result res = implementation->Create(computePipelineDesc); + Result res = implementation->Create(computePipelineDesc); - if (res == nri::Result::SUCCESS) { 
+ if (res == Result::SUCCESS) { pipeline = (Pipeline*)implementation; - return nri::Result::SUCCESS; + return Result::SUCCESS; } Deallocate(GetStdAllocator(), implementation); @@ -522,11 +521,11 @@ inline Result DeviceD3D11::CreateQueryPool(const QueryPoolDesc& queryPoolDesc, Q inline Result DeviceD3D11::CreateFence(uint64_t initialValue, Fence*& fence) { FenceD3D11* implementation = Allocate(GetStdAllocator(), *this); - const nri::Result res = implementation->Create(initialValue); + Result res = implementation->Create(initialValue); - if (res == nri::Result::SUCCESS) { + if (res == Result::SUCCESS) { fence = (Fence*)implementation; - return nri::Result::SUCCESS; + return Result::SUCCESS; } Deallocate(GetStdAllocator(), implementation); @@ -611,15 +610,19 @@ inline FormatSupportBits DeviceD3D11::GetFormatSupport(Format format) const { } inline uint32_t DeviceD3D11::CalculateAllocationNumber(const ResourceGroupDesc& resourceGroupDesc) const { - HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this, m_StdAllocator); + HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this); return allocator.CalculateAllocationNumber(resourceGroupDesc); } -inline Result DeviceD3D11::AllocateAndBindMemory(const ResourceGroupDesc& resourceGroupDesc, nri::Memory** allocations) { - HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this, m_StdAllocator); +inline Result DeviceD3D11::AllocateAndBindMemory(const ResourceGroupDesc& resourceGroupDesc, Memory** allocations) { + HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this); return allocator.AllocateAndBindMemory(resourceGroupDesc, allocations); } +namespace d3d11 { +#include "D3DExt.hpp" +} + #include "DeviceD3D11.hpp" diff --git a/Source/D3D11/DeviceD3D11.h b/Source/D3D11/DeviceD3D11.h index 5e90974e..4fafac5e 100644 --- a/Source/D3D11/DeviceD3D11.h +++ b/Source/D3D11/DeviceD3D11.h @@ -25,7 +25,7 @@ struct DeviceD3D11 final : public DeviceBase { return m_Version; } - inline 
const D3D11Extensions* GetExt() const { + inline const d3d11::Ext* GetExt() const { return &m_Ext; } @@ -122,16 +122,17 @@ struct DeviceD3D11 final : public DeviceBase { Result FillFunctionTable(SwapChainInterface& table) const; Result FillFunctionTable(WrapperD3D11Interface& table) const; Result FillFunctionTable(HelperInterface& helperInterface) const; + Result FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const; + Result FillFunctionTable(StreamerInterface& streamerInterface) const; - private: +private: void FillDesc(const AGSDX11ReturnedParams& params); template Result CreateImplementation(Interface*& entity, const Args&... args); - private: - // don't sort - ~D3D11Extensions must be called last! - D3D11Extensions m_Ext = {}; +private: + d3d11::Ext m_Ext = {}; // don't sort: destructor must be called last! ComPtr m_Device; ComPtr m_Adapter; ComPtr m_ImmediateContext; diff --git a/Source/D3D11/DeviceD3D11.hpp b/Source/D3D11/DeviceD3D11.hpp index a8487b43..f216098b 100644 --- a/Source/D3D11/DeviceD3D11.hpp +++ b/Source/D3D11/DeviceD3D11.hpp @@ -1,9 +1,10 @@ // © 2021 NVIDIA Corporation -Declare_PartiallyFillFunctionTable_Functions(D3D11) +Declare_PartiallyFillFunctionTable_Functions(D3D11); + #pragma region[ Core ] - static const DeviceDesc& NRI_CALL GetDeviceDesc(const Device& device) { +static const DeviceDesc& NRI_CALL GetDeviceDesc(const Device& device) { return ((const DeviceD3D11&)device).GetDesc(); } @@ -287,6 +288,28 @@ Result DeviceD3D11::FillFunctionTable(SwapChainInterface& swapChainInterface) co #pragma endregion +#pragma region[ Helper ] + +static uint32_t NRI_CALL CountAllocationNum(Device& device, const ResourceGroupDesc& resourceGroupDesc) { + return ((DeviceD3D11&)device).CalculateAllocationNumber(resourceGroupDesc); +} + +static Result NRI_CALL AllocateAndBindMemory(Device& device, const ResourceGroupDesc& resourceGroupDesc, Memory** allocations) { + return ((DeviceD3D11&)device).AllocateAndBindMemory(resourceGroupDesc, 
allocations); +} + +Result DeviceD3D11::FillFunctionTable(HelperInterface& helperInterface) const { + helperInterface = {}; + helperInterface.CalculateAllocationNumber = ::CountAllocationNum; + helperInterface.AllocateAndBindMemory = ::AllocateAndBindMemory; + + Helper_CommandQueue_PartiallyFillFunctionTableD3D11(helperInterface); + + return ValidateFunctionTable(helperInterface); +} + +#pragma endregion + #pragma region[ WrapperD3D11 ] static Result NRI_CALL CreateCommandBuffer(Device& device, const CommandBufferD3D11Desc& commandBufferD3D11Desc, CommandBuffer*& commandBuffer) { @@ -299,11 +322,11 @@ static Result NRI_CALL CreateBuffer(Device& device, const BufferD3D11Desc& buffe DeviceD3D11& deviceD3D11 = (DeviceD3D11&)device; BufferD3D11* implementation = Allocate(deviceD3D11.GetStdAllocator(), deviceD3D11); - const nri::Result res = implementation->Create(bufferD3D11Desc); + Result res = implementation->Create(bufferD3D11Desc); - if (res == nri::Result::SUCCESS) { + if (res == Result::SUCCESS) { buffer = (Buffer*)implementation; - return nri::Result::SUCCESS; + return Result::SUCCESS; } Deallocate(deviceD3D11.GetStdAllocator(), implementation); @@ -315,11 +338,11 @@ static Result NRI_CALL CreateTexture(Device& device, const TextureD3D11Desc& tex DeviceD3D11& deviceD3D11 = (DeviceD3D11&)device; TextureD3D11* implementation = Allocate(deviceD3D11.GetStdAllocator(), deviceD3D11); - const nri::Result res = implementation->Create(textureD3D11Desc); + Result res = implementation->Create(textureD3D11Desc); - if (res == nri::Result::SUCCESS) { + if (res == Result::SUCCESS) { texture = (Texture*)implementation; - return nri::Result::SUCCESS; + return Result::SUCCESS; } Deallocate(deviceD3D11.GetStdAllocator(), implementation); @@ -338,24 +361,84 @@ Result DeviceD3D11::FillFunctionTable(WrapperD3D11Interface& wrapperD3D11Interfa #pragma endregion -#pragma region[ Helper ] +#pragma region[ LowLatency ] -static uint32_t NRI_CALL CountAllocationNum(Device& device, const 
ResourceGroupDesc& resourceGroupDesc) { - return ((DeviceD3D11&)device).CalculateAllocationNumber(resourceGroupDesc); +Result DeviceD3D11::FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const { + lowLatencyInterface = {}; + if (!m_Desc.isLowLatencySupported) + return Result::UNSUPPORTED; + + LowLatency_CommandQueue_PartiallyFillFunctionTableD3D11(lowLatencyInterface); + LowLatency_SwapChain_PartiallyFillFunctionTableD3D11(lowLatencyInterface); + + return ValidateFunctionTable(lowLatencyInterface); } -static Result NRI_CALL AllocateAndBindMemory(Device& device, const ResourceGroupDesc& resourceGroupDesc, Memory** allocations) { - return ((DeviceD3D11&)device).AllocateAndBindMemory(resourceGroupDesc, allocations); +#pragma endregion + +#pragma region[ Streamer ] + +static Result CreateStreamer(Device& device, const StreamerDesc& streamerDesc, Streamer*& streamer) { + DeviceD3D11& deviceD3D11 = (DeviceD3D11&)device; + + StreamerImpl* implementation = Allocate(deviceD3D11.GetStdAllocator(), device, deviceD3D11.GetCoreInterface()); + Result res = implementation->Create(streamerDesc); + + if (res == Result::SUCCESS) { + streamer = (Streamer*)implementation; + return Result::SUCCESS; + } + + Deallocate(deviceD3D11.GetStdAllocator(), implementation); + + return res; } -Result DeviceD3D11::FillFunctionTable(HelperInterface& helperInterface) const { - helperInterface = {}; - helperInterface.CalculateAllocationNumber = ::CountAllocationNum; - helperInterface.AllocateAndBindMemory = ::AllocateAndBindMemory; +static void DestroyStreamer(Streamer& streamer) { + Deallocate(((DeviceBase&)((StreamerImpl&)streamer).GetDevice()).GetStdAllocator(), (StreamerImpl*)&streamer); +} - Helper_CommandQueue_PartiallyFillFunctionTableD3D11(helperInterface); +static Buffer* GetStreamerConstantBuffer(Streamer& streamer) { + return ((StreamerImpl&)streamer).GetConstantBuffer(); +} - return ValidateFunctionTable(helperInterface); +static uint32_t UpdateStreamerConstantBuffer(Streamer& 
streamer, const void* data, uint32_t dataSize) { + return ((StreamerImpl&)streamer).UpdateStreamerConstantBuffer(data, dataSize); +} + +static uint64_t AddStreamerBufferUpdateRequest(Streamer& streamer, const BufferUpdateRequestDesc& bufferUpdateRequestDesc) { + return ((StreamerImpl&)streamer).AddStreamerBufferUpdateRequest(bufferUpdateRequestDesc); +} + +static uint64_t AddStreamerTextureUpdateRequest(Streamer& streamer, const TextureUpdateRequestDesc& textureUpdateRequestDesc) { + return ((StreamerImpl&)streamer).AddStreamerTextureUpdateRequest(textureUpdateRequestDesc); +} + +static Result CopyStreamerUpdateRequests(Streamer& streamer) { + return ((StreamerImpl&)streamer).CopyStreamerUpdateRequests(); +} + +static Buffer* GetStreamerDynamicBuffer(Streamer& streamer) { + return ((StreamerImpl&)streamer).GetDynamicBuffer(); +} + +static void CmdUploadStreamerUpdateRequests(CommandBuffer& commandBuffer, Streamer& streamer) { + ((StreamerImpl&)streamer).CmdUploadStreamerUpdateRequests(commandBuffer); +} + +Result DeviceD3D11::FillFunctionTable(StreamerInterface& streamerInterface) const { + streamerInterface = {}; + streamerInterface.CreateStreamer = ::CreateStreamer; + streamerInterface.DestroyStreamer = ::DestroyStreamer; + streamerInterface.GetStreamerConstantBuffer = ::GetStreamerConstantBuffer; + streamerInterface.UpdateStreamerConstantBuffer = ::UpdateStreamerConstantBuffer; + streamerInterface.AddStreamerBufferUpdateRequest = ::AddStreamerBufferUpdateRequest; + streamerInterface.AddStreamerTextureUpdateRequest = ::AddStreamerTextureUpdateRequest; + streamerInterface.CopyStreamerUpdateRequests = ::CopyStreamerUpdateRequests; + streamerInterface.GetStreamerDynamicBuffer = ::GetStreamerDynamicBuffer; + streamerInterface.CmdUploadStreamerUpdateRequests = ::CmdUploadStreamerUpdateRequests; + + return ValidateFunctionTable(streamerInterface); } #pragma endregion diff --git a/Source/D3D11/FenceD3D11.cpp b/Source/D3D11/FenceD3D11.cpp index 66d4ba3b..aad8817a 100644 
--- a/Source/D3D11/FenceD3D11.cpp +++ b/Source/D3D11/FenceD3D11.cpp @@ -35,9 +35,7 @@ inline uint64_t FenceD3D11::GetFenceValue() const { return m_Value; } -inline void FenceD3D11::QueueSignal(CommandQueueD3D11& commandQueue, uint64_t value) { - MaybeUnused(commandQueue); - +void FenceD3D11::QueueSignal(uint64_t value) { if (m_Fence) { HRESULT hr = m_Device.GetImmediateContext()->Signal(m_Fence, value); RETURN_ON_FAILURE(&m_Device, hr == S_OK, ReturnVoid(), "D3D11DeviceContext4::Signal() - FAILED!"); @@ -47,9 +45,7 @@ inline void FenceD3D11::QueueSignal(CommandQueueD3D11& commandQueue, uint64_t va } } -inline void FenceD3D11::QueueWait(CommandQueueD3D11& commandQueue, uint64_t value) { - MaybeUnused(commandQueue); - +void FenceD3D11::QueueWait(uint64_t value) { if (m_Fence) { HRESULT hr = m_Device.GetImmediateContext()->Wait(m_Fence, value); RETURN_ON_FAILURE(&m_Device, hr == S_OK, ReturnVoid(), "D3D11DeviceContext4::Wait() - FAILED!"); @@ -65,7 +61,7 @@ inline void FenceD3D11::Wait(uint64_t value) { HRESULT hr = m_Fence->SetEventOnCompletion(value, m_Event); RETURN_ON_FAILURE(&m_Device, hr == S_OK, ReturnVoid(), "ID3D12Fence::SetEventOnCompletion() - FAILED!"); - uint32_t result = WaitForSingleObjectEx(m_Event, DEFAULT_TIMEOUT, TRUE); + uint32_t result = WaitForSingleObjectEx(m_Event, TIMEOUT_FENCE, TRUE); RETURN_ON_FAILURE(&m_Device, result == WAIT_OBJECT_0, ReturnVoid(), "WaitForSingleObjectEx() - FAILED!"); } } else { diff --git a/Source/D3D11/FenceD3D11.h b/Source/D3D11/FenceD3D11.h index c1517966..43b41a3f 100644 --- a/Source/D3D11/FenceD3D11.h +++ b/Source/D3D11/FenceD3D11.h @@ -33,11 +33,11 @@ struct FenceD3D11 { } uint64_t GetFenceValue() const; - void QueueSignal(CommandQueueD3D11& commandQueue, uint64_t value); - void QueueWait(CommandQueueD3D11& commandQueue, uint64_t value); + void QueueSignal(uint64_t value); + void QueueWait(uint64_t value); void Wait(uint64_t value); - private: +private: DeviceD3D11& m_Device; ComPtr m_Query; ComPtr m_Fence; diff 
--git a/Source/D3D11/FenceD3D11.hpp b/Source/D3D11/FenceD3D11.hpp index cddea8ce..6ebf08e1 100644 --- a/Source/D3D11/FenceD3D11.hpp +++ b/Source/D3D11/FenceD3D11.hpp @@ -6,14 +6,6 @@ static uint64_t NRI_CALL GetFenceValue(Fence& fence) { return ((FenceD3D11&)fence).GetFenceValue(); } -static void NRI_CALL QueueSignal(CommandQueue& commandQueue, Fence& fence, uint64_t value) { - return ((FenceD3D11&)fence).QueueSignal((CommandQueueD3D11&)commandQueue, value); -} - -static void NRI_CALL QueueWait(CommandQueue& commandQueue, Fence& fence, uint64_t value) { - return ((FenceD3D11&)fence).QueueWait((CommandQueueD3D11&)commandQueue, value); -} - static void NRI_CALL Wait(Fence& fence, uint64_t value) { ((FenceD3D11&)fence).Wait(value); } @@ -24,4 +16,4 @@ static void NRI_CALL SetFenceDebugName(Fence& fence, const char* name) { #pragma endregion -Define_Core_Fence_PartiallyFillFunctionTable(D3D11) +Define_Core_Fence_PartiallyFillFunctionTable(D3D11); diff --git a/Source/D3D11/MemoryD3D11.h b/Source/D3D11/MemoryD3D11.h index 617dfb99..9b3a77cf 100644 --- a/Source/D3D11/MemoryD3D11.h +++ b/Source/D3D11/MemoryD3D11.h @@ -40,7 +40,7 @@ struct MemoryD3D11 { MaybeUnused(name); } - private: +private: DeviceD3D11& m_Device; MemoryLocation m_Location; MemoryResidencyPriority m_ResidencyPriority = MemoryResidencyPriority::DEFAULT; diff --git a/Source/D3D11/PipelineD3D11.h b/Source/D3D11/PipelineD3D11.h index aa4d6f78..61895d99 100644 --- a/Source/D3D11/PipelineD3D11.h +++ b/Source/D3D11/PipelineD3D11.h @@ -46,12 +46,12 @@ struct PipelineD3D11 { void SetDebugName(const char* name); - private: +private: inline bool IsCompute() const { return m_ComputeShader != nullptr; } - private: +private: DeviceD3D11& m_Device; const PipelineLayoutD3D11* m_PipelineLayout = nullptr; Vector m_InputAssemplyStrides; diff --git a/Source/D3D11/PipelineLayoutD3D11.h b/Source/D3D11/PipelineLayoutD3D11.h index 5a97c941..7a2829f5 100644 --- a/Source/D3D11/PipelineLayoutD3D11.h +++ 
b/Source/D3D11/PipelineLayoutD3D11.h @@ -41,8 +41,8 @@ struct BindingData { }; struct PipelineLayoutD3D11 { - inline PipelineLayoutD3D11(DeviceD3D11& device) - : m_Device(device), m_BindingSets(device.GetStdAllocator()), m_BindingRanges(device.GetStdAllocator()), m_ConstantBuffers(device.GetStdAllocator()) { + inline PipelineLayoutD3D11(DeviceD3D11& device) : + m_Device(device), m_BindingSets(device.GetStdAllocator()), m_BindingRanges(device.GetStdAllocator()), m_ConstantBuffers(device.GetStdAllocator()) { } inline DeviceD3D11& GetDevice() const { @@ -71,7 +71,7 @@ struct PipelineLayoutD3D11 { MaybeUnused(name); } - private: +private: template void BindDescriptorSetImpl(BindingState& currentBindingState, ID3D11DeviceContextBest* deferredContext, uint32_t setIndexInPipelineLayout, const DescriptorSetD3D11& descriptorSet, const uint32_t* dynamicConstantBufferOffsets) const; diff --git a/Source/D3D11/QueryPoolD3D11.cpp b/Source/D3D11/QueryPoolD3D11.cpp index 715bf683..71e90049 100644 --- a/Source/D3D11/QueryPoolD3D11.cpp +++ b/Source/D3D11/QueryPoolD3D11.cpp @@ -8,14 +8,14 @@ using namespace nri; Result QueryPoolD3D11::Create(const QueryPoolDesc& queryPoolDesc) { D3D11_QUERY_DESC queryDesc = {}; - if (queryPoolDesc.queryType == QueryType::TIMESTAMP) + if (queryPoolDesc.queryType == QueryType::TIMESTAMP || queryPoolDesc.queryType == QueryType::TIMESTAMP_COPY_QUEUE) queryDesc.Query = D3D11_QUERY_TIMESTAMP; else if (queryPoolDesc.queryType == QueryType::OCCLUSION) queryDesc.Query = D3D11_QUERY_OCCLUSION; else if (queryPoolDesc.queryType == QueryType::PIPELINE_STATISTICS) queryDesc.Query = D3D11_QUERY_PIPELINE_STATISTICS; else - return Result::INVALID_ARGUMENT; + return queryPoolDesc.queryType < QueryType::MAX_NUM ? 
Result::UNSUPPORTED : Result::INVALID_ARGUMENT; m_Type = queryPoolDesc.queryType; diff --git a/Source/D3D11/QueryPoolD3D11.h b/Source/D3D11/QueryPoolD3D11.h index 739e6754..c701b74f 100644 --- a/Source/D3D11/QueryPoolD3D11.h +++ b/Source/D3D11/QueryPoolD3D11.h @@ -32,7 +32,7 @@ struct QueryPoolD3D11 { return m_Type == QueryType::PIPELINE_STATISTICS ? sizeof(D3D11_QUERY_DATA_PIPELINE_STATISTICS) : sizeof(uint64_t); } - private: +private: DeviceD3D11& m_Device; Vector> m_QueryPool; QueryType m_Type = QueryType::MAX_NUM; diff --git a/Source/D3D11/QueryPoolD3D11.hpp b/Source/D3D11/QueryPoolD3D11.hpp index 4c5ff4b1..e243cd13 100644 --- a/Source/D3D11/QueryPoolD3D11.hpp +++ b/Source/D3D11/QueryPoolD3D11.hpp @@ -12,4 +12,4 @@ static uint32_t NRI_CALL GetQuerySize(const QueryPool& queryPool) { #pragma endregion -Define_Core_QueryPool_PartiallyFillFunctionTable(D3D11) +Define_Core_QueryPool_PartiallyFillFunctionTable(D3D11); diff --git a/Source/D3D11/SharedD3D11.h b/Source/D3D11/SharedD3D11.h index d432ea4f..0eee08d7 100644 --- a/Source/D3D11/SharedD3D11.h +++ b/Source/D3D11/SharedD3D11.h @@ -11,12 +11,8 @@ struct AGSContext; struct ID3D11DeviceContext4; typedef ID3D11DeviceContext4 ID3D11DeviceContextBest; -#define SHADER_EXT_UAV_SLOT 63 // TODO: D3D 11.1 assumed - namespace nri { -struct D3D11Extensions; - constexpr Mip_t NULL_TEXTURE_REGION_DESC = Mip_t(-1); enum class BufferType { @@ -189,5 +185,11 @@ struct SamplePositionsState { } // namespace nri -#include "D3D11Extensions.h" +#include "amdags/ags_lib/inc/amd_ags.h" +#include "nvapi/nvapi.h" + +namespace d3d11 { +#include "D3DExt.h" +} + #include "DeviceD3D11.h" diff --git a/Source/D3D11/SwapChainD3D11.cpp b/Source/D3D11/SwapChainD3D11.cpp index 61a0404d..4b5c8d77 100644 --- a/Source/D3D11/SwapChainD3D11.cpp +++ b/Source/D3D11/SwapChainD3D11.cpp @@ -82,7 +82,9 @@ Result SwapChainD3D11::Create(const SwapChainDesc& swapChainDesc) { desc.Scaling = DXGI_SCALING_NONE; desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; 
desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; - desc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + + if (swapChainDesc.waitable) + desc.Flags |= DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; if (isTearingAllowed) desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; @@ -120,21 +122,33 @@ Result SwapChainD3D11::Create(const SwapChainDesc& swapChainDesc) { } // Maximum frame latency - if (m_Version >= 2) { + uint8_t queuedFrameNum = swapChainDesc.queuedFrameNum; + if (queuedFrameNum == 0) + queuedFrameNum = swapChainDesc.waitable ? 1 : 2; + + if (swapChainDesc.waitable && m_Version >= 2) { + // https://docs.microsoft.com/en-us/windows/uwp/gaming/reduce-latency-with-dxgi-1-3-swap-chains#step-4-wait-before-rendering-each-frame // IMPORTANT: SetMaximumFrameLatency must be called BEFORE GetFrameLatencyWaitableObject! - hr = m_SwapChain->SetMaximumFrameLatency(swapChainDesc.textureNum); + hr = m_SwapChain->SetMaximumFrameLatency(queuedFrameNum); RETURN_ON_BAD_HRESULT(&m_Device, hr, "IDXGISwapChain2::SetMaximumFrameLatency()"); m_FrameLatencyWaitableObject = m_SwapChain->GetFrameLatencyWaitableObject(); + } else { + ComPtr dxgiDevice1; + hr = m_Device->QueryInterface(IID_PPV_ARGS(&dxgiDevice1)); + if (SUCCEEDED(hr)) + dxgiDevice1->SetMaximumFrameLatency(queuedFrameNum); } // Finalize + m_PresentId = GetSwapChainId(); m_Flags = desc.Flags; - m_SwapChainDesc = swapChainDesc; - m_SwapChainDesc.textureNum = 1; // IMPORTANT: only 1 texture is available in D3D11 + m_Desc = swapChainDesc; + m_Desc.textureNum = 1; // IMPORTANT: only 1 texture is available in D3D11 + m_Desc.allowLowLatency = swapChainDesc.allowLowLatency && m_Device.GetExt()->HasNVAPI(); - m_Textures.reserve(m_SwapChainDesc.textureNum); - for (uint32_t i = 0; i < m_SwapChainDesc.textureNum; i++) { + m_Textures.reserve(m_Desc.textureNum); + for (uint32_t i = 0; i < m_Desc.textureNum; i++) { ComPtr textureNative; hr = m_SwapChain->GetBuffer(i, IID_PPV_ARGS(&textureNative)); 
RETURN_ON_BAD_HRESULT(&m_Device, hr, "IDXGISwapChain::GetBuffer()"); @@ -158,28 +172,93 @@ Result SwapChainD3D11::Create(const SwapChainDesc& swapChainDesc) { //================================================================================================================ inline Texture* const* SwapChainD3D11::GetTextures(uint32_t& textureNum) const { - textureNum = m_SwapChainDesc.textureNum; + textureNum = m_Desc.textureNum; return (Texture**)m_Textures.data(); } inline uint32_t SwapChainD3D11::AcquireNextTexture() { - // https://docs.microsoft.com/en-us/windows/uwp/gaming/reduce-latency-with-dxgi-1-3-swap-chains#step-4-wait-before-rendering-each-frame + return 0; // IMPORTANT: only 1 texture is available in D3D11 +} + +inline Result SwapChainD3D11::WaitForPresent() { if (m_FrameLatencyWaitableObject) { - uint32_t result = WaitForSingleObjectEx(m_FrameLatencyWaitableObject, DEFAULT_TIMEOUT, TRUE); - if (result != WAIT_OBJECT_0) - REPORT_ERROR(&m_Device, "WaitForSingleObjectEx(): failed, result = 0x%08X!", result); + uint32_t result = WaitForSingleObjectEx(m_FrameLatencyWaitableObject, TIMEOUT_PRESENT, TRUE); + return result == WAIT_OBJECT_0 ? Result::SUCCESS : Result::FAILURE; } - return 0; // IMPORTANT: only 1 texture is available in D3D11 + return Result::UNSUPPORTED; } inline Result SwapChainD3D11::Present() { - uint32_t flags = (!m_SwapChainDesc.verticalSyncInterval && (m_Flags & DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING)) ? DXGI_PRESENT_ALLOW_TEARING : 0; // TODO: and not fullscreen - HRESULT hr = m_SwapChain->Present(m_SwapChainDesc.verticalSyncInterval, flags); + if (m_Desc.allowLowLatency) + SetLatencyMarker((LatencyMarker)PRESENT_START); + + uint32_t flags = (!m_Desc.verticalSyncInterval && (m_Flags & DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING)) ? 
DXGI_PRESENT_ALLOW_TEARING : 0; + HRESULT hr = m_SwapChain->Present(m_Desc.verticalSyncInterval, flags); RETURN_ON_BAD_HRESULT(&m_Device, hr, "IDXGISwapChain::Present()"); + if (m_Desc.allowLowLatency) + SetLatencyMarker((LatencyMarker)PRESENT_END); + + m_PresentId++; + return Result::SUCCESS; } +inline Result SwapChainD3D11::SetLatencySleepMode(const LatencySleepMode& latencySleepMode) { + NV_SET_SLEEP_MODE_PARAMS params = {NV_SET_SLEEP_MODE_PARAMS_VER}; + params.bLowLatencyMode = latencySleepMode.lowLatencyMode; + params.bLowLatencyBoost = latencySleepMode.lowLatencyBoost; + params.minimumIntervalUs = latencySleepMode.minIntervalUs; + params.bUseMarkersToOptimize = true; + + NvAPI_Status status = NvAPI_D3D_SetSleepMode(m_Device.GetNativeObject(), &params); + + return status == NVAPI_OK ? Result::SUCCESS : Result::FAILURE; +} + +inline Result SwapChainD3D11::SetLatencyMarker(LatencyMarker latencyMarker) { + NV_LATENCY_MARKER_PARAMS params = {NV_LATENCY_MARKER_PARAMS_VER}; + params.frameID = m_PresentId; + params.markerType = (NV_LATENCY_MARKER_TYPE)latencyMarker; + + NvAPI_Status status = NvAPI_D3D_SetLatencyMarker(m_Device.GetNativeObject(), &params); + + return status == NVAPI_OK ? Result::SUCCESS : Result::FAILURE; +} + +inline Result SwapChainD3D11::LatencySleep() { + NvAPI_Status status = NvAPI_D3D_Sleep(m_Device.GetNativeObject()); + + return status == NVAPI_OK ? 
Result::SUCCESS : Result::FAILURE; +} + +inline Result SwapChainD3D11::GetLatencyReport(LatencyReport& latencyReport) { + NV_LATENCY_RESULT_PARAMS params = {NV_LATENCY_RESULT_PARAMS_VER}; + NvAPI_Status status = NvAPI_D3D_GetLatency(m_Device.GetNativeObject(), &params); + + latencyReport = {}; + if (status == NVAPI_OK) { + const uint32_t i = 63; // the most recent frame + latencyReport.inputSampleTimeUs = params.frameReport[i].inputSampleTime; + latencyReport.simulationStartTimeUs = params.frameReport[i].simStartTime; + latencyReport.simulationEndTimeUs = params.frameReport[i].simEndTime; + latencyReport.renderSubmitStartTimeUs = params.frameReport[i].renderSubmitStartTime; + latencyReport.renderSubmitEndTimeUs = params.frameReport[i].renderSubmitEndTime; + latencyReport.presentStartTimeUs = params.frameReport[i].presentStartTime; + latencyReport.presentEndTimeUs = params.frameReport[i].presentEndTime; + latencyReport.driverStartTimeUs = params.frameReport[i].driverStartTime; + latencyReport.driverEndTimeUs = params.frameReport[i].driverEndTime; + latencyReport.osRenderQueueStartTimeUs = params.frameReport[i].osRenderQueueStartTime; + latencyReport.osRenderQueueEndTimeUs = params.frameReport[i].osRenderQueueEndTime; + latencyReport.gpuRenderStartTimeUs = params.frameReport[i].gpuRenderStartTime; + latencyReport.gpuRenderEndTimeUs = params.frameReport[i].gpuRenderEndTime; + + return Result::SUCCESS; + } + + return Result::FAILURE; +} + #include "SwapChainD3D11.hpp" diff --git a/Source/D3D11/SwapChainD3D11.h b/Source/D3D11/SwapChainD3D11.h index ad83eaf2..929016fd 100644 --- a/Source/D3D11/SwapChainD3D11.h +++ b/Source/D3D11/SwapChainD3D11.h @@ -31,19 +31,26 @@ struct SwapChainD3D11 : public DisplayDescHelper { } inline Result GetDisplayDesc(DisplayDesc& displayDesc) { - return DisplayDescHelper::GetDisplayDesc(m_SwapChainDesc.window.windows.hwnd, displayDesc); + return DisplayDescHelper::GetDisplayDesc(m_Desc.window.windows.hwnd, displayDesc); } Texture* const* 
GetTextures(uint32_t& textureNum) const; uint32_t AcquireNextTexture(); + Result WaitForPresent(); Result Present(); - private: + Result SetLatencySleepMode(const LatencySleepMode& latencySleepMode); + Result SetLatencyMarker(LatencyMarker latencyMarker); + Result LatencySleep(); + Result GetLatencyReport(LatencyReport& latencyReport); + +private: DeviceD3D11& m_Device; ComPtr m_SwapChain; Vector m_Textures; - SwapChainDesc m_SwapChainDesc = {}; + SwapChainDesc m_Desc = {}; HANDLE m_FrameLatencyWaitableObject = nullptr; + uint64_t m_PresentId = 0; UINT m_Flags = 0; uint8_t m_Version = 0; }; diff --git a/Source/D3D11/SwapChainD3D11.hpp b/Source/D3D11/SwapChainD3D11.hpp index 76e58fb3..816ff571 100644 --- a/Source/D3D11/SwapChainD3D11.hpp +++ b/Source/D3D11/SwapChainD3D11.hpp @@ -14,7 +14,11 @@ static uint32_t NRI_CALL AcquireNextSwapChainTexture(SwapChain& swapChain) { return ((SwapChainD3D11&)swapChain).AcquireNextTexture(); } -static Result NRI_CALL SwapChainPresent(SwapChain& swapChain) { +static Result NRI_CALL WaitForPresent(SwapChain& swapChain) { + return ((SwapChainD3D11&)swapChain).WaitForPresent(); +} + +static Result NRI_CALL QueuePresent(SwapChain& swapChain) { return ((SwapChainD3D11&)swapChain).Present(); } @@ -24,4 +28,25 @@ static Result NRI_CALL GetDisplayDesc(SwapChain& swapChain, DisplayDesc& display #pragma endregion -Define_SwapChain_PartiallyFillFunctionTable(D3D11) +#pragma region[ Low latency ] + +static Result SetLatencySleepMode(SwapChain& swapChain, const LatencySleepMode& latencySleepMode) { + return ((SwapChainD3D11&)swapChain).SetLatencySleepMode(latencySleepMode); +} + +static Result SetLatencyMarker(SwapChain& swapChain, LatencyMarker latencyMarker) { + return ((SwapChainD3D11&)swapChain).SetLatencyMarker(latencyMarker); +} + +static Result LatencySleep(SwapChain& swapChain) { + return ((SwapChainD3D11&)swapChain).LatencySleep(); +} + +static Result GetLatencyReport(const SwapChain& swapChain, LatencyReport& latencyReport) { + return 
((SwapChainD3D11&)swapChain).GetLatencyReport(latencyReport); +} + +#pragma endregion + +Define_SwapChain_PartiallyFillFunctionTable(D3D11); +Define_LowLatency_SwapChain_PartiallyFillFunctionTable(D3D11); diff --git a/Source/D3D11/TextureD3D11.cpp b/Source/D3D11/TextureD3D11.cpp index 5b2196ba..f6506081 100644 --- a/Source/D3D11/TextureD3D11.cpp +++ b/Source/D3D11/TextureD3D11.cpp @@ -138,21 +138,6 @@ uint32_t TextureD3D11::GetMipmappedSize(uint32_t w, uint32_t h, uint32_t d, Mip_ return size; } -Dim_t TextureD3D11::GetSize(Dim_t dimensionIndex, Mip_t mip) const { - assert(dimensionIndex < 3); - - Dim_t dim = m_Desc.depth; - if (dimensionIndex == 0) - dim = m_Desc.width; - else if (dimensionIndex == 1) - dim = m_Desc.height; - - dim = (Dim_t)std::max(dim >> mip, 1); - dim = Align(dim, dimensionIndex < 2 ? GetFormatProps(m_Desc.format).blockWidth : 1); - - return dim; -} - //================================================================================================================ // NRI //================================================================================================================ diff --git a/Source/D3D11/TextureD3D11.h b/Source/D3D11/TextureD3D11.h index 0b130b59..992c8a16 100644 --- a/Source/D3D11/TextureD3D11.h +++ b/Source/D3D11/TextureD3D11.h @@ -45,10 +45,13 @@ struct TextureD3D11 { return regionDesc.mipOffset + regionDesc.arrayOffset * m_Desc.mipNum; } + inline Dim_t GetSize(Dim_t dimensionIndex, Mip_t mip = 0) const { + return GetDimension(GraphicsAPI::D3D11, m_Desc, dimensionIndex, mip); + } + Result Create(const MemoryD3D11* memory); Result Create(const TextureD3D11Desc& textureDesc); uint32_t GetMipmappedSize(uint32_t w = 0, uint32_t h = 0, uint32_t d = 0, Mip_t mipNum = 0, Mip_t mipOffset = 0) const; - Dim_t GetSize(Dim_t dimensionIndex, Mip_t mip = 0) const; //================================================================================================================ // NRI @@ -60,7 +63,7 @@ struct TextureD3D11 { void 
GetMemoryInfo(MemoryLocation memoryLocation, MemoryDesc& memoryDesc) const; - private: +private: DeviceD3D11& m_Device; ComPtr m_Texture; TextureDesc m_Desc = {}; diff --git a/Source/D3D11/TextureD3D11.hpp b/Source/D3D11/TextureD3D11.hpp index b15d0e45..0969f907 100644 --- a/Source/D3D11/TextureD3D11.hpp +++ b/Source/D3D11/TextureD3D11.hpp @@ -19,4 +19,4 @@ static void NRI_CALL GetTextureMemoryInfo(const Texture& texture, MemoryLocation #pragma endregion -Define_Core_Texture_PartiallyFillFunctionTable(D3D11) +Define_Core_Texture_PartiallyFillFunctionTable(D3D11); diff --git a/Source/D3D12/AccelerationStructureD3D12.cpp b/Source/D3D12/AccelerationStructureD3D12.cpp index 88e4fa25..b5641505 100644 --- a/Source/D3D12/AccelerationStructureD3D12.cpp +++ b/Source/D3D12/AccelerationStructureD3D12.cpp @@ -8,8 +8,7 @@ using namespace nri; AccelerationStructureD3D12::~AccelerationStructureD3D12() { - if (m_Buffer) - Deallocate(m_Device.GetStdAllocator(), m_Buffer); + Deallocate(m_Device.GetStdAllocator(), m_Buffer); } Result AccelerationStructureD3D12::Create(const AccelerationStructureD3D12Desc& accelerationStructureDesc) { diff --git a/Source/D3D12/AccelerationStructureD3D12.h b/Source/D3D12/AccelerationStructureD3D12.h index 75163dd0..666e74db 100644 --- a/Source/D3D12/AccelerationStructureD3D12.h +++ b/Source/D3D12/AccelerationStructureD3D12.h @@ -38,7 +38,7 @@ struct AccelerationStructureD3D12 { void SetDebugName(const char* name); - private: +private: DeviceD3D12& m_Device; D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO m_PrebuildInfo = {}; BufferD3D12* m_Buffer = nullptr; diff --git a/Source/D3D12/AccelerationStructureD3D12.hpp b/Source/D3D12/AccelerationStructureD3D12.hpp index 148984ed..ed4e657b 100644 --- a/Source/D3D12/AccelerationStructureD3D12.hpp +++ b/Source/D3D12/AccelerationStructureD3D12.hpp @@ -32,4 +32,4 @@ static uint64_t NRI_CALL GetAccelerationStructureNativeObject(const Acceleration #pragma endregion 
-Define_RayTracing_AccelerationStructure_PartiallyFillFunctionTable(D3D12) +Define_RayTracing_AccelerationStructure_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/BufferD3D12.h b/Source/D3D12/BufferD3D12.h index 1a05b5ed..00031dd7 100644 --- a/Source/D3D12/BufferD3D12.h +++ b/Source/D3D12/BufferD3D12.h @@ -54,7 +54,7 @@ struct BufferD3D12 { void* Map(uint64_t offset, uint64_t size); void Unmap(); - private: +private: DeviceD3D12& m_Device; ComPtr m_Buffer; BufferDesc m_Desc = {}; diff --git a/Source/D3D12/BufferD3D12.hpp b/Source/D3D12/BufferD3D12.hpp index 92f8537a..b0c77e6c 100644 --- a/Source/D3D12/BufferD3D12.hpp +++ b/Source/D3D12/BufferD3D12.hpp @@ -27,4 +27,4 @@ static void NRI_CALL UnmapBuffer(Buffer& buffer) { #pragma endregion -Define_Core_Buffer_PartiallyFillFunctionTable(D3D12) +Define_Core_Buffer_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/CommandAllocatorD3D12.h b/Source/D3D12/CommandAllocatorD3D12.h index cb3481b7..ff68027a 100644 --- a/Source/D3D12/CommandAllocatorD3D12.h +++ b/Source/D3D12/CommandAllocatorD3D12.h @@ -37,7 +37,7 @@ struct CommandAllocatorD3D12 { Result CreateCommandBuffer(CommandBuffer*& commandBuffer); void Reset(); - private: +private: DeviceD3D12& m_Device; ComPtr m_CommandAllocator; D3D12_COMMAND_LIST_TYPE m_CommandListType = D3D12_COMMAND_LIST_TYPE(-1); diff --git a/Source/D3D12/CommandAllocatorD3D12.hpp b/Source/D3D12/CommandAllocatorD3D12.hpp index 4f3ddbac..960b2f45 100644 --- a/Source/D3D12/CommandAllocatorD3D12.hpp +++ b/Source/D3D12/CommandAllocatorD3D12.hpp @@ -16,4 +16,4 @@ static void NRI_CALL ResetCommandAllocator(CommandAllocator& commandAllocator) { #pragma endregion -Define_Core_CommandAllocator_PartiallyFillFunctionTable(D3D12) +Define_Core_CommandAllocator_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/CommandBufferD3D12.h b/Source/D3D12/CommandBufferD3D12.h index 3860dadf..5d531009 100644 --- a/Source/D3D12/CommandBufferD3D12.h +++ b/Source/D3D12/CommandBufferD3D12.h 
@@ -6,8 +6,8 @@ struct ID3D12CommandAllocator; struct ID3D12Resource; #ifdef NRI_USE_AGILITY_SDK -struct ID3D12GraphicsCommandList9; -typedef ID3D12GraphicsCommandList9 ID3D12GraphicsCommandListBest; +struct ID3D12GraphicsCommandList10; +typedef ID3D12GraphicsCommandList10 ID3D12GraphicsCommandListBest; #else struct ID3D12GraphicsCommandList6; typedef ID3D12GraphicsCommandList6 ID3D12GraphicsCommandListBest; @@ -101,7 +101,7 @@ struct CommandBufferD3D12 { void DrawMeshTasks(const DrawMeshTasksDesc& drawMeshTasksDesc); void DrawMeshTasksIndirect(const Buffer& buffer, uint64_t offset, uint32_t drawNum, uint32_t stride); - private: +private: DeviceD3D12& m_Device; ComPtr m_CommandAllocator; ComPtr m_GraphicsCommandList; diff --git a/Source/D3D12/CommandBufferD3D12.hpp b/Source/D3D12/CommandBufferD3D12.hpp index 29e08066..12e16413 100644 --- a/Source/D3D12/CommandBufferD3D12.hpp +++ b/Source/D3D12/CommandBufferD3D12.hpp @@ -229,5 +229,6 @@ static void NRI_CALL CmdDrawMeshTasksIndirect(CommandBuffer& commandBuffer, cons #pragma endregion -Define_Core_CommandBuffer_PartiallyFillFunctionTable(D3D12) Define_RayTracing_CommandBuffer_PartiallyFillFunctionTable(D3D12) - Define_MeshShader_CommandBuffer_PartiallyFillFunctionTable(D3D12) +Define_Core_CommandBuffer_PartiallyFillFunctionTable(D3D12); +Define_RayTracing_CommandBuffer_PartiallyFillFunctionTable(D3D12); +Define_MeshShader_CommandBuffer_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/CommandQueueD3D12.cpp b/Source/D3D12/CommandQueueD3D12.cpp index cd9e1aad..55f3df28 100644 --- a/Source/D3D12/CommandQueueD3D12.cpp +++ b/Source/D3D12/CommandQueueD3D12.cpp @@ -4,6 +4,9 @@ #include "CommandBufferD3D12.h" #include "CommandQueueD3D12.h" +#include "FenceD3D12.h" +#include "HelperDataUpload.h" +#include "HelperWaitIdle.h" using namespace nri; @@ -36,6 +39,12 @@ Result CommandQueueD3D12::Create(ID3D12CommandQueue* commandQueue) { 
//================================================================================================================ inline void CommandQueueD3D12::Submit(const QueueSubmitDesc& queueSubmitDesc) { + for (uint32_t i = 0; i < queueSubmitDesc.waitFenceNum; i++) { + const FenceSubmitDesc& fenceSubmitDesc = queueSubmitDesc.waitFences[i]; + FenceD3D12* fence = (FenceD3D12*)fenceSubmitDesc.fence; + fence->QueueWait(*this, fenceSubmitDesc.value); + } + if (queueSubmitDesc.commandBufferNum) { ID3D12CommandList** commandLists = STACK_ALLOC(ID3D12CommandList*, queueSubmitDesc.commandBufferNum); for (uint32_t j = 0; j < queueSubmitDesc.commandBufferNum; j++) @@ -43,6 +52,12 @@ inline void CommandQueueD3D12::Submit(const QueueSubmitDesc& queueSubmitDesc) { m_CommandQueue->ExecuteCommandLists(queueSubmitDesc.commandBufferNum, commandLists); } + + for (uint32_t i = 0; i < queueSubmitDesc.signalFenceNum; i++) { + const FenceSubmitDesc& fenceSubmitDesc = queueSubmitDesc.signalFences[i]; + FenceD3D12* fence = (FenceD3D12*)fenceSubmitDesc.fence; + fence->QueueSignal(*this, fenceSubmitDesc.value); + } } inline Result CommandQueueD3D12::UploadData( @@ -53,9 +68,7 @@ inline Result CommandQueueD3D12::UploadData( } inline Result CommandQueueD3D12::WaitForIdle() { - HelperWaitIdle helperWaitIdle(m_Device.GetCoreInterface(), (Device&)m_Device, (CommandQueue&)*this); - - return helperWaitIdle.WaitIdle(); + return WaitIdle(m_Device.GetCoreInterface(), (Device&)m_Device, (CommandQueue&)*this); } #include "CommandQueueD3D12.hpp" diff --git a/Source/D3D12/CommandQueueD3D12.h b/Source/D3D12/CommandQueueD3D12.h index 457906f1..88d0ebfa 100644 --- a/Source/D3D12/CommandQueueD3D12.h +++ b/Source/D3D12/CommandQueueD3D12.h @@ -44,7 +44,7 @@ struct CommandQueueD3D12 { Result UploadData(const TextureUploadDesc* textureUploadDescs, uint32_t textureUploadDescNum, const BufferUploadDesc* bufferUploadDescs, uint32_t bufferUploadDescNum); Result WaitForIdle(); - private: +private: DeviceD3D12& m_Device; ComPtr 
m_CommandQueue; D3D12_COMMAND_LIST_TYPE m_CommandListType = D3D12_COMMAND_LIST_TYPE(-1); diff --git a/Source/D3D12/CommandQueueD3D12.hpp b/Source/D3D12/CommandQueueD3D12.hpp index 1cf2f44f..8d126c0f 100644 --- a/Source/D3D12/CommandQueueD3D12.hpp +++ b/Source/D3D12/CommandQueueD3D12.hpp @@ -28,4 +28,16 @@ static Result NRI_CALL WaitForIdle(CommandQueue& commandQueue) { #pragma endregion -Define_Core_CommandQueue_PartiallyFillFunctionTable(D3D12) Define_Helper_CommandQueue_PartiallyFillFunctionTable(D3D12) +#pragma region[ Low latency ] + +static void NRI_CALL QueueSubmitTrackable(CommandQueue& commandQueue, const QueueSubmitDesc& workSubmissionDesc, const SwapChain& swapChain) { + MaybeUnused(swapChain); + + ((CommandQueueD3D12&)commandQueue).Submit(workSubmissionDesc); +} + +#pragma endregion + +Define_Core_CommandQueue_PartiallyFillFunctionTable(D3D12); +Define_Helper_CommandQueue_PartiallyFillFunctionTable(D3D12); +Define_LowLatency_CommandQueue_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/DescriptorD3D12.h b/Source/D3D12/DescriptorD3D12.h index 84645b72..697aa9c0 100644 --- a/Source/D3D12/DescriptorD3D12.h +++ b/Source/D3D12/DescriptorD3D12.h @@ -51,14 +51,14 @@ struct DescriptorD3D12 { MaybeUnused(name); } - private: +private: Result CreateConstantBufferView(const D3D12_CONSTANT_BUFFER_VIEW_DESC& desc); Result CreateShaderResourceView(ID3D12Resource* resource, const D3D12_SHADER_RESOURCE_VIEW_DESC& desc); Result CreateUnorderedAccessView(ID3D12Resource* resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC& desc, Format format); Result CreateRenderTargetView(ID3D12Resource* resource, const D3D12_RENDER_TARGET_VIEW_DESC& desc); Result CreateDepthStencilView(ID3D12Resource* resource, const D3D12_DEPTH_STENCIL_VIEW_DESC& desc); - private: +private: DeviceD3D12& m_Device; ID3D12Resource* m_Resource = nullptr; D3D12_GPU_VIRTUAL_ADDRESS m_BufferLocation = 0; diff --git a/Source/D3D12/DescriptorD3D12.hpp b/Source/D3D12/DescriptorD3D12.hpp index 
0e698d9a..abdb023f 100644 --- a/Source/D3D12/DescriptorD3D12.hpp +++ b/Source/D3D12/DescriptorD3D12.hpp @@ -15,4 +15,4 @@ static uint64_t NRI_CALL GetDescriptorNativeObject(const Descriptor& descriptor) #pragma endregion -Define_Core_Descriptor_PartiallyFillFunctionTable(D3D12) +Define_Core_Descriptor_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/DescriptorPoolD3D12.h b/Source/D3D12/DescriptorPoolD3D12.h index 3739a5f3..61e5d976 100644 --- a/Source/D3D12/DescriptorPoolD3D12.h +++ b/Source/D3D12/DescriptorPoolD3D12.h @@ -36,7 +36,7 @@ struct DescriptorPoolD3D12 { const PipelineLayout& pipelineLayout, uint32_t setIndexInPipelineLayout, DescriptorSet** descriptorSets, uint32_t instanceNum, uint32_t variableDescriptorNum); void Reset(); - private: +private: DeviceD3D12& m_Device; std::array m_DescriptorHeapDescs; std::array m_DescriptorNum = {}; diff --git a/Source/D3D12/DescriptorPoolD3D12.hpp b/Source/D3D12/DescriptorPoolD3D12.hpp index bb4a12dd..e17ae299 100644 --- a/Source/D3D12/DescriptorPoolD3D12.hpp +++ b/Source/D3D12/DescriptorPoolD3D12.hpp @@ -17,4 +17,4 @@ static void NRI_CALL ResetDescriptorPool(DescriptorPool& descriptorPool) { #pragma endregion -Define_Core_DescriptorPool_PartiallyFillFunctionTable(D3D12) +Define_Core_DescriptorPool_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/DescriptorSetD3D12.cpp b/Source/D3D12/DescriptorSetD3D12.cpp index 1ec2b3ee..a3c8a0c2 100644 --- a/Source/D3D12/DescriptorSetD3D12.cpp +++ b/Source/D3D12/DescriptorSetD3D12.cpp @@ -9,8 +9,8 @@ using namespace nri; -DescriptorSetD3D12::DescriptorSetD3D12(DescriptorPoolD3D12& desriptorPoolD3D12) - : m_DescriptorPoolD3D12(desriptorPoolD3D12), m_DynamicConstantBuffers(desriptorPoolD3D12.GetDevice().GetStdAllocator()) { +DescriptorSetD3D12::DescriptorSetD3D12(DescriptorPoolD3D12& desriptorPoolD3D12) : + m_DescriptorPoolD3D12(desriptorPoolD3D12), m_DynamicConstantBuffers(desriptorPoolD3D12.GetDevice().GetStdAllocator()) { } void 
DescriptorSetD3D12::Initialize(const DescriptorSetMapping* descriptorSetMapping, uint16_t dynamicConstantBufferNum) { diff --git a/Source/D3D12/DescriptorSetD3D12.h b/Source/D3D12/DescriptorSetD3D12.h index 2ce8673f..2004c98d 100644 --- a/Source/D3D12/DescriptorSetD3D12.h +++ b/Source/D3D12/DescriptorSetD3D12.h @@ -44,7 +44,7 @@ struct DescriptorSetD3D12 { void UpdateDynamicConstantBuffers(uint32_t baseBuffer, uint32_t bufferNum, const Descriptor* const* descriptors); void Copy(const DescriptorSetCopyDesc& descriptorSetCopyDesc); - private: +private: DescriptorPoolD3D12& m_DescriptorPoolD3D12; Vector m_DynamicConstantBuffers; std::array m_HeapOffset = {}; diff --git a/Source/D3D12/DescriptorSetD3D12.hpp b/Source/D3D12/DescriptorSetD3D12.hpp index 20e55ea7..5a990214 100644 --- a/Source/D3D12/DescriptorSetD3D12.hpp +++ b/Source/D3D12/DescriptorSetD3D12.hpp @@ -20,4 +20,4 @@ static void NRI_CALL CopyDescriptorSet(DescriptorSet& descriptorSet, const Descr #pragma endregion -Define_Core_DescriptorSet_PartiallyFillFunctionTable(D3D12) +Define_Core_DescriptorSet_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/DeviceD3D12.cpp b/Source/D3D12/DeviceD3D12.cpp index dcb41002..ed660123 100644 --- a/Source/D3D12/DeviceD3D12.cpp +++ b/Source/D3D12/DeviceD3D12.cpp @@ -10,10 +10,12 @@ #include "DescriptorD3D12.h" #include "DescriptorPoolD3D12.h" #include "FenceD3D12.h" +#include "HelperDeviceMemoryAllocator.h" #include "MemoryD3D12.h" #include "PipelineD3D12.h" #include "PipelineLayoutD3D12.h" #include "QueryPoolD3D12.h" +#include "Streamer.h" #include "SwapChainD3D12.h" #include "TextureD3D12.h" @@ -22,6 +24,7 @@ using namespace nri; static uint8_t QueryLatestDevice(ComPtr& in, ComPtr& out) { static const IID versions[] = { #ifdef NRI_USE_AGILITY_SDK + __uuidof(ID3D12Device14), __uuidof(ID3D12Device13), __uuidof(ID3D12Device12), __uuidof(ID3D12Device11), @@ -54,22 +57,22 @@ Result CreateDeviceD3D12(const DeviceCreationDesc& deviceCreationDesc, DeviceBas StdAllocator 
allocator(deviceCreationDesc.memoryAllocatorInterface); DeviceD3D12* implementation = Allocate(allocator, deviceCreationDesc.callbackInterface, allocator); - const nri::Result result = implementation->Create(deviceCreationDesc); - if (result != nri::Result::SUCCESS) { + Result result = implementation->Create(deviceCreationDesc); + if (result != Result::SUCCESS) { Deallocate(allocator, implementation); return result; } device = (DeviceBase*)implementation; - return nri::Result::SUCCESS; + return Result::SUCCESS; } Result CreateDeviceD3D12(const DeviceCreationD3D12Desc& deviceCreationDesc, DeviceBase*& device) { StdAllocator allocator(deviceCreationDesc.memoryAllocatorInterface); DeviceD3D12* implementation = Allocate(allocator, deviceCreationDesc.callbackInterface, allocator); - const Result res = implementation->Create(deviceCreationDesc); + Result res = implementation->Create(deviceCreationDesc); if (res == Result::SUCCESS) { device = implementation; @@ -80,13 +83,13 @@ Result CreateDeviceD3D12(const DeviceCreationD3D12Desc& deviceCreationDesc, Devi return res; } -DeviceD3D12::DeviceD3D12(const CallbackInterface& callbacks, StdAllocator& stdAllocator) - : DeviceBase(callbacks, stdAllocator), - m_DescriptorHeaps(GetStdAllocator()), - m_FreeDescriptors(GetStdAllocator()), - m_DrawCommandSignatures(GetStdAllocator()), - m_DrawIndexedCommandSignatures(GetStdAllocator()), - m_DrawMeshCommandSignatures(GetStdAllocator()) { +DeviceD3D12::DeviceD3D12(const CallbackInterface& callbacks, StdAllocator& stdAllocator) : + DeviceBase(callbacks, stdAllocator), + m_DescriptorHeaps(GetStdAllocator()), + m_FreeDescriptors(GetStdAllocator()), + m_DrawCommandSignatures(GetStdAllocator()), + m_DrawIndexedCommandSignatures(GetStdAllocator()), + m_DrawMeshCommandSignatures(GetStdAllocator()) { m_FreeDescriptors.resize(DESCRIPTOR_HEAP_TYPE_NUM, Vector(GetStdAllocator())); m_Desc.graphicsAPI = GraphicsAPI::D3D12; m_Desc.nriVersionMajor = NRI_VERSION_MAJOR; @@ -103,7 +106,7 @@ 
DeviceD3D12::~DeviceD3D12() { template Result DeviceD3D12::CreateImplementation(Interface*& entity, const Args&... args) { Implementation* implementation = Allocate(GetStdAllocator(), *this); - const Result result = implementation->Create(args...); + Result result = implementation->Create(args...); if (result == Result::SUCCESS) { entity = (Interface*)implementation; @@ -117,6 +120,8 @@ Result DeviceD3D12::CreateImplementation(Interface*& entity, const Args&... args Result DeviceD3D12::Create(const DeviceCreationD3D12Desc& deviceCreationDesc) { ComPtr device = (ID3D12DeviceBest*)deviceCreationDesc.d3d12Device; + if (!device) + return Result::INVALID_ARGUMENT; // Get adapter ComPtr dxgiFactory; @@ -138,6 +143,12 @@ Result DeviceD3D12::Create(const DeviceCreationD3D12Desc& deviceCreationDesc) { m_Desc.adapterDesc.deviceId = desc.DeviceId; m_Desc.adapterDesc.vendor = GetVendorFromID(desc.VendorId); + // Extensions + if (m_Desc.adapterDesc.vendor == Vendor::NVIDIA) + m_Ext.InitializeNVExt(this, deviceCreationDesc.isNVAPILoaded, true); + else if (m_Desc.adapterDesc.vendor == Vendor::AMD) + m_Ext.InitializeAMDExt(this, deviceCreationDesc.agsContext, true); + // Create device m_Version = QueryLatestDevice(device, m_Device); REPORT_INFO(this, "Using ID3D12Device%u...", m_Version); @@ -201,6 +212,12 @@ Result DeviceD3D12::Create(const DeviceCreationDesc& deviceCreationDesc) { m_Desc.adapterDesc.deviceId = desc.DeviceId; m_Desc.adapterDesc.vendor = GetVendorFromID(desc.VendorId); + // Extensions + if (m_Desc.adapterDesc.vendor == Vendor::NVIDIA) + m_Ext.InitializeNVExt(this, false, false); + else if (m_Desc.adapterDesc.vendor == Vendor::AMD) + m_Ext.InitializeAMDExt(this, nullptr, false); + // Create device ComPtr device; hr = D3D12CreateDevice(m_Adapter, D3D_FEATURE_LEVEL_12_0, __uuidof(ID3D12Device), (void**)&device); @@ -399,6 +416,7 @@ void DeviceD3D12::FillDesc() { hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3)); if 
(FAILED(hr)) REPORT_WARNING(this, "ID3D12Device::CheckFeatureSupport(options3) failed, result = 0x%08X!", hr); + m_Desc.isCopyQueueTimestampSupported = options3.CopyQueueTimestampQueriesSupported; D3D12_FEATURE_DATA_D3D12_OPTIONS4 options4 = {}; hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS4, &options4, sizeof(options4)); @@ -409,7 +427,7 @@ void DeviceD3D12::FillDesc() { hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS5, &options5, sizeof(options5)); if (FAILED(hr)) REPORT_WARNING(this, "ID3D12Device::CheckFeatureSupport(options5) failed, result = 0x%08X!", hr); - m_Desc.isRaytracingSupported = options5.RaytracingTier >= D3D12_RAYTRACING_TIER_1_0; + m_Desc.isRayTracingSupported = options5.RaytracingTier >= D3D12_RAYTRACING_TIER_1_0; m_Desc.isDispatchRaysIndirectSupported = options5.RaytracingTier >= D3D12_RAYTRACING_TIER_1_1; D3D12_FEATURE_DATA_D3D12_OPTIONS6 options6 = {}; @@ -490,6 +508,16 @@ void DeviceD3D12::FillDesc() { hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS19, &options19, sizeof(options19)); if (FAILED(hr)) REPORT_WARNING(this, "ID3D12Device::CheckFeatureSupport(options19) failed, result = 0x%08X!", hr); + + D3D12_FEATURE_DATA_D3D12_OPTIONS20 options20 = {}; + hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS20, &options20, sizeof(options20)); + if (FAILED(hr)) + REPORT_WARNING(this, "ID3D12Device::CheckFeatureSupport(options20) failed, result = 0x%08X!", hr); + + D3D12_FEATURE_DATA_D3D12_OPTIONS21 options21 = {}; + hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS21, &options21, sizeof(options21)); + if (FAILED(hr)) + REPORT_WARNING(this, "ID3D12Device::CheckFeatureSupport(options21) failed, result = 0x%08X!", hr); #endif const std::array levelsList = { @@ -503,7 +531,9 @@ void DeviceD3D12::FillDesc() { D3D12_FEATURE_DATA_FEATURE_LEVELS levels = {}; levels.NumFeatureLevels = (uint32_t)levelsList.size(); levels.pFeatureLevelsRequested = levelsList.data(); - 
m_Device->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &levels, sizeof(levels)); + hr = m_Device->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &levels, sizeof(levels)); + if (FAILED(hr)) + REPORT_WARNING(this, "ID3D12Device::CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS) failed, result = 0x%08X!", hr); uint64_t timestampFrequency = 0; { @@ -609,7 +639,7 @@ void DeviceD3D12::FillDesc() { m_Desc.computeShaderWorkGroupMaxDim[1] = D3D12_CS_THREAD_GROUP_MAX_Y; m_Desc.computeShaderWorkGroupMaxDim[2] = D3D12_CS_THREAD_GROUP_MAX_Z; - if (m_Desc.isRaytracingSupported) { + if (m_Desc.isRayTracingSupported) { m_Desc.rayTracingShaderGroupIdentifierSize = D3D12_RAYTRACING_SHADER_RECORD_BYTE_ALIGNMENT; m_Desc.rayTracingShaderTableAligment = D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT; m_Desc.rayTracingShaderTableMaxStride = std::numeric_limits::max(); @@ -653,13 +683,15 @@ void DeviceD3D12::FillDesc() { m_Desc.isProgrammableSampleLocationsSupported = options2.ProgrammableSamplePositionsTier != D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED; m_Desc.isComputeQueueSupported = true; m_Desc.isCopyQueueSupported = true; - m_Desc.isCopyQueueTimestampSupported = options3.CopyQueueTimestampQueriesSupported != 0; m_Desc.isRegisterAliasingSupported = true; m_Desc.isFloat16Supported = options4.Native16BitShaderOpsSupported; #ifdef NRI_USE_AGILITY_SDK m_Desc.isIndependentFrontAndBackStencilReferenceAndMasksSupported = options14.IndependentFrontAndBackStencilRefMaskSupported ? 
true : false; #endif m_Desc.isLineSmoothingSupported = true; + + m_Desc.isSwapChainSupported = HasOutput(); + m_Desc.isLowLatencySupported = m_Ext.HasNVAPI(); } //================================================================================================================ @@ -741,8 +773,7 @@ inline Result DeviceD3D12::CreateDescriptor(const Texture3DViewDesc& textureView return CreateImplementation(textureView, textureViewDesc); } -Result DeviceD3D12::CreateDescriptor(const AccelerationStructure& accelerationStructure, Descriptor*& accelerationStructureView) // TODO: not inline -{ +Result DeviceD3D12::CreateDescriptor(const AccelerationStructure& accelerationStructure, Descriptor*& accelerationStructureView) { // TODO: not inline return CreateImplementation(accelerationStructureView, accelerationStructure); } @@ -778,8 +809,7 @@ inline Result DeviceD3D12::CreateCommandBuffer(const CommandBufferD3D12Desc& com return CreateImplementation(commandBuffer, commandBufferDesc); } -Result DeviceD3D12::CreateBuffer(const BufferD3D12Desc& bufferDesc, Buffer*& buffer) // TODO: not inline -{ +Result DeviceD3D12::CreateBuffer(const BufferD3D12Desc& bufferDesc, Buffer*& buffer) { // TODO: not inline return CreateImplementation(buffer, bufferDesc); } @@ -872,13 +902,13 @@ inline FormatSupportBits DeviceD3D12::GetFormatSupport(Format format) const { } inline uint32_t DeviceD3D12::CalculateAllocationNumber(const ResourceGroupDesc& resourceGroupDesc) const { - HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this, m_StdAllocator); + HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this); return allocator.CalculateAllocationNumber(resourceGroupDesc); } -inline Result DeviceD3D12::AllocateAndBindMemory(const ResourceGroupDesc& resourceGroupDesc, nri::Memory** allocations) { - HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this, m_StdAllocator); +inline Result DeviceD3D12::AllocateAndBindMemory(const ResourceGroupDesc& 
resourceGroupDesc, Memory** allocations) { + HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this); return allocator.AllocateAndBindMemory(resourceGroupDesc, allocations); } @@ -895,4 +925,8 @@ inline void DeviceD3D12::DestroyAccelerationStructure(AccelerationStructure& acc Deallocate(GetStdAllocator(), (AccelerationStructureD3D12*)&accelerationStructure); } +namespace d3d12 { +#include "D3DExt.hpp" +} + #include "DeviceD3D12.hpp" diff --git a/Source/D3D12/DeviceD3D12.h b/Source/D3D12/DeviceD3D12.h index 0bbd9bbd..f77cd3fb 100644 --- a/Source/D3D12/DeviceD3D12.h +++ b/Source/D3D12/DeviceD3D12.h @@ -8,8 +8,8 @@ struct ID3D12CommandSignature; struct D3D12_CPU_DESCRIPTOR_HANDLE; #ifdef NRI_USE_AGILITY_SDK -struct ID3D12Device13; -typedef ID3D12Device13 ID3D12DeviceBest; +struct ID3D12Device14; +typedef ID3D12Device14 ID3D12DeviceBest; #else struct ID3D12Device5; typedef ID3D12Device5 ID3D12DeviceBest; @@ -38,6 +38,10 @@ struct DeviceD3D12 final : public DeviceBase { return m_Version; } + inline const d3d12::Ext* GetExt() const { + return &m_Ext; + } + inline IDXGIAdapter* GetAdapter() const { return m_Adapter; } @@ -144,16 +148,19 @@ struct DeviceD3D12 final : public DeviceBase { Result FillFunctionTable(RayTracingInterface& rayTracingInterface) const; Result FillFunctionTable(MeshShaderInterface& meshShaderInterface) const; Result FillFunctionTable(HelperInterface& helperInterface) const; + Result FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const; + Result FillFunctionTable(StreamerInterface& streamerInterface) const; - private: +private: void FillDesc(); MemoryType GetMemoryType(MemoryLocation memoryLocation, const D3D12_RESOURCE_DESC& resourceDesc) const; ComPtr CreateCommandSignature(D3D12_INDIRECT_ARGUMENT_TYPE indirectArgumentType, uint32_t stride); - private: +private: + d3d12::Ext m_Ext = {}; // don't sort: destructor must be called last! 
ComPtr m_Device; ComPtr m_Adapter; - std::array m_CommandQueues = {}; + std::array m_CommandQueues = {}; Vector m_DescriptorHeaps; Vector> m_FreeDescriptors; DeviceDesc m_Desc = {}; diff --git a/Source/D3D12/DeviceD3D12.hpp b/Source/D3D12/DeviceD3D12.hpp index 3c662489..93244178 100644 --- a/Source/D3D12/DeviceD3D12.hpp +++ b/Source/D3D12/DeviceD3D12.hpp @@ -1,9 +1,10 @@ // © 2021 NVIDIA Corporation -Declare_PartiallyFillFunctionTable_Functions(D3D12) +Declare_PartiallyFillFunctionTable_Functions(D3D12); + #pragma region[ Core ] - static const DeviceDesc& NRI_CALL GetDeviceDesc(const Device& device) { +static const DeviceDesc& NRI_CALL GetDeviceDesc(const Device& device) { return ((const DeviceD3D12&)device).GetDesc(); } @@ -255,28 +256,24 @@ Result DeviceD3D12::FillFunctionTable(CoreInterface& coreInterface) const { #pragma endregion -#pragma region[ SwapChain ] +#pragma region[ Helper ] -static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) { - return ((DeviceD3D12&)device).CreateSwapChain(swapChainDesc, swapChain); +static uint32_t NRI_CALL CountAllocationNum(Device& device, const ResourceGroupDesc& resourceGroupDesc) { + return ((DeviceD3D12&)device).CalculateAllocationNumber(resourceGroupDesc); } -static void NRI_CALL DestroySwapChain(SwapChain& swapChain) { - if (!(&swapChain)) - return; - - DeviceD3D12& device = ((SwapChainD3D12&)swapChain).GetDevice(); - device.DestroySwapChain(swapChain); +static Result NRI_CALL AllocateAndBindMemory(Device& device, const ResourceGroupDesc& resourceGroupDesc, Memory** allocations) { + return ((DeviceD3D12&)device).AllocateAndBindMemory(resourceGroupDesc, allocations); } -Result DeviceD3D12::FillFunctionTable(SwapChainInterface& swapChainInterface) const { - swapChainInterface = {}; - swapChainInterface.CreateSwapChain = ::CreateSwapChain; - swapChainInterface.DestroySwapChain = ::DestroySwapChain; +Result DeviceD3D12::FillFunctionTable(HelperInterface& 
helperInterface) const { + helperInterface = {}; + helperInterface.CalculateAllocationNumber = ::CountAllocationNum; + helperInterface.AllocateAndBindMemory = ::AllocateAndBindMemory; - SwapChain_PartiallyFillFunctionTableD3D12(swapChainInterface); + Helper_CommandQueue_PartiallyFillFunctionTableD3D12(helperInterface); - return ValidateFunctionTable(swapChainInterface); + return ValidateFunctionTable(helperInterface); } #pragma endregion @@ -316,6 +313,35 @@ Result DeviceD3D12::FillFunctionTable(WrapperD3D12Interface& wrapperD3D12Interfa #pragma endregion +#pragma region[ SwapChain ] + +static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) { + return ((DeviceD3D12&)device).CreateSwapChain(swapChainDesc, swapChain); +} + +static void NRI_CALL DestroySwapChain(SwapChain& swapChain) { + if (!(&swapChain)) + return; + + DeviceD3D12& device = ((SwapChainD3D12&)swapChain).GetDevice(); + device.DestroySwapChain(swapChain); +} + +Result DeviceD3D12::FillFunctionTable(SwapChainInterface& swapChainInterface) const { + swapChainInterface = {}; + if (!m_Desc.isSwapChainSupported) + return Result::UNSUPPORTED; + + swapChainInterface.CreateSwapChain = ::CreateSwapChain; + swapChainInterface.DestroySwapChain = ::DestroySwapChain; + + SwapChain_PartiallyFillFunctionTableD3D12(swapChainInterface); + + return ValidateFunctionTable(swapChainInterface); +} + +#pragma endregion + #pragma region[ RayTracing ] static Result NRI_CALL CreateRayTracingPipeline(Device& device, const RayTracingPipelineDesc& rayTracingPipelineDesc, Pipeline*& pipeline) { @@ -342,8 +368,7 @@ void FillFunctionTablePipelineD3D12(RayTracingInterface& rayTracingInterface); Result DeviceD3D12::FillFunctionTable(RayTracingInterface& rayTracingInterface) const { rayTracingInterface = {}; - - if (!m_Desc.isRaytracingSupported) + if (!m_Desc.isRayTracingSupported) return Result::UNSUPPORTED; FillFunctionTablePipelineD3D12(rayTracingInterface); @@ -363,11 
+388,10 @@ Result DeviceD3D12::FillFunctionTable(RayTracingInterface& rayTracingInterface) #pragma region[ MeshShader ] Result DeviceD3D12::FillFunctionTable(MeshShaderInterface& meshShaderInterface) const { + meshShaderInterface = {}; if (!m_Desc.isMeshShaderSupported) return Result::UNSUPPORTED; - meshShaderInterface = {}; - MeshShader_CommandBuffer_PartiallyFillFunctionTableD3D12(meshShaderInterface); return ValidateFunctionTable(meshShaderInterface); @@ -375,24 +399,84 @@ Result DeviceD3D12::FillFunctionTable(MeshShaderInterface& meshShaderInterface) #pragma endregion -#pragma region[ Helper ] +#pragma region[ LowLatency ] -static uint32_t NRI_CALL CountAllocationNum(Device& device, const ResourceGroupDesc& resourceGroupDesc) { - return ((DeviceD3D12&)device).CalculateAllocationNumber(resourceGroupDesc); +Result DeviceD3D12::FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const { + lowLatencyInterface = {}; + if (!m_Desc.isLowLatencySupported) + return Result::UNSUPPORTED; + + LowLatency_CommandQueue_PartiallyFillFunctionTableD3D12(lowLatencyInterface); + LowLatency_SwapChain_PartiallyFillFunctionTableD3D12(lowLatencyInterface); + + return ValidateFunctionTable(lowLatencyInterface); } -static Result NRI_CALL AllocateAndBindMemory(Device& device, const ResourceGroupDesc& resourceGroupDesc, Memory** allocations) { - return ((DeviceD3D12&)device).AllocateAndBindMemory(resourceGroupDesc, allocations); +#pragma endregion + +#pragma region[ Streamer ] + +static Result CreateStreamer(Device& device, const StreamerDesc& streamerDesc, Streamer*& streamer) { + DeviceD3D12& deviceD3D12 = (DeviceD3D12&)device; + + StreamerImpl* implementation = Allocate(deviceD3D12.GetStdAllocator(), device, deviceD3D12.GetCoreInterface()); + Result res = implementation->Create(streamerDesc); + + if (res == Result::SUCCESS) { + streamer = (Streamer*)implementation; + return Result::SUCCESS; + } + + Deallocate(deviceD3D12.GetStdAllocator(), implementation); + + return res; } 
-Result DeviceD3D12::FillFunctionTable(HelperInterface& helperInterface) const { - helperInterface = {}; - helperInterface.CalculateAllocationNumber = ::CountAllocationNum; - helperInterface.AllocateAndBindMemory = ::AllocateAndBindMemory; +static void DestroyStreamer(Streamer& streamer) { + Deallocate(((DeviceBase&)((StreamerImpl&)streamer).GetDevice()).GetStdAllocator(), (StreamerImpl*)&streamer); +} - Helper_CommandQueue_PartiallyFillFunctionTableD3D12(helperInterface); +static Buffer* GetStreamerConstantBuffer(Streamer& streamer) { + return ((StreamerImpl&)streamer).GetConstantBuffer(); +} - return ValidateFunctionTable(helperInterface); +static uint32_t UpdateStreamerConstantBuffer(Streamer& streamer, const void* data, uint32_t dataSize) { + return ((StreamerImpl&)streamer).UpdateStreamerConstantBuffer(data, dataSize); +} + +static uint64_t AddStreamerBufferUpdateRequest(Streamer& streamer, const BufferUpdateRequestDesc& bufferUpdateRequestDesc) { + return ((StreamerImpl&)streamer).AddStreamerBufferUpdateRequest(bufferUpdateRequestDesc); +} + +static uint64_t AddStreamerTextureUpdateRequest(Streamer& streamer, const TextureUpdateRequestDesc& textureUpdateRequestDesc) { + return ((StreamerImpl&)streamer).AddStreamerTextureUpdateRequest(textureUpdateRequestDesc); +} + +static Result CopyStreamerUpdateRequests(Streamer& streamer) { + return ((StreamerImpl&)streamer).CopyStreamerUpdateRequests(); +} + +static Buffer* GetStreamerDynamicBuffer(Streamer& streamer) { + return ((StreamerImpl&)streamer).GetDynamicBuffer(); +} + +static void CmdUploadStreamerUpdateRequests(CommandBuffer& commandBuffer, Streamer& streamer) { + ((StreamerImpl&)streamer).CmdUploadStreamerUpdateRequests(commandBuffer); +} + +Result DeviceD3D12::FillFunctionTable(StreamerInterface& streamerInterface) const { + streamerInterface = {}; + streamerInterface.CreateStreamer = ::CreateStreamer; + streamerInterface.DestroyStreamer = ::DestroyStreamer; + streamerInterface.GetStreamerConstantBuffer = 
::GetStreamerConstantBuffer; + streamerInterface.UpdateStreamerConstantBuffer = ::UpdateStreamerConstantBuffer; + streamerInterface.AddStreamerBufferUpdateRequest = ::AddStreamerBufferUpdateRequest; + streamerInterface.AddStreamerTextureUpdateRequest = ::AddStreamerTextureUpdateRequest; + streamerInterface.CopyStreamerUpdateRequests = ::CopyStreamerUpdateRequests; + streamerInterface.GetStreamerDynamicBuffer = ::GetStreamerDynamicBuffer; + streamerInterface.CmdUploadStreamerUpdateRequests = ::CmdUploadStreamerUpdateRequests; + + return ValidateFunctionTable(streamerInterface); } #pragma endregion diff --git a/Source/D3D12/FenceD3D12.cpp b/Source/D3D12/FenceD3D12.cpp index a399a589..5a774206 100644 --- a/Source/D3D12/FenceD3D12.cpp +++ b/Source/D3D12/FenceD3D12.cpp @@ -23,12 +23,12 @@ inline uint64_t FenceD3D12::GetFenceValue() const { return m_Fence->GetCompletedValue(); } -inline void FenceD3D12::QueueSignal(CommandQueueD3D12& commandQueue, uint64_t value) { +void FenceD3D12::QueueSignal(CommandQueueD3D12& commandQueue, uint64_t value) { HRESULT hr = ((ID3D12CommandQueue*)commandQueue)->Signal(m_Fence, value); RETURN_ON_FAILURE(&m_Device, hr == S_OK, ReturnVoid(), "ID3D12CommandQueue::Signal() - FAILED!"); } -inline void FenceD3D12::QueueWait(CommandQueueD3D12& commandQueue, uint64_t value) { +void FenceD3D12::QueueWait(CommandQueueD3D12& commandQueue, uint64_t value) { HRESULT hr = ((ID3D12CommandQueue*)commandQueue)->Wait(m_Fence, value); RETURN_ON_FAILURE(&m_Device, hr == S_OK, ReturnVoid(), "ID3D12CommandQueue::Wait() - FAILED!"); } @@ -41,7 +41,7 @@ inline void FenceD3D12::Wait(uint64_t value) { HRESULT hr = m_Fence->SetEventOnCompletion(value, m_Event); RETURN_ON_FAILURE(&m_Device, hr == S_OK, ReturnVoid(), "ID3D12Fence::SetEventOnCompletion() - FAILED!"); - uint32_t result = WaitForSingleObjectEx(m_Event, DEFAULT_TIMEOUT, TRUE); + uint32_t result = WaitForSingleObjectEx(m_Event, TIMEOUT_FENCE, TRUE); RETURN_ON_FAILURE(&m_Device, result == WAIT_OBJECT_0, 
ReturnVoid(), "WaitForSingleObjectEx() - FAILED!"); } } diff --git a/Source/D3D12/FenceD3D12.h b/Source/D3D12/FenceD3D12.h index f2c4b714..bc5a7e53 100644 --- a/Source/D3D12/FenceD3D12.h +++ b/Source/D3D12/FenceD3D12.h @@ -36,7 +36,7 @@ struct FenceD3D12 { void QueueWait(CommandQueueD3D12& commandQueue, uint64_t value); void Wait(uint64_t value); - private: +private: DeviceD3D12& m_Device; ComPtr m_Fence; HANDLE m_Event = 0; diff --git a/Source/D3D12/FenceD3D12.hpp b/Source/D3D12/FenceD3D12.hpp index 2c25d107..3ed9e6c0 100644 --- a/Source/D3D12/FenceD3D12.hpp +++ b/Source/D3D12/FenceD3D12.hpp @@ -6,14 +6,6 @@ static uint64_t NRI_CALL GetFenceValue(Fence& fence) { return ((FenceD3D12&)fence).GetFenceValue(); } -static void NRI_CALL QueueSignal(CommandQueue& commandQueue, Fence& fence, uint64_t value) { - return ((FenceD3D12&)fence).QueueSignal((CommandQueueD3D12&)commandQueue, value); -} - -static void NRI_CALL QueueWait(CommandQueue& commandQueue, Fence& fence, uint64_t value) { - return ((FenceD3D12&)fence).QueueWait((CommandQueueD3D12&)commandQueue, value); -} - static void NRI_CALL Wait(Fence& fence, uint64_t value) { ((FenceD3D12&)fence).Wait(value); } @@ -24,4 +16,4 @@ static void NRI_CALL SetFenceDebugName(Fence& fence, const char* name) { #pragma endregion -Define_Core_Fence_PartiallyFillFunctionTable(D3D12) +Define_Core_Fence_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/MemoryD3D12.h b/Source/D3D12/MemoryD3D12.h index 2b7366cb..db0eca7f 100644 --- a/Source/D3D12/MemoryD3D12.h +++ b/Source/D3D12/MemoryD3D12.h @@ -40,7 +40,7 @@ struct MemoryD3D12 { SET_D3D_DEBUG_OBJECT_NAME(m_Heap, name); } - private: +private: DeviceD3D12& m_Device; ComPtr m_Heap; D3D12_HEAP_DESC m_HeapDesc = {}; diff --git a/Source/D3D12/PipelineD3D12.h b/Source/D3D12/PipelineD3D12.h index 6071bdc8..c889b187 100644 --- a/Source/D3D12/PipelineD3D12.h +++ b/Source/D3D12/PipelineD3D12.h @@ -53,10 +53,10 @@ struct PipelineD3D12 { Result WriteShaderGroupIdentifiers(uint32_t 
baseShaderGroupIndex, uint32_t shaderGroupNum, void* buffer) const; - private: +private: Result CreateFromStream(const GraphicsPipelineDesc& graphicsPipelineDesc); - private: +private: DeviceD3D12& m_Device; ComPtr m_PipelineState; ComPtr m_StateObject; diff --git a/Source/D3D12/PipelineLayoutD3D12.cpp b/Source/D3D12/PipelineLayoutD3D12.cpp index a7210b18..12da7707 100644 --- a/Source/D3D12/PipelineLayoutD3D12.cpp +++ b/Source/D3D12/PipelineLayoutD3D12.cpp @@ -37,11 +37,11 @@ D3D12_ROOT_SIGNATURE_FLAGS GetRootSignatureStageFlags(const PipelineLayoutDesc& return flags; } -PipelineLayoutD3D12::PipelineLayoutD3D12(DeviceD3D12& device) - : m_DescriptorSetMappings(device.GetStdAllocator()), - m_DescriptorSetRootMappings(device.GetStdAllocator()), - m_DynamicConstantBufferMappings(device.GetStdAllocator()), - m_Device(device) { +PipelineLayoutD3D12::PipelineLayoutD3D12(DeviceD3D12& device) : + m_DescriptorSetMappings(device.GetStdAllocator()), + m_DescriptorSetRootMappings(device.GetStdAllocator()), + m_DynamicConstantBufferMappings(device.GetStdAllocator()), + m_Device(device) { } Result PipelineLayoutD3D12::Create(const PipelineLayoutDesc& pipelineLayoutDesc) { diff --git a/Source/D3D12/PipelineLayoutD3D12.h b/Source/D3D12/PipelineLayoutD3D12.h index 83ddab1b..87169028 100644 --- a/Source/D3D12/PipelineLayoutD3D12.h +++ b/Source/D3D12/PipelineLayoutD3D12.h @@ -66,12 +66,12 @@ struct PipelineLayoutD3D12 { SET_D3D_DEBUG_OBJECT_NAME(m_RootSignature, name); } - private: +private: template void SetDescriptorSetImpl( ID3D12GraphicsCommandList& graphicsCommandList, uint32_t setIndexInPipelineLayout, const DescriptorSet& descriptorSet, const uint32_t* dynamicConstantBufferOffsets) const; - private: +private: ComPtr m_RootSignature; bool m_IsGraphicsPipelineLayout = false; uint32_t m_PushConstantsBaseIndex = 0; diff --git a/Source/D3D12/QueryPoolD3D12.cpp b/Source/D3D12/QueryPoolD3D12.cpp index be4ee070..3ed11d0f 100644 --- a/Source/D3D12/QueryPoolD3D12.cpp +++ 
b/Source/D3D12/QueryPoolD3D12.cpp @@ -11,7 +11,12 @@ Result QueryPoolD3D12::Create(const QueryPoolDesc& queryPoolDesc) { if (queryPoolDesc.queryType == QueryType::TIMESTAMP) { m_QuerySize = sizeof(uint64_t); m_QueryType = D3D12_QUERY_TYPE_TIMESTAMP; - desc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; // TODO: D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP for copy queues... + desc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + } else if (queryPoolDesc.queryType == QueryType::TIMESTAMP_COPY_QUEUE) { + // Prerequisite: D3D12_FEATURE_D3D12_OPTIONS3 + m_QuerySize = sizeof(uint64_t); + m_QueryType = D3D12_QUERY_TYPE_TIMESTAMP; + desc.Type = D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP; } else if (queryPoolDesc.queryType == QueryType::OCCLUSION) { m_QuerySize = sizeof(uint64_t); m_QueryType = D3D12_QUERY_TYPE_OCCLUSION; diff --git a/Source/D3D12/QueryPoolD3D12.h b/Source/D3D12/QueryPoolD3D12.h index 61162ba8..f7ed5ca9 100644 --- a/Source/D3D12/QueryPoolD3D12.h +++ b/Source/D3D12/QueryPoolD3D12.h @@ -45,7 +45,7 @@ struct QueryPoolD3D12 { return m_QuerySize; } - private: +private: Result CreateReadbackBuffer(const QueryPoolDesc& queryPoolDesc); DeviceD3D12& m_Device; diff --git a/Source/D3D12/QueryPoolD3D12.hpp b/Source/D3D12/QueryPoolD3D12.hpp index ce426d20..46d848f7 100644 --- a/Source/D3D12/QueryPoolD3D12.hpp +++ b/Source/D3D12/QueryPoolD3D12.hpp @@ -12,4 +12,4 @@ static uint32_t NRI_CALL GetQuerySize(const QueryPool& queryPool) { #pragma endregion -Define_Core_QueryPool_PartiallyFillFunctionTable(D3D12) +Define_Core_QueryPool_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/SharedD3D12.h b/Source/D3D12/SharedD3D12.h index e4e790c0..f6b10ba2 100644 --- a/Source/D3D12/SharedD3D12.h +++ b/Source/D3D12/SharedD3D12.h @@ -71,4 +71,11 @@ D3D12_DESCRIPTOR_RANGE_TYPE GetDescriptorRangesType(DescriptorType descriptorTyp D3D12_RESOURCE_DIMENSION GetResourceDimension(TextureType textureType); } // namespace nri +#include "amdags/ags_lib/inc/amd_ags.h" +#include "nvapi/nvapi.h" + 
+namespace d3d12 { +#include "D3DExt.h" +} + #include "DeviceD3D12.h" diff --git a/Source/D3D12/SwapChainD3D12.cpp b/Source/D3D12/SwapChainD3D12.cpp index 332378b1..8dc1a320 100644 --- a/Source/D3D12/SwapChainD3D12.cpp +++ b/Source/D3D12/SwapChainD3D12.cpp @@ -85,7 +85,9 @@ Result SwapChainD3D12::Create(const SwapChainDesc& swapChainDesc) { desc.Scaling = DXGI_SCALING_NONE; desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; - desc.Flags = DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + + if (swapChainDesc.waitable) + desc.Flags |= DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; if (isTearingAllowed) desc.Flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING; @@ -123,20 +125,32 @@ Result SwapChainD3D12::Create(const SwapChainDesc& swapChainDesc) { } // Maximum frame latency - if (m_Version >= 2) { + uint8_t queuedFrameNum = swapChainDesc.queuedFrameNum; + if (queuedFrameNum == 0) + queuedFrameNum = swapChainDesc.waitable ? 1 : 2; + + if (swapChainDesc.waitable && m_Version >= 2) { + // https://docs.microsoft.com/en-us/windows/uwp/gaming/reduce-latency-with-dxgi-1-3-swap-chains#step-4-wait-before-rendering-each-frame // IMPORTANT: SetMaximumFrameLatency must be called BEFORE GetFrameLatencyWaitableObject! 
- hr = m_SwapChain->SetMaximumFrameLatency(swapChainDesc.textureNum); + hr = m_SwapChain->SetMaximumFrameLatency(queuedFrameNum); RETURN_ON_BAD_HRESULT(&m_Device, hr, "IDXGISwapChain2::SetMaximumFrameLatency()"); m_FrameLatencyWaitableObject = m_SwapChain->GetFrameLatencyWaitableObject(); + } else { + ComPtr dxgiDevice1; + hr = m_Device->QueryInterface(IID_PPV_ARGS(&dxgiDevice1)); + if (SUCCEEDED(hr)) + dxgiDevice1->SetMaximumFrameLatency(queuedFrameNum); } // Finalize + m_PresentId = GetSwapChainId(); m_Flags = desc.Flags; - m_SwapChainDesc = swapChainDesc; + m_Desc = swapChainDesc; + m_Desc.allowLowLatency = swapChainDesc.allowLowLatency && m_Device.GetExt()->HasNVAPI(); - m_Textures.reserve(m_SwapChainDesc.textureNum); - for (uint32_t i = 0; i < m_SwapChainDesc.textureNum; i++) { + m_Textures.reserve(m_Desc.textureNum); + for (uint32_t i = 0; i < m_Desc.textureNum; i++) { ComPtr textureNative; hr = m_SwapChain->GetBuffer(i, IID_PPV_ARGS(&textureNative)); RETURN_ON_BAD_HRESULT(&m_Device, hr, "IDXGISwapChain::GetBuffer()"); @@ -160,28 +174,93 @@ Result SwapChainD3D12::Create(const SwapChainDesc& swapChainDesc) { //================================================================================================================ inline Texture* const* SwapChainD3D12::GetTextures(uint32_t& textureNum) const { - textureNum = m_SwapChainDesc.textureNum; + textureNum = m_Desc.textureNum; return (Texture**)m_Textures.data(); } inline uint32_t SwapChainD3D12::AcquireNextTexture() { - // https://docs.microsoft.com/en-us/windows/uwp/gaming/reduce-latency-with-dxgi-1-3-swap-chains#step-4-wait-before-rendering-each-frame + return m_SwapChain->GetCurrentBackBufferIndex(); +} + +inline Result SwapChainD3D12::WaitForPresent() { if (m_FrameLatencyWaitableObject) { - uint32_t result = WaitForSingleObjectEx(m_FrameLatencyWaitableObject, DEFAULT_TIMEOUT, TRUE); - if (result != WAIT_OBJECT_0) - REPORT_ERROR(&m_Device, "WaitForSingleObjectEx(): failed, result = 0x%08X!", result); + 
uint32_t result = WaitForSingleObjectEx(m_FrameLatencyWaitableObject, TIMEOUT_PRESENT, TRUE); + return result == WAIT_OBJECT_0 ? Result::SUCCESS : Result::FAILURE; } - return m_SwapChain->GetCurrentBackBufferIndex(); + return Result::UNSUPPORTED; } inline Result SwapChainD3D12::Present() { - uint32_t flags = (!m_SwapChainDesc.verticalSyncInterval && (m_Flags & DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING)) ? DXGI_PRESENT_ALLOW_TEARING : 0; // TODO: and not fullscreen - HRESULT hr = m_SwapChain->Present(m_SwapChainDesc.verticalSyncInterval, flags); + if (m_Desc.allowLowLatency) + SetLatencyMarker((LatencyMarker)PRESENT_START); + + uint32_t flags = (!m_Desc.verticalSyncInterval && (m_Flags & DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING)) ? DXGI_PRESENT_ALLOW_TEARING : 0; + HRESULT hr = m_SwapChain->Present(m_Desc.verticalSyncInterval, flags); RETURN_ON_BAD_HRESULT(&m_Device, hr, "IDXGISwapChain::Present()"); + if (m_Desc.allowLowLatency) + SetLatencyMarker((LatencyMarker)PRESENT_END); + + m_PresentId++; + return Result::SUCCESS; } +inline Result SwapChainD3D12::SetLatencySleepMode(const LatencySleepMode& latencySleepMode) { + NV_SET_SLEEP_MODE_PARAMS params = {NV_SET_SLEEP_MODE_PARAMS_VER}; + params.bLowLatencyMode = latencySleepMode.lowLatencyMode; + params.bLowLatencyBoost = latencySleepMode.lowLatencyBoost; + params.minimumIntervalUs = latencySleepMode.minIntervalUs; + params.bUseMarkersToOptimize = true; + + NvAPI_Status status = NvAPI_D3D_SetSleepMode(m_Device.GetNativeObject(), &params); + + return status == NVAPI_OK ? Result::SUCCESS : Result::FAILURE; +} + +inline Result SwapChainD3D12::SetLatencyMarker(LatencyMarker latencyMarker) { + NV_LATENCY_MARKER_PARAMS params = {NV_LATENCY_MARKER_PARAMS_VER}; + params.frameID = m_PresentId; + params.markerType = (NV_LATENCY_MARKER_TYPE)latencyMarker; + + NvAPI_Status status = NvAPI_D3D_SetLatencyMarker(m_Device.GetNativeObject(), &params); + + return status == NVAPI_OK ? 
Result::SUCCESS : Result::FAILURE; +} + +inline Result SwapChainD3D12::LatencySleep() { + NvAPI_Status status = NvAPI_D3D_Sleep(m_Device.GetNativeObject()); + + return status == NVAPI_OK ? Result::SUCCESS : Result::FAILURE; +} + +inline Result SwapChainD3D12::GetLatencyReport(LatencyReport& latencyReport) { + NV_LATENCY_RESULT_PARAMS params = {NV_LATENCY_RESULT_PARAMS_VER}; + NvAPI_Status status = NvAPI_D3D_GetLatency(m_Device.GetNativeObject(), ¶ms); + + latencyReport = {}; + if (status == NVAPI_OK) { + const uint32_t i = 63; // the most recent frame + latencyReport.inputSampleTimeUs = params.frameReport[i].inputSampleTime; + latencyReport.simulationStartTimeUs = params.frameReport[i].simStartTime; + latencyReport.simulationEndTimeUs = params.frameReport[i].simEndTime; + latencyReport.renderSubmitStartTimeUs = params.frameReport[i].renderSubmitStartTime; + latencyReport.renderSubmitEndTimeUs = params.frameReport[i].renderSubmitEndTime; + latencyReport.presentStartTimeUs = params.frameReport[i].presentStartTime; + latencyReport.presentEndTimeUs = params.frameReport[i].presentEndTime; + latencyReport.driverStartTimeUs = params.frameReport[i].driverStartTime; + latencyReport.driverEndTimeUs = params.frameReport[i].driverEndTime; + latencyReport.osRenderQueueStartTimeUs = params.frameReport[i].osRenderQueueStartTime; + latencyReport.osRenderQueueEndTimeUs = params.frameReport[i].osRenderQueueEndTime; + latencyReport.gpuRenderStartTimeUs = params.frameReport[i].gpuRenderStartTime; + latencyReport.gpuRenderEndTimeUs = params.frameReport[i].gpuRenderEndTime; + + return Result::SUCCESS; + } + + return Result::FAILURE; +} + #include "SwapChainD3D12.hpp" diff --git a/Source/D3D12/SwapChainD3D12.h b/Source/D3D12/SwapChainD3D12.h index 731a7f64..139fc441 100644 --- a/Source/D3D12/SwapChainD3D12.h +++ b/Source/D3D12/SwapChainD3D12.h @@ -31,19 +31,26 @@ struct SwapChainD3D12 : public DisplayDescHelper { } inline Result GetDisplayDesc(DisplayDesc& displayDesc) { - return 
DisplayDescHelper::GetDisplayDesc(m_SwapChainDesc.window.windows.hwnd, displayDesc); + return DisplayDescHelper::GetDisplayDesc(m_Desc.window.windows.hwnd, displayDesc); } Texture* const* GetTextures(uint32_t& textureNum) const; uint32_t AcquireNextTexture(); + Result WaitForPresent(); Result Present(); - private: + Result SetLatencySleepMode(const LatencySleepMode& latencySleepMode); + Result SetLatencyMarker(LatencyMarker latencyMarker); + Result LatencySleep(); + Result GetLatencyReport(LatencyReport& latencyReport); + +private: DeviceD3D12& m_Device; ComPtr m_SwapChain; Vector m_Textures; - SwapChainDesc m_SwapChainDesc = {}; + SwapChainDesc m_Desc = {}; HANDLE m_FrameLatencyWaitableObject = nullptr; + uint64_t m_PresentId = 0; uint8_t m_Version = 0; UINT m_Flags = 0; }; diff --git a/Source/D3D12/SwapChainD3D12.hpp b/Source/D3D12/SwapChainD3D12.hpp index 78b49c04..cf055574 100644 --- a/Source/D3D12/SwapChainD3D12.hpp +++ b/Source/D3D12/SwapChainD3D12.hpp @@ -14,7 +14,11 @@ static uint32_t NRI_CALL AcquireNextSwapChainTexture(SwapChain& swapChain) { return ((SwapChainD3D12&)swapChain).AcquireNextTexture(); } -static Result NRI_CALL SwapChainPresent(SwapChain& swapChain) { +static Result NRI_CALL WaitForPresent(SwapChain& swapChain) { + return ((SwapChainD3D12&)swapChain).WaitForPresent(); +} + +static Result NRI_CALL QueuePresent(SwapChain& swapChain) { return ((SwapChainD3D12&)swapChain).Present(); } @@ -24,4 +28,25 @@ static Result NRI_CALL GetDisplayDesc(SwapChain& swapChain, DisplayDesc& display #pragma endregion -Define_SwapChain_PartiallyFillFunctionTable(D3D12) +#pragma region[ Low latency ] + +static Result SetLatencySleepMode(SwapChain& swapChain, const LatencySleepMode& latencySleepMode) { + return ((SwapChainD3D12&)swapChain).SetLatencySleepMode(latencySleepMode); +} + +static Result SetLatencyMarker(SwapChain& swapChain, LatencyMarker latencyMarker) { + return ((SwapChainD3D12&)swapChain).SetLatencyMarker(latencyMarker); +} + +static Result 
LatencySleep(SwapChain& swapChain) { + return ((SwapChainD3D12&)swapChain).LatencySleep(); +} + +static Result GetLatencyReport(const SwapChain& swapChain, LatencyReport& latencyReport) { + return ((SwapChainD3D12&)swapChain).GetLatencyReport(latencyReport); +} + +#pragma endregion + +Define_SwapChain_PartiallyFillFunctionTable(D3D12); +Define_LowLatency_SwapChain_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D12/TextureD3D12.cpp b/Source/D3D12/TextureD3D12.cpp index 2fa9af0b..8c29415a 100644 --- a/Source/D3D12/TextureD3D12.cpp +++ b/Source/D3D12/TextureD3D12.cpp @@ -81,23 +81,6 @@ Result TextureD3D12::BindMemory(const MemoryD3D12* memory, uint64_t offset) { return Result::SUCCESS; } -Dim_t TextureD3D12::GetSize(Dim_t dimensionIndex, Mip_t mip) const { - assert(dimensionIndex < 3); - - Dim_t dim; - if (dimensionIndex == 0) - dim = m_Desc.width; - else if (dimensionIndex == 1) - dim = m_Desc.height; - else - dim = m_Desc.depth; - - dim = (Dim_t)std::max(dim >> mip, 1); - dim = Align(dim, dimensionIndex < 2 ? 
GetFormatProps(m_Desc.format).blockWidth : 1); - - return dim; -} - //================================================================================================================ // NRI //================================================================================================================ diff --git a/Source/D3D12/TextureD3D12.h b/Source/D3D12/TextureD3D12.h index 4496303a..0e109d6d 100644 --- a/Source/D3D12/TextureD3D12.h +++ b/Source/D3D12/TextureD3D12.h @@ -40,10 +40,13 @@ struct TextureD3D12 { return arrayOffset * m_Desc.mipNum + mipOffset; } + inline Dim_t GetSize(Dim_t dimensionIndex, Mip_t mip = 0) const { + return GetDimension(GraphicsAPI::D3D12, m_Desc, dimensionIndex, mip); + } + Result Create(const TextureDesc& textureDesc); Result Create(const TextureD3D12Desc& textureDesc); Result BindMemory(const MemoryD3D12* memory, uint64_t offset); - Dim_t GetSize(Dim_t dimensionIndex, Mip_t mip = 0) const; //================================================================================================================ // NRI @@ -55,7 +58,7 @@ struct TextureD3D12 { void GetMemoryInfo(MemoryLocation memoryLocation, MemoryDesc& memoryDesc) const; - private: +private: DeviceD3D12& m_Device; TextureDesc m_Desc = {}; ComPtr m_Texture; diff --git a/Source/D3D12/TextureD3D12.hpp b/Source/D3D12/TextureD3D12.hpp index cc4a3fb2..87d7ae22 100644 --- a/Source/D3D12/TextureD3D12.hpp +++ b/Source/D3D12/TextureD3D12.hpp @@ -19,4 +19,4 @@ static void NRI_CALL GetTextureMemoryInfo(const Texture& texture, MemoryLocation #pragma endregion -Define_Core_Texture_PartiallyFillFunctionTable(D3D12) +Define_Core_Texture_PartiallyFillFunctionTable(D3D12); diff --git a/Source/D3D11/D3D11Extensions.h b/Source/Shared/D3DExt.h similarity index 90% rename from Source/D3D11/D3D11Extensions.h rename to Source/Shared/D3DExt.h index 5efa4b6b..d0b6d2c2 100644 --- a/Source/D3D11/D3D11Extensions.h +++ b/Source/Shared/D3DExt.h @@ -1,10 +1,5 @@ #pragma once -#include 
"amdags/ags_lib/inc/amd_ags.h" -#include "nvapi/nvapi.h" - -namespace nri { - struct AGSFunctionTable { AGS_INITIALIZE Initialize; AGS_DEINITIALIZE Deinitialize; @@ -16,8 +11,8 @@ struct AGSFunctionTable { AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINSTANCEDINDIRECT MultiDrawInstancedIndirect; }; -struct D3D11Extensions { - ~D3D11Extensions(); +struct Ext { + ~Ext(); inline bool HasNVAPI() const { return m_IsNvAPIAvailable; @@ -30,21 +25,21 @@ struct D3D11Extensions { void InitializeNVExt(const nri::DeviceBase* deviceBase, bool isNVAPILoadedInApp, bool isImported); void InitializeAMDExt(const nri::DeviceBase* deviceBase, AGSContext* agsContext, bool isImported); + // D3D11 +#if defined(__d3d11_h__) + void CreateDeviceUsingAGS(IDXGIAdapter* adapter, const D3D_FEATURE_LEVEL* featureLevels, size_t featureLevelNum, UINT flags, AGSDX11ReturnedParams& params); void BeginUAVOverlap(ID3D11DeviceContext* deviceContext) const; void EndUAVOverlap(ID3D11DeviceContext* deviceContext) const; void WaitForDrain(ID3D11DeviceContext* deviceContext, uint32_t flags) const; void SetDepthBounds(ID3D11DeviceContext* deviceContext, float minBound, float maxBound) const; void MultiDrawIndirect(ID3D11DeviceContext* deviceContext, ID3D11Buffer* buffer, uint64_t offset, uint32_t drawNum, uint32_t stride) const; void MultiDrawIndexedIndirect(ID3D11DeviceContext* deviceContext, ID3D11Buffer* buffer, uint64_t offset, uint32_t drawNum, uint32_t stride) const; - - void CreateDeviceUsingAGS(IDXGIAdapter* adapter, const D3D_FEATURE_LEVEL* featureLevels, size_t featureLevelNum, UINT flags, AGSDX11ReturnedParams& params); +#endif const nri::DeviceBase* m_DeviceBase = nullptr; AGSContext* m_AGSContext = nullptr; AGSFunctionTable m_AGS = {}; - HMODULE m_AGSLibrary = nullptr; + Library* m_AGSLibrary = nullptr; bool m_IsNvAPIAvailable = false; bool m_IsImported = false; }; - -} // namespace nri diff --git a/Source/D3D11/D3D11Extensions.cpp b/Source/Shared/D3DExt.hpp similarity index 77% rename from 
Source/D3D11/D3D11Extensions.cpp rename to Source/Shared/D3DExt.hpp index 1f53bdfc..c74b3074 100644 --- a/Source/D3D11/D3D11Extensions.cpp +++ b/Source/Shared/D3DExt.hpp @@ -1,12 +1,6 @@ // © 2021 NVIDIA Corporation -#include "SharedD3D11.h" - -#include "D3D11Extensions.h" - -using namespace nri; - -D3D11Extensions::~D3D11Extensions() { +Ext::~Ext() { if (m_IsNvAPIAvailable) NvAPI_Unload(); @@ -15,12 +9,12 @@ D3D11Extensions::~D3D11Extensions() { m_AGS.Deinitialize(m_AGSContext); m_AGSContext = nullptr; - FreeLibrary(m_AGSLibrary); + UnloadSharedLibrary(*m_AGSLibrary); m_AGSLibrary = nullptr; } } -void D3D11Extensions::InitializeNVExt(const nri::DeviceBase* deviceBase, bool isNVAPILoadedInApp, bool isImported) { +void Ext::InitializeNVExt(const nri::DeviceBase* deviceBase, bool isNVAPILoadedInApp, bool isImported) { m_DeviceBase = deviceBase; m_IsImported = isImported; @@ -34,7 +28,7 @@ void D3D11Extensions::InitializeNVExt(const nri::DeviceBase* deviceBase, bool is } } -void D3D11Extensions::InitializeAMDExt(const nri::DeviceBase* deviceBase, AGSContext* agsContext, bool isImported) { +void Ext::InitializeAMDExt(const nri::DeviceBase* deviceBase, AGSContext* agsContext, bool isImported) { m_DeviceBase = deviceBase; m_IsImported = isImported; @@ -43,21 +37,22 @@ void D3D11Extensions::InitializeAMDExt(const nri::DeviceBase* deviceBase, AGSCon return; } - m_AGSLibrary = LoadLibraryW(L"amd_ags_x64"); + m_AGSLibrary = LoadSharedLibrary("amd_ags_x64.dll"); if (!m_AGSLibrary) { REPORT_WARNING(m_DeviceBase, "AMDAGS is disabled, because 'amd_ags_x64' is not found"); return; } - m_AGS.Initialize = (AGS_INITIALIZE)GetProcAddress(m_AGSLibrary, "agsInitialize"); - m_AGS.Deinitialize = (AGS_DEINITIALIZE)GetProcAddress(m_AGSLibrary, "agsDeInitialize"); - m_AGS.CreateDevice = (AGS_DRIVEREXTENSIONSDX11_CREATEDEVICE)GetProcAddress(m_AGSLibrary, "agsDriverExtensionsDX11_CreateDevice"); - m_AGS.BeginUAVOverlap = (AGS_DRIVEREXTENSIONSDX11_BEGINUAVOVERLAP)GetProcAddress(m_AGSLibrary, 
"agsDriverExtensionsDX11_BeginUAVOverlap"); - m_AGS.EndUAVOverlap = (AGS_DRIVEREXTENSIONSDX11_ENDUAVOVERLAP)GetProcAddress(m_AGSLibrary, "agsDriverExtensionsDX11_EndUAVOverlap"); + m_AGS.Initialize = (AGS_INITIALIZE)GetSharedLibraryFunction(*m_AGSLibrary, "agsInitialize"); + m_AGS.Deinitialize = (AGS_DEINITIALIZE)GetSharedLibraryFunction(*m_AGSLibrary, "agsDeInitialize"); + m_AGS.CreateDevice = (AGS_DRIVEREXTENSIONSDX11_CREATEDEVICE)GetSharedLibraryFunction(*m_AGSLibrary, "agsDriverExtensionsDX11_CreateDevice"); + m_AGS.BeginUAVOverlap = (AGS_DRIVEREXTENSIONSDX11_BEGINUAVOVERLAP)GetSharedLibraryFunction(*m_AGSLibrary, "agsDriverExtensionsDX11_BeginUAVOverlap"); + m_AGS.EndUAVOverlap = (AGS_DRIVEREXTENSIONSDX11_ENDUAVOVERLAP)GetSharedLibraryFunction(*m_AGSLibrary, "agsDriverExtensionsDX11_EndUAVOverlap"); m_AGS.MultiDrawIndexedInstancedIndirect = - (AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINDEXEDINSTANCEDINDIRECT)GetProcAddress(m_AGSLibrary, "agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirect"); - m_AGS.SetDepthBounds = (AGS_DRIVEREXTENSIONSDX11_SETDEPTHBOUNDS)GetProcAddress(m_AGSLibrary, "agsDriverExtensionsDX11_SetDepthBounds"); - m_AGS.MultiDrawInstancedIndirect = (AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINSTANCEDINDIRECT)GetProcAddress(m_AGSLibrary, "agsDriverExtensionsDX11_MultiDrawInstancedIndirect"); + (AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINDEXEDINSTANCEDINDIRECT)GetSharedLibraryFunction(*m_AGSLibrary, "agsDriverExtensionsDX11_MultiDrawIndexedInstancedIndirect"); + m_AGS.SetDepthBounds = (AGS_DRIVEREXTENSIONSDX11_SETDEPTHBOUNDS)GetSharedLibraryFunction(*m_AGSLibrary, "agsDriverExtensionsDX11_SetDepthBounds"); + m_AGS.MultiDrawInstancedIndirect = + (AGS_DRIVEREXTENSIONSDX11_MULTIDRAWINSTANCEDINDIRECT)GetSharedLibraryFunction(*m_AGSLibrary, "agsDriverExtensionsDX11_MultiDrawInstancedIndirect"); const void** functionArray = (const void**)&m_AGS; const size_t functionArraySize = sizeof(AGSFunctionTable) / sizeof(void*); @@ -67,7 +62,7 @@ void 
D3D11Extensions::InitializeAMDExt(const nri::DeviceBase* deviceBase, AGSCon if (i != functionArraySize) { REPORT_WARNING(m_DeviceBase, "AMDAGS is disabled, because not all functions are found in the DLL"); - FreeLibrary(m_AGSLibrary); + UnloadSharedLibrary(*m_AGSLibrary); m_AGSLibrary = nullptr; return; @@ -79,7 +74,7 @@ void D3D11Extensions::InitializeAMDExt(const nri::DeviceBase* deviceBase, AGSCon const AGSReturnCode result = m_AGS.Initialize(AGS_CURRENT_VERSION, &config, &agsContext, &gpuInfo); if (result != AGS_SUCCESS || !agsContext) { REPORT_ERROR(m_DeviceBase, "Failed to initialize AMDAGS: %d", (int32_t)result); - FreeLibrary(m_AGSLibrary); + UnloadSharedLibrary(*m_AGSLibrary); m_AGSLibrary = nullptr; } } @@ -87,7 +82,32 @@ void D3D11Extensions::InitializeAMDExt(const nri::DeviceBase* deviceBase, AGSCon m_AGSContext = agsContext; } -void D3D11Extensions::BeginUAVOverlap(ID3D11DeviceContext* deviceContext) const { +// D3D11 +#if defined(__d3d11_h__) +void Ext::CreateDeviceUsingAGS(IDXGIAdapter* adapter, const D3D_FEATURE_LEVEL* featureLevels, size_t featureLevelNum, UINT flags, AGSDX11ReturnedParams& params) { + CHECK(m_AGSContext != nullptr, "AMDAGS is not available"); + + AGSDX11DeviceCreationParams deviceCreationParams = {}; + deviceCreationParams.pAdapter = adapter; + deviceCreationParams.DriverType = D3D_DRIVER_TYPE_UNKNOWN; + deviceCreationParams.Flags = flags; + deviceCreationParams.pFeatureLevels = featureLevels; + deviceCreationParams.FeatureLevels = (uint32_t)featureLevelNum; + deviceCreationParams.SDKVersion = D3D11_SDK_VERSION; + + AGSDX11ExtensionParams extensionsParams = {}; + extensionsParams.uavSlot = SHADER_EXT_UAV_SLOT; + + AGSReturnCode result = m_AGS.CreateDevice(m_AGSContext, &deviceCreationParams, &extensionsParams, ¶ms); + if (flags != 0 && result != AGS_SUCCESS) { + deviceCreationParams.Flags = 0; + result = m_AGS.CreateDevice(m_AGSContext, &deviceCreationParams, &extensionsParams, ¶ms); + } + + RETURN_ON_FAILURE(m_DeviceBase, result 
== AGS_SUCCESS, ReturnVoid(), "agsDriverExtensionsDX11_CreateDevice() failed: %d", (int32_t)result); +} + +void Ext::BeginUAVOverlap(ID3D11DeviceContext* deviceContext) const { if (m_IsNvAPIAvailable) { const NvAPI_Status res = NvAPI_D3D11_BeginUAVOverlap(deviceContext); RETURN_ON_FAILURE(m_DeviceBase, res == NVAPI_OK, ReturnVoid(), "NvAPI_D3D11_BeginUAVOverlap() - FAILED!"); @@ -97,7 +117,7 @@ void D3D11Extensions::BeginUAVOverlap(ID3D11DeviceContext* deviceContext) const } } -void D3D11Extensions::EndUAVOverlap(ID3D11DeviceContext* deviceContext) const { +void Ext::EndUAVOverlap(ID3D11DeviceContext* deviceContext) const { if (m_IsNvAPIAvailable) { const NvAPI_Status status = NvAPI_D3D11_EndUAVOverlap(deviceContext); RETURN_ON_FAILURE(m_DeviceBase, status == NVAPI_OK, ReturnVoid(), "NvAPI_D3D11_EndUAVOverlap() - FAILED!"); @@ -107,7 +127,7 @@ void D3D11Extensions::EndUAVOverlap(ID3D11DeviceContext* deviceContext) const { } } -void D3D11Extensions::WaitForDrain(ID3D11DeviceContext* deviceContext, uint32_t flags) const { +void Ext::WaitForDrain(ID3D11DeviceContext* deviceContext, uint32_t flags) const { if (m_IsNvAPIAvailable) { const NvAPI_Status res = NvAPI_D3D11_BeginUAVOverlapEx(deviceContext, flags); RETURN_ON_FAILURE(m_DeviceBase, res == NVAPI_OK, ReturnVoid(), "NvAPI_D3D11_BeginUAVOverlap() - FAILED!"); @@ -120,7 +140,7 @@ void D3D11Extensions::WaitForDrain(ID3D11DeviceContext* deviceContext, uint32_t } } -void D3D11Extensions::SetDepthBounds(ID3D11DeviceContext* deviceContext, float minBound, float maxBound) const { +void Ext::SetDepthBounds(ID3D11DeviceContext* deviceContext, float minBound, float maxBound) const { bool isEnabled = minBound != 0.0f || maxBound != 1.0f; if (m_IsNvAPIAvailable) { @@ -132,7 +152,7 @@ void D3D11Extensions::SetDepthBounds(ID3D11DeviceContext* deviceContext, float m } } -void D3D11Extensions::MultiDrawIndirect(ID3D11DeviceContext* deviceContext, ID3D11Buffer* buffer, uint64_t offset, uint32_t drawNum, uint32_t stride) const { 
+void Ext::MultiDrawIndirect(ID3D11DeviceContext* deviceContext, ID3D11Buffer* buffer, uint64_t offset, uint32_t drawNum, uint32_t stride) const { if (m_IsNvAPIAvailable && drawNum > 1) { const NvAPI_Status status = NvAPI_D3D11_MultiDrawInstancedIndirect(deviceContext, drawNum, buffer, (uint32_t)offset, stride); RETURN_ON_FAILURE(m_DeviceBase, status == NVAPI_OK, ReturnVoid(), "NvAPI_D3D11_MultiDrawInstancedIndirect() - FAILED!"); @@ -147,7 +167,7 @@ void D3D11Extensions::MultiDrawIndirect(ID3D11DeviceContext* deviceContext, ID3D } } -void D3D11Extensions::MultiDrawIndexedIndirect(ID3D11DeviceContext* deviceContext, ID3D11Buffer* buffer, uint64_t offset, uint32_t drawNum, uint32_t stride) const { +void Ext::MultiDrawIndexedIndirect(ID3D11DeviceContext* deviceContext, ID3D11Buffer* buffer, uint64_t offset, uint32_t drawNum, uint32_t stride) const { if (m_IsNvAPIAvailable && drawNum > 1) { const NvAPI_Status status = NvAPI_D3D11_MultiDrawIndexedInstancedIndirect(deviceContext, drawNum, buffer, (uint32_t)offset, stride); RETURN_ON_FAILURE(m_DeviceBase, status == NVAPI_OK, ReturnVoid(), "NvAPI_D3D11_MultiDrawIndexedInstancedIndirect() - FAILED!"); @@ -161,26 +181,4 @@ void D3D11Extensions::MultiDrawIndexedIndirect(ID3D11DeviceContext* deviceContex } } } - -void D3D11Extensions::CreateDeviceUsingAGS(IDXGIAdapter* adapter, const D3D_FEATURE_LEVEL* featureLevels, size_t featureLevelNum, UINT flags, AGSDX11ReturnedParams& params) { - CHECK(m_AGSContext != nullptr, "AMDAGS is not available"); - - AGSDX11DeviceCreationParams deviceCreationParams = {}; - deviceCreationParams.pAdapter = adapter; - deviceCreationParams.DriverType = D3D_DRIVER_TYPE_UNKNOWN; - deviceCreationParams.Flags = flags; - deviceCreationParams.pFeatureLevels = featureLevels; - deviceCreationParams.FeatureLevels = (uint32_t)featureLevelNum; - deviceCreationParams.SDKVersion = D3D11_SDK_VERSION; - - AGSDX11ExtensionParams extensionsParams = {}; - extensionsParams.uavSlot = SHADER_EXT_UAV_SLOT; - - 
AGSReturnCode result = m_AGS.CreateDevice(m_AGSContext, &deviceCreationParams, &extensionsParams, ¶ms); - if (flags != 0 && result != AGS_SUCCESS) { - deviceCreationParams.Flags = 0; - result = m_AGS.CreateDevice(m_AGSContext, &deviceCreationParams, &extensionsParams, ¶ms); - } - - RETURN_ON_FAILURE(m_DeviceBase, result == AGS_SUCCESS, ReturnVoid(), "agsDriverExtensionsDX11_CreateDevice() failed: %d", (int32_t)result); -} \ No newline at end of file +#endif diff --git a/Source/Shared/DeviceBase.h b/Source/Shared/DeviceBase.h index 72ea64f0..08208a23 100644 --- a/Source/Shared/DeviceBase.h +++ b/Source/Shared/DeviceBase.h @@ -11,61 +11,80 @@ struct DeviceBase { return m_StdAllocator; } - void ReportMessage(nri::Message messageType, const char* file, uint32_t line, const char* format, ...) const; + void ReportMessage(Message messageType, const char* file, uint32_t line, const char* format, ...) const; template - nri::Result ValidateFunctionTable(const T& table) const { + Result ValidateFunctionTable(const T& table) const { const void* const* const begin = (void**)&table; const void* const* const end = (void**)(&table + 1); for (const void* const* current = begin; current != end; current++) { if (*current == nullptr) { REPORT_ERROR(this, "Invalid function table: function #%u is NULL!", uint32_t(current - begin)); - return nri::Result::FAILURE; + return Result::FAILURE; } } - return nri::Result::SUCCESS; + return Result::SUCCESS; } virtual ~DeviceBase() { } + virtual const DeviceDesc& GetDesc() const = 0; virtual void Destroy() = 0; + virtual Result FillFunctionTable(CoreInterface& table) const { table = {}; return Result::UNSUPPORTED; } - virtual Result FillFunctionTable(SwapChainInterface& table) const { + + virtual Result FillFunctionTable(HelperInterface& table) const { table = {}; return Result::UNSUPPORTED; } + + virtual Result FillFunctionTable(StreamerInterface& table) const { + table = {}; + return Result::UNSUPPORTED; + } + virtual Result 
FillFunctionTable(WrapperD3D11Interface& table) const { table = {}; return Result::UNSUPPORTED; } + virtual Result FillFunctionTable(WrapperD3D12Interface& table) const { table = {}; return Result::UNSUPPORTED; } + virtual Result FillFunctionTable(WrapperVKInterface& table) const { table = {}; return Result::UNSUPPORTED; } + + virtual Result FillFunctionTable(SwapChainInterface& table) const { + table = {}; + return Result::UNSUPPORTED; + } + virtual Result FillFunctionTable(RayTracingInterface& table) const { table = {}; return Result::UNSUPPORTED; } + virtual Result FillFunctionTable(MeshShaderInterface& table) const { table = {}; return Result::UNSUPPORTED; } - virtual Result FillFunctionTable(HelperInterface& table) const { + + virtual Result FillFunctionTable(LowLatencyInterface& table) const { table = {}; return Result::UNSUPPORTED; } - protected: - nri::CallbackInterface m_CallbackInterface = {}; +protected: + CallbackInterface m_CallbackInterface = {}; StdAllocator m_StdAllocator; }; } // namespace nri @@ -85,7 +104,9 @@ struct DeviceBase { void RayTracing_CommandBuffer_PartiallyFillFunctionTable##API(RayTracingInterface& table); \ void RayTracing_AccelerationStructure_PartiallyFillFunctionTable##API(RayTracingInterface& table); \ void MeshShader_CommandBuffer_PartiallyFillFunctionTable##API(MeshShaderInterface& table); \ - void Helper_CommandQueue_PartiallyFillFunctionTable##API(HelperInterface& table); + void Helper_CommandQueue_PartiallyFillFunctionTable##API(HelperInterface& table); \ + void LowLatency_SwapChain_PartiallyFillFunctionTable##API(LowLatencyInterface& table); \ + void LowLatency_CommandQueue_PartiallyFillFunctionTable##API(LowLatencyInterface& table) #define Define_Core_Buffer_PartiallyFillFunctionTable(API) \ void Core_Buffer_PartiallyFillFunctionTable##API(CoreInterface& table) { \ @@ -177,8 +198,6 @@ struct DeviceBase { #define Define_Core_Fence_PartiallyFillFunctionTable(API) \ void 
Core_Fence_PartiallyFillFunctionTable##API(CoreInterface& table) { \ table.GetFenceValue = ::GetFenceValue; \ - table.QueueSignal = ::QueueSignal; \ - table.QueueWait = ::QueueWait; \ table.Wait = ::Wait; \ table.SetFenceDebugName = ::SetFenceDebugName; \ } @@ -201,7 +220,8 @@ struct DeviceBase { table.SetSwapChainDebugName = ::SetSwapChainDebugName; \ table.GetSwapChainTextures = ::GetSwapChainTextures; \ table.AcquireNextSwapChainTexture = ::AcquireNextSwapChainTexture; \ - table.SwapChainPresent = ::SwapChainPresent; \ + table.WaitForPresent = ::WaitForPresent; \ + table.QueuePresent = ::QueuePresent; \ table.GetDisplayDesc = ::GetDisplayDesc; \ } @@ -239,3 +259,16 @@ struct DeviceBase { table.UploadData = ::UploadData; \ table.WaitForIdle = ::WaitForIdle; \ } + +#define Define_LowLatency_SwapChain_PartiallyFillFunctionTable(API) \ + void LowLatency_SwapChain_PartiallyFillFunctionTable##API(LowLatencyInterface& table) { \ + table.SetLatencySleepMode = ::SetLatencySleepMode; \ + table.SetLatencyMarker = ::SetLatencyMarker; \ + table.LatencySleep = ::LatencySleep; \ + table.GetLatencyReport = ::GetLatencyReport; \ + } + +#define Define_LowLatency_CommandQueue_PartiallyFillFunctionTable(API) \ + void LowLatency_CommandQueue_PartiallyFillFunctionTable##API(LowLatencyInterface& table) { \ + table.QueueSubmitTrackable = ::QueueSubmitTrackable; \ + } diff --git a/Source/Shared/HelperDataUpload.cpp b/Source/Shared/HelperDataUpload.cpp index 97eb3f10..c1377414 100644 --- a/Source/Shared/HelperDataUpload.cpp +++ b/Source/Shared/HelperDataUpload.cpp @@ -1,11 +1,69 @@ #include "SharedExternal.h" +#include "HelperDataUpload.h" + using namespace nri; -constexpr size_t BASE_UPLOAD_BUFFER_SIZE = 65536; +constexpr uint32_t BARRIERS_PER_PASS = 256; +constexpr uint64_t COPY_ALIGMENT = 16; + +static void DoTransition(const CoreInterface& NRI, CommandBuffer* commandBuffer, bool isInitial, const TextureUploadDesc* textureUploadDescs, uint32_t textureDataDescNum) { + 
TextureBarrierDesc textureBarriers[BARRIERS_PER_PASS]; + + const AccessLayoutStage state = {AccessBits::COPY_DESTINATION, Layout::COPY_DESTINATION, StageBits::COPY}; + const AccessLayoutStage initialState = {AccessBits::UNKNOWN, Layout::UNKNOWN, StageBits::NONE}; + + for (uint32_t i = 0; i < textureDataDescNum;) { + const uint32_t passBegin = i; + const uint32_t passEnd = std::min(i + BARRIERS_PER_PASS, textureDataDescNum); + + for (; i < passEnd; i++) { + const TextureUploadDesc& textureUploadDesc = textureUploadDescs[i]; + const TextureDesc& textureDesc = NRI.GetTextureDesc(*textureUploadDesc.texture); + + TextureBarrierDesc& barrier = textureBarriers[i - passBegin]; + barrier = {}; + barrier.texture = textureUploadDesc.texture; + barrier.mipNum = textureDesc.mipNum; + barrier.arraySize = textureDesc.arraySize; + barrier.before = isInitial ? initialState : state; + barrier.after = isInitial ? state : textureUploadDesc.after; + } + + BarrierGroupDesc barrierGroup = {}; + barrierGroup.textures = textureBarriers; + barrierGroup.textureNum = uint16_t(passEnd - passBegin); + + NRI.CmdBarrier(*commandBuffer, barrierGroup); + } +} + +static void DoTransition(const CoreInterface& NRI, CommandBuffer* commandBuffer, bool isInitial, const BufferUploadDesc* bufferUploadDescs, uint32_t bufferUploadDescNum) { + BufferBarrierDesc bufferBarriers[BARRIERS_PER_PASS]; + + const AccessStage state = {AccessBits::COPY_DESTINATION, StageBits::COPY}; + const AccessStage initialState = {AccessBits::UNKNOWN, StageBits::NONE}; + + for (uint32_t i = 0; i < bufferUploadDescNum;) { + const uint32_t passBegin = i; + const uint32_t passEnd = std::min(i + BARRIERS_PER_PASS, bufferUploadDescNum); + + for (; i < passEnd; i++) { + const BufferUploadDesc& bufferUploadDesc = bufferUploadDescs[i]; + + BufferBarrierDesc& barrier = bufferBarriers[i - passBegin]; + barrier = {}; + barrier.buffer = bufferUploadDesc.buffer; + barrier.before = isInitial ? initialState : state; + barrier.after = isInitial ? 
state : bufferUploadDesc.after; + } + + BarrierGroupDesc barrierGroup = {}; + barrierGroup.buffers = bufferBarriers; + barrierGroup.bufferNum = uint16_t(passEnd - passBegin); -HelperDataUpload::HelperDataUpload(const CoreInterface& NRI, Device& device, CommandQueue& commandQueue) - : NRI(NRI), m_Device(device), m_CommandQueue(commandQueue), m_UploadBufferSize(BASE_UPLOAD_BUFFER_SIZE) { + NRI.CmdBarrier(*commandBuffer, barrierGroup); + } } Result HelperDataUpload::UploadData( @@ -18,24 +76,22 @@ Result HelperDataUpload::UploadData( const TextureSubresourceUploadDesc& subresource = textureUploadDescs[i].subresources[0]; - const uint32_t sliceRowNum = std::max(subresource.slicePitch / subresource.rowPitch, 1u); - const uint64_t alignedRowPitch = Align(subresource.rowPitch, deviceDesc.uploadBufferTextureRowAlignment); - const uint64_t alignedSlicePitch = Align(sliceRowNum * alignedRowPitch, deviceDesc.uploadBufferTextureSliceAlignment); - const uint64_t mipLevelContentSize = alignedSlicePitch * std::max(subresource.sliceNum, 1u); - m_UploadBufferSize = std::max(m_UploadBufferSize, mipLevelContentSize); + uint32_t sliceRowNum = std::max(subresource.slicePitch / subresource.rowPitch, 1u); + uint64_t alignedRowPitch = Align(subresource.rowPitch, deviceDesc.uploadBufferTextureRowAlignment); + uint64_t alignedSlicePitch = Align(sliceRowNum * alignedRowPitch, deviceDesc.uploadBufferTextureSliceAlignment); + uint64_t contentSize = alignedSlicePitch * std::max(subresource.sliceNum, 1u); + + m_UploadBufferSize = std::max(m_UploadBufferSize, contentSize); } m_UploadBufferSize = Align(m_UploadBufferSize, COPY_ALIGMENT); Result result = Create(); - - if (result != Result::SUCCESS) - return result; - - result = UploadTextures(textureUploadDescs, textureUploadDescNum); - - if (result == Result::SUCCESS) - result = UploadBuffers(bufferUploadDescs, bufferUploadDescNum); + if (result == Result::SUCCESS) { + result = UploadTextures(textureUploadDescs, textureUploadDescNum); + if (result 
== Result::SUCCESS) + result = UploadBuffers(bufferUploadDescs, bufferUploadDescNum); + } Destroy(); @@ -88,26 +144,25 @@ Result HelperDataUpload::UploadTextures(const TextureUploadDesc* textureUploadDe if (!textureDataDescNum) return Result::SUCCESS; - bool firstTime = true; + bool isInitial = true; uint32_t i = 0; Dim_t arrayOffset = 0; Mip_t mipOffset = 0; - Result result; while (i < textureDataDescNum) { - if (!firstTime) { - result = EndCommandBuffersAndSubmit(); + if (!isInitial) { + Result result = EndCommandBuffersAndSubmit(); if (result != Result::SUCCESS) return result; } - result = BeginCommandBuffers(); + Result result = NRI.BeginCommandBuffer(*m_CommandBuffer, nullptr); if (result != Result::SUCCESS) return result; - if (firstTime) { - DoTransition(textureUploadDescs, textureDataDescNum); - firstTime = false; + if (isInitial) { + DoTransition(NRI, m_CommandBuffer, true, textureUploadDescs, textureDataDescNum); + isInitial = false; } m_UploadBufferOffset = 0; @@ -120,7 +175,7 @@ Result HelperDataUpload::UploadTextures(const TextureUploadDesc* textureUploadDe return Result::OUT_OF_MEMORY; } - DoTransition(textureUploadDescs, textureDataDescNum); + DoTransition(NRI, m_CommandBuffer, false, textureUploadDescs, textureDataDescNum); return EndCommandBuffersAndSubmit(); } @@ -129,25 +184,24 @@ Result HelperDataUpload::UploadBuffers(const BufferUploadDesc* bufferUploadDescs if (!bufferUploadDescNum) return Result::SUCCESS; - bool firstTime = true; + bool isInitial = true; uint32_t i = 0; uint64_t bufferContentOffset = 0; - Result result; while (i < bufferUploadDescNum) { - if (!firstTime) { - result = EndCommandBuffersAndSubmit(); + if (!isInitial) { + Result result = EndCommandBuffersAndSubmit(); if (result != Result::SUCCESS) return result; } - result = BeginCommandBuffers(); + Result result = NRI.BeginCommandBuffer(*m_CommandBuffer, nullptr); if (result != Result::SUCCESS) return result; - if (firstTime) { - DoTransition(bufferUploadDescs, 
bufferUploadDescNum); - firstTime = false; + if (isInitial) { + DoTransition(NRI, m_CommandBuffer, true, bufferUploadDescs, bufferUploadDescNum); + isInitial = false; } m_UploadBufferOffset = 0; @@ -159,27 +213,27 @@ Result HelperDataUpload::UploadBuffers(const BufferUploadDesc* bufferUploadDescs NRI.UnmapBuffer(*m_UploadBuffer); } - DoTransition(bufferUploadDescs, bufferUploadDescNum); + DoTransition(NRI, m_CommandBuffer, false, bufferUploadDescs, bufferUploadDescNum); return EndCommandBuffersAndSubmit(); } -Result HelperDataUpload::BeginCommandBuffers() { - return NRI.BeginCommandBuffer(*m_CommandBuffer, nullptr); -} - Result HelperDataUpload::EndCommandBuffersAndSubmit() { const Result result = NRI.EndCommandBuffer(*m_CommandBuffer); if (result != Result::SUCCESS) return result; + FenceSubmitDesc fenceSubmitDesc = {}; + fenceSubmitDesc.fence = m_Fence; + fenceSubmitDesc.value = m_FenceValue; + QueueSubmitDesc queueSubmitDesc = {}; queueSubmitDesc.commandBufferNum = 1; queueSubmitDesc.commandBuffers = &m_CommandBuffer; + queueSubmitDesc.signalFences = &fenceSubmitDesc; + queueSubmitDesc.signalFenceNum = 1; NRI.QueueSubmit(m_CommandQueue, queueSubmitDesc); - - NRI.QueueSignal(m_CommandQueue, *m_Fence, m_FenceValue); NRI.Wait(*m_Fence, m_FenceValue); m_FenceValue++; @@ -200,14 +254,14 @@ bool HelperDataUpload::CopyTextureContent(const TextureUploadDesc& textureUpload for (; mipOffset < textureDesc.mipNum; mipOffset++) { const auto& subresource = textureUploadDesc.subresources[arrayOffset * textureDesc.mipNum + mipOffset]; - const uint32_t sliceRowNum = subresource.slicePitch / subresource.rowPitch; - const uint32_t alignedRowPitch = Align(subresource.rowPitch, deviceDesc.uploadBufferTextureRowAlignment); - const uint32_t alignedSlicePitch = Align(sliceRowNum * alignedRowPitch, deviceDesc.uploadBufferTextureSliceAlignment); - const uint64_t mipLevelContentSize = uint64_t(alignedSlicePitch) * subresource.sliceNum; - const uint64_t freeSpace = m_UploadBufferSize - 
m_UploadBufferOffset; + uint32_t sliceRowNum = subresource.slicePitch / subresource.rowPitch; + uint32_t alignedRowPitch = Align(subresource.rowPitch, deviceDesc.uploadBufferTextureRowAlignment); + uint32_t alignedSlicePitch = Align(sliceRowNum * alignedRowPitch, deviceDesc.uploadBufferTextureSliceAlignment); + uint64_t contentSize = uint64_t(alignedSlicePitch) * subresource.sliceNum; + uint64_t freeSpace = m_UploadBufferSize - m_UploadBufferOffset; - if (mipLevelContentSize > freeSpace) { - isCapacityInsufficient = mipLevelContentSize > m_UploadBufferSize; + if (contentSize > freeSpace) { + isCapacityInsufficient = contentSize > m_UploadBufferSize; return false; } @@ -224,7 +278,7 @@ bool HelperDataUpload::CopyTextureContent(const TextureUploadDesc& textureUpload NRI.CmdUploadBufferToTexture(*m_CommandBuffer, *textureUploadDesc.texture, dstRegion, *m_UploadBuffer, srcDataLayout); - m_UploadBufferOffset = Align(m_UploadBufferOffset + mipLevelContentSize, COPY_ALIGMENT); + m_UploadBufferOffset = Align(m_UploadBufferOffset + contentSize, COPY_ALIGMENT); } mipOffset = 0; } @@ -278,66 +332,3 @@ bool HelperDataUpload::CopyBufferContent(const BufferUploadDesc& bufferUploadDes return true; } - -template -void HelperDataUpload::DoTransition(const TextureUploadDesc* textureUploadDescs, uint32_t textureDataDescNum) { - constexpr uint32_t TEXTURES_PER_PASS = 256; - TextureBarrierDesc textureBarriers[TEXTURES_PER_PASS]; - - const AccessLayoutStage state = {AccessBits::COPY_DESTINATION, Layout::COPY_DESTINATION, StageBits::COPY}; - const AccessLayoutStage initialState = {AccessBits::UNKNOWN, Layout::UNKNOWN, StageBits::NONE}; - - for (uint32_t i = 0; i < textureDataDescNum;) { - const uint32_t passBegin = i; - const uint32_t passEnd = std::min(i + TEXTURES_PER_PASS, textureDataDescNum); - - for (; i < passEnd; i++) { - const TextureUploadDesc& textureUploadDesc = textureUploadDescs[i]; - const TextureDesc& textureDesc = NRI.GetTextureDesc(*textureUploadDesc.texture); - - 
TextureBarrierDesc& barrier = textureBarriers[i - passBegin]; - barrier = {}; - barrier.texture = textureUploadDesc.texture; - barrier.mipNum = textureDesc.mipNum; - barrier.arraySize = textureDesc.arraySize; - barrier.before = isInitialTransition ? initialState : state; - barrier.after = isInitialTransition ? state : textureUploadDesc.after; - } - - BarrierGroupDesc barrierGroup = {}; - barrierGroup.textures = textureBarriers; - barrierGroup.textureNum = uint16_t(passEnd - passBegin); - - NRI.CmdBarrier(*m_CommandBuffer, barrierGroup); - } -} - -template -void HelperDataUpload::DoTransition(const BufferUploadDesc* bufferUploadDescs, uint32_t bufferUploadDescNum) { - constexpr uint32_t BUFFERS_PER_PASS = 256; - BufferBarrierDesc bufferBarriers[BUFFERS_PER_PASS]; - - const AccessStage state = {AccessBits::COPY_DESTINATION, StageBits::COPY}; - const AccessStage initialState = {AccessBits::UNKNOWN, StageBits::NONE}; - - for (uint32_t i = 0; i < bufferUploadDescNum;) { - const uint32_t passBegin = i; - const uint32_t passEnd = std::min(i + BUFFERS_PER_PASS, bufferUploadDescNum); - - for (; i < passEnd; i++) { - const BufferUploadDesc& bufferUploadDesc = bufferUploadDescs[i]; - - BufferBarrierDesc& barrier = bufferBarriers[i - passBegin]; - barrier = {}; - barrier.buffer = bufferUploadDesc.buffer; - barrier.before = isInitialTransition ? initialState : state; - barrier.after = isInitialTransition ? 
state : bufferUploadDesc.after; - } - - BarrierGroupDesc barrierGroup = {}; - barrierGroup.buffers = bufferBarriers; - barrierGroup.bufferNum = uint16_t(passEnd - passBegin); - - NRI.CmdBarrier(*m_CommandBuffer, barrierGroup); - } -} diff --git a/Source/Shared/HelperDataUpload.h b/Source/Shared/HelperDataUpload.h index c0cc9435..b9a320de 100644 --- a/Source/Shared/HelperDataUpload.h +++ b/Source/Shared/HelperDataUpload.h @@ -1,24 +1,23 @@ #pragma once +constexpr size_t BASE_UPLOAD_BUFFER_SIZE = 1 * 1024 * 1024; + struct HelperDataUpload { - HelperDataUpload(const nri::CoreInterface& NRI, nri::Device& device, nri::CommandQueue& commandQueue); + inline HelperDataUpload(const nri::CoreInterface& NRI, nri::Device& device, nri::CommandQueue& commandQueue) : + NRI(NRI), m_Device(device), m_CommandQueue(commandQueue), m_UploadBufferSize(BASE_UPLOAD_BUFFER_SIZE) { + } nri::Result UploadData(const nri::TextureUploadDesc* textureDataDescs, uint32_t textureDataDescNum, const nri::BufferUploadDesc* bufferDataDescs, uint32_t bufferDataDescNum); - private: +private: nri::Result Create(); void Destroy(); nri::Result UploadTextures(const nri::TextureUploadDesc* textureDataDescs, uint32_t textureDataDescNum); nri::Result UploadBuffers(const nri::BufferUploadDesc* bufferDataDescs, uint32_t bufferDataDescNum); - nri::Result BeginCommandBuffers(); nri::Result EndCommandBuffersAndSubmit(); bool CopyTextureContent(const nri::TextureUploadDesc& textureDataDesc, nri::Dim_t& arrayOffset, nri::Mip_t& mipOffset, bool& isCapacityInsufficient); void CopyTextureSubresourceContent(const nri::TextureSubresourceUploadDesc& subresource, uint64_t alignedRowPitch, uint64_t alignedSlicePitch); bool CopyBufferContent(const nri::BufferUploadDesc& bufferDataDesc, uint64_t& bufferContentOffset); - template - void DoTransition(const nri::TextureUploadDesc* textureDataDescs, uint32_t textureDataDescNum); - template - void DoTransition(const nri::BufferUploadDesc* bufferDataDescs, uint32_t 
bufferDataDescNum); const nri::CoreInterface& NRI; nri::Device& m_Device; @@ -32,6 +31,4 @@ struct HelperDataUpload { uint64_t m_UploadBufferSize = 0; uint64_t m_UploadBufferOffset = 0; uint64_t m_FenceValue = 1; - - static constexpr uint64_t COPY_ALIGMENT = 16; }; \ No newline at end of file diff --git a/Source/Shared/HelperDeviceMemoryAllocator.cpp b/Source/Shared/HelperDeviceMemoryAllocator.cpp index 462c59db..17d79d4d 100644 --- a/Source/Shared/HelperDeviceMemoryAllocator.cpp +++ b/Source/Shared/HelperDeviceMemoryAllocator.cpp @@ -1,20 +1,21 @@ #include "SharedExternal.h" +#include "HelperDeviceMemoryAllocator.h" + using namespace nri; -HelperDeviceMemoryAllocator::MemoryTypeGroup::MemoryTypeGroup(const StdAllocator& stdAllocator) - : buffers(stdAllocator), bufferOffsets(stdAllocator), textures(stdAllocator), textureOffsets(stdAllocator), memoryOffset(0) { +HelperDeviceMemoryAllocator::MemoryTypeGroup::MemoryTypeGroup(const StdAllocator& stdAllocator) : + buffers(stdAllocator), bufferOffsets(stdAllocator), textures(stdAllocator), textureOffsets(stdAllocator), memoryOffset(0) { } -HelperDeviceMemoryAllocator::HelperDeviceMemoryAllocator(const CoreInterface& NRI, Device& device, const StdAllocator& stdAllocator) - : m_NRI(NRI), - m_Device(device), - m_StdAllocator(stdAllocator), - m_Map(stdAllocator), - m_DedicatedBuffers(stdAllocator), - m_DedicatedTextures(stdAllocator), - m_BufferBindingDescs(stdAllocator), - m_TextureBindingDescs(stdAllocator) { +HelperDeviceMemoryAllocator::HelperDeviceMemoryAllocator(const CoreInterface& NRI, Device& device) : + m_NRI(NRI), + m_Device(device), + m_Map(((DeviceBase&)device).GetStdAllocator()), + m_DedicatedBuffers(((DeviceBase&)device).GetStdAllocator()), + m_DedicatedTextures(((DeviceBase&)device).GetStdAllocator()), + m_BufferBindingDescs(((DeviceBase&)device).GetStdAllocator()), + m_TextureBindingDescs(((DeviceBase&)device).GetStdAllocator()) { } uint32_t HelperDeviceMemoryAllocator::CalculateAllocationNumber(const 
ResourceGroupDesc& resourceGroupDesc) { @@ -134,7 +135,7 @@ void HelperDeviceMemoryAllocator::GroupByMemoryType(MemoryLocation memoryLocatio if (memoryDesc.mustBeDedicated) m_DedicatedBuffers.push_back(buffer); else { - MemoryTypeGroup& group = m_Map.try_emplace(memoryDesc.type, m_StdAllocator).first->second; + MemoryTypeGroup& group = m_Map.try_emplace(memoryDesc.type, ((DeviceBase&)m_Device).GetStdAllocator()).first->second; uint64_t offset = Align(group.memoryOffset, memoryDesc.alignment); @@ -157,7 +158,7 @@ void HelperDeviceMemoryAllocator::GroupByMemoryType(MemoryLocation memoryLocatio if (memoryDesc.mustBeDedicated) m_DedicatedTextures.push_back(texture); else { - MemoryTypeGroup& group = m_Map.try_emplace(memoryDesc.type, m_StdAllocator).first->second; + MemoryTypeGroup& group = m_Map.try_emplace(memoryDesc.type, ((DeviceBase&)m_Device).GetStdAllocator()).first->second; if (group.textures.empty() && group.memoryOffset > 0) group.memoryOffset = Align(group.memoryOffset, deviceDesc.bufferTextureGranularity); diff --git a/Source/Shared/HelperDeviceMemoryAllocator.h b/Source/Shared/HelperDeviceMemoryAllocator.h index fa00a233..e80ba303 100644 --- a/Source/Shared/HelperDeviceMemoryAllocator.h +++ b/Source/Shared/HelperDeviceMemoryAllocator.h @@ -4,12 +4,12 @@ template using Map = std::map, StdAllocator>>; struct HelperDeviceMemoryAllocator { - HelperDeviceMemoryAllocator(const nri::CoreInterface& NRI, nri::Device& device, const StdAllocator& stdAllocator); + HelperDeviceMemoryAllocator(const nri::CoreInterface& NRI, nri::Device& device); uint32_t CalculateAllocationNumber(const nri::ResourceGroupDesc& resourceGroupDesc); nri::Result AllocateAndBindMemory(const nri::ResourceGroupDesc& resourceGroupDesc, nri::Memory** allocations); - private: +private: struct MemoryTypeGroup; nri::Result TryToAllocateAndBindMemory(const nri::ResourceGroupDesc& resourceGroupDesc, nri::Memory** allocations, size_t& allocationNum); @@ -32,7 +32,6 @@ struct HelperDeviceMemoryAllocator 
{ const nri::CoreInterface& m_NRI; nri::Device& m_Device; - const StdAllocator& m_StdAllocator; Map m_Map; Vector m_DedicatedBuffers; diff --git a/Source/Shared/HelperWaitIdle.cpp b/Source/Shared/HelperWaitIdle.cpp index e0d2e35a..86553c72 100644 --- a/Source/Shared/HelperWaitIdle.cpp +++ b/Source/Shared/HelperWaitIdle.cpp @@ -1,22 +1,26 @@ #include "SharedExternal.h" +#include "HelperWaitIdle.h" + using namespace nri; -HelperWaitIdle::HelperWaitIdle(const CoreInterface& NRI, Device& device, CommandQueue& commandQueue) : NRI(NRI), m_Device(device), m_CommandQueue(commandQueue) { - NRI.CreateFence(device, 0, m_Fence); -} +Result WaitIdle(const CoreInterface& NRI, Device& device, CommandQueue& commandQueue) { + Fence* fence = nullptr; + Result result = NRI.CreateFence(device, 0, fence); + if (result != Result::SUCCESS) + return result; -HelperWaitIdle::~HelperWaitIdle() { - if (m_Fence) - NRI.DestroyFence(*m_Fence); -} + FenceSubmitDesc fenceSubmitDesc = {}; + fenceSubmitDesc.fence = fence; + fenceSubmitDesc.value = 1; -Result HelperWaitIdle::WaitIdle() { - if (!m_Fence) - return Result::FAILURE; + QueueSubmitDesc queueSubmitDesc = {}; + queueSubmitDesc.signalFences = &fenceSubmitDesc; + queueSubmitDesc.signalFenceNum = 1; - NRI.QueueSignal(m_CommandQueue, *m_Fence, 1); - NRI.Wait(*m_Fence, 1); + NRI.QueueSubmit(commandQueue, queueSubmitDesc); + NRI.Wait(*fence, 1); + NRI.DestroyFence(*fence); return Result::SUCCESS; } diff --git a/Source/Shared/HelperWaitIdle.h b/Source/Shared/HelperWaitIdle.h index 00ea83e9..b3821612 100644 --- a/Source/Shared/HelperWaitIdle.h +++ b/Source/Shared/HelperWaitIdle.h @@ -1,14 +1,3 @@ #pragma once -struct HelperWaitIdle { - HelperWaitIdle(const nri::CoreInterface& NRI, nri::Device& device, nri::CommandQueue& commandQueue); - ~HelperWaitIdle(); - - nri::Result WaitIdle(); - - private: - const nri::CoreInterface& NRI; - nri::Device& m_Device; - nri::CommandQueue& m_CommandQueue; - nri::Fence* m_Fence = nullptr; -}; \ No newline at end of 
file +nri::Result WaitIdle(const nri::CoreInterface& NRI, nri::Device& device, nri::CommandQueue& commandQueue); diff --git a/Source/Shared/Lock.h b/Source/Shared/Lock.h index 3deace08..d281df93 100644 --- a/Source/Shared/Lock.h +++ b/Source/Shared/Lock.h @@ -30,7 +30,7 @@ struct alignas(LOCK_CACHELINE_SIZE) Lock { m_Atomic.store(0, std::memory_order_release); } - private: +private: std::atomic_uint32_t m_Atomic; }; @@ -43,6 +43,6 @@ struct ExclusiveScope { m_Lock.Release(); } - private: +private: Lock& m_Lock; }; diff --git a/Source/Shared/SharedExternal.cpp b/Source/Shared/SharedExternal.cpp index d0174b2e..d59b0516 100644 --- a/Source/Shared/SharedExternal.cpp +++ b/Source/Shared/SharedExternal.cpp @@ -367,6 +367,30 @@ nri::Result DisplayDescHelper::GetDisplayDesc(void* hwnd, nri::DisplayDesc& disp return nri::Result::SUCCESS; } +bool HasOutput() { + ComPtr factory; + HRESULT hr = CreateDXGIFactory(IID_PPV_ARGS(&factory)); + if (FAILED(hr)) + return false; + + uint32_t i = 0; + while (true) { + // Get adapter + ComPtr adapter; + hr = factory->EnumAdapters(i++, &adapter); + if (hr == DXGI_ERROR_NOT_FOUND) + break; + + // Check if there is an output + ComPtr output; + hr = adapter->EnumOutputs(0, &output); + if (hr != DXGI_ERROR_NOT_FOUND) + return true; + } + + return false; +} + #else uint32_t NRIFormatToDXGIFormat(nri::Format format) { @@ -951,3 +975,8 @@ void ConvertWcharToChar(const wchar_t* in, char* out, size_t outLength) { *out = 0; } + +uint64_t GetSwapChainId() { + static uint64_t id = 0; + return id++ << PRESENT_INDEX_BIT_NUM; +} diff --git a/Source/Shared/SharedExternal.h b/Source/Shared/SharedExternal.h index e8d65546..33452c51 100644 --- a/Source/Shared/SharedExternal.h +++ b/Source/Shared/SharedExternal.h @@ -6,8 +6,10 @@ #include "Extensions/NRIDeviceCreation.h" #include "Extensions/NRIHelper.h" +#include "Extensions/NRILowLatency.h" #include "Extensions/NRIMeshShader.h" #include "Extensions/NRIRayTracing.h" +#include "Extensions/NRIStreamer.h" 
#include "Extensions/NRISwapChain.h" #include "Extensions/NRIWrapperD3D11.h" #include "Extensions/NRIWrapperD3D12.h" @@ -24,16 +26,15 @@ typedef nri::MemoryAllocatorInterface MemoryAllocatorInterface; #include "StdAllocator.h" -#include "HelperDataUpload.h" -#include "HelperDeviceMemoryAllocator.h" -#include "HelperWaitIdle.h" - #ifdef _WIN32 # include #else typedef uint32_t DXGI_FORMAT; #endif +#define NRI_STRINGIFY_(token) #token +#define NRI_STRINGIFY(token) NRI_STRINGIFY_(token) + #define RETURN_ON_BAD_HRESULT(deviceBase, hr, msg) \ if (FAILED(hr)) { \ (deviceBase)->ReportMessage(nri::Message::TYPE_ERROR, __FILE__, __LINE__, msg##" failed, result = 0x%08X!", hr); \ @@ -58,11 +59,18 @@ typedef uint32_t DXGI_FORMAT; #define NRI_NODE_MASK 0x1 // mGPU is not planned +#define SHADER_EXT_UAV_SLOT 63 // TODO: D3D 11.1 assumed + #include "DeviceBase.h" -constexpr uint32_t COMMAND_QUEUE_TYPE_NUM = (uint32_t)nri::CommandQueueType::MAX_NUM; -constexpr uint32_t DEFAULT_TIMEOUT = 5000; // 5 sec -constexpr uint64_t VK_DEFAULT_TIMEOUT = DEFAULT_TIMEOUT * 1000000ull; +constexpr uint32_t TIMEOUT_PRESENT = 1000; // 1 sec +constexpr uint32_t TIMEOUT_FENCE = 5000; // 5 sec + +constexpr uint64_t PRESENT_INDEX_BIT_NUM = 56ull; + +constexpr uint64_t MsToUs(uint32_t x) { + return x * 1000000ull; +} constexpr void ReturnVoid() { } @@ -71,6 +79,28 @@ template constexpr void MaybeUnused([[maybe_unused]] const Args&... 
args) { } +// Format conversion +struct DxgiFormat { + DXGI_FORMAT typeless; + DXGI_FORMAT typed; +}; + +struct FormatProps { + uint16_t stride; + uint8_t blockWidth; + bool isInteger; +}; + +const DxgiFormat& GetDxgiFormat(nri::Format format); +const FormatProps& GetFormatProps(nri::Format format); + +nri::Format DXGIFormatToNRIFormat(uint32_t dxgiFormat); +nri::Format VKFormatToNRIFormat(uint32_t vkFormat); + +uint32_t NRIFormatToDXGIFormat(nri::Format format); +uint32_t NRIFormatToVKFormat(nri::Format format); + +// Misc inline nri::Vendor GetVendorFromID(uint32_t vendorID) { switch (vendorID) { case 0x10DE: @@ -84,31 +114,41 @@ inline nri::Vendor GetVendorFromID(uint32_t vendorID) { return nri::Vendor::UNKNOWN; } -struct DxgiFormat { - DXGI_FORMAT typeless; - DXGI_FORMAT typed; -}; +nri::Result GetResultFromHRESULT(long result); -struct FormatProps { - uint16_t stride; - uint8_t blockWidth; - bool isInteger; -}; +inline nri::Dim_t GetDimension(nri::GraphicsAPI api, const nri::TextureDesc& textureDesc, nri::Dim_t dimensionIndex, nri::Mip_t mip) { + assert(dimensionIndex < 3); -const DxgiFormat& GetDxgiFormat(nri::Format format); -const FormatProps& GetFormatProps(nri::Format format); + nri::Dim_t dim = textureDesc.depth; + if (dimensionIndex == 0) + dim = textureDesc.width; + else if (dimensionIndex == 1) + dim = textureDesc.height; -void CheckAndSetDefaultCallbacks(nri::CallbackInterface& callbackInterface); + dim = (nri::Dim_t)std::max(dim >> mip, 1); + + // TODO: VK doesn't require manual alignment, but probably we should use it here and during texture creation + if (api != nri::GraphicsAPI::VULKAN) + dim = Align(dim, dimensionIndex < 2 ? 
GetFormatProps(textureDesc.format).blockWidth : 1); + + return dim; +} + +// String conversion void ConvertCharToWchar(const char* in, wchar_t* out, size_t outLen); void ConvertWcharToChar(const wchar_t* in, char* out, size_t outLen); -nri::Result GetResultFromHRESULT(long result); -nri::Format DXGIFormatToNRIFormat(uint32_t dxgiFormat); -nri::Format VKFormatToNRIFormat(uint32_t vkFormat); +// Callbacks setup +void CheckAndSetDefaultCallbacks(nri::CallbackInterface& callbackInterface); -uint32_t NRIFormatToDXGIFormat(nri::Format format); -uint32_t NRIFormatToVKFormat(nri::Format format); +// Swap chain ID +uint64_t GetSwapChainId(); +inline uint64_t GetPresentIndex(uint64_t presentId) { + return presentId & ((1ull << PRESENT_INDEX_BIT_NUM) - 1ull); +} + +// Shared library struct Library; Library* LoadSharedLibrary(const char* path); void* GetSharedLibraryFunction(Library& library, const char* name); @@ -215,7 +255,7 @@ struct ComPtr { return m_ComPtr == lComPtr; } - protected: +protected: T* m_ComPtr; }; @@ -318,11 +358,13 @@ constexpr nri::FormatSupportBits D3D_FORMAT_SUPPORT_TABLE[] = { static_assert(GetCountOf(D3D_FORMAT_SUPPORT_TABLE) == (size_t)nri::Format::MAX_NUM, "some format is missing"); +bool HasOutput(); + struct DisplayDescHelper { - public: +public: nri::Result GetDisplayDesc(void* hwnd, nri::DisplayDesc& displayDesc); - protected: +protected: ComPtr m_DxgiFactory2; nri::DisplayDesc m_DisplayDesc = {}; bool m_HasDisplayDesc = false; diff --git a/Source/Shared/StdAllocator.h b/Source/Shared/StdAllocator.h index df8c8f99..fe9ec25f 100644 --- a/Source/Shared/StdAllocator.h +++ b/Source/Shared/StdAllocator.h @@ -135,7 +135,7 @@ struct StdAllocator { template using other = StdAllocator; - private: +private: MemoryAllocatorInterface m_Interface = {}; }; diff --git a/Source/Shared/Streamer.cpp b/Source/Shared/Streamer.cpp new file mode 100644 index 00000000..ca095268 --- /dev/null +++ b/Source/Shared/Streamer.cpp @@ -0,0 +1,246 @@ +#include 
"SharedExternal.h" + +#include "Streamer.h" + +using namespace nri; + +constexpr uint64_t CHUNK_SIZE = 65536; + +StreamerImpl::~StreamerImpl() { + for (GarbageInFlight& garbageInFlight : m_GarbageInFlight) { + m_NRI.DestroyBuffer(*garbageInFlight.buffer); + m_NRI.FreeMemory(*garbageInFlight.memory); + } + + m_NRI.DestroyBuffer(*m_ConstantBuffer); + m_NRI.DestroyBuffer(*m_DynamicBuffer); + + m_NRI.FreeMemory(*m_ConstantBufferMemory); + m_NRI.FreeMemory(*m_DynamicBufferMemory); +} + +Result StreamerImpl::Create(const StreamerDesc& desc) { + if (desc.constantBufferSize) { + // Create constant buffer + BufferDesc bufferDesc = {}; + bufferDesc.size = desc.constantBufferSize; + bufferDesc.usageMask = BufferUsageBits::CONSTANT_BUFFER; + + Result result = m_NRI.CreateBuffer(m_Device, bufferDesc, m_ConstantBuffer); + if (result != Result::SUCCESS) + return result; + + // Allocate memory + MemoryDesc memoryDesc = {}; + m_NRI.GetBufferMemoryInfo(*m_ConstantBuffer, desc.constantBufferMemoryLocation, memoryDesc); + + result = m_NRI.AllocateMemory(m_Device, memoryDesc.type, memoryDesc.size, m_ConstantBufferMemory); + if (result != Result::SUCCESS) + return result; + + // Bind to memory + BufferMemoryBindingDesc memoryBindingDesc = {}; + memoryBindingDesc.buffer = m_ConstantBuffer; + memoryBindingDesc.memory = m_ConstantBufferMemory; + + result = m_NRI.BindBufferMemory(m_Device, &memoryBindingDesc, 1); + if (result != Result::SUCCESS) + return result; + } + + m_Desc = desc; + + return Result::SUCCESS; +} + +uint32_t StreamerImpl::UpdateStreamerConstantBuffer(const void* data, uint32_t dataSize) { + const DeviceDesc& deviceDesc = m_NRI.GetDeviceDesc(m_Device); + uint32_t alignedSize = Align(dataSize, deviceDesc.constantBufferOffsetAlignment); + + // Update + if (m_ConstantDataOffset + alignedSize > m_Desc.constantBufferSize) + m_ConstantDataOffset = 0; + + uint32_t offset = m_ConstantDataOffset; + m_ConstantDataOffset += alignedSize; + + // Copy + uint8_t* dest = 
(uint8_t*)m_NRI.MapBuffer(*m_ConstantBuffer, offset, alignedSize); + if (dest) { + memcpy(dest, data, dataSize); + m_NRI.UnmapBuffer(*m_ConstantBuffer); + } + + return offset; +} + +uint64_t StreamerImpl::AddStreamerBufferUpdateRequest(const BufferUpdateRequestDesc& bufferUpdateRequestDesc) { + uint64_t alignedSize = Align(bufferUpdateRequestDesc.dataSize, 16); + + uint64_t offset = m_DynamicDataOffsetBase + m_DynamicDataOffset; + m_BufferRequests.push_back({bufferUpdateRequestDesc, m_DynamicDataOffset}); // store local offset + m_DynamicDataOffset += alignedSize; + + return offset; +} + +uint64_t StreamerImpl::AddStreamerTextureUpdateRequest(const TextureUpdateRequestDesc& textureUpdateRequestDesc) { + const DeviceDesc& deviceDesc = m_NRI.GetDeviceDesc(m_Device); + const TextureDesc& textureDesc = m_NRI.GetTextureDesc(*textureUpdateRequestDesc.dstTexture); + + Dim_t h = textureUpdateRequestDesc.dstRegionDesc.height; + h = h == WHOLE_SIZE ? GetDimension(deviceDesc.graphicsAPI, textureDesc, 1, textureUpdateRequestDesc.dstRegionDesc.mipOffset) : h; + + Dim_t d = textureUpdateRequestDesc.dstRegionDesc.depth; + d = d == WHOLE_SIZE ? 
GetDimension(deviceDesc.graphicsAPI, textureDesc, 2, textureUpdateRequestDesc.dstRegionDesc.mipOffset) : d; + + uint32_t alignedRowPitch = Align(textureUpdateRequestDesc.dataRowPitch, deviceDesc.uploadBufferTextureRowAlignment); + uint32_t alignedSlicePitch = Align(alignedRowPitch * h, deviceDesc.uploadBufferTextureSliceAlignment); + uint64_t alignedSize = alignedSlicePitch * d; + + uint64_t offset = m_DynamicDataOffsetBase + m_DynamicDataOffset; + m_TextureRequests.push_back({textureUpdateRequestDesc, m_DynamicDataOffset}); // store local offset + m_DynamicDataOffset += alignedSize; + + return offset; +} + +Result StreamerImpl::CopyStreamerUpdateRequests() { + if (!m_DynamicDataOffset) + return Result::SUCCESS; + + // Process garbage + for (size_t i = 0; i < m_GarbageInFlight.size(); i++) { + GarbageInFlight& garbageInFlight = m_GarbageInFlight[i]; + if (garbageInFlight.frameNum < m_Desc.frameInFlightNum) + garbageInFlight.frameNum++; + else { + m_NRI.DestroyBuffer(*garbageInFlight.buffer); + m_NRI.FreeMemory(*garbageInFlight.memory); + + m_GarbageInFlight[i--] = m_GarbageInFlight.back(); + m_GarbageInFlight.pop_back(); + } + } + + // Grow + if (m_DynamicDataOffsetBase + m_DynamicDataOffset > m_DynamicBufferSize) { + m_DynamicBufferSize = Align(m_DynamicDataOffset, CHUNK_SIZE) * (m_Desc.frameInFlightNum + 1); + + // Add the current buffer to the garbage collector immediately, but keep it alive for some frames + if (m_DynamicBuffer) + m_GarbageInFlight.push_back({m_DynamicBuffer, m_DynamicBufferMemory, 0}); + + { // Create new dynamic buffer + BufferDesc bufferDesc = {}; + bufferDesc.size = m_DynamicBufferSize; + bufferDesc.usageMask = m_Desc.dynamicBufferUsageBits; + + Result result = m_NRI.CreateBuffer(m_Device, bufferDesc, m_DynamicBuffer); + if (result != Result::SUCCESS) + return result; + } + + { // Allocate memory + MemoryDesc memoryDesc = {}; + m_NRI.GetBufferMemoryInfo(*m_DynamicBuffer, m_Desc.dynamicBufferMemoryLocation, memoryDesc); + + Result result = 
m_NRI.AllocateMemory(m_Device, memoryDesc.type, memoryDesc.size, m_DynamicBufferMemory); + if (result != Result::SUCCESS) + return result; + } + + { // Bind to memory + BufferMemoryBindingDesc memoryBindingDesc = {}; + memoryBindingDesc.buffer = m_DynamicBuffer; + memoryBindingDesc.memory = m_DynamicBufferMemory; + + Result result = m_NRI.BindBufferMemory(m_Device, &memoryBindingDesc, 1); + if (result != Result::SUCCESS) + return result; + } + } + + // Concatenate & copy to the internal buffer, gather requests with destinations + uint8_t* data = (uint8_t*)m_NRI.MapBuffer(*m_DynamicBuffer, m_DynamicDataOffsetBase, m_DynamicDataOffset); + if (data) { + const DeviceDesc& deviceDesc = m_NRI.GetDeviceDesc(m_Device); + + // Buffers + for (BufferUpdateRequest& request : m_BufferRequests) { + uint8_t* dst = data + request.offset; + memcpy(dst, request.desc.data, request.desc.dataSize); + + if (request.desc.dstBuffer) { + request.offset += m_DynamicDataOffsetBase; // convert to global offset + m_BufferRequestsWithDst.push_back(request); + } + } + + // Textures + for (TextureUpdateRequest& request : m_TextureRequests) { + uint8_t* dst = data + request.offset; + const TextureDesc& textureDesc = m_NRI.GetTextureDesc(*request.desc.dstTexture); + + Dim_t h = request.desc.dstRegionDesc.height; + h = h == WHOLE_SIZE ? GetDimension(deviceDesc.graphicsAPI, textureDesc, 1, request.desc.dstRegionDesc.mipOffset) : h; + + Dim_t d = request.desc.dstRegionDesc.depth; + d = d == WHOLE_SIZE ? 
GetDimension(deviceDesc.graphicsAPI, textureDesc, 2, request.desc.dstRegionDesc.mipOffset) : d; + + uint32_t alignedRowPitch = Align(request.desc.dataRowPitch, deviceDesc.uploadBufferTextureRowAlignment); + uint32_t alignedSlicePitch = Align(alignedRowPitch * h, deviceDesc.uploadBufferTextureSliceAlignment); + + for (uint32_t z = 0; z < d; z++) { + for (uint32_t y = 0; y < h; y++) { + uint8_t* dstRow = dst + z * alignedSlicePitch + y * alignedRowPitch; + const uint8_t* srcRow = (uint8_t*)request.desc.data + z * request.desc.dataSlicePitch + y * request.desc.dataRowPitch; + memcpy(dstRow, srcRow, request.desc.dataRowPitch); + } + } + + if (request.desc.dstTexture) { + request.offset += m_DynamicDataOffsetBase; // convert to global offset + m_TextureRequestsWithDst.push_back(request); + } + } + + m_NRI.UnmapBuffer(*m_DynamicBuffer); + } else + return Result::FAILURE; + + // Cleanup + m_BufferRequests.clear(); + m_TextureRequests.clear(); + + m_FrameIndex = (m_FrameIndex + 1) % (m_Desc.frameInFlightNum + 1); + + if (m_FrameIndex == 0) + m_DynamicDataOffsetBase = 0; + else + m_DynamicDataOffsetBase += m_DynamicDataOffset; + + m_DynamicDataOffset = 0; + + return Result::SUCCESS; +} + +void StreamerImpl::CmdUploadStreamerUpdateRequests(CommandBuffer& commandBuffer) { + // Buffers + for (const BufferUpdateRequest& request : m_BufferRequestsWithDst) + m_NRI.CmdCopyBuffer(commandBuffer, *request.desc.dstBuffer, request.desc.dstBufferOffset, *m_DynamicBuffer, request.offset, request.desc.dataSize); + + // Textures + for (const TextureUpdateRequest& request : m_TextureRequestsWithDst) { + TextureDataLayoutDesc dataLayout = {}; + dataLayout.offset = request.offset; + dataLayout.rowPitch = request.desc.dataRowPitch; + dataLayout.slicePitch = request.desc.dataSlicePitch; + + m_NRI.CmdUploadBufferToTexture(commandBuffer, *request.desc.dstTexture, request.desc.dstRegionDesc, *m_DynamicBuffer, dataLayout); + } + + // Cleanup + m_BufferRequestsWithDst.clear(); + 
m_TextureRequestsWithDst.clear(); +} diff --git a/Source/Shared/Streamer.h b/Source/Shared/Streamer.h new file mode 100644 index 00000000..b8383add --- /dev/null +++ b/Source/Shared/Streamer.h @@ -0,0 +1,69 @@ +#pragma once + +struct BufferUpdateRequest { + nri::BufferUpdateRequestDesc desc; + uint64_t offset; +}; + +struct TextureUpdateRequest { + nri::TextureUpdateRequestDesc desc; + uint64_t offset; +}; + +struct GarbageInFlight { + nri::Buffer* buffer; + nri::Memory* memory; + uint32_t frameNum; +}; + +struct StreamerImpl { + inline StreamerImpl(nri::Device& device, const nri::CoreInterface& NRI) : + m_Device(device), + m_NRI(NRI), + m_BufferRequests(((nri::DeviceBase&)device).GetStdAllocator()), + m_BufferRequestsWithDst(((nri::DeviceBase&)device).GetStdAllocator()), + m_TextureRequests(((nri::DeviceBase&)device).GetStdAllocator()), + m_TextureRequestsWithDst(((nri::DeviceBase&)device).GetStdAllocator()), + m_GarbageInFlight(((nri::DeviceBase&)device).GetStdAllocator()) { + } + + inline nri::Buffer* GetDynamicBuffer() { + return m_DynamicBuffer; + } + + inline nri::Buffer* GetConstantBuffer() { + return m_ConstantBuffer; + } + + inline nri::Device& GetDevice() { + return m_Device; + } + + ~StreamerImpl(); + + nri::Result Create(const nri::StreamerDesc& desc); + uint32_t UpdateStreamerConstantBuffer(const void* data, uint32_t dataSize); + uint64_t AddStreamerBufferUpdateRequest(const nri::BufferUpdateRequestDesc& bufferUpdateRequestDesc); + uint64_t AddStreamerTextureUpdateRequest(const nri::TextureUpdateRequestDesc& textureUpdateRequestDesc); + nri::Result CopyStreamerUpdateRequests(); + void CmdUploadStreamerUpdateRequests(nri::CommandBuffer& commandBuffer); + +private: + nri::Device& m_Device; + const nri::CoreInterface& m_NRI; + nri::StreamerDesc m_Desc = {}; + Vector m_BufferRequests; + Vector m_BufferRequestsWithDst; + Vector m_TextureRequests; + Vector m_TextureRequestsWithDst; + Vector m_GarbageInFlight; + nri::Buffer* m_ConstantBuffer = nullptr; + 
nri::Memory* m_ConstantBufferMemory = nullptr; + nri::Buffer* m_DynamicBuffer = nullptr; + nri::Memory* m_DynamicBufferMemory = nullptr; + uint32_t m_ConstantDataOffset = 0; + uint64_t m_DynamicDataOffset = 0; + uint64_t m_DynamicDataOffsetBase = 0; + uint64_t m_DynamicBufferSize = 0; + uint32_t m_FrameIndex = 0; +}; \ No newline at end of file diff --git a/Source/VK/AccelerationStructureVK.h b/Source/VK/AccelerationStructureVK.h index f48ab6e4..84c0ae71 100644 --- a/Source/VK/AccelerationStructureVK.h +++ b/Source/VK/AccelerationStructureVK.h @@ -49,11 +49,11 @@ struct AccelerationStructureVK { void GetMemoryInfo(MemoryDesc& memoryDesc) const; Result CreateDescriptor(Descriptor*& descriptor) const; - private: +private: void PrecreateBottomLevel(const AccelerationStructureDesc& accelerationStructureDesc); void PrecreateTopLevel(const AccelerationStructureDesc& accelerationStructureDesc); - private: +private: DeviceVK& m_Device; VkAccelerationStructureKHR m_Handle = VK_NULL_HANDLE; VkDeviceAddress m_DeviceAddress = 0; diff --git a/Source/VK/BufferVK.h b/Source/VK/BufferVK.h index 68eb3c92..47fbd8f3 100644 --- a/Source/VK/BufferVK.h +++ b/Source/VK/BufferVK.h @@ -43,7 +43,7 @@ struct BufferVK { void* Map(uint64_t offset, uint64_t size); void Unmap(); - private: +private: DeviceVK& m_Device; VkBuffer m_Handle = VK_NULL_HANDLE; VkDeviceAddress m_DeviceAddress = {}; diff --git a/Source/VK/BufferVK.hpp b/Source/VK/BufferVK.hpp index e52f05ba..814884d2 100644 --- a/Source/VK/BufferVK.hpp +++ b/Source/VK/BufferVK.hpp @@ -27,4 +27,4 @@ static void NRI_CALL UnmapBuffer(Buffer& buffer) { #pragma endregion -Define_Core_Buffer_PartiallyFillFunctionTable(VK) +Define_Core_Buffer_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/CommandAllocatorVK.h b/Source/VK/CommandAllocatorVK.h index 5f988036..fce19e76 100644 --- a/Source/VK/CommandAllocatorVK.h +++ b/Source/VK/CommandAllocatorVK.h @@ -31,7 +31,7 @@ struct CommandAllocatorVK { Result CreateCommandBuffer(CommandBuffer*& 
commandBuffer); void Reset(); - private: +private: DeviceVK& m_Device; VkCommandPool m_Handle = VK_NULL_HANDLE; CommandQueueType m_Type = (CommandQueueType)0; diff --git a/Source/VK/CommandAllocatorVK.hpp b/Source/VK/CommandAllocatorVK.hpp index 40c4c6ca..6feb16f0 100644 --- a/Source/VK/CommandAllocatorVK.hpp +++ b/Source/VK/CommandAllocatorVK.hpp @@ -16,4 +16,4 @@ static void NRI_CALL ResetCommandAllocator(CommandAllocator& commandAllocator) { #pragma endregion -Define_Core_CommandAllocator_PartiallyFillFunctionTable(VK) +Define_Core_CommandAllocator_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/CommandBufferVK.cpp b/Source/VK/CommandBufferVK.cpp index 5a9c7073..052ef175 100644 --- a/Source/VK/CommandBufferVK.cpp +++ b/Source/VK/CommandBufferVK.cpp @@ -512,65 +512,6 @@ inline void CommandBufferVK::DispatchIndirect(const Buffer& buffer, uint64_t off vk.CmdDispatchIndirect(m_Handle, bufferImpl.GetHandle(), offset); } -static inline VkPipelineStageFlags2 GetPipelineStageFlags(StageBits stageBits) { - // Check non-mask values first - if (stageBits == StageBits::ALL) - return VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; - - if (stageBits == StageBits::NONE) - return VK_PIPELINE_STAGE_2_NONE; - - // Gather bits - VkPipelineStageFlags2 flags = 0; - - if (stageBits & StageBits::INDEX_INPUT) - flags |= VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT; - - if (stageBits & StageBits::VERTEX_SHADER) - flags |= VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT; - - if (stageBits & StageBits::TESS_CONTROL_SHADER) - flags |= VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT; - - if (stageBits & StageBits::TESS_EVALUATION_SHADER) - flags |= VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT; - - if (stageBits & StageBits::GEOMETRY_SHADER) - flags |= VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT; - - if (stageBits & StageBits::MESH_CONTROL_SHADER) - flags |= VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT; - - if (stageBits & StageBits::MESH_EVALUATION_SHADER) - flags |= 
VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT; - - if (stageBits & StageBits::FRAGMENT_SHADER) - flags |= VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT; - - if (stageBits & StageBits::DEPTH_STENCIL_ATTACHMENT) - flags |= VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT; - - if (stageBits & StageBits::COLOR_ATTACHMENT) - flags |= VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; - - if (stageBits & StageBits::COMPUTE_SHADER) - flags |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; - - if (stageBits & StageBits::RAY_TRACING_SHADERS) - flags |= VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR; - - if (stageBits & StageBits::INDIRECT) - flags |= VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT; - - if (stageBits & (StageBits::COPY | StageBits::CLEAR_STORAGE)) - flags |= VK_PIPELINE_STAGE_2_TRANSFER_BIT; - - if (stageBits & StageBits::ACCELERATION_STRUCTURE) - flags |= VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR; - - return flags; -} - static inline VkAccessFlags2 GetAccessFlags(AccessBits accessBits) { VkAccessFlags2 flags = 0; diff --git a/Source/VK/CommandBufferVK.h b/Source/VK/CommandBufferVK.h index ba8840c7..10bf8673 100644 --- a/Source/VK/CommandBufferVK.h +++ b/Source/VK/CommandBufferVK.h @@ -102,10 +102,10 @@ struct CommandBufferVK { void DrawMeshTasks(const DrawMeshTasksDesc& drawMeshTasksDesc); void DrawMeshTasksIndirect(const Buffer& buffer, uint64_t offset, uint32_t drawNum, uint32_t stride); - private: +private: void CopyWholeTexture(const TextureVK& dstTexture, const TextureVK& srcTexture); - private: +private: DeviceVK& m_Device; const PipelineVK* m_CurrentPipeline = nullptr; const PipelineLayoutVK* m_CurrentPipelineLayout = nullptr; diff --git a/Source/VK/CommandBufferVK.hpp b/Source/VK/CommandBufferVK.hpp index c12c2717..970c71ef 100644 --- a/Source/VK/CommandBufferVK.hpp +++ b/Source/VK/CommandBufferVK.hpp @@ -228,5 +228,6 @@ static void NRI_CALL CmdDrawMeshTasksIndirect(CommandBuffer& commandBuffer, cons #pragma endregion 
-Define_Core_CommandBuffer_PartiallyFillFunctionTable(VK) Define_RayTracing_CommandBuffer_PartiallyFillFunctionTable(VK) - Define_MeshShader_CommandBuffer_PartiallyFillFunctionTable(VK) +Define_Core_CommandBuffer_PartiallyFillFunctionTable(VK); +Define_RayTracing_CommandBuffer_PartiallyFillFunctionTable(VK); +Define_MeshShader_CommandBuffer_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/CommandQueueVK.cpp b/Source/VK/CommandQueueVK.cpp index 57dead1c..1916fe6a 100644 --- a/Source/VK/CommandQueueVK.cpp +++ b/Source/VK/CommandQueueVK.cpp @@ -4,6 +4,9 @@ #include "CommandBufferVK.h" #include "CommandQueueVK.h" +#include "FenceVK.h" +#include "HelperDataUpload.h" +#include "SwapChainVK.h" using namespace nri; @@ -23,17 +26,48 @@ inline void CommandQueueVK::SetDebugName(const char* name) { m_Device.SetDebugNameToTrivialObject(VK_OBJECT_TYPE_QUEUE, (uint64_t)m_Handle, name); } -inline void CommandQueueVK::Submit(const QueueSubmitDesc& queueSubmitDesc) { +inline void CommandQueueVK::Submit(const QueueSubmitDesc& queueSubmitDesc, const SwapChain* swapChain) { ExclusiveScope lock(m_Lock); - VkCommandBuffer* commandBuffers = STACK_ALLOC(VkCommandBuffer, queueSubmitDesc.commandBufferNum); - for (uint32_t i = 0; i < queueSubmitDesc.commandBufferNum; i++) - commandBuffers[i] = *(CommandBufferVK*)queueSubmitDesc.commandBuffers[i]; - - VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 0, nullptr, nullptr, queueSubmitDesc.commandBufferNum, commandBuffers, 0, nullptr}; + VkSemaphoreSubmitInfo* waitSemaphores = STACK_ALLOC(VkSemaphoreSubmitInfo, queueSubmitDesc.waitFenceNum); + for (uint32_t i = 0; i < queueSubmitDesc.waitFenceNum; i++) { + waitSemaphores[i] = {VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO}; + waitSemaphores[i].semaphore = *(FenceVK*)queueSubmitDesc.waitFences[i].fence; + waitSemaphores[i].value = queueSubmitDesc.waitFences[i].value; + waitSemaphores[i].stageMask = GetPipelineStageFlags(queueSubmitDesc.waitFences[i].stages); + } + +
VkCommandBufferSubmitInfo* commandBuffers = STACK_ALLOC(VkCommandBufferSubmitInfo, queueSubmitDesc.commandBufferNum); + for (uint32_t i = 0; i < queueSubmitDesc.commandBufferNum; i++) { + commandBuffers[i] = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO}; + commandBuffers[i].commandBuffer = *(CommandBufferVK*)queueSubmitDesc.commandBuffers[i]; + commandBuffers[i].deviceMask = NRI_NODE_MASK; + } + + VkSemaphoreSubmitInfo* signalSemaphores = STACK_ALLOC(VkSemaphoreSubmitInfo, queueSubmitDesc.signalFenceNum); + for (uint32_t i = 0; i < queueSubmitDesc.signalFenceNum; i++) { + signalSemaphores[i] = {VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO}; + signalSemaphores[i].semaphore = *(FenceVK*)queueSubmitDesc.signalFences[i].fence; + signalSemaphores[i].value = queueSubmitDesc.signalFences[i].value; + signalSemaphores[i].stageMask = GetPipelineStageFlags(queueSubmitDesc.signalFences[i].stages); + } + + VkSubmitInfo2 submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO_2}; + submitInfo.waitSemaphoreInfoCount = queueSubmitDesc.waitFenceNum; + submitInfo.pWaitSemaphoreInfos = waitSemaphores; + submitInfo.commandBufferInfoCount = queueSubmitDesc.commandBufferNum; + submitInfo.pCommandBufferInfos = commandBuffers; + submitInfo.signalSemaphoreInfoCount = queueSubmitDesc.signalFenceNum; + submitInfo.pSignalSemaphoreInfos = signalSemaphores; + + VkLatencySubmissionPresentIdNV presentId = {VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV}; + if (swapChain && m_Device.m_IsLowLatencySupported) { + presentId.presentID = ((const SwapChainVK*)swapChain)->GetPresentId(); + submitInfo.pNext = &presentId; + } const auto& vk = m_Device.GetDispatchTable(); - VkResult result = vk.QueueSubmit(m_Handle, 1, &submitInfo, VK_NULL_HANDLE); + VkResult result = vk.QueueSubmit2(m_Handle, 1, &submitInfo, VK_NULL_HANDLE); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, ReturnVoid(), "Submit: vkQueueSubmit returned %d", (int32_t)result); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, ReturnVoid(), "Submit: vkQueueSubmit2 returned %d", (int32_t)result); } diff --git a/Source/VK/CommandQueueVK.h b/Source/VK/CommandQueueVK.h index
068557b5..e17662ca 100644 --- a/Source/VK/CommandQueueVK.h +++ b/Source/VK/CommandQueueVK.h @@ -10,8 +10,8 @@ struct CommandQueueVK { inline CommandQueueVK(DeviceVK& device) : m_Device(device) { } - inline CommandQueueVK(DeviceVK& device, VkQueue queue, uint32_t familyIndex, CommandQueueType type) - : m_Device(device), m_FamilyIndex(familyIndex), m_Type(type), m_Handle(queue) { + inline CommandQueueVK(DeviceVK& device, VkQueue queue, uint32_t familyIndex, CommandQueueType type) : + m_Device(device), m_FamilyIndex(familyIndex), m_Type(type), m_Handle(queue) { } inline operator VkQueue() const { @@ -41,11 +41,11 @@ struct CommandQueueVK { //================================================================================================================ void SetDebugName(const char* name); - void Submit(const QueueSubmitDesc& queueSubmitDesc); + void Submit(const QueueSubmitDesc& queueSubmitDesc, const SwapChain* swapChain); Result UploadData(const TextureUploadDesc* textureUploadDescs, uint32_t textureUploadDescNum, const BufferUploadDesc* bufferUploadDescs, uint32_t bufferUploadDescNum); Result WaitForIdle(); - private: +private: DeviceVK& m_Device; VkQueue m_Handle = VK_NULL_HANDLE; uint32_t m_FamilyIndex = (uint32_t)-1; diff --git a/Source/VK/CommandQueueVK.hpp b/Source/VK/CommandQueueVK.hpp index e76ad163..2521f453 100644 --- a/Source/VK/CommandQueueVK.hpp +++ b/Source/VK/CommandQueueVK.hpp @@ -7,7 +7,7 @@ static void NRI_CALL SetCommandQueueDebugName(CommandQueue& commandQueue, const } static void NRI_CALL QueueSubmit(CommandQueue& commandQueue, const QueueSubmitDesc& workSubmissionDesc) { - ((CommandQueueVK&)commandQueue).Submit(workSubmissionDesc); + ((CommandQueueVK&)commandQueue).Submit(workSubmissionDesc, nullptr); } #pragma endregion @@ -28,4 +28,14 @@ static Result NRI_CALL WaitForIdle(CommandQueue& commandQueue) { #pragma endregion -Define_Core_CommandQueue_PartiallyFillFunctionTable(VK) Define_Helper_CommandQueue_PartiallyFillFunctionTable(VK) +#pragma 
region[ Low latency ] + +static void NRI_CALL QueueSubmitTrackable(CommandQueue& commandQueue, const QueueSubmitDesc& workSubmissionDesc, const SwapChain& swapChain) { + ((CommandQueueVK&)commandQueue).Submit(workSubmissionDesc, &swapChain); +} + +#pragma endregion + +Define_Core_CommandQueue_PartiallyFillFunctionTable(VK); +Define_Helper_CommandQueue_PartiallyFillFunctionTable(VK); +Define_LowLatency_CommandQueue_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/ConversionVK.h b/Source/VK/ConversionVK.h index 80e02f07..a7917395 100644 --- a/Source/VK/ConversionVK.h +++ b/Source/VK/ConversionVK.h @@ -32,7 +32,7 @@ constexpr std::array DESCRI VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, // TEXTURE VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, // STORAGE_TEXTURE VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, // BUFFER - VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // STORAGE_BUFFER + VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, // STORAGE_BUFFER VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // STRUCTURED_BUFFER VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // STORAGE_STRUCTURED_BUFFER VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, // ACCELERATION_STRUCTURE @@ -42,6 +42,65 @@ constexpr VkDescriptorType GetDescriptorType(DescriptorType type) { return DESCRIPTOR_TYPES[(uint32_t)type]; } +constexpr VkPipelineStageFlags2 GetPipelineStageFlags(StageBits stageBits) { + // Check non-mask values first + if (stageBits == StageBits::ALL) + return VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + + if (stageBits == StageBits::NONE) + return VK_PIPELINE_STAGE_2_NONE; + + // Gather bits + VkPipelineStageFlags2 flags = 0; + + if (stageBits & StageBits::INDEX_INPUT) + flags |= VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT; + + if (stageBits & StageBits::VERTEX_SHADER) + flags |= VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT; + + if (stageBits & StageBits::TESS_CONTROL_SHADER) + flags |= VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT; + + if (stageBits & StageBits::TESS_EVALUATION_SHADER) + flags |= VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT; + + if 
(stageBits & StageBits::GEOMETRY_SHADER) + flags |= VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT; + + if (stageBits & StageBits::MESH_CONTROL_SHADER) + flags |= VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT; + + if (stageBits & StageBits::MESH_EVALUATION_SHADER) + flags |= VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT; + + if (stageBits & StageBits::FRAGMENT_SHADER) + flags |= VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT; + + if (stageBits & StageBits::DEPTH_STENCIL_ATTACHMENT) + flags |= VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT; + + if (stageBits & StageBits::COLOR_ATTACHMENT) + flags |= VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT; + + if (stageBits & StageBits::COMPUTE_SHADER) + flags |= VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; + + if (stageBits & StageBits::RAY_TRACING_SHADERS) + flags |= VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR; + + if (stageBits & StageBits::INDIRECT) + flags |= VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT; + + if (stageBits & (StageBits::COPY | StageBits::CLEAR_STORAGE)) + flags |= VK_PIPELINE_STAGE_2_TRANSFER_BIT; + + if (stageBits & StageBits::ACCELERATION_STRUCTURE) + flags |= VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR; + + return flags; +} + constexpr VkShaderStageFlags GetShaderStageFlags(StageBits stage) { if (stage & StageBits::VERTEX_SHADER) return VK_SHADER_STAGE_VERTEX_BIT; diff --git a/Source/VK/DescriptorPoolVK.cpp b/Source/VK/DescriptorPoolVK.cpp index 748bda54..2a87d5d0 100644 --- a/Source/VK/DescriptorPoolVK.cpp +++ b/Source/VK/DescriptorPoolVK.cpp @@ -90,7 +90,6 @@ inline Result DescriptorPoolVK::AllocateDescriptorSets( for (size_t i = 0; i < newSetNum; i++) { m_AllocatedSets[prevSetNum + i] = (DescriptorSetVK*)lowLevelAllocator.Allocate(lowLevelAllocator.userArg, sizeof(DescriptorSetVK), alignof(DescriptorSetVK)); - Construct(m_AllocatedSets[prevSetNum + i], 1, m_Device); } } @@ -103,24 +102,25 @@ inline Result DescriptorPoolVK::AllocateDescriptorSets( const DescriptorSetDesc& 
setDesc = pipelineLayoutVK.GetRuntimeBindingInfo().descriptorSetDescs[setIndexInPipelineLayout]; const bool hasVariableDescriptorNum = pipelineLayoutVK.GetRuntimeBindingInfo().hasVariableDescriptorNum[setIndexInPipelineLayout]; - VkDescriptorSetVariableDescriptorCountAllocateInfoEXT variableDescriptorCountInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT}; + VkDescriptorSetVariableDescriptorCountAllocateInfo variableDescriptorCountInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO}; variableDescriptorCountInfo.descriptorSetCount = 1; variableDescriptorCountInfo.pDescriptorCounts = &variableDescriptorNum; - const VkDescriptorSetAllocateInfo info = { - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, hasVariableDescriptorNum ? &variableDescriptorCountInfo : nullptr, m_Handle, 1, &setLayout}; + VkDescriptorSetAllocateInfo info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO}; + info.pNext = hasVariableDescriptorNum ? &variableDescriptorCountInfo : nullptr; + info.descriptorPool = m_Handle; + info.descriptorSetCount = 1; + info.pSetLayouts = &setLayout; const auto& vk = m_Device.GetDispatchTable(); - - VkResult result = VK_SUCCESS; - for (uint32_t i = 0; i < numberOfCopies && result == VK_SUCCESS; i++) { + for (uint32_t i = 0; i < numberOfCopies; i++) { VkDescriptorSet handle = VK_NULL_HANDLE; - result = vk.AllocateDescriptorSets(m_Device, &info, &handle); + VkResult result = vk.AllocateDescriptorSets(m_Device, &info, &handle); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkAllocateDescriptorSets returned %d", (int32_t)result); + ((DescriptorSetVK*)descriptorSets[i])->Create(handle, setDesc); } - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkAllocateDescriptorSets returned %d", (int32_t)result); - return Result::SUCCESS; } diff --git a/Source/VK/DescriptorPoolVK.h b/Source/VK/DescriptorPoolVK.h index 59e29ff2..24c89e2a 100644 --- 
a/Source/VK/DescriptorPoolVK.h +++ b/Source/VK/DescriptorPoolVK.h @@ -35,7 +35,7 @@ struct DescriptorPoolVK { Result AllocateDescriptorSets( const PipelineLayout& pipelineLayout, uint32_t setIndexInPipelineLayout, DescriptorSet** descriptorSets, uint32_t numberOfCopies, uint32_t variableDescriptorNum); - private: +private: DeviceVK& m_Device; Vector m_AllocatedSets; VkDescriptorPool m_Handle = VK_NULL_HANDLE; diff --git a/Source/VK/DescriptorPoolVK.hpp b/Source/VK/DescriptorPoolVK.hpp index 5991952e..96b62f66 100644 --- a/Source/VK/DescriptorPoolVK.hpp +++ b/Source/VK/DescriptorPoolVK.hpp @@ -17,4 +17,4 @@ static void NRI_CALL ResetDescriptorPool(DescriptorPool& descriptorPool) { #pragma endregion -Define_Core_DescriptorPool_PartiallyFillFunctionTable(VK) +Define_Core_DescriptorPool_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/DescriptorSetVK.cpp b/Source/VK/DescriptorSetVK.cpp index 4be5424b..68d2bf41 100644 --- a/Source/VK/DescriptorSetVK.cpp +++ b/Source/VK/DescriptorSetVK.cpp @@ -24,7 +24,7 @@ struct SlabAllocator { m_CurrentOffset = m_Memory; } - private: +private: uint8_t* m_CurrentOffset; size_t m_End; uint8_t* m_Memory; diff --git a/Source/VK/DescriptorSetVK.h b/Source/VK/DescriptorSetVK.h index afd647d0..5ecba1a5 100644 --- a/Source/VK/DescriptorSetVK.h +++ b/Source/VK/DescriptorSetVK.h @@ -30,7 +30,7 @@ struct DescriptorSetVK { void UpdateDynamicConstantBuffers(uint32_t bufferOffset, uint32_t descriptorNum, const Descriptor* const* descriptors); void Copy(const DescriptorSetCopyDesc& descriptorSetCopyDesc); - private: +private: DeviceVK& m_Device; VkDescriptorSet m_Handle = VK_NULL_HANDLE; const DescriptorSetDesc* m_Desc = nullptr; diff --git a/Source/VK/DescriptorSetVK.hpp b/Source/VK/DescriptorSetVK.hpp index 44040548..c364c386 100644 --- a/Source/VK/DescriptorSetVK.hpp +++ b/Source/VK/DescriptorSetVK.hpp @@ -20,4 +20,4 @@ static void NRI_CALL CopyDescriptorSet(DescriptorSet& descriptorSet, const Descr #pragma endregion 
-Define_Core_DescriptorSet_PartiallyFillFunctionTable(VK) +Define_Core_DescriptorSet_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/DescriptorVK.cpp b/Source/VK/DescriptorVK.cpp index 88035938..5dabaf75 100644 --- a/Source/VK/DescriptorVK.cpp +++ b/Source/VK/DescriptorVK.cpp @@ -113,7 +113,7 @@ Result DescriptorVK::Create(const BufferViewDesc& bufferViewDesc) { const BufferVK& buffer = *(const BufferVK*)bufferViewDesc.buffer; m_Type = DescriptorTypeVK::BUFFER_VIEW; - m_Format = GetVkFormat((nri::Format)bufferViewDesc.format); + m_Format = GetVkFormat((Format)bufferViewDesc.format); m_BufferDesc.offset = bufferViewDesc.offset; m_BufferDesc.size = (bufferViewDesc.size == WHOLE_SIZE) ? VK_WHOLE_SIZE : bufferViewDesc.size; m_BufferDesc.handle = buffer.GetHandle(); diff --git a/Source/VK/DescriptorVK.h b/Source/VK/DescriptorVK.h index 979981c4..daef8fe8 100644 --- a/Source/VK/DescriptorVK.h +++ b/Source/VK/DescriptorVK.h @@ -117,11 +117,11 @@ struct DescriptorVK { void SetDebugName(const char* name); - private: +private: template Result CreateTextureView(const T& textureViewDesc); - private: +private: DeviceVK& m_Device; union { VkBufferView m_BufferView = VK_NULL_HANDLE; diff --git a/Source/VK/DescriptorVK.hpp b/Source/VK/DescriptorVK.hpp index d5e3c500..9f47cfce 100644 --- a/Source/VK/DescriptorVK.hpp +++ b/Source/VK/DescriptorVK.hpp @@ -27,4 +27,4 @@ static uint64_t NRI_CALL GetDescriptorNativeObject(const Descriptor& descriptor) #pragma endregion -Define_Core_Descriptor_PartiallyFillFunctionTable(VK) +Define_Core_Descriptor_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/DeviceVK.cpp b/Source/VK/DeviceVK.cpp index aa944647..7fff0a7b 100644 --- a/Source/VK/DeviceVK.cpp +++ b/Source/VK/DeviceVK.cpp @@ -11,10 +11,12 @@ #include "DescriptorSetVK.h" #include "DescriptorVK.h" #include "FenceVK.h" +#include "HelperDeviceMemoryAllocator.h" #include "MemoryVK.h" #include "PipelineLayoutVK.h" #include "PipelineVK.h" #include "QueryPoolVK.h" +#include 
"Streamer.h" #include "SwapChainVK.h" #include "TextureVK.h" @@ -141,26 +143,33 @@ void DeviceVK::ProcessInstanceExtensions(Vector& desiredInstanceExt for (const VkExtensionProperties& props : supportedExts) REPORT_INFO(this, " %s (v%u)", props.extensionName, props.specVersion); - // Mandatory // TODO: review - desiredInstanceExts.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + // Mandatory + if (IsExtensionSupported(VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME, supportedExts)) + desiredInstanceExts.push_back(VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME); + +#ifdef __APPLE__ + desiredInstanceExts.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); + desiredInstanceExts.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); +#endif + + // Optional + if (IsExtensionSupported(VK_KHR_SURFACE_EXTENSION_NAME, supportedExts)) { + desiredInstanceExts.push_back(VK_KHR_SURFACE_EXTENSION_NAME); + #ifdef VK_USE_PLATFORM_WIN32_KHR - desiredInstanceExts.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); + desiredInstanceExts.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); #endif #ifdef VK_USE_PLATFORM_METAL_EXT - desiredInstanceExts.push_back(VK_EXT_METAL_SURFACE_EXTENSION_NAME); + desiredInstanceExts.push_back(VK_EXT_METAL_SURFACE_EXTENSION_NAME); #endif #ifdef VK_USE_PLATFORM_XLIB_KHR - desiredInstanceExts.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); + desiredInstanceExts.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); #endif #ifdef VK_USE_PLATFORM_WAYLAND_KHR - desiredInstanceExts.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); -#endif -#ifdef __APPLE__ - desiredInstanceExts.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); - desiredInstanceExts.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + desiredInstanceExts.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME); #endif + } - // Optional if (IsExtensionSupported(VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME, supportedExts)) 
desiredInstanceExts.push_back(VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME); @@ -195,6 +204,12 @@ void DeviceVK::ProcessDeviceExtensions(Vector& desiredDeviceExts, b if (IsExtensionSupported(VK_KHR_SWAPCHAIN_EXTENSION_NAME, supportedExts)) desiredDeviceExts.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + if (IsExtensionSupported(VK_KHR_PRESENT_ID_EXTENSION_NAME, supportedExts)) + desiredDeviceExts.push_back(VK_KHR_PRESENT_ID_EXTENSION_NAME); + + if (IsExtensionSupported(VK_KHR_PRESENT_WAIT_EXTENSION_NAME, supportedExts)) + desiredDeviceExts.push_back(VK_KHR_PRESENT_WAIT_EXTENSION_NAME); + if (IsExtensionSupported(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, supportedExts)) desiredDeviceExts.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); @@ -210,7 +225,22 @@ void DeviceVK::ProcessDeviceExtensions(Vector& desiredDeviceExts, b if (IsExtensionSupported(VK_KHR_PIPELINE_LIBRARY_EXTENSION_NAME, supportedExts)) desiredDeviceExts.push_back(VK_KHR_PIPELINE_LIBRARY_EXTENSION_NAME); + if (IsExtensionSupported(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, supportedExts) && !disableRayTracing) + desiredDeviceExts.push_back(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME); + + if (IsExtensionSupported(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, supportedExts) && !disableRayTracing) + desiredDeviceExts.push_back(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); + + if (IsExtensionSupported(VK_KHR_RAY_QUERY_EXTENSION_NAME, supportedExts) && !disableRayTracing) + desiredDeviceExts.push_back(VK_KHR_RAY_QUERY_EXTENSION_NAME); + + if (IsExtensionSupported(VK_KHR_RAY_TRACING_MAINTENANCE_1_EXTENSION_NAME, supportedExts) && !disableRayTracing) + desiredDeviceExts.push_back(VK_KHR_RAY_TRACING_MAINTENANCE_1_EXTENSION_NAME); + // Optional (EXT) + if (IsExtensionSupported(VK_EXT_OPACITY_MICROMAP_EXTENSION_NAME, supportedExts) && !disableRayTracing) + desiredDeviceExts.push_back(VK_EXT_OPACITY_MICROMAP_EXTENSION_NAME); + if (IsExtensionSupported(VK_EXT_SAMPLE_LOCATIONS_EXTENSION_NAME, 
supportedExts)) desiredDeviceExts.push_back(VK_EXT_SAMPLE_LOCATIONS_EXTENSION_NAME); @@ -223,26 +253,9 @@ void DeviceVK::ProcessDeviceExtensions(Vector& desiredDeviceExts, b if (IsExtensionSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME, supportedExts)) desiredDeviceExts.push_back(VK_EXT_MESH_SHADER_EXTENSION_NAME); - // Ray tracing - // It consumes CPU memory: enable if supported and not disabled - if (!disableRayTracing) { - // Mandatory - if (IsExtensionSupported(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, supportedExts)) - desiredDeviceExts.push_back(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME); - - if (IsExtensionSupported(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, supportedExts)) - desiredDeviceExts.push_back(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME); - - // Optional - if (IsExtensionSupported(VK_KHR_RAY_QUERY_EXTENSION_NAME, supportedExts)) - desiredDeviceExts.push_back(VK_KHR_RAY_QUERY_EXTENSION_NAME); - - if (IsExtensionSupported(VK_KHR_RAY_TRACING_MAINTENANCE_1_EXTENSION_NAME, supportedExts)) - desiredDeviceExts.push_back(VK_KHR_RAY_TRACING_MAINTENANCE_1_EXTENSION_NAME); - - if (IsExtensionSupported(VK_EXT_OPACITY_MICROMAP_EXTENSION_NAME, supportedExts)) - desiredDeviceExts.push_back(VK_EXT_OPACITY_MICROMAP_EXTENSION_NAME); - } + // Optional + if (IsExtensionSupported(VK_NV_LOW_LATENCY_2_EXTENSION_NAME, supportedExts)) + desiredDeviceExts.push_back(VK_NV_LOW_LATENCY_2_EXTENSION_NAME); } template @@ -259,8 +272,8 @@ Result DeviceVK::CreateImplementation(Interface*& entity, const Args&... 
args) { return result; } -DeviceVK::DeviceVK(const CallbackInterface& callbacks, const StdAllocator& stdAllocator) - : DeviceBase(callbacks, stdAllocator), m_ConcurrentSharingModeQueueIndices(GetStdAllocator()) { +DeviceVK::DeviceVK(const CallbackInterface& callbacks, const StdAllocator& stdAllocator) : + DeviceBase(callbacks, stdAllocator), m_ConcurrentSharingModeQueueIndices(GetStdAllocator()) { m_Desc.graphicsAPI = GraphicsAPI::VULKAN; m_Desc.nriVersionMajor = NRI_VERSION_MAJOR; m_Desc.nriVersionMinor = NRI_VERSION_MINOR; @@ -386,7 +399,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi return res; } - res = ResolveInstanceDispatchTable(); + res = ResolveInstanceDispatchTable(desiredInstanceExts); if (res != Result::SUCCESS) return res; } @@ -451,6 +464,16 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi APPEND_EXT(shadingRateFeatures); } + VkPhysicalDevicePresentIdFeaturesKHR presentIdFeatures = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_ID_FEATURES_KHR}; + if (IsExtensionSupported(VK_KHR_PRESENT_ID_EXTENSION_NAME, desiredDeviceExts)) { + APPEND_EXT(presentIdFeatures); + } + + VkPhysicalDevicePresentWaitFeaturesKHR presentWaitFeatures = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_WAIT_FEATURES_KHR}; + if (IsExtensionSupported(VK_KHR_PRESENT_WAIT_EXTENSION_NAME, desiredDeviceExts)) { + APPEND_EXT(presentWaitFeatures); + } + VkPhysicalDeviceLineRasterizationFeaturesEXT lineRasterizationFeatures = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT}; if (IsExtensionSupported(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME, desiredDeviceExts)) { APPEND_EXT(lineRasterizationFeatures); @@ -520,7 +543,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi RETURN_ON_FAILURE(this, result == VK_SUCCESS, GetReturnCode(result), "vkCreateDevice returned %d", (int32_t)result); } - Result res = ResolveDispatchTable(desiredInstanceExts, desiredDeviceExts); + Result res = 
ResolveDispatchTable(desiredDeviceExts); if (res != Result::SUCCESS) return res; } @@ -567,7 +590,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi VkPhysicalDeviceAccelerationStructurePropertiesKHR accelerationStructureProps = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR}; if (IsExtensionSupported(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, desiredDeviceExts)) { APPEND_EXT(accelerationStructureProps); - m_Desc.isRaytracingSupported = true; + m_Desc.isRayTracingSupported = true; } VkPhysicalDeviceMeshShaderPropertiesEXT meshShaderProps = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_EXT}; @@ -579,11 +602,14 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi m_VK.GetPhysicalDeviceProperties2(m_PhysicalDevice, &props); // Internal features - m_IsDescriptorIndexingSupported = features12.descriptorIndexing ? true : false; - m_IsDeviceAddressSupported = features12.bufferDeviceAddress ? 
true : false; + m_IsDescriptorIndexingSupported = features12.descriptorIndexing; + m_IsDeviceAddressSupported = features12.bufferDeviceAddress; m_IsSwapChainMutableFormatSupported = IsExtensionSupported(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, desiredDeviceExts); + m_IsPresentIdSupported = presentIdFeatures.presentId; + m_IsPresentWaitSupported = m_IsPresentIdSupported && presentWaitFeatures.presentWait; + m_IsLowLatencySupported = m_IsPresentIdSupported && IsExtensionSupported(VK_NV_LOW_LATENCY_2_EXTENSION_NAME, desiredDeviceExts); - // Fill + // Fill desc const VkPhysicalDeviceLimits& limits = props.properties.limits; m_Desc.viewportMaxNum = limits.maxViewports; @@ -628,7 +654,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi m_Desc.storageBufferOffsetAlignment = (uint32_t)limits.minStorageBufferOffsetAlignment; m_Desc.storageBufferMaxRange = limits.maxStorageBufferRange; m_Desc.pushConstantsMaxSize = limits.maxPushConstantsSize; - m_Desc.bufferMaxSize = std::numeric_limits::max(); + m_Desc.bufferMaxSize = props13.maxBufferSize; m_Desc.bufferTextureGranularity = (uint32_t)limits.bufferImageGranularity; m_Desc.boundDescriptorSetMaxNum = limits.maxBoundDescriptorSets; @@ -706,19 +732,7 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi m_Desc.isFloat16Supported = features12.shaderFloat16; m_Desc.isIndependentFrontAndBackStencilReferenceAndMasksSupported = true; m_Desc.isLineSmoothingSupported = lineRasterizationFeatures.smoothLines; - - // Copy queue timestamp - uint32_t familyNum = 0; - m_VK.GetPhysicalDeviceQueueFamilyProperties(m_PhysicalDevice, &familyNum, nullptr); - - Vector familyProperties(familyNum, m_StdAllocator); - m_VK.GetPhysicalDeviceQueueFamilyProperties(m_PhysicalDevice, &familyNum, familyProperties.data()); - - uint32_t copyQueueTimestampValidBits = 0; - const uint32_t copyQueueFamilyIndex = m_FamilyIndices[(uint32_t)CommandQueueType::COPY]; - if (copyQueueFamilyIndex != 
INVALID_FAMILY_INDEX) - copyQueueTimestampValidBits = familyProperties[copyQueueFamilyIndex].timestampValidBits; - m_Desc.isCopyQueueTimestampSupported = copyQueueTimestampValidBits == 64; + m_Desc.isCopyQueueTimestampSupported = limits.timestampComputeAndGraphics; // Conservative raster if (conservativeRasterProps.fullyCoveredFragmentShaderInputVariable && conservativeRasterProps.primitiveOverestimationSize <= (1.0 / 256.0f)) @@ -747,6 +761,9 @@ Result DeviceVK::Create(const DeviceCreationDesc& deviceCreationDesc, const Devi m_Desc.meshEvaluationWorkGroupInvocationMaxNum = meshShaderProps.maxMeshWorkGroupInvocations; m_Desc.isMeshShaderPipelineStatsSupported = meshShaderFeatures.meshShaderQueries == VK_TRUE; m_Desc.isDrawMeshTasksIndirectSupported = true; + + m_Desc.isSwapChainSupported = IsExtensionSupported(VK_KHR_SWAPCHAIN_EXTENSION_NAME, desiredDeviceExts); + m_Desc.isLowLatencySupported = IsExtensionSupported(VK_NV_LOW_LATENCY_2_EXTENSION_NAME, desiredDeviceExts); } return FillFunctionTable(m_CoreInterface); @@ -1062,13 +1079,9 @@ void DeviceVK::FillFamilyIndices(bool useEnabledFamilyIndices, const uint32_t* e m_VK.GetPhysicalDeviceQueueFamilyProperties(m_PhysicalDevice, &familyNum, familyProps.data()); memset(m_FamilyIndices.data(), INVALID_FAMILY_INDEX, m_FamilyIndices.size() * sizeof(uint32_t)); + std::array scores = {}; for (uint32_t i = 0; i < familyProps.size(); i++) { - const VkQueueFlags mask = familyProps[i].queueFlags; - const bool graphics = mask & VK_QUEUE_GRAPHICS_BIT; - const bool compute = mask & VK_QUEUE_COMPUTE_BIT; - const bool copy = mask & VK_QUEUE_TRANSFER_BIT; - if (useEnabledFamilyIndices) { bool isFamilyEnabled = false; for (uint32_t j = 0; j < familyIndexNum && !isFamilyEnabled; j++) @@ -1078,12 +1091,34 @@ void DeviceVK::FillFamilyIndices(bool useEnabledFamilyIndices, const uint32_t* e continue; } - if (graphics) + VkQueueFlags flags = familyProps[i].queueFlags; + uint8_t score; + + bool graphics = flags & VK_QUEUE_GRAPHICS_BIT; + 
bool compute = flags & VK_QUEUE_COMPUTE_BIT; + bool copy = flags & VK_QUEUE_TRANSFER_BIT; + bool sparse = flags & VK_QUEUE_SPARSE_BINDING_BIT; + bool protect = flags & VK_QUEUE_PROTECTED_BIT; + bool video = flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR | VK_QUEUE_VIDEO_ENCODE_BIT_KHR); + bool opticalFlow = flags & VK_QUEUE_OPTICAL_FLOW_BIT_NV; + + score = (graphics ? 100 : 0) + (compute ? 10 : 0) + (copy ? 10 : 0) + (sparse ? 5 : 0) + (opticalFlow ? 2 : 0) + (video ? 1 : 0) + (protect ? 1 : 0); + if (score > scores[(uint32_t)CommandQueueType::GRAPHICS]) { m_FamilyIndices[(uint32_t)CommandQueueType::GRAPHICS] = i; - else if (compute) + scores[(uint32_t)CommandQueueType::GRAPHICS] = score; + } + + score = (!graphics ? 10 : 0) + (compute ? 100 : 0) + (!copy ? 10 : 0) + (sparse ? 5 : 0) + (opticalFlow ? 2 : 0) + (video ? 1 : 0) + (protect ? 1 : 0); + if (score > scores[(uint32_t)CommandQueueType::COMPUTE]) { m_FamilyIndices[(uint32_t)CommandQueueType::COMPUTE] = i; - else if (copy) + scores[(uint32_t)CommandQueueType::COMPUTE] = score; + } + + score = (!graphics ? 10 : 0) + (!compute ? 10 : 0) + (copy ? 100 : 0) + (sparse ? 5 : 0) + (opticalFlow ? 2 : 0) + (video ? 1 : 0) + (protect ? 
1 : 0); + if (score > scores[(uint32_t)CommandQueueType::COPY]) { m_FamilyIndices[(uint32_t)CommandQueueType::COPY] = i; + scores[(uint32_t)CommandQueueType::COPY] = score; + } } } @@ -1160,33 +1195,33 @@ void DeviceVK::ReportDeviceGroupInfo() { } } -#define RESOLVE_OPTIONAL_DEVICE_FUNCTION(name) m_VK.name = (PFN_vk##name)m_VK.GetDeviceProcAddr(m_Device, "vk" #name) +#define MERGE_TOKENS2(a, b) a##b +#define MERGE_TOKENS3(a, b, c) a##b##c -#define RESOLVE_DEVICE_FUNCTION(name) \ - RESOLVE_OPTIONAL_DEVICE_FUNCTION(name); \ +#define GET_DEVICE_OPTIONAL_CORE_OR_KHR_PROC(name) \ + m_VK.name = (PFN_vk##name)m_VK.GetDeviceProcAddr(m_Device, NRI_STRINGIFY(MERGE_TOKENS2(vk, name))); \ if (!m_VK.name) { \ - REPORT_ERROR(this, "Failed to get device function: '%s'.", #name); \ - return Result::UNSUPPORTED; \ + m_VK.name = (PFN_vk##name)m_VK.GetDeviceProcAddr(m_Device, NRI_STRINGIFY(MERGE_TOKENS3(vk, name, KHR))); \ } -#define RESOLVE_DEVICE_FUNCTION_WITH_OTHER_NAME(functionName, otherName) \ - m_VK.functionName = (PFN_vk##functionName)m_VK.GetDeviceProcAddr(m_Device, otherName); \ - if (!m_VK.functionName) { \ - REPORT_ERROR(this, "Failed to get device function: '" otherName "'."); \ +#define GET_DEVICE_CORE_OR_KHR_PROC(name) \ + GET_DEVICE_OPTIONAL_CORE_OR_KHR_PROC(name) \ + if (!m_VK.name) { \ + REPORT_ERROR(this, "Failed to get device function: '%s'", NRI_STRINGIFY(MERGE_TOKENS2(vk, name))); \ return Result::UNSUPPORTED; \ } -#define RESOLVE_INSTANCE_FUNCTION(name) \ - m_VK.name = (PFN_vk##name)m_VK.GetInstanceProcAddr(m_Instance, "vk" #name); \ +#define GET_DEVICE_PROC(name) \ + m_VK.name = (PFN_vk##name)m_VK.GetDeviceProcAddr(m_Device, NRI_STRINGIFY(MERGE_TOKENS2(vk, name))); \ if (!m_VK.name) { \ - REPORT_ERROR(this, "Failed to get instance function: '%s'.", #name); \ + REPORT_ERROR(this, "Failed to get device function: '%s'", NRI_STRINGIFY(MERGE_TOKENS2(vk, name))); \ return Result::UNSUPPORTED; \ } -#define RESOLVE_PRE_INSTANCE_FUNCTION(name) \ - m_VK.name = 
(PFN_vk##name)m_VK.GetInstanceProcAddr(VK_NULL_HANDLE, "vk" #name); \ +#define GET_INSTANCE_PROC(name) \ + m_VK.name = (PFN_vk##name)m_VK.GetInstanceProcAddr(m_Instance, NRI_STRINGIFY(MERGE_TOKENS2(vk, name))); \ if (!m_VK.name) { \ - REPORT_ERROR(this, "Failed to get instance function: '%s'.", #name); \ + REPORT_ERROR(this, "Failed to get instance function: '%s'", NRI_STRINGIFY(MERGE_TOKENS2(vk, name))); \ return Result::UNSUPPORTED; \ } @@ -1195,199 +1230,195 @@ Result DeviceVK::ResolvePreInstanceDispatchTable() { m_VK.GetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)GetSharedLibraryFunction(*m_Loader, "vkGetInstanceProcAddr"); if (!m_VK.GetInstanceProcAddr) { - REPORT_ERROR(this, "Failed to get vkGetInstanceProcAddr."); + REPORT_ERROR(this, "Failed to get 'vkGetInstanceProcAddr'"); return Result::UNSUPPORTED; } - RESOLVE_PRE_INSTANCE_FUNCTION(CreateInstance); - RESOLVE_PRE_INSTANCE_FUNCTION(EnumerateInstanceExtensionProperties); - RESOLVE_PRE_INSTANCE_FUNCTION(EnumerateInstanceLayerProperties); + GET_INSTANCE_PROC(CreateInstance); + GET_INSTANCE_PROC(EnumerateInstanceExtensionProperties); + GET_INSTANCE_PROC(EnumerateInstanceLayerProperties); return Result::SUCCESS; } -Result DeviceVK::ResolveInstanceDispatchTable() { - RESOLVE_INSTANCE_FUNCTION(GetDeviceProcAddr); - RESOLVE_INSTANCE_FUNCTION(DestroyInstance); - RESOLVE_INSTANCE_FUNCTION(DestroyDevice); - RESOLVE_INSTANCE_FUNCTION(GetPhysicalDeviceMemoryProperties); - RESOLVE_INSTANCE_FUNCTION(GetDeviceGroupPeerMemoryFeatures); - RESOLVE_INSTANCE_FUNCTION(GetPhysicalDeviceFormatProperties); - RESOLVE_INSTANCE_FUNCTION(CreateDevice); - RESOLVE_INSTANCE_FUNCTION(GetDeviceQueue); - RESOLVE_INSTANCE_FUNCTION(EnumeratePhysicalDeviceGroups); - RESOLVE_INSTANCE_FUNCTION(GetPhysicalDeviceProperties2); - RESOLVE_INSTANCE_FUNCTION(GetPhysicalDeviceFeatures2); - RESOLVE_INSTANCE_FUNCTION(GetPhysicalDeviceQueueFamilyProperties); - RESOLVE_INSTANCE_FUNCTION(EnumerateDeviceExtensionProperties); - 
RESOLVE_INSTANCE_FUNCTION(GetPhysicalDeviceSurfaceFormatsKHR); - RESOLVE_INSTANCE_FUNCTION(GetPhysicalDeviceSurfaceSupportKHR); - RESOLVE_INSTANCE_FUNCTION(GetPhysicalDeviceSurfaceCapabilitiesKHR); - RESOLVE_INSTANCE_FUNCTION(GetPhysicalDeviceSurfacePresentModesKHR); - RESOLVE_INSTANCE_FUNCTION(DestroySurfaceKHR); +Result DeviceVK::ResolveInstanceDispatchTable(const Vector& desiredInstanceExts) { + GET_INSTANCE_PROC(GetDeviceProcAddr); + GET_INSTANCE_PROC(DestroyInstance); + GET_INSTANCE_PROC(DestroyDevice); + GET_INSTANCE_PROC(GetPhysicalDeviceMemoryProperties); + GET_INSTANCE_PROC(GetDeviceGroupPeerMemoryFeatures); + GET_INSTANCE_PROC(GetPhysicalDeviceFormatProperties); + GET_INSTANCE_PROC(CreateDevice); + GET_INSTANCE_PROC(GetDeviceQueue); + GET_INSTANCE_PROC(EnumeratePhysicalDeviceGroups); + GET_INSTANCE_PROC(GetPhysicalDeviceProperties2); + GET_INSTANCE_PROC(GetPhysicalDeviceFeatures2); + GET_INSTANCE_PROC(GetPhysicalDeviceQueueFamilyProperties); + GET_INSTANCE_PROC(EnumerateDeviceExtensionProperties); + + if (IsExtensionSupported(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, desiredInstanceExts)) { + GET_INSTANCE_PROC(SetDebugUtilsObjectNameEXT); + GET_INSTANCE_PROC(CmdBeginDebugUtilsLabelEXT); + GET_INSTANCE_PROC(CmdEndDebugUtilsLabelEXT); + } + + if (IsExtensionSupported(VK_KHR_SURFACE_EXTENSION_NAME, desiredInstanceExts)) { + GET_INSTANCE_PROC(GetPhysicalDeviceSurfaceFormatsKHR); + GET_INSTANCE_PROC(GetPhysicalDeviceSurfaceSupportKHR); + GET_INSTANCE_PROC(GetPhysicalDeviceSurfaceCapabilities2KHR); + GET_INSTANCE_PROC(GetPhysicalDeviceSurfacePresentModesKHR); + GET_INSTANCE_PROC(DestroySurfaceKHR); #if VK_USE_PLATFORM_WIN32_KHR - RESOLVE_INSTANCE_FUNCTION(CreateWin32SurfaceKHR); - RESOLVE_INSTANCE_FUNCTION(GetMemoryWin32HandlePropertiesKHR); + GET_INSTANCE_PROC(CreateWin32SurfaceKHR); + GET_INSTANCE_PROC(GetMemoryWin32HandlePropertiesKHR); #endif #if VK_USE_PLATFORM_METAL_EXT - RESOLVE_INSTANCE_FUNCTION(CreateMetalSurfaceEXT); + 
GET_INSTANCE_PROC(CreateMetalSurfaceEXT); #endif #if VK_USE_PLATFORM_XLIB_KHR - RESOLVE_INSTANCE_FUNCTION(CreateXlibSurfaceKHR); + GET_INSTANCE_PROC(CreateXlibSurfaceKHR); #endif #if VK_USE_PLATFORM_WAYLAND_KHR - RESOLVE_INSTANCE_FUNCTION(CreateWaylandSurfaceKHR); + GET_INSTANCE_PROC(CreateWaylandSurfaceKHR); #endif + } return Result::SUCCESS; } -Result DeviceVK::ResolveDispatchTable(const Vector& desiredInstanceExts, const Vector& desiredDeviceExts) { - RESOLVE_DEVICE_FUNCTION(CreateBuffer); - RESOLVE_DEVICE_FUNCTION(CreateImage); - RESOLVE_DEVICE_FUNCTION(CreateBufferView); - RESOLVE_DEVICE_FUNCTION(CreateImageView); - RESOLVE_DEVICE_FUNCTION(CreateSampler); - RESOLVE_DEVICE_FUNCTION(CreateFramebuffer); - RESOLVE_DEVICE_FUNCTION(CreateQueryPool); - RESOLVE_DEVICE_FUNCTION(CreateCommandPool); - RESOLVE_DEVICE_FUNCTION(CreateSemaphore); - RESOLVE_DEVICE_FUNCTION(CreateDescriptorPool); - RESOLVE_DEVICE_FUNCTION(CreatePipelineLayout); - RESOLVE_DEVICE_FUNCTION(CreateDescriptorSetLayout); - RESOLVE_DEVICE_FUNCTION(CreateShaderModule); - RESOLVE_DEVICE_FUNCTION(CreateGraphicsPipelines); - RESOLVE_DEVICE_FUNCTION(CreateComputePipelines); - RESOLVE_DEVICE_FUNCTION(DestroyBuffer); - RESOLVE_DEVICE_FUNCTION(DestroyImage); - RESOLVE_DEVICE_FUNCTION(DestroyBufferView); - RESOLVE_DEVICE_FUNCTION(DestroyImageView); - RESOLVE_DEVICE_FUNCTION(DestroySampler); - RESOLVE_DEVICE_FUNCTION(DestroyFramebuffer); - RESOLVE_DEVICE_FUNCTION(DestroyQueryPool); - RESOLVE_DEVICE_FUNCTION(DestroyCommandPool); - RESOLVE_DEVICE_FUNCTION(DestroySemaphore); - RESOLVE_DEVICE_FUNCTION(DestroyDescriptorPool); - RESOLVE_DEVICE_FUNCTION(DestroyPipelineLayout); - RESOLVE_DEVICE_FUNCTION(DestroyDescriptorSetLayout); - RESOLVE_DEVICE_FUNCTION(DestroyShaderModule); - RESOLVE_DEVICE_FUNCTION(DestroyPipeline); - RESOLVE_DEVICE_FUNCTION(AllocateMemory); - RESOLVE_DEVICE_FUNCTION(MapMemory); - RESOLVE_DEVICE_FUNCTION(UnmapMemory); - RESOLVE_DEVICE_FUNCTION(FreeMemory); - 
RESOLVE_DEVICE_FUNCTION(QueueWaitIdle); - RESOLVE_DEVICE_FUNCTION(AcquireNextImageKHR); - RESOLVE_DEVICE_FUNCTION(QueueSubmit); - RESOLVE_DEVICE_FUNCTION(QueuePresentKHR); - RESOLVE_DEVICE_FUNCTION(GetSemaphoreCounterValue); - RESOLVE_DEVICE_FUNCTION(WaitSemaphores); - RESOLVE_DEVICE_FUNCTION(ResetCommandPool); - RESOLVE_DEVICE_FUNCTION(ResetDescriptorPool); - RESOLVE_DEVICE_FUNCTION(AllocateCommandBuffers); - RESOLVE_DEVICE_FUNCTION(AllocateDescriptorSets); - RESOLVE_DEVICE_FUNCTION(FreeCommandBuffers); - RESOLVE_DEVICE_FUNCTION(FreeDescriptorSets); - RESOLVE_DEVICE_FUNCTION(UpdateDescriptorSets); - RESOLVE_DEVICE_FUNCTION(BeginCommandBuffer); - RESOLVE_DEVICE_FUNCTION(CmdSetViewport); - RESOLVE_DEVICE_FUNCTION(CmdSetScissor); - RESOLVE_DEVICE_FUNCTION(CmdSetDepthBounds); - RESOLVE_DEVICE_FUNCTION(CmdSetStencilReference); - RESOLVE_DEVICE_FUNCTION(CmdSetBlendConstants); - RESOLVE_DEVICE_FUNCTION(CmdClearAttachments); - RESOLVE_DEVICE_FUNCTION(CmdClearColorImage); - RESOLVE_DEVICE_FUNCTION(CmdBindVertexBuffers); - RESOLVE_DEVICE_FUNCTION(CmdBindIndexBuffer); - RESOLVE_DEVICE_FUNCTION(CmdBindPipeline); - RESOLVE_DEVICE_FUNCTION(CmdBindDescriptorSets); - RESOLVE_DEVICE_FUNCTION(CmdPushConstants); - RESOLVE_DEVICE_FUNCTION(CmdDispatch); - RESOLVE_DEVICE_FUNCTION(CmdDispatchIndirect); - RESOLVE_DEVICE_FUNCTION(CmdDraw); - RESOLVE_DEVICE_FUNCTION(CmdDrawIndexed); - RESOLVE_DEVICE_FUNCTION(CmdDrawIndirect); - RESOLVE_DEVICE_FUNCTION(CmdDrawIndexedIndirect); - RESOLVE_DEVICE_FUNCTION(CmdCopyBuffer); - RESOLVE_DEVICE_FUNCTION(CmdCopyImage); - RESOLVE_DEVICE_FUNCTION(CmdCopyBufferToImage); - RESOLVE_DEVICE_FUNCTION(CmdCopyImageToBuffer); - RESOLVE_DEVICE_FUNCTION(CmdPipelineBarrier2); - RESOLVE_DEVICE_FUNCTION(CmdBeginQuery); - RESOLVE_DEVICE_FUNCTION(CmdEndQuery); - RESOLVE_DEVICE_FUNCTION(CmdWriteTimestamp); - RESOLVE_DEVICE_FUNCTION(CmdCopyQueryPoolResults); - RESOLVE_DEVICE_FUNCTION(CmdResetQueryPool); - RESOLVE_DEVICE_FUNCTION(CmdFillBuffer); - 
RESOLVE_DEVICE_FUNCTION(EndCommandBuffer); - - RESOLVE_DEVICE_FUNCTION(CreateSwapchainKHR); - RESOLVE_DEVICE_FUNCTION(DestroySwapchainKHR); - RESOLVE_DEVICE_FUNCTION(GetSwapchainImagesKHR); - - RESOLVE_OPTIONAL_DEVICE_FUNCTION(CmdBeginRendering); - if (!m_VK.CmdBeginRendering) - RESOLVE_DEVICE_FUNCTION_WITH_OTHER_NAME(CmdBeginRendering, "vkCmdBeginRenderingKHR"); - - RESOLVE_OPTIONAL_DEVICE_FUNCTION(CmdEndRendering); - if (!m_VK.CmdEndRendering) - RESOLVE_DEVICE_FUNCTION_WITH_OTHER_NAME(CmdEndRendering, "vkCmdEndRenderingKHR"); - - RESOLVE_OPTIONAL_DEVICE_FUNCTION(BindBufferMemory2); - if (!m_VK.BindBufferMemory2) - RESOLVE_DEVICE_FUNCTION_WITH_OTHER_NAME(BindBufferMemory2, "vkBindBufferMemory2KHR"); - - RESOLVE_OPTIONAL_DEVICE_FUNCTION(BindImageMemory2); - if (!m_VK.BindImageMemory2) - RESOLVE_DEVICE_FUNCTION_WITH_OTHER_NAME(BindImageMemory2, "vkBindImageMemory2KHR"); - - RESOLVE_OPTIONAL_DEVICE_FUNCTION(GetBufferMemoryRequirements2); - if (!m_VK.GetBufferMemoryRequirements2) - RESOLVE_DEVICE_FUNCTION_WITH_OTHER_NAME(GetBufferMemoryRequirements2, "vkGetBufferMemoryRequirements2KHR"); - - RESOLVE_OPTIONAL_DEVICE_FUNCTION(GetImageMemoryRequirements2); - if (!m_VK.GetImageMemoryRequirements2) - RESOLVE_DEVICE_FUNCTION_WITH_OTHER_NAME(GetImageMemoryRequirements2, "vkGetImageMemoryRequirements2KHR"); - - RESOLVE_OPTIONAL_DEVICE_FUNCTION(GetBufferDeviceAddress); - if (!m_VK.GetBufferDeviceAddress) - RESOLVE_DEVICE_FUNCTION_WITH_OTHER_NAME(GetBufferDeviceAddress, "vkGetBufferDeviceAddressKHR"); +Result DeviceVK::ResolveDispatchTable(const Vector& desiredDeviceExts) { + GET_DEVICE_CORE_OR_KHR_PROC(CreateBuffer); + GET_DEVICE_CORE_OR_KHR_PROC(CreateImage); + GET_DEVICE_CORE_OR_KHR_PROC(CreateBufferView); + GET_DEVICE_CORE_OR_KHR_PROC(CreateImageView); + GET_DEVICE_CORE_OR_KHR_PROC(CreateSampler); + GET_DEVICE_CORE_OR_KHR_PROC(CreateFramebuffer); + GET_DEVICE_CORE_OR_KHR_PROC(CreateQueryPool); + GET_DEVICE_CORE_OR_KHR_PROC(CreateCommandPool); + 
GET_DEVICE_CORE_OR_KHR_PROC(CreateSemaphore); + GET_DEVICE_CORE_OR_KHR_PROC(CreateDescriptorPool); + GET_DEVICE_CORE_OR_KHR_PROC(CreatePipelineLayout); + GET_DEVICE_CORE_OR_KHR_PROC(CreateDescriptorSetLayout); + GET_DEVICE_CORE_OR_KHR_PROC(CreateShaderModule); + GET_DEVICE_CORE_OR_KHR_PROC(CreateGraphicsPipelines); + GET_DEVICE_CORE_OR_KHR_PROC(CreateComputePipelines); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyBuffer); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyImage); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyBufferView); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyImageView); + GET_DEVICE_CORE_OR_KHR_PROC(DestroySampler); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyFramebuffer); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyQueryPool); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyCommandPool); + GET_DEVICE_CORE_OR_KHR_PROC(DestroySemaphore); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyDescriptorPool); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyPipelineLayout); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyDescriptorSetLayout); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyShaderModule); + GET_DEVICE_CORE_OR_KHR_PROC(DestroyPipeline); + GET_DEVICE_CORE_OR_KHR_PROC(AllocateMemory); + GET_DEVICE_CORE_OR_KHR_PROC(MapMemory); + GET_DEVICE_CORE_OR_KHR_PROC(UnmapMemory); + GET_DEVICE_CORE_OR_KHR_PROC(FreeMemory); + GET_DEVICE_CORE_OR_KHR_PROC(QueueWaitIdle); + GET_DEVICE_CORE_OR_KHR_PROC(QueueSubmit2); + GET_DEVICE_CORE_OR_KHR_PROC(GetSemaphoreCounterValue); + GET_DEVICE_CORE_OR_KHR_PROC(WaitSemaphores); + GET_DEVICE_CORE_OR_KHR_PROC(ResetCommandPool); + GET_DEVICE_CORE_OR_KHR_PROC(ResetDescriptorPool); + GET_DEVICE_CORE_OR_KHR_PROC(AllocateCommandBuffers); + GET_DEVICE_CORE_OR_KHR_PROC(AllocateDescriptorSets); + GET_DEVICE_CORE_OR_KHR_PROC(FreeCommandBuffers); + GET_DEVICE_CORE_OR_KHR_PROC(FreeDescriptorSets); + GET_DEVICE_CORE_OR_KHR_PROC(UpdateDescriptorSets); + GET_DEVICE_CORE_OR_KHR_PROC(BindBufferMemory2); + GET_DEVICE_CORE_OR_KHR_PROC(BindImageMemory2); + GET_DEVICE_CORE_OR_KHR_PROC(GetBufferMemoryRequirements2); + 
GET_DEVICE_CORE_OR_KHR_PROC(GetImageMemoryRequirements2); + GET_DEVICE_CORE_OR_KHR_PROC(BeginCommandBuffer); + GET_DEVICE_CORE_OR_KHR_PROC(CmdSetViewport); + GET_DEVICE_CORE_OR_KHR_PROC(CmdSetScissor); + GET_DEVICE_CORE_OR_KHR_PROC(CmdSetDepthBounds); + GET_DEVICE_CORE_OR_KHR_PROC(CmdSetStencilReference); + GET_DEVICE_CORE_OR_KHR_PROC(CmdSetBlendConstants); + GET_DEVICE_CORE_OR_KHR_PROC(CmdClearAttachments); + GET_DEVICE_CORE_OR_KHR_PROC(CmdClearColorImage); + GET_DEVICE_CORE_OR_KHR_PROC(CmdBindVertexBuffers); + GET_DEVICE_CORE_OR_KHR_PROC(CmdBindIndexBuffer); + GET_DEVICE_CORE_OR_KHR_PROC(CmdBindPipeline); + GET_DEVICE_CORE_OR_KHR_PROC(CmdBindDescriptorSets); + GET_DEVICE_CORE_OR_KHR_PROC(CmdPushConstants); + GET_DEVICE_CORE_OR_KHR_PROC(CmdDispatch); + GET_DEVICE_CORE_OR_KHR_PROC(CmdDispatchIndirect); + GET_DEVICE_CORE_OR_KHR_PROC(CmdDraw); + GET_DEVICE_CORE_OR_KHR_PROC(CmdDrawIndexed); + GET_DEVICE_CORE_OR_KHR_PROC(CmdDrawIndirect); + GET_DEVICE_CORE_OR_KHR_PROC(CmdDrawIndexedIndirect); + GET_DEVICE_CORE_OR_KHR_PROC(CmdCopyBuffer); + GET_DEVICE_CORE_OR_KHR_PROC(CmdCopyImage); + GET_DEVICE_CORE_OR_KHR_PROC(CmdCopyBufferToImage); + GET_DEVICE_CORE_OR_KHR_PROC(CmdCopyImageToBuffer); + GET_DEVICE_CORE_OR_KHR_PROC(CmdPipelineBarrier2); + GET_DEVICE_CORE_OR_KHR_PROC(CmdBeginQuery); + GET_DEVICE_CORE_OR_KHR_PROC(CmdEndQuery); + GET_DEVICE_CORE_OR_KHR_PROC(CmdWriteTimestamp); + GET_DEVICE_CORE_OR_KHR_PROC(CmdCopyQueryPoolResults); + GET_DEVICE_CORE_OR_KHR_PROC(CmdResetQueryPool); + GET_DEVICE_CORE_OR_KHR_PROC(CmdFillBuffer); + GET_DEVICE_CORE_OR_KHR_PROC(CmdBeginRendering); + GET_DEVICE_CORE_OR_KHR_PROC(CmdEndRendering); + GET_DEVICE_CORE_OR_KHR_PROC(EndCommandBuffer); + + GET_DEVICE_OPTIONAL_CORE_OR_KHR_PROC(GetBufferDeviceAddress); if (!m_VK.GetBufferDeviceAddress) m_IsDeviceAddressSupported = false; - // Instance specific - if (IsExtensionSupported(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, desiredInstanceExts)) { - RESOLVE_DEVICE_FUNCTION(SetDebugUtilsObjectNameEXT); - 
RESOLVE_DEVICE_FUNCTION(CmdBeginDebugUtilsLabelEXT); - RESOLVE_DEVICE_FUNCTION(CmdEndDebugUtilsLabelEXT); + // IMPORTANT: {} is mandatory here! + + if (IsExtensionSupported(VK_KHR_SWAPCHAIN_EXTENSION_NAME, desiredDeviceExts)) { + GET_DEVICE_PROC(AcquireNextImageKHR); + GET_DEVICE_PROC(QueuePresentKHR); + GET_DEVICE_PROC(CreateSwapchainKHR); + GET_DEVICE_PROC(DestroySwapchainKHR); + GET_DEVICE_PROC(GetSwapchainImagesKHR); } - // Device specific - if (IsExtensionSupported(VK_EXT_SAMPLE_LOCATIONS_EXTENSION_NAME, desiredDeviceExts)) { - RESOLVE_DEVICE_FUNCTION(CmdSetSampleLocationsEXT); + if (IsExtensionSupported(VK_KHR_PRESENT_WAIT_EXTENSION_NAME, desiredDeviceExts)) { + GET_DEVICE_PROC(WaitForPresentKHR); } if (IsExtensionSupported(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, desiredDeviceExts)) { - RESOLVE_DEVICE_FUNCTION(CreateAccelerationStructureKHR); - RESOLVE_DEVICE_FUNCTION(DestroyAccelerationStructureKHR); - RESOLVE_DEVICE_FUNCTION(GetAccelerationStructureDeviceAddressKHR); - RESOLVE_DEVICE_FUNCTION(GetAccelerationStructureBuildSizesKHR); - RESOLVE_DEVICE_FUNCTION(CmdBuildAccelerationStructuresKHR); - RESOLVE_DEVICE_FUNCTION(CmdCopyAccelerationStructureKHR); - RESOLVE_DEVICE_FUNCTION(CmdWriteAccelerationStructuresPropertiesKHR); + GET_DEVICE_PROC(CreateAccelerationStructureKHR); + GET_DEVICE_PROC(DestroyAccelerationStructureKHR); + GET_DEVICE_PROC(GetAccelerationStructureDeviceAddressKHR); + GET_DEVICE_PROC(GetAccelerationStructureBuildSizesKHR); + GET_DEVICE_PROC(CmdBuildAccelerationStructuresKHR); + GET_DEVICE_PROC(CmdCopyAccelerationStructureKHR); + GET_DEVICE_PROC(CmdWriteAccelerationStructuresPropertiesKHR); } if (IsExtensionSupported(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, desiredDeviceExts)) { - RESOLVE_DEVICE_FUNCTION(CreateRayTracingPipelinesKHR); - RESOLVE_DEVICE_FUNCTION(GetRayTracingShaderGroupHandlesKHR); - RESOLVE_DEVICE_FUNCTION(CmdTraceRaysKHR); - RESOLVE_DEVICE_FUNCTION(CmdTraceRaysIndirect2KHR); + 
GET_DEVICE_PROC(CreateRayTracingPipelinesKHR); + GET_DEVICE_PROC(GetRayTracingShaderGroupHandlesKHR); + GET_DEVICE_PROC(CmdTraceRaysKHR); + GET_DEVICE_PROC(CmdTraceRaysIndirect2KHR); + } + + if (IsExtensionSupported(VK_EXT_SAMPLE_LOCATIONS_EXTENSION_NAME, desiredDeviceExts)) { + GET_DEVICE_PROC(CmdSetSampleLocationsEXT); } if (IsExtensionSupported(VK_EXT_MESH_SHADER_EXTENSION_NAME, desiredDeviceExts)) { - RESOLVE_DEVICE_FUNCTION(CmdDrawMeshTasksEXT); - RESOLVE_DEVICE_FUNCTION(CmdDrawMeshTasksIndirectEXT); + GET_DEVICE_PROC(CmdDrawMeshTasksEXT); + GET_DEVICE_PROC(CmdDrawMeshTasksIndirectEXT); + } + + if (IsExtensionSupported(VK_NV_LOW_LATENCY_2_EXTENSION_NAME, desiredDeviceExts)) { + GET_DEVICE_PROC(GetLatencyTimingsNV); + GET_DEVICE_PROC(LatencySleepNV); + GET_DEVICE_PROC(SetLatencyMarkerNV); + GET_DEVICE_PROC(SetLatencySleepModeNV); } return Result::SUCCESS; @@ -1428,8 +1459,7 @@ inline Result DeviceVK::CreateDescriptorPool(const DescriptorPoolDesc& descripto return CreateImplementation(descriptorPool, descriptorPoolDesc); } -Result DeviceVK::CreateBuffer(const BufferDesc& bufferDesc, Buffer*& buffer) // TODO: not inline -{ +Result DeviceVK::CreateBuffer(const BufferDesc& bufferDesc, Buffer*& buffer) { // TODO: not inline return CreateImplementation(buffer, bufferDesc); } @@ -1502,9 +1532,7 @@ inline Result DeviceVK::CreateCommandQueue(const CommandQueueVKDesc& commandQueu } CreateImplementation(commandQueue, commandQueueVKDesc); - - if (m_Queues[commandQueueTypeIndex] != nullptr) - Deallocate(GetStdAllocator(), m_Queues[commandQueueTypeIndex]); + Deallocate(GetStdAllocator(), m_Queues[commandQueueTypeIndex]); m_FamilyIndices[commandQueueTypeIndex] = commandQueueVKDesc.familyIndex; m_Queues[commandQueueTypeIndex] = (CommandQueueVK*)commandQueue; @@ -1580,8 +1608,7 @@ inline void DeviceVK::DestroyDescriptorPool(DescriptorPool& descriptorPool) { Deallocate(GetStdAllocator(), (DescriptorPoolVK*)&descriptorPool); } -void DeviceVK::DestroyBuffer(Buffer& buffer) // 
TODO: not inline -{ +void DeviceVK::DestroyBuffer(Buffer& buffer) { // TODO: not inline Deallocate(GetStdAllocator(), (BufferVK*)&buffer); } @@ -1766,13 +1793,13 @@ inline FormatSupportBits DeviceVK::GetFormatSupport(Format format) const { } inline uint32_t DeviceVK::CalculateAllocationNumber(const ResourceGroupDesc& resourceGroupDesc) const { - HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this, m_StdAllocator); + HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this); return allocator.CalculateAllocationNumber(resourceGroupDesc); } -inline Result DeviceVK::AllocateAndBindMemory(const ResourceGroupDesc& resourceGroupDesc, nri::Memory** allocations) { - HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this, m_StdAllocator); +inline Result DeviceVK::AllocateAndBindMemory(const ResourceGroupDesc& resourceGroupDesc, Memory** allocations) { + HelperDeviceMemoryAllocator allocator(m_CoreInterface, (Device&)*this); return allocator.AllocateAndBindMemory(resourceGroupDesc, allocations); } diff --git a/Source/VK/DeviceVK.h b/Source/VK/DeviceVK.h index 1e3a5e23..9330527f 100644 --- a/Source/VK/DeviceVK.h +++ b/Source/VK/DeviceVK.h @@ -29,7 +29,7 @@ struct DeviceVK final : public DeviceBase { return m_AllocationCallbackPtr; } - inline const std::array& GetQueueFamilyIndices() const { + inline const std::array& GetQueueFamilyIndices() const { return m_FamilyIndices; } @@ -130,8 +130,10 @@ struct DeviceVK final : public DeviceBase { Result FillFunctionTable(RayTracingInterface& rayTracingInterface) const; Result FillFunctionTable(MeshShaderInterface& meshShaderInterface) const; Result FillFunctionTable(HelperInterface& helperInterface) const; + Result FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const; + Result FillFunctionTable(StreamerInterface& streamerInterface) const; - private: +private: void FilterInstanceLayers(Vector& layers); void ProcessInstanceExtensions(Vector& desiredInstanceExts); void 
ProcessDeviceExtensions(Vector& desiredDeviceExts, bool disableRayTracing); @@ -142,22 +144,25 @@ struct DeviceVK final : public DeviceBase { Result CreateInstance(bool enableAPIValidation, const Vector& desiredInstanceExts); Result FindPhysicalDeviceGroup(const AdapterDesc* physicalDeviceGroup); Result ResolvePreInstanceDispatchTable(); - Result ResolveInstanceDispatchTable(); - Result ResolveDispatchTable(const Vector& desiredInstanceExts, const Vector& desiredDeviceExts); + Result ResolveInstanceDispatchTable(const Vector& desiredInstanceExts); + Result ResolveDispatchTable(const Vector& desiredDeviceExts); template Result CreateImplementation(Interface*& entity, const Args&... args); - public: +public: bool m_IsDescriptorIndexingSupported = false; bool m_IsDeviceAddressSupported = false; bool m_IsSwapChainMutableFormatSupported = false; + bool m_IsPresentIdSupported = false; + bool m_IsPresentWaitSupported = false; + bool m_IsLowLatencySupported = false; - private: +private: Vector m_ConcurrentSharingModeQueueIndices; VkPhysicalDevice m_PhysicalDevice = nullptr; - std::array m_FamilyIndices = {}; - std::array m_Queues = {}; + std::array m_FamilyIndices = {}; + std::array m_Queues = {}; DispatchTable m_VK = {}; DeviceDesc m_Desc = {}; VkPhysicalDeviceMemoryProperties m_MemoryProps = {}; diff --git a/Source/VK/DeviceVK.hpp b/Source/VK/DeviceVK.hpp index 52c559e6..ea9b5e74 100644 --- a/Source/VK/DeviceVK.hpp +++ b/Source/VK/DeviceVK.hpp @@ -1,9 +1,10 @@ // © 2021 NVIDIA Corporation -Declare_PartiallyFillFunctionTable_Functions(VK) +Declare_PartiallyFillFunctionTable_Functions(VK); + #pragma region[ Core ] - static const DeviceDesc& NRI_CALL GetDeviceDesc(const Device& device) { +static const DeviceDesc& NRI_CALL GetDeviceDesc(const Device& device) { return ((DeviceVK&)device).GetDesc(); } @@ -379,7 +380,7 @@ static void NRI_CALL DestroyAccelerationStructure(AccelerationStructure& acceler void FillFunctionTablePipelineVK(RayTracingInterface& rayTracingInterface); 
Result DeviceVK::FillFunctionTable(RayTracingInterface& rayTracingInterface) const { - if (!m_Desc.isRaytracingSupported) + if (!m_Desc.isRayTracingSupported) return Result::UNSUPPORTED; rayTracingInterface = {}; @@ -433,3 +434,83 @@ Result DeviceVK::FillFunctionTable(HelperInterface& helperInterface) const { } #pragma endregion + +#pragma region[ LowLatency ] + +Result DeviceVK::FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const { + lowLatencyInterface = {}; + + LowLatency_CommandQueue_PartiallyFillFunctionTableVK(lowLatencyInterface); + LowLatency_SwapChain_PartiallyFillFunctionTableVK(lowLatencyInterface); + + return ValidateFunctionTable(lowLatencyInterface); +} + +#pragma endregion + +#pragma region[ Streamer ] + +static Result CreateStreamer(Device& device, const StreamerDesc& streamerDesc, Streamer*& streamer) { + DeviceVK& deviceVK = (DeviceVK&)device; + + StreamerImpl* implementation = Allocate(deviceVK.GetStdAllocator(), device, deviceVK.GetCoreInterface()); + Result res = implementation->Create(streamerDesc); + + if (res == Result::SUCCESS) { + streamer = (Streamer*)implementation; + return Result::SUCCESS; + } + + Deallocate(deviceVK.GetStdAllocator(), implementation); + + return res; +} + +static void DestroyStreamer(Streamer& streamer) { + Deallocate(((DeviceBase&)((StreamerImpl&)streamer).GetDevice()).GetStdAllocator(), (StreamerImpl*)&streamer); +} + +static Buffer* GetStreamerConstantBuffer(Streamer& streamer) { + return ((StreamerImpl&)streamer).GetConstantBuffer(); +} + +static uint32_t UpdateStreamerConstantBuffer(Streamer& streamer, const void* data, uint32_t dataSize) { + return ((StreamerImpl&)streamer).UpdateStreamerConstantBuffer(data, dataSize); +} + +static uint64_t AddStreamerBufferUpdateRequest(Streamer& streamer, const BufferUpdateRequestDesc& bufferUpdateRequestDesc) { + return ((StreamerImpl&)streamer).AddStreamerBufferUpdateRequest(bufferUpdateRequestDesc); +} + +static uint64_t 
AddStreamerTextureUpdateRequest(Streamer& streamer, const TextureUpdateRequestDesc& textureUpdateRequestDesc) { + return ((StreamerImpl&)streamer).AddStreamerTextureUpdateRequest(textureUpdateRequestDesc); +} + +static Result CopyStreamerUpdateRequests(Streamer& streamer) { + return ((StreamerImpl&)streamer).CopyStreamerUpdateRequests(); +} + +static Buffer* GetStreamerDynamicBuffer(Streamer& streamer) { + return ((StreamerImpl&)streamer).GetDynamicBuffer(); +} + +static void CmdUploadStreamerUpdateRequests(CommandBuffer& commandBuffer, Streamer& streamer) { + ((StreamerImpl&)streamer).CmdUploadStreamerUpdateRequests(commandBuffer); +} + +Result DeviceVK::FillFunctionTable(StreamerInterface& streamerInterface) const { + streamerInterface = {}; + streamerInterface.CreateStreamer = ::CreateStreamer; + streamerInterface.DestroyStreamer = ::DestroyStreamer; + streamerInterface.GetStreamerConstantBuffer = ::GetStreamerConstantBuffer; + streamerInterface.UpdateStreamerConstantBuffer = ::UpdateStreamerConstantBuffer; + streamerInterface.AddStreamerBufferUpdateRequest = ::AddStreamerBufferUpdateRequest; + streamerInterface.AddStreamerTextureUpdateRequest = ::AddStreamerTextureUpdateRequest; + streamerInterface.CopyStreamerUpdateRequests = ::CopyStreamerUpdateRequests; + streamerInterface.GetStreamerDynamicBuffer = ::GetStreamerDynamicBuffer; + streamerInterface.CmdUploadStreamerUpdateRequests = ::CmdUploadStreamerUpdateRequests; + + return ValidateFunctionTable(streamerInterface); +} + +#pragma endregion diff --git a/Source/VK/DispatchTable.h b/Source/VK/DispatchTable.h index 27015e53..e3bea417 100644 --- a/Source/VK/DispatchTable.h +++ b/Source/VK/DispatchTable.h @@ -5,6 +5,9 @@ #define VULKAN_FUNCTION(name) PFN_vk##name name struct DispatchTable { + //==================================================================== + // Instance + //==================================================================== VULKAN_FUNCTION(GetInstanceProcAddr); 
VULKAN_FUNCTION(CreateInstance); VULKAN_FUNCTION(EnumerateInstanceExtensionProperties); @@ -23,9 +26,11 @@ struct DispatchTable { VULKAN_FUNCTION(GetPhysicalDeviceFeatures2); VULKAN_FUNCTION(GetPhysicalDeviceQueueFamilyProperties); VULKAN_FUNCTION(EnumerateDeviceExtensionProperties); + + // VK_KHR_surface VULKAN_FUNCTION(GetPhysicalDeviceSurfaceFormatsKHR); VULKAN_FUNCTION(GetPhysicalDeviceSurfaceSupportKHR); - VULKAN_FUNCTION(GetPhysicalDeviceSurfaceCapabilitiesKHR); + VULKAN_FUNCTION(GetPhysicalDeviceSurfaceCapabilities2KHR); VULKAN_FUNCTION(GetPhysicalDeviceSurfacePresentModesKHR); VULKAN_FUNCTION(DestroySurfaceKHR); @@ -43,6 +48,14 @@ struct DispatchTable { VULKAN_FUNCTION(CreateWaylandSurfaceKHR); #endif + // VK_EXT_debug_utils + VULKAN_FUNCTION(SetDebugUtilsObjectNameEXT); + VULKAN_FUNCTION(CmdBeginDebugUtilsLabelEXT); + VULKAN_FUNCTION(CmdEndDebugUtilsLabelEXT); + + //==================================================================== + // Device + //==================================================================== VULKAN_FUNCTION(CreateBuffer); VULKAN_FUNCTION(CreateImage); VULKAN_FUNCTION(CreateBufferView); @@ -58,8 +71,6 @@ struct DispatchTable { VULKAN_FUNCTION(CreateShaderModule); VULKAN_FUNCTION(CreateGraphicsPipelines); VULKAN_FUNCTION(CreateComputePipelines); - VULKAN_FUNCTION(CreateSwapchainKHR); - VULKAN_FUNCTION(DestroyBuffer); VULKAN_FUNCTION(DestroyImage); VULKAN_FUNCTION(DestroyBufferView); @@ -74,26 +85,14 @@ struct DispatchTable { VULKAN_FUNCTION(DestroyDescriptorSetLayout); VULKAN_FUNCTION(DestroyShaderModule); VULKAN_FUNCTION(DestroyPipeline); - VULKAN_FUNCTION(DestroySwapchainKHR); - VULKAN_FUNCTION(AllocateMemory); VULKAN_FUNCTION(MapMemory); VULKAN_FUNCTION(UnmapMemory); VULKAN_FUNCTION(FreeMemory); - VULKAN_FUNCTION(BindBufferMemory2); - VULKAN_FUNCTION(BindImageMemory2); - - VULKAN_FUNCTION(GetBufferMemoryRequirements2); - VULKAN_FUNCTION(GetImageMemoryRequirements2); - VULKAN_FUNCTION(QueueWaitIdle); - 
VULKAN_FUNCTION(AcquireNextImageKHR); - VULKAN_FUNCTION(QueueSubmit); - VULKAN_FUNCTION(QueuePresentKHR); - + VULKAN_FUNCTION(QueueSubmit2); VULKAN_FUNCTION(GetSemaphoreCounterValue); VULKAN_FUNCTION(WaitSemaphores); - VULKAN_FUNCTION(ResetCommandPool); VULKAN_FUNCTION(ResetDescriptorPool); VULKAN_FUNCTION(AllocateCommandBuffers); @@ -101,18 +100,19 @@ struct DispatchTable { VULKAN_FUNCTION(FreeCommandBuffers); VULKAN_FUNCTION(FreeDescriptorSets); VULKAN_FUNCTION(UpdateDescriptorSets); - + VULKAN_FUNCTION(BindBufferMemory2); + VULKAN_FUNCTION(BindImageMemory2); + VULKAN_FUNCTION(GetBufferMemoryRequirements2); + VULKAN_FUNCTION(GetImageMemoryRequirements2); + VULKAN_FUNCTION(GetBufferDeviceAddress); VULKAN_FUNCTION(BeginCommandBuffer); VULKAN_FUNCTION(CmdSetViewport); VULKAN_FUNCTION(CmdSetScissor); VULKAN_FUNCTION(CmdSetDepthBounds); VULKAN_FUNCTION(CmdSetStencilReference); - VULKAN_FUNCTION(CmdSetSampleLocationsEXT); VULKAN_FUNCTION(CmdSetBlendConstants); VULKAN_FUNCTION(CmdClearAttachments); VULKAN_FUNCTION(CmdClearColorImage); - VULKAN_FUNCTION(CmdBeginRendering); - VULKAN_FUNCTION(CmdEndRendering); VULKAN_FUNCTION(CmdBindVertexBuffers); VULKAN_FUNCTION(CmdBindIndexBuffer); VULKAN_FUNCTION(CmdBindPipeline); @@ -135,29 +135,47 @@ struct DispatchTable { VULKAN_FUNCTION(CmdCopyQueryPoolResults); VULKAN_FUNCTION(CmdResetQueryPool); VULKAN_FUNCTION(CmdFillBuffer); + VULKAN_FUNCTION(CmdBeginRendering); + VULKAN_FUNCTION(CmdEndRendering); VULKAN_FUNCTION(EndCommandBuffer); + // VK_KHR_swapchain + VULKAN_FUNCTION(AcquireNextImageKHR); + VULKAN_FUNCTION(QueuePresentKHR); + VULKAN_FUNCTION(CreateSwapchainKHR); + VULKAN_FUNCTION(DestroySwapchainKHR); VULKAN_FUNCTION(GetSwapchainImagesKHR); - VULKAN_FUNCTION(SetDebugUtilsObjectNameEXT); - VULKAN_FUNCTION(CmdBeginDebugUtilsLabelEXT); - VULKAN_FUNCTION(CmdEndDebugUtilsLabelEXT); + // VK_KHR_present_wait + VULKAN_FUNCTION(WaitForPresentKHR); + // VK_KHR_acceleration_structure VULKAN_FUNCTION(CreateAccelerationStructureKHR); - 
VULKAN_FUNCTION(CreateRayTracingPipelinesKHR); VULKAN_FUNCTION(DestroyAccelerationStructureKHR); VULKAN_FUNCTION(GetAccelerationStructureDeviceAddressKHR); VULKAN_FUNCTION(GetAccelerationStructureBuildSizesKHR); - VULKAN_FUNCTION(GetRayTracingShaderGroupHandlesKHR); VULKAN_FUNCTION(CmdBuildAccelerationStructuresKHR); VULKAN_FUNCTION(CmdCopyAccelerationStructureKHR); VULKAN_FUNCTION(CmdWriteAccelerationStructuresPropertiesKHR); + + // VK_KHR_ray_tracing_pipeline + VULKAN_FUNCTION(CreateRayTracingPipelinesKHR); + VULKAN_FUNCTION(GetRayTracingShaderGroupHandlesKHR); VULKAN_FUNCTION(CmdTraceRaysKHR); VULKAN_FUNCTION(CmdTraceRaysIndirect2KHR); - VULKAN_FUNCTION(GetBufferDeviceAddress); + // VK_EXT_sample_locations + VULKAN_FUNCTION(CmdSetSampleLocationsEXT); + + // VK_EXT_mesh_shader VULKAN_FUNCTION(CmdDrawMeshTasksEXT); VULKAN_FUNCTION(CmdDrawMeshTasksIndirectEXT); + + // VK_NV_low_latency2 + VULKAN_FUNCTION(GetLatencyTimingsNV); + VULKAN_FUNCTION(LatencySleepNV); + VULKAN_FUNCTION(SetLatencyMarkerNV); + VULKAN_FUNCTION(SetLatencySleepModeNV); }; #undef VULKAN_FUNCTION diff --git a/Source/VK/FenceVK.cpp b/Source/VK/FenceVK.cpp index b1fc92d1..4a9dc52c 100644 --- a/Source/VK/FenceVK.cpp +++ b/Source/VK/FenceVK.cpp @@ -9,16 +9,20 @@ using namespace nri; FenceVK::~FenceVK() { const auto& vk = m_Device.GetDispatchTable(); - if (m_Fence != VK_NULL_HANDLE) - vk.DestroySemaphore(m_Device, m_Fence, m_Device.GetAllocationCallbacks()); + if (m_Handle != VK_NULL_HANDLE) + vk.DestroySemaphore(m_Device, m_Handle, m_Device.GetAllocationCallbacks()); } Result FenceVK::Create(uint64_t initialValue) { - VkSemaphoreTypeCreateInfo timelineCreateInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, VK_SEMAPHORE_TYPE_TIMELINE, initialValue}; - VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &timelineCreateInfo, 0}; + VkSemaphoreTypeCreateInfo semaphoreTypeCreateInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO}; + 
semaphoreTypeCreateInfo.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE; + semaphoreTypeCreateInfo.initialValue = initialValue; + + VkSemaphoreCreateInfo semaphoreCreateInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO}; + semaphoreCreateInfo.pNext = &semaphoreTypeCreateInfo; const auto& vk = m_Device.GetDispatchTable(); - VkResult result = vk.CreateSemaphore((VkDevice)m_Device, &createInfo, m_Device.GetAllocationCallbacks(), &m_Fence); + VkResult result = vk.CreateSemaphore((VkDevice)m_Device, &semaphoreCreateInfo, m_Device.GetAllocationCallbacks(), &m_Handle); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vk.CreateSemaphore returned %d", (int32_t)result); return Result::SUCCESS; @@ -29,44 +33,26 @@ Result FenceVK::Create(uint64_t initialValue) { //================================================================================================================ inline void FenceVK::SetDebugName(const char* name) { - m_Device.SetDebugNameToTrivialObject(VK_OBJECT_TYPE_SEMAPHORE, (uint64_t)m_Fence, name); + m_Device.SetDebugNameToTrivialObject(VK_OBJECT_TYPE_SEMAPHORE, (uint64_t)m_Handle, name); } inline uint64_t FenceVK::GetFenceValue() const { uint64_t value = 0; const auto& vk = m_Device.GetDispatchTable(); - vk.GetSemaphoreCounterValue((VkDevice)m_Device, m_Fence, &value); + vk.GetSemaphoreCounterValue((VkDevice)m_Device, m_Handle, &value); return value; } -inline void FenceVK::QueueSignal(CommandQueueVK& commandQueue, uint64_t value) { - ExclusiveScope lock(commandQueue.GetLock()); - - VkTimelineSemaphoreSubmitInfo timelineInfo = {VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, nullptr, 0, nullptr, 1, &value}; - VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO, &timelineInfo, 0, nullptr, nullptr, 0, nullptr, 1, &m_Fence}; - - const auto& vk = m_Device.GetDispatchTable(); - vk.QueueSubmit((VkQueue)commandQueue, 1, &submitInfo, VK_NULL_HANDLE); -} - -inline void FenceVK::QueueWait(CommandQueueVK& commandQueue, uint64_t value) { - 
ExclusiveScope lock(commandQueue.GetLock()); - - VkPipelineStageFlags waitDstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; // TODO: matches D3D? - VkTimelineSemaphoreSubmitInfo timelineInfo = {VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, nullptr, 1, &value, 0, nullptr}; - VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO, &timelineInfo, 1, &m_Fence, &waitDstStageMask, 0, nullptr, 0, nullptr}; - - const auto& vk = m_Device.GetDispatchTable(); - vk.QueueSubmit((VkQueue)commandQueue, 1, &submitInfo, VK_NULL_HANDLE); -} - -inline void FenceVK::Wait(uint64_t value) { - VkSemaphoreWaitInfo waitInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, nullptr, 0, 1, &m_Fence, &value}; +void FenceVK::Wait(uint64_t value) { // TODO: not inline + VkSemaphoreWaitInfo semaphoreWaitInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO}; + semaphoreWaitInfo.semaphoreCount = 1; + semaphoreWaitInfo.pSemaphores = &m_Handle; + semaphoreWaitInfo.pValues = &value; const auto& vk = m_Device.GetDispatchTable(); - vk.WaitSemaphores((VkDevice)m_Device, &waitInfo, VK_DEFAULT_TIMEOUT); + vk.WaitSemaphores((VkDevice)m_Device, &semaphoreWaitInfo, MsToUs(TIMEOUT_FENCE)); } #include "FenceVK.hpp" diff --git a/Source/VK/FenceVK.h b/Source/VK/FenceVK.h index f1edebfc..5ba12a8b 100644 --- a/Source/VK/FenceVK.h +++ b/Source/VK/FenceVK.h @@ -11,6 +11,10 @@ struct FenceVK { inline FenceVK(DeviceVK& device) : m_Device(device) { } + inline operator VkSemaphore() const { + return m_Handle; + } + inline DeviceVK& GetDevice() const { return m_Device; } @@ -25,13 +29,11 @@ struct FenceVK { void SetDebugName(const char* name); uint64_t GetFenceValue() const; - void QueueSignal(CommandQueueVK& commandQueue, uint64_t value); - void QueueWait(CommandQueueVK& commandQueue, uint64_t value); void Wait(uint64_t value); - private: +private: DeviceVK& m_Device; - VkSemaphore m_Fence = VK_NULL_HANDLE; + VkSemaphore m_Handle = VK_NULL_HANDLE; }; } // namespace nri diff --git a/Source/VK/FenceVK.hpp b/Source/VK/FenceVK.hpp 
index a4ea7b24..b5f72c3f 100644 --- a/Source/VK/FenceVK.hpp +++ b/Source/VK/FenceVK.hpp @@ -6,14 +6,6 @@ static uint64_t NRI_CALL GetFenceValue(Fence& fence) { return ((FenceVK&)fence).GetFenceValue(); } -static void NRI_CALL QueueSignal(CommandQueue& commandQueue, Fence& fence, uint64_t value) { - return ((FenceVK&)fence).QueueSignal((CommandQueueVK&)commandQueue, value); -} - -static void NRI_CALL QueueWait(CommandQueue& commandQueue, Fence& fence, uint64_t value) { - return ((FenceVK&)fence).QueueWait((CommandQueueVK&)commandQueue, value); -} - static void NRI_CALL Wait(Fence& fence, uint64_t value) { ((FenceVK&)fence).Wait(value); } @@ -24,4 +16,4 @@ static void NRI_CALL SetFenceDebugName(Fence& fence, const char* name) { #pragma endregion -Define_Core_Fence_PartiallyFillFunctionTable(VK) +Define_Core_Fence_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/MemoryVK.h b/Source/VK/MemoryVK.h index 925c993c..f422cd8f 100644 --- a/Source/VK/MemoryVK.h +++ b/Source/VK/MemoryVK.h @@ -43,7 +43,7 @@ struct MemoryVK { void SetDebugName(const char* name); - private: +private: DeviceVK& m_Device; VkDeviceMemory m_Handle = VK_NULL_HANDLE; uint8_t* m_MappedMemory = nullptr; diff --git a/Source/VK/PipelineLayoutVK.cpp b/Source/VK/PipelineLayoutVK.cpp index 6f2a2de4..a1589831 100644 --- a/Source/VK/PipelineLayoutVK.cpp +++ b/Source/VK/PipelineLayoutVK.cpp @@ -7,6 +7,72 @@ using namespace nri; +static void FillDescriptorBindings( + const DescriptorSetDesc& descriptorSetDesc, const uint32_t* bindingOffsets, VkDescriptorSetLayoutBinding*& bindings, VkDescriptorBindingFlags*& bindingFlags) { + const VkDescriptorBindingFlags commonBindingFlags = descriptorSetDesc.partiallyBound ? 
VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT : 0; + constexpr VkDescriptorBindingFlags variableSizedArrayFlags = VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT | VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; + + for (uint32_t i = 0; i < descriptorSetDesc.rangeNum; i++) { + const DescriptorRangeDesc& range = descriptorSetDesc.ranges[i]; + + const uint32_t baseBindingIndex = range.baseRegisterIndex + bindingOffsets[(uint32_t)range.descriptorType]; + + if (range.isArray) { + *(bindingFlags++) = commonBindingFlags | (range.isDescriptorNumVariable ? variableSizedArrayFlags : 0); + + VkDescriptorSetLayoutBinding& descriptorBinding = *(bindings++); + descriptorBinding = {}; + descriptorBinding.binding = baseBindingIndex; + descriptorBinding.descriptorType = GetDescriptorType(range.descriptorType); + descriptorBinding.descriptorCount = range.descriptorNum; + descriptorBinding.stageFlags = GetShaderStageFlags(range.shaderStages); + } else { + for (uint32_t j = 0; j < range.descriptorNum; j++) { + *(bindingFlags++) = commonBindingFlags; + + VkDescriptorSetLayoutBinding& descriptorBinding = *(bindings++); + descriptorBinding = {}; + descriptorBinding.binding = baseBindingIndex + j; + descriptorBinding.descriptorType = GetDescriptorType(range.descriptorType); + descriptorBinding.descriptorCount = 1; + descriptorBinding.stageFlags = GetShaderStageFlags(range.shaderStages); + } + } + } +} + +static void FillDynamicConstantBufferBindings( + const DescriptorSetDesc& descriptorSetDesc, const uint32_t* bindingOffsets, VkDescriptorSetLayoutBinding*& bindings, VkDescriptorBindingFlags*& bindingFlags) { + for (uint32_t i = 0; i < descriptorSetDesc.dynamicConstantBufferNum; i++) { + const DynamicConstantBufferDesc& buffer = descriptorSetDesc.dynamicConstantBuffers[i]; + + *(bindingFlags++) = 0; + + VkDescriptorSetLayoutBinding& descriptorBinding = *(bindings++); + descriptorBinding = {}; + descriptorBinding.binding = buffer.registerIndex + 
bindingOffsets[(uint32_t)DescriptorType::CONSTANT_BUFFER]; + descriptorBinding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + descriptorBinding.descriptorCount = 1; + descriptorBinding.stageFlags = GetShaderStageFlags(buffer.shaderStages); + } +} + +static void FillPushConstantRanges(const PipelineLayoutDesc& pipelineLayoutDesc, VkPushConstantRange* pushConstantRanges) { + uint32_t offset = 0; + + for (uint32_t i = 0; i < pipelineLayoutDesc.pushConstantNum; i++) { + const PushConstantDesc& pushConstantDesc = pipelineLayoutDesc.pushConstants[i]; + + VkPushConstantRange& range = pushConstantRanges[i]; + range = {}; + range.stageFlags = GetShaderStageFlags(pushConstantDesc.shaderStages); + range.offset = offset; + range.size = pushConstantDesc.size; + + offset += pushConstantDesc.size; + } +} + PipelineLayoutVK::~PipelineLayoutVK() { const auto& vk = m_Device.GetDispatchTable(); const auto allocationCallbacks = m_Device.GetAllocationCallbacks(); @@ -115,20 +181,23 @@ VkDescriptorSetLayout PipelineLayoutVK::CreateSetLayout(const DescriptorSetDesc& } VkDescriptorSetLayoutBinding* bindings = ALLOCATE_SCRATCH(m_Device, VkDescriptorSetLayoutBinding, bindingMaxNum); - VkDescriptorBindingFlagsEXT* bindingFlags = ALLOCATE_SCRATCH(m_Device, VkDescriptorBindingFlagsEXT, bindingMaxNum); + VkDescriptorBindingFlags* bindingFlags = ALLOCATE_SCRATCH(m_Device, VkDescriptorBindingFlags, bindingMaxNum); VkDescriptorSetLayoutBinding* bindingsBegin = bindings; - VkDescriptorBindingFlagsEXT* bindingFlagsBegin = bindingFlags; + VkDescriptorBindingFlags* bindingFlagsBegin = bindingFlags; FillDescriptorBindings(descriptorSetDesc, bindingOffsets, bindings, bindingFlags); FillDynamicConstantBufferBindings(descriptorSetDesc, bindingOffsets, bindings, bindingFlags); const uint32_t bindingNum = uint32_t(bindings - bindingsBegin); - VkDescriptorSetLayoutBindingFlagsCreateInfoEXT bindingFlagsInfo = { - VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT, nullptr, 
bindingNum, bindingFlagsBegin}; + VkDescriptorSetLayoutBindingFlagsCreateInfo bindingFlagsInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO}; + bindingFlagsInfo.bindingCount = bindingNum; + bindingFlagsInfo.pBindingFlags = bindingFlagsBegin; - VkDescriptorSetLayoutCreateInfo info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, m_Device.m_IsDescriptorIndexingSupported ? &bindingFlagsInfo : nullptr, - (VkDescriptorSetLayoutCreateFlags)0, bindingNum, bindingsBegin}; + VkDescriptorSetLayoutCreateInfo info = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO}; + info.pNext = m_Device.m_IsDescriptorIndexingSupported ? &bindingFlagsInfo : nullptr; + info.bindingCount = bindingNum; + info.pBindings = bindingsBegin; VkDescriptorSetLayout handle = VK_NULL_HANDLE; const auto& vk = m_Device.GetDispatchTable(); @@ -143,73 +212,6 @@ VkDescriptorSetLayout PipelineLayoutVK::CreateSetLayout(const DescriptorSetDesc& return handle; } -void PipelineLayoutVK::FillDescriptorBindings( - const DescriptorSetDesc& descriptorSetDesc, const uint32_t* bindingOffsets, VkDescriptorSetLayoutBinding*& bindings, VkDescriptorBindingFlagsEXT*& bindingFlags) const { - const VkDescriptorBindingFlagsEXT commonBindingFlags = descriptorSetDesc.partiallyBound ? VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT : 0; - - constexpr VkDescriptorBindingFlagsEXT variableSizedArrayFlags = VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT | VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT; - - for (uint32_t i = 0; i < descriptorSetDesc.rangeNum; i++) { - const DescriptorRangeDesc& range = descriptorSetDesc.ranges[i]; - - const uint32_t baseBindingIndex = range.baseRegisterIndex + bindingOffsets[(uint32_t)range.descriptorType]; - - if (range.isArray) { - *(bindingFlags++) = commonBindingFlags | (range.isDescriptorNumVariable ? 
variableSizedArrayFlags : 0); - - VkDescriptorSetLayoutBinding& descriptorBinding = *(bindings++); - descriptorBinding = {}; - descriptorBinding.binding = baseBindingIndex; - descriptorBinding.descriptorType = GetDescriptorType(range.descriptorType); - descriptorBinding.descriptorCount = range.descriptorNum; - descriptorBinding.stageFlags = GetShaderStageFlags(range.shaderStages); - } else { - for (uint32_t j = 0; j < range.descriptorNum; j++) { - *(bindingFlags++) = commonBindingFlags; - - VkDescriptorSetLayoutBinding& descriptorBinding = *(bindings++); - descriptorBinding = {}; - descriptorBinding.binding = baseBindingIndex + j; - descriptorBinding.descriptorType = GetDescriptorType(range.descriptorType); - descriptorBinding.descriptorCount = 1; - descriptorBinding.stageFlags = GetShaderStageFlags(range.shaderStages); - } - } - } -} - -void PipelineLayoutVK::FillDynamicConstantBufferBindings( - const DescriptorSetDesc& descriptorSetDesc, const uint32_t* bindingOffsets, VkDescriptorSetLayoutBinding*& bindings, VkDescriptorBindingFlagsEXT*& bindingFlags) const { - for (uint32_t i = 0; i < descriptorSetDesc.dynamicConstantBufferNum; i++) { - const DynamicConstantBufferDesc& buffer = descriptorSetDesc.dynamicConstantBuffers[i]; - - *(bindingFlags++) = 0; - - VkDescriptorSetLayoutBinding& descriptorBinding = *(bindings++); - descriptorBinding = {}; - descriptorBinding.binding = buffer.registerIndex + bindingOffsets[(uint32_t)DescriptorType::CONSTANT_BUFFER]; - descriptorBinding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; - descriptorBinding.descriptorCount = 1; - descriptorBinding.stageFlags = GetShaderStageFlags(buffer.shaderStages); - } -} - -void PipelineLayoutVK::FillPushConstantRanges(const PipelineLayoutDesc& pipelineLayoutDesc, VkPushConstantRange* pushConstantRanges) const { - uint32_t offset = 0; - - for (uint32_t i = 0; i < pipelineLayoutDesc.pushConstantNum; i++) { - const PushConstantDesc& pushConstantDesc = 
pipelineLayoutDesc.pushConstants[i]; - - VkPushConstantRange& range = pushConstantRanges[i]; - range = {}; - range.stageFlags = GetShaderStageFlags(pushConstantDesc.shaderStages); - range.offset = offset; - range.size = pushConstantDesc.size; - - offset += pushConstantDesc.size; - } -} - void PipelineLayoutVK::FillRuntimeBindingInfo(const PipelineLayoutDesc& pipelineLayoutDesc, const uint32_t* bindingOffsets) { RuntimeBindingInfo& destination = m_RuntimeBindingInfo; const PipelineLayoutDesc& source = pipelineLayoutDesc; @@ -267,13 +269,13 @@ void PipelineLayoutVK::FillRuntimeBindingInfo(const PipelineLayoutDesc& pipeline } } -RuntimeBindingInfo::RuntimeBindingInfo(StdAllocator& allocator) - : hasVariableDescriptorNum(allocator), - descriptorSetRangeDescs(allocator), - dynamicConstantBufferDescs(allocator), - descriptorSetDescs(allocator), - pushConstantDescs(allocator), - pushConstantBindings(allocator) { +RuntimeBindingInfo::RuntimeBindingInfo(StdAllocator& allocator) : + hasVariableDescriptorNum(allocator), + descriptorSetRangeDescs(allocator), + dynamicConstantBufferDescs(allocator), + descriptorSetDescs(allocator), + pushConstantDescs(allocator), + pushConstantBindings(allocator) { } //================================================================================================================ diff --git a/Source/VK/PipelineLayoutVK.h b/Source/VK/PipelineLayoutVK.h index 949fe155..36ddfc2e 100644 --- a/Source/VK/PipelineLayoutVK.h +++ b/Source/VK/PipelineLayoutVK.h @@ -24,8 +24,8 @@ struct RuntimeBindingInfo { }; struct PipelineLayoutVK { - inline PipelineLayoutVK(DeviceVK& device) - : m_Device(device), m_RuntimeBindingInfo(device.GetStdAllocator()), m_DescriptorSetLayouts(device.GetStdAllocator()), m_DescriptorSetSpaces(device.GetStdAllocator()) { + inline PipelineLayoutVK(DeviceVK& device) : + m_Device(device), m_RuntimeBindingInfo(device.GetStdAllocator()), m_DescriptorSetLayouts(device.GetStdAllocator()), m_DescriptorSetSpaces(device.GetStdAllocator()) { 
} inline operator VkPipelineLayout() const { @@ -62,19 +62,12 @@ struct PipelineLayoutVK { void SetDebugName(const char* name); - private: +private: void FillBindingOffsets(bool ignoreGlobalSPIRVOffsets, uint32_t* bindingOffsets); - VkDescriptorSetLayout CreateSetLayout(const DescriptorSetDesc& descriptorSetDesc, const uint32_t* bindingOffsets); - - void FillDescriptorBindings( - const DescriptorSetDesc& descriptorSetDesc, const uint32_t* bindingOffsets, VkDescriptorSetLayoutBinding*& bindings, VkDescriptorBindingFlagsEXT*& bindingFlags) const; - void FillDynamicConstantBufferBindings( - const DescriptorSetDesc& descriptorSetDesc, const uint32_t* bindingOffsets, VkDescriptorSetLayoutBinding*& bindings, VkDescriptorBindingFlagsEXT*& bindingFlags) const; - - void FillPushConstantRanges(const PipelineLayoutDesc& pipelineLayoutDesc, VkPushConstantRange* pushConstantRanges) const; void FillRuntimeBindingInfo(const PipelineLayoutDesc& pipelineLayoutDesc, const uint32_t* bindingOffsets); + VkDescriptorSetLayout CreateSetLayout(const DescriptorSetDesc& descriptorSetDesc, const uint32_t* bindingOffsets); - private: +private: DeviceVK& m_Device; VkPipelineLayout m_Handle = VK_NULL_HANDLE; VkPipelineBindPoint m_PipelineBindPoint = VK_PIPELINE_BIND_POINT_MAX_ENUM; diff --git a/Source/VK/PipelineVK.h b/Source/VK/PipelineVK.h index 0182e8d5..49b3794b 100644 --- a/Source/VK/PipelineVK.h +++ b/Source/VK/PipelineVK.h @@ -38,10 +38,10 @@ struct PipelineVK { void SetDebugName(const char* name); Result WriteShaderGroupIdentifiers(uint32_t baseShaderGroupIndex, uint32_t shaderGroupNum, void* buffer) const; - private: +private: Result SetupShaderStage(VkPipelineShaderStageCreateInfo& stage, const ShaderDesc& shaderDesc, VkShaderModule*& modules); - private: +private: DeviceVK& m_Device; VkPipeline m_Handle = VK_NULL_HANDLE; VkPipelineBindPoint m_BindPoint = (VkPipelineBindPoint)0; diff --git a/Source/VK/QueryPoolVK.cpp b/Source/VK/QueryPoolVK.cpp index d15a54a4..6a7fc75e 100644 --- 
a/Source/VK/QueryPoolVK.cpp +++ b/Source/VK/QueryPoolVK.cpp @@ -16,7 +16,7 @@ QueryPoolVK::~QueryPoolVK() { Result QueryPoolVK::Create(const QueryPoolDesc& queryPoolDesc) { m_OwnsNativeObjects = true; - if (queryPoolDesc.queryType == QueryType::TIMESTAMP) + if (queryPoolDesc.queryType == QueryType::TIMESTAMP || queryPoolDesc.queryType == QueryType::TIMESTAMP_COPY_QUEUE) m_Type = VK_QUERY_TYPE_TIMESTAMP; else if (queryPoolDesc.queryType == QueryType::OCCLUSION) m_Type = VK_QUERY_TYPE_OCCLUSION; diff --git a/Source/VK/QueryPoolVK.h b/Source/VK/QueryPoolVK.h index 281d438e..ef917cb2 100644 --- a/Source/VK/QueryPoolVK.h +++ b/Source/VK/QueryPoolVK.h @@ -37,7 +37,7 @@ struct QueryPoolVK { return m_QuerySize; } - private: +private: DeviceVK& m_Device; VkQueryPool m_Handle = VK_NULL_HANDLE; VkQueryType m_Type = (VkQueryType)0; diff --git a/Source/VK/QueryPoolVK.hpp b/Source/VK/QueryPoolVK.hpp index cedce1a0..c003533c 100644 --- a/Source/VK/QueryPoolVK.hpp +++ b/Source/VK/QueryPoolVK.hpp @@ -12,4 +12,4 @@ static uint32_t NRI_CALL GetQuerySize(const QueryPool& queryPool) { #pragma endregion -Define_Core_QueryPool_PartiallyFillFunctionTable(VK) +Define_Core_QueryPool_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/SwapChainVK.cpp b/Source/VK/SwapChainVK.cpp index d8614b4e..6e5701ac 100644 --- a/Source/VK/SwapChainVK.cpp +++ b/Source/VK/SwapChainVK.cpp @@ -3,6 +3,7 @@ #include "SharedVK.h" #include "CommandQueueVK.h" +#include "FenceVK.h" #include "SwapChainVK.h" #include "TextureVK.h" @@ -36,7 +37,8 @@ SwapChainVK::~SwapChainVK() { void SwapChainVK::Destroy() { for (size_t i = 0; i < m_Textures.size(); i++) Deallocate(m_Device.GetStdAllocator(), m_Textures[i]); - m_Textures.clear(); + + Deallocate(m_Device.GetStdAllocator(), m_LatencyFence); const auto& vk = m_Device.GetDispatchTable(); if (m_Handle) @@ -45,13 +47,41 @@ void SwapChainVK::Destroy() { if (m_Surface) vk.DestroySurfaceKHR(m_Device, m_Surface, m_Device.GetAllocationCallbacks()); - for (VkSemaphore& 
semaphore : m_Semaphores) - vk.DestroySemaphore(m_Device, semaphore, m_Device.GetAllocationCallbacks()); + for (VkSemaphore semaphore : m_ImageAcquiredSemaphores) { + if (semaphore) + vk.DestroySemaphore(m_Device, semaphore, m_Device.GetAllocationCallbacks()); + } + + for (VkSemaphore semaphore : m_RenderingFinishedSemaphores) { + if (semaphore) + vk.DestroySemaphore(m_Device, semaphore, m_Device.GetAllocationCallbacks()); + } } -Result SwapChainVK::CreateSurface(const SwapChainDesc& swapChainDesc) { +Result SwapChainVK::Create(const SwapChainDesc& swapChainDesc) { const auto& vk = m_Device.GetDispatchTable(); + m_CommandQueue = (CommandQueueVK*)swapChainDesc.commandQueue; + uint32_t familyIndex = m_CommandQueue->GetFamilyIndex(); + + // Create semaphores + for (VkSemaphore& semaphore : m_ImageAcquiredSemaphores) { + VkSemaphoreTypeCreateInfo timelineCreateInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, VK_SEMAPHORE_TYPE_BINARY, 0}; + VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &timelineCreateInfo, 0}; + + VkResult result = vk.CreateSemaphore((VkDevice)m_Device, &createInfo, m_Device.GetAllocationCallbacks(), &semaphore); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateSemaphore returned %d", (int32_t)result); + } + + for (VkSemaphore& semaphore : m_RenderingFinishedSemaphores) { + VkSemaphoreTypeCreateInfo timelineCreateInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, VK_SEMAPHORE_TYPE_BINARY, 0}; + VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &timelineCreateInfo, 0}; + + VkResult result = vk.CreateSemaphore((VkDevice)m_Device, &createInfo, m_Device.GetAllocationCallbacks(), &semaphore); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateSemaphore returned %d", (int32_t)result); + } + + // Create surface #ifdef VK_USE_PLATFORM_WIN32_KHR if (swapChainDesc.window.windows.hwnd) { 
VkWin32SurfaceCreateInfoKHR win32SurfaceInfo = {VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR}; @@ -59,8 +89,6 @@ Result SwapChainVK::CreateSurface(const SwapChainDesc& swapChainDesc) { VkResult result = vk.CreateWin32SurfaceKHR(m_Device, &win32SurfaceInfo, m_Device.GetAllocationCallbacks(), &m_Surface); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateWin32SurfaceKHR returned %d", (int32_t)result); - - return Result::SUCCESS; } #endif #ifdef VK_USE_PLATFORM_METAL_EXT @@ -70,8 +98,6 @@ Result SwapChainVK::CreateSurface(const SwapChainDesc& swapChainDesc) { VkResult result = vk.CreateMetalSurfaceEXT(m_Device, &metalSurfaceCreateInfo, m_Device.GetAllocationCallbacks(), &m_Surface); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateMetalSurfaceEXT returned %d", (int32_t)result); - - return Result::SUCCESS; } #endif #ifdef VK_USE_PLATFORM_XLIB_KHR @@ -82,8 +108,6 @@ Result SwapChainVK::CreateSurface(const SwapChainDesc& swapChainDesc) { VkResult result = vk.CreateXlibSurfaceKHR(m_Device, &xlibSurfaceInfo, m_Device.GetAllocationCallbacks(), &m_Surface); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateXlibSurfaceKHR returned %d", (int32_t)result); - - return Result::SUCCESS; } #endif #ifdef VK_USE_PLATFORM_WAYLAND_KHR @@ -94,176 +118,203 @@ Result SwapChainVK::CreateSurface(const SwapChainDesc& swapChainDesc) { VkResult result = vk.CreateWaylandSurfaceKHR(m_Device, &waylandSurfaceInfo, m_Device.GetAllocationCallbacks(), &m_Surface); RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateWaylandSurfaceKHR returned %d", (int32_t)result); - - return Result::SUCCESS; } #endif - return Result::INVALID_ARGUMENT; -} + // Surface caps + uint32_t textureNum = swapChainDesc.textureNum; + { + VkBool32 supported = VK_FALSE; + VkResult result = vk.GetPhysicalDeviceSurfaceSupportKHR(m_Device, familyIndex, m_Surface, &supported); + 
RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS && supported, GetReturnCode(result), "Surface is not supported"); -Result SwapChainVK::Create(const SwapChainDesc& swapChainDesc) { - const auto& vk = m_Device.GetDispatchTable(); + VkPhysicalDeviceSurfaceInfo2KHR surfaceInfo = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SURFACE_INFO_2_KHR}; + surfaceInfo.surface = m_Surface; - for (VkSemaphore& semaphore : m_Semaphores) { - VkSemaphoreTypeCreateInfo timelineCreateInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, VK_SEMAPHORE_TYPE_BINARY, 0}; - VkSemaphoreCreateInfo createInfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &timelineCreateInfo, 0}; - - VkResult result = vk.CreateSemaphore((VkDevice)m_Device, &createInfo, m_Device.GetAllocationCallbacks(), &semaphore); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateSemaphore returned %d", (int32_t)result); - } + VkSurfaceCapabilities2KHR sc = {VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_KHR}; - m_CommandQueue = (CommandQueueVK*)swapChainDesc.commandQueue; + VkLatencySurfaceCapabilitiesNV latencySurfaceCapabilities = {VK_STRUCTURE_TYPE_LATENCY_SURFACE_CAPABILITIES_NV}; + latencySurfaceCapabilities.presentModeCount = 8; + latencySurfaceCapabilities.pPresentModes = STACK_ALLOC(VkPresentModeKHR, latencySurfaceCapabilities.presentModeCount); - const Result nriResult = CreateSurface(swapChainDesc); - if (nriResult != Result::SUCCESS) - return nriResult; + if (m_Device.m_IsLowLatencySupported) + sc.pNext = &latencySurfaceCapabilities; - VkBool32 supported = VK_FALSE; - vk.GetPhysicalDeviceSurfaceSupportKHR(m_Device, m_CommandQueue->GetFamilyIndex(), m_Surface, &supported); + result = vk.GetPhysicalDeviceSurfaceCapabilities2KHR(m_Device, &surfaceInfo, &sc); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfaceCapabilitiesKHR returned %d", (int32_t)result); - if (supported == VK_FALSE) { - REPORT_ERROR(&m_Device, "The specified surface is not 
supported by the physical device"); - return Result::UNSUPPORTED; - } + bool isWidthValid = swapChainDesc.width >= sc.surfaceCapabilities.minImageExtent.width && swapChainDesc.width <= sc.surfaceCapabilities.maxImageExtent.width; + RETURN_ON_FAILURE(&m_Device, isWidthValid, Result::INVALID_ARGUMENT, "swapChainDesc.width is out of [%u, %u] range", sc.surfaceCapabilities.minImageExtent.width, + sc.surfaceCapabilities.maxImageExtent.width); - VkSurfaceCapabilitiesKHR capabilites = {}; - VkResult result = vk.GetPhysicalDeviceSurfaceCapabilitiesKHR(m_Device, m_Surface, &capabilites); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfaceCapabilitiesKHR returned %d", (int32_t)result); + bool isHeightValid = swapChainDesc.height >= sc.surfaceCapabilities.minImageExtent.height && swapChainDesc.height <= sc.surfaceCapabilities.maxImageExtent.height; + RETURN_ON_FAILURE(&m_Device, isHeightValid, Result::INVALID_ARGUMENT, "swapChainDesc.height is out of [%u, %u] range", sc.surfaceCapabilities.minImageExtent.height, + sc.surfaceCapabilities.maxImageExtent.height); - const bool isWidthValid = swapChainDesc.width >= capabilites.minImageExtent.width && swapChainDesc.width <= capabilites.maxImageExtent.width; - const bool isHeightValid = swapChainDesc.height >= capabilites.minImageExtent.height && swapChainDesc.height <= capabilites.maxImageExtent.height; + bool isTextureNumValid = textureNum >= sc.surfaceCapabilities.minImageCount && textureNum <= sc.surfaceCapabilities.maxImageCount; + RETURN_ON_FAILURE(&m_Device, isTextureNumValid, Result::INVALID_ARGUMENT, "swapChainDesc.textureNum is out of [%u, %u] range", sc.surfaceCapabilities.minImageCount, + sc.surfaceCapabilities.maxImageCount); + } - if (!isWidthValid || !isHeightValid) { - REPORT_ERROR(&m_Device, "Invalid SwapChainVK buffer size"); - return Result::INVALID_ARGUMENT; + // Surface format + VkSurfaceFormatKHR surfaceFormat = {}; + { + uint32_t formatNum = 0; + VkResult result
= vk.GetPhysicalDeviceSurfaceFormatsKHR(m_Device, m_Surface, &formatNum, nullptr); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfaceFormatsKHR returned %d", (int32_t)result); + + VkSurfaceFormatKHR* surfaceFormats = STACK_ALLOC(VkSurfaceFormatKHR, formatNum); + result = vk.GetPhysicalDeviceSurfaceFormatsKHR(m_Device, m_Surface, &formatNum, surfaceFormats); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfaceFormatsKHR returned %d", (int32_t)result); + + VkFormat format = g_swapChainFormat[(uint32_t)swapChainDesc.format]; + VkColorSpaceKHR colorSpace = g_colorSpace[(uint32_t)swapChainDesc.format]; + + uint32_t i = 0; + for (; i < formatNum; i++) { + if (surfaceFormats[i].format == format && surfaceFormats[i].colorSpace == colorSpace) { + surfaceFormat = surfaceFormats[i]; + break; + } + } + if (i == formatNum) { + REPORT_WARNING(&m_Device, "The requested format is not supported. Using 1st surface format from the list"); + surfaceFormat = surfaceFormats[0]; + } } - uint32_t formatNum = 0; - result = vk.GetPhysicalDeviceSurfaceFormatsKHR(m_Device, m_Surface, &formatNum, nullptr); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfaceFormatsKHR returned %d", (int32_t)result); + // Present mode + bool allowLowLatency = swapChainDesc.allowLowLatency && m_Device.m_IsLowLatencySupported; + VkPresentModeKHR presentMode = VK_PRESENT_MODE_IMMEDIATE_KHR; + { + uint32_t presentModeNum = 8; + VkPresentModeKHR* presentModes = STACK_ALLOC(VkPresentModeKHR, presentModeNum); + VkResult result = vk.GetPhysicalDeviceSurfacePresentModesKHR(m_Device, m_Surface, &presentModeNum, presentModes); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfacePresentModesKHR returned %d", (int32_t)result); + + VkPresentModeKHR vsyncOnModes[] = {VK_PRESENT_MODE_FIFO_RELAXED_KHR, 
VK_PRESENT_MODE_FIFO_KHR}; + VkPresentModeKHR vsyncOffModes[] = {VK_PRESENT_MODE_MAILBOX_KHR, VK_PRESENT_MODE_IMMEDIATE_KHR}; + const VkPresentModeKHR* modes = swapChainDesc.verticalSyncInterval ? vsyncOnModes : vsyncOffModes; + static_assert(GetCountOf(vsyncOnModes) == GetCountOf(vsyncOffModes)); + static_assert(GetCountOf(vsyncOnModes) == 2); + + if (allowLowLatency) + vsyncOffModes[0] = vsyncOffModes[1]; // dictated by "latencySurfaceCapabilities" + + uint32_t j = 0; + for (; j < 2; j++) { + uint32_t i = 0; + for (; i < presentModeNum; i++) { + if (modes[j] == presentModes[i]) { + presentMode = modes[j]; + break; + } + } + if (i != presentModeNum) + break; + REPORT_WARNING(&m_Device, "VkPresentModeKHR = %u is not supported...", modes[j]); + } + if (j == 2) + REPORT_WARNING(&m_Device, "No suitable present mode found, switching to VK_PRESENT_MODE_IMMEDIATE_KHR..."); + } - VkSurfaceFormatKHR* surfaceFormats = STACK_ALLOC(VkSurfaceFormatKHR, formatNum); - result = vk.GetPhysicalDeviceSurfaceFormatsKHR(m_Device, m_Surface, &formatNum, surfaceFormats); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfaceFormatsKHR returned %d", (int32_t)result); + { // Swap chain + VkSwapchainCreateInfoKHR swapchainInfo = {VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR}; + swapchainInfo.flags = m_Device.m_IsSwapChainMutableFormatSupported ?
VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR : (VkSwapchainCreateFlagsKHR)0; + swapchainInfo.surface = m_Surface; + swapchainInfo.minImageCount = textureNum; + swapchainInfo.imageFormat = surfaceFormat.format; + swapchainInfo.imageColorSpace = surfaceFormat.colorSpace; + swapchainInfo.imageExtent = {swapChainDesc.width, swapChainDesc.height}; + swapchainInfo.imageArrayLayers = 1; + swapchainInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + swapchainInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + swapchainInfo.queueFamilyIndexCount = 1; + swapchainInfo.pQueueFamilyIndices = &familyIndex; + swapchainInfo.preTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR; + swapchainInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + swapchainInfo.presentMode = presentMode; + const void** tail = &swapchainInfo.pNext; + + // Mutable formats + VkFormat mutableFormats[2]; + uint32_t mutableFormatNum = 0; + mutableFormats[mutableFormatNum++] = surfaceFormat.format; + switch (surfaceFormat.format) { + case VK_FORMAT_R8G8B8A8_UNORM: + mutableFormats[mutableFormatNum++] = VK_FORMAT_R8G8B8A8_SRGB; + break; + case VK_FORMAT_R8G8B8A8_SRGB: + mutableFormats[mutableFormatNum++] = VK_FORMAT_R8G8B8A8_UNORM; + break; + case VK_FORMAT_B8G8R8A8_UNORM: + mutableFormats[mutableFormatNum++] = VK_FORMAT_B8G8R8A8_SRGB; + break; + case VK_FORMAT_B8G8R8A8_SRGB: + mutableFormats[mutableFormatNum++] = VK_FORMAT_B8G8R8A8_UNORM; + break; + } - VkFormat format = g_swapChainFormat[(uint32_t)swapChainDesc.format]; - VkColorSpaceKHR colorSpace = g_colorSpace[(uint32_t)swapChainDesc.format]; + VkImageFormatListCreateInfo imageFormatListCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO}; + imageFormatListCreateInfo.pViewFormats = mutableFormats; + imageFormatListCreateInfo.viewFormatCount = mutableFormatNum; - VkSurfaceFormatKHR surfaceFormat = surfaceFormats[0]; - uint32_t i = 0; - for (; i < formatNum; i++) { - if 
(surfaceFormats[i].format == format && surfaceFormats[i].colorSpace == colorSpace) { - surfaceFormat = surfaceFormats[i]; - break; + if (m_Device.m_IsSwapChainMutableFormatSupported) { + APPEND_EXT(imageFormatListCreateInfo); } - } - if (i == formatNum) - REPORT_WARNING(&m_Device, "The requested format is not supported. Using 1st surface format from the list"); - - uint32_t presentModeNum = 0; - result = vk.GetPhysicalDeviceSurfacePresentModesKHR(m_Device, m_Surface, &presentModeNum, nullptr); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfacePresentModesKHR returned %d", (int32_t)result); - VkPresentModeKHR* presentModes = STACK_ALLOC(VkPresentModeKHR, presentModeNum); - result = vk.GetPhysicalDeviceSurfacePresentModesKHR(m_Device, m_Surface, &presentModeNum, presentModes); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkGetPhysicalDeviceSurfacePresentModesKHR returned %d", (int32_t)result); + // Low latency mode + VkSwapchainLatencyCreateInfoNV latencyCreateInfo = {VK_STRUCTURE_TYPE_SWAPCHAIN_LATENCY_CREATE_INFO_NV}; + latencyCreateInfo.latencyModeEnable = allowLowLatency; - // Both of these modes use v-sync for preseting, but FIFO blocks execution - VkPresentModeKHR desiredPresentMode = swapChainDesc.verticalSyncInterval ? VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_MAILBOX_KHR; + if (m_Device.m_IsLowLatencySupported && allowLowLatency) { + APPEND_EXT(latencyCreateInfo); + } - i = 0; - for (; i < presentModeNum; i++) { - if (desiredPresentMode == presentModes[i]) - break; - } - if (i == presentModeNum) { - REPORT_WARNING(&m_Device, "The present mode is not supported. 
Using the first mode from the list"); - desiredPresentMode = presentModes[0]; + // Create + VkResult result = vk.CreateSwapchainKHR(m_Device, &swapchainInfo, m_Device.GetAllocationCallbacks(), &m_Handle); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateSwapchainKHR returned %d", (int32_t)result); } - // Swap chain - const uint32_t familyIndex = m_CommandQueue->GetFamilyIndex(); - const uint32_t minImageNum = std::max(capabilites.minImageCount, swapChainDesc.textureNum); - - VkSwapchainCreateInfoKHR swapchainInfo = { - VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, - nullptr, - m_Device.m_IsSwapChainMutableFormatSupported ? VK_SWAPCHAIN_CREATE_MUTABLE_FORMAT_BIT_KHR : (VkSwapchainCreateFlagsKHR)0, - m_Surface, - minImageNum, - surfaceFormat.format, - surfaceFormat.colorSpace, - {swapChainDesc.width, swapChainDesc.height}, - 1, - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, - VK_SHARING_MODE_EXCLUSIVE, - 1, - &familyIndex, - VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR, - VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, - desiredPresentMode, - VK_FALSE, - VK_NULL_HANDLE, - }; - - // Swap chain: mutable formats - VkFormat mutableFormats[2]; - uint32_t mutableFormatNum = 0; - mutableFormats[mutableFormatNum++] = surfaceFormat.format; - switch (surfaceFormat.format) { - case VK_FORMAT_R8G8B8A8_UNORM: - mutableFormats[mutableFormatNum++] = VK_FORMAT_R8G8B8A8_SRGB; - break; - case VK_FORMAT_R8G8B8A8_SRGB: - mutableFormats[mutableFormatNum++] = VK_FORMAT_R8G8B8A8_UNORM; - break; - case VK_FORMAT_B8G8R8A8_UNORM: - mutableFormats[mutableFormatNum++] = VK_FORMAT_B8G8R8A8_SRGB; - break; - case VK_FORMAT_B8G8R8A8_SRGB: - mutableFormats[mutableFormatNum++] = VK_FORMAT_B8G8R8A8_UNORM; - break; + { // Swap chain images + uint32_t imageNum = 0; + vk.GetSwapchainImagesKHR(m_Device, m_Handle, &imageNum, nullptr); + + VkImage* imageHandles = STACK_ALLOC(VkImage, imageNum); + vk.GetSwapchainImagesKHR(m_Device, 
m_Handle, &imageNum, imageHandles); + + m_Textures.resize(imageNum); + for (uint32_t i = 0; i < imageNum; i++) { + TextureVKDesc desc = {}; + desc.vkImage = (NRIVkImage)imageHandles[i]; + desc.vkFormat = surfaceFormat.format; + desc.vkImageAspectFlags = VK_IMAGE_ASPECT_COLOR_BIT; + desc.vkImageType = VK_IMAGE_TYPE_2D; + desc.width = swapChainDesc.width; + desc.height = swapChainDesc.height; + desc.depth = 1; + desc.mipNum = 1; + desc.arraySize = 1; + desc.sampleNum = 1; + + TextureVK* texture = Allocate(m_Device.GetStdAllocator(), m_Device); + texture->Create(desc); + + m_Textures[i] = texture; + } } - VkImageFormatListCreateInfo imageFormatListCreateInfo = {VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO}; - imageFormatListCreateInfo.pViewFormats = mutableFormats; - imageFormatListCreateInfo.viewFormatCount = mutableFormatNum; - - if (m_Device.m_IsSwapChainMutableFormatSupported) - swapchainInfo.pNext = &imageFormatListCreateInfo; - - result = vk.CreateSwapchainKHR(m_Device, &swapchainInfo, m_Device.GetAllocationCallbacks(), &m_Handle); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkCreateSwapchainKHR returned %d", (int32_t)result); - - // Swap chain images - uint32_t imageNum = 0; - vk.GetSwapchainImagesKHR(m_Device, m_Handle, &imageNum, nullptr); - - VkImage* imageHandles = STACK_ALLOC(VkImage, imageNum); - vk.GetSwapchainImagesKHR(m_Device, m_Handle, &imageNum, imageHandles); - - m_Textures.resize(imageNum); - for (i = 0; i < imageNum; i++) { - TextureVKDesc desc = {}; - desc.vkImage = (NRIVkImage)imageHandles[i]; - desc.vkFormat = surfaceFormat.format; - desc.vkImageAspectFlags = VK_IMAGE_ASPECT_COLOR_BIT; - desc.vkImageType = VK_IMAGE_TYPE_2D; - desc.width = (uint16_t)swapchainInfo.imageExtent.width; - desc.height = (uint16_t)swapchainInfo.imageExtent.height; - desc.depth = 1; - desc.mipNum = 1; - desc.arraySize = 1; - desc.sampleNum = 1; - - TextureVK* texture = Allocate(m_Device.GetStdAllocator(), m_Device); - 
texture->Create(desc); - - m_Textures[i] = texture; + // Latency fence + if (allowLowLatency) { + m_LatencyFence = Allocate(m_Device.GetStdAllocator(), m_Device); + m_LatencyFence->Create(0); } - m_SwapChainDesc = swapChainDesc; + // Finalize + m_PresentId = GetSwapChainId(); + m_Desc = swapChainDesc; + m_Desc.waitable = m_Device.m_IsPresentWaitSupported && m_Desc.waitable; + m_Desc.allowLowLatency = allowLowLatency; return Result::SUCCESS; } @@ -284,42 +335,31 @@ inline Texture* const* SwapChainVK::GetTextures(uint32_t& textureNum) const { inline uint32_t SwapChainVK::AcquireNextTexture() { ExclusiveScope lock(m_CommandQueue->GetLock()); - const auto& vk = m_Device.GetDispatchTable(); - VkSemaphore semaphore = m_Semaphores[m_FrameIndex]; - VkResult result = vk.AcquireNextImageKHR(m_Device, m_Handle, VK_DEFAULT_TIMEOUT, semaphore, VK_NULL_HANDLE, &m_TextureIndex); - - if (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR) { - /* - TODO: currently we acquire swap chain image this way: - SwapChainVK::AcquireNextTexture - AcquireNextImageKHR(semaphore) - signal - QueueSubmit(semaphore) - wait - SwapChainVK::Present() - QueueSubmit(semaphore) - signal - QueuePresentKHR(semaphore) - wait - Would it be better to use 2 semaphores? 
- SwapChainVK::AcquireNextTexture - AcquireNextImageKHR(imageAcquiredSemaphore) - signal - SwapChainVK::Present() - QueueSubmit(imageAcquiredSemaphore) - wait - QueueSubmit(renderingFinishedSemaphore) - signal - QueuePresentKHR(renderingFinishedSemaphore) - wait - */ - const uint32_t waitDstStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; - VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 1, &semaphore, &waitDstStageMask, 0, nullptr, 0, nullptr}; - result = vk.QueueSubmit(*m_CommandQueue, 1, &submitInfo, VK_NULL_HANDLE); - if (result != VK_SUCCESS) - REPORT_ERROR(&m_Device, "vkQueueSubmit returned %d", (int32_t)result); - } else if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_ERROR_SURFACE_LOST_KHR) // TODO: find a better way, instead of returning an invalid index - m_TextureIndex = OUT_OF_DATE; - else + // Acquire next image (signal) + VkSemaphore imageAcquiredSemaphore = m_ImageAcquiredSemaphores[m_FrameIndex]; + VkResult result = vk.AcquireNextImageKHR(m_Device, m_Handle, MsToUs(TIMEOUT_PRESENT), imageAcquiredSemaphore, VK_NULL_HANDLE, &m_TextureIndex); + + if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_ERROR_SURFACE_LOST_KHR) + m_TextureIndex = OUT_OF_DATE; // TODO: find a better way, instead of returning an invalid index + else if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR) REPORT_ERROR(&m_Device, "vkAcquireNextImageKHR returned %d", (int32_t)result); return m_TextureIndex; } +inline Result SwapChainVK::WaitForPresent() { + const auto& vk = m_Device.GetDispatchTable(); + + if (m_Desc.waitable && GetPresentIndex(m_PresentId) != 0) { + VkResult result = vk.WaitForPresentKHR(m_Device, m_Handle, m_PresentId - 1, MsToUs(TIMEOUT_PRESENT)); + return GetReturnCode(result); + } + + return Result::UNSUPPORTED; +} + inline Result SwapChainVK::Present() { ExclusiveScope lock(m_CommandQueue->GetLock()); @@ -327,21 +367,135 @@ inline Result SwapChainVK::Present() { return Result::OUT_OF_DATE; const auto& vk = 
m_Device.GetDispatchTable(); + VkSemaphore imageAcquiredSemaphore = m_ImageAcquiredSemaphores[m_FrameIndex]; + VkSemaphore renderingFinishedSemaphore = m_RenderingFinishedSemaphores[m_FrameIndex]; + + { // Wait & Signal + VkSemaphoreSubmitInfo waitSemaphore = {VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO}; + waitSemaphore.semaphore = imageAcquiredSemaphore; + waitSemaphore.stageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + + VkSemaphoreSubmitInfo signalSemaphore = {VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO}; + signalSemaphore.semaphore = renderingFinishedSemaphore; + signalSemaphore.stageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + + VkSubmitInfo2 submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO_2}; + submitInfo.waitSemaphoreInfoCount = 1; + submitInfo.pWaitSemaphoreInfos = &waitSemaphore; + submitInfo.signalSemaphoreInfoCount = 1; + submitInfo.pSignalSemaphoreInfos = &signalSemaphore; + + VkLatencySubmissionPresentIdNV presentId = {VK_STRUCTURE_TYPE_LATENCY_SUBMISSION_PRESENT_ID_NV}; + presentId.presentID = m_PresentId; + if (m_Desc.allowLowLatency) + submitInfo.pNext = &presentId; + + VkResult result = vk.QueueSubmit2(*m_CommandQueue, 1, &submitInfo, VK_NULL_HANDLE); + RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkQueueSubmit2 returned %d", (int32_t)result); + } + + // Present (wait) + VkResult result; + { + VkPresentInfoKHR presentInfo = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR}; + presentInfo.waitSemaphoreCount = 1; + presentInfo.pWaitSemaphores = &renderingFinishedSemaphore; + presentInfo.swapchainCount = 1; + presentInfo.pSwapchains = &m_Handle; + presentInfo.pImageIndices = &m_TextureIndex; + + VkPresentIdKHR presentId = {VK_STRUCTURE_TYPE_PRESENT_ID_KHR}; + presentId.swapchainCount = 1; + presentId.pPresentIds = &m_PresentId; - VkSemaphore semaphore = m_Semaphores[m_FrameIndex]; - VkSubmitInfo submitInfo = {VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, 0, nullptr, nullptr, 0, nullptr, 1, &semaphore}; - VkResult result = vk.QueueSubmit(*m_CommandQueue, 
1, &submitInfo, VK_NULL_HANDLE); - RETURN_ON_FAILURE(&m_Device, result == VK_SUCCESS, GetReturnCode(result), "vkQueueSubmit returned %d", (int32_t)result); + if (m_Device.m_IsPresentIdSupported) + presentInfo.pNext = &presentId; - const VkPresentInfoKHR info = {VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, nullptr, 1, &semaphore, 1, &m_Handle, &m_TextureIndex, nullptr}; + if (m_Desc.allowLowLatency) + SetLatencyMarker((LatencyMarker)VK_LATENCY_MARKER_PRESENT_START_NV); - result = vk.QueuePresentKHR(*m_CommandQueue, &info); - if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR && result != VK_ERROR_OUT_OF_DATE_KHR && result != VK_ERROR_SURFACE_LOST_KHR) - REPORT_ERROR(&m_Device, "vkQueuePresentKHR returned %d", (int32_t)result); + result = vk.QueuePresentKHR(*m_CommandQueue, &presentInfo); + if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR && result != VK_ERROR_OUT_OF_DATE_KHR && result != VK_ERROR_SURFACE_LOST_KHR) + REPORT_ERROR(&m_Device, "vkQueuePresentKHR returned %d", (int32_t)result); + + if (m_Desc.allowLowLatency) + SetLatencyMarker((LatencyMarker)VK_LATENCY_MARKER_PRESENT_END_NV); + } m_FrameIndex = (m_FrameIndex + 1) % MAX_NUMBER_OF_FRAMES_IN_FLIGHT; + m_PresentId++; + + return GetReturnCode(result); +} + +inline Result SwapChainVK::SetLatencySleepMode(const LatencySleepMode& latencySleepMode) { + VkLatencySleepModeInfoNV sleepModeInfo = {VK_STRUCTURE_TYPE_LATENCY_SLEEP_MODE_INFO_NV}; + sleepModeInfo.lowLatencyMode = latencySleepMode.lowLatencyMode; + sleepModeInfo.lowLatencyBoost = latencySleepMode.lowLatencyBoost; + sleepModeInfo.minimumIntervalUs = latencySleepMode.minIntervalUs; + + const auto& vk = m_Device.GetDispatchTable(); + VkResult result = vk.SetLatencySleepModeNV(m_Device, m_Handle, &sleepModeInfo); return GetReturnCode(result); } +inline Result SwapChainVK::SetLatencyMarker(LatencyMarker latencyMarker) { + VkSetLatencyMarkerInfoNV markerInfo = {VK_STRUCTURE_TYPE_SET_LATENCY_MARKER_INFO_NV}; + markerInfo.presentID = m_PresentId; + 
markerInfo.marker = (VkLatencyMarkerNV)latencyMarker; + + const auto& vk = m_Device.GetDispatchTable(); + vk.SetLatencyMarkerNV(m_Device, m_Handle, &markerInfo); + + return Result::SUCCESS; +} + +inline Result SwapChainVK::LatencySleep() { + VkLatencySleepInfoNV sleepInfo = {VK_STRUCTURE_TYPE_LATENCY_SLEEP_INFO_NV}; + sleepInfo.signalSemaphore = *m_LatencyFence; + sleepInfo.value = m_PresentId; + + const auto& vk = m_Device.GetDispatchTable(); + VkResult result = vk.LatencySleepNV(m_Device, m_Handle, &sleepInfo); + + if (result == VK_SUCCESS) + m_LatencyFence->Wait(m_PresentId); + + return GetReturnCode(result); +} + +inline Result SwapChainVK::GetLatencyReport(LatencyReport& latencyReport) { + VkLatencyTimingsFrameReportNV timingsInfo[64] = {}; + for (uint32_t i = 0; i < GetCountOf(timingsInfo); i++) + timingsInfo[i].sType = VK_STRUCTURE_TYPE_LATENCY_TIMINGS_FRAME_REPORT_NV; + + VkGetLatencyMarkerInfoNV getTimingsInfo = {VK_STRUCTURE_TYPE_GET_LATENCY_MARKER_INFO_NV}; + getTimingsInfo.pTimings = timingsInfo; + getTimingsInfo.timingCount = GetCountOf(timingsInfo); + + const auto& vk = m_Device.GetDispatchTable(); + vk.GetLatencyTimingsNV(m_Device, m_Handle, &getTimingsInfo); + + latencyReport = {}; + if (getTimingsInfo.timingCount >= 64) { + const uint32_t i = 63; + latencyReport.inputSampleTimeUs = timingsInfo[i].inputSampleTimeUs; + latencyReport.simulationStartTimeUs = timingsInfo[i].simStartTimeUs; + latencyReport.simulationEndTimeUs = timingsInfo[i].simEndTimeUs; + latencyReport.renderSubmitStartTimeUs = timingsInfo[i].renderSubmitStartTimeUs; + latencyReport.renderSubmitEndTimeUs = timingsInfo[i].renderSubmitEndTimeUs; + latencyReport.presentStartTimeUs = timingsInfo[i].presentStartTimeUs; + latencyReport.presentEndTimeUs = timingsInfo[i].presentEndTimeUs; + latencyReport.driverStartTimeUs = timingsInfo[i].driverStartTimeUs; + latencyReport.driverEndTimeUs = timingsInfo[i].driverEndTimeUs; + latencyReport.osRenderQueueStartTimeUs = 
timingsInfo[i].osRenderQueueStartTimeUs; + latencyReport.osRenderQueueEndTimeUs = timingsInfo[i].osRenderQueueEndTimeUs; + latencyReport.gpuRenderStartTimeUs = timingsInfo[i].gpuRenderStartTimeUs; + latencyReport.gpuRenderEndTimeUs = timingsInfo[i].gpuRenderEndTimeUs; + } + + return Result::SUCCESS; +} + #include "SwapChainVK.hpp" diff --git a/Source/VK/SwapChainVK.h b/Source/VK/SwapChainVK.h index 5a68bc17..1b22c66b 100644 --- a/Source/VK/SwapChainVK.h +++ b/Source/VK/SwapChainVK.h @@ -4,20 +4,25 @@ namespace nri { -struct DeviceVK; struct CommandQueueVK; +struct DeviceVK; +struct FenceVK; struct TextureVK; // Let's keep things simple and hide it under the hood constexpr uint32_t MAX_NUMBER_OF_FRAMES_IN_FLIGHT = 8; struct SwapChainVK : public DisplayDescHelper { + SwapChainVK(DeviceVK& device); + ~SwapChainVK(); + inline DeviceVK& GetDevice() const { return m_Device; } - SwapChainVK(DeviceVK& device); - ~SwapChainVK(); + inline uint64_t GetPresentId() const { + return m_PresentId; + } Result Create(const SwapChainDesc& swapChainDesc); @@ -26,28 +31,36 @@ struct SwapChainVK : public DisplayDescHelper { //================================================================================================================ inline Result GetDisplayDesc(DisplayDesc& displayDesc) { - return DisplayDescHelper::GetDisplayDesc(m_SwapChainDesc.window.windows.hwnd, displayDesc); + return DisplayDescHelper::GetDisplayDesc(m_Desc.window.windows.hwnd, displayDesc); } void SetDebugName(const char* name); Texture* const* GetTextures(uint32_t& textureNum) const; uint32_t AcquireNextTexture(); + Result WaitForPresent(); Result Present(); - private: + Result SetLatencySleepMode(const LatencySleepMode& latencySleepMode); + Result SetLatencyMarker(LatencyMarker latencyMarker); + Result LatencySleep(); + Result GetLatencyReport(LatencyReport& latencyReport); + +private: void Destroy(); - Result CreateSurface(const SwapChainDesc& swapChainDesc); - private: +private: Vector m_Textures; - 
std::array m_Semaphores = {VK_NULL_HANDLE}; - SwapChainDesc m_SwapChainDesc = {}; + FenceVK* m_LatencyFence = nullptr; + std::array m_ImageAcquiredSemaphores = {VK_NULL_HANDLE}; + std::array m_RenderingFinishedSemaphores = {VK_NULL_HANDLE}; + SwapChainDesc m_Desc = {}; VkSwapchainKHR m_Handle = VK_NULL_HANDLE; VkSurfaceKHR m_Surface = VK_NULL_HANDLE; DeviceVK& m_Device; CommandQueueVK* m_CommandQueue = nullptr; + uint64_t m_PresentId = 0; uint32_t m_TextureIndex = 0; - uint32_t m_FrameIndex = 0; + uint8_t m_FrameIndex = 0; // in flight, not global }; } // namespace nri \ No newline at end of file diff --git a/Source/VK/SwapChainVK.hpp b/Source/VK/SwapChainVK.hpp index 18c82710..4ad3dd3a 100644 --- a/Source/VK/SwapChainVK.hpp +++ b/Source/VK/SwapChainVK.hpp @@ -12,7 +12,11 @@ static uint32_t NRI_CALL AcquireNextSwapChainTexture(SwapChain& swapChain) { return ((SwapChainVK&)swapChain).AcquireNextTexture(); } -static Result NRI_CALL SwapChainPresent(SwapChain& swapChain) { +static Result NRI_CALL WaitForPresent(SwapChain& swapChain) { + return ((SwapChainVK&)swapChain).WaitForPresent(); +} + +static Result NRI_CALL QueuePresent(SwapChain& swapChain) { return ((SwapChainVK&)swapChain).Present(); } @@ -20,4 +24,25 @@ static Result NRI_CALL GetDisplayDesc(SwapChain& swapChain, DisplayDesc& display return ((SwapChainVK&)swapChain).GetDisplayDesc(displayDesc); } -Define_SwapChain_PartiallyFillFunctionTable(VK) +#pragma region[ Low latency ] + +static Result SetLatencySleepMode(SwapChain& swapChain, const LatencySleepMode& latencySleepMode) { + return ((SwapChainVK&)swapChain).SetLatencySleepMode(latencySleepMode); +} + +static Result SetLatencyMarker(SwapChain& swapChain, LatencyMarker latencyMarker) { + return ((SwapChainVK&)swapChain).SetLatencyMarker(latencyMarker); +} + +static Result LatencySleep(SwapChain& swapChain) { + return ((SwapChainVK&)swapChain).LatencySleep(); +} + +static Result GetLatencyReport(const SwapChain& swapChain, LatencyReport& latencyReport) { + 
return ((SwapChainVK&)swapChain).GetLatencyReport(latencyReport); +} + +#pragma endregion + +Define_SwapChain_PartiallyFillFunctionTable(VK); +Define_LowLatency_SwapChain_PartiallyFillFunctionTable(VK); diff --git a/Source/VK/TextureVK.cpp b/Source/VK/TextureVK.cpp index 837e241f..8b83511a 100644 --- a/Source/VK/TextureVK.cpp +++ b/Source/VK/TextureVK.cpp @@ -60,7 +60,7 @@ Result TextureVK::Create(const TextureVKDesc& textureDesc) { m_OwnsNativeObjects = false; m_ImageAspectFlags = (VkImageAspectFlags)textureDesc.vkImageAspectFlags; m_Desc.type = GetTextureType((VkImageType)textureDesc.vkImageType); - m_Desc.usageMask = (nri::TextureUsageBits)(-1); // TODO: it's not right... + m_Desc.usageMask = (TextureUsageBits)(-1); // TODO: it's not right... m_Desc.format = VKFormatToNRIFormat((VkFormat)textureDesc.vkFormat); m_Desc.width = textureDesc.width; m_Desc.height = textureDesc.height; @@ -74,23 +74,6 @@ Result TextureVK::Create(const TextureVKDesc& textureDesc) { return Result::SUCCESS; } -Dim_t TextureVK::GetSize(Dim_t dimensionIndex, Mip_t mip) const { - assert(dimensionIndex < 3); - - Dim_t dim = m_Desc.depth; - if (dimensionIndex == 0) - dim = m_Desc.width; - else if (dimensionIndex == 1) - dim = m_Desc.height; - - dim = (Dim_t)std::max(dim >> mip, 1); - - // TODO: VK doesn't require manual alignment, but probably we should use it here and during texture creation - // dim = Align(dim, dimension < 2 ? 
GetFormatProps(m_Desc.format).blockWidth : 1); - - return dim; -} - //================================================================================================================ // NRI //================================================================================================================ diff --git a/Source/VK/TextureVK.h b/Source/VK/TextureVK.h index 95cc34e0..ea8d27bd 100644 --- a/Source/VK/TextureVK.h +++ b/Source/VK/TextureVK.h @@ -30,11 +30,14 @@ struct TextureVK { return m_Desc; } + inline Dim_t GetSize(Dim_t dimensionIndex, Mip_t mip = 0) const { + return GetDimension(GraphicsAPI::VULKAN, m_Desc, dimensionIndex, mip); + } + ~TextureVK(); Result Create(const TextureDesc& textureDesc); Result Create(const TextureVKDesc& textureDesc); - Dim_t GetSize(Dim_t dimensionIndex, Mip_t mip = 0) const; //================================================================================================================ // NRI @@ -43,7 +46,7 @@ struct TextureVK { void SetDebugName(const char* name); void GetMemoryInfo(MemoryLocation memoryLocation, MemoryDesc& memoryDesc) const; - private: +private: DeviceVK& m_Device; VkImage m_Handle = VK_NULL_HANDLE; TextureDesc m_Desc = {}; diff --git a/Source/VK/TextureVK.hpp b/Source/VK/TextureVK.hpp index 5c0ed014..9a2167e7 100644 --- a/Source/VK/TextureVK.hpp +++ b/Source/VK/TextureVK.hpp @@ -19,4 +19,4 @@ static void NRI_CALL GetTextureMemoryInfo(const Texture& texture, MemoryLocation #pragma endregion -Define_Core_Texture_PartiallyFillFunctionTable(VK) +Define_Core_Texture_PartiallyFillFunctionTable(VK); diff --git a/Source/Validation/AccelerationStructureVal.cpp b/Source/Validation/AccelerationStructureVal.cpp index 15c4a510..97231b08 100644 --- a/Source/Validation/AccelerationStructureVal.cpp +++ b/Source/Validation/AccelerationStructureVal.cpp @@ -14,37 +14,37 @@ AccelerationStructureVal::~AccelerationStructureVal() { if (m_Memory != nullptr) m_Memory->UnbindAccelerationStructure(*this); - 
m_RayTracingAPI.DestroyAccelerationStructure(*GetImpl()); + GetRayTracingInterface().DestroyAccelerationStructure(*GetImpl()); } void AccelerationStructureVal::GetMemoryInfo(MemoryDesc& memoryDesc) const { - m_RayTracingAPI.GetAccelerationStructureMemoryInfo(*GetImpl(), memoryDesc); + GetRayTracingInterface().GetAccelerationStructureMemoryInfo(*GetImpl(), memoryDesc); m_Device.RegisterMemoryType(memoryDesc.type, MemoryLocation::DEVICE); } uint64_t AccelerationStructureVal::GetUpdateScratchBufferSize() const { - return m_RayTracingAPI.GetAccelerationStructureUpdateScratchBufferSize(*GetImpl()); + return GetRayTracingInterface().GetAccelerationStructureUpdateScratchBufferSize(*GetImpl()); } uint64_t AccelerationStructureVal::GetBuildScratchBufferSize() const { - return m_RayTracingAPI.GetAccelerationStructureBuildScratchBufferSize(*GetImpl()); + return GetRayTracingInterface().GetAccelerationStructureBuildScratchBufferSize(*GetImpl()); } uint64_t AccelerationStructureVal::GetHandle() const { RETURN_ON_FAILURE(&m_Device, IsBoundToMemory(), 0, "GetAccelerationStructureHandle: AccelerationStructure is not bound to memory"); - return m_RayTracingAPI.GetAccelerationStructureHandle(*GetImpl()); + return GetRayTracingInterface().GetAccelerationStructureHandle(*GetImpl()); } uint64_t AccelerationStructureVal::GetNativeObject() const { RETURN_ON_FAILURE(&m_Device, IsBoundToMemory(), 0, "GetAccelerationStructureNativeObject: AccelerationStructure is not bound to memory"); - return m_RayTracingAPI.GetAccelerationStructureNativeObject(*GetImpl()); + return GetRayTracingInterface().GetAccelerationStructureNativeObject(*GetImpl()); } Result AccelerationStructureVal::CreateDescriptor(Descriptor*& descriptor) { Descriptor* descriptorImpl = nullptr; - const Result result = m_RayTracingAPI.CreateAccelerationStructureDescriptor(*GetImpl(), descriptorImpl); + const Result result = GetRayTracingInterface().CreateAccelerationStructureDescriptor(*GetImpl(), descriptorImpl); if (result == 
Result::SUCCESS) { RETURN_ON_FAILURE(&m_Device, descriptorImpl != nullptr, Result::FAILURE, "CreateAccelerationStructureDescriptor: 'impl' is NULL"); @@ -56,7 +56,7 @@ Result AccelerationStructureVal::CreateDescriptor(Descriptor*& descriptor) { void AccelerationStructureVal::SetDebugName(const char* name) { m_Name = name; - m_RayTracingAPI.SetAccelerationStructureDebugName(*GetImpl(), name); + GetRayTracingInterface().SetAccelerationStructureDebugName(*GetImpl(), name); } #include "AccelerationStructureVal.hpp" diff --git a/Source/Validation/AccelerationStructureVal.h b/Source/Validation/AccelerationStructureVal.h index 4bc4433e..6149eb8c 100644 --- a/Source/Validation/AccelerationStructureVal.h +++ b/Source/Validation/AccelerationStructureVal.h @@ -7,8 +7,8 @@ namespace nri { struct MemoryVal; struct AccelerationStructureVal final : public DeviceObjectVal { - AccelerationStructureVal(DeviceVal& device, AccelerationStructure* accelerationStructure, bool isBoundToMemory) - : DeviceObjectVal(device, accelerationStructure), m_RayTracingAPI(device.GetRayTracingInterface()), m_IsBoundToMemory(isBoundToMemory) { + AccelerationStructureVal(DeviceVal& device, AccelerationStructure* accelerationStructure, bool isBoundToMemory) : + DeviceObjectVal(device, accelerationStructure), m_IsBoundToMemory(isBoundToMemory) { } ~AccelerationStructureVal(); @@ -33,8 +33,7 @@ struct AccelerationStructureVal final : public DeviceObjectVal { void* Map(uint64_t offset, uint64_t size); void Unmap(); - private: +private: MemoryVal* m_Memory = nullptr; bool m_IsBoundToMemory = false; bool m_IsMapped = false; diff --git a/Source/Validation/BufferVal.hpp b/Source/Validation/BufferVal.hpp index 57bc4299..e90840d2 100644 --- a/Source/Validation/BufferVal.hpp +++ b/Source/Validation/BufferVal.hpp @@ -27,4 +27,4 @@ static void NRI_CALL UnmapBuffer(Buffer& buffer) { #pragma endregion -Define_Core_Buffer_PartiallyFillFunctionTable(Val) +Define_Core_Buffer_PartiallyFillFunctionTable(Val); diff --git 
a/Source/Validation/CommandBufferVal.cpp b/Source/Validation/CommandBufferVal.cpp index 91b4bdb8..b9d7d7b1 100644 --- a/Source/Validation/CommandBufferVal.cpp +++ b/Source/Validation/CommandBufferVal.cpp @@ -482,7 +482,7 @@ void CommandBufferVal::BuildTopLevelAccelerationStructure( Buffer& scratchImpl = *NRI_GET_IMPL(Buffer, &scratch); Buffer& bufferImpl = *NRI_GET_IMPL(Buffer, &buffer); - m_RayTracingAPI.CmdBuildTopLevelAccelerationStructure(*GetImpl(), instanceNum, bufferImpl, bufferOffset, flags, dstImpl, scratchImpl, scratchOffset); + GetRayTracingInterface().CmdBuildTopLevelAccelerationStructure(*GetImpl(), instanceNum, bufferImpl, bufferOffset, flags, dstImpl, scratchImpl, scratchOffset); } void CommandBufferVal::BuildBottomLevelAccelerationStructure( @@ -501,7 +501,7 @@ void CommandBufferVal::BuildBottomLevelAccelerationStructure( Vector objectImplArray(geometryObjectNum, m_Device.GetStdAllocator()); ConvertGeometryObjectsVal(objectImplArray.data(), geometryObjects, geometryObjectNum); - m_RayTracingAPI.CmdBuildBottomLevelAccelerationStructure(*GetImpl(), geometryObjectNum, objectImplArray.data(), flags, dstImpl, scratchImpl, scratchOffset); + GetRayTracingInterface().CmdBuildBottomLevelAccelerationStructure(*GetImpl(), geometryObjectNum, objectImplArray.data(), flags, dstImpl, scratchImpl, scratchOffset); } void CommandBufferVal::UpdateTopLevelAccelerationStructure(uint32_t instanceNum, const Buffer& buffer, uint64_t bufferOffset, AccelerationStructureBuildBits flags, @@ -523,7 +523,7 @@ void CommandBufferVal::UpdateTopLevelAccelerationStructure(uint32_t instanceNum, Buffer& scratchImpl = *NRI_GET_IMPL(Buffer, &scratch); Buffer& bufferImpl = *NRI_GET_IMPL(Buffer, &buffer); - m_RayTracingAPI.CmdUpdateTopLevelAccelerationStructure(*GetImpl(), instanceNum, bufferImpl, bufferOffset, flags, dstImpl, srcImpl, scratchImpl, scratchOffset); + GetRayTracingInterface().CmdUpdateTopLevelAccelerationStructure(*GetImpl(), instanceNum, bufferImpl, bufferOffset, flags, 
dstImpl, srcImpl, scratchImpl, scratchOffset); } void CommandBufferVal::UpdateBottomLevelAccelerationStructure(uint32_t geometryObjectNum, const GeometryObject* geometryObjects, AccelerationStructureBuildBits flags, @@ -544,7 +544,7 @@ void CommandBufferVal::UpdateBottomLevelAccelerationStructure(uint32_t geometryO Vector objectImplArray(geometryObjectNum, m_Device.GetStdAllocator()); ConvertGeometryObjectsVal(objectImplArray.data(), geometryObjects, geometryObjectNum); - m_RayTracingAPI.CmdUpdateBottomLevelAccelerationStructure(*GetImpl(), geometryObjectNum, objectImplArray.data(), flags, dstImpl, srcImpl, scratchImpl, scratchOffset); + GetRayTracingInterface().CmdUpdateBottomLevelAccelerationStructure(*GetImpl(), geometryObjectNum, objectImplArray.data(), flags, dstImpl, srcImpl, scratchImpl, scratchOffset); } void CommandBufferVal::CopyAccelerationStructure(AccelerationStructure& dst, AccelerationStructure& src, CopyMode copyMode) { @@ -555,7 +555,7 @@ void CommandBufferVal::CopyAccelerationStructure(AccelerationStructure& dst, Acc AccelerationStructure& dstImpl = *NRI_GET_IMPL(AccelerationStructure, &dst); AccelerationStructure& srcImpl = *NRI_GET_IMPL(AccelerationStructure, &src); - m_RayTracingAPI.CmdCopyAccelerationStructure(*GetImpl(), dstImpl, srcImpl, copyMode); + GetRayTracingInterface().CmdCopyAccelerationStructure(*GetImpl(), dstImpl, srcImpl, copyMode); } void CommandBufferVal::WriteAccelerationStructureSize( @@ -573,7 +573,7 @@ void CommandBufferVal::WriteAccelerationStructureSize( QueryPool& queryPoolImpl = *NRI_GET_IMPL(QueryPool, &queryPool); - m_RayTracingAPI.CmdWriteAccelerationStructureSize(*GetImpl(), accelerationStructures, accelerationStructureNum, queryPoolImpl, queryOffset); + GetRayTracingInterface().CmdWriteAccelerationStructureSize(*GetImpl(), accelerationStructures, accelerationStructureNum, queryPoolImpl, queryOffset); } void CommandBufferVal::DispatchRays(const DispatchRaysDesc& dispatchRaysDesc) { @@ -593,7 +593,7 @@ void 
CommandBufferVal::DispatchRays(const DispatchRaysDesc& dispatchRaysDesc) { dispatchRaysDescImpl.hitShaderGroups.buffer = NRI_GET_IMPL(Buffer, dispatchRaysDesc.hitShaderGroups.buffer); dispatchRaysDescImpl.callableShaders.buffer = NRI_GET_IMPL(Buffer, dispatchRaysDesc.callableShaders.buffer); - m_RayTracingAPI.CmdDispatchRays(*GetImpl(), dispatchRaysDescImpl); + GetRayTracingInterface().CmdDispatchRays(*GetImpl(), dispatchRaysDescImpl); } void CommandBufferVal::DispatchRaysIndirect(const Buffer& buffer, uint64_t offset) { @@ -601,11 +601,11 @@ void CommandBufferVal::DispatchRaysIndirect(const Buffer& buffer, uint64_t offse RETURN_ON_FAILURE(&m_Device, offset < bufferDesc.size, ReturnVoid(), "CmdDrawMeshTasksIndirect: offset is greater than the buffer size"); Buffer* bufferImpl = NRI_GET_IMPL(Buffer, &buffer); - m_RayTracingAPI.CmdDispatchRaysIndirect(*GetImpl(), *bufferImpl, offset); + GetRayTracingInterface().CmdDispatchRaysIndirect(*GetImpl(), *bufferImpl, offset); } void CommandBufferVal::DrawMeshTasks(const DrawMeshTasksDesc& drawMeshTasksDesc) { - m_MeshShaderAPI.CmdDrawMeshTasks(*GetImpl(), drawMeshTasksDesc); + GetMeshShaderInterface().CmdDrawMeshTasks(*GetImpl(), drawMeshTasksDesc); } void CommandBufferVal::DrawMeshTasksIndirect(const Buffer& buffer, uint64_t offset, uint32_t drawNum, uint32_t stride) { @@ -613,7 +613,7 @@ void CommandBufferVal::DrawMeshTasksIndirect(const Buffer& buffer, uint64_t offs RETURN_ON_FAILURE(&m_Device, offset < bufferDesc.size, ReturnVoid(), "CmdDrawMeshTasksIndirect: offset is greater than the buffer size"); Buffer* bufferImpl = NRI_GET_IMPL(Buffer, &buffer); - m_MeshShaderAPI.CmdDrawMeshTasksIndirect(*GetImpl(), *bufferImpl, offset, drawNum, stride); + GetMeshShaderInterface().CmdDrawMeshTasksIndirect(*GetImpl(), *bufferImpl, offset, drawNum, stride); } template diff --git a/Source/Validation/CommandBufferVal.h b/Source/Validation/CommandBufferVal.h index 88ca69c2..9d6f85bd 100644 --- a/Source/Validation/CommandBufferVal.h +++ 
b/Source/Validation/CommandBufferVal.h @@ -5,13 +5,8 @@ namespace nri { struct CommandBufferVal : public DeviceObjectVal { - CommandBufferVal(DeviceVal& device, CommandBuffer* commandBuffer, bool isWrapped) - : DeviceObjectVal(device, commandBuffer), - m_ValidationCommands(device.GetStdAllocator()), - m_RayTracingAPI(device.GetRayTracingInterface()), - m_MeshShaderAPI(device.GetMeshShaderInterface()), - m_IsRecordingStarted(isWrapped), - m_IsWrapped(isWrapped) { + CommandBufferVal(DeviceVal& device, CommandBuffer* commandBuffer, bool isWrapped) : + DeviceObjectVal(device, commandBuffer), m_ValidationCommands(device.GetStdAllocator()), m_IsRecordingStarted(isWrapped), m_IsWrapped(isWrapped) { } inline const Vector& GetValidationCommands() const { @@ -83,13 +78,11 @@ struct CommandBufferVal : public DeviceObjectVal { void DrawMeshTasks(const DrawMeshTasksDesc& drawMeshTasksDesc); void DrawMeshTasksIndirect(const Buffer& buffer, uint64_t offset, uint32_t drawNum, uint32_t stride); - private: +private: template Command& AllocateValidationCommand(); Vector m_ValidationCommands; - const RayTracingInterface& m_RayTracingAPI; - const MeshShaderInterface& m_MeshShaderAPI; int32_t m_AnnotationStack = 0; bool m_IsRecordingStarted = false; bool m_IsWrapped = false; diff --git a/Source/Validation/CommandBufferVal.hpp b/Source/Validation/CommandBufferVal.hpp index 882ec7d3..4882c998 100644 --- a/Source/Validation/CommandBufferVal.hpp +++ b/Source/Validation/CommandBufferVal.hpp @@ -227,5 +227,6 @@ static void NRI_CALL CmdDrawMeshTasksIndirect(CommandBuffer& commandBuffer, cons #pragma endregion -Define_Core_CommandBuffer_PartiallyFillFunctionTable(Val) Define_RayTracing_CommandBuffer_PartiallyFillFunctionTable(Val) - Define_MeshShader_CommandBuffer_PartiallyFillFunctionTable(Val) +Define_Core_CommandBuffer_PartiallyFillFunctionTable(Val); +Define_RayTracing_CommandBuffer_PartiallyFillFunctionTable(Val); +Define_MeshShader_CommandBuffer_PartiallyFillFunctionTable(Val); diff --git 
a/Source/Validation/CommandQueueVal.cpp b/Source/Validation/CommandQueueVal.cpp index 01747adb..f2c04a81 100644 --- a/Source/Validation/CommandQueueVal.cpp +++ b/Source/Validation/CommandQueueVal.cpp @@ -6,7 +6,9 @@ #include "BufferVal.h" #include "CommandBufferVal.h" #include "CommandQueueVal.h" +#include "FenceVal.h" #include "QueryPoolVal.h" +#include "SwapChainVal.h" #include "TextureVal.h" using namespace nri; @@ -65,15 +67,35 @@ void CommandQueueVal::SetDebugName(const char* name) { GetCoreInterface().SetCommandQueueDebugName(*GetImpl(), name); } -void CommandQueueVal::Submit(const QueueSubmitDesc& queueSubmitDesc) { +void CommandQueueVal::Submit(const QueueSubmitDesc& queueSubmitDesc, const SwapChain* swapChain) { ProcessValidationCommands((const CommandBufferVal* const*)queueSubmitDesc.commandBuffers, queueSubmitDesc.commandBufferNum); auto queueSubmitDescImpl = queueSubmitDesc; - queueSubmitDescImpl.commandBuffers = STACK_ALLOC(CommandBuffer*, queueSubmitDesc.commandBufferNum); + + FenceSubmitDesc* waitFences = STACK_ALLOC(FenceSubmitDesc, queueSubmitDesc.waitFenceNum); + for (uint32_t i = 0; i < queueSubmitDesc.waitFenceNum; i++) { + waitFences[i] = queueSubmitDesc.waitFences[i]; + waitFences[i].fence = NRI_GET_IMPL(Fence, waitFences[i].fence); + } + queueSubmitDescImpl.waitFences = waitFences; + + CommandBuffer** commandBuffers = STACK_ALLOC(CommandBuffer*, queueSubmitDesc.commandBufferNum); for (uint32_t i = 0; i < queueSubmitDesc.commandBufferNum; i++) - ((CommandBuffer**)queueSubmitDescImpl.commandBuffers)[i] = NRI_GET_IMPL(CommandBuffer, queueSubmitDesc.commandBuffers[i]); + commandBuffers[i] = NRI_GET_IMPL(CommandBuffer, queueSubmitDesc.commandBuffers[i]); + queueSubmitDescImpl.commandBuffers = commandBuffers; + + FenceSubmitDesc* signalFences = STACK_ALLOC(FenceSubmitDesc, queueSubmitDesc.signalFenceNum); + for (uint32_t i = 0; i < queueSubmitDesc.signalFenceNum; i++) { + signalFences[i] = queueSubmitDesc.signalFences[i]; + signalFences[i].fence = 
NRI_GET_IMPL(Fence, signalFences[i].fence); + } + queueSubmitDescImpl.signalFences = signalFences; - GetCoreInterface().QueueSubmit(*GetImpl(), queueSubmitDescImpl); + if (swapChain) { + SwapChain* swapChainImpl = NRI_GET_IMPL(SwapChain, swapChain); + m_Device.GetLowLatencyInterface().QueueSubmitTrackable(*GetImpl(), queueSubmitDescImpl, *swapChainImpl); + } else + GetCoreInterface().QueueSubmit(*GetImpl(), queueSubmitDescImpl); } Result CommandQueueVal::UploadData( @@ -105,11 +127,11 @@ Result CommandQueueVal::UploadData( bufferUploadDescsImpl[i].buffer = bufferVal->GetImpl(); } - return m_HelperAPI.UploadData(*GetImpl(), textureUploadDescsImpl, textureUploadDescNum, bufferUploadDescsImpl, bufferUploadDescNum); + return GetHelperInterface().UploadData(*GetImpl(), textureUploadDescsImpl, textureUploadDescNum, bufferUploadDescsImpl, bufferUploadDescNum); } Result CommandQueueVal::WaitForIdle() { - return m_HelperAPI.WaitForIdle(*GetImpl()); + return GetHelperInterface().WaitForIdle(*GetImpl()); } template diff --git a/Source/Validation/CommandQueueVal.h b/Source/Validation/CommandQueueVal.h index 9d5054d1..8a19c472 100644 --- a/Source/Validation/CommandQueueVal.h +++ b/Source/Validation/CommandQueueVal.h @@ -7,25 +7,23 @@ namespace nri { struct CommandBufferVal; struct CommandQueueVal : public DeviceObjectVal { - CommandQueueVal(DeviceVal& device, CommandQueue* commandQueue) : DeviceObjectVal(device, commandQueue), m_HelperAPI(device.GetHelperInterface()) { + inline CommandQueueVal(DeviceVal& device, CommandQueue* commandQueue) : DeviceObjectVal(device, commandQueue) { } //================================================================================================================ // NRI //================================================================================================================ void SetDebugName(const char* name); - void Submit(const QueueSubmitDesc& queueSubmitDesc); + void Submit(const QueueSubmitDesc& queueSubmitDesc, const SwapChain* 
swapChain); Result WaitForIdle(); Result UploadData(const TextureUploadDesc* textureUploadDescs, uint32_t textureUploadDescNum, const BufferUploadDesc* bufferUploadDescs, uint32_t bufferUploadDescNum); - private: +private: void ProcessValidationCommands(const CommandBufferVal* const* commandBuffers, uint32_t commandBufferNum); void ProcessValidationCommandBeginQuery(const uint8_t*& begin, const uint8_t* end); void ProcessValidationCommandEndQuery(const uint8_t*& begin, const uint8_t* end); void ProcessValidationCommandResetQuery(const uint8_t*& begin, const uint8_t* end); - - const HelperInterface& m_HelperAPI; }; } // namespace nri diff --git a/Source/Validation/CommandQueueVal.hpp b/Source/Validation/CommandQueueVal.hpp index 56264fd2..0e8f3580 100644 --- a/Source/Validation/CommandQueueVal.hpp +++ b/Source/Validation/CommandQueueVal.hpp @@ -7,7 +7,7 @@ static void NRI_CALL SetCommandQueueDebugName(CommandQueue& commandQueue, const } static void NRI_CALL QueueSubmit(CommandQueue& commandQueue, const QueueSubmitDesc& queueSubmitDesc) { - ((CommandQueueVal&)commandQueue).Submit(queueSubmitDesc); + ((CommandQueueVal&)commandQueue).Submit(queueSubmitDesc, nullptr); } #pragma endregion @@ -28,4 +28,14 @@ static Result NRI_CALL WaitForIdle(CommandQueue& commandQueue) { #pragma endregion -Define_Core_CommandQueue_PartiallyFillFunctionTable(Val) Define_Helper_CommandQueue_PartiallyFillFunctionTable(Val) +#pragma region[ Low latency ] + +static void NRI_CALL QueueSubmitTrackable(CommandQueue& commandQueue, const QueueSubmitDesc& workSubmissionDesc, const SwapChain& swapChain) { + ((CommandQueueVal&)commandQueue).Submit(workSubmissionDesc, &swapChain); +} + +#pragma endregion + +Define_Core_CommandQueue_PartiallyFillFunctionTable(Val); +Define_Helper_CommandQueue_PartiallyFillFunctionTable(Val); +Define_LowLatency_CommandQueue_PartiallyFillFunctionTable(Val); diff --git a/Source/Validation/DescriptorPoolVal.h b/Source/Validation/DescriptorPoolVal.h index b3b34d02..80a18460 
100644 --- a/Source/Validation/DescriptorPoolVal.h +++ b/Source/Validation/DescriptorPoolVal.h @@ -7,10 +7,10 @@ namespace nri { struct DescriptorPoolVal : public DeviceObjectVal { - DescriptorPoolVal(DeviceVal& device, DescriptorPool* descriptorPool, uint32_t descriptorSetMaxNum) - : DeviceObjectVal(device, descriptorPool), - m_DescriptorSets(device.GetStdAllocator()), - m_SkipValidation(true) // TODO: we have to request "DescriptorPoolDesc" in "DescriptorPoolVKDesc" + DescriptorPoolVal(DeviceVal& device, DescriptorPool* descriptorPool, uint32_t descriptorSetMaxNum) : + DeviceObjectVal(device, descriptorPool), + m_DescriptorSets(device.GetStdAllocator()), + m_SkipValidation(true) // TODO: we have to request "DescriptorPoolDesc" in "DescriptorPoolVKDesc" { m_Desc.descriptorSetMaxNum = descriptorSetMaxNum; m_DescriptorSets.reserve(m_Desc.descriptorSetMaxNum); @@ -18,8 +18,8 @@ struct DescriptorPoolVal : public DeviceObjectVal { m_DescriptorSets.emplace_back(DescriptorSetVal(device)); } - DescriptorPoolVal(DeviceVal& device, DescriptorPool* descriptorPool, const DescriptorPoolDesc& descriptorPoolDesc) - : DeviceObjectVal(device, descriptorPool), m_DescriptorSets(device.GetStdAllocator()), m_Desc(descriptorPoolDesc) { + DescriptorPoolVal(DeviceVal& device, DescriptorPool* descriptorPool, const DescriptorPoolDesc& descriptorPoolDesc) : + DeviceObjectVal(device, descriptorPool), m_DescriptorSets(device.GetStdAllocator()), m_Desc(descriptorPoolDesc) { m_DescriptorSets.reserve(m_Desc.descriptorSetMaxNum); for (uint32_t i = 0; i < m_Desc.descriptorSetMaxNum; i++) m_DescriptorSets.emplace_back(DescriptorSetVal(device)); @@ -34,7 +34,7 @@ struct DescriptorPoolVal : public DeviceObjectVal { Result AllocateDescriptorSets( const PipelineLayout& pipelineLayout, uint32_t setIndexInPipelineLayout, DescriptorSet** descriptorSets, uint32_t instanceNum, uint32_t variableDescriptorNum); - private: +private: bool CheckDescriptorRange(const DescriptorRangeDesc& rangeDesc, uint32_t 
variableDescriptorNum); void IncrementDescriptorNum(const DescriptorRangeDesc& rangeDesc, uint32_t variableDescriptorNum); diff --git a/Source/Validation/DescriptorPoolVal.hpp b/Source/Validation/DescriptorPoolVal.hpp index 98e8f1da..0fd7c746 100644 --- a/Source/Validation/DescriptorPoolVal.hpp +++ b/Source/Validation/DescriptorPoolVal.hpp @@ -17,4 +17,4 @@ static void NRI_CALL ResetDescriptorPool(DescriptorPool& descriptorPool) { #pragma endregion -Define_Core_DescriptorPool_PartiallyFillFunctionTable(Val) +Define_Core_DescriptorPool_PartiallyFillFunctionTable(Val); diff --git a/Source/Validation/DescriptorSetVal.h b/Source/Validation/DescriptorSetVal.h index b1119320..9586c683 100644 --- a/Source/Validation/DescriptorSetVal.h +++ b/Source/Validation/DescriptorSetVal.h @@ -25,7 +25,7 @@ struct DescriptorSetVal : public DeviceObjectVal { void UpdateDynamicConstantBuffers(uint32_t baseBuffer, uint32_t bufferNum, const Descriptor* const* descriptors); void Copy(const DescriptorSetCopyDesc& descriptorSetCopyDesc); - private: +private: const DescriptorSetDesc* m_Desc = nullptr; }; diff --git a/Source/Validation/DescriptorSetVal.hpp b/Source/Validation/DescriptorSetVal.hpp index b96d3450..62675671 100644 --- a/Source/Validation/DescriptorSetVal.hpp +++ b/Source/Validation/DescriptorSetVal.hpp @@ -20,4 +20,4 @@ static void NRI_CALL CopyDescriptorSet(DescriptorSet& descriptorSet, const Descr #pragma endregion -Define_Core_DescriptorSet_PartiallyFillFunctionTable(Val) +Define_Core_DescriptorSet_PartiallyFillFunctionTable(Val); diff --git a/Source/Validation/DescriptorVal.cpp b/Source/Validation/DescriptorVal.cpp index b1faeae9..c457726c 100644 --- a/Source/Validation/DescriptorVal.cpp +++ b/Source/Validation/DescriptorVal.cpp @@ -10,8 +10,8 @@ using namespace nri; DescriptorVal::DescriptorVal(DeviceVal& device, Descriptor* descriptor, ResourceType resourceType) : DeviceObjectVal(device, descriptor), m_ResourceType(resourceType) { } -DescriptorVal::DescriptorVal(DeviceVal& 
device, Descriptor* descriptor, const BufferViewDesc& bufferViewDesc) - : DeviceObjectVal(device, descriptor), m_ResourceType(ResourceType::BUFFER) { +DescriptorVal::DescriptorVal(DeviceVal& device, Descriptor* descriptor, const BufferViewDesc& bufferViewDesc) : + DeviceObjectVal(device, descriptor), m_ResourceType(ResourceType::BUFFER) { switch (bufferViewDesc.viewType) { case BufferViewType::CONSTANT: m_ResourceViewType = ResourceViewType::CONSTANT_BUFFER_VIEW; @@ -28,8 +28,8 @@ DescriptorVal::DescriptorVal(DeviceVal& device, Descriptor* descriptor, const Bu } } -DescriptorVal::DescriptorVal(DeviceVal& device, Descriptor* descriptor, const Texture1DViewDesc& textureViewDesc) - : DeviceObjectVal(device, descriptor), m_ResourceType(ResourceType::TEXTURE) { +DescriptorVal::DescriptorVal(DeviceVal& device, Descriptor* descriptor, const Texture1DViewDesc& textureViewDesc) : + DeviceObjectVal(device, descriptor), m_ResourceType(ResourceType::TEXTURE) { switch (textureViewDesc.viewType) { case Texture1DViewType::SHADER_RESOURCE_1D: case Texture1DViewType::SHADER_RESOURCE_1D_ARRAY: @@ -51,8 +51,8 @@ DescriptorVal::DescriptorVal(DeviceVal& device, Descriptor* descriptor, const Te } } -DescriptorVal::DescriptorVal(DeviceVal& device, Descriptor* descriptor, const Texture2DViewDesc& textureViewDesc) - : DeviceObjectVal(device, descriptor), m_ResourceType(ResourceType::TEXTURE) { +DescriptorVal::DescriptorVal(DeviceVal& device, Descriptor* descriptor, const Texture2DViewDesc& textureViewDesc) : + DeviceObjectVal(device, descriptor), m_ResourceType(ResourceType::TEXTURE) { switch (textureViewDesc.viewType) { case Texture2DViewType::SHADER_RESOURCE_2D: case Texture2DViewType::SHADER_RESOURCE_2D_ARRAY: @@ -76,8 +76,8 @@ DescriptorVal::DescriptorVal(DeviceVal& device, Descriptor* descriptor, const Te } } -DescriptorVal::DescriptorVal(DeviceVal& device, Descriptor* descriptor, const Texture3DViewDesc& textureViewDesc) - : DeviceObjectVal(device, descriptor), 
m_ResourceType(ResourceType::TEXTURE) { +DescriptorVal::DescriptorVal(DeviceVal& device, Descriptor* descriptor, const Texture3DViewDesc& textureViewDesc) : + DeviceObjectVal(device, descriptor), m_ResourceType(ResourceType::TEXTURE) { switch (textureViewDesc.viewType) { case Texture3DViewType::SHADER_RESOURCE_3D: m_ResourceViewType = ResourceViewType::SHADER_RESOURCE; diff --git a/Source/Validation/DescriptorVal.h b/Source/Validation/DescriptorVal.h index ec6dfc62..04de5a4d 100644 --- a/Source/Validation/DescriptorVal.h +++ b/Source/Validation/DescriptorVal.h @@ -74,7 +74,7 @@ struct DescriptorVal : public DeviceObjectVal { //================================================================================================================ void SetDebugName(const char* name); - private: +private: ResourceType m_ResourceType = ResourceType::NONE; ResourceViewType m_ResourceViewType = ResourceViewType::NONE; }; diff --git a/Source/Validation/DescriptorVal.hpp b/Source/Validation/DescriptorVal.hpp index 1b16a785..4f970b5f 100644 --- a/Source/Validation/DescriptorVal.hpp +++ b/Source/Validation/DescriptorVal.hpp @@ -15,4 +15,4 @@ static uint64_t NRI_CALL GetDescriptorNativeObject(const Descriptor& descriptor) #pragma endregion -Define_Core_Descriptor_PartiallyFillFunctionTable(Val) +Define_Core_Descriptor_PartiallyFillFunctionTable(Val); diff --git a/Source/Validation/DeviceVal.cpp b/Source/Validation/DeviceVal.cpp index 5efedcbf..fe2faf92 100644 --- a/Source/Validation/DeviceVal.cpp +++ b/Source/Validation/DeviceVal.cpp @@ -35,15 +35,13 @@ static inline bool IsShaderStageValid(StageBits shaderStages, StageBits allowedS void ConvertGeometryObjectsVal(GeometryObject* destObjects, const GeometryObject* sourceObjects, uint32_t objectNum); QueryType GetQueryTypeVK(uint32_t queryTypeVK); -DeviceVal::DeviceVal(const CallbackInterface& callbacks, const StdAllocator& stdAllocator, DeviceBase& device) - : DeviceBase(callbacks, stdAllocator), m_Device(*(Device*)&device), 
m_Name(GetStdAllocator()), m_MemoryTypeMap(GetStdAllocator()) { +DeviceVal::DeviceVal(const CallbackInterface& callbacks, const StdAllocator& stdAllocator, DeviceBase& device) : + DeviceBase(callbacks, stdAllocator), m_Device(*(Device*)&device), m_Name(GetStdAllocator()), m_MemoryTypeMap(GetStdAllocator()) { } DeviceVal::~DeviceVal() { - for (size_t i = 0; i < m_CommandQueues.size(); i++) { - if (m_CommandQueues[i]) - Deallocate(GetStdAllocator(), m_CommandQueues[i]); - } + for (size_t i = 0; i < m_CommandQueues.size(); i++) + Deallocate(GetStdAllocator(), m_CommandQueues[i]); ((DeviceBase*)&m_Device)->Destroy(); } @@ -55,13 +53,23 @@ bool DeviceVal::Create() { return false; } - m_IsSwapChainSupported = deviceBase.FillFunctionTable(m_SwapChainAPI) == Result::SUCCESS; + if (deviceBase.FillFunctionTable(m_HelperAPI) != Result::SUCCESS) { + REPORT_ERROR(this, "Failed to get 'HelperInterface' interface"); + return false; + } + + if (deviceBase.FillFunctionTable(m_StreamerAPI) != Result::SUCCESS) { + REPORT_ERROR(this, "Failed to get 'StreamerInterface' interface"); + return false; + } + m_IsWrapperD3D11Supported = deviceBase.FillFunctionTable(m_WrapperD3D11API) == Result::SUCCESS; m_IsWrapperD3D12Supported = deviceBase.FillFunctionTable(m_WrapperD3D12API) == Result::SUCCESS; m_IsWrapperVKSupported = deviceBase.FillFunctionTable(m_WrapperVKAPI) == Result::SUCCESS; + m_IsSwapChainSupported = deviceBase.FillFunctionTable(m_SwapChainAPI) == Result::SUCCESS; m_IsRayTracingSupported = deviceBase.FillFunctionTable(m_RayTracingAPI) == Result::SUCCESS; - m_IsMeshShaderExtSupported = deviceBase.FillFunctionTable(m_MeshShaderAPI) == Result::SUCCESS; - deviceBase.FillFunctionTable(m_HelperAPI); + m_IsMeshShaderSupported = deviceBase.FillFunctionTable(m_MeshShaderAPI) == Result::SUCCESS; + m_IsLowLatencySupported = deviceBase.FillFunctionTable(m_LowLatencyAPI) == Result::SUCCESS; return true; } diff --git a/Source/Validation/DeviceVal.h b/Source/Validation/DeviceVal.h index 
0d4cd7c4..3cd0417f 100644 --- a/Source/Validation/DeviceVal.h +++ b/Source/Validation/DeviceVal.h @@ -13,12 +13,20 @@ struct DeviceVal final : public DeviceBase { bool Create(); void RegisterMemoryType(MemoryType memoryType, MemoryLocation memoryLocation); + inline Device& GetImpl() const { + return m_Device; + } + inline const CoreInterface& GetCoreInterface() const { return m_CoreAPI; } - inline const SwapChainInterface& GetSwapChainInterface() const { - return m_SwapChainAPI; + inline const HelperInterface& GetHelperInterface() const { + return m_HelperAPI; + } + + inline const StreamerInterface& GetStreamerInterface() const { + return m_StreamerAPI; } inline const WrapperD3D11Interface& GetWrapperD3D11Interface() const { @@ -33,6 +41,10 @@ struct DeviceVal final : public DeviceBase { return m_WrapperVKAPI; } + inline const SwapChainInterface& GetSwapChainInterface() const { + return m_SwapChainAPI; + } + inline const RayTracingInterface& GetRayTracingInterface() const { return m_RayTracingAPI; } @@ -41,30 +53,14 @@ struct DeviceVal final : public DeviceBase { return m_MeshShaderAPI; } - inline const HelperInterface& GetHelperInterface() const { - return m_HelperAPI; + inline const LowLatencyInterface& GetLowLatencyInterface() const { + return m_LowLatencyAPI; } inline void* GetNativeObject() const { return m_CoreAPI.GetDeviceNativeObject(m_Device); } - NRIVkPhysicalDevice GetVkPhysicalDevice() const { - return m_WrapperVKAPI.GetVkPhysicalDevice(m_Device); - } - - NRIVkInstance GetVkInstance() const { - return m_WrapperVKAPI.GetVkInstance(m_Device); - } - - void* GetVkGetInstanceProcAddr() const { - return m_WrapperVKAPI.GetVkGetInstanceProcAddr(m_Device); - } - - void* GetVkGetDeviceProcAddr() const { - return m_WrapperVKAPI.GetVkGetDeviceProcAddr(m_Device); - } - inline Lock& GetLock() { return m_Lock; } @@ -142,33 +138,38 @@ struct DeviceVal final : public DeviceBase { void Destroy(); Result FillFunctionTable(CoreInterface& table) const; - Result 
FillFunctionTable(SwapChainInterface& table) const; + Result FillFunctionTable(HelperInterface& table) const; + Result FillFunctionTable(StreamerInterface& streamerInterface) const; Result FillFunctionTable(WrapperD3D11Interface& table) const; Result FillFunctionTable(WrapperD3D12Interface& table) const; Result FillFunctionTable(WrapperVKInterface& table) const; + Result FillFunctionTable(SwapChainInterface& table) const; Result FillFunctionTable(RayTracingInterface& table) const; Result FillFunctionTable(MeshShaderInterface& table) const; - Result FillFunctionTable(HelperInterface& table) const; + Result FillFunctionTable(LowLatencyInterface& table) const; - private: +private: Device& m_Device; String m_Name; CoreInterface m_CoreAPI = {}; - SwapChainInterface m_SwapChainAPI = {}; + HelperInterface m_HelperAPI = {}; + StreamerInterface m_StreamerAPI = {}; WrapperD3D11Interface m_WrapperD3D11API = {}; WrapperD3D12Interface m_WrapperD3D12API = {}; WrapperVKInterface m_WrapperVKAPI = {}; + SwapChainInterface m_SwapChainAPI = {}; RayTracingInterface m_RayTracingAPI = {}; MeshShaderInterface m_MeshShaderAPI = {}; - HelperInterface m_HelperAPI = {}; - std::array m_CommandQueues = {}; + LowLatencyInterface m_LowLatencyAPI = {}; + std::array m_CommandQueues = {}; UnorderedMap m_MemoryTypeMap; - bool m_IsSwapChainSupported = false; bool m_IsWrapperD3D11Supported = false; bool m_IsWrapperD3D12Supported = false; bool m_IsWrapperVKSupported = false; + bool m_IsSwapChainSupported = false; bool m_IsRayTracingSupported = false; - bool m_IsMeshShaderExtSupported = false; + bool m_IsMeshShaderSupported = false; + bool m_IsLowLatencySupported = false; Lock m_Lock; }; diff --git a/Source/Validation/DeviceVal.hpp b/Source/Validation/DeviceVal.hpp index 5482ab72..ee85f980 100644 --- a/Source/Validation/DeviceVal.hpp +++ b/Source/Validation/DeviceVal.hpp @@ -1,9 +1,10 @@ // © 2021 NVIDIA Corporation -Declare_PartiallyFillFunctionTable_Functions(Val) 
+Declare_PartiallyFillFunctionTable_Functions(Val); + #pragma region[ Core ] - static const DeviceDesc& NRI_CALL GetDeviceDesc(const Device& device) { +static const DeviceDesc& NRI_CALL GetDeviceDesc(const Device& device) { return ((const DeviceVal&)device).GetDesc(); } @@ -248,34 +249,6 @@ Result DeviceVal::FillFunctionTable(CoreInterface& coreInterface) const { #pragma endregion -#pragma region[ SwapChain ] - -static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) { - return ((DeviceVal&)device).CreateSwapChain(swapChainDesc, swapChain); -} - -static void NRI_CALL DestroySwapChain(SwapChain& swapChain) { - if (!(&swapChain)) - return; - - GetDeviceVal(swapChain).DestroySwapChain(swapChain); -} - -Result DeviceVal::FillFunctionTable(SwapChainInterface& swapChainInterface) const { - if (!m_IsSwapChainSupported) - return Result::UNSUPPORTED; - - swapChainInterface = {}; - swapChainInterface.CreateSwapChain = ::CreateSwapChain; - swapChainInterface.DestroySwapChain = ::DestroySwapChain; - - SwapChain_PartiallyFillFunctionTableVal(swapChainInterface); - - return ValidateFunctionTable(swapChainInterface); -} - -#pragma endregion - #pragma region[ WrapperD3D11 ] #if NRI_USE_D3D11 @@ -295,22 +268,45 @@ static Result NRI_CALL CreateTextureD3D11(Device& device, const TextureD3D11Desc #endif Result DeviceVal::FillFunctionTable(WrapperD3D11Interface& wrapperD3D11Interface) const { -#if NRI_USE_D3D11 wrapperD3D11Interface = {}; +#if NRI_USE_D3D11 + if (!m_IsWrapperD3D11Supported) + return Result::UNSUPPORTED; + wrapperD3D11Interface.CreateCommandBufferD3D11 = ::CreateCommandBufferD3D11; wrapperD3D11Interface.CreateBufferD3D11 = ::CreateBufferD3D11; wrapperD3D11Interface.CreateTextureD3D11 = ::CreateTextureD3D11; return ValidateFunctionTable(wrapperD3D11Interface); #else - MaybeUnused(wrapperD3D11Interface); - return Result::UNSUPPORTED; #endif } #pragma endregion +#pragma region[ Helper ] + +static uint32_t NRI_CALL 
CountAllocationNum(Device& device, const ResourceGroupDesc& resourceGroupDesc) { + return ((DeviceVal&)device).CalculateAllocationNumber(resourceGroupDesc); +} + +static Result NRI_CALL AllocateAndBindMemory(Device& device, const ResourceGroupDesc& resourceGroupDesc, Memory** allocations) { + return ((DeviceVal&)device).AllocateAndBindMemory(resourceGroupDesc, allocations); +} + +Result DeviceVal::FillFunctionTable(HelperInterface& helperInterface) const { + helperInterface = {}; + helperInterface.CalculateAllocationNumber = ::CountAllocationNum; + helperInterface.AllocateAndBindMemory = ::AllocateAndBindMemory; + + Helper_CommandQueue_PartiallyFillFunctionTableVal(helperInterface); + + return ValidateFunctionTable(helperInterface); +} + +#pragma endregion + #pragma region[ WrapperD3D12 ] #if NRI_USE_D3D12 @@ -339,8 +335,11 @@ static Result NRI_CALL CreateAccelerationStructureD3D12( #endif Result DeviceVal::FillFunctionTable(WrapperD3D12Interface& wrapperD3D12Interface) const { -#if NRI_USE_D3D12 wrapperD3D12Interface = {}; +#if NRI_USE_D3D12 + if (!m_IsWrapperD3D12Supported) + return Result::UNSUPPORTED; + wrapperD3D12Interface.CreateCommandBufferD3D12 = ::CreateCommandBufferD3D12; wrapperD3D12Interface.CreateBufferD3D12 = ::CreateBufferD3D12; wrapperD3D12Interface.CreateTextureD3D12 = ::CreateTextureD3D12; @@ -350,8 +349,6 @@ Result DeviceVal::FillFunctionTable(WrapperD3D12Interface& wrapperD3D12Interface return ValidateFunctionTable(wrapperD3D12Interface); #else - MaybeUnused(wrapperD3D12Interface); - return Result::UNSUPPORTED; #endif } @@ -408,26 +405,29 @@ static Result NRI_CALL CreateAccelerationStructureVK( } static NRIVkPhysicalDevice NRI_CALL GetVkPhysicalDevice(const Device& device) { - return ((DeviceVal&)device).GetVkPhysicalDevice(); + return ((DeviceVal&)device).GetWrapperVKInterface().GetVkPhysicalDevice(((DeviceVal&)device).GetImpl()); } static NRIVkInstance NRI_CALL GetVkInstance(const Device& device) { - return 
((DeviceVal&)device).GetVkInstance(); + return ((DeviceVal&)device).GetWrapperVKInterface().GetVkInstance(((DeviceVal&)device).GetImpl()); } static NRIVkInstance NRI_CALL GetVkGetInstanceProcAddr(const Device& device) { - return ((DeviceVal&)device).GetVkGetInstanceProcAddr(); + return ((DeviceVal&)device).GetWrapperVKInterface().GetVkGetInstanceProcAddr(((DeviceVal&)device).GetImpl()); } static NRIVkInstance NRI_CALL GetVkGetDeviceProcAddr(const Device& device) { - return ((DeviceVal&)device).GetVkGetDeviceProcAddr(); + return ((DeviceVal&)device).GetWrapperVKInterface().GetVkGetDeviceProcAddr(((DeviceVal&)device).GetImpl()); } #endif Result DeviceVal::FillFunctionTable(WrapperVKInterface& wrapperVKInterface) const { -#if NRI_USE_VULKAN wrapperVKInterface = {}; +#if NRI_USE_VULKAN + if (!m_IsWrapperVKSupported) + return Result::UNSUPPORTED; + wrapperVKInterface.CreateCommandQueueVK = ::CreateCommandQueueVK; wrapperVKInterface.CreateCommandAllocatorVK = ::CreateCommandAllocatorVK; wrapperVKInterface.CreateCommandBufferVK = ::CreateCommandBufferVK; @@ -447,14 +447,40 @@ Result DeviceVal::FillFunctionTable(WrapperVKInterface& wrapperVKInterface) cons return ValidateFunctionTable(wrapperVKInterface); #else - MaybeUnused(wrapperVKInterface); - return Result::UNSUPPORTED; #endif } #pragma endregion +#pragma region[ SwapChain ] + +static Result NRI_CALL CreateSwapChain(Device& device, const SwapChainDesc& swapChainDesc, SwapChain*& swapChain) { + return ((DeviceVal&)device).CreateSwapChain(swapChainDesc, swapChain); +} + +static void NRI_CALL DestroySwapChain(SwapChain& swapChain) { + if (!(&swapChain)) + return; + + GetDeviceVal(swapChain).DestroySwapChain(swapChain); +} + +Result DeviceVal::FillFunctionTable(SwapChainInterface& swapChainInterface) const { + swapChainInterface = {}; + if (!m_IsSwapChainSupported) + return Result::UNSUPPORTED; + + swapChainInterface.CreateSwapChain = ::CreateSwapChain; + swapChainInterface.DestroySwapChain = ::DestroySwapChain; + + 
SwapChain_PartiallyFillFunctionTableVal(swapChainInterface); + + return ValidateFunctionTable(swapChainInterface); +} + +#pragma endregion + #pragma region[ RayTracing ] static Result NRI_CALL CreateRayTracingPipeline(Device& device, const RayTracingPipelineDesc& pipelineDesc, Pipeline*& pipeline) { @@ -479,10 +505,10 @@ static void NRI_CALL DestroyAccelerationStructure(AccelerationStructure& acceler void FillFunctionTablePipelineVal(RayTracingInterface& rayTracingInterface); Result DeviceVal::FillFunctionTable(RayTracingInterface& rayTracingInterface) const { + rayTracingInterface = {}; if (!m_IsRayTracingSupported) return Result::UNSUPPORTED; - rayTracingInterface = {}; rayTracingInterface.CreateRayTracingPipeline = ::CreateRayTracingPipeline; rayTracingInterface.CreateAccelerationStructure = ::CreateAccelerationStructure; rayTracingInterface.BindAccelerationStructureMemory = ::BindAccelerationStructureMemory; @@ -500,10 +526,9 @@ Result DeviceVal::FillFunctionTable(RayTracingInterface& rayTracingInterface) co #pragma region[ MeshShader ] Result DeviceVal::FillFunctionTable(MeshShaderInterface& meshShaderInterface) const { - if (!m_IsMeshShaderExtSupported) - return Result::UNSUPPORTED; - meshShaderInterface = {}; + if (!m_IsMeshShaderSupported) + return Result::UNSUPPORTED; MeshShader_CommandBuffer_PartiallyFillFunctionTableVal(meshShaderInterface); @@ -512,24 +537,159 @@ Result DeviceVal::FillFunctionTable(MeshShaderInterface& meshShaderInterface) co #pragma endregion -#pragma region[ Helper ] +#pragma region[ LowLatency ] -static uint32_t NRI_CALL CountAllocationNum(Device& device, const ResourceGroupDesc& resourceGroupDesc) { - return ((DeviceVal&)device).CalculateAllocationNumber(resourceGroupDesc); +Result DeviceVal::FillFunctionTable(LowLatencyInterface& lowLatencyInterface) const { + lowLatencyInterface = {}; + if (!m_IsLowLatencySupported) + return Result::UNSUPPORTED; + + LowLatency_CommandQueue_PartiallyFillFunctionTableVal(lowLatencyInterface); + 
LowLatency_SwapChain_PartiallyFillFunctionTableVal(lowLatencyInterface); + + return ValidateFunctionTable(lowLatencyInterface); } -static Result NRI_CALL AllocateAndBindMemory(Device& device, const ResourceGroupDesc& resourceGroupDesc, Memory** allocations) { - return ((DeviceVal&)device).AllocateAndBindMemory(resourceGroupDesc, allocations); +#pragma endregion + +#pragma region[ Streamer ] + +struct StreamerVal : DeviceObjectVal { + inline StreamerVal(DeviceVal& device, Streamer* impl) : DeviceObjectVal(device, impl) { + } + + BufferVal* constantBuffer = nullptr; + BufferVal* dynamicBuffer = nullptr; + bool isDynamicBufferValid = false; +}; + +static Result CreateStreamer(Device& device, const StreamerDesc& streamerDesc, Streamer*& streamer) { + DeviceVal& deviceVal = (DeviceVal&)device; + bool isUpload = (streamerDesc.constantBufferMemoryLocation == MemoryLocation::HOST_UPLOAD || streamerDesc.constantBufferMemoryLocation == MemoryLocation::DEVICE_UPLOAD) && + (streamerDesc.dynamicBufferMemoryLocation == MemoryLocation::HOST_UPLOAD || streamerDesc.dynamicBufferMemoryLocation == MemoryLocation::DEVICE_UPLOAD); + RETURN_ON_FAILURE(&deviceVal, isUpload, Result::INVALID_ARGUMENT, "CreateStreamer: memory location must be an UPLOAD heap"); + + Streamer* impl = nullptr; + Result result = deviceVal.GetStreamerInterface().CreateStreamer(deviceVal.GetImpl(), streamerDesc, impl); + + if (result == Result::SUCCESS) { + RETURN_ON_FAILURE(&deviceVal, impl != nullptr, Result::FAILURE, "CreateStreamer: 'impl' is NULL"); + streamer = (Streamer*)Allocate(deviceVal.GetStdAllocator(), deviceVal, impl); + } + + return result; } -Result DeviceVal::FillFunctionTable(HelperInterface& helperInterface) const { - helperInterface = {}; - helperInterface.CalculateAllocationNumber = ::CountAllocationNum; - helperInterface.AllocateAndBindMemory = ::AllocateAndBindMemory; +static void DestroyStreamer(Streamer& streamer) { + DeviceVal& deviceVal = GetDeviceVal(streamer); + StreamerVal& 
streamerVal = (StreamerVal&)streamer; - Helper_CommandQueue_PartiallyFillFunctionTableVal(helperInterface); + streamerVal.GetStreamerInterface().DestroyStreamer(*NRI_GET_IMPL(Streamer, &streamer)); - return ValidateFunctionTable(helperInterface); + Deallocate(deviceVal.GetStdAllocator(), streamerVal.constantBuffer); + Deallocate(deviceVal.GetStdAllocator(), streamerVal.dynamicBuffer); + Deallocate(deviceVal.GetStdAllocator(), &streamerVal); +} + +static Buffer* GetStreamerConstantBuffer(Streamer& streamer) { + DeviceVal& deviceVal = GetDeviceVal(streamer); + StreamerVal& streamerVal = (StreamerVal&)streamer; + Buffer* buffer = streamerVal.GetStreamerInterface().GetStreamerConstantBuffer(*NRI_GET_IMPL(Streamer, &streamer)); + + if (!streamerVal.constantBuffer) + streamerVal.constantBuffer = Allocate(deviceVal.GetStdAllocator(), deviceVal, buffer); + + return (Buffer*)streamerVal.constantBuffer; +} + +static uint32_t UpdateStreamerConstantBuffer(Streamer& streamer, const void* data, uint32_t dataSize) { + DeviceVal& deviceVal = GetDeviceVal(streamer); + StreamerVal& streamerVal = (StreamerVal&)streamer; + + if (!dataSize) + REPORT_WARNING(&deviceVal, "UpdateStreamerConstantBuffer: 'dataSize = 0'"); + + return streamerVal.GetStreamerInterface().UpdateStreamerConstantBuffer(*NRI_GET_IMPL(Streamer, &streamer), data, dataSize); +} + +static uint64_t AddStreamerBufferUpdateRequest(Streamer& streamer, const BufferUpdateRequestDesc& bufferUpdateRequestDesc) { + DeviceVal& deviceVal = GetDeviceVal(streamer); + StreamerVal& streamerVal = (StreamerVal&)streamer; + streamerVal.isDynamicBufferValid = false; + + if (!bufferUpdateRequestDesc.dataSize) + REPORT_WARNING(&deviceVal, "AddStreamerBufferUpdateRequest: 'bufferUpdateRequestDesc.dataSize = 0'"); + + BufferUpdateRequestDesc bufferUpdateRequestDescImpl = bufferUpdateRequestDesc; + bufferUpdateRequestDescImpl.dstBuffer = NRI_GET_IMPL(Buffer, bufferUpdateRequestDesc.dstBuffer); + + return 
streamerVal.GetStreamerInterface().AddStreamerBufferUpdateRequest(*NRI_GET_IMPL(Streamer, &streamer), bufferUpdateRequestDescImpl); +} + +static uint64_t AddStreamerTextureUpdateRequest(Streamer& streamer, const TextureUpdateRequestDesc& textureUpdateRequestDesc) { + DeviceVal& deviceVal = GetDeviceVal(streamer); + StreamerVal& streamerVal = (StreamerVal&)streamer; + streamerVal.isDynamicBufferValid = false; + + if (!textureUpdateRequestDesc.dstTexture) + REPORT_ERROR(&deviceVal, "AddStreamerTextureUpdateRequest: 'textureUpdateRequestDesc.dstTexture' is NULL"); + if (!textureUpdateRequestDesc.dataRowPitch) + REPORT_WARNING(&deviceVal, "AddStreamerTextureUpdateRequest: 'textureUpdateRequestDesc.dataRowPitch = 0'"); + if (!textureUpdateRequestDesc.dataSlicePitch) + REPORT_WARNING(&deviceVal, "AddStreamerTextureUpdateRequest: 'textureUpdateRequestDesc.dataSlicePitch = 0'"); + + TextureUpdateRequestDesc textureUpdateRequestDescImpl = textureUpdateRequestDesc; + textureUpdateRequestDescImpl.dstTexture = NRI_GET_IMPL(Texture, textureUpdateRequestDesc.dstTexture); + + return streamerVal.GetStreamerInterface().AddStreamerTextureUpdateRequest(*NRI_GET_IMPL(Streamer, &streamer), textureUpdateRequestDescImpl); +} + +static Result CopyStreamerUpdateRequests(Streamer& streamer) { + StreamerVal& streamerVal = (StreamerVal&)streamer; + streamerVal.isDynamicBufferValid = true; + + return streamerVal.GetStreamerInterface().CopyStreamerUpdateRequests(*NRI_GET_IMPL(Streamer, &streamer)); +} + +static Buffer* GetStreamerDynamicBuffer(Streamer& streamer) { + DeviceVal& deviceVal = GetDeviceVal(streamer); + StreamerVal& streamerVal = (StreamerVal&)streamer; + + if (!streamerVal.isDynamicBufferValid) + REPORT_ERROR(&deviceVal, "'GetStreamerDynamicBuffer' must be called after 'CopyStreamerUpdateRequests'"); + + Buffer* buffer = streamerVal.GetStreamerInterface().GetStreamerDynamicBuffer(*NRI_GET_IMPL(Streamer, &streamer)); + + if (NRI_GET_IMPL(Buffer, streamerVal.dynamicBuffer) != buffer) 
{ + Deallocate(deviceVal.GetStdAllocator(), streamerVal.dynamicBuffer); + streamerVal.dynamicBuffer = nullptr; + } + + if (!streamerVal.dynamicBuffer) + streamerVal.dynamicBuffer = Allocate(deviceVal.GetStdAllocator(), deviceVal, buffer); + + return (Buffer*)streamerVal.dynamicBuffer; +} + +static void CmdUploadStreamerUpdateRequests(CommandBuffer& commandBuffer, Streamer& streamer) { + StreamerVal& streamerVal = (StreamerVal&)streamer; + + streamerVal.GetStreamerInterface().CmdUploadStreamerUpdateRequests(*NRI_GET_IMPL(CommandBuffer, &commandBuffer), *NRI_GET_IMPL(Streamer, &streamer)); +} + +Result DeviceVal::FillFunctionTable(StreamerInterface& streamerInterface) const { + streamerInterface = {}; + streamerInterface.CreateStreamer = ::CreateStreamer; + streamerInterface.DestroyStreamer = ::DestroyStreamer; + streamerInterface.GetStreamerConstantBuffer = ::GetStreamerConstantBuffer; + streamerInterface.UpdateStreamerConstantBuffer = ::UpdateStreamerConstantBuffer; + streamerInterface.AddStreamerBufferUpdateRequest = ::AddStreamerBufferUpdateRequest; + streamerInterface.AddStreamerTextureUpdateRequest = ::AddStreamerTextureUpdateRequest; + streamerInterface.CopyStreamerUpdateRequests = ::CopyStreamerUpdateRequests; + streamerInterface.GetStreamerDynamicBuffer = ::GetStreamerDynamicBuffer; + streamerInterface.CmdUploadStreamerUpdateRequests = ::CmdUploadStreamerUpdateRequests; + + return ValidateFunctionTable(streamerInterface); } #pragma endregion diff --git a/Source/Validation/FenceVal.cpp b/Source/Validation/FenceVal.cpp index effdac8c..92ef281b 100644 --- a/Source/Validation/FenceVal.cpp +++ b/Source/Validation/FenceVal.cpp @@ -16,14 +16,6 @@ inline uint64_t FenceVal::GetFenceValue() const { return GetCoreInterface().GetFenceValue(*GetImpl()); } -inline void FenceVal::QueueSignal(CommandQueueVal& commandQueue, uint64_t value) { - GetCoreInterface().QueueSignal(*commandQueue.GetImpl(), *GetImpl(), value); -} - -inline void FenceVal::QueueWait(CommandQueueVal& 
commandQueue, uint64_t value) { - GetCoreInterface().QueueWait(*commandQueue.GetImpl(), *GetImpl(), value); -} - inline void FenceVal::Wait(uint64_t value) { GetCoreInterface().Wait(*GetImpl(), value); } diff --git a/Source/Validation/FenceVal.h b/Source/Validation/FenceVal.h index f9264a4b..9e655e45 100644 --- a/Source/Validation/FenceVal.h +++ b/Source/Validation/FenceVal.h @@ -18,8 +18,6 @@ struct FenceVal : public DeviceObjectVal { //================================================================================================================ uint64_t GetFenceValue() const; - void QueueSignal(CommandQueueVal& commandQueue, uint64_t value); - void QueueWait(CommandQueueVal& commandQueue, uint64_t value); void Wait(uint64_t value); void SetDebugName(const char* name); }; diff --git a/Source/Validation/FenceVal.hpp b/Source/Validation/FenceVal.hpp index 273cf4f8..ef3f7a32 100644 --- a/Source/Validation/FenceVal.hpp +++ b/Source/Validation/FenceVal.hpp @@ -6,14 +6,6 @@ static uint64_t NRI_CALL GetFenceValue(Fence& fence) { return ((FenceVal&)fence).GetFenceValue(); } -static void NRI_CALL QueueSignal(CommandQueue& commandQueue, Fence& fence, uint64_t value) { - return ((FenceVal&)fence).QueueSignal((CommandQueueVal&)commandQueue, value); -} - -static void NRI_CALL QueueWait(CommandQueue& commandQueue, Fence& fence, uint64_t value) { - return ((FenceVal&)fence).QueueWait((CommandQueueVal&)commandQueue, value); -} - static void NRI_CALL Wait(Fence& fence, uint64_t value) { ((FenceVal&)fence).Wait(value); } @@ -24,4 +16,4 @@ static void NRI_CALL SetFenceDebugName(Fence& fence, const char* name) { #pragma endregion -Define_Core_Fence_PartiallyFillFunctionTable(Val) +Define_Core_Fence_PartiallyFillFunctionTable(Val); diff --git a/Source/Validation/MemoryVal.cpp b/Source/Validation/MemoryVal.cpp index 9c8b2e38..1066fd42 100644 --- a/Source/Validation/MemoryVal.cpp +++ b/Source/Validation/MemoryVal.cpp @@ -12,8 +12,8 @@ using namespace nri; 
-MemoryVal::MemoryVal(DeviceVal& device, Memory* memory, uint64_t size, MemoryLocation memoryLocation) - : DeviceObjectVal(device, memory), m_Size(size), m_MemoryLocation(memoryLocation) { +MemoryVal::MemoryVal(DeviceVal& device, Memory* memory, uint64_t size, MemoryLocation memoryLocation) : + DeviceObjectVal(device, memory), m_Size(size), m_MemoryLocation(memoryLocation) { } #if NRI_USE_D3D12 diff --git a/Source/Validation/MemoryVal.h b/Source/Validation/MemoryVal.h index 64f3e6ba..2db04d0f 100644 --- a/Source/Validation/MemoryVal.h +++ b/Source/Validation/MemoryVal.h @@ -35,7 +35,7 @@ struct MemoryVal : public DeviceObjectVal { void SetDebugName(const char* name); - private: +private: std::vector m_Buffers; std::vector m_Textures; std::vector m_AccelerationStructures; diff --git a/Source/Validation/PipelineLayoutVal.cpp b/Source/Validation/PipelineLayoutVal.cpp index 629a0b37..c028d6f9 100644 --- a/Source/Validation/PipelineLayoutVal.cpp +++ b/Source/Validation/PipelineLayoutVal.cpp @@ -7,12 +7,12 @@ using namespace nri; -PipelineLayoutVal::PipelineLayoutVal(DeviceVal& device, PipelineLayout* pipelineLayout, const PipelineLayoutDesc& pipelineLayoutDesc) - : DeviceObjectVal(device, pipelineLayout), - m_DescriptorSetDescs(device.GetStdAllocator()), - m_PushConstantDescs(device.GetStdAllocator()), - m_DescriptorRangeDescs(device.GetStdAllocator()), - m_DynamicConstantBufferDescs(device.GetStdAllocator()) { +PipelineLayoutVal::PipelineLayoutVal(DeviceVal& device, PipelineLayout* pipelineLayout, const PipelineLayoutDesc& pipelineLayoutDesc) : + DeviceObjectVal(device, pipelineLayout), + m_DescriptorSetDescs(device.GetStdAllocator()), + m_PushConstantDescs(device.GetStdAllocator()), + m_DescriptorRangeDescs(device.GetStdAllocator()), + m_DynamicConstantBufferDescs(device.GetStdAllocator()) { uint32_t descriptorRangeDescNum = 0; uint32_t dynamicConstantBufferDescNum = 0; diff --git a/Source/Validation/PipelineLayoutVal.h b/Source/Validation/PipelineLayoutVal.h index 
47f4bddb..ce3efe9d 100644 --- a/Source/Validation/PipelineLayoutVal.h +++ b/Source/Validation/PipelineLayoutVal.h @@ -16,7 +16,7 @@ struct PipelineLayoutVal : public DeviceObjectVal { //================================================================================================================ void SetDebugName(const char* name); - private: +private: PipelineLayoutDesc m_PipelineLayoutDesc; Vector m_DescriptorSetDescs; Vector m_PushConstantDescs; diff --git a/Source/Validation/PipelineVal.cpp b/Source/Validation/PipelineVal.cpp index fa026e03..9557b276 100644 --- a/Source/Validation/PipelineVal.cpp +++ b/Source/Validation/PipelineVal.cpp @@ -7,19 +7,19 @@ using namespace nri; -PipelineVal::PipelineVal(DeviceVal& device, Pipeline* pipeline) : DeviceObjectVal(device, pipeline), m_RayTracingAPI(device.GetRayTracingInterface()) { +PipelineVal::PipelineVal(DeviceVal& device, Pipeline* pipeline) : DeviceObjectVal(device, pipeline) { } -PipelineVal::PipelineVal(DeviceVal& device, Pipeline* pipeline, const GraphicsPipelineDesc& graphicsPipelineDesc) - : DeviceObjectVal(device, pipeline), m_RayTracingAPI(device.GetRayTracingInterface()), m_PipelineLayout(graphicsPipelineDesc.pipelineLayout) { +PipelineVal::PipelineVal(DeviceVal& device, Pipeline* pipeline, const GraphicsPipelineDesc& graphicsPipelineDesc) : + DeviceObjectVal(device, pipeline), m_PipelineLayout(graphicsPipelineDesc.pipelineLayout) { } -PipelineVal::PipelineVal(DeviceVal& device, Pipeline* pipeline, const ComputePipelineDesc& computePipelineDesc) - : DeviceObjectVal(device, pipeline), m_RayTracingAPI(device.GetRayTracingInterface()), m_PipelineLayout(computePipelineDesc.pipelineLayout) { +PipelineVal::PipelineVal(DeviceVal& device, Pipeline* pipeline, const ComputePipelineDesc& computePipelineDesc) : + DeviceObjectVal(device, pipeline), m_PipelineLayout(computePipelineDesc.pipelineLayout) { } -PipelineVal::PipelineVal(DeviceVal& device, Pipeline* pipeline, const RayTracingPipelineDesc& 
rayTracingPipelineDesc) - : DeviceObjectVal(device, pipeline), m_RayTracingAPI(device.GetRayTracingInterface()), m_PipelineLayout(rayTracingPipelineDesc.pipelineLayout) { +PipelineVal::PipelineVal(DeviceVal& device, Pipeline* pipeline, const RayTracingPipelineDesc& rayTracingPipelineDesc) : + DeviceObjectVal(device, pipeline), m_PipelineLayout(rayTracingPipelineDesc.pipelineLayout) { } void PipelineVal::SetDebugName(const char* name) { @@ -28,7 +28,7 @@ void PipelineVal::SetDebugName(const char* name) { } Result PipelineVal::WriteShaderGroupIdentifiers(uint32_t baseShaderGroupIndex, uint32_t shaderGroupNum, void* buffer) { - return m_RayTracingAPI.WriteShaderGroupIdentifiers(*GetImpl(), baseShaderGroupIndex, shaderGroupNum, buffer); + return GetRayTracingInterface().WriteShaderGroupIdentifiers(*GetImpl(), baseShaderGroupIndex, shaderGroupNum, buffer); } #include "PipelineVal.hpp" diff --git a/Source/Validation/PipelineVal.h b/Source/Validation/PipelineVal.h index 2d844f7d..1a4b5e9a 100644 --- a/Source/Validation/PipelineVal.h +++ b/Source/Validation/PipelineVal.h @@ -20,8 +20,7 @@ struct PipelineVal : public DeviceObjectVal { void SetDebugName(const char* name); Result WriteShaderGroupIdentifiers(uint32_t baseShaderGroupIndex, uint32_t shaderGroupNum, void* buffer); - private: - const RayTracingInterface& m_RayTracingAPI; +private: const PipelineLayout* m_PipelineLayout = nullptr; }; diff --git a/Source/Validation/QueryPoolVal.cpp b/Source/Validation/QueryPoolVal.cpp index c7ee26ab..20ff476f 100644 --- a/Source/Validation/QueryPoolVal.cpp +++ b/Source/Validation/QueryPoolVal.cpp @@ -7,8 +7,8 @@ using namespace nri; -QueryPoolVal::QueryPoolVal(DeviceVal& device, QueryPool* queryPool, QueryType queryType, uint32_t queryNum) - : DeviceObjectVal(device, queryPool), m_DeviceState(device.GetStdAllocator()), m_QueryType(queryType) { +QueryPoolVal::QueryPoolVal(DeviceVal& device, QueryPool* queryPool, QueryType queryType, uint32_t queryNum) : + DeviceObjectVal(device, 
queryPool), m_DeviceState(device.GetStdAllocator()), m_QueryType(queryType) { m_QueryNum = queryNum; if (queryNum != 0) { @@ -26,8 +26,7 @@ uint32_t QueryPoolVal::GetQuerySize() const { return GetCoreInterface().GetQuerySize(*GetImpl()); } -bool QueryPoolVal::SetQueryState(uint32_t offset, bool state) // TODO: not inline -{ +bool QueryPoolVal::SetQueryState(uint32_t offset, bool state) { // TODO: not inline const size_t batchIndex = offset >> 6; const uint64_t batchValue = m_DeviceState[batchIndex]; const size_t bitIndex = 1ull << (offset & 63); diff --git a/Source/Validation/QueryPoolVal.h b/Source/Validation/QueryPoolVal.h index a414df35..30b8a7a2 100644 --- a/Source/Validation/QueryPoolVal.h +++ b/Source/Validation/QueryPoolVal.h @@ -28,7 +28,7 @@ struct QueryPoolVal : public DeviceObjectVal { void SetDebugName(const char* name); uint32_t GetQuerySize() const; - private: +private: Vector m_DeviceState; uint32_t m_QueryNum; QueryType m_QueryType; diff --git a/Source/Validation/QueryPoolVal.hpp b/Source/Validation/QueryPoolVal.hpp index 653d0854..6d30d55e 100644 --- a/Source/Validation/QueryPoolVal.hpp +++ b/Source/Validation/QueryPoolVal.hpp @@ -12,4 +12,4 @@ static uint32_t NRI_CALL GetQuerySize(const QueryPool& queryPool) { #pragma endregion -Define_Core_QueryPool_PartiallyFillFunctionTable(Val) +Define_Core_QueryPool_PartiallyFillFunctionTable(Val); diff --git a/Source/Validation/SharedVal.h b/Source/Validation/SharedVal.h index 55dd6c1b..2758b8fa 100644 --- a/Source/Validation/SharedVal.h +++ b/Source/Validation/SharedVal.h @@ -27,7 +27,43 @@ struct DeviceObjectVal { return m_Device.GetCoreInterface(); } - protected: + inline const HelperInterface& GetHelperInterface() const { + return m_Device.GetHelperInterface(); + } + + inline const StreamerInterface& GetStreamerInterface() const { + return m_Device.GetStreamerInterface(); + } + + inline const WrapperD3D11Interface& GetWrapperD3D11Interface() const { + return m_Device.GetWrapperD3D11Interface(); + } + + 
inline const WrapperD3D12Interface& GetWrapperD3D12Interface() const { + return m_Device.GetWrapperD3D12Interface(); + } + + inline const WrapperVKInterface& GetWrapperVKInterface() const { + return m_Device.GetWrapperVKInterface(); + } + + inline const SwapChainInterface& GetSwapChainInterface() const { + return m_Device.GetSwapChainInterface(); + } + + inline const RayTracingInterface& GetRayTracingInterface() const { + return m_Device.GetRayTracingInterface(); + } + + inline const MeshShaderInterface& GetMeshShaderInterface() const { + return m_Device.GetMeshShaderInterface(); + } + + inline const LowLatencyInterface& GetLowLatencyInterface() const { + return m_Device.GetLowLatencyInterface(); + } + +protected: String m_Name; DeviceVal& m_Device; T* m_Impl = nullptr; diff --git a/Source/Validation/SwapChainVal.cpp b/Source/Validation/SwapChainVal.cpp index 0801ee67..254420dd 100644 --- a/Source/Validation/SwapChainVal.cpp +++ b/Source/Validation/SwapChainVal.cpp @@ -15,29 +15,62 @@ SwapChainVal::~SwapChainVal() { inline void SwapChainVal::SetDebugName(const char* name) { m_Name = name; - m_SwapChainAPI.SetSwapChainDebugName(*GetImpl(), name); + GetSwapChainInterface().SetSwapChainDebugName(*GetImpl(), name); } -inline Texture* const* SwapChainVal::GetTextures(uint32_t& textureNum) const { - Texture* const* textures = m_SwapChainAPI.GetSwapChainTextures(*GetImpl(), textureNum); +inline Texture* const* SwapChainVal::GetTextures(uint32_t& textureNum) { + Texture* const* textures = GetSwapChainInterface().GetSwapChainTextures(*GetImpl(), textureNum); - m_Textures.resize(textureNum); - for (uint32_t i = 0; i < textureNum; i++) - m_Textures[i] = Allocate(m_Device.GetStdAllocator(), m_Device, textures[i]); + if (m_Textures.empty()) { + for (uint32_t i = 0; i < textureNum; i++) { + TextureVal* textureVal = Allocate(m_Device.GetStdAllocator(), m_Device, textures[i]); + m_Textures.push_back(textureVal); + } + } return (Texture* const*)m_Textures.data(); } inline uint32_t 
SwapChainVal::AcquireNextTexture() { - return m_SwapChainAPI.AcquireNextSwapChainTexture(*GetImpl()); + return GetSwapChainInterface().AcquireNextSwapChainTexture(*GetImpl()); +} + +inline Result SwapChainVal::WaitForPresent() { + RETURN_ON_FAILURE(&m_Device, m_SwapChainDesc.waitable, Result::FAILURE, "Swap chain has not been created with 'waitable = true'"); + + return GetSwapChainInterface().WaitForPresent(*GetImpl()); } inline Result SwapChainVal::Present() { - return m_SwapChainAPI.SwapChainPresent(*GetImpl()); + return GetSwapChainInterface().QueuePresent(*GetImpl()); } inline Result SwapChainVal::GetDisplayDesc(DisplayDesc& displayDesc) const { - return m_SwapChainAPI.GetDisplayDesc(*GetImpl(), displayDesc); + return GetSwapChainInterface().GetDisplayDesc(*GetImpl(), displayDesc); +} + +inline Result SwapChainVal::SetLatencySleepMode(const LatencySleepMode& latencySleepMode) { + RETURN_ON_FAILURE(&m_Device, m_SwapChainDesc.allowLowLatency, Result::FAILURE, "Swap chain has not been created with 'allowLowLatency = true'"); + + return GetLowLatencyInterface().SetLatencySleepMode(*GetImpl(), latencySleepMode); +} + +inline Result SwapChainVal::SetLatencyMarker(LatencyMarker latencyMarker) { + RETURN_ON_FAILURE(&m_Device, m_SwapChainDesc.allowLowLatency, Result::FAILURE, "Swap chain has not been created with 'allowLowLatency = true'"); + + return GetLowLatencyInterface().SetLatencyMarker(*GetImpl(), latencyMarker); +} + +inline Result SwapChainVal::LatencySleep() { + RETURN_ON_FAILURE(&m_Device, m_SwapChainDesc.allowLowLatency, Result::FAILURE, "Swap chain has not been created with 'allowLowLatency = true'"); + + return GetLowLatencyInterface().LatencySleep(*GetImpl()); +} + +inline Result SwapChainVal::GetLatencyReport(LatencyReport& latencyReport) { + RETURN_ON_FAILURE(&m_Device, m_SwapChainDesc.allowLowLatency, Result::FAILURE, "Swap chain has not been created with 'allowLowLatency = true'"); + + return GetLowLatencyInterface().GetLatencyReport(*GetImpl(), 
latencyReport); } #include "SwapChainVal.hpp" diff --git a/Source/Validation/SwapChainVal.h b/Source/Validation/SwapChainVal.h index 40790ebb..7d65fc60 100644 --- a/Source/Validation/SwapChainVal.h +++ b/Source/Validation/SwapChainVal.h @@ -7,8 +7,8 @@ namespace nri { struct TextureVal; struct SwapChainVal : public DeviceObjectVal { - SwapChainVal(DeviceVal& device, SwapChain* swapChain, const SwapChainDesc& swapChainDesc) - : DeviceObjectVal(device, swapChain), m_SwapChainAPI(device.GetSwapChainInterface()), m_Textures(device.GetStdAllocator()), m_SwapChainDesc(swapChainDesc) { + SwapChainVal(DeviceVal& device, SwapChain* swapChain, const SwapChainDesc& swapChainDesc) : + DeviceObjectVal(device, swapChain), m_Textures(device.GetStdAllocator()), m_SwapChainDesc(swapChainDesc) { } ~SwapChainVal(); @@ -17,14 +17,19 @@ struct SwapChainVal : public DeviceObjectVal { // NRI //================================================================================================================ void SetDebugName(const char* name); - Texture* const* GetTextures(uint32_t& textureNum) const; + Texture* const* GetTextures(uint32_t& textureNum); uint32_t AcquireNextTexture(); + Result WaitForPresent(); Result Present(); Result GetDisplayDesc(DisplayDesc& displayDesc) const; - private: - const SwapChainInterface& m_SwapChainAPI; - mutable Vector m_Textures; + Result SetLatencySleepMode(const LatencySleepMode& latencySleepMode); + Result SetLatencyMarker(LatencyMarker latencyMarker); + Result LatencySleep(); + Result GetLatencyReport(LatencyReport& latencyReport); + +private: + Vector m_Textures; SwapChainDesc m_SwapChainDesc = {}; }; diff --git a/Source/Validation/SwapChainVal.hpp b/Source/Validation/SwapChainVal.hpp index 8ad7f5e9..3345e37c 100644 --- a/Source/Validation/SwapChainVal.hpp +++ b/Source/Validation/SwapChainVal.hpp @@ -14,7 +14,11 @@ static uint32_t NRI_CALL AcquireNextSwapChainTexture(SwapChain& swapChain) { return ((SwapChainVal&)swapChain).AcquireNextTexture(); } 
-static Result NRI_CALL SwapChainPresent(SwapChain& swapChain) { +static Result NRI_CALL WaitForPresent(SwapChain& swapChain) { + return ((SwapChainVal&)swapChain).WaitForPresent(); +} + +static Result NRI_CALL QueuePresent(SwapChain& swapChain) { return ((SwapChainVal&)swapChain).Present(); } @@ -24,4 +28,25 @@ static Result NRI_CALL GetDisplayDesc(SwapChain& swapChain, DisplayDesc& display #pragma endregion -Define_SwapChain_PartiallyFillFunctionTable(Val) +#pragma region[ Low latency ] + +static Result SetLatencySleepMode(SwapChain& swapChain, const LatencySleepMode& latencySleepMode) { + return ((SwapChainVal&)swapChain).SetLatencySleepMode(latencySleepMode); +} + +static Result SetLatencyMarker(SwapChain& swapChain, LatencyMarker latencyMarker) { + return ((SwapChainVal&)swapChain).SetLatencyMarker(latencyMarker); +} + +static Result LatencySleep(SwapChain& swapChain) { + return ((SwapChainVal&)swapChain).LatencySleep(); +} + +static Result GetLatencyReport(const SwapChain& swapChain, LatencyReport& latencyReport) { + return ((SwapChainVal&)swapChain).GetLatencyReport(latencyReport); +} + +#pragma endregion + +Define_SwapChain_PartiallyFillFunctionTable(Val); +Define_LowLatency_SwapChain_PartiallyFillFunctionTable(Val); diff --git a/Source/Validation/TextureVal.h b/Source/Validation/TextureVal.h index 14cf8261..c8551263 100644 --- a/Source/Validation/TextureVal.h +++ b/Source/Validation/TextureVal.h @@ -39,7 +39,7 @@ struct TextureVal : public DeviceObjectVal { void SetDebugName(const char* name); void GetMemoryInfo(MemoryLocation memoryLocation, MemoryDesc& memoryDesc) const; - private: +private: MemoryVal* m_Memory = nullptr; bool m_IsBoundToMemory = false; }; diff --git a/Source/Validation/TextureVal.hpp b/Source/Validation/TextureVal.hpp index 2b83576c..570cef55 100644 --- a/Source/Validation/TextureVal.hpp +++ b/Source/Validation/TextureVal.hpp @@ -19,4 +19,4 @@ static void NRI_CALL GetTextureMemoryInfo(const Texture& texture, MemoryLocation #pragma 
endregion -Define_Core_Texture_PartiallyFillFunctionTable(Val) +Define_Core_Texture_PartiallyFillFunctionTable(Val);