From 8e7968c48914873b1c98b21cfb079573f9488d79 Mon Sep 17 00:00:00 2001 From: qiaojbao Date: Wed, 30 Oct 2024 15:56:39 +0800 Subject: [PATCH] Update xgl from commit 2670e6ec Update Khronos Vulkan Headers to 1.3.297 Fix dEQP-VK.binding_model.descriptor_buffer.* - test failure Fix queue family index asserts Support for Disabling Color Compression setting for driver experiments Fixes for VK_KHR_maintenance5 flags Allow vkSetDebugUtilsObjectNameExt to label an acceleration structure in RRA Fix cache masks in renderpass barriers Implement VK_MESA_image_alignment_control support and enable it for vkd3d Fix crash happened in vkDestroyInstance_SG during test exit Always return surface formats if the screen is missing Add driver support for handling RT pipelines in RGA [VKD3D] Add VkPhysicalDeviceImageCompressionControlFeaturesEXT for VK_EXT_image_compression_control Remove unreferenced app profiles Add LDSPsGroupSize tuning option [KHR_PUSH_DESCRIPTOR] Change restriction for some entry points Ubuntu24.04: Improper scaling is observed in games @1080p Resolution Fix Llama2 Vulkan version only works with Mem Carve-out size >= 16GB Fix queueFlags in Device::Create Fix GPURT descriptor table node mapping Add workaround for the corruption of '7Days To Die' Fix unpackedBufferFormat in vertex offset mode Consider Transfer write (USAGE_TRANSFER_DST) dependencies when initializing the barrier policy for resources Bump up GPURT version to 49 Add missing mesh/task support [KHR_maintenance6] Add device extensions for existing entry points Add RenderOp Trace Controller to UberTrace DevModeMgr Consolidate preColorResolve and preDsResolve syncs into one flag Fix split raytracing layer dispatches too many workgroups Fix vkGetPhysicalDeviceImageFormatProperties does not take into account VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT Implement VK_EXT_device_generated_commands support Update PAL Version in XGL 905 Fix vkGetPipelineBinaryDataKHR() not writing binary key Fix GPU hang with 
vkd3d Raytracing sample Bump LLPC version to 75 Implement VK_EXT_pipeline_robustness support Add a layer to work-around an app side barrier issue for 'Baldur's Gate 3' Support the extension VK_EXT_depth_clamp_control Expose the extension VK_EXT_fragment_shader_interlock --- cmake/XglCompileDefinitions.cmake | 6 - cmake/XglOptions.cmake | 1 - cmake/XglOverrides.cmake | 1 - cmake/XglPackaging.cmake | 43 +- cmake/XglVersions.cmake | 8 +- icd/CMakeLists.txt | 2 + icd/Loader/LunarG/Lnx/amd-icd.json | 4 +- icd/api/app_profile.cpp | 87 ++- icd/api/app_shader_optimizer.cpp | 5 + icd/api/appopt/baldurs_gate3_layer.cpp | 116 ++++ icd/api/appopt/baldurs_gate3_layer.h | 57 ++ icd/api/appopt/gravity_mark_layer.cpp | 119 ++++ icd/api/appopt/gravity_mark_layer.h | 57 ++ .../generic/RedDeadRedemption2/profile.json | 3 + .../llpc/generic/StrangeBrigade/profile.json | 3 + icd/api/appopt/split_raytracing_layer.cpp | 16 +- icd/api/barrier_policy.cpp | 20 +- icd/api/compiler_solution_llpc.cpp | 3 +- icd/api/devmode/devmode_mgr.h | 18 + icd/api/devmode/devmode_rgp.cpp | 106 ++- icd/api/devmode/devmode_rgp.h | 20 + icd/api/devmode/devmode_ubertrace.cpp | 156 ++++- icd/api/devmode/devmode_ubertrace.h | 47 +- icd/api/entry.cpp | 29 + icd/api/graphics_pipeline_common.cpp | 113 +++- icd/api/include/app_profile.h | 6 +- icd/api/include/compiler_solution.h | 1 + icd/api/include/graphics_pipeline_common.h | 8 + .../khronos/sdk-1.3/vulkan/vulkan_core.h | 360 ++++++++++- icd/api/include/pipeline_compiler.h | 9 +- icd/api/include/vk_cmdbuffer.h | 41 +- icd/api/include/vk_conv.h | 110 +--- icd/api/include/vk_device.h | 60 +- icd/api/include/vk_extensions.h | 7 + icd/api/include/vk_formats.h | 7 +- icd/api/include/vk_image.h | 1 + icd/api/include/vk_indirect_commands_layout.h | 65 +- icd/api/include/vk_physical_device.h | 19 + icd/api/include/vk_pipeline.h | 37 ++ icd/api/pipeline_compiler.cpp | 47 +- icd/api/raytrace/ray_tracing_device.cpp | 15 +- icd/api/raytrace/vk_ray_tracing_pipeline.cpp | 610 
+++++++++++++++--- icd/api/raytrace/vk_ray_tracing_pipeline.h | 55 +- icd/api/renderpass/renderpass_builder.cpp | 20 +- icd/api/renderpass/renderpass_types.h | 8 +- icd/api/sqtt/sqtt_layer.cpp | 12 + icd/api/strings/entry_points.txt | 16 +- icd/api/strings/extensions.txt | 6 + icd/api/vk_cmdbuffer.cpp | 232 ++++++- icd/api/vk_compute_pipeline.cpp | 47 +- icd/api/vk_conv.cpp | 9 +- icd/api/vk_device.cpp | 191 +++++- icd/api/vk_dispatch.cpp | 12 + icd/api/vk_event.cpp | 3 +- icd/api/vk_formats.cpp | 16 +- icd/api/vk_framebuffer.cpp | 13 + icd/api/vk_gpa_session.cpp | 4 +- icd/api/vk_graphics_pipeline.cpp | 5 + icd/api/vk_image.cpp | 78 ++- icd/api/vk_indirect_commands_layout.cpp | 408 ++++++++++++ icd/api/vk_physical_device.cpp | 294 +++++++-- icd/api/vk_pipeline.cpp | 285 +++++++- icd/api/vk_pipeline_binary.cpp | 3 +- icd/api/vk_queue.cpp | 13 + icd/api/vk_swapchain.cpp | 27 +- icd/layers/vk_layer_switchable_graphics.cpp | 12 +- icd/res/ver.h | 4 +- icd/settings/experiments_settings_xgl.json | 2 +- icd/settings/settings.cpp | 51 +- icd/settings/settings.h | 4 +- icd/settings/settings_xgl.json | 68 +- icd/tools/generate/shaderProfileTemplate.py | 47 ++ 72 files changed, 3824 insertions(+), 564 deletions(-) create mode 100644 icd/api/appopt/baldurs_gate3_layer.cpp create mode 100644 icd/api/appopt/baldurs_gate3_layer.h create mode 100644 icd/api/appopt/gravity_mark_layer.cpp create mode 100644 icd/api/appopt/gravity_mark_layer.h create mode 100644 icd/api/appopt/shader_profiles/llpc/generic/RedDeadRedemption2/profile.json create mode 100644 icd/api/appopt/shader_profiles/llpc/generic/StrangeBrigade/profile.json diff --git a/cmake/XglCompileDefinitions.cmake b/cmake/XglCompileDefinitions.cmake index 45e6b4de..797ac5ed 100644 --- a/cmake/XglCompileDefinitions.cmake +++ b/cmake/XglCompileDefinitions.cmake @@ -106,12 +106,6 @@ macro(xgl_set_compile_definitions) endif() #endif -#if VKI_RAY_TRACING -#endif - -#if VKI_RAY_TRACING -#endif - #if VKI_RAY_TRACING #endif diff 
--git a/cmake/XglOptions.cmake b/cmake/XglOptions.cmake index 2cf1a891..e018fe7c 100644 --- a/cmake/XglOptions.cmake +++ b/cmake/XglOptions.cmake @@ -44,7 +44,6 @@ macro(xgl_options) option(XGL_ENABLE_LTO "Build with LTO enabled?" ON) option(XGL_ENABLE_GCOV "Build with gcov source code coverage?" OFF) - #if VKI_BUILD_GFX115 option(XGL_BUILD_GFX115 "Build vulkan for GFX115" ON) #endif diff --git a/cmake/XglOverrides.cmake b/cmake/XglOverrides.cmake index 0f1ab516..e4ee602d 100644 --- a/cmake/XglOverrides.cmake +++ b/cmake/XglOverrides.cmake @@ -88,7 +88,6 @@ endmacro() macro(xgl_overrides_pal) ### For PAL ########################################################################################################### - set(PAL_BUILD_JEMALLOC OFF CACHE BOOL "Force jemalloc off" FORCE) set(PAL_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_PAL_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE) diff --git a/cmake/XglPackaging.cmake b/cmake/XglPackaging.cmake index 12bcaf02..05a84a49 100644 --- a/cmake/XglPackaging.cmake +++ b/cmake/XglPackaging.cmake @@ -44,7 +44,11 @@ function(identifyPackageType) endif() if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - set(CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "cmake install prefix" FORCE) + if(PACKAGE_NAME STREQUAL "vulkan-amdgpu") + set(CMAKE_INSTALL_PREFIX "/opt/amdgpu" CACHE PATH "cmake install prefix" FORCE) + else() + set(CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "cmake install prefix" FORCE) + endif() if(TARGET_ARCHITECTURE_BITS EQUAL 64) if(PACKAGE_TYPE STREQUAL "DEB") set(CMAKE_INSTALL_LIBDIR "lib/x86_64-linux-gnu" CACHE PATH "cmake install libdir" FORCE) @@ -127,11 +131,22 @@ endfunction() function(generateInstallTargets) install(FILES ${CMAKE_BINARY_DIR}/icd/amd_icd${TARGET_ARCHITECTURE_BITS}.json COMPONENT icd DESTINATION /etc/vulkan/icd.d) install(FILES ${CMAKE_BINARY_DIR}/icd/amd_icd${TARGET_ARCHITECTURE_BITS}.json COMPONENT icd DESTINATION /etc/vulkan/implicit_layer.d) - if(EXISTS ${CMAKE_SOURCE_DIR}/LICENSE.txt) - 
install(FILES ${CMAKE_SOURCE_DIR}/LICENSE.txt COMPONENT icd DESTINATION share/doc/${PACKAGE_NAME}) - else() - message(WARNING "LICENSE.txt is not found under ${CMAKE_SOURCE_DIR}, please put it there") + + if(PACKAGE_NAME STREQUAL "vulkan-amdgpu" + ) + if(EXISTS ${CMAKE_SOURCE_DIR}/copyright) + install(FILES ${CMAKE_SOURCE_DIR}/copyright COMPONENT icd DESTINATION share/doc/${PACKAGE_NAME}) + else() + message(WARNING "copyright is not found under ${CMAKE_SOURCE_DIR}, please put it there") + endif() + elseif(PACKAGE_NAME STREQUAL "amdvlk") + if(EXISTS ${CMAKE_SOURCE_DIR}/LICENSE.txt) + install(FILES ${CMAKE_SOURCE_DIR}/LICENSE.txt COMPONENT icd DESTINATION share/doc/${PACKAGE_NAME}) + else() + message(WARNING "LICENSE.txt is not found under ${CMAKE_SOURCE_DIR}, please put it there") + endif() endif() + if(PACKAGE_TYPE STREQUAL "DEB") if(EXISTS ${CMAKE_SOURCE_DIR}/changelog.Debian.gz) install(FILES ${CMAKE_SOURCE_DIR}/changelog.Debian.gz COMPONENT icd DESTINATION share/doc/${PACKAGE_NAME}) @@ -148,7 +163,14 @@ function(generatePackageTarget) set(CPACK_PACKAGE_NAME "${PACKAGE_NAME}") set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices (AMD)") set(CPACK_PACKAGE_CONTACT "gpudriverdevsupport@amd.com") - set(CPACK_PACKAGE_HOMEPAGE_URL "https://github.com/GPUOpen-Drivers/AMDVLK") + + if(PACKAGE_NAME STREQUAL "vulkan-amdgpu" + ) + set(CPACK_PACKAGE_HOMEPAGE_URL "http://www.amd.com") + else() + set(CPACK_PACKAGE_HOMEPAGE_URL "https://github.com/GPUOpen-Drivers/AMDVLK") + endif() + set(CPACK_PACKAGE_RELOCATABLE OFF) set(CPACK_GENERATOR "${PACKAGE_TYPE}") set(CPACK_COMPONENTS_ALL "icd") @@ -161,8 +183,13 @@ function(generatePackageTarget) endif() set(CPACK_DEBIAN_PACKAGE_PRIORITY "optional") set(CPACK_DEBIAN_PACKAGE_SECTION "libs") - set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>=2.17), libgcc1 (>= 1:3.4), libstdc++6 (>= 5.2)") - set(CPACK_DEBIAN_PACKAGE_RECOMMENDS "libssl1.1") + if(PACKAGE_NAME STREQUAL "vulkan-amdgpu") + set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>=2.17), libgcc1 (>= 
1:3.4), libstdc++6 (>= 5.2), libwayland-amdgpu-client0, amdgpu-core") + set(CPACK_DEBIAN_PACKAGE_RECOMMENDS "libssl1.1") + else() + set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>=2.17), libgcc1 (>= 1:3.4), libstdc++6 (>= 5.2)") + set(CPACK_DEBIAN_PACKAGE_RECOMMENDS "libssl1.1") + endif() if(PACKAGE_RELEASE) set(CPACK_PACKAGE_VERSION "${PACKAGE_VERSION}-${PACKAGE_RELEASE}") else() diff --git a/cmake/XglVersions.cmake b/cmake/XglVersions.cmake index e0f16371..52f9a3b9 100644 --- a/cmake/XglVersions.cmake +++ b/cmake/XglVersions.cmake @@ -25,10 +25,12 @@ include_guard() +# WARNING! This file is parsed by some scripts. Do not change the formatting or the case of set statements. + # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. # It must be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -set(ICD_PAL_CLIENT_MAJOR_VERSION "892") +set(ICD_PAL_CLIENT_MAJOR_VERSION "905") # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. # It describes the interface version of the gpuopen shared module (part of PAL) that the ICD supports. @@ -37,9 +39,9 @@ set(ICD_GPUOPEN_CLIENT_MAJOR_VERSION "42") #if VKI_RAY_TRACING # This will become the value of GPURT_CLIENT_INTERFACE_MAJOR_VERSION if VKI_RAY_TRACING=1. # It describes the interface version of the GpuRT shared module that the ICD supports. -set(ICD_GPURT_CLIENT_MAJOR_VERSION "48") +set(ICD_GPURT_CLIENT_MAJOR_VERSION "49") #endif # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1. # It describes the version of the interface version of LLPC that the ICD supports. 
-set(ICD_LLPC_CLIENT_MAJOR_VERSION "74") +set(ICD_LLPC_CLIENT_MAJOR_VERSION "75") diff --git a/icd/CMakeLists.txt b/icd/CMakeLists.txt index 00baae1d..e800d879 100644 --- a/icd/CMakeLists.txt +++ b/icd/CMakeLists.txt @@ -154,6 +154,8 @@ target_sources(xgl PRIVATE api/vk_indirect_commands_layout.cpp api/appopt/barrier_filter_layer.cpp api/appopt/strange_brigade_layer.cpp + api/appopt/baldurs_gate3_layer.cpp + api/appopt/gravity_mark_layer.cpp api/appopt/g_shader_profile.cpp api/render_state_cache.cpp api/renderpass/renderpass_builder.cpp diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json index 20285065..9248602a 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ -2,13 +2,13 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.295" + "api_version": "1.3.297" }, "layer": { "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@", "type": "GLOBAL", "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.295", + "api_version": "1.3.297", "implementation_version": "1", "description": "AMD switchable graphics layer", "functions": { diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp index 7ab4adb7..44c1f3ef 100644 --- a/icd/api/app_profile.cpp +++ b/icd/api/app_profile.cpp @@ -136,12 +136,6 @@ constexpr AppProfilePatternEntry AppNameDota2 = "dota" }; -constexpr AppProfilePatternEntry AppNameHalfLifeAlyx = -{ - PatternAppNameLower, - "hlvr" -}; - constexpr AppProfilePatternEntry AppEngineSource2 = { PatternEngineNameLower, @@ -214,12 +208,6 @@ constexpr AppProfilePatternEntry AppNameSeriousSam4Win = "serious sam 4 - 64bit" }; -constexpr AppProfilePatternEntry AppNameRomeRemasteredLinux = -{ - PatternAppNameLower, - "rome" -}; - constexpr AppProfilePatternEntry AppNameEnscape = { PatternAppNameLower, @@ -702,12 +690,6 @@ constexpr AppProfilePatternEntry AppNameSeriousSamVr = "serious sam vr: the 
last hope - 64bit- vr" }; -constexpr AppProfilePatternEntry AppNameSatisfactory = -{ - PatternAppNameLower, - "factorygame" -}; - constexpr AppProfilePatternEntry AppNameQuakeEnhanced = { PatternAppNameLower, @@ -798,6 +780,24 @@ constexpr AppProfilePatternEntry AppNameHoudini = "houdini" }; +constexpr AppProfilePatternEntry AppNameGravityMark = +{ + PatternAppNameLower, + "clayapp" +}; + +constexpr AppProfilePatternEntry AppNameSevenDaysToDie = +{ + PatternAppNameLower, + "7 days to die" +}; + +constexpr AppProfilePatternEntry AppNameGgmlVulkan = +{ + PatternAppNameLower, + "ggml-vulkan" +}; + // Section END of AppProfilePatternEntry for all games // This is a table of patterns. The first matching pattern in this table will be returned. @@ -878,15 +878,6 @@ AppProfilePattern AppPatternTable[] = } }, - { - AppProfile::HalfLifeAlyx, - { - AppNameHalfLifeAlyx, - AppEngineSource2, - PatternEnd - } - }, - { AppProfile::Talos, { @@ -1048,15 +1039,6 @@ AppProfilePattern AppPatternTable[] = } }, - { - AppProfile::RomeRemastered, - { - AppNameRomeRemasteredLinux, - AppEngineFeral3D, - PatternEnd - } - }, - { AppProfile::ThreeKingdoms, { @@ -1300,6 +1282,15 @@ AppProfilePattern AppPatternTable[] = } }, + { + AppProfile::SevenDaysToDie, + { + AppNameSevenDaysToDie, + AppEngineUnity, + PatternEnd + } + }, + { AppProfile::UnityEngine, { @@ -1335,6 +1326,14 @@ AppProfilePattern AppPatternTable[] = } }, + { + AppProfile::GravityMark, + { + AppNameGravityMark, + PatternEnd + } + }, + { AppProfile::SOTTR, { @@ -1523,14 +1522,6 @@ AppProfilePattern AppPatternTable[] = } }, - { - AppProfile::Satisfactory, - { - AppNameSatisfactory, - PatternEnd - } - }, - { AppProfile::QuakeEnhanced, { @@ -1647,6 +1638,14 @@ AppProfilePattern AppPatternTable[] = } }, + { + AppProfile::GgmlVulkan, + { + AppNameGgmlVulkan, + PatternEnd + } + }, + }; static char* GetExecutableName(size_t* pLength, bool includeExtension = false); diff --git a/icd/api/app_shader_optimizer.cpp 
b/icd/api/app_shader_optimizer.cpp index aca59ef2..6007df63 100644 --- a/icd/api/app_shader_optimizer.cpp +++ b/icd/api/app_shader_optimizer.cpp @@ -686,6 +686,11 @@ void ShaderOptimizer::ApplyProfileToGraphicsPipelineCreateInfo( pPalCreateInfo->rsState.binningOverride = createInfo.binningOverride; } + if (createInfo.apply.ldsPsGroupSizeOverride) + { + pPalCreateInfo->ldsPsGroupSizeOverride = createInfo.ldsPsGroupSizeOverride; + } + #if PAL_ENABLE_PRINTS_ASSERTS if (m_settings.pipelineProfileDbgPrintProfileMatch) { diff --git a/icd/api/appopt/baldurs_gate3_layer.cpp b/icd/api/appopt/baldurs_gate3_layer.cpp new file mode 100644 index 00000000..460db892 --- /dev/null +++ b/icd/api/appopt/baldurs_gate3_layer.cpp @@ -0,0 +1,116 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** +*********************************************************************************************************************** +* @file baldurs_gate3_layer.cpp +* @brief Implementation of the Baldur's Gate 3 layer. +*********************************************************************************************************************** +*/ + +#include "baldurs_gate3_layer.h" + +#include "include/vk_image.h" +#include "include/vk_cmdbuffer.h" +#include "include/vk_device.h" + +namespace vk +{ + +namespace entry +{ + +namespace baldurs_gate3_layer +{ + +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier2KHR( + VkCommandBuffer cmdBuffer, + const VkDependencyInfoKHR* pDependencyInfo) +{ + CmdBuffer* pCmdBuffer = ApiCmdBuffer::ObjectFromHandle(cmdBuffer); + OptLayer* pLayer = pCmdBuffer->VkDevice()->GetAppOptLayer(); + + bool needsBarrierOverride = false; + + VkDependencyInfoKHR dependencyInfo = *pDependencyInfo; + VkImageMemoryBarrier2KHR imageBarriers[3]; + + const VkDependencyInfoKHR* pOverrideDependencyInfo = &dependencyInfo; + const VkImageMemoryBarrier2KHR* pImageMemoryBarriers = &imageBarriers[0]; + + if ((pDependencyInfo->memoryBarrierCount == 0) && + (pDependencyInfo->bufferMemoryBarrierCount == 0) && + (pDependencyInfo->imageMemoryBarrierCount == 3) && + (pDependencyInfo->pImageMemoryBarriers != nullptr) && + (pDependencyInfo->pImageMemoryBarriers[2].srcStageMask == VK_PIPELINE_STAGE_2_COPY_BIT_KHR) && + (pDependencyInfo->pImageMemoryBarriers[2].dstStageMask 
== VK_PIPELINE_STAGE_2_COPY_BIT_KHR) && + (pDependencyInfo->pImageMemoryBarriers[2].srcAccessMask == VK_ACCESS_2_TRANSFER_READ_BIT_KHR) && + (pDependencyInfo->pImageMemoryBarriers[2].dstAccessMask == VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR) && + (pDependencyInfo->pImageMemoryBarriers[2].oldLayout == VK_IMAGE_LAYOUT_UNDEFINED) && + (pDependencyInfo->pImageMemoryBarriers[2].newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) && + (Image::ObjectFromHandle + (pDependencyInfo->pImageMemoryBarriers[2].image)->GetFormat() == VK_FORMAT_B10G11R11_UFLOAT_PACK32) && + (Image::ObjectFromHandle + (pDependencyInfo->pImageMemoryBarriers[2].image)->GetImageSamples() == VK_SAMPLE_COUNT_1_BIT)) + { + for (uint32_t i = 0; i < 3; i++) + { + imageBarriers[i] = dependencyInfo.pImageMemoryBarriers[i]; + } + + imageBarriers[2].srcStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR; + + dependencyInfo.pImageMemoryBarriers = pImageMemoryBarriers; + + needsBarrierOverride = true; + } + + // Pass the barrier call on to the next layer, using the patched copy only when the pattern above matched + pLayer->GetNextLayer()->GetEntryPoints().vkCmdPipelineBarrier2KHR( + cmdBuffer, + (needsBarrierOverride) ? pOverrideDependencyInfo : pDependencyInfo); +} + +} // namespace baldurs_gate3_layer + +} // namespace entry + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#define BALDURS_GATE3_OVERRIDE_ALIAS(entry_name, func_name) \ + pDispatchTable->OverrideEntryPoints()->entry_name = vk::entry::baldurs_gate3_layer::func_name + +#define BALDURS_GATE3_OVERRIDE_ENTRY(entry_name) BALDURS_GATE3_OVERRIDE_ALIAS(entry_name, entry_name) + +// ===================================================================================================================== +void BaldursGate3Layer::OverrideDispatchTable( + DispatchTable* pDispatchTable) +{ + // Save current device dispatch table to use as the next layer. 
+ m_nextLayer = *pDispatchTable; + + BALDURS_GATE3_OVERRIDE_ENTRY(vkCmdPipelineBarrier2KHR); +} + +} // namespace vk diff --git a/icd/api/appopt/baldurs_gate3_layer.h b/icd/api/appopt/baldurs_gate3_layer.h new file mode 100644 index 00000000..474b8bed --- /dev/null +++ b/icd/api/appopt/baldurs_gate3_layer.h @@ -0,0 +1,57 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** +*********************************************************************************************************************** +* @file baldurs_gate3_layer.h +* @brief Contains shadowed entry points related to Baldur's Gate 3. 
+*********************************************************************************************************************** +*/ + +#ifndef __BALDURS_GATE3_LAYER_H__ +#define __BALDURS_GATE3_LAYER_H__ + +#pragma once + +#include "opt_layer.h" + +namespace vk +{ +// ===================================================================================================================== +// Class for the Baldur's Gate 3 Layer to simplify calls to the overridden dispatch table from the layer's entrypoints +class BaldursGate3Layer final : public OptLayer +{ +public: + BaldursGate3Layer() {} + virtual ~BaldursGate3Layer() {} + + virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) override; + +private: + PAL_DISALLOW_COPY_AND_ASSIGN(BaldursGate3Layer); +}; + +}; // namespace vk + +#endif /* __BALDURS_GATE3_LAYER_H__ */ diff --git a/icd/api/appopt/gravity_mark_layer.cpp b/icd/api/appopt/gravity_mark_layer.cpp new file mode 100644 index 00000000..8aeea3d6 --- /dev/null +++ b/icd/api/appopt/gravity_mark_layer.cpp @@ -0,0 +1,119 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** +*********************************************************************************************************************** +* @file gravity_mark_layer.cpp +* @brief Implementation of the Gravity Mark layer. +*********************************************************************************************************************** +*/ + +#include "gravity_mark_layer.h" + +#include "include/vk_image.h" +#include "include/vk_cmdbuffer.h" +#include "include/vk_device.h" + +namespace vk +{ + +namespace entry +{ + +namespace gravity_mark_layer +{ + +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( + VkCommandBuffer cmdBuffer, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask, + VkDependencyFlags dependencyFlags, + uint32_t memoryBarrierCount, + const VkMemoryBarrier* pMemoryBarriers, + uint32_t bufferMemoryBarrierCount, + const VkBufferMemoryBarrier* pBufferMemoryBarriers, + uint32_t imageMemoryBarrierCount, + const VkImageMemoryBarrier* pImageMemoryBarriers) +{ + CmdBuffer* pCmdBuffer = ApiCmdBuffer::ObjectFromHandle(cmdBuffer); + OptLayer* pLayer = pCmdBuffer->VkDevice()->GetAppOptLayer(); + + // - corruption caused by incorrect barrier between CmdDispatch and CmdDrawIndexed calls which access the same + // R16G16B16A16_SFLOAT 
image + // - existing barrier from app specifies srcStageMask = TOP_OF_PIPE which is equivalent to VK_PIPELINE_STAGE_2_NONE + // - changing this to BOTTOM_OF_PIPE will correctly sync between the dispatch and draw calls, resolving corruption + + if ((imageMemoryBarrierCount == 1) && + (srcStageMask == VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT) && + (dstStageMask == (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT + | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT + | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT + | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) && + (pImageMemoryBarriers != nullptr) && + (Image::ObjectFromHandle(pImageMemoryBarriers[0].image)->GetFormat() == VK_FORMAT_R16G16B16A16_SFLOAT) && + (Image::ObjectFromHandle(pImageMemoryBarriers[0].image)->GetImageSamples() == VK_SAMPLE_COUNT_1_BIT) && + (pImageMemoryBarriers[0].srcAccessMask == VK_ACCESS_NONE) && + (pImageMemoryBarriers[0].dstAccessMask == VK_ACCESS_SHADER_READ_BIT) && + (pImageMemoryBarriers[0].oldLayout == VK_IMAGE_LAYOUT_GENERAL) && + (pImageMemoryBarriers[0].newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)) + { + srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + // Pass the barrier call, with the possibly adjusted srcStageMask, on to the next layer + pLayer->GetNextLayer()->GetEntryPoints().vkCmdPipelineBarrier( + cmdBuffer, + srcStageMask, + dstStageMask, + dependencyFlags, + memoryBarrierCount, + pMemoryBarriers, + bufferMemoryBarrierCount, + pBufferMemoryBarriers, + imageMemoryBarrierCount, + pImageMemoryBarriers); +} + +} // namespace gravity_mark_layer + +} // namespace entry + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#define GRAVITY_MARK_OVERRIDE_ALIAS(entry_name, func_name) \ + pDispatchTable->OverrideEntryPoints()->entry_name = vk::entry::gravity_mark_layer::func_name + +#define GRAVITY_MARK_OVERRIDE_ENTRY(entry_name) GRAVITY_MARK_OVERRIDE_ALIAS(entry_name, entry_name) + +// 
===================================================================================================================== +void GravityMarkLayer::OverrideDispatchTable( + DispatchTable* pDispatchTable) +{ + // Save current device dispatch table to use as the next layer. + m_nextLayer = *pDispatchTable; + + GRAVITY_MARK_OVERRIDE_ENTRY(vkCmdPipelineBarrier); +} + +} // namespace vk diff --git a/icd/api/appopt/gravity_mark_layer.h b/icd/api/appopt/gravity_mark_layer.h new file mode 100644 index 00000000..725b0654 --- /dev/null +++ b/icd/api/appopt/gravity_mark_layer.h @@ -0,0 +1,57 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** +*********************************************************************************************************************** +* @file gravity_mark_layer.h +* @brief Contains shadowed entry points related to Gravity Mark. +*********************************************************************************************************************** +*/ + +#ifndef __GRAVITY_MARK_LAYER_H__ +#define __GRAVITY_MARK_LAYER_H__ + +#pragma once + +#include "opt_layer.h" + +namespace vk +{ +// ===================================================================================================================== +// Class for the Gravity Mark Layer to simplify calls to the overridden dispatch table from the layer's entrypoints +class GravityMarkLayer final : public OptLayer +{ +public: + GravityMarkLayer() {} + virtual ~GravityMarkLayer() {} + + virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) override; + +private: + PAL_DISALLOW_COPY_AND_ASSIGN(GravityMarkLayer); +}; + +}; // namespace vk + +#endif /* __GRAVITY_MARK_LAYER_H__ */ diff --git a/icd/api/appopt/shader_profiles/llpc/generic/RedDeadRedemption2/profile.json b/icd/api/appopt/shader_profiles/llpc/generic/RedDeadRedemption2/profile.json new file mode 100644 index 00000000..c43cb320 --- /dev/null +++ b/icd/api/appopt/shader_profiles/llpc/generic/RedDeadRedemption2/profile.json @@ -0,0 +1,3 @@ +{ + "entries": [] +} \ No newline at end of file diff --git a/icd/api/appopt/shader_profiles/llpc/generic/StrangeBrigade/profile.json b/icd/api/appopt/shader_profiles/llpc/generic/StrangeBrigade/profile.json new file mode 100644 index 00000000..c43cb320 --- /dev/null +++ b/icd/api/appopt/shader_profiles/llpc/generic/StrangeBrigade/profile.json @@ -0,0 +1,3 @@ +{ + "entries": [] +} \ No newline at end of file diff --git a/icd/api/appopt/split_raytracing_layer.cpp 
b/icd/api/appopt/split_raytracing_layer.cpp index d3355922..4324417d 100644 --- a/icd/api/appopt/split_raytracing_layer.cpp +++ b/icd/api/appopt/split_raytracing_layer.cpp @@ -69,6 +69,7 @@ void SplitRaytracingLayer::TraceRaysDispatchPerDevice( }; const Pal::DispatchDims blockDispatchSize = pPipeline->GetDispatchSize(blockSize); + const Pal::DispatchDims traceDispatchSize = pPipeline->GetDispatchSize(traceSize); // Lambda function used to help dispatch. auto dispatch = [pCmdBuffer, deviceIdx](Pal::DispatchDims offset, Pal::DispatchDims size) @@ -108,16 +109,19 @@ void SplitRaytracingLayer::TraceRaysDispatchPerDevice( }; // Split Z axis. - split(traceSize.z, blockDispatchSize.z, - [split, traceSize, blockDispatchSize, &dispatch](uint32_t offsetZ, uint32_t sizeZ) + split(traceDispatchSize.z, blockDispatchSize.z, + [split, traceDispatchSize, blockDispatchSize, &dispatch] + (uint32_t offsetZ, uint32_t sizeZ) { // Split Y axis. - split(traceSize.y, blockDispatchSize.y, - [split, traceSize, blockDispatchSize, &dispatch, offsetZ, sizeZ](uint32_t offsetY, uint32_t sizeY) + split(traceDispatchSize.y, blockDispatchSize.y, + [split, traceDispatchSize, blockDispatchSize, &dispatch, offsetZ, sizeZ] + (uint32_t offsetY, uint32_t sizeY) { //Split X axis. 
- split(traceSize.x, blockDispatchSize.x, - [&dispatch, offsetZ, sizeZ, offsetY, sizeY](uint32_t offsetX, uint32_t sizeX) + split(traceDispatchSize.x, blockDispatchSize.x, + [&dispatch, offsetZ, sizeZ, offsetY, sizeY] + (uint32_t offsetX, uint32_t sizeX) { Pal::DispatchDims offset = { diff --git a/icd/api/barrier_policy.cpp b/icd/api/barrier_policy.cpp index 6027bc96..3e6519b1 100644 --- a/icd/api/barrier_policy.cpp +++ b/icd/api/barrier_policy.cpp @@ -1015,6 +1015,7 @@ void ImageBarrierPolicy::InitImageCachePolicy( if (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) { supportedOutputCacheMask |= Pal::CoherCopyDst | Pal::CoherResolveDst | Pal::CoherClear; + supportedInputCacheMask |= Pal::CoherCopyDst | Pal::CoherResolveDst | Pal::CoherClear; } constexpr VkImageUsageFlags shaderReadFlags = VK_IMAGE_USAGE_SAMPLED_BIT @@ -1163,9 +1164,12 @@ void ImageBarrierPolicy::ApplyImageMemoryBarrier( bool skipMatchingLayouts) const { // Determine effective queue family indices. - uint32_t srcQueueFamilyIndex = (barrier.srcQueueFamilyIndex == VK_QUEUE_FAMILY_IGNORED) + // Note that matching source and destination families also means don't transfer queue family ownership. + uint32_t srcQueueFamilyIndex = ((barrier.srcQueueFamilyIndex == barrier.dstQueueFamilyIndex) || + (barrier.srcQueueFamilyIndex == VK_QUEUE_FAMILY_IGNORED)) ? currentQueueFamilyIndex : barrier.srcQueueFamilyIndex; - uint32_t dstQueueFamilyIndex = (barrier.dstQueueFamilyIndex == VK_QUEUE_FAMILY_IGNORED) + uint32_t dstQueueFamilyIndex = ((barrier.dstQueueFamilyIndex == barrier.srcQueueFamilyIndex) || + (barrier.dstQueueFamilyIndex == VK_QUEUE_FAMILY_IGNORED)) ? currentQueueFamilyIndex : barrier.dstQueueFamilyIndex; // Either the source or the destination queue family has to match the current queue family. 
@@ -1301,6 +1305,7 @@ void BufferBarrierPolicy::InitBufferCachePolicy( // Also need Pal::CoherShaderWrite here as vkCmdCopyQueryPoolResults uses a compute shader defined in the // Vulkan API layer when used with timestamp queries. supportedOutputCacheMask |= Pal::CoherCopyDst | Pal::CoherShaderWrite; + supportedInputCacheMask |= Pal::CoherCopyDst | Pal::CoherShaderWrite; // Buffer markers fall under the same PAL coherency rules as timestamp writes if (pDevice->IsExtensionEnabled(DeviceExtensions::AMD_BUFFER_MARKER)) @@ -1370,10 +1375,13 @@ void BufferBarrierPolicy::ApplyBufferMemoryBarrier( Pal::BarrierTransition* pPalBarrier) const { // Determine effective queue family indices. - uint32_t srcQueueFamilyIndex = (barrier.srcQueueFamilyIndex == VK_QUEUE_FAMILY_IGNORED) - ? currentQueueFamilyIndex : barrier.srcQueueFamilyIndex; - uint32_t dstQueueFamilyIndex = (barrier.dstQueueFamilyIndex == VK_QUEUE_FAMILY_IGNORED) - ? currentQueueFamilyIndex : barrier.dstQueueFamilyIndex; + // Note that matching source and destination families also means don't transfer queue family ownership. + uint32_t srcQueueFamilyIndex = ((barrier.srcQueueFamilyIndex == barrier.dstQueueFamilyIndex) || + (barrier.srcQueueFamilyIndex == VK_QUEUE_FAMILY_IGNORED)) + ? currentQueueFamilyIndex : barrier.srcQueueFamilyIndex; + uint32_t dstQueueFamilyIndex = ((barrier.dstQueueFamilyIndex == barrier.srcQueueFamilyIndex) || + (barrier.dstQueueFamilyIndex == VK_QUEUE_FAMILY_IGNORED)) + ? currentQueueFamilyIndex : barrier.dstQueueFamilyIndex; // Either the source or the destination queue family has to match the current queue family. 
VK_ASSERT((srcQueueFamilyIndex == currentQueueFamilyIndex) || (dstQueueFamilyIndex == currentQueueFamilyIndex)); diff --git a/icd/api/compiler_solution_llpc.cpp b/icd/api/compiler_solution_llpc.cpp index a5707b60..28335f7b 100644 --- a/icd/api/compiler_solution_llpc.cpp +++ b/icd/api/compiler_solution_llpc.cpp @@ -150,7 +150,7 @@ VkResult CompilerSolutionLlpc::BuildShaderModule( moduleInfo.shaderBin = shaderBinary; auto pPipelineCompiler = m_pPhysicalDevice->GetCompiler(); - pPipelineCompiler->ApplyPipelineOptions(pDevice, 0, &moduleInfo.options.pipelineOptions + pPipelineCompiler->ApplyPipelineOptions(pDevice, 0, &moduleInfo.options.pipelineOptions, nullptr ); #if VKI_RAY_TRACING @@ -954,6 +954,7 @@ VkResult CompilerSolutionLlpc::CreateRayTracingPipelineBinary( pPipelineBinary->librarySummary = pipelineOut.librarySummary; pPipelineBinary->isCps = pipelineOut.isCps; + pPipelineBinary->hasKernelEntry = pipelineOut.hasKernelEntry; } *pCompileTime = Util::GetPerfCpuTime() - startTime; diff --git a/icd/api/devmode/devmode_mgr.h b/icd/api/devmode/devmode_mgr.h index c806ea9f..634533dc 100644 --- a/icd/api/devmode/devmode_mgr.h +++ b/icd/api/devmode/devmode_mgr.h @@ -56,6 +56,14 @@ class PipelineBinaryCache; namespace vk { +struct AccelStructUserMarkerString +{ + static constexpr size_t MaxAccelStructLabelSize = 64; + + uint32_t length; + char string[MaxAccelStructLabelSize]; +}; + // ===================================================================================================================== // This class provides functionality to interact with the GPU Open Developer Mode message passing service and the rest // of the driver. 
@@ -100,6 +108,12 @@ class IDevMode virtual bool IsTracingEnabled() const = 0; virtual bool IsCrashAnalysisEnabled() const = 0; + virtual void RecordRenderOps( + uint32_t deviceIdx, + Queue* pQueue, + uint32_t drawCallCount, + uint32_t dispatchCallCount) = 0; + virtual Pal::Result TimedQueueSubmit( uint32_t deviceIdx, Queue* pQueue, @@ -143,6 +157,10 @@ class IDevMode uint32 markerStringDataSize, const char* pMarkerStringData) {} + virtual void LabelAccelStruct( + uint64_t deviceAddress, + const char* pString) {} + #endif }; diff --git a/icd/api/devmode/devmode_rgp.cpp b/icd/api/devmode/devmode_rgp.cpp index 8693d306..bab86493 100644 --- a/icd/api/devmode/devmode_rgp.cpp +++ b/icd/api/devmode/devmode_rgp.cpp @@ -46,10 +46,11 @@ // PAL headers #include "pal.h" +#include "palAutoBuffer.h" #include "palCmdAllocator.h" #include "palFence.h" #include "palQueueSemaphore.h" -#include "palHashBaseImpl.h" +#include "palHashMapImpl.h" #include "palListImpl.h" #include "palVectorImpl.h" #include "palStringTableTraceSource.h" @@ -71,6 +72,66 @@ namespace vk { +class DevModeRgpStringTableTraceSource : public GpuUtil::StringTableTraceSource +{ +public: + DevModeRgpStringTableTraceSource(Pal::IPlatform* pPlatform, DevModeRgp* pDevMode) + : StringTableTraceSource(pPlatform), m_pDevMode(pDevMode) {} + virtual ~DevModeRgpStringTableTraceSource() {} + + virtual void OnTraceFinished() override + { + const auto& accelStructNames = m_pDevMode->GetAccelStructUserMarkerTable(); + const uint32_t numStrings = accelStructNames.GetNumEntries(); + + if (numStrings > 0) + { + // Calculate the size of the string data for accelStruct names + uint32_t stringDataSizeInBytes = 0; + for (auto it = accelStructNames.Begin(); it.Get() != nullptr; it.Next()) + { + stringDataSizeInBytes += it.Get()->value.length; + } + + const uint32_t baseOffset = sizeof(uint32_t) * numStrings; + AutoBuffer stringOffsets( + numStrings, m_pPlatform); + Vector stringData(m_pPlatform); + constexpr uint32_t 
ExtraBytesPerString = 18; // To host "RRA_RA:
:" + // Reserve more space because we are storing strings in the format of "RRA_AS:
: