From 2cb5558b94c5dc839e093cb439057a1802426c8e Mon Sep 17 00:00:00 2001
From: jaxl
Date: Mon, 23 Sep 2019 11:08:06 +0800
Subject: [PATCH] Update xgl from commit: f0e4f00
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Modify the NGG culling settings to be specified on a per-pipeline-type basis instead of globally
* Enable VK_AMD_device_coherent_memory extension
* VK_EXT_calibrated_timestamps: Enhance error handling when an invalid time domain is specified
* [mGPU] vkEnumeratePhysicalDevices will always rank Navi after Vega/Polaris
* Implement pipeline ELF cache
* Expose VK_EXT_line_rasterization extension
* Enable VK_EXT_calibrated_timestamps extension
* Tune shader performance for Serious Sam Fusion 2017
* Tune shader performance for DawnOfWar3
* Implement VK_KHR_pipeline_executable_properties
* Fix memory leak in VK_Semaphore
* Implement shader module async compile
* Add app detection for Elite Dangerous to avoid corruption
* Bump up LLPC version to enable interface 32 "Add ShaderModuleOptions in ShaderModuleBuildInfo"
* Update PAL interface in Vulkan to 527
* Update Vulkan headers to 1.1.121
* Build cwpack from the external third_party/cwpack path
* Fix crash when calling VK_EXT_debug_utils functions while using DevDriver
* Add missing enabled-feature verification at device creation
---
 CMakeLists.txt | 8 +-
 icd/CMakeLists.txt | 28 +-
 icd/Loader/LunarG/Lnx/amd-icd.json | 2 +-
 icd/api/app_profile.cpp | 21 +
 icd/api/app_shader_optimizer.cpp | 122 ++
 icd/api/appopt/async_layer.cpp | 199 +++
 icd/api/appopt/async_layer.h | 99 ++
 icd/api/appopt/async_shader_module.cpp | 160 +++
 icd/api/appopt/async_shader_module.h | 80 ++
 icd/api/appopt/async_task_thread.h | 151 +++
 icd/api/barrier_policy.cpp | 2 +-
 icd/api/compiler_solution_llpc.cpp | 13 +-
 icd/api/devmode/devmode_mgr.cpp | 301 ++++-
 icd/api/devmode/devmode_mgr.h | 23 +-
 icd/api/include/app_profile.h | 1 +
 icd/api/include/compiler_solution.h | 3 +
 icd/api/include/compiler_solution_llpc.h | 1 +
 .../devext/vk_amd_device_coherent_memory.h | 56 -
 .../include/khronos/sdk-1.1/vulkan_android.h | 9 +-
 icd/api/include/khronos/sdk-1.1/vulkan_core.h | 77 +-
 .../include/khronos/sdk-1.1/vulkan_fuchsia.h | 9 +-
 icd/api/include/khronos/sdk-1.1/vulkan_ggp.h | 9 +-
 icd/api/include/khronos/sdk-1.1/vulkan_ios.h | 9 +-
 .../include/khronos/sdk-1.1/vulkan_macos.h | 9 +-
 .../include/khronos/sdk-1.1/vulkan_metal.h | 9 +-
 icd/api/include/khronos/sdk-1.1/vulkan_vi.h | 9 +-
 .../include/khronos/sdk-1.1/vulkan_wayland.h | 9 +-
 .../include/khronos/sdk-1.1/vulkan_win32.h | 13 +-
 icd/api/include/khronos/sdk-1.1/vulkan_xcb.h | 9 +-
 icd/api/include/khronos/sdk-1.1/vulkan_xlib.h | 9 +-
 .../khronos/sdk-1.1/vulkan_xlib_xrandr.h | 9 +-
 icd/api/include/khronos/vulkan.h | 3 +-
 icd/api/include/pipeline_binary_cache.h | 196 +++
 icd/api/include/pipeline_compiler.h | 38 +-
 icd/api/include/render_state_cache.h | 8 +
 icd/api/include/vk_cmdbuffer.h | 10 +
 icd/api/include/vk_device.h | 11 +
 icd/api/include/vk_dispatch.h | 3 +-
 icd/api/include/vk_extensions.h | 2 +
 icd/api/include/vk_graphics_pipeline.h | 3 +
 icd/api/include/vk_memory.h | 12 +-
 icd/api/include/vk_physical_device.h | 129 +-
 icd/api/include/vk_pipeline.h | 3 +-
 icd/api/include/vk_pipeline_cache.h | 5 +-
 icd/api/include/vk_semaphore.h | 85 +-
 icd/api/include/vk_shader.h | 5 +-
 icd/api/pipeline_binary_cache.cpp | 1125 +++++++++++++++++
 icd/api/pipeline_compiler.cpp | 414 +++++-
 icd/api/render_state_cache.cpp | 30 +
 icd/api/renderpass/renderpass_builder.cpp | 56 +-
icd/api/renderpass/renderpass_builder.h | 8 +- icd/api/renderpass/renderpass_logger.cpp | 33 +- icd/api/renderpass/renderpass_types.h | 9 +- icd/api/sqtt/sqtt_layer.cpp | 6 +- icd/api/strings/base_entry_points.txt | 3 + icd/api/strings/base_extensions.txt | 1 + icd/api/vk_cmdbuffer.cpp | 134 +- icd/api/vk_compute_pipeline.cpp | 32 + icd/api/vk_device.cpp | 145 ++- icd/api/vk_dispatch.cpp | 3 + icd/api/vk_graphics_pipeline.cpp | 91 +- icd/api/vk_image.cpp | 23 +- icd/api/vk_memory.cpp | 10 +- icd/api/vk_physical_device.cpp | 736 ++++++++--- icd/api/vk_physical_device_manager.cpp | 7 +- icd/api/vk_pipeline.cpp | 291 +++-- icd/api/vk_pipeline_cache.cpp | 28 +- icd/api/vk_queue.cpp | 20 +- icd/api/vk_render_pass.cpp | 72 +- icd/api/vk_semaphore.cpp | 258 ++-- icd/api/vk_shader.cpp | 5 +- icd/api/vk_swapchain.cpp | 32 +- icd/make/importdefs | 4 +- icd/res/ver.h | 2 +- icd/settings/settings.cpp | 25 +- icd/settings/settings_xgl.json | 204 ++- 76 files changed, 4859 insertions(+), 920 deletions(-) create mode 100644 icd/api/appopt/async_layer.cpp create mode 100644 icd/api/appopt/async_layer.h create mode 100644 icd/api/appopt/async_shader_module.cpp create mode 100644 icd/api/appopt/async_shader_module.h create mode 100644 icd/api/appopt/async_task_thread.h delete mode 100755 icd/api/include/khronos/devext/vk_amd_device_coherent_memory.h create mode 100644 icd/api/include/pipeline_binary_cache.h create mode 100644 icd/api/pipeline_binary_cache.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index d94ecbd9..737df5e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,6 @@ if(UNIX) endif() ### Cached Project Options ############################################################################################# -option(VK_INTERNAL_DEVELOPER "Build Internal Developer Build?" OFF) option(ICD_BUILD_LLPC "Build LLPC?" ON) option(XGL_BUILD_VEGA20 "Build open source vulkan for Vega20?" 
ON) @@ -183,6 +182,13 @@ if(EXISTS ${XGL_METROHASH_PATH} AND NOT TARGET metrohash) add_subdirectory(${XGL_METROHASH_PATH} ${PROJECT_BINARY_DIR}/metrohash) endif() +### cwpack ############################################################# +set(XGL_CWPACK_PATH ${PROJECT_SOURCE_DIR}/../third_party/cwpack CACHE PATH "The path of cwpack.") + +if(EXISTS ${XGL_CWPACK_PATH} AND NOT TARGET cwpack) + add_subdirectory(${XGL_CWPACK_PATH} ${PROJECT_BINARY_DIR}/cwpack) +endif() + ### LLPC ############################################################### set(XGL_ICD_PATH ${PROJECT_SOURCE_DIR}/icd CACHE PATH "The path of xgl, it is read-only.") set(XGL_LLPC_PATH ${PROJECT_SOURCE_DIR}/../llpc CACHE PATH "Specify the path to the LLPC.") diff --git a/icd/CMakeLists.txt b/icd/CMakeLists.txt index 31df07a2..60126c13 100644 --- a/icd/CMakeLists.txt +++ b/icd/CMakeLists.txt @@ -85,9 +85,6 @@ target_compile_definitions(xgl PRIVATE ${TARGET_ARCHITECTURE_ENDIANESS}ENDIAN_CP target_compile_definitions(xgl PRIVATE PAL_BUILD_GFX9=1) # Internal developer build -if(VK_INTERNAL_DEVELOPER) - target_compile_definitions(xgl PRIVATE VK_INTERNAL_DEVELOPER) -endif() if(ICD_BUILD_LLPC) target_compile_definitions(xgl PRIVATE ICD_BUILD_LLPC) @@ -189,6 +186,7 @@ target_sources(xgl PRIVATE api/gpu_event_mgr.cpp api/internal_mem_mgr.cpp api/pipeline_compiler.cpp + api/pipeline_binary_cache.cpp api/shader_cache.cpp api/stencil_ops_combiner.cpp api/vert_buf_binding_mgr.cpp @@ -236,6 +234,8 @@ target_sources(xgl PRIVATE api/vk_descriptor_update_template.cpp api/appopt/barrier_filter_layer.cpp api/appopt/strange_brigade_layer.cpp + api/appopt/async_layer.cpp + api/appopt/async_shader_module.cpp api/render_state_cache.cpp api/renderpass/renderpass_builder.cpp api/renderpass/renderpass_logger.cpp @@ -247,6 +247,21 @@ if(XGL_BUILD_GFX10) target_sources(xgl PRIVATE api/appopt/wolfenstein2_layer.cpp) endif() +# vk_physical_device.cpp uses the __DATE__ and __TIME__ macros to generate a pipelineCacheUUID. +# The following rule forces vk_physical_device.cpp to be re-compiled on every build, so that +# an up-to-date time/date is always used regardless of which files were touched since the last build. 
+add_custom_command( + OUTPUT "vk_physical_device.g" + COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_SOURCE_DIR}/api/vk_physical_device.cpp +) + +add_custom_target( + RebuildVkPhysicalDevice + DEPENDS "vk_physical_device.g" +) + +add_dependencies(xgl RebuildVkPhysicalDevice) + ### ICD Auto-generated String Files #################################################################################### set(ICD_STRING_DIR ${CMAKE_CURRENT_SOURCE_DIR}/api/strings) # ICD settings code generation main script @@ -401,10 +416,15 @@ if (UNIX) else() target_link_libraries(xgl PUBLIC -Wl,--whole-archive ${PROJECT_BINARY_DIR}/pal/metrohash/libmetrohash.a -Wl,--no-whole-archive) endif() + target_link_libraries(xgl PUBLIC -Wl,--whole-archive ${PROJECT_BINARY_DIR}/pal/gpuopen/core/libddCore.a -Wl,--no-whole-archive) target_link_libraries(xgl PUBLIC -Wl,--whole-archive ${PROJECT_BINARY_DIR}/pal/gpuopen/libgpuopen.a -Wl,--no-whole-archive) target_link_libraries(xgl PUBLIC -Wl,--whole-archive ${PROJECT_BINARY_DIR}/pal/vam/libvam.a -Wl,--no-whole-archive) target_link_libraries(xgl PUBLIC -Wl,--whole-archive ${PROJECT_BINARY_DIR}/pal/addrlib/libaddrlib.a -Wl,--no-whole-archive) - target_link_libraries(xgl PUBLIC -Wl,--whole-archive ${PROJECT_BINARY_DIR}/pal/cwpack/libcwpack.a -Wl,--no-whole-archive) + if(EXISTS ${XGL_CWPACK_PATH}) + target_link_libraries(xgl PUBLIC -Wl,--whole-archive ${PROJECT_BINARY_DIR}/cwpack/libcwpack.a -Wl,--no-whole-archive) + else() + target_link_libraries(xgl PUBLIC -Wl,--whole-archive ${PROJECT_BINARY_DIR}/pal/cwpack/libcwpack.a -Wl,--no-whole-archive) + endif() endif() endif() diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json index 392071bc..01a797bd 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ -1,7 +1,7 @@ { "file_format_version" : "1.0.0", "ICD" : { - "api_version" : "1.1.119" + "api_version" : "1.1.121" } } diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp index 611c8730..15979c90 100644 --- a/icd/api/app_profile.cpp +++ b/icd/api/app_profile.cpp @@ -131,6 +131,12 @@ constexpr AppProfilePatternEntry AppEngineSource2 = "source2" }; +constexpr AppProfilePatternEntry AppEngineDXVK = +{ + PatternEngineNameLower, + "dxvk" +}; + constexpr AppProfilePatternEntry AppNameTalosWin32Bit = { PatternAppNameLower, @@ -275,6 +281,12 @@ constexpr AppProfilePatternEntry AppNameDiRT4 = "dirt4" }; +constexpr AppProfilePatternEntry AppNameEliteDangerous = +{ + PatternAppNameLower, + "elitedangerous64.exe" +}; + constexpr AppProfilePatternEntry PatternEnd = {}; // This is a table of patterns. The first matching pattern in this table will be returned. 
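The table below is scanned in order, and the first pattern whose entries all match the detected application strings selects the profile. As a reading aid, here is a minimal sketch of that first-match lookup; the helpers `MatchesEntry` and `IsPatternEnd` and the field names `entries`/`profile` are illustrative placeholders, not symbols from this patch:

    // Illustrative sketch (placeholder names): first-match scan over AppPatternTable.
    AppProfile ScanApplicationProfile(const char* appNameLower, const char* engineNameLower)
    {
        for (const AppProfilePattern& pattern : AppPatternTable)
        {
            bool allEntriesMatch = true;

            for (const AppProfilePatternEntry& entry : pattern.entries)
            {
                if (IsPatternEnd(entry)) // zero-initialized sentinel terminates the entry list
                {
                    break;
                }
                allEntriesMatch &= MatchesEntry(entry, appNameLower, engineNameLower);
            }

            if (allEntriesMatch)
            {
                // e.g. app "elitedangerous64.exe" + engine "dxvk" -> AppProfile::DxvkEliteDangerous
                return pattern.profile;
            }
        }

        return AppProfile::Default;
    }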
@@ -484,6 +496,15 @@ AppProfilePattern AppPatternTable[] = } }, + { + AppProfile::DxvkEliteDangerous, + { + AppNameEliteDangerous, + AppEngineDXVK, + PatternEnd + } + }, + { AppProfile::Feral3DEngine, { diff --git a/icd/api/app_shader_optimizer.cpp b/icd/api/app_shader_optimizer.cpp index f390dfc9..3a023193 100644 --- a/icd/api/app_shader_optimizer.cpp +++ b/icd/api/app_shader_optimizer.cpp @@ -845,6 +845,99 @@ void ShaderOptimizer::BuildAppProfileLlpc() m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xA3EB7292C77A0365; m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; } + else if (appProfile == AppProfile::SeriousSamFusion) + { + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xC79A37CD260277EFE5CA053E0978210F, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xE5CA053E0978210F; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xC79A37CD260277EF; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x8AF14CFF0496E80BD73AAFA65ED26E2C, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xD73AAFA65ED26E2C; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x8AF14CFF0496E80B; + m_appProfile.entries[i].action.createInfo.apply.binningOverride = true; + m_appProfile.entries[i].action.createInfo.binningOverride = Pal::BinningOverride::Enable; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x70379E5982D0D369FBF50B9F866B1DAA, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xFBF50B9F866B1DAA; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x70379E5982D0D369; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.apply.disableLoopUnrolls = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x620A4E559EB52DED870DB091946A7585, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x870DB091946A7585; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x620A4E559EB52DED; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.apply.disableLoopUnrolls = true; + + 
/////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xB60900B3E1256DDFC7A889DBAC76F591, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xC7A889DBAC76F591; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xB60900B3E1256DDF; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.apply.disableLoopUnrolls = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x435A117D4C9A824B4E7F7BFEB93755B6, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x4E7F7BFEB93755B6; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x435A117D4C9A824B; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xD22A4FE1A6B61288879B2B5C5F578EB0, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x879B2B5C5F578EB0; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xD22A4FE1A6B61288; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x9E4C92D858A5577856901799F5CBB608, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x56901799F5CBB608; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x9E4C92D858A55778; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xE394E60E5EC992FD3688A97277E808F7, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x3688A97277E808F7; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xE394E60E5EC992FD; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xC49272618F2AC58C8E0B62AA62452B75, PS + i = 
m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x8E0B62AA62452B75; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xC49272618F2AC58C; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.apply.disableLoopUnrolls = true; + } else if (appProfile == AppProfile::WarHammerII) { /////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -889,6 +982,35 @@ void ShaderOptimizer::BuildAppProfileLlpc() m_appProfile.entries[i].action.shaders[ShaderStageFragment].dynamicShaderInfo.maxWavesPerCu = 20u; } } + else if (appProfile == AppProfile::DawnOfWarIII) + { + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x4D6AE91E42846DDA45C950CF0DA3B6A1, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x45C950CF0DA3B6A1; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x4D6AE91E42846DDA; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x85D431DFF448DCDD802B5059F23C17E7, CS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].codeHash.lower = 0x802B5059F23C17E7; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].codeHash.upper = 0x85D431DFF448DCDD; + m_appProfile.entries[i].action.shaders[ShaderStageCompute].shaderCreate.tuningOptions.useSiScheduler = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x549373FA25856E20D04006855D9CD368, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xD04006855D9CD368; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x549373FA25856E20; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + } } #if PAL_ENABLE_PRINTS_ASSERTS diff --git a/icd/api/appopt/async_layer.cpp b/icd/api/appopt/async_layer.cpp new file mode 100644 index 00000000..4f1cec7d --- /dev/null +++ b/icd/api/appopt/async_layer.cpp @@ -0,0 +1,199 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2019 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** +*********************************************************************************************************************** +* @file async_layer.cpp +* @brief Implementation of async compiler layer. +*********************************************************************************************************************** +*/ +#include "async_layer.h" +#include "async_shader_module.h" + +#include "include/vk_device.h" +#include "include/vk_shader.h" +#include "include/vk_graphics_pipeline.h" +#include "include/vk_compute_pipeline.h" +#include "palListImpl.h" + +namespace vk +{ + +namespace entry +{ + +namespace async +{ + +// ===================================================================================================================== +VKAPI_ATTR VkResult VKAPI_CALL vkCreateShaderModule( + VkDevice device, + const VkShaderModuleCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkShaderModule* pShaderModule) +{ + Device* pDevice = ApiDevice::ObjectFromHandle(device); + const VkAllocationCallbacks* pAllocCB = pAllocator ? pAllocator : pDevice->VkInstance()->GetAllocCallbacks(); + return vk::async::ShaderModule::Create(pDevice, pCreateInfo, pAllocCB, pShaderModule); +} + +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkDestroyShaderModule( + VkDevice device, + VkShaderModule shaderModule, + const VkAllocationCallbacks* pAllocator) +{ + if (shaderModule != VK_NULL_HANDLE) + { + Device* pDevice = ApiDevice::ObjectFromHandle(device); + AsyncLayer* pAsyncLayer = pDevice->GetAsyncLayer(); + const VkAllocationCallbacks* pAllocCB = pAllocator ? 
pAllocator : pDevice->VkInstance()->GetAllocCallbacks();
+
+        pAsyncLayer->SyncAll();
+        vk::async::ShaderModule::ObjectFromHandle(shaderModule)->Destroy(pDevice, pAllocCB);
+    }
+}
+
+// =====================================================================================================================
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines(
+    VkDevice                            device,
+    VkPipelineCache                     pipelineCache,
+    uint32_t                            createInfoCount,
+    const VkGraphicsPipelineCreateInfo* pCreateInfos,
+    const VkAllocationCallbacks*        pAllocator,
+    VkPipeline*                         pPipelines)
+{
+    Device*     pDevice     = ApiDevice::ObjectFromHandle(device);
+    AsyncLayer* pAsyncLayer = pDevice->GetAsyncLayer();
+    VkResult    result      = VK_SUCCESS;
+
+    for (uint32_t i = 0; (i < createInfoCount) && (result == VK_SUCCESS); ++i)
+    {
+        VkGraphicsPipelineCreateInfo    createInfo = pCreateInfos[i];
+        VkPipelineShaderStageCreateInfo stages[ShaderGfxStageCount];
+        VK_ASSERT(createInfo.stageCount <= ShaderGfxStageCount);
+        for (uint32_t stage = 0; stage < createInfo.stageCount; ++stage)
+        {
+            stages[stage] = createInfo.pStages[stage];
+            vk::async::ShaderModule* pModule = vk::async::ShaderModule::ObjectFromHandle(stages[stage].module);
+            stages[stage].module = pModule->GetNextLayerModule();
+        }
+        createInfo.pStages = stages;
+        result = ASYNC_CALL_NEXT_LAYER(vkCreateGraphicsPipelines)(device,
+                                                                  pipelineCache,
+                                                                  1,
+                                                                  &createInfo,
+                                                                  pAllocator,
+                                                                  pPipelines + i);
+    }
+
+    return result;
+}
+
+// =====================================================================================================================
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines(
+    VkDevice                           device,
+    VkPipelineCache                    pipelineCache,
+    uint32_t                           createInfoCount,
+    const VkComputePipelineCreateInfo* pCreateInfos,
+    const VkAllocationCallbacks*       pAllocator,
+    VkPipeline*                        pPipelines)
+{
+    Device*     pDevice     = ApiDevice::ObjectFromHandle(device);
+    AsyncLayer* pAsyncLayer = pDevice->GetAsyncLayer();
+    VkResult    result      = VK_SUCCESS;
+
+    for (uint32_t i = 0; (i < createInfoCount) && (result == VK_SUCCESS); ++i)
+    {
+        VkComputePipelineCreateInfo createInfo = pCreateInfos[i];
+        VK_ASSERT(createInfo.stage.module != VK_NULL_HANDLE);
+        vk::async::ShaderModule* pModule = vk::async::ShaderModule::ObjectFromHandle(createInfo.stage.module);
+        createInfo.stage.module = pModule->GetNextLayerModule();
+        result = ASYNC_CALL_NEXT_LAYER(vkCreateComputePipelines)(device,
+                                                                 pipelineCache,
+                                                                 1,
+                                                                 &createInfo,
+                                                                 pAllocator,
+                                                                 pPipelines + i);
+    }
+
+    return result;
+}
+
+} // namespace async
+
+} // namespace entry
+
+// =====================================================================================================================
+AsyncLayer::AsyncLayer(Device* pDevice)
+    :
+    m_pDevice(pDevice),
+    m_pTaskThreads(),
+    m_taskId(0)
+{
+    Util::SystemInfo sysInfo = {};
+    Util::QuerySystemInfo(&sysInfo);
+    m_activeThreadCount = Util::Min(MaxShaderModuleThreads, sysInfo.cpuLogicalCoreCount / 2);
+    for (uint32_t i = 0; i < m_activeThreadCount; ++i)
+    {
+        m_pTaskThreads[i] = VK_PLACEMENT_NEW(m_taskThreadBuffer[i])
+            async::TaskThread<ShaderModuleTask>(this, pDevice->VkInstance()->Allocator());
+        m_pTaskThreads[i]->Begin();
+    }
+}
+
+// =====================================================================================================================
+AsyncLayer::~AsyncLayer()
+{
+    for (uint32_t i = 0; i < m_activeThreadCount; ++i)
+    {
+        m_pTaskThreads[i]->SetStop();
+        m_pTaskThreads[i]->Join();
+        Util::Destructor(m_pTaskThreads[i]);
+        m_pTaskThreads[i] = nullptr;
+    }
+}
+
+//
===================================================================================================================== +void AsyncLayer::SyncAll() +{ + for (uint32_t i = 0; i < m_activeThreadCount; ++i) + { + m_pTaskThreads[i]->SyncAll(); + } +} + +// ===================================================================================================================== +void AsyncLayer::OverrideDispatchTable( + DispatchTable* pDispatchTable) +{ + // Save current device dispatch table to use as the next layer. + m_nextLayer = *pDispatchTable; + + ASYNC_OVERRIDE_ENTRY(vkCreateShaderModule); + ASYNC_OVERRIDE_ENTRY(vkDestroyShaderModule); + ASYNC_OVERRIDE_ENTRY(vkCreateGraphicsPipelines); + ASYNC_OVERRIDE_ENTRY(vkCreateComputePipelines); +} + +} // namespace vk diff --git a/icd/api/appopt/async_layer.h b/icd/api/appopt/async_layer.h new file mode 100644 index 00000000..110daf77 --- /dev/null +++ b/icd/api/appopt/async_layer.h @@ -0,0 +1,99 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2019 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ **********************************************************************************************************************/
+/**
+***********************************************************************************************************************
+* @file  async_layer.h
+* @brief Declaration of async compiler layer
+***********************************************************************************************************************
+*/
+
+#ifndef __ASYNC_LAYER_H__
+#define __ASYNC_LAYER_H__
+
+#pragma once
+
+#include "opt_layer.h"
+#include "async_task_thread.h"
+
+namespace vk
+{
+
+class Device;
+class AsyncLayer;
+struct PalAllocator;
+
+namespace async { class ShaderModule; }
+
+// Represents the shader module async compile info
+struct ShaderModuleTask
+{
+    VkShaderModuleCreateInfo info; // Shader module create info
+    async::ShaderModule*     pObj; // Output shader module object
+};
+
+// =====================================================================================================================
+// Class that specifies dispatch table override behavior for the async compiler layer
+class AsyncLayer : public OptLayer
+{
+public:
+    AsyncLayer(Device* pDevice);
+    virtual ~AsyncLayer();
+
+    virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) override;
+
+    VK_INLINE Device* GetDevice() { return m_pDevice; }
+
+    template<class Task>
+    async::TaskThread<Task>* GetTaskThread()
+    {
+        static_assert(sizeof(Task) == sizeof(ShaderModuleTask), "Unexpected type");
+        return (m_activeThreadCount > 0) ? m_pTaskThreads[(m_taskId++) % m_activeThreadCount] : nullptr;
+    }
+
+    void SyncAll();
+
+protected:
+    static constexpr uint32_t MaxShaderModuleThreads = 8; // Max thread count for shader module compile
+
+    Device*                              m_pDevice;                              // Vulkan Device object
+    async::TaskThread<ShaderModuleTask>* m_pTaskThreads[MaxShaderModuleThreads]; // Async compiler threads
+    uint32_t                             m_taskId;                               // Hint to select compile thread
+    uint32_t                             m_activeThreadCount;                    // Active thread count
+
+    // Internal buffer for m_pTaskThreads
+    uint8_t m_taskThreadBuffer[MaxShaderModuleThreads][sizeof(async::TaskThread<ShaderModuleTask>)];
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+#define ASYNC_OVERRIDE_ALIAS(entry_name, func_name) \
+    pDispatchTable->OverrideEntryPoints()->entry_name = vk::entry::async::func_name
+
+#define ASYNC_OVERRIDE_ENTRY(entry_name) ASYNC_OVERRIDE_ALIAS(entry_name, entry_name)
+
+// Helper macro to call the next layer's entry point by name
+#define ASYNC_CALL_NEXT_LAYER(entry_name) \
+    pAsyncLayer->GetNextLayer()->GetEntryPoints().entry_name
+
+} // namespace vk
+
+#endif /* __ASYNC_LAYER_H__ */
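The constructor in async_layer.cpp above caps the worker pool at MaxShaderModuleThreads (8) or half the logical core count, whichever is smaller, and GetTaskThread() hands work out round-robin via m_taskId++ % m_activeThreadCount. A standalone toy demonstrating just that distribution policy (illustrative only, not driver code):

    // Illustrative only: round-robin worker selection as in AsyncLayer::GetTaskThread().
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint32_t activeThreadCount = 4; // e.g. 8 logical cores / 2
        uint32_t       taskId            = 0;

        for (int module = 0; module < 10; ++module)
        {
            const uint32_t worker = (taskId++) % activeThreadCount;
            std::printf("shader module %d -> worker thread %u\n", module, worker);
        }
        return 0;
    }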
diff --git a/icd/api/appopt/async_shader_module.cpp b/icd/api/appopt/async_shader_module.cpp
new file mode 100644
index 00000000..bc0a3f5a
--- /dev/null
+++ b/icd/api/appopt/async_shader_module.cpp
@@ -0,0 +1,160 @@
+/*
+ ***********************************************************************************************************************
+ *
+ * Copyright (c) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+***********************************************************************************************************************
+* @file  async_shader_module.cpp
+* @brief Implementation of class async::ShaderModule
+***********************************************************************************************************************
+*/
+#include "async_layer.h"
+#include "async_shader_module.h"
+
+#include "include/vk_device.h"
+#include "include/vk_shader.h"
+#include "palListImpl.h"
+
+namespace vk
+{
+
+namespace async
+{
+
+// =====================================================================================================================
+ShaderModule::ShaderModule(
+    VkShaderModule immedModule)
+    :
+    m_immedModule(immedModule),
+    m_asyncModule(VK_NULL_HANDLE)
+{
+}
+
+// =====================================================================================================================
+// Creates the async shader module object
+VkResult ShaderModule::Create(
+    Device*                         pDevice,
+    const VkShaderModuleCreateInfo* pCreateInfo,
+    const VkAllocationCallbacks*    pAllocator,
+    VkShaderModule*                 pShaderModule)
+{
+    AsyncLayer*    pAsyncLayer = pDevice->GetAsyncLayer();
+    VkShaderModule immedModule = VK_NULL_HANDLE;
+
+    VK_ASSERT(pCreateInfo->flags == 0);
+
+    // Build the shader module in immediate mode
+    auto result = ASYNC_CALL_NEXT_LAYER(vkCreateShaderModule)(
+        VkDevice(ApiDevice::FromObject(pDevice)),
+        pCreateInfo,
+        pAllocator,
+        &immedModule);
+
+    if (result == VK_SUCCESS)
+    {
+        const size_t objSize = sizeof(ShaderModule);
+        void* pMemory = pDevice->AllocApiObject(objSize, pAllocator);
+
+        if (pMemory == nullptr)
+        {
+            // Destroy the immediate-mode module before failing so it is not leaked.
+            ASYNC_CALL_NEXT_LAYER(vkDestroyShaderModule)(
+                VkDevice(ApiDevice::FromObject(pDevice)), immedModule, pAllocator);
+            return VK_ERROR_OUT_OF_HOST_MEMORY;
+        }
+
+        VK_PLACEMENT_NEW(pMemory) ShaderModule(immedModule);
+        ShaderModule* pShaderModuleObj = static_cast<ShaderModule*>(pMemory);
+        *pShaderModule = ShaderModule::HandleFromVoidPointer(pMemory);
+
+        // Build the shader module in async mode
+        pShaderModuleObj->AsyncBuildShaderModule(pDevice->GetAsyncLayer());
+    }
+
+    return result;
+}
+
+// =====================================================================================================================
+// Destroys the async shader module object
+VkResult ShaderModule::Destroy(
+    Device*                      pDevice,
+    const VkAllocationCallbacks* pAllocator)
+{
+    AsyncLayer* pAsyncLayer = pDevice->GetAsyncLayer();
+
+    if (m_asyncModule == VK_NULL_HANDLE)
+    {
+        // The async build may still be in flight; wait for all worker threads to drain first.
+        pAsyncLayer->SyncAll();
+    }
+
+    if (m_immedModule != VK_NULL_HANDLE)
+    {
+        ASYNC_CALL_NEXT_LAYER(vkDestroyShaderModule)(
+            VkDevice(ApiDevice::FromObject(pDevice)),
+            m_immedModule,
+            pAllocator);
+    }
+
+    if (m_asyncModule != VK_NULL_HANDLE)
+    {
+        ASYNC_CALL_NEXT_LAYER(vkDestroyShaderModule)(
+            VkDevice(ApiDevice::FromObject(pDevice)),
+            m_asyncModule,
+            pAllocator);
+    }
+
+    return VK_SUCCESS;
+}
+
+// =====================================================================================================================
+// Builds the shader module in async mode
+void ShaderModule::AsyncBuildShaderModule(
+    AsyncLayer* pAsyncLayer)
+{
+    auto pTaskThread = pAsyncLayer->GetTaskThread<ShaderModuleTask>();
+    if (pTaskThread != nullptr)
+    {
+        vk::ShaderModule* pNextLayerModule = vk::ShaderModule::ObjectFromHandle(m_immedModule);
+
+        ShaderModuleTask task = {};
+        task.info.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+        task.info.pCode    = reinterpret_cast<const uint32_t*>(pNextLayerModule->GetCode());
+        task.info.codeSize = pNextLayerModule->GetCodeSize();
+        task.info.flags    = VK_SHADER_MODULE_ENABLE_OPT_BIT;
+        task.pObj          = this;
+        pTaskThread->AddTask(&task);
+    }
+}
+
+// =====================================================================================================================
+// Creates the shader module with shader-module optimizations enabled.
+void ShaderModule::Execute(
+    AsyncLayer*       pAsyncLayer,
+    ShaderModuleTask* pTask)
+{
+    Device* pDevice = pAsyncLayer->GetDevice();
+    ASYNC_CALL_NEXT_LAYER(vkCreateShaderModule)(VkDevice(ApiDevice::FromObject(pDevice)),
+                                                &pTask->info,
+                                                nullptr,
+                                                &m_asyncModule);
+}
+
+} // namespace async
+
+} // namespace vk
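From the application's perspective nothing changes: vkCreateShaderModule returns quickly with the immediate-mode module, and a later pipeline build transparently receives the optimized module through GetNextLayerModule() if the background compile has finished by then. A hedged sketch of the unchanged client-side sequence (error handling elided; all names are standard Vulkan):

    // Illustrative only: ordinary Vulkan client code; the async layer is invisible to it.
    static VkPipeline BuildComputePipeline(
        VkDevice        device,
        VkPipelineCache cache,
        const uint32_t* pSpirv,     // caller-provided SPIR-V words
        size_t          spirvSize)  // size in bytes
    {
        VkShaderModuleCreateInfo moduleInfo = {};
        moduleInfo.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
        moduleInfo.codeSize = spirvSize;
        moduleInfo.pCode    = pSpirv;

        VkShaderModule module = VK_NULL_HANDLE;
        vkCreateShaderModule(device, &moduleInfo, nullptr, &module); // returns without waiting on the optimized build

        VkComputePipelineCreateInfo pipelineInfo = {};
        pipelineInfo.sType        = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
        pipelineInfo.stage.sType  = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
        pipelineInfo.stage.stage  = VK_SHADER_STAGE_COMPUTE_BIT;
        pipelineInfo.stage.module = module; // the layer swaps in the optimized module here if it is ready
        pipelineInfo.stage.pName  = "main";

        VkPipeline pipeline = VK_NULL_HANDLE;
        vkCreateComputePipelines(device, cache, 1, &pipelineInfo, nullptr, &pipeline);
        return pipeline;
    }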
diff --git a/icd/api/appopt/async_shader_module.h b/icd/api/appopt/async_shader_module.h
new file mode 100644
index 00000000..6ebe2e66
--- /dev/null
+++ b/icd/api/appopt/async_shader_module.h
@@ -0,0 +1,80 @@
+/*
+ ***********************************************************************************************************************
+ *
+ * Copyright (c) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+***********************************************************************************************************************
+* @file  async_shader_module.h
+* @brief Header file of class async::ShaderModule
+***********************************************************************************************************************
+*/
+
+#ifndef __ASYNC_SHADER_MODULE_H__
+#define __ASYNC_SHADER_MODULE_H__
+
+#pragma once
+
+#include "include/vk_dispatch.h"
+
+namespace vk
+{
+
+namespace async
+{
+
+// =====================================================================================================================
+// Implementation of an async shader module
+class ShaderModule : public vk::NonDispatchable<VkShaderModule, ShaderModule>
+{
+public:
+    static VkResult Create(
+        Device*                         pDevice,
+        const VkShaderModuleCreateInfo* pCreateInfo,
+        const VkAllocationCallbacks*    pAllocator,
+        VkShaderModule*                 pShaderModule);
+
+    VkResult Destroy(
+        Device*                      pDevice,
+        const VkAllocationCallbacks* pAllocator);
+
+    VK_INLINE VkShaderModule GetNextLayerModule()
+    {
+        return (m_asyncModule == VK_NULL_HANDLE) ? m_immedModule : m_asyncModule;
+    }
+
+    void Execute(AsyncLayer* pAsyncLayer, ShaderModuleTask* pTask);
+
+    void AsyncBuildShaderModule(AsyncLayer* pAsyncLayer);
+
+protected:
+    ShaderModule(VkShaderModule immedModule);
+
+    VkShaderModule m_immedModule; // Shader module handle compiled in immediate mode
+    VkShaderModule m_asyncModule; // Shader module handle compiled in async mode
+};
+
+} // namespace async
+
+} // namespace vk
+
+#endif
diff --git a/icd/api/appopt/async_task_thread.h b/icd/api/appopt/async_task_thread.h
new file mode 100644
index 00000000..0b02bf68
--- /dev/null
+++ b/icd/api/appopt/async_task_thread.h
@@ -0,0 +1,151 @@
+/*
+ ***********************************************************************************************************************
+ *
+ * Copyright (c) 2019 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************************************************************************/
+/**
+***********************************************************************************************************************
+* @file  async_task_thread.h
+* @brief Implementation of template class async::TaskThread
+***********************************************************************************************************************
+*/
+#ifndef __ASYNC_TASK_THREAD_H__
+#define __ASYNC_TASK_THREAD_H__
+
+#pragma once
+
+#include "include/vk_alloccb.h"
+#include "palThread.h"
+#include "palMutex.h"
+#include "palList.h"
+#include "palEvent.h"
+
+namespace vk
+{
+
+class AsyncLayer;
+struct PalAllocator;
+
+namespace async
+{
+
+// =====================================================================================================================
+// Represents the general worker thread for the async shader/pipeline compiler.
+template<class Task>
+class TaskThread : public Util::Thread
+{
+public:
+    TaskThread(AsyncLayer* pAsyncLayer, PalAllocator* pAllocator)
+        :
+        m_pAsyncLayer(pAsyncLayer),
+        m_taskList(pAllocator),
+        m_stop(false)
+    {
+        m_lock.Init();
+        Util::EventCreateFlags flags = {};
+        flags.manualReset       = false;
+        flags.initiallySignaled = false;
+        m_event.Init(flags);
+    }
+
+    // Starts a new thread, which begins by running TaskThreadFunc.
+    VK_INLINE void Begin()
+    {
+        Util::Thread::Begin(ThreadFunc, this);
+    }
+
+    // Adds a task to the list and signals the worker.
+    void AddTask(Task* pTask)
+    {
+        Util::MutexAuto mutexAuto(&m_lock);
+        m_taskList.PushBack(*pTask);
+        m_event.Set();
+    }
+
+    // Sets the stop flag and triggers the event.
+    VK_INLINE void SetStop()
+    {
+        m_event.Set();
+        m_stop = true;
+    }
+
+    // Does not return until all queued tasks have been executed.
+    void SyncAll()
+    {
+        m_event.Set();
+        while (m_taskList.Begin() != m_taskList.End())
+        {
+            Util::YieldThread();
+        }
+    }
+
+protected:
+    // Async thread entry point
+    static void ThreadFunc(
+        void* pParam)
+    {
+        auto pThis = reinterpret_cast<TaskThread<Task>*>(pParam);
+        pThis->TaskThreadFunc();
+    }
+
+    // The implementation of the async thread function
+    void TaskThreadFunc()
+    {
+        while (m_stop == false)
+        {
+            // Wait for a new signal.
+            m_event.Wait(1.0f);
+
+            Task task;
+            while (FetchTask(&task))
+            {
+                task.pObj->Execute(m_pAsyncLayer, &task);
+            }
+        }
+    }
+
+    // Fetches a task from the list; returns false if the task list is empty.
+    bool FetchTask(Task* pTask)
+    {
+        Util::MutexAuto mutexAuto(&m_lock);
+        auto beginIt = m_taskList.Begin();
+        if (beginIt != m_taskList.End())
+        {
+            *pTask = *(beginIt.Get());
+            m_taskList.Erase(&beginIt);
+            return true;
+        }
+        return false;
+    }
+
+    AsyncLayer*                    m_pAsyncLayer; // Async compiler layer object
+    Util::List<Task, PalAllocator> m_taskList;    // Async compile task list
+    volatile bool                  m_stop;        // Flag to stop the thread
+    Util::Mutex                    m_lock;        // Lock for accessing the task list
+    Util::Event                    m_event;       // Event to notify the async thread
+};
+
+} // namespace async
+
+} // namespace vk
+
+#endif // __ASYNC_TASK_THREAD_H__
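TaskThread<Task> places only one requirement on Task: it must carry a pObj whose Execute(AsyncLayer*, Task*) does the actual work, which is exactly how ShaderModuleTask plugs in. The same queue-plus-event worker shape, expressed as a self-contained toy in standard C++ (PAL's Util::Thread/Util::Event/Util::Mutex replaced by std equivalents; illustrative only):

    // Illustrative only: queue + event worker loop analogous to TaskThread.
    #include <condition_variable>
    #include <cstdio>
    #include <deque>
    #include <mutex>
    #include <thread>

    struct ToyTask { int payload; };

    int main()
    {
        std::deque<ToyTask>     tasks;  // plays the role of m_taskList
        std::mutex              lock;   // plays the role of m_lock
        std::condition_variable event;  // plays the role of m_event
        bool                    stop = false;

        std::thread worker([&] {
            for (;;)
            {
                ToyTask task;
                {
                    std::unique_lock<std::mutex> guard(lock);
                    event.wait(guard, [&] { return stop || (tasks.empty() == false); });
                    if (tasks.empty())
                    {
                        return; // stop requested and queue fully drained
                    }
                    task = tasks.front(); // FetchTask()
                    tasks.pop_front();
                }
                std::printf("executing task %d\n", task.payload); // task.pObj->Execute(...)
            }
        });

        for (int i = 0; i < 3; ++i)
        {
            std::lock_guard<std::mutex> guard(lock); // AddTask()
            tasks.push_back(ToyTask{ i });
            event.notify_one();
        }

        {
            std::lock_guard<std::mutex> guard(lock); // SetStop()
            stop = true;
        }
        event.notify_one();
        worker.join();
        return 0;
    }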
diff --git a/icd/api/barrier_policy.cpp b/icd/api/barrier_policy.cpp
index ffe4e334..82c75741 100644
--- a/icd/api/barrier_policy.cpp
+++ b/icd/api/barrier_policy.cpp
@@ -157,7 +157,7 @@ class LayoutUsageHelper
         m_layoutUsageTable[2][usageIndex] = layoutUsage2;
     }

-    enum { LayoutUsageTableSize = VK_IMAGE_LAYOUT_RANGE_SIZE + 6 };
+    enum { LayoutUsageTableSize = VK_IMAGE_LAYOUT_RANGE_SIZE + 10 };

     uint32_t m_layoutUsageTable[MaxPalAspectsPerMask][LayoutUsageTableSize];
 };
diff --git a/icd/api/compiler_solution_llpc.cpp b/icd/api/compiler_solution_llpc.cpp
index c2200a29..5609a6b4 100644
--- a/icd/api/compiler_solution_llpc.cpp
+++ b/icd/api/compiler_solution_llpc.cpp
@@ -124,6 +124,7 @@ VkResult CompilerSolutionLlpc::CreateShaderCache(
 // Builds shader module from SPIR-V binary code.
 VkResult CompilerSolutionLlpc::BuildShaderModule(
     const Device*             pDevice,
+    VkShaderModuleCreateFlags flags,
     size_t                    codeSize,
     const void*               pCode,
     ShaderModuleHandle*       pShaderModule,
@@ -144,6 +145,10 @@ VkResult CompilerSolutionLlpc::BuildShaderModule(
     moduleInfo.pUserData          = &pShaderMemory;
     moduleInfo.shaderBin.pCode    = pCode;
     moduleInfo.shaderBin.codeSize = codeSize;
+#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 32
+    PipelineCompiler::ApplyPipelineOptions(pDevice, 0, &moduleInfo.options.pipelineOptions);
+    moduleInfo.options.enableOpt = (flags & VK_SHADER_MODULE_ENABLE_OPT_BIT) ? true : false;
+#endif

     Llpc::Result llpcResult = m_pLlpc->BuildShaderModule(&moduleInfo, &buildOut);

@@ -447,7 +452,6 @@ VkResult CompilerSolutionLlpc::CreateLlpcCompiler()
     ShaderCacheMode shaderCacheMode = settings.shaderCacheMode;

     if ((appProfile == AppProfile::MadMax) ||
-        (appProfile == AppProfile::SeriousSamFusion) ||
         (appProfile == AppProfile::SedpEngine) ||
         (appProfile == AppProfile::ThronesOfBritannia))
     {
@@ -457,6 +461,13 @@ VkResult CompilerSolutionLlpc::CreateLlpcCompiler()
         llpcOptions[numOptions++] = "-amdgpu-max-memory-clause=1";
     }

+#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 33
+    if (appProfile == AppProfile::DawnOfWarIII)
+    {
+        llpcOptions[numOptions++] = "-enable-load-scalarizer";
+    }
+#endif
+
     // Force enable cache to disk to improve user experience
     if ((shaderCacheMode == ShaderCacheEnableRuntimeOnly) &&
         ((appProfile == AppProfile::MadMax) ||
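The devmode changes that follow register every live PipelineBinaryCache with the GPUOpen PipelineUriService through three callbacks: one enumerates pipeline hashes, one extracts binaries, and one reinjects replacement binaries. All three walk the tracked-cache list under a reader lock, while registration and deregistration take the same lock as writers. A compact sketch of that locking discipline using std::shared_mutex in place of PAL's Util::RWLock (names and types here are illustrative, not driver code):

    // Illustrative only: reader/writer discipline over the tracked-cache list.
    #include <shared_mutex>
    #include <vector>

    struct ToyCache { /* stands in for PipelineBinaryCache */ };

    static std::vector<ToyCache*> g_caches;     // m_pipelineCaches
    static std::shared_mutex      g_cachesLock; // m_pipelineReinjectionLock

    void RegisterCache(ToyCache* pCache) // write side: Register/DeregisterPipelineCache
    {
        std::unique_lock<std::shared_mutex> guard(g_cachesLock);
        g_caches.push_back(pCache);
    }

    void ForEachCache(void (*pfnVisit)(ToyCache*)) // read side: the three service callbacks
    {
        std::shared_lock<std::shared_mutex> guard(g_cachesLock);
        for (ToyCache* pCache : g_caches)
        {
            pfnVisit(pCache);
        }
    }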
diff --git a/icd/api/devmode/devmode_mgr.cpp b/icd/api/devmode/devmode_mgr.cpp
index fe43f25b..98ece72c 100644
--- a/icd/api/devmode/devmode_mgr.cpp
+++ b/icd/api/devmode/devmode_mgr.cpp
@@ -39,6 +39,7 @@
 #include "include/vk_physical_device.h"
 #include "include/vk_utils.h"
 #include "include/vk_conv.h"
+#include "include/pipeline_binary_cache.h"
 #include "sqtt/sqtt_layer.h"
 #include "sqtt/sqtt_mgr.h"

@@ -127,6 +128,188 @@ static Pal::Result DevDriverToPalResult(
     return result;
 }

+// =====================================================================================================================
+// Callback method for providing hashes and sizes for tracked pipelines to the PipelineUriService
+static DevDriver::Result GetPipelineHashes(
+    DevDriver::PipelineUriService* pService,
+    void*                          pUserData,
+    DevDriver::ExclusionFlags      /*flags*/)
+{
+    DevModeMgr* pDevModeMgr = static_cast<DevModeMgr*>(pUserData);
+
+    DevDriver::Result result = DevDriver::Result::NotReady;
+
+    Util::RWLockAuto<Util::RWLock::ReadOnly> cacheListLock(pDevModeMgr->GetPipelineReinjectionLock());
+
+    auto pipelineCacheIter = pDevModeMgr->GetPipelineCacheListIterator();
+
+    while (pipelineCacheIter.Get() != nullptr)
+    {
+        result = DevDriver::Result::Success;
+
+        PipelineBinaryCache* pPipelineCache = *pipelineCacheIter.Get();
+
+        Util::RWLockAuto<Util::RWLock::ReadOnly> hashMappingLock(pPipelineCache->GetHashMappingLock());
+
+        auto hashMappingIter = pPipelineCache->GetHashMappingIterator();
+
+        while (hashMappingIter.Get() != nullptr)
+        {
+            const Pal::PipelineHash&            internalPipelineHash = hashMappingIter.Get()->key;
+            const PipelineBinaryCache::CacheId& cacheId              = hashMappingIter.Get()->value;
+
+            Util::QueryResult query = {};
+
+            // Do not throw an error if the entry is not found in the cache (in case it was evicted)
+            if (pPipelineCache->QueryPipelineBinary(&cacheId, &query) == Util::Result::Success)
+            {
+                pService->AddHash(internalPipelineHash, query.dataSize);
+            }
+
+            hashMappingIter.Next();
+        }
+
+        pipelineCacheIter.Next();
+    }
+
+    return result;
+}
+
+// =====================================================================================================================
+// Callback method for providing binaries for tracked pipelines to the PipelineUriService
+static DevDriver::Result GetPipelineCodeObjects(
+    DevDriver::PipelineUriService* pService,
+    void*                          pUserData,
+    DevDriver::ExclusionFlags      /*flags*/,
+    const DevDriver::PipelineHash* pPipelineHashes,
+    size_t                         numHashes)
+{
+    DevModeMgr* pDevModeMgr = static_cast<DevModeMgr*>(pUserData);
+
+    DevDriver::Result result = DevDriver::Result::NotReady;
+
+    Util::RWLockAuto<Util::RWLock::ReadOnly> cacheListLock(pDevModeMgr->GetPipelineReinjectionLock());
+
+    auto pipelineCacheIter = pDevModeMgr->GetPipelineCacheListIterator();
+
+    while (pipelineCacheIter.Get() != nullptr)
+    {
+        result = DevDriver::Result::Success;
+
+        PipelineBinaryCache* pPipelineCache = *pipelineCacheIter.Get();
+
+        if (pPipelineHashes != nullptr)
+        {
+            // A specific list of hashes was requested
+            for (uint32_t i = 0; i < numHashes; i += 1)
+            {
+                DevDriver::PipelineRecord record = {};
+                record.header.hash = pPipelineHashes[i];
+
+                size_t      binarySize = 0u;
+                const void* pBinary    = nullptr;
+
+                static_assert(sizeof(Pal::PipelineHash) == sizeof(record.header.hash), "Structure size mismatch");
+
+                PipelineBinaryCache::CacheId* pCacheId = pPipelineCache->GetCacheIdForPipeline(
+                    reinterpret_cast<const Pal::PipelineHash*>(&record.header.hash));
+
+                if ((pCacheId != nullptr) &&
+                    (pPipelineCache->LoadPipelineBinary(pCacheId, &binarySize, &pBinary) == Util::Result::Success))
+                {
+                    record.pBinary     = pBinary;
+                    record.header.size = binarySize;
+                }
+
+                // An empty record is written if the hash is not found
+                pService->AddPipeline(record);
+            }
+        }
+        else
+        {
+            Util::RWLockAuto<Util::RWLock::ReadOnly> hashMappingLock(pPipelineCache->GetHashMappingLock());
+
+            auto hashMappingIter = pPipelineCache->GetHashMappingIterator();
+
+            while (hashMappingIter.Get() != nullptr)
+            {
+                Pal::PipelineHash&            internalPipelineHash = hashMappingIter.Get()->key;
+                PipelineBinaryCache::CacheId& cacheId              = hashMappingIter.Get()->value;
+
+                size_t      binarySize = 0u;
+                const void* pBinary    = nullptr;
+
+                if (pPipelineCache->LoadPipelineBinary(&cacheId, &binarySize, &pBinary) == Util::Result::Success)
+                {
+                    DevDriver::PipelineRecord record = {};
+                    record.pBinary     = pBinary;
+                    record.header.size = binarySize;
+                    record.header.hash = DevDriver::PipelineHash{ internalPipelineHash };
+
+                    pService->AddPipeline(record);
+                }
+
+                hashMappingIter.Next();
+            }
+        }
+
+        pipelineCacheIter.Next();
+    }
+
+    return result;
+}
+
+// =====================================================================================================================
+// Callback method for reinjecting binaries back into the cache
+static DevDriver::Result InjectPipelineCodeObjects(
+    void*                               pUserData,
+    DevDriver::PipelineRecordsIterator& pipelineIter)
+{
+    DevModeMgr* pDevModeMgr = static_cast<DevModeMgr*>(pUserData);
+
+    DevDriver::Result result = DevDriver::Result::NotReady;
+
+    uint32_t                  replacedCount = 0u;
+    DevDriver::PipelineRecord record;
+
+    Util::RWLockAuto<Util::RWLock::ReadOnly> cacheListLock(pDevModeMgr->GetPipelineReinjectionLock());
+
+    auto pipelineCacheIter = pDevModeMgr->GetPipelineCacheListIterator();
+
+    while (pipelineCacheIter.Get() != nullptr)
+    {
+        result = DevDriver::Result::Success;
+
+        PipelineBinaryCache* pPipelineCache = *pipelineCacheIter.Get();
+
+        while (pipelineIter.Get(&record))
+        {
+            static_assert(sizeof(PipelineBinaryCache::CacheId) == sizeof(record.header.hash),
+                          "Structure size mismatch");
+
+            size_t binarySize = static_cast<size_t>(record.header.size);
+            const PipelineBinaryCache::CacheId* pInternalPipelineHash =
+                reinterpret_cast<const PipelineBinaryCache::CacheId*>(&record.header.hash);
+
+            if (pPipelineCache->StoreReinjectionBinary(pInternalPipelineHash, binarySize, record.pBinary) ==
+                Util::Result::Success)
+            {
+                replacedCount++;
+            }
+
+            pipelineIter.Next();
+        }
+
+        pipelineCacheIter.Next();
+    }
+
+    if ((result == DevDriver::Result::Success) &&
+        (replacedCount == 0u))
+    {
+        result = DevDriver::Result::Error;
+    }
+
+    return result;
+}
+
 // =====================================================================================================================
 DevModeMgr::DevModeMgr(Instance* pInstance)
     :
m_hardwareSupportsTracing(false), m_rgpServerSupportsTracing(false), m_finalized(false), - m_tracingEnabled(false), m_numPrepFrames(0), m_traceGpuMemLimit(0), m_enableInstTracing(false), @@ -150,7 +332,8 @@ DevModeMgr::DevModeMgr(Instance* pInstance) m_globalFrameIndex(1), // Must start from 1 according to RGP spec m_traceFrameBeginTag(0), m_traceFrameEndTag(0), - m_targetApiPsoHash(0) + m_targetApiPsoHash(0), + m_pipelineCaches(pInstance->Allocator()) { memset(&m_trace, 0, sizeof(m_trace)); } @@ -214,6 +397,11 @@ Pal::Result DevModeMgr::Init() } } + if (result == Pal::Result::Success) + { + m_pipelineReinjectionLock.Init(); + } + return result; } @@ -263,9 +451,7 @@ void DevModeMgr::Finalize( // Finalize the devmode manager m_pDevDriverServer->Finalize(); - // Figure out if tracing support should be enabled or not m_finalized = true; - m_tracingEnabled = (m_pRGPServer != nullptr) && m_pRGPServer->TracesEnabled(); } // ===================================================================================================================== @@ -298,7 +484,7 @@ void DevModeMgr::NotifyFrameEnd( bool actualPresent) { // Get the RGP message server - if ((m_pRGPServer != nullptr) && m_pRGPServer->TracesEnabled()) + if (IsTracingEnabled()) { // Only act if this present is coming from the same device that started the trace if (m_trace.status != TraceStatus::Idle) @@ -578,7 +764,7 @@ void DevModeMgr::NotifyFrameBegin( // Wait for the driver to be resumed in case it's been paused. WaitForDriverResume(); - if ((m_pRGPServer != nullptr) && m_pRGPServer->TracesEnabled()) + if (IsTracingEnabled()) { // Check for pending traces here also in case the application presents before submitting any work. This // may transition Idle to Pending which we will handle immediately below @@ -1027,7 +1213,7 @@ Pal::Result DevModeMgr::TracePreparingToRunningStep( const Queue* pQueue) { VK_ASSERT(pState->status == TraceStatus::Preparing); - VK_ASSERT(m_tracingEnabled); + VK_ASSERT(IsTracingEnabled()); // We can only trace using a single device at a time currently, so recreate RGP trace // resources against this new one if the device is changing. 
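The hunks above and below complete a small refactor: the old code snapshotted TracesEnabled() into m_tracingEnabled at Finalize() time, so disabling traces afterwards (as the failure path in InitRGPTracing below does) could leave the cached flag stale; IsTracingEnabled() now queries the RGP server on every call. A tiny standalone illustration of why the live query is the safer shape (toy types, not driver code):

    // Illustrative only: cached snapshot vs. live query of a mutable server flag.
    #include <cstdio>

    struct ToyRgpServer
    {
        bool enabled = true;
        void DisableTraces()       { enabled = false; }
        bool TracesEnabled() const { return enabled; }
    };

    int main()
    {
        ToyRgpServer server;

        const bool cached = server.TracesEnabled(); // old approach: snapshot at Finalize()
        server.DisableTraces();                     // e.g. trace-resource creation failed later

        std::printf("cached=%d live=%d\n", cached, server.TracesEnabled()); // cached=1 live=0
        return 0;
    }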
@@ -1804,7 +1990,7 @@ Pal::Result DevModeMgr::InitRGPTracing(

     Pal::Result result = Pal::Result::Success;

-    if ((m_tracingEnabled == false) ||   // Tracing is globally disabled
+    if ((IsTracingEnabled() == false) || // Tracing is globally disabled
         (m_pRGPServer == nullptr) ||     // There is no RGP server (this should never happen)
         (pDevice->NumPalDevices() > 1))  // MGPU device group tracing is not currently supported
     {
@@ -1998,8 +2184,6 @@ Pal::Result DevModeMgr::InitRGPTracing(
     if (m_pRGPServer != nullptr)
     {
         m_pRGPServer->DisableTraces();
-
-        m_tracingEnabled = false;
     }

     // Clean up if we failed
@@ -2149,6 +2333,21 @@ Pal::Result DevModeMgr::TimedWaitQueueSemaphore(
     return result;
 }

+// =====================================================================================================================
+bool DevModeMgr::IsTracingEnabled() const
+{
+    VK_ASSERT(m_finalized);
+
+    if (m_finalized)
+    {
+        return (m_pRGPServer != nullptr) && m_pRGPServer->TracesEnabled();
+    }
+    else
+    {
+        return false;
+    }
+}
+
 // =====================================================================================================================
 Pal::Result DevModeMgr::TimedQueueSubmit(
     uint32_t               deviceIdx,
@@ -2362,6 +2561,88 @@ void DevModeMgr::CleanupEtwClient()
 }
 #endif

+// =====================================================================================================================
+// Registers a pipeline binary cache object with the pipeline URI service and initializes the pipeline URI service
+// the first time a pipeline binary cache object is registered
+Util::Result DevModeMgr::RegisterPipelineCache(
+    PipelineBinaryCache* pPipelineCache,
+    uint32_t             postSizeLimit)
+{
+    Util::Result result = Util::Result::Success;
+
+    if (m_pPipelineUriService == nullptr)
+    {
+        void* pStorage = m_pInstance->AllocMem(sizeof(DevDriver::PipelineUriService),
+                                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+
+        if (pStorage != nullptr)
+        {
+            m_pPipelineUriService = VK_PLACEMENT_NEW(pStorage) DevDriver::PipelineUriService();
+        }
+        else
+        {
+            result = Util::Result::ErrorOutOfMemory;
+        }
+
+        if (result == Util::Result::Success)
+        {
+            DevDriver::PipelineUriService::DriverInfo driverInfo;
+            driverInfo.pUserData                    = static_cast<void*>(this);
+            driverInfo.pfnGetPipelineHashes         = &GetPipelineHashes;
+            driverInfo.pfnGetPipelineCodeObjects    = &GetPipelineCodeObjects;
+            driverInfo.pfnInjectPipelineCodeObjects = &InjectPipelineCodeObjects;
+            driverInfo.postSizeLimit                = postSizeLimit * 1024;
+
+            DevDriver::Result devDriverResult = m_pPipelineUriService->Init(driverInfo);
+
+            if (devDriverResult == DevDriver::Result::Success)
+            {
+                devDriverResult = m_pDevDriverServer->GetMessageChannel()->RegisterService(m_pPipelineUriService);
+            }
+
+            if (devDriverResult != DevDriver::Result::Success)
+            {
+                result = Util::Result::ErrorUnavailable;
+            }
+        }
+    }
+
+    if (result == Util::Result::Success)
+    {
+        Util::RWLockAuto<Util::RWLock::ReadWrite> readWriteLock(&m_pipelineReinjectionLock);
+
+        result = m_pipelineCaches.PushBack(pPipelineCache);
+    }
+
+    return result;
+}
+
+// =====================================================================================================================
+// Deregisters a pipeline binary cache with the pipeline URI service
+void DevModeMgr::DeregisterPipelineCache(
+    PipelineBinaryCache* pPipelineCache)
+{
+    Util::RWLockAuto<Util::RWLock::ReadWrite> readWriteLock(&m_pipelineReinjectionLock);
+
+    auto it = m_pipelineCaches.Begin();
+
+    while (it.Get() != nullptr)
+    {
+        PipelineBinaryCache* element = *it.Get();
+
+        if (pPipelineCache == element)
+        {
+            m_pipelineCaches.Erase(&it);
+
Each element should only be in the list once; break out of loop once found + break; + } + else + { + it.Next(); + } + } +} + }; // namespace vk #endif diff --git a/icd/api/devmode/devmode_mgr.h b/icd/api/devmode/devmode_mgr.h index e5e5455e..e137146d 100644 --- a/icd/api/devmode/devmode_mgr.h +++ b/icd/api/devmode/devmode_mgr.h @@ -107,6 +107,7 @@ class Pipeline; class Queue; class SqttCmdBufferState; class CmdBuffer; +class PipelineBinaryCache; }; namespace vk @@ -147,8 +148,7 @@ class DevModeMgr void StartInstructionTrace(CmdBuffer* pCmdBuffer); void StopInstructionTrace(CmdBuffer* pCmdBuffer); - VK_INLINE bool IsTracingEnabled() const - { VK_ASSERT(m_finalized); return m_tracingEnabled; } + bool IsTracingEnabled() const; Pal::Result TimedQueueSubmit( uint32_t deviceIdx, @@ -176,6 +176,19 @@ class DevModeMgr VK_INLINE bool GetTraceFrameBeginTag(uint64_t* pTag) const; VK_INLINE bool GetTraceFrameEndTag(uint64_t* pTag) const; + Util::Result RegisterPipelineCache( + PipelineBinaryCache* pPipelineCache, + uint32_t postSizeLimit); + + void DeregisterPipelineCache( + PipelineBinaryCache* pPipelineCache); + + VK_INLINE Util::ListIterator GetPipelineCacheListIterator() + { return m_pipelineCaches.Begin(); } + + VK_INLINE Util::RWLock* GetPipelineReinjectionLock() + { return &m_pipelineReinjectionLock; } + private: // Steps that an RGP trace goes through enum class TraceStatus @@ -309,7 +322,6 @@ class DevModeMgr bool m_rgpServerSupportsTracing; // True if gpuopen protocol successfully enabled // tracing bool m_finalized; - bool m_tracingEnabled; // True if tracing is currently enabled (master flag) uint32_t m_numPrepFrames; uint32_t m_traceGpuMemLimit; bool m_enableInstTracing; // Enable instruction-level SQTT tokens @@ -323,6 +335,11 @@ class DevModeMgr uint32_t m_traceFrameEndIndex; uint64_t m_targetApiPsoHash; + using PipelineCacheList = Util::List; + + PipelineCacheList m_pipelineCaches; + Util::RWLock m_pipelineReinjectionLock; + PAL_DISALLOW_DEFAULT_CTOR(DevModeMgr); PAL_DISALLOW_COPY_AND_ASSIGN(DevModeMgr); #endif diff --git a/icd/api/include/app_profile.h b/icd/api/include/app_profile.h index 8ad77946..bf814c17 100644 --- a/icd/api/include/app_profile.h +++ b/icd/api/include/app_profile.h @@ -74,6 +74,7 @@ enum class AppProfile : uint32_t WorldWarZ, // WorldWarZ ThreeKingdoms, // Three Kingdoms by Feral3D DiRT4, // DiRT4 by Feral3D + DxvkEliteDangerous, // Elite Dangerous by Frontier Developments IdTechEngine, // id Tech Engine (Default) Feral3DEngine, // Feral3D Engine (Default) diff --git a/icd/api/include/compiler_solution.h b/icd/api/include/compiler_solution.h index 14c80c06..f056efed 100644 --- a/icd/api/include/compiler_solution.h +++ b/icd/api/include/compiler_solution.h @@ -79,6 +79,7 @@ struct GraphicsPipelineCreateInfo VkFormat dbFormat; PipelineOptimizerKey pipelineProfileKey; PipelineCompilerType compilerType; + bool elfWasCached; Util::MetroHash::Hash basePipelineHash; PipelineCreationFeedback pipelineFeedback; }; @@ -92,6 +93,7 @@ struct ComputePipelineCreateInfo size_t tempBufferStageSize; PipelineOptimizerKey pipelineProfileKey; PipelineCompilerType compilerType; + bool elfWasCached; Util::MetroHash::Hash basePipelineHash; PipelineCreationFeedback pipelineFeedback; }; @@ -118,6 +120,7 @@ class CompilerSolution virtual VkResult BuildShaderModule( const Device* pDevice, + VkShaderModuleCreateFlags flags, size_t codeSize, const void* pCode, ShaderModuleHandle* pShaderModule, diff --git a/icd/api/include/compiler_solution_llpc.h b/icd/api/include/compiler_solution_llpc.h 
index 47339a71..76778172 100644 --- a/icd/api/include/compiler_solution_llpc.h +++ b/icd/api/include/compiler_solution_llpc.h @@ -59,6 +59,7 @@ class CompilerSolutionLlpc : public CompilerSolution virtual VkResult BuildShaderModule( const Device* pDevice, + VkShaderModuleCreateFlags flags, size_t codeSize, const void* pCode, ShaderModuleHandle* pShaderModule, diff --git a/icd/api/include/khronos/devext/vk_amd_device_coherent_memory.h b/icd/api/include/khronos/devext/vk_amd_device_coherent_memory.h deleted file mode 100755 index f2c4a549..00000000 --- a/icd/api/include/khronos/devext/vk_amd_device_coherent_memory.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************************************************************************/ -/** - ********************************************************************************************************************** - * @file vk_amd_device_coherent_memory.h - * @brief Temporary internal header for wave limit control. Should be removed once the extension is published - * and the API gets included in the official Vulkan header. 
- ********************************************************************************************************************** - */ -#ifndef VKI_AMD_DEVICE_COHERENT_MEMORY_H_ -#define VKI_AMD_DEVICE_COHERENT_MEMORY_H_ - -#include "vk_internal_ext_helper.h" - -#define VK_AMD_device_coherent_memory 1 -#define VK_AMD_DEVICE_COHERENT_MEMORY_SPEC_VERSION 1 -#define VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_NAME "VK_AMD_device_coherent_memory" - -#define VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_NUMBER 230 -#define VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_ENUM(type, offset) \ - VK_EXTENSION_ENUM(VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_NUMBER, type, offset) - -#define VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD VK_EXTENSION_BIT(VkMemoryPropertyFlagBits, 6) -#define VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD VK_EXTENSION_BIT(VkMemoryPropertyFlagBits, 7) - -typedef struct VkPhysicalDeviceCoherentMemoryFeaturesAMD { - VkStructureType sType; - void* pNext; - VkBool32 deviceCoherentMemory; -} VkPhysicalDeviceCoherentMemoryFeaturesAMD; - -#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_ENUM(VkStructureType, 0) - -#endif /* VKI_AMD_DEVICE_COHERENT_MEMORY_H_ */ diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_android.h b/icd/api/include/khronos/sdk-1.1/vulkan_android.h index 18618024..9b8d3e27 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_android.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_android.h @@ -1,10 +1,6 @@ #ifndef VULKAN_ANDROID_H_ #define VULKAN_ANDROID_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. ** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_KHR_android_surface 1 struct ANativeWindow; diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_core.h b/icd/api/include/khronos/sdk-1.1/vulkan_core.h index e811bebe..50f72f67 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_core.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_core.h @@ -1,10 +1,6 @@ #ifndef VULKAN_CORE_H_ #define VULKAN_CORE_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. 
** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_VERSION_1_0 1 #include "vk_platform.h" @@ -43,7 +44,7 @@ extern "C" { #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff) #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff) // Version of this file -#define VK_HEADER_VERSION 119 +#define VK_HEADER_VERSION 121 #define VK_NULL_HANDLE 0 @@ -490,6 +491,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT = 1000225001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT = 1000225002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD = 1000227000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD = 1000229000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT = 1000237000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT = 1000238000, VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT = 1000238001, @@ -1240,6 +1242,7 @@ typedef enum VkSamplerAddressMode { VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE = 2, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER = 3, VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE = 4, + VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR = VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_BEGIN_RANGE = VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_END_RANGE = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_SAMPLER_ADDRESS_MODE_RANGE_SIZE = (VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER - VK_SAMPLER_ADDRESS_MODE_REPEAT + 1), @@ -1506,6 +1509,8 @@ typedef enum VkMemoryPropertyFlagBits { VK_MEMORY_PROPERTY_HOST_CACHED_BIT = 0x00000008, VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT = 0x00000010, VK_MEMORY_PROPERTY_PROTECTED_BIT = 0x00000020, + VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD = 0x00000040, + VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD = 0x00000080, VK_MEMORY_PROPERTY_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkMemoryPropertyFlagBits; typedef VkFlags VkMemoryPropertyFlags; @@ -5080,7 +5085,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkAcquireNextImage2KHR( #define VK_KHR_display 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayKHR) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDisplayModeKHR) -#define VK_KHR_DISPLAY_SPEC_VERSION 21 +#define VK_KHR_DISPLAY_SPEC_VERSION 23 #define VK_KHR_DISPLAY_EXTENSION_NAME "VK_KHR_display" typedef enum VkDisplayPlaneAlphaFlagBitsKHR { @@ -5203,7 +5208,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateDisplayPlaneSurfaceKHR( #define VK_KHR_display_swapchain 1 -#define VK_KHR_DISPLAY_SWAPCHAIN_SPEC_VERSION 9 +#define VK_KHR_DISPLAY_SWAPCHAIN_SPEC_VERSION 10 #define VK_KHR_DISPLAY_SWAPCHAIN_EXTENSION_NAME "VK_KHR_display_swapchain" typedef struct VkDisplayPresentInfoKHR { VkStructureType sType; @@ -5226,7 +5231,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateSharedSwapchainsKHR( #define VK_KHR_sampler_mirror_clamp_to_edge 1 -#define VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_SPEC_VERSION 1 +#define VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_SPEC_VERSION 3 #define VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME "VK_KHR_sampler_mirror_clamp_to_edge" @@ -5242,7 +5247,7 @@ typedef VkPhysicalDeviceMultiviewProperties VkPhysicalDeviceMultiviewPropertiesK #define VK_KHR_get_physical_device_properties2 1 -#define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_SPEC_VERSION 1 +#define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_SPEC_VERSION 2 #define VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME "VK_KHR_get_physical_device_properties2" typedef VkPhysicalDeviceFeatures2 
VkPhysicalDeviceFeatures2KHR; @@ -5307,7 +5312,7 @@ VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceSparseImageFormatProperties2KHR( #define VK_KHR_device_group 1 -#define VK_KHR_DEVICE_GROUP_SPEC_VERSION 3 +#define VK_KHR_DEVICE_GROUP_SPEC_VERSION 4 #define VK_KHR_DEVICE_GROUP_EXTENSION_NAME "VK_KHR_device_group" typedef VkPeerMemoryFeatureFlags VkPeerMemoryFeatureFlagsKHR; @@ -6092,7 +6097,7 @@ typedef struct VkImageFormatListCreateInfoKHR { #define VK_KHR_sampler_ycbcr_conversion 1 typedef VkSamplerYcbcrConversion VkSamplerYcbcrConversionKHR; -#define VK_KHR_SAMPLER_YCBCR_CONVERSION_SPEC_VERSION 1 +#define VK_KHR_SAMPLER_YCBCR_CONVERSION_SPEC_VERSION 14 #define VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME "VK_KHR_sampler_ycbcr_conversion" typedef VkSamplerYcbcrModelConversion VkSamplerYcbcrModelConversionKHR; @@ -6811,7 +6816,7 @@ VKAPI_ATTR uint32_t VKAPI_CALL vkGetImageViewHandleNVX( #define VK_AMD_draw_indirect_count 1 -#define VK_AMD_DRAW_INDIRECT_COUNT_SPEC_VERSION 1 +#define VK_AMD_DRAW_INDIRECT_COUNT_SPEC_VERSION 2 #define VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME "VK_AMD_draw_indirect_count" typedef void (VKAPI_PTR *PFN_vkCmdDrawIndirectCountAMD)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride); typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexedIndirectCountAMD)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride); @@ -7047,7 +7052,7 @@ typedef struct VkPhysicalDeviceASTCDecodeFeaturesEXT { #define VK_EXT_conditional_rendering 1 -#define VK_EXT_CONDITIONAL_RENDERING_SPEC_VERSION 1 +#define VK_EXT_CONDITIONAL_RENDERING_SPEC_VERSION 2 #define VK_EXT_CONDITIONAL_RENDERING_EXTENSION_NAME "VK_EXT_conditional_rendering" typedef enum VkConditionalRenderingFlagBitsEXT { @@ -7679,7 +7684,7 @@ typedef struct VkPipelineRasterizationDepthClipStateCreateInfoEXT { #define VK_EXT_hdr_metadata 1 -#define VK_EXT_HDR_METADATA_SPEC_VERSION 1 +#define VK_EXT_HDR_METADATA_SPEC_VERSION 2 #define VK_EXT_HDR_METADATA_EXTENSION_NAME "VK_EXT_hdr_metadata" typedef struct VkXYColorEXT { float x; @@ -7863,7 +7868,7 @@ VKAPI_ATTR void VKAPI_CALL vkSubmitDebugUtilsMessageEXT( #define VK_EXT_sampler_filter_minmax 1 -#define VK_EXT_SAMPLER_FILTER_MINMAX_SPEC_VERSION 1 +#define VK_EXT_SAMPLER_FILTER_MINMAX_SPEC_VERSION 2 #define VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME "VK_EXT_sampler_filter_minmax" typedef enum VkSamplerReductionModeEXT { @@ -8446,6 +8451,15 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkAccelerationStructureNV) #define VK_NV_RAY_TRACING_EXTENSION_NAME "VK_NV_ray_tracing" #define VK_SHADER_UNUSED_NV (~0U) +typedef enum VkAccelerationStructureTypeNV { + VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV = 0, + VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV = 1, + VK_ACCELERATION_STRUCTURE_TYPE_BEGIN_RANGE_NV = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV, + VK_ACCELERATION_STRUCTURE_TYPE_END_RANGE_NV = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, + VK_ACCELERATION_STRUCTURE_TYPE_RANGE_SIZE_NV = (VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV - VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV + 1), + VK_ACCELERATION_STRUCTURE_TYPE_MAX_ENUM_NV = 0x7FFFFFFF +} VkAccelerationStructureTypeNV; + typedef enum VkRayTracingShaderGroupTypeNV { VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV = 0, VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV = 1, @@ -8465,15 +8479,6 @@ typedef enum VkGeometryTypeNV { 
VK_GEOMETRY_TYPE_MAX_ENUM_NV = 0x7FFFFFFF } VkGeometryTypeNV; -typedef enum VkAccelerationStructureTypeNV { - VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV = 0, - VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV = 1, - VK_ACCELERATION_STRUCTURE_TYPE_BEGIN_RANGE_NV = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV, - VK_ACCELERATION_STRUCTURE_TYPE_END_RANGE_NV = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, - VK_ACCELERATION_STRUCTURE_TYPE_RANGE_SIZE_NV = (VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV - VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV + 1), - VK_ACCELERATION_STRUCTURE_TYPE_MAX_ENUM_NV = 0x7FFFFFFF -} VkAccelerationStructureTypeNV; - typedef enum VkCopyAccelerationStructureModeNV { VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_NV = 0, VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_NV = 1, @@ -8741,7 +8746,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCompileDeferredNV( #define VK_NV_representative_fragment_test 1 -#define VK_NV_REPRESENTATIVE_FRAGMENT_TEST_SPEC_VERSION 1 +#define VK_NV_REPRESENTATIVE_FRAGMENT_TEST_SPEC_VERSION 2 #define VK_NV_REPRESENTATIVE_FRAGMENT_TEST_EXTENSION_NAME "VK_NV_representative_fragment_test" typedef struct VkPhysicalDeviceRepresentativeFragmentTestFeaturesNV { VkStructureType sType; @@ -8900,7 +8905,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetCalibratedTimestampsEXT( #define VK_AMD_shader_core_properties 1 -#define VK_AMD_SHADER_CORE_PROPERTIES_SPEC_VERSION 1 +#define VK_AMD_SHADER_CORE_PROPERTIES_SPEC_VERSION 2 #define VK_AMD_SHADER_CORE_PROPERTIES_EXTENSION_NAME "VK_AMD_shader_core_properties" typedef struct VkPhysicalDeviceShaderCorePropertiesAMD { VkStructureType sType; @@ -9090,7 +9095,7 @@ typedef struct VkPhysicalDeviceFragmentShaderBarycentricFeaturesNV { #define VK_NV_shader_image_footprint 1 -#define VK_NV_SHADER_IMAGE_FOOTPRINT_SPEC_VERSION 1 +#define VK_NV_SHADER_IMAGE_FOOTPRINT_SPEC_VERSION 2 #define VK_NV_SHADER_IMAGE_FOOTPRINT_EXTENSION_NAME "VK_NV_shader_image_footprint" typedef struct VkPhysicalDeviceShaderImageFootprintFeaturesNV { VkStructureType sType; @@ -9451,6 +9456,17 @@ typedef struct VkPhysicalDeviceShaderCoreProperties2AMD { +#define VK_AMD_device_coherent_memory 1 +#define VK_AMD_DEVICE_COHERENT_MEMORY_SPEC_VERSION 1 +#define VK_AMD_DEVICE_COHERENT_MEMORY_EXTENSION_NAME "VK_AMD_device_coherent_memory" +typedef struct VkPhysicalDeviceCoherentMemoryFeaturesAMD { + VkStructureType sType; + void* pNext; + VkBool32 deviceCoherentMemory; +} VkPhysicalDeviceCoherentMemoryFeaturesAMD; + + + #define VK_EXT_memory_budget 1 #define VK_EXT_MEMORY_BUDGET_SPEC_VERSION 1 #define VK_EXT_MEMORY_BUDGET_EXTENSION_NAME "VK_EXT_memory_budget" @@ -9538,15 +9554,16 @@ typedef struct VkImageStencilUsageCreateInfoEXT { #define VK_EXT_validation_features 1 -#define VK_EXT_VALIDATION_FEATURES_SPEC_VERSION 1 +#define VK_EXT_VALIDATION_FEATURES_SPEC_VERSION 2 #define VK_EXT_VALIDATION_FEATURES_EXTENSION_NAME "VK_EXT_validation_features" typedef enum VkValidationFeatureEnableEXT { VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT = 0, VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT = 1, + VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT = 2, VK_VALIDATION_FEATURE_ENABLE_BEGIN_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT, - VK_VALIDATION_FEATURE_ENABLE_END_RANGE_EXT = VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT, - VK_VALIDATION_FEATURE_ENABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_RESERVE_BINDING_SLOT_EXT - VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT + 1), + VK_VALIDATION_FEATURE_ENABLE_END_RANGE_EXT = 
VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT, + VK_VALIDATION_FEATURE_ENABLE_RANGE_SIZE_EXT = (VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT - VK_VALIDATION_FEATURE_ENABLE_GPU_ASSISTED_EXT + 1), VK_VALIDATION_FEATURE_ENABLE_MAX_ENUM_EXT = 0x7FFFFFFF } VkValidationFeatureEnableEXT; diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_fuchsia.h b/icd/api/include/khronos/sdk-1.1/vulkan_fuchsia.h index 4c62a7c2..81ebe55d 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_fuchsia.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_fuchsia.h @@ -1,10 +1,6 @@ #ifndef VULKAN_FUCHSIA_H_ #define VULKAN_FUCHSIA_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. ** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_FUCHSIA_imagepipe_surface 1 #define VK_FUCHSIA_IMAGEPIPE_SURFACE_SPEC_VERSION 1 diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_ggp.h b/icd/api/include/khronos/sdk-1.1/vulkan_ggp.h index 3d67c4b8..fd306131 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_ggp.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_ggp.h @@ -1,10 +1,6 @@ #ifndef VULKAN_GGP_H_ #define VULKAN_GGP_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. ** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_GGP_stream_descriptor_surface 1 #define VK_GGP_STREAM_DESCRIPTOR_SURFACE_SPEC_VERSION 1 diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_ios.h b/icd/api/include/khronos/sdk-1.1/vulkan_ios.h index 1846df52..72ef1a8a 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_ios.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_ios.h @@ -1,10 +1,6 @@ #ifndef VULKAN_IOS_H_ #define VULKAN_IOS_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. ** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_MVK_ios_surface 1 #define VK_MVK_IOS_SURFACE_SPEC_VERSION 2 diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_macos.h b/icd/api/include/khronos/sdk-1.1/vulkan_macos.h index dca623b0..e6e5deaa 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_macos.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_macos.h @@ -1,10 +1,6 @@ #ifndef VULKAN_MACOS_H_ #define VULKAN_MACOS_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. ** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_MVK_macos_surface 1 #define VK_MVK_MACOS_SURFACE_SPEC_VERSION 2 diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_metal.h b/icd/api/include/khronos/sdk-1.1/vulkan_metal.h index 16505237..3dec68c7 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_metal.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_metal.h @@ -1,10 +1,6 @@ #ifndef VULKAN_METAL_H_ #define VULKAN_METAL_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. ** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_EXT_metal_surface 1 diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_vi.h b/icd/api/include/khronos/sdk-1.1/vulkan_vi.h index 50aa27df..6fb66f9d 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_vi.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_vi.h @@ -1,10 +1,6 @@ #ifndef VULKAN_VI_H_ #define VULKAN_VI_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. 
** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_NN_vi_surface 1 #define VK_NN_VI_SURFACE_SPEC_VERSION 1 diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_wayland.h b/icd/api/include/khronos/sdk-1.1/vulkan_wayland.h index 12a5f045..599d05b2 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_wayland.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_wayland.h @@ -1,10 +1,6 @@ #ifndef VULKAN_WAYLAND_H_ #define VULKAN_WAYLAND_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. ** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_KHR_wayland_surface 1 #define VK_KHR_WAYLAND_SURFACE_SPEC_VERSION 6 diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_win32.h b/icd/api/include/khronos/sdk-1.1/vulkan_win32.h index a61a7d88..20a1dc0e 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_win32.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_win32.h @@ -1,10 +1,6 @@ #ifndef VULKAN_WIN32_H_ #define VULKAN_WIN32_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. ** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_KHR_win32_surface 1 #define VK_KHR_WIN32_SURFACE_SPEC_VERSION 6 @@ -246,7 +247,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetMemoryWin32HandleNV( #define VK_NV_win32_keyed_mutex 1 -#define VK_NV_WIN32_KEYED_MUTEX_SPEC_VERSION 1 +#define VK_NV_WIN32_KEYED_MUTEX_SPEC_VERSION 2 #define VK_NV_WIN32_KEYED_MUTEX_EXTENSION_NAME "VK_NV_win32_keyed_mutex" typedef struct VkWin32KeyedMutexAcquireReleaseInfoNV { VkStructureType sType; @@ -263,7 +264,7 @@ typedef struct VkWin32KeyedMutexAcquireReleaseInfoNV { #define VK_EXT_full_screen_exclusive 1 -#define VK_EXT_FULL_SCREEN_EXCLUSIVE_SPEC_VERSION 3 +#define VK_EXT_FULL_SCREEN_EXCLUSIVE_SPEC_VERSION 4 #define VK_EXT_FULL_SCREEN_EXCLUSIVE_EXTENSION_NAME "VK_EXT_full_screen_exclusive" typedef enum VkFullScreenExclusiveEXT { diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_xcb.h b/icd/api/include/khronos/sdk-1.1/vulkan_xcb.h index 7d6905d2..4cc0bc0c 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_xcb.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_xcb.h @@ -1,10 +1,6 @@ #ifndef VULKAN_XCB_H_ #define VULKAN_XCB_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. ** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_KHR_xcb_surface 1 #define VK_KHR_XCB_SURFACE_SPEC_VERSION 6 diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_xlib.h b/icd/api/include/khronos/sdk-1.1/vulkan_xlib.h index 7a05d297..ee2b48ac 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_xlib.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_xlib.h @@ -1,10 +1,6 @@ #ifndef VULKAN_XLIB_H_ #define VULKAN_XLIB_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. 
** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_KHR_xlib_surface 1 #define VK_KHR_XLIB_SURFACE_SPEC_VERSION 6 diff --git a/icd/api/include/khronos/sdk-1.1/vulkan_xlib_xrandr.h b/icd/api/include/khronos/sdk-1.1/vulkan_xlib_xrandr.h index 3a209530..08c4fd72 100644 --- a/icd/api/include/khronos/sdk-1.1/vulkan_xlib_xrandr.h +++ b/icd/api/include/khronos/sdk-1.1/vulkan_xlib_xrandr.h @@ -1,10 +1,6 @@ #ifndef VULKAN_XLIB_XRANDR_H_ #define VULKAN_XLIB_XRANDR_H_ 1 -#ifdef __cplusplus -extern "C" { -#endif - /* ** Copyright (c) 2015-2019 The Khronos Group Inc. ** @@ -27,6 +23,11 @@ extern "C" { */ +#ifdef __cplusplus +extern "C" { +#endif + + #define VK_EXT_acquire_xlib_display 1 #define VK_EXT_ACQUIRE_XLIB_DISPLAY_SPEC_VERSION 1 diff --git a/icd/api/include/khronos/vulkan.h b/icd/api/include/khronos/vulkan.h index db50d603..1836dda9 100644 --- a/icd/api/include/khronos/vulkan.h +++ b/icd/api/include/khronos/vulkan.h @@ -55,8 +55,6 @@ #include "devext/vk_amd_gpa_interface.h" -#include "devext/vk_amd_device_coherent_memory.h" - enum class DynamicStatesInternal : uint32_t { VIEWPORT = 0, SCISSOR, @@ -70,6 +68,7 @@ enum class DynamicStatesInternal : uint32_t { VIEWPORT_W_SCALING_NV, DISCARD_RECTANGLE_EXT, SAMPLE_LOCATIONS_EXT, + LINE_STIPPLE_EXT, DynamicStatesInternalCount }; diff --git a/icd/api/include/pipeline_binary_cache.h b/icd/api/include/pipeline_binary_cache.h new file mode 100644 index 00000000..554e6ade --- /dev/null +++ b/icd/api/include/pipeline_binary_cache.h @@ -0,0 +1,196 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2018-2019 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************************************************************************/ +/** +*********************************************************************************************************************** +* @file pipeline_binary_cache.h +* @brief Declaration of Vulkan interface for a PAL layered cache specializing in pipeline binaries +*********************************************************************************************************************** +*/ + +#include "pipeline_compiler.h" + +#include "palHashMap.h" +#include "palMetroHash.h" +#include "palVector.h" +#include "palCacheLayer.h" + +namespace Util +{ +class IPlatformKey; +} // namespace Util + +namespace vk +{ + +// Unified pipeline cache interface +class PipelineBinaryCache +{ +public: + using CacheId = Util::MetroHash::Hash; + + static PipelineBinaryCache* Create( + Instance* pInstance, + size_t initDataSize, + const void* pInitData, + bool internal, + const Llpc::GfxIpVersion& gfxIp, + const PhysicalDevice* pPhysicalDevice); + + VkResult Initialize( + const PhysicalDevice* pPhysicalDevice, + size_t initDataSize, + const void* pInitData); + + Util::Result QueryPipelineBinary( + const CacheId* pCacheId, + Util::QueryResult* pQuery); + + Util::Result LoadPipelineBinary( + const CacheId* pCacheId, + size_t* pPipelineBinarySize, + const void** ppPipelineBinary); + + Util::Result StorePipelineBinary( + const CacheId* pCacheId, + size_t pipelineBinarySize, + const void* pPipelineBinary); + +#if ICD_GPUOPEN_DEVMODE_BUILD + Util::Result LoadReinjectionBinary( + const CacheId* pInternalPipelineHash, + size_t* pPipelineBinarySize, + const void** ppPipelineBinary); + + Util::Result StoreReinjectionBinary( + const CacheId* pInternalPipelineHash, + size_t pipelineBinarySize, + const void* pPipelineBinary); + + using HashMapping = Util::HashMap; + + void RegisterHashMapping( + const Pal::PipelineHash* pInternalPipelineHash, + const CacheId* pCacheId); + + CacheId* GetCacheIdForPipeline( + const Pal::PipelineHash* pInternalPipelineHash); + + VK_INLINE HashMapping::Iterator GetHashMappingIterator() + { return m_hashMapping.Begin(); } + + VK_INLINE Util::RWLock* GetHashMappingLock() + { return &m_hashMappingLock; } +#endif + + void FreePipelineBinary(const void* pPipelineBinary); + + void Destroy() { this->~PipelineBinaryCache(); } + +private: + + PAL_DISALLOW_DEFAULT_CTOR(PipelineBinaryCache); + PAL_DISALLOW_COPY_AND_ASSIGN(PipelineBinaryCache); + + explicit PipelineBinaryCache( + Instance* pInstance, + const Llpc::GfxIpVersion& gfxIp, + bool internal); + ~PipelineBinaryCache(); + + VkResult InitializePlatformKey( + const PhysicalDevice* pPhysicalDevice, + const RuntimeSettings& settings); + + VkResult OrderLayers( + const RuntimeSettings& settings); + + VkResult AddLayerToChain( + Util::ICacheLayer* pLayer, + Util::ICacheLayer** pBottomLayer); + + VkResult InitLayers( + const PhysicalDevice* pPhysicalDevice, + size_t initDataSize, + const void* pInitData, + bool internal, + const RuntimeSettings& settings); + +#if ICD_GPUOPEN_DEVMODE_BUILD + VkResult InitReinjectionLayer( + const RuntimeSettings& settings); + + Util::Result InjectBinariesFromDirectory( + const RuntimeSettings& settings); +#endif + + VkResult InitMemoryCacheLayer( + const RuntimeSettings& settings); + + VkResult InitArchiveLayers( + const PhysicalDevice* pPhysicalDevice, + const RuntimeSettings& settings); + + Util::IArchiveFile* OpenReadOnlyArchive(const char* path, const char* fileName, size_t bufferSize); + 
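// Note on the archive helpers (summary inferred from the surrounding declarations; not part of the original change): + // OpenReadOnlyArchive and OpenWritableArchive wrap Util::IArchiveFile creation for the two access modes, while + // CreateFileLayer adapts an opened archive into a Util::ICacheLayer that AddLayerToChain() can link into the chain. +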
Util::IArchiveFile* OpenWritableArchive(const char* path, const char* fileName, size_t bufferSize); + Util::ICacheLayer* CreateFileLayer(Util::IArchiveFile* pFile); + + // Override the driver's default location + static constexpr char EnvVarPath[] = "AMD_VK_PIPELINE_CACHE_PATH"; + + // Override the driver's default name (Hash of application name) + static constexpr char EnvVarFileName[] = "AMD_VK_PIPELINE_CACHE_FILENAME"; + + // Filename of an additional, read-only archive + static constexpr char EnvVarReadOnlyFileName[] = "AMD_VK_PIPELINE_CACHE_READ_ONLY_FILENAME"; + + static const uint32_t ArchiveType; // TypeId created by hashed string VK_SHADER_PIPELINE_CACHE + static const uint32_t ElfType; // TypeId created by hashed string VK_PIPELINE_ELF + + Llpc::GfxIpVersion m_gfxIp; // Compared against e_flags of reinjected elf files + + Instance* const m_pInstance; // Allocator for use when interacting with the cache + Util::IPlatformKey* m_pPlatformKey; // Platform identifying key + Util::ICacheLayer* m_pTopLayer; // Top layer of the cache chain where queries are submitted + +#if ICD_GPUOPEN_DEVMODE_BUILD + Util::ICacheLayer* m_pReinjectionLayer; // Reinjection interface layer + + HashMapping m_hashMapping; // Maps the internalPipelineHash to the appropriate CacheId + Util::RWLock m_hashMappingLock; // Prevents collisions during writes to the map +#endif + + Util::ICacheLayer* m_pMemoryLayer; + + // Archive based cache layers + using FileVector = Util::Vector<Util::IArchiveFile*, 8, PalAllocator>; + using LayerVector = Util::Vector<Util::ICacheLayer*, 8, PalAllocator>; + Util::ICacheLayer* m_pArchiveLayer; // Top of a chain of loaded archives. + FileVector m_openFiles; + LayerVector m_archiveLayers; + + bool m_isInternalCache; +}; + +} // namespace vk diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index 5c40f207..af4b014c 100644 --- a/icd/api/include/pipeline_compiler.h +++ b/icd/api/include/pipeline_compiler.h @@ -51,6 +51,8 @@ class PipelineCompiler; struct VbBindingInfo; struct ShaderModuleHandle; +class PipelineBinaryCache; + // ===================================================================================================================== class PipelineCompiler { @@ -73,11 +75,17 @@ class PipelineCompiler PipelineCompilerType GetShaderCacheType(); + static void ApplyPipelineOptions( + const Device* pDevice, + VkPipelineCreateFlags flags, + Llpc::PipelineOptions* pOptions); + VkResult BuildShaderModule( - const Device* pDevice, - size_t codeSize, - const void* pCode, - ShaderModuleHandle* pModule); + const Device* pDevice, + VkShaderModuleCreateFlags flags, + size_t codeSize, + const void* pCode, + ShaderModuleHandle* pModule); VkResult CreateGraphicsPipelineBinary( Device* pDevice, @@ -131,6 +139,15 @@ class PipelineCompiler void FreeGraphicsPipelineCreateInfo(GraphicsPipelineCreateInfo* pCreateInfo); +#if ICD_GPUOPEN_DEVMODE_BUILD + Util::Result RegisterAndLoadReinjectionBinary( + const Pal::PipelineHash* pInternalPipelineHash, + const Util::MetroHash::Hash* pCacheId, + size_t* pBinarySize, + const void** ppPipelineBinary, + PipelineCache* pPipelineCache = nullptr); +#endif + template<class PipelineBuildInfo> PipelineCompilerType CheckCompilerType(const PipelineBuildInfo* pPipelineBuildInfo); @@ -154,6 +171,8 @@ class PipelineCompiler , Llpc::NggState* pNggState ); + void GetElfCacheMetricString(char* pOutStr, size_t outStrSize); + template<class PipelineBuildInfo> bool ReplacePipelineBinary( const PipelineBuildInfo* pPipelineBuildInfo, @@ -189,6 +208,17 @@ class PipelineCompiler CompilerSolutionLlpc m_compilerSolutionLlpc; + // PipelineBinaryCache is only available
for closed source at this time. + // PipelineBinaryCache is only enabled for Windows at this time. + PipelineBinaryCache* m_pBinaryCache; // Pipeline binary cache object + + // Metrics + uint32_t m_cacheAttempts; // Number of attempted cache loads + uint32_t m_cacheHits; // Number of cache hits + uint32_t m_totalBinaries; // Total number of binaries compiled or fetched + int64_t m_totalTimeSpent; // Accumulation of time spent either loading or compiling pipeline + // binaries + void GetPipelineCreationInfoNext( const VkStructHeader* pHeader, const VkPipelineCreationFeedbackCreateInfoEXT** ppPipelineCreationFeadbackCreateInfo); diff --git a/icd/api/include/render_state_cache.h b/icd/api/include/render_state_cache.h index 84e83c17..ff61d148 100644 --- a/icd/api/include/render_state_cache.h +++ b/icd/api/include/render_state_cache.h @@ -146,6 +146,9 @@ class RenderStateCache uint32_t CreateComputeWaveLimits(const Pal::DynamicComputeShaderInfo& waveLimits); void DestroyComputeWaveLimits(const Pal::DynamicComputeShaderInfo& waveLimits, uint32_t token); + uint32_t CreateLineStipple(const Pal::LineStippleStateParams& params); + void DestroyLineStipple(const Pal::LineStippleStateParams& params, uint32_t token); + void Destroy(); private: @@ -274,6 +277,11 @@ class RenderStateCache PalAllocator> m_pointLineRasterState; uint32_t m_pointLineRasterStateNextId; + Util::HashMap<Pal::LineStippleStateParams, uint32_t, PalAllocator> m_lineStippleState; + uint32_t m_lineStippleStateNextId; + Util::HashMap<Pal::DepthBiasParams, uint32_t, PalAllocator> m_depthBias; diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index bd36874a..05eb0bbe 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -151,6 +151,7 @@ struct AllGpuRenderState uint32_t inputAssemblyState; uint32_t triangleRasterState; uint32_t pointLineRasterState; + uint32_t lineStippleState; uint32_t depthBiasState; uint32_t blendConst; uint32_t depthBounds; @@ -181,6 +182,7 @@ struct AllGpuRenderState Pal::ScissorRectParams scissor; Pal::ViewportParams viewport; + Pal::LineStippleStateParams lineStipple; }; // This structure describes current render state within a command buffer during its building.
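The line-stipple plumbing above follows the render state cache's existing token scheme: CreateLineStipple() maps identical Pal::LineStippleStateParams requests to a single uint32_t token, and AllGpuRenderState keeps that token (lineStippleState) next to the raw parameters so redundant rebinds can be filtered by comparing one integer instead of a whole struct. A minimal standalone sketch of the idea, with a simplified hypothetical stand-in for the PAL parameter type:

#include <cstdint>
#include <map>

// Hypothetical, simplified stand-in for Pal::LineStippleStateParams.
struct LineStippleParams
{
    uint32_t factor;
    uint16_t pattern;

    bool operator<(const LineStippleParams& rhs) const
    {
        return (factor != rhs.factor) ? (factor < rhs.factor) : (pattern < rhs.pattern);
    }
};

// Deduplicating cache: equal params always yield the same token, so callers can
// compare tokens to detect redundant state binds.
class StateCache
{
public:
    uint32_t CreateLineStipple(const LineStippleParams& params)
    {
        auto it = m_tokens.find(params);
        if (it == m_tokens.end())
        {
            it = m_tokens.emplace(params, m_nextId++).first;
        }
        return it->second;
    }

private:
    std::map<LineStippleParams, uint32_t> m_tokens;
    uint32_t                              m_nextId = 1;
};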
@@ -424,6 +426,10 @@ class CmdBuffer const Pal::ScissorRectParams& params, uint32_t staticToken); + void SetLineStippleEXT( + const Pal::LineStippleStateParams& params, + uint32_t staticToken); + void SetLineWidth( float lineWidth); @@ -566,6 +572,10 @@ class CmdBuffer uint32_t counterOffset, uint32_t vertexStride); + void SetLineStippleEXT( + uint32_t lineStippleFactor, + uint16_t lineStipplePattern); + VK_INLINE void SetDeviceMask(uint32_t deviceMask) { // Ensure we are enabling valid devices within the group diff --git a/icd/api/include/vk_device.h b/icd/api/include/vk_device.h index 8054fab8..69986635 100644 --- a/icd/api/include/vk_device.h +++ b/icd/api/include/vk_device.h @@ -89,6 +89,7 @@ class Queue; class SqttMgr; class SwapChain; class ChillMgr; +class AsyncLayer; // ===================================================================================================================== // Specifies properties for importing a semaphore, it's an encapsulation of VkImportSemaphoreFdInfoKHR and @@ -498,6 +499,10 @@ class Device VK_INLINE BarrierFilterLayer* GetBarrierFilterLayer() { return m_pBarrierFilterLayer; } + VK_INLINE AsyncLayer* GetAsyncLayer() + { + return m_pAsyncLayer; + } VK_INLINE Util::Mutex* GetMemoryMutex() { return &m_memoryMutex; } @@ -604,6 +609,7 @@ class Device const DeviceExtensions::Enabled m_enabledExtensions; // Enabled device extensions DispatchTable m_dispatchTable; // Device dispatch table SqttMgr* m_pSqttMgr; // Manager for developer mode SQ thread tracing + AsyncLayer* m_pAsyncLayer; // State for async compiler layer, otherwise null OptLayer* m_pAppOptLayer; // State for an app-specific layer, otherwise null BarrierFilterLayer* m_pBarrierFilterLayer; // State for enabling barrier filtering, otherwise null Util::Mutex m_memoryMutex; // Shared mutex used occasionally by memory objects @@ -937,6 +943,11 @@ VKAPI_ATTR VkResult VKAPI_CALL vkSetDebugUtilsObjectTagEXT( VkDevice device, const VkDebugUtilsObjectTagInfoEXT* pTagInfo); +VKAPI_ATTR void VKAPI_CALL vkCmdSetLineStippleEXT( + VkCommandBuffer commandBuffer, + uint32_t lineStippleFactor, + uint16_t lineStipplePattern); + } // namespace entry } // namespace vk diff --git a/icd/api/include/vk_dispatch.h b/icd/api/include/vk_dispatch.h index b76d19e2..e22b9eea 100644 --- a/icd/api/include/vk_dispatch.h +++ b/icd/api/include/vk_dispatch.h @@ -117,11 +117,12 @@ class DispatchTable m_table[index] = func; } -protected: bool CheckAPIVersion(uint32_t apiVersion); bool CheckInstanceExtension(InstanceExtensions::ExtensionId id); bool CheckDeviceExtension(DeviceExtensions::ExtensionId id); +protected: + union { EntryPoints m_func; diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index b3ceeb75..49015c37 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -258,6 +258,7 @@ class DeviceExtensions : public Extensions KHR_GET_MEMORY_REQUIREMENTS2, KHR_IMAGE_FORMAT_LIST, KHR_SWAPCHAIN_MUTABLE_FORMAT, + KHR_SHADER_FLOAT_CONTROLS, EXT_INLINE_UNIFORM_BLOCK, KHR_SHADER_ATOMIC_INT64, KHR_DRIVER_PROPERTIES, @@ -297,6 +298,7 @@ class DeviceExtensions : public Extensions EXT_HOST_QUERY_RESET, EXT_BUFFER_DEVICE_ADDRESS, EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, + EXT_LINE_RASTERIZATION, KHR_UNIFORM_BUFFER_STANDARD_LAYOUT, EXT_SUBGROUP_SIZE_CONTROL, diff --git a/icd/api/include/vk_graphics_pipeline.h b/icd/api/include/vk_graphics_pipeline.h index 6a278689..4fb1a937 100644 --- a/icd/api/include/vk_graphics_pipeline.h +++ b/icd/api/include/vk_graphics_pipeline.h @@ -213,6 
+213,7 @@ class GraphicsPipeline : public Pipeline, public NonDispatchable // Image needs to be a friend class to be able to create wrapper API memory objects friend class Image; - bool m_allocationCounted; - uint32_t m_sizeAccountedForDeviceMask; - Pal::IImage* m_pExternalPalImage; - uint32_t m_primaryDeviceIndex; + bool m_allocationCounted; + uint32_t m_sizeAccountedForDeviceMask; + Pal::IImage* m_pExternalPalImage; + uint32_t m_primaryDeviceIndex; // Cache the handle of the GPU memory on the first device, if the GPU memory can be shared across processes. Pal::OsExternalHandle m_sharedGpuMemoryHandle; + // m_handleCloseNeeded indicates if m_sharedGpuMemoryHandle should be closed. + // m_handleCloseNeeded is true in two cases: + // 1. When m_sharedGpuMemoryHandle is shared via NtHandle and is not externally opened; + // 2. When m_sharedGpuMemoryHandle is imported based on a name rather than a handle; // Marks that the logical device's allocation count is incremented and needs to be decremented during the // destruction of this memory object. diff --git a/icd/api/include/vk_physical_device.h b/icd/api/include/vk_physical_device.h index 423814e0..161393df 100644 --- a/icd/api/include/vk_physical_device.h +++ b/icd/api/include/vk_physical_device.h @@ -238,12 +238,56 @@ class PhysicalDevice return Util::WideBitfieldIsSet(m_formatFeatureMsaaTarget, formatIndex); } - void GetPhysicalDeviceIDProperties( - uint8_t* pDeviceUUID, - uint8_t* pDriverUUID, - uint8_t* pDeviceLUID, - uint32_t* pDeviceNodeMask, - VkBool32* pDeviceLUIDValid) const; + VK_INLINE void GetPhysicalDeviceIDProperties( + uint8_t* pDeviceUUID, + uint8_t* pDriverUUID, + uint8_t* pDeviceLUID, + uint32_t* pDeviceNodeMask, + VkBool32* pDeviceLUIDValid) const; + + VK_INLINE void GetPhysicalDeviceMaintenance3Properties( + uint32_t* pMaxPerSetDescriptors, + VkDeviceSize* pMaxMemoryAllocationSize) const; + + VK_INLINE void GetPhysicalDeviceMultiviewProperties( + uint32_t* pMaxMultiviewViewCount, + uint32_t* pMaxMultiviewInstanceIndex) const; + + VK_INLINE void GetPhysicalDevicePointClippingProperties( + VkPointClippingBehavior* pPointClippingBehavior) const; + + VK_INLINE void GetPhysicalDeviceProtectedMemoryProperties( + VkBool32* pProtectedNoFault) const; + + VK_INLINE void GetPhysicalDeviceSubgroupProperties( + uint32_t* pSubgroupSize, + VkShaderStageFlags* pSupportedStages, + VkSubgroupFeatureFlags* pSupportedOperations, + VkBool32* pQuadOperationsInAllStages) const; + + VK_INLINE void GetPhysicalDeviceDriverProperties( + VkDriverIdKHR* pDriverID, + char* pDriverName, + char* pDriverInfo, + VkConformanceVersionKHR* pConformanceVersion) const; + + template<typename T> + VK_INLINE void GetPhysicalDeviceFloatControlsProperties( + T pFloatControlsProperties) const; + + template<typename T> + VK_INLINE void GetPhysicalDeviceDescriptorIndexingProperties( + T pDescriptorIndexingProperties) const; + + VK_INLINE void GetPhysicalDeviceDepthStencilResolveProperties( + VkResolveModeFlagsKHR* pSupportedDepthResolveModes, + VkResolveModeFlagsKHR* pSupportedStencilResolveModes, + VkBool32* pIndependentResolveNone, + VkBool32* pIndependentResolve) const; + + VK_INLINE void GetPhysicalDeviceSamplerFilterMinmaxProperties( + VkBool32* pFilterMinmaxSingleComponentFormats, + VkBool32* pFilterMinmaxImageComponentMapping) const; VkResult GetExternalMemoryProperties( bool isSparse, @@ -267,6 +311,75 @@ class PhysicalDevice uint32_t* pPropertyCount, utils::ArrayView<VkExtensionProperties> properties) const; + VK_INLINE void GetPhysicalDevice16BitStorageFeatures( + VkBool32*
pStorageBuffer16BitAccess, + VkBool32* pUniformAndStorageBuffer16BitAccess, + VkBool32* pStoragePushConstant16, + VkBool32* pStorageInputOutput16) const; + + VK_INLINE void GetPhysicalDeviceMultiviewFeatures( + VkBool32* pMultiview, + VkBool32* pMultiviewGeometryShader, + VkBool32* pMultiviewTessellationShader) const; + + VK_INLINE void GetPhysicalDeviceVariablePointerFeatures( + VkBool32* pVariablePointersStorageBuffer, + VkBool32* pVariablePointers) const; + + VK_INLINE void GetPhysicalDeviceProtectedMemoryFeatures( + VkBool32* pProtectedMemory) const; + + VK_INLINE void GetPhysicalDeviceSamplerYcbcrConversionFeatures( + VkBool32* pSamplerYcbcrConversion) const; + + VK_INLINE void GetPhysicalDeviceShaderDrawParameterFeatures( + VkBool32* pShaderDrawParameters) const; + + VK_INLINE void GetPhysicalDevice8BitStorageFeatures( + VkBool32* pStorageBuffer8BitAccess, + VkBool32* pUniformAndStorageBuffer8BitAccess, + VkBool32* pStoragePushConstant8) const; + + VK_INLINE void GetPhysicalDeviceShaderAtomicInt64Features( + VkBool32* pShaderBufferInt64Atomics, + VkBool32* pShaderSharedInt64Atomics) const; + + VK_INLINE void GetPhysicalDeviceFloat16Int8Features( + VkBool32* pShaderFloat16, + VkBool32* pShaderInt8) const; + + template<typename T> + VK_INLINE void GetPhysicalDeviceDescriptorIndexingFeatures( + T pDescriptorIndexingFeatures) const; + + VK_INLINE void GetPhysicalDeviceScalarBlockLayoutFeatures( + VkBool32* pScalarBlockLayout) const; + + VK_INLINE void GetPhysicalDeviceImagelessFramebufferFeatures( + VkBool32* pImagelessFramebuffer) const; + + VK_INLINE void GetPhysicalDeviceUniformBufferStandardLayoutFeatures( + VkBool32* pUniformBufferStandardLayout) const; + + VK_INLINE void GetPhysicalDeviceSubgroupExtendedTypesFeatures( + VkBool32* pShaderSubgroupExtendedTypes) const; + + VK_INLINE void GetPhysicalDeviceSeparateDepthStencilLayoutsFeatures( + VkBool32* pSeparateDepthStencilLayouts) const; + + VK_INLINE void GetPhysicalDeviceHostQueryResetFeatures( + VkBool32* pHostQueryReset) const; + + VK_INLINE void GetPhysicalDeviceBufferAddressFeatures( + VkBool32* pBufferDeviceAddress, + VkBool32* pBufferDeviceAddressCaptureReplay, + VkBool32* pBufferDeviceAddressMultiDevice) const; + + VK_INLINE void GetPhysicalDeviceVulkanMemoryModelFeatures( + VkBool32* pVulkanMemoryModel, + VkBool32* pVulkanMemoryModelDeviceScope, + VkBool32* pVulkanMemoryModelAvailabilityVisibilityChains) const; + VkResult GetPhysicalDeviceCalibrateableTimeDomainsEXT( uint32_t* pTimeDomainCount, VkTimeDomainEXT* pTimeDomains); @@ -480,6 +593,9 @@ class PhysicalDevice VK_INLINE const DeviceExtensions::Supported& GetSupportedExtensions() const { return m_supportedExtensions; } + VK_INLINE const DeviceExtensions::Supported& GetAllowedExtensions() const + { return m_allowedExtensions; } + VK_INLINE bool IsExtensionSupported(DeviceExtensions::ExtensionId id) const { return m_supportedExtensions.IsExtensionSupported(id); } @@ -574,6 +690,7 @@ class PhysicalDevice bool m_prtOnDmaSupported; DeviceExtensions::Supported m_supportedExtensions; + DeviceExtensions::Supported m_allowedExtensions; // Device properties related to the VK_AMD_gpu_perf_api_interface extension PhysicalDeviceGpaProperties m_gpaProps; diff --git a/icd/api/include/vk_pipeline.h b/icd/api/include/vk_pipeline.h index 23bb4f1f..42024085 100644 --- a/icd/api/include/vk_pipeline.h +++ b/icd/api/include/vk_pipeline.h @@ -82,7 +82,8 @@ struct UserDataLayout }; // Structure containing information about a retrievable pipeline binary.
These are only retained by Pipeline objects -// when specific device extensions (VK_AMD_shader_info) that can query them are enabled. +// when specific device extensions (VK_AMD_shader_info/VK_KHR_pipeline_executable_properties) that can query them are +// enabled. struct PipelineBinaryInfo { static PipelineBinaryInfo* Create(size_t size, const void* pBinary, const VkAllocationCallbacks* pAllocator); diff --git a/icd/api/include/vk_pipeline_cache.h b/icd/api/include/vk_pipeline_cache.h index 567093b5..50d6f210 100644 --- a/icd/api/include/vk_pipeline_cache.h +++ b/icd/api/include/vk_pipeline_cache.h @@ -77,9 +77,11 @@ class PipelineCache : public NonDispatchable<VkPipelineCache, PipelineCache> VkResult Merge(uint32_t srcCacheCount, const PipelineCache** ppSrcCaches); + VK_INLINE PipelineBinaryCache* GetPipelineCache() { return m_pBinaryCache; } protected: PipelineCache(const Device* pDevice, - ShaderCache* pShaderCaches + ShaderCache* pShaderCaches, + PipelineBinaryCache* pBinaryCache ); virtual ~PipelineCache(); @@ -87,6 +89,7 @@ class PipelineCache : public NonDispatchable<VkPipelineCache, PipelineCache> const Device*const m_pDevice; ShaderCache m_shaderCaches[MaxPalDevices]; + PipelineBinaryCache* m_pBinaryCache; // Pipeline binary cache object }; namespace entry diff --git a/icd/api/include/vk_semaphore.h b/icd/api/include/vk_semaphore.h index 96380f22..a4035dc7 100644 --- a/icd/api/include/vk_semaphore.h +++ b/icd/api/include/vk_semaphore.h @@ -58,7 +58,7 @@ class Semaphore : public NonDispatchable<VkSemaphore, Semaphore> static VkResult PopulateInDeviceGroup( Device* pDevice, Pal::IQueueSemaphore* pPalSemaphores[MaxPalDevices], - int32_t* pSemaphoreCount); + uint32_t* pSemaphoreCount); VkResult ImportSemaphore( Device* pDevice, @@ -80,44 +80,23 @@ class Semaphore : public NonDispatchable<VkSemaphore, Semaphore> Semaphore* pSemaphore, uint64_t value); - VK_FORCEINLINE Pal::IQueueSemaphore* PalSemaphore(uint32_t deviceIdx) const - { - return m_pPalSemaphores[deviceIdx]; - } - - VK_FORCEINLINE Pal::IQueueSemaphore* PalTemporarySemaphore(uint32_t deviceIdx) const - { - return m_pPalTemporarySemaphores[deviceIdx]; - } + void SetTemporarySemaphore( + Pal::IQueueSemaphore* pPalImportedSemaphore[], + uint32_t semaphoreCount, + Pal::OsExternalHandle importedHandle); - VK_FORCEINLINE void ClearTemporarySemaphore() - { - memset(m_pPalTemporarySemaphores, 0, sizeof(m_pPalTemporarySemaphores)); - - } + void SetSemaphore( + Pal::IQueueSemaphore* pPalImportedSemaphore[], + uint32_t semaphoreCount, + Pal::OsExternalHandle importedHandle); - VK_FORCEINLINE void SetTemporarySemaphore( - Pal::IQueueSemaphore* pPalTemporarySemaphore[], - int32_t semaphoreCount, - Pal::OsExternalHandle tempHandle) - { - for (int32_t i = 0; i < semaphoreCount; i++) - { - m_pPalTemporarySemaphores[i] = pPalTemporarySemaphore[i]; - } - for (uint32_t i = semaphoreCount; i < MaxPalDevices; i++) - { - m_pPalTemporarySemaphores[i] = nullptr; - } - m_sharedSemaphoreTempHandle = tempHandle; - } + void DestroySemaphore( + const Device* pDevice); - VK_FORCEINLINE Pal::OsExternalHandle GetHandle() const - { - return (m_sharedSemaphoreTempHandle == 0) ? m_sharedSemaphoreHandle : m_sharedSemaphoreTempHandle; - } + void DestroyTemporarySemaphore( + const Device* pDevice); - VkResult Destroy( + void Destroy( const Device* pDevice, const VkAllocationCallbacks* pAllocator); @@ -126,6 +105,24 @@ class Semaphore : public NonDispatchable<VkSemaphore, Semaphore> VkExternalSemaphoreHandleTypeFlagBits handleType, Pal::OsExternalHandle* pHandle); + VK_FORCEINLINE Pal::IQueueSemaphore* PalSemaphore(uint32_t deviceIdx) const + { + return m_useTempSemaphore ?
m_pPalTemporarySemaphores[deviceIdx] : m_pPalSemaphores[deviceIdx]; + } + + VK_FORCEINLINE Pal::OsExternalHandle GetHandle() const + { + return (m_useTempSemaphore) ? m_sharedSemaphoreTempHandle : m_sharedSemaphoreHandle; + } + + VK_FORCEINLINE void RestoreSemaphore() + { + if (m_useTempSemaphore) + { + m_useTempSemaphore = false; + } + } + VK_FORCEINLINE bool IsTimelineSemaphore() const { #if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 458 @@ -138,15 +135,16 @@ class Semaphore : public NonDispatchable<VkSemaphore, Semaphore> private: Semaphore( Pal::IQueueSemaphore* pPalSemaphore[], - int32_t semaphoreCount, + uint32_t semaphoreCount, const Pal::QueueSemaphoreCreateInfo& palCreateInfo, Pal::OsExternalHandle sharedSemaphorehandle) : + m_palCreateInfo(palCreateInfo), + m_useTempSemaphore(false), m_sharedSemaphoreHandle(sharedSemaphorehandle), - m_sharedSemaphoreTempHandle(0), - m_palCreateInfo(palCreateInfo) + m_sharedSemaphoreTempHandle(0) { - for (int32_t i = 0; i < semaphoreCount; i++) + for (uint32_t i = 0; i < semaphoreCount; i++) { m_pPalSemaphores[i] = pPalSemaphore[i]; } @@ -155,21 +153,24 @@ class Semaphore : public NonDispatchable<VkSemaphore, Semaphore> m_pPalSemaphores[i] = nullptr; } - ClearTemporarySemaphore(); + memset(m_pPalTemporarySemaphores, 0, sizeof(m_pPalTemporarySemaphores)); } - Pal::IQueueSemaphore* m_pPalSemaphores[MaxPalDevices]; + Pal::QueueSemaphoreCreateInfo m_palCreateInfo; + Pal::IQueueSemaphore* m_pPalSemaphores[MaxPalDevices]; // Temporary-completion semaphore special for swapchain // which will be associated with a signaled semaphore // in AcquireNextImage. Pal::IQueueSemaphore* m_pPalTemporarySemaphores[MaxPalDevices]; + // m_useTempSemaphore indicates whether the temporary semaphore is in use. + bool m_useTempSemaphore; // For now the m_sharedSemaphoreHandle and m_sharedSemaphoreTempHandle are only used by the Windows driver to cache // the semaphore's handle when the semaphore object is being created. Pal::OsExternalHandle m_sharedSemaphoreHandle; Pal::OsExternalHandle m_sharedSemaphoreTempHandle; - Pal::QueueSemaphoreCreateInfo m_palCreateInfo; + }; namespace entry diff --git a/icd/api/include/vk_shader.h b/icd/api/include/vk_shader.h index f6fba708..014863f4 100644 --- a/icd/api/include/vk_shader.h +++ b/icd/api/include/vk_shader.h @@ -36,6 +36,9 @@ namespace Pal { enum class ResourceMappingNodeType : Pal::uint32; } +// NOTE: Internal shader module create flag, please modify it if it conflicts with the Vulkan header files. +#define VK_SHADER_MODULE_ENABLE_OPT_BIT 0x40000000u + namespace vk { @@ -78,7 +81,7 @@ class ShaderModule : public NonDispatchable<VkShaderModule, ShaderModule> protected: ShaderModule(size_t codeSize, const void* pCode); - VkResult Init(const Device* pDevice); + VkResult Init(const Device* pDevice, VkShaderModuleCreateFlags flags); size_t m_codeSize; const void* m_pCode; diff --git a/icd/api/pipeline_binary_cache.cpp b/icd/api/pipeline_binary_cache.cpp new file mode 100644 index 00000000..af5740d9 --- /dev/null +++ b/icd/api/pipeline_binary_cache.cpp @@ -0,0 +1,1125 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2018-2019 Advanced Micro Devices, Inc. All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** +*********************************************************************************************************************** +* @file pipeline_binary_cache.cpp +* @brief Implementation of the Vulkan interface for PAL layered caching. +*********************************************************************************************************************** +*/ + +#include "include/pipeline_binary_cache.h" +#include "include/vk_physical_device.h" + +#include "palArchiveFile.h" +#include "palPlatformKey.h" +#include "palSysMemory.h" +#include "palVectorImpl.h" +#include "palHashBaseImpl.h" +#include "palFile.h" +#if ICD_GPUOPEN_DEVMODE_BUILD +#include "palPipelineAbiProcessorImpl.h" + +#include "devmode/devmode_mgr.h" +#endif +#include +#include + +namespace vk +{ +#define _MAX_FNAME NAME_MAX + +constexpr char PipelineBinaryCache::EnvVarPath[]; +constexpr char PipelineBinaryCache::EnvVarFileName[]; +constexpr char PipelineBinaryCache::EnvVarReadOnlyFileName[]; + +static constexpr char ArchiveTypeString[] = "VK_SHADER_PIPELINE_CACHE"; +static constexpr size_t ArchiveTypeStringLen = sizeof(ArchiveTypeString); +static constexpr char ElfTypeString[] = "VK_PIPELINE_ELF"; +static constexpr size_t ElfTypeStringLen = sizeof(ElfTypeString); + +const uint32_t PipelineBinaryCache::ArchiveType = Util::HashString(ArchiveTypeString, ArchiveTypeStringLen); +const uint32_t PipelineBinaryCache::ElfType = Util::HashString(ElfTypeString, ElfTypeStringLen); + +#if ICD_GPUOPEN_DEVMODE_BUILD +static Util::Hash128 ParseHash128(const char* str); +#endif + +// ===================================================================================================================== +// Allocate and initialize a PipelineBinaryCache object +PipelineBinaryCache* PipelineBinaryCache::Create( + Instance* pInstance, + size_t initDataSize, + const void* pInitData, + bool internal, + const Llpc::GfxIpVersion& gfxIp, + const PhysicalDevice* pPhysicalDevice) +{ + PipelineBinaryCache* pObj = nullptr; + void* pMem = pInstance->AllocMem(sizeof(PipelineBinaryCache), VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (pMem != nullptr) + { + pObj = VK_PLACEMENT_NEW(pMem) PipelineBinaryCache(pInstance, gfxIp, internal); + + if (pObj->Initialize(pPhysicalDevice, initDataSize, pInitData) != VK_SUCCESS) + { + pObj->Destroy(); + pInstance->FreeMem(pMem); + 
pObj = nullptr; + } + } + return pObj; +} + +// ===================================================================================================================== +PipelineBinaryCache::PipelineBinaryCache( + Instance* pInstance, + const Llpc::GfxIpVersion& gfxIp, + bool internal) + : + m_pInstance { pInstance }, + m_pPlatformKey { nullptr }, + m_pTopLayer { nullptr }, +#if ICD_GPUOPEN_DEVMODE_BUILD + m_pReinjectionLayer{ nullptr }, + m_hashMapping { 32, pInstance->Allocator() }, +#endif + m_pMemoryLayer { nullptr }, + m_pArchiveLayer { nullptr }, + m_openFiles { pInstance->Allocator() }, + m_archiveLayers { pInstance->Allocator() }, + m_isInternalCache { internal } +{ + // Without copy constructor, a class type variable can't be initialized in initialization list with gcc 4.8.5. + // Initialize m_gfxIp here instead to make gcc 4.8.5 work. + m_gfxIp = gfxIp; +} + +// ===================================================================================================================== +PipelineBinaryCache::~PipelineBinaryCache() +{ + if (m_pPlatformKey != nullptr) + { + m_pPlatformKey->Destroy(); + m_pInstance->FreeMem(m_pPlatformKey); + } + + for (FileVector::Iter i = m_openFiles.Begin(); i.IsValid(); i.Next()) + { + i.Get()->Destroy(); + m_pInstance->FreeMem(i.Get()); + } + + m_openFiles.Clear(); + + for (LayerVector::Iter i = m_archiveLayers.Begin(); i.IsValid(); i.Next()) + { + i.Get()->Destroy(); + m_pInstance->FreeMem(i.Get()); + } + + m_archiveLayers.Clear(); + + if (m_pMemoryLayer != nullptr) + { + m_pMemoryLayer->Destroy(); + m_pInstance->FreeMem(m_pMemoryLayer); + } + +#if ICD_GPUOPEN_DEVMODE_BUILD + if (m_pReinjectionLayer != nullptr) + { + m_pReinjectionLayer->Destroy(); + } +#endif +} + +// ===================================================================================================================== +// Query if a pipeline binary exists in cache +Util::Result PipelineBinaryCache::QueryPipelineBinary( + const CacheId* pCacheId, + Util::QueryResult* pQuery) +{ + VK_ASSERT(m_pTopLayer != nullptr); + + return m_pTopLayer->Query(pCacheId, pQuery); +} + +// ===================================================================================================================== +// Attempt to load a graphics pipeline binary from cache +Util::Result PipelineBinaryCache::LoadPipelineBinary( + const CacheId* pCacheId, + size_t* pPipelineBinarySize, + const void** ppPipelineBinary) +{ + VK_ASSERT(m_pTopLayer != nullptr); + + Util::QueryResult query = {}; + Util::Result result = m_pTopLayer->Query(pCacheId, &query); + + if (result == Util::Result::Success) + { + void* pOutputMem = m_pInstance->AllocMem( + query.dataSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (pOutputMem != nullptr) + { + result = m_pTopLayer->Load(&query, pOutputMem); + + if (result == Util::Result::Success) + { + *pPipelineBinarySize = query.dataSize; + *ppPipelineBinary = pOutputMem; + } + else + { + m_pInstance->FreeMem(pOutputMem); + } + } + } + + return result; +} + +// ===================================================================================================================== +// Attempt to store a binary into a cache chain +Util::Result PipelineBinaryCache::StorePipelineBinary( + const CacheId* pCacheId, + size_t pipelineBinarySize, + const void* pPipelineBinary) +{ + VK_ASSERT(m_pTopLayer != nullptr); + + return m_pTopLayer->Store(pCacheId, pPipelineBinary, pipelineBinarySize); +} + +#if ICD_GPUOPEN_DEVMODE_BUILD +// 
===================================================================================================================== +// Introduces a mapping from an internal pipeline hash to a cache ID +void PipelineBinaryCache::RegisterHashMapping( + const Pal::PipelineHash* pInternalPipelineHash, + const CacheId* pCacheId) +{ + VK_ASSERT(pInternalPipelineHash != nullptr); + VK_ASSERT(pCacheId != nullptr); + + if (m_pReinjectionLayer != nullptr) + { + Util::RWLockAuto readWriteLock(&m_hashMappingLock); + + m_hashMapping.Insert(*pInternalPipelineHash, *pCacheId); + } +} + +// ===================================================================================================================== +// Retrieves the cache ID that maps to the given internal pipeline hash, pCacheId is unchanged if no mapping found +PipelineBinaryCache::CacheId* PipelineBinaryCache::GetCacheIdForPipeline( + const Pal::PipelineHash* pInternalPipelineHash) +{ + VK_ASSERT(pInternalPipelineHash != nullptr); + + CacheId* pCacheId = nullptr; + + if (m_pReinjectionLayer != nullptr) + { + Util::RWLockAuto readWriteLock(&m_hashMappingLock); + + pCacheId = m_hashMapping.FindKey(*pInternalPipelineHash); + } + + return pCacheId; +} + +// ===================================================================================================================== +// Attempt to load a binary from the reinjection cache layer +Util::Result PipelineBinaryCache::LoadReinjectionBinary( + const CacheId* pInternalPipelineHash, + size_t* pPipelineBinarySize, + const void** ppPipelineBinary) +{ + Util::Result result = Util::Result::ErrorUnavailable; + + if (m_pReinjectionLayer != nullptr) + { + Util::QueryResult query = {}; + result = m_pReinjectionLayer->Query(pInternalPipelineHash, &query); + + if (result == Util::Result::Success) + { + void* pOutputMem = m_pInstance->AllocMem( + query.dataSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (pOutputMem != nullptr) + { + result = m_pReinjectionLayer->Load(&query, pOutputMem); + + if (result == Util::Result::Success) + { + *pPipelineBinarySize = query.dataSize; + *ppPipelineBinary = pOutputMem; + } + else + { + m_pInstance->FreeMem(pOutputMem); + } + } + } + } + + return result; +} + +// ===================================================================================================================== +// Attempt to store a binary into the reinjection cache layer +Util::Result PipelineBinaryCache::StoreReinjectionBinary( + const CacheId* pInternalPipelineHash, + size_t pipelineBinarySize, + const void* pPipelineBinary) +{ + Util::Result result = Util::Result::ErrorUnavailable; + + if (m_pReinjectionLayer != nullptr) + { + uint32_t gfxIpMajor = 0u; + uint32_t gfxIpMinor = 0u; + uint32_t gfxIpStepping = 0u; + + Util::Abi::PipelineAbiProcessor processor(m_pInstance->Allocator()); + result = processor.LoadFromBuffer(pPipelineBinary, pipelineBinarySize); + + if (result == Util::Result::Success) + { + processor.GetGfxIpVersion(&gfxIpMajor, &gfxIpMinor, &gfxIpStepping); + + if (gfxIpMajor == m_gfxIp.major && + gfxIpMinor == m_gfxIp.minor && + gfxIpStepping == m_gfxIp.stepping) + { + result = m_pReinjectionLayer->Store(pInternalPipelineHash, pPipelineBinary, pipelineBinarySize); + } + else + { + result = Util::Result::ErrorIncompatibleDevice; + } + } + } + + return result; +} + +#endif +// ===================================================================================================================== +// Free memory allocated by our allocator +void PipelineBinaryCache::FreePipelineBinary( 
+    const void* pPipelineBinary)
+{
+    if (pPipelineBinary != nullptr)
+    {
+        m_pInstance->FreeMem(const_cast<void*>(pPipelineBinary));
+    }
+}
+
+// =====================================================================================================================
+// Build the cache layer chain
+VkResult PipelineBinaryCache::Initialize(
+    const PhysicalDevice* pPhysicalDevice,
+    size_t                initDataSize,
+    const void*           pInitData)
+{
+    VkResult result = VK_SUCCESS;
+
+    const RuntimeSettings& settings = pPhysicalDevice->GetRuntimeSettings();
+
+    if (result == VK_SUCCESS)
+    {
+        result = InitializePlatformKey(pPhysicalDevice, settings);
+    }
+
+    if (result == VK_SUCCESS)
+    {
+        result = InitLayers(pPhysicalDevice, initDataSize, pInitData, m_isInternalCache, settings);
+    }
+
+    if (result == VK_SUCCESS)
+    {
+        result = OrderLayers(settings);
+    }
+
+#if ICD_GPUOPEN_DEVMODE_BUILD
+    if ((result == VK_SUCCESS) &&
+        (m_pReinjectionLayer != nullptr))
+    {
+        Util::Result palResult = m_pInstance->GetDevModeMgr()->RegisterPipelineCache(
+            this,
+            settings.devModePipelineUriServicePostSizeLimit);
+
+        if (palResult == Util::Result::Success)
+        {
+            palResult = m_hashMapping.Init();
+        }
+
+        if (palResult == Util::Result::Success)
+        {
+            palResult = m_hashMappingLock.Init();
+        }
+
+        if (palResult != Util::Result::Success)
+        {
+            m_pReinjectionLayer->Destroy();
+            m_pReinjectionLayer = nullptr;
+
+            // Fail silently so that the pipeline cache may still be used for other purposes.
+            PAL_ASSERT_ALWAYS();
+        }
+    }
+#endif
+
+    return result;
+}
+
+// =====================================================================================================================
+// Generate our platform key
+VkResult PipelineBinaryCache::InitializePlatformKey(
+    const PhysicalDevice*  pPhysicalDevice,
+    const RuntimeSettings& settings)
+{
+    static constexpr Util::HashAlgorithm KeyAlgorithm = Util::HashAlgorithm::Sha1;
+
+    struct
+    {
+        VkPhysicalDeviceProperties properties;
+        char*                      timestamp[sizeof(__TIMESTAMP__)];
+    } initialData;
+
+    memset(&initialData, 0, sizeof(initialData));
+
+    VkResult result = pPhysicalDevice->GetDeviceProperties(&initialData.properties);
+
+    if (result == VK_SUCCESS)
+    {
+        size_t memSize = Util::GetPlatformKeySize(KeyAlgorithm);
+        void*  pMem    = m_pInstance->AllocMem(memSize, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+        if (pMem == nullptr)
+        {
+            result = VK_ERROR_OUT_OF_HOST_MEMORY;
+        }
+        else
+        {
+            if (settings.markPipelineCacheWithBuildTimestamp)
+            {
+                memcpy(initialData.timestamp, __TIMESTAMP__, sizeof(__TIMESTAMP__));
+            }
+
+            if (Util::CreatePlatformKey(KeyAlgorithm, &initialData, sizeof(initialData), pMem, &m_pPlatformKey) !=
+                Util::Result::Success)
+            {
+                m_pInstance->FreeMem(pMem);
+                result = VK_ERROR_INITIALIZATION_FAILED;
+            }
+        }
+    }
+
+    return result;
+}
+
+#if ICD_GPUOPEN_DEVMODE_BUILD
+// =====================================================================================================================
+// Initialize reinjection cache layer
+VkResult PipelineBinaryCache::InitReinjectionLayer(
+    const RuntimeSettings& settings)
+{
+    VkResult result = VK_ERROR_FEATURE_NOT_PRESENT;
+
+    if (m_pInstance->GetDevModeMgr() != nullptr)
+    {
+        Util::MemoryCacheCreateInfo info     = {};
+        Util::AllocCallbacks        allocCbs = {
+            m_pInstance->GetAllocCallbacks(),
+            allocator::PalAllocFuncDelegator,
+            allocator::PalFreeFuncDelegator
+        };
+
+        info.baseInfo.pCallbacks = &allocCbs;
+        info.maxObjectCount      = SIZE_MAX;
+        info.maxMemorySize       = SIZE_MAX;
+        info.evictOnFull         = false;
+        info.evictDuplicates     = true;
+
+        size_t memSize = Util::GetMemoryCacheLayerSize(&info);
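// NOTE: Illustrative sketch, not part of this patch. Nearly every PAL cache object in this file is created with the
// idiom that begins on the line above: query the object's size, allocate that much memory from the instance
// allocator, construct the object in place, and free the memory if construction fails. The names below are generic
// stand-ins (malloc/free model Instance::AllocMem/FreeMem; the Get*/Create* pair models
// Util::GetMemoryCacheLayerSize / Util::CreateMemoryCacheLayer, which are simplified here):
#include <cstdlib>
#include <new>

struct ObjectCreateInfo { size_t maxMemorySize = 0; };
struct Object           { ObjectCreateInfo info; };

size_t GetObjectSize(const ObjectCreateInfo&) { return sizeof(Object); }

bool CreateObjectInPlace(const ObjectCreateInfo& info, void* pMem, Object** ppObj)
{
    *ppObj = new (pMem) Object{ info };  // placement-construct into caller-provided memory
    return true;
}

bool CreateWithSizeQuery(const ObjectCreateInfo& info, Object** ppObj)
{
    const size_t memSize = GetObjectSize(info);   // 1) ask how much memory the object needs
    void*        pMem    = std::malloc(memSize);  // 2) allocate it (~Instance::AllocMem)

    if (pMem == nullptr)
    {
        return false;                             // ~VK_ERROR_OUT_OF_HOST_MEMORY
    }

    if (CreateObjectInPlace(info, pMem, ppObj) == false)
    {
        std::free(pMem);                          // 3) free on failure, exactly as the patch does
        return false;
    }

    return true;                                  // the object lives inside pMem until destroyed
}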
+        void* pMem = m_pInstance->AllocMem(memSize, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+        if (pMem == nullptr)
+        {
+            result = VK_ERROR_OUT_OF_HOST_MEMORY;
+        }
+        else
+        {
+            result = PalToVkResult(
+                Util::CreateMemoryCacheLayer(
+                    &info,
+                    pMem,
+                    &m_pReinjectionLayer));
+
+            if (result != VK_SUCCESS)
+            {
+                m_pInstance->FreeMem(pMem);
+            }
+        }
+
+        if (result == VK_SUCCESS)
+        {
+            result = PalToVkResult(InjectBinariesFromDirectory(settings));
+        }
+    }
+
+    return result;
+}
+
+// =====================================================================================================================
+// Helper function for converting a 32-digit hexadecimal (in C string format) to a Hash128 object
+Util::Hash128 ParseHash128(
+    const char* str)
+{
+    Util::Hash128  hash;
+    const uint32_t stride      = 2u; // 1 byte = 2 hex digits
+    const uint32_t byteCount   = (sizeof(hash.bytes) / sizeof(*hash.bytes));
+    uint32_t       stringIndex = 0u;
+    char           buffer[stride + 1u];
+    buffer[stride] = '\0'; // null-terminate the two-digit chunk before strtoul
+
+    // Using little-endian byte order
+    for (uint32_t byteIndex = 0u; byteIndex < byteCount; byteIndex++)
+    {
+        stringIndex = (byteCount - byteIndex - 1) * stride;
+        memcpy(buffer, &str[stringIndex], stride);
+        hash.bytes[byteIndex] = static_cast<uint8_t>(strtoul(buffer, nullptr, 16));
+    }
+
+    return hash;
+}
+
+// =====================================================================================================================
+// Adds binaries to reinjection cache layer from a directory source
+Util::Result PipelineBinaryCache::InjectBinariesFromDirectory(
+    const RuntimeSettings& settings)
+{
+    Util::Result result = Util::Result::Success;
+
+    if (settings.devModeElfReplacementDirectoryEnable)
+    {
+        Util::File   file;
+        char         filePath[260]; // Windows MAX_PATH = 260
+        uint32_t     fileCount          = 0u;
+        const char** ppFileNames        = nullptr;
+        size_t       fileNameBufferSize = 0u;
+        void*        pFileNameBuffer    = nullptr;
+        size_t       dirLength          = strlen(settings.devModeElfReplacementDirectory) + 1u;
+
+        Util::Hash128 pipelineHash       = {};
+        size_t        pipelineBinarySize = 0u;
+        void*         pPipelineBinary    = nullptr;
+
+        // Get the number of files in dir and the size of the buffer to hold their names
+        result = Util::ListDir(
+            settings.devModeElfReplacementDirectory,
+            &fileCount,
+            nullptr,
+            &fileNameBufferSize,
+            nullptr);
+
+        if (fileCount == 0u)
+        {
+            return result;
+        }
+
+        if (result == Util::Result::Success)
+        {
+            // Allocate space for ppFileNames and pFileNameBuffer
+            ppFileNames = (const char**)m_pInstance->AllocMem(
+                (sizeof(const char*) * fileCount),
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+            pFileNameBuffer = m_pInstance->AllocMem(
+                fileNameBufferSize,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+            // Populate ppFileNames and pFileNameBuffer
+            result = Util::ListDir(
+                settings.devModeElfReplacementDirectory,
+                &fileCount,
+                ppFileNames,
+                &fileNameBufferSize,
+                pFileNameBuffer);
+
+            if (result != Util::Result::Success)
+            {
+                m_pInstance->FreeMem(pFileNameBuffer);
+                m_pInstance->FreeMem(ppFileNames);
+            }
+        }
+
+        if (result == Util::Result::Success)
+        {
+            // Store each file into cache
+            strcpy(filePath, settings.devModeElfReplacementDirectory);
+            strcat(filePath, "\\");
+            for (uint32_t fileIndex = 0; fileIndex < fileCount; fileIndex++)
+            {
+                filePath[dirLength] = '\0';
+                strcat(filePath, ppFileNames[fileIndex]);
+
+                ppFileNames[fileIndex] = strstr(ppFileNames[fileIndex], "_0x");
+
+                if ((ppFileNames[fileIndex] != nullptr) &&
+                    (strlen(ppFileNames[fileIndex]) >= 32))
+                {
+                    ppFileNames[fileIndex] += 3u;
+                    pipelineHash =
ParseHash128(ppFileNames[fileIndex]); + + if (Util::File::Exists(filePath)) + { + pipelineBinarySize = Util::File::GetFileSize(filePath); + pPipelineBinary = m_pInstance->AllocMem(pipelineBinarySize, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (pPipelineBinary != nullptr) + { + if (file.Open( + filePath, + Util::FileAccessRead | Util::FileAccessBinary) == Util::Result::Success) + { + if (file.Read(pPipelineBinary, pipelineBinarySize, nullptr) == Util::Result::Success) + { + StoreReinjectionBinary(&pipelineHash, pipelineBinarySize, pPipelineBinary); + } + else + { + VK_NEVER_CALLED(); + } + + file.Close(); + } + else + { + VK_NEVER_CALLED(); + } + } + + m_pInstance->FreeMem(pPipelineBinary); + } + else + { + VK_NEVER_CALLED(); + } + } + } + + m_pInstance->FreeMem(pFileNameBuffer); + m_pInstance->FreeMem(ppFileNames); + } + } + + return result; +} +#endif + +// ===================================================================================================================== +// Initialize memory layer +VkResult PipelineBinaryCache::InitMemoryCacheLayer( + const RuntimeSettings& settings) +{ + VK_ASSERT(m_pMemoryLayer == nullptr); + + Util::AllocCallbacks allocCallbacks = {}; + allocCallbacks.pClientData = m_pInstance->GetAllocCallbacks(); + allocCallbacks.pfnAlloc = allocator::PalAllocFuncDelegator; + allocCallbacks.pfnFree = allocator::PalFreeFuncDelegator; + + Util::MemoryCacheCreateInfo createInfo = {}; + createInfo.baseInfo.pCallbacks = &allocCallbacks; + createInfo.maxObjectCount = SIZE_MAX; + createInfo.maxMemorySize = SIZE_MAX; + createInfo.evictOnFull = true; + createInfo.evictDuplicates = true; + + size_t layerSize = Util::GetMemoryCacheLayerSize(&createInfo); + void* pMem = m_pInstance->AllocMem(layerSize, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + VkResult result = VK_SUCCESS; + + if (pMem == nullptr) + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + else + { + result = PalToVkResult(CreateMemoryCacheLayer(&createInfo, pMem, &m_pMemoryLayer)); + VK_ASSERT(result == VK_SUCCESS); + + if (result != VK_SUCCESS) + { + m_pInstance->FreeMem(pMem); + } + } + + return result; +} + +// ===================================================================================================================== +// Open an archive file from disk for read +Util::IArchiveFile* PipelineBinaryCache::OpenReadOnlyArchive( + const char* pFilePath, + const char* pFileName, + size_t bufferSize) +{ + VK_ASSERT(pFilePath != nullptr); + VK_ASSERT(pFileName != nullptr); + + Util::ArchiveFileOpenInfo info = {}; + Util::IArchiveFile* pFile = nullptr; + + Util::AllocCallbacks allocCbs = { + m_pInstance->GetAllocCallbacks(), + allocator::PalAllocFuncDelegator, + allocator::PalFreeFuncDelegator + }; + + Util::Strncpy(info.filePath, pFilePath, sizeof(info.filePath)); + Util::Strncpy(info.fileName, pFileName, sizeof(info.fileName)); + + info.pMemoryCallbacks = &allocCbs; + info.pPlatformKey = m_pPlatformKey; + info.archiveType = ArchiveType; + info.useStrictVersionControl = true; + info.allowWriteAccess = false; + info.allowCreateFile = false; + info.allowAsyncFileIo = true; + info.useBufferedReadMemory = (bufferSize > 0); + info.maxReadBufferMem = bufferSize; + + size_t memSize = Util::GetArchiveFileObjectSize(&info); + void* pMem = m_pInstance->AllocMem(memSize, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (pMem != nullptr) + { + Util::Result openResult = Util::OpenArchiveFile(&info, pMem, &pFile); + + if (openResult == Util::Result::Success) + { + if (info.useBufferedReadMemory) + { + pFile->Preload(0, 
info.maxReadBufferMem); + } + } + else + { + m_pInstance->FreeMem(pMem); + pFile = nullptr; + } + } + + return pFile; +} + +// ===================================================================================================================== +// Open an archive file from disk for read + write +Util::IArchiveFile* PipelineBinaryCache::OpenWritableArchive( + const char* pFilePath, + const char* pFileName, + size_t bufferSize) +{ + VK_ASSERT(pFilePath != nullptr); + VK_ASSERT(pFileName != nullptr); + + Util::ArchiveFileOpenInfo info = {}; + Util::IArchiveFile* pFile = nullptr; + + Util::AllocCallbacks allocCbs = { + m_pInstance->GetAllocCallbacks(), + allocator::PalAllocFuncDelegator, + allocator::PalFreeFuncDelegator + }; + + Util::Strncpy(info.filePath, pFilePath, sizeof(info.filePath)); + Util::Strncpy(info.fileName, pFileName, sizeof(info.fileName)); + + info.pMemoryCallbacks = &allocCbs; + info.pPlatformKey = m_pPlatformKey; + info.archiveType = ArchiveType; + info.useStrictVersionControl = true; + info.allowWriteAccess = true; + info.allowCreateFile = true; + info.allowAsyncFileIo = true; + info.useBufferedReadMemory = (bufferSize > 0); + info.maxReadBufferMem = bufferSize; + + size_t memSize = Util::GetArchiveFileObjectSize(&info); + void* pMem = m_pInstance->AllocMem(memSize, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (pMem != nullptr) + { + Util::Result openResult = Util::OpenArchiveFile(&info, pMem, &pFile); + + if (openResult == Util::Result::ErrorIncompatibleLibrary) + { + if (Util::DeleteArchiveFile(&info) == Util::Result::Success) + { + openResult = Util::OpenArchiveFile(&info, pMem, &pFile); + } + } + + if (openResult == Util::Result::Success) + { + if (info.useBufferedReadMemory) + { + pFile->Preload(0, info.maxReadBufferMem); + } + } + else + { + m_pInstance->FreeMem(pMem); + pFile = nullptr; + } + } + + return pFile; +} + +// ===================================================================================================================== +// Create a cache layer from an open file +Util::ICacheLayer* PipelineBinaryCache::CreateFileLayer( + Util::IArchiveFile* pFile) +{ + VK_ASSERT(pFile != nullptr); + Util::ArchiveFileCacheCreateInfo info = {}; + Util::ICacheLayer* pLayer = nullptr; + + Util::AllocCallbacks allocCbs = { + m_pInstance->GetAllocCallbacks(), + allocator::PalAllocFuncDelegator, + allocator::PalFreeFuncDelegator + }; + + info.baseInfo.pCallbacks = &allocCbs; + info.pFile = pFile; + info.pPlatformKey = m_pPlatformKey; + info.dataTypeId = ElfType; + + size_t memSize = Util::GetArchiveFileCacheLayerSize(&info); + void* pMem = m_pInstance->AllocMem(memSize, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (pMem != nullptr) + { + if (Util::CreateArchiveFileCacheLayer(&info, pMem, &pLayer) != Util::Result::Success) + { + m_pInstance->FreeMem(pMem); + pLayer = nullptr; + } + } + + return pLayer; +} + +// ===================================================================================================================== +// Open the archive file and initialize its cache layer +VkResult PipelineBinaryCache::InitArchiveLayers( + const PhysicalDevice* pPhysicalDevice, + const RuntimeSettings& settings) +{ + VkResult result = VK_SUCCESS; + + // Buffer to hold constructed path + char pathBuffer[_MAX_FNAME] = {}; + // If the environment variable AMD_VK_PIPELINE_CACHE_PATH is set, obey it first + const char* pCachePath = getenv(EnvVarPath); + + // otherwise fetch the cache location from PAL + if (pCachePath == nullptr) + { + // Default to a fail state here in case we cannot 
build the default path
+        result = VK_ERROR_INITIALIZATION_FAILED;
+
+        if (settings.usePipelineCachingDefaultLocation)
+        {
+            const char* pCacheSubPath = settings.pipelineCachingDefaultLocation;
+            const char* pUserDataPath = pPhysicalDevice->PalDevice()->GetCacheFilePath();
+
+            if ((pCacheSubPath != nullptr) &&
+                (pUserDataPath != nullptr))
+            {
+                // Construct the path in the local buffer. Consider it valid if not empty
+                if (Util::Snprintf(pathBuffer, _MAX_FNAME, "%s%s", pUserDataPath, pCacheSubPath) > 0)
+                {
+                    pCachePath = pathBuffer;
+                    result     = VK_SUCCESS;
+                }
+            }
+        }
+    }
+
+    // Load the primary archive file
+    if (result == VK_SUCCESS)
+    {
+        // Assume that the first layer we open should be the "primary" source and optimize its memory access
+        constexpr size_t PrimaryLayerBufferSize   = 64 * 1024 * 1024;
+        constexpr size_t SecondaryLayerBufferSize = 8 * 1024 * 1024;
+
+        // Open the optional read only cache file. This may fail gracefully
+        const char* const  pThirdPartyFileName = getenv(EnvVarReadOnlyFileName);
+        Util::ICacheLayer* pThirdPartyLayer    = nullptr;
+
+        if (pThirdPartyFileName != nullptr)
+        {
+            Util::IArchiveFile* pFile = OpenReadOnlyArchive(pCachePath, pThirdPartyFileName, PrimaryLayerBufferSize);
+
+            if (pFile != nullptr)
+            {
+                Util::ICacheLayer* pLayer = CreateFileLayer(pFile);
+
+                if (pLayer != nullptr)
+                {
+                    m_openFiles.PushBack(pFile);
+                    m_archiveLayers.PushBack(pLayer);
+
+                    pThirdPartyLayer = pLayer;
+
+                    // If third party layer is given to us, have it be the primary layer
+                    m_pArchiveLayer = pLayer;
+                }
+                else
+                {
+                    pFile->Destroy();
+                    m_pInstance->FreeMem(pFile);
+                }
+            }
+        }
+
+        // Buffer to hold constructed filename
+        char nameBuffer[_MAX_FNAME] = {};
+
+        const char* const pCacheFileName = getenv(EnvVarFileName);
+
+        if (pCacheFileName == nullptr)
+        {
+            // If no naming scheme is given, compute the name by AppHash + PlatformKey
+            Util::Hash128 appHash        = {};
+            char*         pExecutablePtr = nullptr;
+
+            Util::Result palResult = Util::GetExecutableName(nameBuffer, &pExecutablePtr, sizeof(nameBuffer));
+            VK_ASSERT(IsErrorResult(palResult) == false);
+            Util::MetroHash128::Hash(reinterpret_cast<const uint8_t*>(nameBuffer), sizeof(nameBuffer), appHash.bytes);
+
+            Util::Snprintf(
+                nameBuffer,
+                sizeof(nameBuffer),
+                "%llX%llX",
+                Util::MetroHash::Compact64(&appHash),
+                m_pPlatformKey->GetKey64());
+        }
+        else
+        {
+            Util::Strncpy(nameBuffer, pCacheFileName, sizeof(nameBuffer));
+        }
+
+        Util::ICacheLayer* pWriteLayer    = nullptr;
+        Util::ICacheLayer* pLastReadLayer = pThirdPartyLayer;
+
+        char* const  nameEnd        = &nameBuffer[strnlen(nameBuffer, sizeof(nameBuffer))];
+        const size_t charsRemaining = sizeof(nameBuffer) - (nameEnd - nameBuffer);
+
+        constexpr int MaxAttempts = 10;
+        for (int attemptCt = 0; attemptCt < MaxAttempts; ++attemptCt)
+        {
+            size_t bufferSize = (m_pArchiveLayer == nullptr) ? PrimaryLayerBufferSize : SecondaryLayerBufferSize;
+
+            // Create the final name based on the attempt
+            *nameEnd = '\0';
+            if (attemptCt == 0)
+            {
+                Util::Strncat(nameBuffer, sizeof(nameBuffer), ".parc");
+            }
+            else
+            {
+                Util::Snprintf(nameEnd, charsRemaining, "_%d.parc", attemptCt);
+            }
+
+            Util::IArchiveFile* pFile    = OpenWritableArchive(pCachePath, nameBuffer, bufferSize);
+            bool                readOnly = false;
+
+            // Attempt to open the file as a read only instead if we failed
+            if (pFile == nullptr)
+            {
+                pFile    = OpenReadOnlyArchive(pCachePath, nameBuffer, bufferSize);
+                readOnly = true;
+            }
+
+            // Only create the layer if one of the two above calls successfully opened the file
+            if (pFile != nullptr)
+            {
+                Util::ICacheLayer* pLayer = CreateFileLayer(pFile);
+
+                if (pLayer != nullptr)
+                {
+                    m_openFiles.PushBack(pFile);
+                    m_archiveLayers.PushBack(pLayer);
+
+                    if (pLastReadLayer != nullptr)
+                    {
+                        pLastReadLayer->SetLoadPolicy(Util::ICacheLayer::LinkPolicy::PassCalls);
+                        pLastReadLayer->SetStorePolicy(Util::ICacheLayer::LinkPolicy::Skip | Util::ICacheLayer::LinkPolicy::PassData);
+                        // Connect to previous read layer as read-through / write-through + skip
+                        pLastReadLayer->Link(pLayer);
+                    }
+
+                    // Ensure the first read or write layer is set to "top" of the chain.
+                    if (m_pArchiveLayer == nullptr)
+                    {
+                        m_pArchiveLayer = pLayer;
+                    }
+
+                    if (readOnly)
+                    {
+                        pLastReadLayer = pLayer;
+                    }
+                    else
+                    {
+                        pWriteLayer = pLayer;
+                        break;
+                    }
+                }
+                else
+                {
+                    pFile->Destroy();
+                    m_pInstance->FreeMem(pFile);
+                }
+            }
+        }
+
+        if (m_pArchiveLayer == nullptr)
+        {
+            result = VK_ERROR_INITIALIZATION_FAILED;
+        }
+
+        VK_ASSERT(pWriteLayer != nullptr);
+    }
+
+    return result;
+}
+
+// =====================================================================================================================
+// Initialize layers (a single layer that supports storage for binaries needs to succeed)
+VkResult PipelineBinaryCache::InitLayers(
+    const PhysicalDevice*  pPhysicalDevice,
+    size_t                 initDataSize,
+    const void*            pInitData,
+    bool                   internal,
+    const RuntimeSettings& settings)
+{
+    VkResult result = VK_ERROR_INITIALIZATION_FAILED;
+
+#if ICD_GPUOPEN_DEVMODE_BUILD
+    if ((InitReinjectionLayer(settings) == VK_SUCCESS))
+    {
+        result = VK_SUCCESS;
+    }
+#endif
+
+    if (InitMemoryCacheLayer(settings) == VK_SUCCESS)
+    {
+        result = VK_SUCCESS;
+    }
+
+    // If the cache handle is a VkPipelineCache, we shouldn't store it to disk.
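// NOTE: Illustrative sketch, not part of this patch. The archive layers opened in InitArchiveLayers above are linked
// into a chain in which read-only layers forward both loads and stores (SetLoadPolicy(PassCalls) /
// SetStorePolicy(Skip | PassData)), so a store lands in the first writable layer while a load can still hit any
// layer; the if (internal) check below then decides whether the on-disk chain is attached at all. A minimal
// two-level analogue with hypothetical types:
#include <map>
#include <optional>
#include <string>

struct CacheLayer
{
    std::map<std::string, std::string> entries;
    CacheLayer* pNext    = nullptr;  // next layer in the chain (~ICacheLayer::Link)
    bool        readOnly = false;    // models SetStorePolicy(LinkPolicy::Skip)

    std::optional<std::string> Load(const std::string& key)
    {
        const auto it = entries.find(key);
        if (it != entries.end())
        {
            return it->second;
        }
        // ~LinkPolicy::PassCalls: forward misses to the next layer.
        return (pNext != nullptr) ? pNext->Load(key) : std::nullopt;
    }

    void Store(const std::string& key, const std::string& value)
    {
        if (readOnly == false)  // Skip keeps read-only archives unmodified
        {
            entries[key] = value;
        }
        if (pNext != nullptr)   // ~LinkPolicy::PassData: keep handing the data down the chain
        {
            pNext->Store(key, value);
        }
    }
};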
+ if (internal) + { + if (InitArchiveLayers(pPhysicalDevice, settings) == VK_SUCCESS) + { + result = VK_SUCCESS; + } + } + + return result; +} + +VkResult PipelineBinaryCache::AddLayerToChain( + Util::ICacheLayer* pLayer, + Util::ICacheLayer** pBottomLayer) +{ + VkResult result = VK_SUCCESS; + + if (pLayer != nullptr) + { + if (m_pTopLayer == nullptr) + { + m_pTopLayer = pLayer; + *pBottomLayer = pLayer; + } + else + { + if ((*pBottomLayer)->Link(pLayer) == Util::Result::Success) + { + *pBottomLayer = pLayer; + } + else + { + result = VK_ERROR_INITIALIZATION_FAILED; + } + } + } + + return result; +} + +// ===================================================================================================================== +// Order the layers for desired caching behaviour +VkResult PipelineBinaryCache::OrderLayers( + const RuntimeSettings& settings) +{ + VkResult result = VK_SUCCESS; + Util::ICacheLayer* pBottomLayer = nullptr; + m_pTopLayer = nullptr; + + if (result == VK_SUCCESS) + { + result = AddLayerToChain(m_pMemoryLayer, &pBottomLayer); + } + + if (result == VK_SUCCESS) + { + result = AddLayerToChain(m_pArchiveLayer, &pBottomLayer); + } + + if ((result == VK_SUCCESS) && + (m_pTopLayer == nullptr)) + { + // The cache is not very useful if no layers are available. + result = VK_ERROR_INITIALIZATION_FAILED; + } + + return result; +} + +} // namespace vk diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index f7b6c787..40b0c206 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -42,6 +42,8 @@ #include "palFile.h" #include "palHashSetImpl.h" +#include "include/pipeline_binary_cache.h" + #include "palPipelineAbiProcessorImpl.h" #include @@ -57,13 +59,46 @@ PipelineCompiler::PipelineCompiler( : m_pPhysicalDevice(pPhysicalDevice) , m_compilerSolutionLlpc(pPhysicalDevice) + , m_pBinaryCache(nullptr) + , m_cacheAttempts(0) + , m_cacheHits(0) + , m_totalBinaries(0) + , m_totalTimeSpent(0) { } +// ===================================================================================================================== +// Dump pipeline elf cache metrics to a string +void PipelineCompiler::GetElfCacheMetricString( + char* pOutStr, + size_t outStrSize) +{ + const int64_t freq = Util::GetPerfFrequency(); + + const int64_t avgUs = ((m_totalTimeSpent / m_totalBinaries) * 1000000) / freq; + const double avgMs = avgUs / 1000.0; + + const int64_t totalUs = (m_totalTimeSpent * 1000000) / freq; + const double totalMs = totalUs / 1000.0; + + const double hitRate = m_cacheAttempts > 0 ? 
+ (static_cast(m_cacheHits) / static_cast(m_cacheAttempts)) : + 0.0; + + static constexpr char metricFmtString[] = + "Cache hit rate - %0.1f%%\n" + "Total request count - %d\n" + "Total time spent - %0.1f ms\n" + "Average time spent per request - %0.3f ms\n"; + + Util::Snprintf(pOutStr, outStrSize, metricFmtString, hitRate * 100, m_totalBinaries, totalMs, avgMs); +} + // ===================================================================================================================== PipelineCompiler::~PipelineCompiler() { + VK_ASSERT(m_pBinaryCache == nullptr); } // ===================================================================================================================== @@ -119,6 +154,17 @@ VkResult PipelineCompiler::Initialize() result = m_compilerSolutionLlpc.Initialize(); } + if ((result == VK_SUCCESS) && + ((settings.usePalPipelineCaching) || + (m_pPhysicalDevice->VkInstance()->GetDevModeMgr() != nullptr))) + { + m_pBinaryCache = PipelineBinaryCache::Create( + m_pPhysicalDevice->VkInstance(), 0, nullptr, true, m_gfxIp, m_pPhysicalDevice); + + // This isn't a terminal failure, the device can continue without the pipeline cache if need be. + VK_ALERT(m_pBinaryCache == nullptr); + } + return result; } @@ -128,6 +174,13 @@ void PipelineCompiler::Destroy() { m_compilerSolutionLlpc.Destroy(); + if (m_pBinaryCache) + { + m_pBinaryCache->Destroy(); + m_pPhysicalDevice->VkInstance()->FreeMem(m_pBinaryCache); + m_pBinaryCache = nullptr; + } + } // ===================================================================================================================== @@ -207,10 +260,11 @@ bool PipelineCompiler::LoadReplaceShaderBinary( // ===================================================================================================================== // Builds shader module from SPIR-V binary code. 
VkResult PipelineCompiler::BuildShaderModule( - const Device* pDevice, - size_t codeSize, - const void* pCode, - ShaderModuleHandle* pShaderModule) + const Device* pDevice, + VkShaderModuleCreateFlags flags, + size_t codeSize, + const void* pCode, + ShaderModuleHandle* pShaderModule) { const RuntimeSettings* pSettings = &m_pPhysicalDevice->GetRuntimeSettings(); auto pInstance = m_pPhysicalDevice->Manager()->VkInstance(); @@ -235,7 +289,7 @@ VkResult PipelineCompiler::BuildShaderModule( if (compilerMask & (1 << PipelineCompilerTypeLlpc)) { - result = m_compilerSolutionLlpc.BuildShaderModule(pDevice, codeSize, pCode, pShaderModule, hash); + result = m_compilerSolutionLlpc.BuildShaderModule(pDevice, flags, codeSize, pCode, pShaderModule, hash); } if (findReplaceShader) @@ -314,7 +368,7 @@ bool PipelineCompiler::ReplacePipelineShaderModule( if (LoadReplaceShaderBinary(hash64, &codeSize, &pCode)) { - VkResult result = BuildShaderModule(pDevice, codeSize, pCode, pShaderModule); + VkResult result = BuildShaderModule(pDevice, 0, codeSize, pCode, pShaderModule); if (result == VK_SUCCESS) { pShaderInfo->pModuleData = ShaderModule::GetShaderData(compilerType, pShaderModule); @@ -566,6 +620,86 @@ VkResult PipelineCompiler::CreateGraphicsPipelineBinary( #endif } + // PAL Pipeline caching + Util::Result cacheResult = Util::Result::Success; + + int64_t cacheTime = 0; + + bool isUserCacheHit = false; + bool isInternalCacheHit = false; + + PipelineBinaryCache* pPipelineBinaryCache = nullptr; + + if ((pPipelineCache != nullptr) && (pPipelineCache->GetPipelineCache() != nullptr)) + { + pPipelineBinaryCache = pPipelineCache->GetPipelineCache(); + } + + if (shouldCompile && ((pPipelineBinaryCache != nullptr) || (m_pBinaryCache != nullptr))) + { + int64_t startTime = Util::GetPerfCpuTime(); + Util::MetroHash128 hash = {}; + hash.Update(pipelineHash); + hash.Update(pCreateInfo->pipelineInfo.vs.options); + hash.Update(pCreateInfo->pipelineInfo.tes.options); + hash.Update(pCreateInfo->pipelineInfo.tcs.options); + hash.Update(pCreateInfo->pipelineInfo.gs.options); + hash.Update(pCreateInfo->pipelineInfo.fs.options); + hash.Update(pCreateInfo->pipelineInfo.options); + hash.Update(pCreateInfo->pipelineInfo.nggState); + hash.Update(pCreateInfo->flags); + hash.Update(pCreateInfo->dbFormat); + hash.Update(pCreateInfo->pipelineProfileKey); + hash.Update(deviceIdx); + hash.Update(pCreateInfo->compilerType); + hash.Finalize(pCacheId->bytes); + + const void* pPipelineBinary = nullptr; + + if (pPipelineBinaryCache != nullptr) + { + cacheResult = pPipelineBinaryCache->LoadPipelineBinary(pCacheId, pPipelineBinarySize, &pPipelineBinary); + if (cacheResult == Util::Result::Success) + { + isUserCacheHit = true; + pCreateInfo->pipelineFeedback.hitApplicationCache = true; + *ppPipelineBinary = pPipelineBinary; + } + } + m_cacheAttempts++; + + if (m_pBinaryCache != nullptr) + { + // If user cache is already hit, we just need query if it is in internal cache, + // don't need heavy loading work. 
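// NOTE: Illustrative sketch, not part of this patch. The block above implements a two-tier lookup: the
// application-supplied VkPipelineCache is tried first, and the driver's internal cache is then either merely
// queried (when the app cache already hit, so the internal cache can be backfilled on a true miss without paying
// for a second full load) or fully loaded. Types below are hypothetical stand-ins for the patch's interfaces:
#include <cstddef>
#include <cstdint>

struct CacheId { uint8_t bytes[16]; };
struct Binary  { const void* pData; size_t size; };

struct ICache
{
    virtual bool Load(const CacheId& id, Binary* pOut) = 0;  // ~LoadPipelineBinary
    virtual bool Contains(const CacheId& id)           = 0;  // ~QueryPipelineBinary
    virtual ~ICache() = default;
};

// Returns true when either tier already holds the binary, i.e. compilation can be skipped.
bool LookupTwoTier(
    ICache*        pAppCache,     // from the app's VkPipelineCache; may be null
    ICache*        pDriverCache,  // driver-internal cache; may be null
    const CacheId& id,
    Binary*        pOut,
    bool*          pAppHit,
    bool*          pDriverHit)
{
    *pAppHit    = (pAppCache != nullptr) && pAppCache->Load(id, pOut);
    *pDriverHit = false;

    if (pDriverCache != nullptr)
    {
        // Query-only when the app cache already hit; full load otherwise.
        *pDriverHit = (*pAppHit) ? pDriverCache->Contains(id)
                                 : pDriverCache->Load(id, pOut);
    }

    return (*pAppHit || *pDriverHit);
}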
+ if (isUserCacheHit) + { + Util::QueryResult query = {}; + cacheResult = m_pBinaryCache->QueryPipelineBinary(pCacheId, &query); + } + else + { + cacheResult = m_pBinaryCache->LoadPipelineBinary(pCacheId, pPipelineBinarySize, &pPipelineBinary); + } + if (cacheResult == Util::Result::Success) + { + isInternalCacheHit = true; + if (!isUserCacheHit) + { + *ppPipelineBinary = pPipelineBinary; + } + } + } + if (isUserCacheHit || isInternalCacheHit) + { + pCreateInfo->elfWasCached = true; + shouldCompile = false; + m_cacheHits++; + } + + cacheTime = Util::GetPerfCpuTime() - startTime; + } + if ((pCreateInfo->compilerType == PipelineCompilerTypeLlpc) && shouldCompile) { result = m_compilerSolutionLlpc.CreateGraphicsPipelineBinary(pDevice, deviceIdx, pPipelineCache, pCreateInfo, @@ -573,6 +707,33 @@ VkResult PipelineCompiler::CreateGraphicsPipelineBinary( pipelineHash, &compileTime); } + if ((pPipelineBinaryCache != nullptr) && + (isUserCacheHit == false) && + (result == VK_SUCCESS)) + { + cacheResult = pPipelineBinaryCache->StorePipelineBinary( + pCacheId, + *pPipelineBinarySize, + *ppPipelineBinary); + + VK_ASSERT(Util::IsErrorResult(cacheResult) == false); + } + + if ((m_pBinaryCache != nullptr) && + (isInternalCacheHit == false) && + (result == VK_SUCCESS)) + { + cacheResult = m_pBinaryCache->StorePipelineBinary( + pCacheId, + *pPipelineBinarySize, + *ppPipelineBinary); + + VK_ASSERT(Util::IsErrorResult(cacheResult) == false); + } + + m_totalTimeSpent += pCreateInfo->elfWasCached ? cacheTime : compileTime; + m_totalBinaries++; + if (settings.shaderReplaceMode == ShaderReplaceShaderISA) { ReplacePipelineIsaCode(pDevice, pipelineHash, *ppPipelineBinary, *pPipelineBinarySize); @@ -671,12 +832,112 @@ VkResult PipelineCompiler::CreateComputePipelineBinary( } } + // PAL Pipeline caching + Util::Result cacheResult = Util::Result::Success; + + int64_t cacheTime = 0; + + bool isUserCacheHit = false; + bool isInternalCacheHit = false; + + PipelineBinaryCache* pPipelineBinaryCache = nullptr; + + if ((pPipelineCache != nullptr) && (pPipelineCache->GetPipelineCache() != nullptr)) + { + pPipelineBinaryCache = pPipelineCache->GetPipelineCache(); + } + + if (shouldCompile && ((pPipelineBinaryCache != nullptr) || (m_pBinaryCache != nullptr))) + { + int64_t startTime = Util::GetPerfCpuTime(); + Util::MetroHash128 hash = {}; + hash.Update(pipelineHash); + hash.Update(pCreateInfo->pipelineInfo.cs.options); + hash.Update(pCreateInfo->pipelineInfo.options); + hash.Update(pCreateInfo->flags); + hash.Update(pCreateInfo->pipelineProfileKey); + hash.Update(deviceIdx); + hash.Update(pCreateInfo->compilerType); + hash.Finalize(pCacheId->bytes); + + const void* pPipelineBinary = nullptr; + + if (pPipelineBinaryCache != nullptr) + { + cacheResult = pPipelineBinaryCache->LoadPipelineBinary(pCacheId, pPipelineBinarySize, &pPipelineBinary); + if (cacheResult == Util::Result::Success) + { + isUserCacheHit = true; + pCreateInfo->pipelineFeedback.hitApplicationCache = true; + *ppPipelineBinary = pPipelineBinary; + } + } + m_cacheAttempts++; + + if (m_pBinaryCache != nullptr) + { + // If user cache is already hit, we just need query if it is in internal cache, + // don't need heavy loading work. 
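// NOTE: Illustrative sketch, not part of this patch. The compute path above folds every compile-relevant input
// (the pipeline hash, shader and pipeline options, create flags, profile key, device index and compiler type)
// into a 128-bit cache ID with Util::MetroHash128. The stand-in below derives a similar two-lane key with FNV-1a;
// the struct and function names are hypothetical, and the real options structs are elided:
#include <cstddef>
#include <cstdint>

struct CacheId128 { uint64_t lo; uint64_t hi; };

static void MixBytes(uint64_t* pState, const void* pData, size_t size)
{
    const uint8_t* pBytes = static_cast<const uint8_t*>(pData);
    for (size_t i = 0; i < size; ++i)  // FNV-1a: xor a byte, multiply by the prime
    {
        *pState = (*pState ^ pBytes[i]) * 0x100000001b3ull;
    }
}

template <typename T>
static void MixPod(CacheId128* pId, const T& value)  // ~MetroHash128::Update for POD inputs
{
    MixBytes(&pId->lo, &value, sizeof(value));
    MixBytes(&pId->hi, &value, sizeof(value));       // second lane differs via its seed below
}

CacheId128 BuildComputeCacheId(
    uint64_t pipelineHash,
    uint32_t createFlags,
    uint32_t deviceIdx,
    uint32_t compilerType)
{
    CacheId128 id = { 0xcbf29ce484222325ull, 0x84222325cbf29ce4ull };  // distinct lane seeds
    MixPod(&id, pipelineHash);
    MixPod(&id, createFlags);   // the real code also mixes the cs/pipeline option structs
    MixPod(&id, deviceIdx);     // and the pipeline profile key before finalizing
    MixPod(&id, compilerType);
    return id;
}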
+ if (isUserCacheHit) + { + Util::QueryResult query = {}; + cacheResult = m_pBinaryCache->QueryPipelineBinary(pCacheId, &query); + } + else + { + cacheResult = m_pBinaryCache->LoadPipelineBinary(pCacheId, pPipelineBinarySize, &pPipelineBinary); + } + if (cacheResult == Util::Result::Success) + { + isInternalCacheHit = true; + if (!isUserCacheHit) + { + *ppPipelineBinary = pPipelineBinary; + } + } + } + if (isUserCacheHit || isInternalCacheHit) + { + pCreateInfo->elfWasCached = true; + shouldCompile = false; + m_cacheHits++; + } + + cacheTime = Util::GetPerfCpuTime() - startTime; + } + if ((pCreateInfo->compilerType == PipelineCompilerTypeLlpc) && shouldCompile) { result = m_compilerSolutionLlpc.CreateComputePipelineBinary(pDevice, deviceIdx, pPipelineCache, pCreateInfo, pPipelineBinarySize, ppPipelineBinary, pPipelineDumpHandle, pipelineHash, &compileTime); } + if ((pPipelineBinaryCache != nullptr) && + (isUserCacheHit == false) && + (result == VK_SUCCESS)) + { + cacheResult = pPipelineBinaryCache->StorePipelineBinary( + pCacheId, + *pPipelineBinarySize, + *ppPipelineBinary); + + VK_ASSERT(Util::IsErrorResult(cacheResult) == false); + } + + if ((m_pBinaryCache != nullptr) && + (isInternalCacheHit == false) && + (result == VK_SUCCESS)) + { + cacheResult = m_pBinaryCache->StorePipelineBinary( + pCacheId, + *pPipelineBinarySize, + *ppPipelineBinary); + + VK_ASSERT(Util::IsErrorResult(cacheResult) == false); + } + + m_totalTimeSpent += pCreateInfo->elfWasCached ? cacheTime : compileTime; + m_totalBinaries++; if (settings.shaderReplaceMode == ShaderReplaceShaderISA) { ReplacePipelineIsaCode(pDevice, pipelineHash, *ppPipelineBinary, *pPipelineBinarySize); @@ -771,7 +1032,7 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( VkResult result = VK_SUCCESS; const RuntimeSettings& settings = m_pPhysicalDevice->GetRuntimeSettings(); auto pInstance = m_pPhysicalDevice->Manager()->VkInstance(); - + auto flags = pIn->flags; EXTRACT_VK_STRUCTURES_0( gfxPipeline, GraphicsPipelineCreateInfo, @@ -966,7 +1227,14 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( if (m_gfxIp.major >= 10) { - pCreateInfo->pipelineInfo.nggState.enableNgg = settings.enableNgg; + const bool hasGs = pStageInfos[ShaderStageGeometry] != nullptr; + const bool hasTess = pStageInfos[ShaderStageTessControl] != nullptr; + const GraphicsPipelineType pipelineType = + hasTess ? (hasGs ? GraphicsPipelineTypeTessGs : GraphicsPipelineTypeTess) : + (hasGs ? 
GraphicsPipelineTypeGs : GraphicsPipelineTypeVsFs); + + pCreateInfo->pipelineInfo.nggState.enableNgg = + Util::TestAnyFlagSet(settings.enableNgg, pipelineType); pCreateInfo->pipelineInfo.nggState.enableGsUse = settings.nggEnableGsUse; pCreateInfo->pipelineInfo.nggState.forceNonPassthrough = settings.nggForceNonPassthrough; pCreateInfo->pipelineInfo.nggState.alwaysUsePrimShaderTable = settings.nggAlwaysUsePrimShaderTable; @@ -995,25 +1263,7 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( pCreateInfo->pipelineInfo.nggState.vertsPerSubgroup = settings.nggVertsPerSubgroup; } - if (pDevice->IsExtensionEnabled(DeviceExtensions::AMD_SHADER_INFO)) - { - pCreateInfo->pipelineInfo.options.includeDisassembly = true; - pCreateInfo->pipelineInfo.options.includeIr = true; -#if (LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 25) && (LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 27) - pCreateInfo->pipelineInfo.options.includeIrBinary = true; -#endif - } - - if (pDevice->IsExtensionEnabled(DeviceExtensions::EXT_SCALAR_BLOCK_LAYOUT)) - { - pCreateInfo->pipelineInfo.options.scalarBlockLayout = true; - } -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 23 - if (pDevice->GetEnabledFeatures().robustBufferAccess) - { - pCreateInfo->pipelineInfo.options.robustBufferAccess = true; - } -#endif + ApplyPipelineOptions(pDevice, flags, &pCreateInfo->pipelineInfo.options); if (pLayout != nullptr) { @@ -1133,6 +1383,8 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( pShaderInfo->pModuleData = pShaderModule->GetShaderData(pCreateInfo->compilerType); } + pCreateInfo->elfWasCached = false; + return result; } @@ -1175,6 +1427,36 @@ uint32_t PipelineCompiler::GetCompilerCollectionMask() return availCompilerMask; } +// ===================================================================================================================== +void PipelineCompiler::ApplyPipelineOptions( + const Device* pDevice, + VkPipelineCreateFlags flags, + Llpc::PipelineOptions* pOptions) +{ + if (pDevice->IsExtensionEnabled(DeviceExtensions::AMD_SHADER_INFO) || + (pDevice->IsExtensionEnabled(DeviceExtensions::KHR_PIPELINE_EXECUTABLE_PROPERTIES) && + ((flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) != 0))) + { + pOptions->includeDisassembly = true; + pOptions->includeIr = true; +#if (LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 25) && (LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 27) + pOptions->includeIrBinary = true; +#endif + } + + if (pDevice->IsExtensionEnabled(DeviceExtensions::EXT_SCALAR_BLOCK_LAYOUT)) + { + pOptions->scalarBlockLayout = true; + } + +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 23 + if (pDevice->GetEnabledFeatures().robustBufferAccess) + { + pOptions->robustBufferAccess = true; + } +#endif +} + // ===================================================================================================================== // Converts Vulkan compute pipeline parameters to an internal structure VkResult PipelineCompiler::ConvertComputePipelineInfo( @@ -1198,26 +1480,7 @@ VkResult PipelineCompiler::ConvertComputePipelineInfo( pLayout = PipelineLayout::ObjectFromHandle(pIn->layout); } pCreateInfo->flags = pIn->flags; - - if (pDevice->IsExtensionEnabled(DeviceExtensions::AMD_SHADER_INFO)) - { - pCreateInfo->pipelineInfo.options.includeDisassembly = true; - pCreateInfo->pipelineInfo.options.includeIr = true; -#if (LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 25) && (LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 27) - pCreateInfo->pipelineInfo.options.includeIrBinary = true; -#endif - } - - if 
(pDevice->IsExtensionEnabled(DeviceExtensions::EXT_SCALAR_BLOCK_LAYOUT))
-    {
-        pCreateInfo->pipelineInfo.options.scalarBlockLayout = true;
-    }
-#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 23
-    if (pDevice->GetEnabledFeatures().robustBufferAccess)
-    {
-        pCreateInfo->pipelineInfo.options.robustBufferAccess = true;
-    }
-#endif
+    ApplyPipelineOptions(pDevice, pIn->flags, &pCreateInfo->pipelineInfo.options);
 
     if (pLayout != nullptr)
     {
@@ -1378,6 +1641,11 @@ void PipelineCompiler::FreeComputePipelineBinary(
     const void* pPipelineBinary,
     size_t      binarySize)
 {
+    if (pCreateInfo->elfWasCached)
+    {
+        m_pPhysicalDevice->Manager()->VkInstance()->FreeMem(const_cast<void*>(pPipelineBinary));
+    }
+    else
     {
         if (pCreateInfo->compilerType == PipelineCompilerTypeLlpc)
         {
@@ -1394,6 +1662,11 @@ void PipelineCompiler::FreeGraphicsPipelineBinary(
     const void* pPipelineBinary,
     size_t      binarySize)
 {
+    if (pCreateInfo->elfWasCached)
+    {
+        m_pPhysicalDevice->Manager()->VkInstance()->FreeMem(const_cast<void*>(pPipelineBinary));
+    }
+    else
     {
         if (pCreateInfo->compilerType == PipelineCompilerTypeLlpc)
         {
@@ -1431,4 +1704,51 @@ void PipelineCompiler::FreeGraphicsPipelineCreateInfo(
     }
 }
 
+#if ICD_GPUOPEN_DEVMODE_BUILD
+Util::Result PipelineCompiler::RegisterAndLoadReinjectionBinary(
+    const Pal::PipelineHash*     pInternalPipelineHash,
+    const Util::MetroHash::Hash* pCacheId,
+    size_t*                      pBinarySize,
+    const void**                 ppPipelineBinary,
+    PipelineCache*               pPipelineCache)
+{
+    Util::Result result = Util::Result::NotFound;
+
+    PipelineBinaryCache* pPipelineBinaryCache = m_pBinaryCache;
+
+    if ((pPipelineCache != nullptr) && (pPipelineCache->GetPipelineCache() != nullptr))
+    {
+        pPipelineBinaryCache = pPipelineCache->GetPipelineCache();
+    }
+
+    if (pPipelineBinaryCache != nullptr)
+    {
+        pPipelineBinaryCache->RegisterHashMapping(
+            pInternalPipelineHash,
+            pCacheId);
+
+        static_assert(sizeof(Pal::PipelineHash) == sizeof(PipelineBinaryCache::CacheId), "Structure size mismatch");
+
+        if (m_pBinaryCache != nullptr)
+        {
+            result = m_pBinaryCache->LoadReinjectionBinary(
+                reinterpret_cast<const PipelineBinaryCache::CacheId*>(pInternalPipelineHash),
+                pBinarySize,
+                ppPipelineBinary);
+        }
+
+        if ((result == Util::Result::NotFound) &&
+            (pPipelineBinaryCache != m_pBinaryCache))
+        {
+            result = pPipelineBinaryCache->LoadReinjectionBinary(
+                reinterpret_cast<const PipelineBinaryCache::CacheId*>(pInternalPipelineHash),
+                pBinarySize,
+                ppPipelineBinary);
+        }
+    }
+
+    return result;
+}
+#endif
+
 }
diff --git a/icd/api/render_state_cache.cpp b/icd/api/render_state_cache.cpp
index 0cbee2ed..346f2787 100644
--- a/icd/api/render_state_cache.cpp
+++ b/icd/api/render_state_cache.cpp
@@ -52,6 +52,8 @@ RenderStateCache::RenderStateCache(
     m_triangleRasterStateNextId(FirstStaticRenderStateToken),
     m_pointLineRasterState(NumStateBuckets, pDevice->VkInstance()->Allocator()),
     m_pointLineRasterStateNextId(FirstStaticRenderStateToken),
+    m_lineStippleState(NumStateBuckets, pDevice->VkInstance()->Allocator()),
+    m_lineStippleStateNextId(FirstStaticRenderStateToken),
     m_depthBias(NumStateBuckets, pDevice->VkInstance()->Allocator()),
     m_depthBiasNextId(FirstStaticRenderStateToken),
     m_blendConst(NumStateBuckets, pDevice->VkInstance()->Allocator()),
@@ -99,6 +101,11 @@ VkResult RenderStateCache::Init()
         result = m_pointLineRasterState.Init();
     }
 
+    if (result == Pal::Result::Success)
+    {
+        result = m_lineStippleState.Init();
+    }
+
     if (result == Pal::Result::Success)
     {
         result = m_depthBias.Init();
@@ -1108,4 +1115,27 @@ void RenderStateCache::DestroyComputeWaveLimits(
         &m_computeWaveLimits);
 }
 
+//
===================================================================================================================== +uint32_t RenderStateCache::CreateLineStipple( + const Pal::LineStippleStateParams& params) +{ + return CreateStaticParamsState( + OptRenderStateCacheStaticLineStipple, + params, + &m_lineStippleState, + &m_lineStippleStateNextId); +} + +// ===================================================================================================================== +void RenderStateCache::DestroyLineStipple( + const Pal::LineStippleStateParams& params, + uint32_t token) +{ + return DestroyStaticParamsState( + OptRenderStateCacheStaticLineStipple, + params, + token, + &m_lineStippleState); +} + }; diff --git a/icd/api/renderpass/renderpass_builder.cpp b/icd/api/renderpass/renderpass_builder.cpp index f4ac9d38..6fe38a3f 100644 --- a/icd/api/renderpass/renderpass_builder.cpp +++ b/icd/api/renderpass/renderpass_builder.cpp @@ -288,8 +288,8 @@ RenderPassBuilder::AttachmentState::AttachmentState( loaded(false), resolvesInFlight(false) { - prevReferenceLayout.layout = pDesc->initialLayout; - prevReferenceLayout.extraUsage = 0; + prevReferenceLayout.layout = pDesc->initialLayout; + prevReferenceLayout.extraUsage = 0; } // ===================================================================================================================== @@ -449,7 +449,6 @@ Pal::Result RenderPassBuilder::BuildLoadOps( RPLoadOpClearInfo clearInfo = {}; clearInfo.attachment = attachment; - clearInfo.layout = pAttachment->prevReferenceLayout; clearInfo.aspect = clearAspect; // Load-op clear only if requested and the first reference isn't a resolve attachment (which will overwrite @@ -541,9 +540,9 @@ Pal::Result RenderPassBuilder::BuildDepthStencilAttachmentReferences( Pal::Result result = Pal::Result::Success; SubpassState* pSubpass = &m_pSubpasses[subpass]; - pSubpass->bindTargets.depthStencil.attachment = VK_ATTACHMENT_UNUSED; - pSubpass->bindTargets.depthStencil.layout.layout = VK_IMAGE_LAYOUT_UNDEFINED; - pSubpass->bindTargets.depthStencil.layout.extraUsage = 0; + pSubpass->bindTargets.depthStencil.attachment = VK_ATTACHMENT_UNUSED; + pSubpass->bindTargets.depthStencil.layout.layout = VK_IMAGE_LAYOUT_UNDEFINED; + pSubpass->bindTargets.depthStencil.layout.extraUsage = 0; if (desc.depthStencilAttachment.attachment != VK_ATTACHMENT_UNUSED) { @@ -551,13 +550,13 @@ Pal::Result RenderPassBuilder::BuildDepthStencilAttachmentReferences( if (reference.attachment != VK_ATTACHMENT_UNUSED) { - RPImageLayout layout = { reference.layout, 0 }; + RPImageLayout layout = { reference.layout, 0 }; - result = TrackAttachmentUsage(subpass, AttachRefDepthStencil, reference.attachment, - layout, &pSubpass->syncTop); + result = TrackAttachmentUsage(subpass, AttachRefDepthStencil, reference.attachment, layout, + &pSubpass->syncTop); - pSubpass->bindTargets.depthStencil.attachment = reference.attachment; - pSubpass->bindTargets.depthStencil.layout = layout; + pSubpass->bindTargets.depthStencil.attachment = reference.attachment; + pSubpass->bindTargets.depthStencil.layout = layout; } } @@ -586,7 +585,7 @@ Pal::Result RenderPassBuilder::BuildInputAttachmentReferences( if (reference.attachment != VK_ATTACHMENT_UNUSED) { - RPImageLayout layout = { reference.layout, 0 }; + RPImageLayout layout = { reference.layout, 0 }; result = TrackAttachmentUsage(subpass, AttachRefInput, reference.attachment, layout, &pSubpass->syncTop); @@ -626,7 +625,8 @@ Pal::Result RenderPassBuilder::BuildResolveAttachmentReferences( if (result == 
Pal::Result::Success) { - result = TrackAttachmentUsage(subpass, AttachRefResolveDst, dst.attachment, dstLayout, &pSubpass->syncPreResolve); + result = TrackAttachmentUsage(subpass, AttachRefResolveDst, dst.attachment, dstLayout, + &pSubpass->syncPreResolve); } if (result == Pal::Result::Success) @@ -657,8 +657,8 @@ Pal::Result RenderPassBuilder::BuildResolveAttachmentReferences( const AttachmentReference& src = subpassDesc.depthStencilAttachment; const AttachmentReference& dst = subpassDesc.depthStencilResolveAttachment; - const RPImageLayout srcLayout = { src.layout, Pal::LayoutResolveSrc }; - const RPImageLayout dstLayout = { dst.layout, Pal::LayoutResolveDst }; + const RPImageLayout srcLayout = { src.layout, Pal::LayoutResolveSrc }; + const RPImageLayout dstLayout = { dst.layout, Pal::LayoutResolveDst }; result = TrackAttachmentUsage(subpass, AttachRefResolveSrc, src.attachment, srcLayout, &pSubpass->syncPreResolve); @@ -674,18 +674,19 @@ Pal::Result RenderPassBuilder::BuildResolveAttachmentReferences( // If depth stencil resovle attachment will be cleared, using top sync point to guarantee metadata init before clear. SyncPointState* pSync = hasClearOp ? &pSubpass->syncTop : &pSubpass->syncPreResolve; - result = TrackAttachmentUsage(subpass, AttachRefResolveDst, dst.attachment, dstLayout, pSync); + result = TrackAttachmentUsage(subpass, AttachRefResolveDst, dst.attachment, dstLayout, + pSync); } if (result == Pal::Result::Success) { RPResolveInfo resolve = {}; - resolve.src.attachment = src.attachment; - resolve.src.layout = m_pAttachments[src.attachment].prevReferenceLayout; + resolve.src.attachment = src.attachment; + resolve.src.layout = m_pAttachments[src.attachment].prevReferenceLayout; - resolve.dst.attachment = dst.attachment; - resolve.dst.layout = m_pAttachments[dst.attachment].prevReferenceLayout; + resolve.dst.attachment = dst.attachment; + resolve.dst.layout = m_pAttachments[dst.attachment].prevReferenceLayout; result = pSubpass->resolves.PushBack(resolve); @@ -719,7 +720,7 @@ Pal::Result RenderPassBuilder::BuildEndState() // Execute final layout changes. for (uint32_t a = 0; (a < m_attachmentCount) && (result == Pal::Result::Success); ++a) { - const RPImageLayout finalLayout = { m_pAttachments[a].pDesc->finalLayout, 0 }; + const RPImageLayout finalLayout = { m_pAttachments[a].pDesc->finalLayout, 0 }; result = TrackAttachmentUsage( VK_SUBPASS_EXTERNAL, @@ -984,11 +985,11 @@ bool RenderPassBuilder::WritesToAttachment( // This is a general function to track render pass usage of a particular attachment between subpasses. It triggers // automatic layout transitions as well as load-ops when that attachment is first used. Pal::Result RenderPassBuilder::TrackAttachmentUsage( - uint32_t subpass, - AttachRefType refType, - uint32_t attachment, - RPImageLayout layout, - SyncPointState* pSync) + uint32_t subpass, + AttachRefType refType, + uint32_t attachment, + RPImageLayout layout, + SyncPointState* pSync) { Pal::Result result = Pal::Result::Success; @@ -1006,7 +1007,8 @@ Pal::Result RenderPassBuilder::TrackAttachmentUsage( // Detect if an automatic layout transition is needed and insert one to the given sync point if so. Note that // these happen before load ops are triggered (below). 
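// NOTE: Illustrative sketch, not part of this patch. The test below records an automatic layout transition only
// when the attachment's tracked previous layout differs from the newly requested one; matching layouts fall
// through, leaving the sync point to handle just cache syncs and load-ops. Simplified stand-ins for
// RPImageLayout / RPTransitionInfo / SyncPointState follow:
#include <cstdint>
#include <vector>

struct Layout
{
    uint32_t layout;      // ~VkImageLayout
    uint32_t extraUsage;  // ~extra PAL usage flags

    bool operator!=(const Layout& other) const
    {
        return (layout != other.layout) || (extraUsage != other.extraUsage);
    }
};

struct Transition { uint32_t attachment; Layout prev; Layout next; };

void TrackUsage(
    uint32_t                 attachment,
    Layout*                  pPrevLayout,  // per-attachment layout tracked between subpasses
    const Layout&            newLayout,    // layout requested by the current reference
    std::vector<Transition>* pSyncPoint)   // transitions executed at the subpass sync point
{
    if (*pPrevLayout != newLayout)
    {
        pSyncPoint->push_back({ attachment, *pPrevLayout, newLayout });
        *pPrevLayout = newLayout;  // later references compare against the updated layout
    }
}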
- if (pAttachment->prevReferenceLayout != layout) + if ((pAttachment->prevReferenceLayout != layout) + ) { RPTransitionInfo transition = {}; diff --git a/icd/api/renderpass/renderpass_builder.h b/icd/api/renderpass/renderpass_builder.h index 13204d52..799b15d9 100644 --- a/icd/api/renderpass/renderpass_builder.h +++ b/icd/api/renderpass/renderpass_builder.h @@ -182,8 +182,12 @@ class RenderPassBuilder Pal::Result BuildSamplePatternMemoryStore(uint32_t attachment); Pal::Result BuildEndState(); - Pal::Result TrackAttachmentUsage(uint32_t subpass, AttachRefType refType, uint32_t attachment, - RPImageLayout layout, SyncPointState* pSync); + Pal::Result TrackAttachmentUsage( + uint32_t subpass, + AttachRefType refType, + uint32_t attachment, + RPImageLayout layout, + SyncPointState* pSync); void WaitForResolves(SyncPointState* pSync); void WaitForResolvesFromSubpass(uint32_t subpass, SyncPointState* pSync); diff --git a/icd/api/renderpass/renderpass_logger.cpp b/icd/api/renderpass/renderpass_logger.cpp index d9df6d90..e4c451ca 100644 --- a/icd/api/renderpass/renderpass_logger.cpp +++ b/icd/api/renderpass/renderpass_logger.cpp @@ -220,7 +220,7 @@ void RenderPassLogger::LogAttachmentReference( const AttachmentReference& reference) { LogAttachment(reference.attachment); - Log(" in %s", ImageLayoutString(reference.layout, false)); + Log(" in %s, %s", ImageLayoutString(reference.layout, false), ImageLayoutString(reference.stencilLayout, false)); Log(" aspectMask "); LogImageAspectMask(reference.aspectMask, false); } @@ -229,7 +229,11 @@ void RenderPassLogger::LogAttachmentReference( void RenderPassLogger::LogAttachmentReference( const RPAttachmentReference& reference) { - LogAttachment(reference.attachment); Log(" in "); LogImageLayout(reference.layout); + LogAttachment(reference.attachment); + Log(" in "); + LogImageLayout(reference.layout); + Log(", "); + LogImageLayout(reference.stencilLayout); } // ===================================================================================================================== @@ -576,15 +580,15 @@ void RenderPassLogger::LogRenderPassCreateInfo( const AttachmentDescription& desc = info.pAttachments[i]; Log("info.pAttachments[%d] = {\n", i); - Log(" .flags = 0x%x\n", desc.flags); - Log(" .format = "); LogFormat(desc.format, false); Log("\n"); - Log(" .samples = 0x%x\n", desc.samples); - Log(" .loadOp = %s\n", LoadOpString(desc.loadOp)); - Log(" .storeOp = %s\n", StoreOpString(desc.storeOp)); - Log(" .stencilLoadOp = %s\n", LoadOpString(desc.stencilLoadOp)); - Log(" .stencilStoreOp = %s\n", StoreOpString(desc.stencilStoreOp)); - Log(" .initialLayout = %s\n", ImageLayoutString(desc.initialLayout, false)); - Log(" .finalLayout = %s\n", ImageLayoutString(desc.finalLayout, false)); + Log(" .flags = 0x%x\n", desc.flags); + Log(" .format = "); LogFormat(desc.format, false); Log("\n"); + Log(" .samples = 0x%x\n", desc.samples); + Log(" .loadOp = %s\n", LoadOpString(desc.loadOp)); + Log(" .storeOp = %s\n", StoreOpString(desc.storeOp)); + Log(" .stencilLoadOp = %s\n", LoadOpString(desc.stencilLoadOp)); + Log(" .stencilStoreOp = %s\n", StoreOpString(desc.stencilStoreOp)); + Log(" .initialLayout = %s\n", ImageLayoutString(desc.initialLayout, false)); + Log(" .finalLayout = %s\n", ImageLayoutString(desc.finalLayout, false)); Log("}\n"); } @@ -863,7 +867,6 @@ void RenderPassLogger::LogExecuteRPLoadOpClear( Log("%s[%d]:\n", pVar, i); Log(" .attachment = %u\n", clear.attachment); - Log(" .layout = "); LogImageLayout(clear.layout); Log("\n"); Log(" .aspect = "); if 
(clear.aspect == VK_IMAGE_ASPECT_COLOR_BIT) @@ -974,9 +977,9 @@ void RenderPassLogger::LogExecuteRPSyncPoint( const VkFormat format = m_pInfo->pAttachments[attachment].format; Log( "%s.pTransitions[%d]:\n", pName, i); - Log( " .attachment = "); LogAttachment(attachment); Log("\n"); - Log( " .prevLayout = "); LogImageLayout(tr.prevLayout); Log("\n"); - Log( " .nextLayout = "); LogImageLayout(tr.nextLayout); Log("\n"); + Log( " .attachment = "); LogAttachment(attachment); Log("\n"); + Log( " .prevLayout = "); LogImageLayout(tr.prevLayout); Log("\n"); + Log( " .nextLayout = "); LogImageLayout(tr.nextLayout); Log("\n"); } LogEndSource(); diff --git a/icd/api/renderpass/renderpass_types.h b/icd/api/renderpass/renderpass_types.h index 1873a284..4b4b0272 100644 --- a/icd/api/renderpass/renderpass_types.h +++ b/icd/api/renderpass/renderpass_types.h @@ -63,16 +63,16 @@ struct RPAttachmentReference // Describes information about an automatic layout transition happening inside a render pass instance. struct RPTransitionInfo { - uint32_t attachment; // Attachment being transitioned - RPImageLayout prevLayout; // Previous layout - RPImageLayout nextLayout; // Next layout + uint32_t attachment; // Attachment being transitioned + RPImageLayout prevLayout; // Previous layout + RPImageLayout nextLayout; // Next layout union { struct { uint32_t isInitialLayoutTransition : 1; - uint32_t reserved : 30; + uint32_t reserved : 31; }; uint32_t u32All; } flags; @@ -82,7 +82,6 @@ struct RPTransitionInfo struct RPLoadOpClearInfo { uint32_t attachment; // Attachment to be cleared - RPImageLayout layout; // Layout the attachment is currently in and will be used in after the clear VkImageAspectFlags aspect; // Which image aspects are to be cleared }; diff --git a/icd/api/sqtt/sqtt_layer.cpp b/icd/api/sqtt/sqtt_layer.cpp index 618e59be..1103242f 100644 --- a/icd/api/sqtt/sqtt_layer.cpp +++ b/icd/api/sqtt/sqtt_layer.cpp @@ -41,6 +41,7 @@ #include "include/vk_physical_device.h" #include "include/vk_queue.h" #include "include/vk_instance.h" +#include "include/vk_extensions.h" #include "sqtt/sqtt_layer.h" #include "sqtt/sqtt_mgr.h" @@ -2276,7 +2277,10 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( } // namespace entry #define SQTT_OVERRIDE_ALIAS(entry_name, func_name) \ - pDispatchTable->OverrideEntryPoints()->entry_name = vk::entry::sqtt::func_name + if (pDispatchTable->entry_name##_condition) \ + { \ + pDispatchTable->OverrideEntryPoints()->entry_name = vk::entry::sqtt::func_name; \ + } #define SQTT_OVERRIDE_ENTRY(entry_name) SQTT_OVERRIDE_ALIAS(entry_name, entry_name) diff --git a/icd/api/strings/base_entry_points.txt b/icd/api/strings/base_entry_points.txt index fb9a5823..13595221 100644 --- a/icd/api/strings/base_entry_points.txt +++ b/icd/api/strings/base_entry_points.txt @@ -337,3 +337,6 @@ vkGetCalibratedTimestampsEXT @device @dext(EXT_calibr vkGetPipelineExecutablePropertiesKHR @device @dext(KHR_pipeline_executable_properties) vkGetPipelineExecutableStatisticsKHR @device @dext(KHR_pipeline_executable_properties) vkGetPipelineExecutableInternalRepresentationsKHR @device @dext(KHR_pipeline_executable_properties) + +vkCmdSetLineStippleEXT @device @dext(EXT_line_rasterization) + diff --git a/icd/api/strings/base_extensions.txt b/icd/api/strings/base_extensions.txt index db3697d3..6e22799b 100644 --- a/icd/api/strings/base_extensions.txt +++ b/icd/api/strings/base_extensions.txt @@ -108,3 +108,4 @@ VK_EXT_shader_demote_to_helper_invocation VK_EXT_subgroup_size_control VK_EXT_calibrated_timestamps 
VK_KHR_pipeline_executable_properties +VK_EXT_line_rasterization diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index 7c5bf575..40601688 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -65,39 +65,6 @@ namespace vk namespace { -// ===================================================================================================================== -// This finds the subset of an images subres ranges that need to be transitioned based changes between the source and -// destination layouts. In the event that no layout transitions are required, a single transition is still returned -// to handle cache syncs. -void FindDepthStencilLayoutTransitionRanges( - const Pal::ImageLayout oldLayouts[MaxPalDepthAspectsPerMask], - const Pal::ImageLayout newLayouts[MaxPalDepthAspectsPerMask], - uint32_t* pStartRange, - uint32_t* pNumRangeTransitions) -{ - // Assume the default case that both transitions are required. - uint32_t startRange = 0; - uint32_t numTransitions = MaxPalDepthAspectsPerMask; - - if ((oldLayouts[0].usages == newLayouts[0].usages) && - (oldLayouts[0].engines == newLayouts[0].engines)) - { - // Skip the depth transition - numTransitions--; - - startRange++; - } - else if ((oldLayouts[1].usages == newLayouts[1].usages) && - (oldLayouts[1].engines == newLayouts[1].engines)) - { - // Skip the stencil transition - numTransitions--; - } - - *pStartRange = startRange; - *pNumRangeTransitions = numTransitions; -} - // ===================================================================================================================== // Creates a compatible PAL "clear box" structure from attachment + render area for a renderpass clear. Pal::Box BuildClearBox( @@ -3190,6 +3157,7 @@ void CmdBuffer::ExecuteBarriers( pNextMain->imageInfo.pImage = nullptr; + uint32_t layoutIdx = 0; uint32_t palRangeIdx = 0; uint32_t palRangeCount = 0; Pal::SubresRange palRanges[MaxPalAspectsPerMask]; @@ -3202,25 +3170,28 @@ void CmdBuffer::ExecuteBarriers( palRanges, &palRangeCount); - bool hasDepthAndStencil = ((pImageMemoryBarriers[i].subresourceRange.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) && - (pImageMemoryBarriers[i].subresourceRange.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)); - - // If image has a depth/stencil format with both depth and stencil components, then aspectMask member of - // subresourceRange must include both VK_IMAGE_ASPECT_DEPTH_BIT and VK_IMAGE_ASPECT_STENCIL_BIT - VK_ASSERT(hasDepthAndStencil == pImage->HasDepthAndStencil()); - - if (layoutChanging && hasDepthAndStencil) + if (layoutChanging && Formats::HasStencil(format)) { - // With both depth and stencil, there should be two ranges - VK_ASSERT(palRangeCount == MaxPalDepthAspectsPerMask); - - // Combined depth and stencil images may transition independently based on their layouts, so determine - // the appropriate subset of ranges to transition in case one can be skipped. - FindDepthStencilLayoutTransitionRanges( - oldLayouts, - newLayouts, - &palRangeIdx, - &palRangeCount); + if (palRangeCount == MaxPalDepthAspectsPerMask) + { + // Find the subset of an image's subres ranges that need to be transitioned based on changes between the + source and destination layouts.
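+ // A sketch of the three possible outcomes, assuming (as the skip comments below imply) that palRanges[0] and + // oldLayouts[0]/newLayouts[0] describe the depth aspect while index 1 describes stencil: + // - depth layout unchanged: skip the depth range and transition palRanges[1] only (palRangeIdx = 1, + // palRangeCount = 1); this also covers the case where neither layout changes but a cache sync is still needed; + // - otherwise, stencil layout unchanged: skip the stencil range and transition palRanges[0] only (palRangeCount = 1); + // - both layouts changed: transition both ranges.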
+ if ((oldLayouts[0].usages == newLayouts[0].usages) && + (oldLayouts[0].engines == newLayouts[0].engines)) + { + // Skip the depth transition + palRangeCount--; + + palRangeIdx++; + layoutIdx++; + } + else if ((oldLayouts[1].usages == newLayouts[1].usages) && + (oldLayouts[1].engines == newLayouts[1].engines)) + { + // Skip the stencil transition + palRangeCount--; + } + } } VK_ASSERT(palRangeCount > 0 && palRangeCount <= MaxPalAspectsPerMask); @@ -3257,8 +3228,8 @@ void CmdBuffer::ExecuteBarriers( pDestTransition[transitionIdx].dstCacheMask = barrierTransition.dstCacheMask; pDestTransition[transitionIdx].imageInfo.pImage = pImage->PalImage(DefaultDeviceIndex); pDestTransition[transitionIdx].imageInfo.subresRange = palRanges[palRangeIdx]; - pDestTransition[transitionIdx].imageInfo.oldLayout = oldLayouts[transitionIdx]; - pDestTransition[transitionIdx].imageInfo.newLayout = newLayouts[transitionIdx]; + pDestTransition[transitionIdx].imageInfo.oldLayout = oldLayouts[layoutIdx]; + pDestTransition[transitionIdx].imageInfo.newLayout = newLayouts[layoutIdx]; if (pSampleLocationsInfoEXT == nullptr) { @@ -3274,6 +3245,7 @@ void CmdBuffer::ExecuteBarriers( pDestTransition[transitionIdx].imageInfo.pQuadSamplePattern = &pLocations[locationIndex]; } + layoutIdx++; palRangeIdx++; } @@ -4462,8 +4434,7 @@ void CmdBuffer::RPSyncPoint( const RPSyncPointInfo& syncPoint, VirtualStackFrame* pVirtStack) { - const uint32_t barrierOptions = m_pDevice->GetRuntimeSettings().resourceBarrierOptions; - const auto& rpBarrier = syncPoint.barrier; + const auto& rpBarrier = syncPoint.barrier; Pal::BarrierInfo barrier = {}; @@ -4508,8 +4479,11 @@ void CmdBuffer::RPSyncPoint( { const Pal::ImageAspect aspect = attachment.subresRange[sr].startSubres.aspect; + const RPImageLayout nextLayout = + tr.nextLayout; + const Pal::ImageLayout newLayout = attachment.pImage->GetAttachmentLayout( - tr.nextLayout, + nextLayout, aspect, this); @@ -5597,6 +5571,43 @@ void CmdBuffer::DrawIndirectByteCount( } } +// ===================================================================================================================== +void CmdBuffer::SetLineStippleEXT( + const Pal::LineStippleStateParams& params, + uint32_t staticToken) +{ + m_state.allGpuState.lineStipple = params; + + utils::IterateMask deviceGroup(m_cbBeginDeviceMask); + while (deviceGroup.Iterate()) + { + const uint32_t deviceIdx = deviceGroup.Index(); + PalCmdBuffer(deviceIdx)->CmdSetLineStippleState(m_state.allGpuState.lineStipple); + } + + m_state.allGpuState.staticTokens.lineStippleState = staticToken; +} + +// ===================================================================================================================== +void CmdBuffer::SetLineStippleEXT( + uint32_t lineStippleFactor, + uint16_t lineStipplePattern) +{ + // The line stipple factor is adjusted by one (carried over from OpenGL) + m_state.allGpuState.lineStipple.lineStippleScale = (lineStippleFactor - 1); + + // The bit field to describe the stipple pattern + m_state.allGpuState.lineStipple.lineStippleValue = lineStipplePattern; + + utils::IterateMask deviceGroup(m_curDeviceMask); + while (deviceGroup.Iterate()) + { + PalCmdBuffer(deviceGroup.Index())->CmdSetLineStippleState(m_state.allGpuState.lineStipple); + } + + m_state.allGpuState.staticTokens.lineStippleState = DynamicRenderStateToken; +} + // ===================================================================================================================== RenderPassInstanceState::RenderPassInstanceState( PalAllocator* pAllocator) @@ -6503,6 
+6514,17 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirectByteCountEXT( vertexStride); } +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkCmdSetLineStippleEXT( + VkCommandBuffer commandBuffer, + uint32_t lineStippleFactor, + uint16_t lineStipplePattern) +{ + ApiCmdBuffer::ObjectFromHandle(commandBuffer)->SetLineStippleEXT( + lineStippleFactor, + lineStipplePattern); +} + } // namespace entry } // namespace vk diff --git a/icd/api/vk_compute_pipeline.cpp b/icd/api/vk_compute_pipeline.cpp index 51a997b8..e7434a9d 100644 --- a/icd/api/vk_compute_pipeline.cpp +++ b/icd/api/vk_compute_pipeline.cpp @@ -242,6 +242,38 @@ VkResult ComputePipeline::Create( Util::VoidPtrInc(pPalMem, deviceIdx * pipelineSize), &pPalPipeline[deviceIdx]); +#if ICD_GPUOPEN_DEVMODE_BUILD + // Temporarily reinject post Pal pipeline creation (when the internal pipeline hash is available). + // The reinjection cache layer can be linked back into the pipeline cache chain once the + // Vulkan pipeline cache key can be stored (and read back) inside the ELF as metadata. + if ((pDevice->VkInstance()->GetDevModeMgr() != nullptr) && + (palResult == Util::Result::Success)) + { + const auto& info = pPalPipeline[deviceIdx]->GetInfo(); + + palResult = pDevice->GetCompiler(deviceIdx)->RegisterAndLoadReinjectionBinary( + &info.internalPipelineHash, + &cacheId[deviceIdx], + &localPipelineInfo.pipeline.pipelineBinarySize, + &localPipelineInfo.pipeline.pPipelineBinary, + pPipelineCache); + + if (palResult == Util::Result::Success) + { + pPalPipeline[deviceIdx]->Destroy(); + + palResult = pDevice->PalDevice(deviceIdx)->CreateComputePipeline( + localPipelineInfo.pipeline, + Util::VoidPtrInc(pPalMem, deviceIdx * pipelineSize), + &pPalPipeline[deviceIdx]); + } + else if (palResult == Util::Result::NotFound) + { + // If a replacement was not found, proceed with the original + palResult = Util::Result::Success; + } + } +#endif } result = PalToVkResult(palResult); diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index e6c4633c..b66c573e 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -69,6 +69,8 @@ #include "sqtt/sqtt_mgr.h" #include "sqtt/sqtt_rgp_annotations.h" +#include "appopt/async_layer.h" + #include "appopt/barrier_filter_layer.h" #include "appopt/strange_brigade_layer.h" @@ -218,6 +220,7 @@ Device::Device( m_enabledExtensions(enabledExtensions), m_dispatchTable(DispatchTable::Type::DEVICE, m_pInstance, this), m_pSqttMgr(nullptr), + m_pAsyncLayer(nullptr), m_pAppOptLayer(nullptr), m_pBarrierFilterLayer(nullptr), m_allocationSizeTracking(m_settings.memoryDeviceOverallocationAllowed ? 
false : true), @@ -359,7 +362,7 @@ VkResult Device::Create( { if (!DeviceExtensions::EnableExtensions(pCreateInfo->ppEnabledExtensionNames, pCreateInfo->enabledExtensionCount, - pPhysicalDevice->GetSupportedExtensions(), + pPhysicalDevice->GetAllowedExtensions(), enabledDeviceExtensions)) { return VK_ERROR_EXTENSION_NOT_PRESENT; @@ -501,6 +504,47 @@ VkResult Device::Create( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceInlineUniformBlockFeaturesEXT>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceInlineUniformBlockFeaturesEXT*>(pHeader)); + + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceScalarBlockLayoutFeaturesEXT>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceScalarBlockLayoutFeaturesEXT*>(pHeader)); + + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceTransformFeedbackFeaturesEXT>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceTransformFeedbackFeaturesEXT*>(pHeader)); + + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceVulkanMemoryModelFeaturesKHR>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceVulkanMemoryModelFeaturesKHR*>(pHeader)); + + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT*>(pHeader)); + + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: { vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceShaderAtomicInt64FeaturesKHR>( @@ -517,6 +561,76 @@ VkResult Device::Create( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceDepthClipEnableFeaturesEXT>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceDepthClipEnableFeaturesEXT*>(pHeader)); + + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceHostQueryResetFeaturesEXT>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceHostQueryResetFeaturesEXT*>(pHeader)); + + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceBufferAddressFeaturesEXT>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceBufferAddressFeaturesEXT*>(pHeader)); + + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceLineRasterizationFeaturesEXT>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceLineRasterizationFeaturesEXT*>(pHeader)); + + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR*>(pHeader)); + + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceSubgroupSizeControlFeaturesEXT>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceSubgroupSizeControlFeaturesEXT*>(pHeader)); + + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES_KHR: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDeviceImagelessFramebufferFeaturesKHR>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceImagelessFramebufferFeaturesKHR*>(pHeader)); + + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures<VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR>( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR*>(pHeader)); + + break; + } case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: { @@ -1065,6 +1179,19 @@ VkResult Device::Initialize( } } + if ((result == VK_SUCCESS) && m_settings.enableAsyncCompile) + { + void* pMemory = VkInstance()->AllocMem(sizeof(AsyncLayer), VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + + if (pMemory != nullptr) + { + m_pAsyncLayer = VK_PLACEMENT_NEW(pMemory) AsyncLayer(this); + } + else + { + result
= VK_ERROR_OUT_OF_HOST_MEMORY; + } + } if (result == VK_SUCCESS) { result = PalToVkResult(m_memoryMutex.Init()); @@ -1180,6 +1307,12 @@ void Device::InitDispatchTable() { m_pBarrierFilterLayer->OverrideDispatchTable(&m_dispatchTable); } + + // Install the async compile layer if needed + if (m_pAsyncLayer != nullptr) + { + m_pAsyncLayer->OverrideDispatchTable(&m_dispatchTable); + } } // ===================================================================================================================== @@ -1398,6 +1531,13 @@ VkResult Device::Destroy(const VkAllocationCallbacks* pAllocator) VkInstance()->FreeMem(m_pAppOptLayer); } + if (m_pAsyncLayer != nullptr) + { + Util::Destructor(m_pAsyncLayer); + + VkInstance()->FreeMem(m_pAsyncLayer); + } + for (uint32_t i = 0; i < Queue::MaxQueueFamilies; ++i) { for (uint32_t j = 0; (j < Queue::MaxQueuesPerFamily) && (m_pQueues[i][j] != nullptr); ++j) @@ -1500,6 +1640,7 @@ VkResult Device::CreateInternalComputePipeline( // Build shader module result = pCompiler->BuildShaderModule( this, + 0, codeByteSize, pCode, &shaderModule); @@ -2257,6 +2398,8 @@ VkResult Device::GetCalibratedTimestamps( default: // An invalid time domain value was specified. Return error. result = VK_ERROR_OUT_OF_HOST_MEMORY; + pTimestamps[i] = 0; + VK_NEVER_CALLED(); break; } } diff --git a/icd/api/vk_dispatch.cpp b/icd/api/vk_dispatch.cpp index dff282c6..289de7e4 100644 --- a/icd/api/vk_dispatch.cpp +++ b/icd/api/vk_dispatch.cpp @@ -550,8 +550,11 @@ void DispatchTable::Init() INIT_DISPATCH_ENTRY(vkResetQueryPoolEXT ); + INIT_DISPATCH_ENTRY(vkCmdSetLineStippleEXT ); + INIT_DISPATCH_ENTRY(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ); INIT_DISPATCH_ENTRY(vkGetCalibratedTimestampsEXT ); + } // ===================================================================================================================== diff --git a/icd/api/vk_graphics_pipeline.cpp b/icd/api/vk_graphics_pipeline.cpp index 5e33f68a..6f21a64b 100644 --- a/icd/api/vk_graphics_pipeline.cpp +++ b/icd/api/vk_graphics_pipeline.cpp @@ -228,6 +228,7 @@ void GraphicsPipeline::GenerateHashFromRasterizationStateCreateInfo( const VkPipelineRasterizationStateRasterizationOrderAMD* pRasterizationOrder; const VkPipelineRasterizationStateStreamCreateInfoEXT* pStreamCreateInfo; const VkPipelineRasterizationDepthClipStateCreateInfoEXT* pRsDepthClip; + const VkPipelineRasterizationLineStateCreateInfoEXT* pLineState; }; pInfo = static_cast(desc.pNext); @@ -258,6 +259,14 @@ void GraphicsPipeline::GenerateHashFromRasterizationStateCreateInfo( pBaseHasher->Update(pRsDepthClip->depthClipEnable); break; + case VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT: + pBaseHasher->Update(pLineState->lineRasterizationMode); + pBaseHasher->Update(pLineState->stippledLineEnable); + pBaseHasher->Update(pLineState->lineStippleFactor); + pBaseHasher->Update(pLineState->lineStipplePattern); + + break; + default: break; } @@ -614,6 +623,7 @@ void GraphicsPipeline::BuildRasterizationState( const VkPipelineRasterizationStateRasterizationOrderAMD* pRsOrder; const VkPipelineRasterizationConservativeStateCreateInfoEXT* pRsConservative; const VkPipelineRasterizationStateStreamCreateInfoEXT* pRsStream; + const VkPipelineRasterizationLineStateCreateInfoEXT* pRsRasterizationLine; }; // By default rasterization is disabled, unless rasterization creation info is present @@ -716,6 +726,34 @@ void GraphicsPipeline::BuildRasterizationState( pInfo->rasterizationStream = pRsStream->rasterizationStream; } break; + case 
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT: + { + pInfo->bresenhamEnable = + (pRsRasterizationLine->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT); + + // Bresenham Lines need axis aligned end caps + if (pInfo->bresenhamEnable) + { + pInfo->pipeline.rsState.perpLineEndCapsEnable = false; + } + else if (pRsRasterizationLine->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT) + { + pInfo->pipeline.rsState.perpLineEndCapsEnable = true; + } + + pInfo->msaa.flags.enableLineStipple = pRsRasterizationLine->stippledLineEnable; + + pInfo->immedInfo.lineStippleParams.lineStippleScale = (pRsRasterizationLine->lineStippleFactor - 1); + pInfo->immedInfo.lineStippleParams.lineStippleValue = pRsRasterizationLine->lineStipplePattern; + + if (pRsRasterizationLine->stippledLineEnable && + (dynamicStateFlags[static_cast(DynamicStatesInternal::LINE_STIPPLE_EXT)] == false)) + { + pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::LINE_STIPPLE_EXT); + } + } + break; + default: // Skip any unknown extension structures break; @@ -825,6 +863,10 @@ void GraphicsPipeline::ConvertGraphicsPipelineInfo( dynamicStateFlags[static_cast(DynamicStatesInternal::SAMPLE_LOCATIONS_EXT)] = true; break; + case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT: + dynamicStateFlags[static_cast(DynamicStatesInternal::LINE_STIPPLE_EXT)] = true; + break; + default: // skip unknown dynamic state break; @@ -917,7 +959,8 @@ void GraphicsPipeline::ConvertGraphicsPipelineInfo( PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) - bool multisampleEnable = (pMs->rasterizationSamples != 1); + bool multisampleEnable = (pMs->rasterizationSamples != 1) && + (pInfo->bresenhamEnable == false); bool customSampleLocations = ((pPipelineSampleLocationsStateCreateInfoEXT != nullptr) && (pPipelineSampleLocationsStateCreateInfoEXT->sampleLocationsEnable)); @@ -1298,6 +1341,39 @@ VkResult GraphicsPipeline::Create( Util::VoidPtrInc(pSystemMem, palOffset), &pPalPipeline[deviceIdx]); +#if ICD_GPUOPEN_DEVMODE_BUILD + // Temporarily reinject post Pal pipeline creation (when the internal pipeline hash is available). + // The reinjection cache layer can be linked back into the pipeline cache chain once the + // Vulkan pipeline cache key can be stored (and read back) inside the ELF as metadata. 
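+ // In outline (a restatement of the code below, not additional logic): read the internal pipeline hash from the + // freshly created PAL pipeline, then ask the compiler for a replacement binary via + // RegisterAndLoadReinjectionBinary(); on Success the original PAL pipeline is destroyed and recreated from the + // replacement binary, while NotFound falls back to keeping the original pipeline.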
+ if ((pDevice->VkInstance()->GetDevModeMgr() != nullptr) && + (palResult == Util::Result::Success)) + { + const auto& info = pPalPipeline[deviceIdx]->GetInfo(); + + palResult = pDevice->GetCompiler(deviceIdx)->RegisterAndLoadReinjectionBinary( + &info.internalPipelineHash, + &cacheId[deviceIdx], + &localPipelineInfo.pipeline.pipelineBinarySize, + &localPipelineInfo.pipeline.pPipelineBinary, + pPipelineCache); + + if (palResult == Util::Result::Success) + { + pPalPipeline[deviceIdx]->Destroy(); + + palResult = pPalDevice->CreateGraphicsPipeline( + localPipelineInfo.pipeline, + Util::VoidPtrInc(pSystemMem, palOffset), + &pPalPipeline[deviceIdx]); + } + else if (palResult == Util::Result::NotFound) + { + // If a replacement was not found, proceed with the original + palResult = Util::Result::Success; + } + } +#endif + VK_ASSERT(palSize == pPalDevice->GetGraphicsPipelineSize(localPipelineInfo.pipeline, nullptr)); palOffset += palSize; } @@ -1488,6 +1564,7 @@ void GraphicsPipeline::CreateStaticState() pStaticTokens->scissorRect = DynamicRenderStateToken; pStaticTokens->samplePattern = DynamicRenderStateToken; pStaticTokens->waveLimits = DynamicRenderStateToken; + pStaticTokens->lineStippleState = DynamicRenderStateToken; if (ContainsStaticState(DynamicStatesInternal::LINE_WIDTH)) { @@ -1525,6 +1602,11 @@ void GraphicsPipeline::CreateStaticState() pStaticTokens->samplePattern = pCache->CreateSamplePattern(m_info.samplePattern); } + if (ContainsStaticState(DynamicStatesInternal::LINE_STIPPLE_EXT)) + { + pStaticTokens->lineStippleState = pCache->CreateLineStipple(m_info.lineStippleParams); + } + } // ===================================================================================================================== @@ -1698,6 +1780,13 @@ void GraphicsPipeline::BindToCmdBuffer( pRenderState->allGpuState.staticTokens.pointLineRasterState = newTokens.pointLineRasterState; } + if (ContainsStaticState(DynamicStatesInternal::LINE_STIPPLE_EXT) && + CmdBuffer::IsStaticStateDifferent(oldTokens.lineStippleState, newTokens.lineStippleState)) + { + pPalCmdBuf->CmdSetLineStippleState(m_info.lineStippleParams); + pRenderState->allGpuState.staticTokens.lineStippleState = newTokens.lineStippleState; + } + if (ContainsStaticState(DynamicStatesInternal::DEPTH_BIAS) && CmdBuffer::IsStaticStateDifferent(oldTokens.depthBiasState, newTokens.depthBias)) { diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index 4316cfb0..7d53246d 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -207,12 +207,11 @@ Image::Image( } // ===================================================================================================================== -static VkResult ConvertImageCreateInfo( +static void ConvertImageCreateInfo( const Device* pDevice, const VkImageCreateInfo* pCreateInfo, Pal::ImageCreateInfo* pPalCreateInfo) { - VkResult result = VK_SUCCESS; VkImageUsageFlags imageUsage = pCreateInfo->usage; const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); @@ -270,7 +269,6 @@ static VkResult ConvertImageCreateInfo( // regarding DCC. 
pPalCreateInfo->flags.perSubresInit = 1; - return result; } // ===================================================================================================================== @@ -449,6 +447,7 @@ VkResult Image::Create( const uint32_t numDevices = pDevice->NumPalDevices(); const bool isSparse = (pCreateInfo->flags & SparseEnablingFlags) != 0; + const bool hasDepthStencilAspect = Formats::IsDepthStencilFormat(pCreateInfo->format); VkResult result = VkResult::VK_SUCCESS; // It indicates the stencil aspect will be read by shader, so it is only meaningful if the image contains the @@ -478,7 +477,11 @@ VkResult Image::Create( { VK_ASSERT(pCreateInfo == pVkImageCreateInfo); pImageCreateInfo = pVkImageCreateInfo; - result = ConvertImageCreateInfo(pDevice, pImageCreateInfo, &palCreateInfo); + ConvertImageCreateInfo(pDevice, pImageCreateInfo, &palCreateInfo); + + // Fail image creation if the sample count is not supported based on the setting + result = ((settings.limitSampleCounts & pImageCreateInfo->samples) != 0) ? VK_SUCCESS + : VK_ERROR_OUT_OF_HOST_MEMORY; // The setting of stencilShaderRead will be overrode, if // VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO_EXT exists. @@ -561,6 +564,16 @@ VkResult Image::Create( } } + // When the VK image is sharable, the depthStencil PAL usage flag must be set in order for the underlying + // surface to be depth/stencil (and not color). Otherwise, the image cannot be shared with OpenGL. Core + // OpenGL does not allow for texture usage to be specified, thus all textures with a depth/stencil aspect + // result in depth/stencil surfaces. + if ((hasDepthStencilAspect && imageFlags.externallyShareable) && + (imageFlags.externalD3DHandle == false)) + { + palCreateInfo.usageFlags.depthStencil = true; + } + Util::AutoBuffer palFormatList( viewFormatCount, pDevice->VkInstance()->Allocator()); @@ -664,7 +677,7 @@ VkResult Image::Create( VK_ASSERT(((pImageCreateInfo->flags & VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT) == 0) || (pImageCreateInfo->imageType == VK_IMAGE_TYPE_3D)); - if (imageFlags.androidPresentable) + if ((result == VK_SUCCESS) && (imageFlags.androidPresentable)) { VkDeviceMemory pDeviceMemory = {}; result = Image::CreatePresentableImage( diff --git a/icd/api/vk_memory.cpp b/icd/api/vk_memory.cpp index 4f824c91..b1aad6a2 100644 --- a/icd/api/vk_memory.cpp +++ b/icd/api/vk_memory.cpp @@ -633,7 +633,8 @@ VkResult Memory::OpenExternalSharedImage( palOpenInfo.resourceInfo.flags.ntHandle = importInfo.isNtHandle; Pal::Result palResult = Pal::Result::Success; - if (importInfo.handle == 0) + const bool openedViaName = (importInfo.handle == 0); + if (openedViaName) { } @@ -749,7 +750,8 @@ Memory::Memory( m_allocationCounted(false), m_sizeAccountedForDeviceMask(0), m_pExternalPalImage(nullptr), - m_primaryDeviceIndex(primaryIndex) + m_primaryDeviceIndex(primaryIndex), + m_sharedGpuMemoryHandle(0) { // PAL info is not available for memory objects allocated for presentable images memset(&m_info, 0, sizeof(m_info)); @@ -843,7 +845,9 @@ VkResult Memory::OpenExternalMemory( VK_ASSERT(ppMemory != nullptr); const uint32_t allocationMask = (1 << DefaultMemoryInstanceIdx); - if (importInfo.handle == 0) + const bool openedViaName = (importInfo.handle == 0); + + if (openedViaName) { } else diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index 66444de4..c09acdca 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -61,6 +61,7 @@ #include "palMsaaState.h" #include "palScreen.h" #include 
"palHashLiteralString.h" +#include #include #undef max @@ -257,6 +258,7 @@ PhysicalDevice::PhysicalDevice( m_appProfile(appProfile), m_prtOnDmaSupported(true), m_supportedExtensions(), + m_allowedExtensions(), m_compiler(this) { memset(&m_limits, 0, sizeof(m_limits)); @@ -376,7 +378,17 @@ static void GetFormatFeatureFlags( } } - if (!Formats::IsDepthStencilFormat(format)) + if (Formats::IsDepthStencilFormat(format)) + { + if (imageTiling == VK_IMAGE_TILING_LINEAR) + { + retFlags = static_cast(0); + } + + retFlags &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; + retFlags &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + } + else { retFlags &= ~VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT; } @@ -649,6 +661,39 @@ VkResult PhysicalDevice::Initialize() } } + // Add device coherent memory type based on memory types which have been added in m_memoryProperties.memoryTypes + // In PAL, uncached device memory, which is always device coherent, will be allocated. + if (m_properties.gfxipProperties.flags.supportGl2Uncached) + { + uint32_t currentTypeCount = m_memoryProperties.memoryTypeCount; + for (uint32_t memoryTypeIndex = 0; memoryTypeIndex < currentTypeCount; ++memoryTypeIndex) + { + VkMemoryType& currentmemoryType = m_memoryProperties.memoryTypes[memoryTypeIndex]; + VkMemoryType& lastMemoryType = m_memoryProperties.memoryTypes[m_memoryProperties.memoryTypeCount]; + + // Add device coherent memory type based on below type: + // 1. Visible and host coherent + // 2. Invisible + if (((currentmemoryType.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && + (currentmemoryType.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) || + (currentmemoryType.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) + { + lastMemoryType.heapIndex = currentmemoryType.heapIndex; + lastMemoryType.propertyFlags = currentmemoryType.propertyFlags | + VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD | + VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD; + + m_memoryVkIndexToPalHeap[m_memoryProperties.memoryTypeCount] = + m_memoryVkIndexToPalHeap[memoryTypeIndex]; + m_memoryPalHeapToVkIndexBits[m_memoryVkIndexToPalHeap[m_memoryProperties.memoryTypeCount]] |= + (1UL << memoryTypeIndex); + + m_memoryTypeMask |= 1 << m_memoryProperties.memoryTypeCount; + ++m_memoryProperties.memoryTypeCount; + } + } + } + VK_ASSERT(m_memoryProperties.memoryTypeCount <= VK_MAX_MEMORY_TYPES); VK_ASSERT(m_memoryProperties.memoryHeapCount <= Pal::GpuHeapCount); } @@ -826,6 +871,8 @@ void PhysicalDevice::PopulateFormatProperties() void PhysicalDevice::PopulateExtensions() { m_supportedExtensions = GetAvailableExtensions(VkInstance(), this); + m_allowedExtensions = m_supportedExtensions; + } // ===================================================================================================================== @@ -1066,6 +1113,7 @@ VkResult PhysicalDevice::GetImageFormatProperties( memset(pImageFormatProperties, 0, sizeof(VkImageFormatProperties)); const auto& imageProps = PalProperties().imageProperties; + const RuntimeSettings& settings = m_pSettingsLoader->GetSettings(); Pal::SwizzledFormat palFormat = VkToPalFormat(format); @@ -1275,7 +1323,8 @@ VkResult PhysicalDevice::GetImageFormatProperties( } else { - pImageFormatProperties->sampleCounts = MaxSampleCountToSampleCountFlags(Pal::MaxMsaaFragments); + pImageFormatProperties->sampleCounts = MaxSampleCountToSampleCountFlags(Pal::MaxMsaaFragments) & + settings.limitSampleCounts; } pImageFormatProperties->maxExtent.width = imageProps.maxDimensions.width; @@ -2195,6 +2244,12 @@ void 
PhysicalDevice::PopulateLimits() VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT; + // framebufferColorSampleCounts, framebufferDepthSampleCounts, framebufferStencilSampleCounts and + // framebufferNoAttachmentSampleCounts are already clamped by the setting in GetImageFormatProperties() in + // GetMaxFormatSampleCount() above. However, because the value of framebufferColorSampleCounts is + // hardcoded above, we are limiting it according to the setting again. + m_limits.framebufferColorSampleCounts &= settings.limitSampleCounts; + m_sampleLocationSampleCounts = m_limits.framebufferColorSampleCounts; if (m_properties.gfxipProperties.flags.support1xMsaaSampleLocations == false) @@ -2264,10 +2319,11 @@ void PhysicalDevice::PopulateLimits() minStorageCount = (minStorageCount == UINT_MAX) ? 0 : minStorageCount; // This is a sanity check on the above logic. - VK_ASSERT(minSampledCount == 8); - VK_ASSERT(minSampledIntCount == 8); - VK_ASSERT(minSampledDepthCount == 8); - VK_ASSERT(minSampledStencilCount == 8); + // Make sure that the sample count masks in the settings were not set when checking this. + VK_ASSERT((settings.limitSampleCounts != 0xFFFFFFFF) || minSampledCount == 8); + VK_ASSERT((settings.limitSampleCounts != 0xFFFFFFFF) || minSampledIntCount == 8); + VK_ASSERT((settings.limitSampleCounts != 0xFFFFFFFF) || minSampledDepthCount == 8); + VK_ASSERT((settings.limitSampleCounts != 0xFFFFFFFF) || minSampledStencilCount == 8); // Sample counts supported for all non-integer, integer, depth, and stencil sampled images, respectively m_limits.sampledImageColorSampleCounts = MaxSampleCountToSampleCountFlags(minSampledCount); @@ -3022,6 +3078,8 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_CREATE_RENDERPASS2)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_CALIBRATED_TIMESTAMPS)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_HDR_METADATA)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(AMD_SHADER_INFO)); @@ -3108,10 +3166,18 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_HOST_QUERY_RESET)); + if ((pPhysicalDevice == nullptr) || + (pPhysicalDevice->PalProperties().gfxipProperties.flags.supportGl2Uncached)) + { + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(AMD_DEVICE_COHERENT_MEMORY)); + } + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_UNIFORM_BUFFER_STANDARD_LAYOUT)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_SUBGROUP_SIZE_CONTROL)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_LINE_RASTERIZATION)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_IMAGELESS_FRAMEBUFFER)); return availableExtensions; @@ -3307,7 +3373,6 @@ VkResult PhysicalDevice::EnumerateExtensionProperties( ) const { VkResult result = VK_SUCCESS; - const DeviceExtensions::Supported& supportedExtensions = GetSupportedExtensions(); const uint32_t extensionCount = supportedExtensions.GetExtensionCount(); @@ -3446,6 +3511,194 @@ void PhysicalDevice::GetPhysicalDeviceIDProperties( strlen(pDriverUuidString)); } +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceMaintenance3Properties( + uint32_t* pMaxPerSetDescriptors, + VkDeviceSize* pMaxMemoryAllocationSize + ) const +{ + // We don't have limits on number of desc sets + *pMaxPerSetDescriptors = UINT32_MAX; + + // Return 2GB 
in bytes as max allocation size + *pMaxMemoryAllocationSize = 2u * 1024u * 1024u * 1024u; +} + +// ==================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceMultiviewProperties( + uint32_t* pMaxMultiviewViewCount, + uint32_t* pMaxMultiviewInstanceIndex + ) const +{ + *pMaxMultiviewViewCount = Pal::MaxViewInstanceCount; + *pMaxMultiviewInstanceIndex = UINT_MAX; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDevicePointClippingProperties( + VkPointClippingBehavior* pPointClippingBehavior + ) const +{ + // Points are clipped when their centers fall outside the clip volume, i.e. the desktop GL behavior. + *pPointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceProtectedMemoryProperties( + VkBool32* pProtectedNoFault + ) const +{ + *pProtectedNoFault = VK_FALSE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceSubgroupProperties( + uint32_t* pSubgroupSize, + VkShaderStageFlags* pSupportedStages, + VkSubgroupFeatureFlags* pSupportedOperations, + VkBool32* pQuadOperationsInAllStages + ) const +{ + *pSubgroupSize = GetSubgroupSize(); + + *pSupportedStages = VK_SHADER_STAGE_VERTEX_BIT | + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | + VK_SHADER_STAGE_GEOMETRY_BIT | + VK_SHADER_STAGE_FRAGMENT_BIT | + VK_SHADER_STAGE_COMPUTE_BIT; + + *pSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | + VK_SUBGROUP_FEATURE_VOTE_BIT | + VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | + VK_SUBGROUP_FEATURE_BALLOT_BIT | + VK_SUBGROUP_FEATURE_SHUFFLE_BIT | + VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT | + VK_SUBGROUP_FEATURE_QUAD_BIT; + + *pQuadOperationsInAllStages = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceDriverProperties( + VkDriverIdKHR* pDriverID, + char* pDriverName, + char* pDriverInfo, + VkConformanceVersionKHR* pConformanceVersion + ) const +{ + *pDriverID = VULKAN_DRIVER_ID; + + Util::Strncpy(pDriverName, VULKAN_DRIVER_NAME_STR, VK_MAX_DRIVER_NAME_SIZE_KHR); + Util::Strncpy(pDriverInfo, VULKAN_DRIVER_INFO_STR, VK_MAX_DRIVER_INFO_SIZE_KHR); + + pConformanceVersion->major = CTS_VERSION_MAJOR; + pConformanceVersion->minor = CTS_VERSION_MINOR; + pConformanceVersion->subminor = CTS_VERSION_SUBMINOR; + pConformanceVersion->patch = CTS_VERSION_PATCH; +} + +// ===================================================================================================================== +template<typename T> +void PhysicalDevice::GetPhysicalDeviceFloatControlsProperties( + T pFloatControlsProperties + ) const +{ + pFloatControlsProperties->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR; + pFloatControlsProperties->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR; + + pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat32 = VK_TRUE; + pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat64 = VK_TRUE; + + pFloatControlsProperties->shaderDenormPreserveFloat32 = VK_TRUE; +
pFloatControlsProperties->shaderDenormPreserveFloat64 = VK_TRUE; + + pFloatControlsProperties->shaderDenormFlushToZeroFloat32 = VK_TRUE; + pFloatControlsProperties->shaderDenormFlushToZeroFloat64 = VK_TRUE; + + pFloatControlsProperties->shaderRoundingModeRTEFloat32 = VK_TRUE; + pFloatControlsProperties->shaderRoundingModeRTEFloat64 = VK_TRUE; + + pFloatControlsProperties->shaderRoundingModeRTZFloat32 = VK_TRUE; + pFloatControlsProperties->shaderRoundingModeRTZFloat64 = VK_TRUE; + + if (PalProperties().gfxipProperties.flags.supportDoubleRate16BitInstructions) + { + pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat16 = VK_TRUE; + pFloatControlsProperties->shaderDenormPreserveFloat16 = VK_TRUE; + pFloatControlsProperties->shaderDenormFlushToZeroFloat16 = VK_TRUE; + pFloatControlsProperties->shaderRoundingModeRTEFloat16 = VK_TRUE; + pFloatControlsProperties->shaderRoundingModeRTZFloat16 = VK_TRUE; + } + else + { + pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat16 = VK_FALSE; + pFloatControlsProperties->shaderDenormPreserveFloat16 = VK_FALSE; + pFloatControlsProperties->shaderDenormFlushToZeroFloat16 = VK_FALSE; + pFloatControlsProperties->shaderRoundingModeRTEFloat16 = VK_FALSE; + pFloatControlsProperties->shaderRoundingModeRTZFloat16 = VK_FALSE; + } +} + +// ===================================================================================================================== +template<typename T> +void PhysicalDevice::GetPhysicalDeviceDescriptorIndexingProperties( + T pDescriptorIndexingProperties + ) const +{ + pDescriptorIndexingProperties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX; + pDescriptorIndexingProperties->shaderUniformBufferArrayNonUniformIndexingNative = VK_FALSE; + pDescriptorIndexingProperties->shaderSampledImageArrayNonUniformIndexingNative = VK_FALSE; + pDescriptorIndexingProperties->shaderStorageBufferArrayNonUniformIndexingNative = VK_FALSE; + pDescriptorIndexingProperties->shaderStorageImageArrayNonUniformIndexingNative = VK_FALSE; + pDescriptorIndexingProperties->shaderInputAttachmentArrayNonUniformIndexingNative = VK_FALSE; + pDescriptorIndexingProperties->robustBufferAccessUpdateAfterBind = VK_FALSE; + pDescriptorIndexingProperties->quadDivergentImplicitLod = VK_FALSE; + pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindSamplers = UINT32_MAX; + pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = UINT32_MAX; + pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX; + pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindSampledImages = UINT32_MAX; + pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindStorageImages = UINT32_MAX; + pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindInputAttachments = UINT32_MAX; + pDescriptorIndexingProperties->maxPerStageUpdateAfterBindResources = UINT32_MAX; + pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindSamplers = UINT32_MAX; + pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindUniformBuffers = UINT32_MAX; + pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MaxDynamicUniformDescriptors; + pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX; + pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MaxDynamicStorageDescriptors; + pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindSampledImages = UINT32_MAX; +
pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindStorageImages = UINT32_MAX; + pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindInputAttachments = UINT32_MAX; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceDepthStencilResolveProperties( + VkResolveModeFlagsKHR* pSupportedDepthResolveModes, + VkResolveModeFlagsKHR* pSupportedStencilResolveModes, + VkBool32* pIndependentResolveNone, + VkBool32* pIndependentResolve + ) const +{ + *pSupportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR | + VK_RESOLVE_MODE_MIN_BIT_KHR | + VK_RESOLVE_MODE_MAX_BIT_KHR; + *pSupportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR | + VK_RESOLVE_MODE_MIN_BIT_KHR | + VK_RESOLVE_MODE_MAX_BIT_KHR; + *pIndependentResolveNone = VK_TRUE; + *pIndependentResolve = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceSamplerFilterMinmaxProperties( + VkBool32* pFilterMinmaxSingleComponentFormats, + VkBool32* pFilterMinmaxImageComponentMapping + ) const +{ + *pFilterMinmaxSingleComponentFormats = VK_TRUE; + *pFilterMinmaxImageComponentMapping = IsPerChannelMinMaxFilteringSupported(); +} + // ===================================================================================================================== VkResult PhysicalDevice::GetExternalMemoryProperties( bool isSparse, @@ -3492,6 +3745,216 @@ VkResult PhysicalDevice::GetExternalMemoryProperties( return result; } +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDevice16BitStorageFeatures( + VkBool32* pStorageBuffer16BitAccess, + VkBool32* pUniformAndStorageBuffer16BitAccess, + VkBool32* pStoragePushConstant16, + VkBool32* pStorageInputOutput16 + ) const +{ + // We support 16-bit buffer load/store on all ASICs + *pStorageBuffer16BitAccess = VK_TRUE; + *pUniformAndStorageBuffer16BitAccess = VK_TRUE; + + // We don't plan to support 16-bit push constants + *pStoragePushConstant16 = VK_FALSE; + + // Currently we seem to only support 16-bit inputs/outputs on ASICs supporting + // 16-bit ALU. It's unclear at this point whether we can do any better. 
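+ // Put differently (a restatement of the condition below, not new logic): storageInputOutput16 is reported only + // when 16-bit instructions are supported in hardware and, if the optOnlyEnableFP16ForGfx9Plus setting is set, + // only on GfxIp9 or newer.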
+ if (PalProperties().gfxipProperties.flags.support16BitInstructions && + ((GetRuntimeSettings().optOnlyEnableFP16ForGfx9Plus == false) || + (PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp9))) + { + *pStorageInputOutput16 = VK_TRUE; + } + else + { + *pStorageInputOutput16 = VK_FALSE; + } +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceMultiviewFeatures( + VkBool32* pMultiview, + VkBool32* pMultiviewGeometryShader, + VkBool32* pMultiviewTessellationShader + ) const +{ + *pMultiview = VK_TRUE; + *pMultiviewGeometryShader = VK_FALSE; + *pMultiviewTessellationShader = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceVariablePointerFeatures( + VkBool32* pVariablePointersStorageBuffer, + VkBool32* pVariablePointers + ) const +{ + *pVariablePointers = VK_TRUE; + *pVariablePointersStorageBuffer = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceProtectedMemoryFeatures( + VkBool32* pProtectedMemory + ) const +{ + *pProtectedMemory = VK_FALSE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceSamplerYcbcrConversionFeatures( + VkBool32* pSamplerYcbcrConversion + ) const +{ + *pSamplerYcbcrConversion = VK_FALSE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceShaderDrawParameterFeatures( + VkBool32* pShaderDrawParameters + ) const +{ + *pShaderDrawParameters = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDevice8BitStorageFeatures( + VkBool32* pStorageBuffer8BitAccess, + VkBool32* pUniformAndStorageBuffer8BitAccess, + VkBool32* pStoragePushConstant8 + ) const +{ + *pStorageBuffer8BitAccess = VK_TRUE; + *pUniformAndStorageBuffer8BitAccess = VK_TRUE; + + // We don't plan to support 8-bit push constants + *pStoragePushConstant8 = VK_FALSE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceShaderAtomicInt64Features( + VkBool32* pShaderBufferInt64Atomics, + VkBool32* pShaderSharedInt64Atomics + ) const +{ + *pShaderBufferInt64Atomics = VK_TRUE; + *pShaderSharedInt64Atomics = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceFloat16Int8Features( + VkBool32* pShaderFloat16, + VkBool32* pShaderInt8 + ) const +{ + *pShaderFloat16 = PalProperties().gfxipProperties.flags.supportDoubleRate16BitInstructions ? 
VK_TRUE : VK_FALSE; + *pShaderInt8 = VK_TRUE; +} + +// ===================================================================================================================== +template<typename T> +void PhysicalDevice::GetPhysicalDeviceDescriptorIndexingFeatures( + T pDescriptorIndexingFeatures + ) const +{ + pDescriptorIndexingFeatures->shaderInputAttachmentArrayDynamicIndexing = VK_FALSE; + pDescriptorIndexingFeatures->shaderUniformTexelBufferArrayDynamicIndexing = VK_TRUE; + pDescriptorIndexingFeatures->shaderStorageTexelBufferArrayDynamicIndexing = VK_TRUE; + pDescriptorIndexingFeatures->shaderUniformBufferArrayNonUniformIndexing = VK_TRUE; + pDescriptorIndexingFeatures->shaderSampledImageArrayNonUniformIndexing = VK_TRUE; + pDescriptorIndexingFeatures->shaderStorageBufferArrayNonUniformIndexing = VK_TRUE; + pDescriptorIndexingFeatures->shaderStorageImageArrayNonUniformIndexing = VK_TRUE; + pDescriptorIndexingFeatures->shaderInputAttachmentArrayNonUniformIndexing = VK_FALSE; + pDescriptorIndexingFeatures->shaderUniformTexelBufferArrayNonUniformIndexing = VK_TRUE; + pDescriptorIndexingFeatures->shaderStorageTexelBufferArrayNonUniformIndexing = VK_TRUE; + pDescriptorIndexingFeatures->descriptorBindingUniformBufferUpdateAfterBind = VK_TRUE; + pDescriptorIndexingFeatures->descriptorBindingSampledImageUpdateAfterBind = VK_TRUE; + pDescriptorIndexingFeatures->descriptorBindingStorageImageUpdateAfterBind = VK_TRUE; + pDescriptorIndexingFeatures->descriptorBindingStorageBufferUpdateAfterBind = VK_TRUE; + pDescriptorIndexingFeatures->descriptorBindingUniformTexelBufferUpdateAfterBind = VK_TRUE; + pDescriptorIndexingFeatures->descriptorBindingStorageTexelBufferUpdateAfterBind = VK_TRUE; + pDescriptorIndexingFeatures->descriptorBindingUpdateUnusedWhilePending = VK_TRUE; + pDescriptorIndexingFeatures->descriptorBindingPartiallyBound = VK_TRUE; + pDescriptorIndexingFeatures->descriptorBindingVariableDescriptorCount = VK_TRUE; + pDescriptorIndexingFeatures->runtimeDescriptorArray = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceScalarBlockLayoutFeatures( + VkBool32* pScalarBlockLayout + ) const +{ + *pScalarBlockLayout = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceImagelessFramebufferFeatures( + VkBool32* pImagelessFramebuffer + ) const +{ + *pImagelessFramebuffer = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceUniformBufferStandardLayoutFeatures( + VkBool32* pUniformBufferStandardLayout + ) const +{ + *pUniformBufferStandardLayout = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceSubgroupExtendedTypesFeatures( + VkBool32* pShaderSubgroupExtendedTypes + ) const +{ + *pShaderSubgroupExtendedTypes = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceSeparateDepthStencilLayoutsFeatures( + VkBool32* pSeparateDepthStencilLayouts + ) const +{ + *pSeparateDepthStencilLayouts = VK_TRUE; +} + +//
===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceHostQueryResetFeatures( + VkBool32* pHostQueryReset + ) const +{ + *pHostQueryReset = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceBufferAddressFeatures( + VkBool32* pBufferDeviceAddress, + VkBool32* pBufferDeviceAddressCaptureReplay, + VkBool32* pBufferDeviceAddressMultiDevice + ) const +{ + *pBufferDeviceAddress = VK_TRUE; + *pBufferDeviceAddressCaptureReplay = VK_FALSE; + *pBufferDeviceAddressMultiDevice = VK_FALSE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceVulkanMemoryModelFeatures( + VkBool32* pVulkanMemoryModel, + VkBool32* pVulkanMemoryModelDeviceScope, + VkBool32* pVulkanMemoryModelAvailabilityVisibilityChains +) const +{ + *pVulkanMemoryModel = VK_TRUE; + *pVulkanMemoryModelDeviceScope = VK_TRUE; + *pVulkanMemoryModelAvailabilityVisibilityChains = VK_FALSE; + +} + // ===================================================================================================================== // Retrieve device feature support. Called in response to vkGetPhysicalDeviceFeatures2 // NOTE: Don't memset here. Otherwise, VerifyRequestedPhysicalDeviceFeatures needs to compare member by member @@ -3518,25 +3981,11 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDevice16BitStorageFeatures* pStorageFeatures = reinterpret_cast(pHeader); - // We support 16-bit buffer load/store on all ASICs - pStorageFeatures->storageBuffer16BitAccess = VK_TRUE; - pStorageFeatures->uniformAndStorageBuffer16BitAccess = VK_TRUE; - - // We don't plan to support 16-bit push constants - pStorageFeatures->storagePushConstant16 = VK_FALSE; - - // Currently we seem to only support 16-bit inputs/outputs on ASICs supporting - // 16-bit ALU. It's unclear at this point whether we can do any better. 
- if (PalProperties().gfxipProperties.flags.support16BitInstructions && - ((GetRuntimeSettings().optOnlyEnableFP16ForGfx9Plus == false) || - (PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp9))) - { - pStorageFeatures->storageInputOutput16 = VK_TRUE; - } - else - { - pStorageFeatures->storageInputOutput16 = VK_FALSE; - } + GetPhysicalDevice16BitStorageFeatures( + &pStorageFeatures->storageBuffer16BitAccess, + &pStorageFeatures->uniformAndStorageBuffer16BitAccess, + &pStorageFeatures->storagePushConstant16, + &pStorageFeatures->storageInputOutput16); break; } @@ -3544,11 +3993,11 @@ void PhysicalDevice::GetFeatures2( { VkPhysicalDevice8BitStorageFeaturesKHR* pStorageFeatures = reinterpret_cast(pHeader); - pStorageFeatures->storageBuffer8BitAccess = VK_TRUE; - pStorageFeatures->uniformAndStorageBuffer8BitAccess = VK_TRUE; - // We don't plan to support 8-bit push constants - pStorageFeatures->storagePushConstant8 = VK_FALSE; + GetPhysicalDevice8BitStorageFeatures( + &pStorageFeatures->storageBuffer8BitAccess, + &pStorageFeatures->uniformAndStorageBuffer8BitAccess, + &pStorageFeatures->storagePushConstant8); break; } @@ -3558,8 +4007,9 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDeviceShaderAtomicInt64FeaturesKHR* pShaderAtomicInt64Features = reinterpret_cast(pHeader); - pShaderAtomicInt64Features->shaderBufferInt64Atomics = VK_TRUE; - pShaderAtomicInt64Features->shaderSharedInt64Atomics = VK_TRUE; + GetPhysicalDeviceShaderAtomicInt64Features( + &pShaderAtomicInt64Features->shaderBufferInt64Atomics, + &pShaderAtomicInt64Features->shaderSharedInt64Atomics); break; } @@ -3581,7 +4031,8 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDeviceSamplerYcbcrConversionFeatures* pSamplerYcbcrConversionFeatures = reinterpret_cast(pHeader); - pSamplerYcbcrConversionFeatures->samplerYcbcrConversion = VK_FALSE; + GetPhysicalDeviceSamplerYcbcrConversionFeatures( + &pSamplerYcbcrConversionFeatures->samplerYcbcrConversion); break; } @@ -3591,8 +4042,10 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDeviceVariablePointerFeatures* pVariablePointerFeatures = reinterpret_cast(pHeader); - pVariablePointerFeatures->variablePointers = VK_TRUE; - pVariablePointerFeatures->variablePointersStorageBuffer = VK_TRUE; + GetPhysicalDeviceVariablePointerFeatures( + &pVariablePointerFeatures->variablePointersStorageBuffer, + &pVariablePointerFeatures->variablePointers); + break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: @@ -3600,7 +4053,7 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDeviceProtectedMemoryFeatures* pProtectedMemory = reinterpret_cast(pHeader); - pProtectedMemory->protectedMemory = VK_FALSE; + GetPhysicalDeviceProtectedMemoryFeatures(&pProtectedMemory->protectedMemory); break; } @@ -3610,9 +4063,10 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDeviceMultiviewFeatures* pMultiviewFeatures = reinterpret_cast(pHeader); - pMultiviewFeatures->multiview = VK_TRUE; - pMultiviewFeatures->multiviewGeometryShader = VK_FALSE; - pMultiviewFeatures->multiviewTessellationShader = VK_TRUE; + GetPhysicalDeviceMultiviewFeatures( + &pMultiviewFeatures->multiview, + &pMultiviewFeatures->multiviewGeometryShader, + &pMultiviewFeatures->multiviewTessellationShader); break; } @@ -3622,36 +4076,18 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDeviceShaderDrawParameterFeatures* pShaderDrawParameterFeatures = reinterpret_cast(pHeader); - pShaderDrawParameterFeatures->shaderDrawParameters = VK_TRUE; + GetPhysicalDeviceShaderDrawParameterFeatures( + &pShaderDrawParameterFeatures->shaderDrawParameters); 
break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: { - VkPhysicalDeviceDescriptorIndexingFeaturesEXT * pDescIndexingFeatures = - reinterpret_cast(pHeader); - - pDescIndexingFeatures->shaderInputAttachmentArrayDynamicIndexing = VK_FALSE; - pDescIndexingFeatures->shaderUniformTexelBufferArrayDynamicIndexing = VK_TRUE; - pDescIndexingFeatures->shaderStorageTexelBufferArrayDynamicIndexing = VK_TRUE; - pDescIndexingFeatures->shaderUniformBufferArrayNonUniformIndexing = VK_TRUE; - pDescIndexingFeatures->shaderSampledImageArrayNonUniformIndexing = VK_TRUE; - pDescIndexingFeatures->shaderStorageBufferArrayNonUniformIndexing = VK_TRUE; - pDescIndexingFeatures->shaderStorageImageArrayNonUniformIndexing = VK_TRUE; - pDescIndexingFeatures->shaderInputAttachmentArrayNonUniformIndexing = VK_FALSE; - pDescIndexingFeatures->shaderUniformTexelBufferArrayNonUniformIndexing = VK_TRUE; - pDescIndexingFeatures->shaderStorageTexelBufferArrayNonUniformIndexing = VK_TRUE; - pDescIndexingFeatures->descriptorBindingUniformBufferUpdateAfterBind = VK_TRUE; - pDescIndexingFeatures->descriptorBindingSampledImageUpdateAfterBind = VK_TRUE; - pDescIndexingFeatures->descriptorBindingStorageImageUpdateAfterBind = VK_TRUE; - pDescIndexingFeatures->descriptorBindingStorageBufferUpdateAfterBind = VK_TRUE; - pDescIndexingFeatures->descriptorBindingUniformTexelBufferUpdateAfterBind = VK_TRUE; - pDescIndexingFeatures->descriptorBindingStorageTexelBufferUpdateAfterBind = VK_TRUE; - pDescIndexingFeatures->descriptorBindingUpdateUnusedWhilePending = VK_TRUE; - pDescIndexingFeatures->descriptorBindingPartiallyBound = VK_TRUE; - pDescIndexingFeatures->descriptorBindingVariableDescriptorCount = VK_TRUE; - pDescIndexingFeatures->runtimeDescriptorArray = VK_TRUE; + VkPhysicalDeviceDescriptorIndexingFeaturesEXT* pDescIndexingFeatures = + reinterpret_cast(pHeader); + + GetPhysicalDeviceDescriptorIndexingFeatures(pDescIndexingFeatures); break; } @@ -3661,9 +4097,10 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDeviceFloat16Int8FeaturesKHR* pFloat16Int8Features = reinterpret_cast(pHeader); - pFloat16Int8Features->shaderFloat16 = - PalProperties().gfxipProperties.flags.supportDoubleRate16BitInstructions ? VK_TRUE : VK_FALSE; - pFloat16Int8Features->shaderInt8 = VK_TRUE; + GetPhysicalDeviceFloat16Int8Features( + &pFloat16Int8Features->shaderFloat16, + &pFloat16Int8Features->shaderInt8); + break; } @@ -3683,7 +4120,7 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDeviceScalarBlockLayoutFeaturesEXT* pScalarBlockLayoutFeatures = reinterpret_cast(pHeader); - pScalarBlockLayoutFeatures->scalarBlockLayout = VK_TRUE; + GetPhysicalDeviceScalarBlockLayoutFeatures(&pScalarBlockLayoutFeatures->scalarBlockLayout); break; } @@ -3703,9 +4140,10 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDeviceVulkanMemoryModelFeaturesKHR* pMemoryModel = reinterpret_cast(pHeader); - pMemoryModel->vulkanMemoryModel = VK_TRUE; - pMemoryModel->vulkanMemoryModelDeviceScope = VK_TRUE; - pMemoryModel->vulkanMemoryModelAvailabilityVisibilityChains = VK_FALSE; + GetPhysicalDeviceVulkanMemoryModelFeatures( + &pMemoryModel->vulkanMemoryModel, + &pMemoryModel->vulkanMemoryModelDeviceScope, + &pMemoryModel->vulkanMemoryModelAvailabilityVisibilityChains); break; } @@ -3739,8 +4177,9 @@ void PhysicalDevice::GetFeatures2( { VkPhysicalDeviceHostQueryResetFeaturesEXT* pHostQueryReset = reinterpret_cast(pHeader); - pHostQueryReset->hostQueryReset = - IsExtensionSupported(DeviceExtensions::EXT_HOST_QUERY_RESET) ? 
VK_TRUE : VK_FALSE; + + GetPhysicalDeviceHostQueryResetFeatures(&pHostQueryReset->hostQueryReset); + break; } @@ -3751,6 +4190,8 @@ void PhysicalDevice::GetFeatures2( bool deviceCoherentMemoryEnabled = false; + deviceCoherentMemoryEnabled = PalProperties().gfxipProperties.flags.supportGl2Uncached; + pDeviceCoherentMemory->deviceCoherentMemory = deviceCoherentMemoryEnabled; break; } @@ -3760,18 +4201,36 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDeviceBufferAddressFeaturesEXT* pBufferAddressFeatures = reinterpret_cast(pHeader); - pBufferAddressFeatures->bufferDeviceAddress = VK_TRUE; - pBufferAddressFeatures->bufferDeviceAddressCaptureReplay = VK_FALSE; - pBufferAddressFeatures->bufferDeviceAddressMultiDevice = VK_FALSE; + GetPhysicalDeviceBufferAddressFeatures( + &pBufferAddressFeatures->bufferDeviceAddress, + &pBufferAddressFeatures->bufferDeviceAddressCaptureReplay, + &pBufferAddressFeatures->bufferDeviceAddressMultiDevice); break; } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: + { + VkPhysicalDeviceLineRasterizationFeaturesEXT* pPhysicalDeviceLineRasterizationFeaturesEXT = + reinterpret_cast(pHeader); + pPhysicalDeviceLineRasterizationFeaturesEXT->rectangularLines = VK_FALSE; + pPhysicalDeviceLineRasterizationFeaturesEXT->bresenhamLines = VK_TRUE; + pPhysicalDeviceLineRasterizationFeaturesEXT->smoothLines = VK_FALSE; + + pPhysicalDeviceLineRasterizationFeaturesEXT->stippledRectangularLines = VK_FALSE; + pPhysicalDeviceLineRasterizationFeaturesEXT->stippledBresenhamLines = VK_TRUE; + pPhysicalDeviceLineRasterizationFeaturesEXT->stippledSmoothLines = VK_FALSE; + + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: { VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR* pUniformBufferStandardLayoutFeatures = reinterpret_cast(pHeader); - pUniformBufferStandardLayoutFeatures->uniformBufferStandardLayout = VK_TRUE; + GetPhysicalDeviceUniformBufferStandardLayoutFeatures( + &pUniformBufferStandardLayoutFeatures->uniformBufferStandardLayout); break; } @@ -3791,7 +4250,17 @@ void PhysicalDevice::GetFeatures2( VkPhysicalDeviceImagelessFramebufferFeaturesKHR* pImagelessFramebufferFeatures = reinterpret_cast(pHeader); - pImagelessFramebufferFeatures->imagelessFramebuffer = VK_TRUE; + GetPhysicalDeviceImagelessFramebufferFeatures(&pImagelessFramebufferFeatures->imagelessFramebuffer); + + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: + { + VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR* pPipelineExecutablePropertiesFeatures = + reinterpret_cast(pHeader); + + pPipelineExecutablePropertiesFeatures->pipelineExecutableInfo = VK_TRUE; break; } @@ -3943,11 +4412,13 @@ void PhysicalDevice::GetDeviceProperties2( VkPhysicalDeviceDriverPropertiesKHR* pDriverProperties; VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT* pVertexAttributeDivisorProperties; + VkPhysicalDeviceFloatControlsPropertiesKHR* pFloatControlsProperties; VkPhysicalDeviceInlineUniformBlockPropertiesEXT* pInlineUniformBlockProperties; VkPhysicalDevicePCIBusInfoPropertiesEXT* pPCIBusInfoProperties; VkPhysicalDeviceTransformFeedbackPropertiesEXT* pFeedbackProperties; VkPhysicalDeviceDepthStencilResolvePropertiesKHR* pDepthStencilResolveProperties; VkPhysicalDeviceSubgroupSizeControlPropertiesEXT* pSubgroupSizeControlProperties; + VkPhysicalDeviceLineRasterizationPropertiesEXT* pLineRasterizationProperties; }; for (pProp = pProperties; pHeader != nullptr; pHeader = pHeader->pNext) @@ -3956,8 +4427,7 
@@ void PhysicalDevice::GetDeviceProperties2( { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: { - // Points are clipped when their centers fall outside the clip volume, i.e. the desktop GL behavior. - pPointClippingProperties->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES; + GetPhysicalDevicePointClippingProperties(&pPointClippingProperties->pointClippingBehavior); break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: @@ -3989,56 +4459,41 @@ void PhysicalDevice::GetDeviceProperties2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: { - // We don't have limits on number of desc sets - pMaintenance3Properties->maxPerSetDescriptors = UINT32_MAX; - - // Return 2GB in bytes as max allocation size - pMaintenance3Properties->maxMemoryAllocationSize = 2u * 1024u * 1024u * 1024u; + GetPhysicalDeviceMaintenance3Properties( + &pMaintenance3Properties->maxPerSetDescriptors, + &pMaintenance3Properties->maxMemoryAllocationSize); break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: { - pProtectedMemoryProperties->protectedNoFault = VK_FALSE; + GetPhysicalDeviceProtectedMemoryProperties(&pProtectedMemoryProperties->protectedNoFault); break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: { - pMultiviewProperties->maxMultiviewViewCount = Pal::MaxViewInstanceCount; - pMultiviewProperties->maxMultiviewInstanceIndex = UINT_MAX; - + GetPhysicalDeviceMultiviewProperties( + &pMultiviewProperties->maxMultiviewViewCount, + &pMultiviewProperties->maxMultiviewInstanceIndex); break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: { - pSubgroupProperties->subgroupSize = GetSubgroupSize(); - - pSubgroupProperties->supportedStages = VK_SHADER_STAGE_VERTEX_BIT | - VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | - VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | - VK_SHADER_STAGE_GEOMETRY_BIT | - VK_SHADER_STAGE_FRAGMENT_BIT | - VK_SHADER_STAGE_COMPUTE_BIT; - - pSubgroupProperties->supportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | - VK_SUBGROUP_FEATURE_VOTE_BIT | - VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | - VK_SUBGROUP_FEATURE_BALLOT_BIT | - VK_SUBGROUP_FEATURE_SHUFFLE_BIT | - VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT | - VK_SUBGROUP_FEATURE_QUAD_BIT; - - pSubgroupProperties->quadOperationsInAllStages = VK_TRUE; - + GetPhysicalDeviceSubgroupProperties( + &pSubgroupProperties->subgroupSize, + &pSubgroupProperties->supportedStages, + &pSubgroupProperties->supportedOperations, + &pSubgroupProperties->quadOperationsInAllStages); break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: { - pMinMaxProperties->filterMinmaxImageComponentMapping = IsPerChannelMinMaxFilteringSupported(); - pMinMaxProperties->filterMinmaxSingleComponentFormats = VK_TRUE; + GetPhysicalDeviceSamplerFilterMinmaxProperties( + &pMinMaxProperties->filterMinmaxSingleComponentFormats, + &pMinMaxProperties->filterMinmaxImageComponentMapping); break; } @@ -4098,28 +4553,7 @@ void PhysicalDevice::GetDeviceProperties2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: { - pDescriptorIndexingProperties->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX; - pDescriptorIndexingProperties->shaderUniformBufferArrayNonUniformIndexingNative = VK_FALSE; - pDescriptorIndexingProperties->shaderSampledImageArrayNonUniformIndexingNative = VK_FALSE; - pDescriptorIndexingProperties->shaderStorageBufferArrayNonUniformIndexingNative = VK_FALSE; - 
pDescriptorIndexingProperties->shaderStorageImageArrayNonUniformIndexingNative = VK_FALSE; - pDescriptorIndexingProperties->shaderInputAttachmentArrayNonUniformIndexingNative = VK_FALSE; - pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindSamplers = UINT32_MAX; - pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindUniformBuffers = UINT32_MAX; - pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX; - pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindSampledImages = UINT32_MAX; - pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindStorageImages = UINT32_MAX; - pDescriptorIndexingProperties->maxPerStageDescriptorUpdateAfterBindInputAttachments = UINT32_MAX; - pDescriptorIndexingProperties->maxPerStageUpdateAfterBindResources = UINT32_MAX; - pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindSamplers = UINT32_MAX; - pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindUniformBuffers = UINT32_MAX; - pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MaxDynamicUniformDescriptors; - pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX; - pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MaxDynamicStorageDescriptors; - pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindSampledImages = UINT32_MAX; - pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindStorageImages = UINT32_MAX; - pDescriptorIndexingProperties->maxDescriptorSetUpdateAfterBindInputAttachments = UINT32_MAX; - + GetPhysicalDeviceDescriptorIndexingProperties(pDescriptorIndexingProperties); break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: @@ -4139,16 +4573,11 @@ void PhysicalDevice::GetDeviceProperties2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: { - pDriverProperties->driverID = VULKAN_DRIVER_ID; - - Util::Strncpy(pDriverProperties->driverName, VULKAN_DRIVER_NAME_STR, VK_MAX_DRIVER_NAME_SIZE_KHR); - Util::Strncpy(pDriverProperties->driverInfo, VULKAN_DRIVER_INFO_STR, VK_MAX_DRIVER_INFO_SIZE_KHR); - - pDriverProperties->conformanceVersion.major = CTS_VERSION_MAJOR; - pDriverProperties->conformanceVersion.minor = CTS_VERSION_MINOR; - pDriverProperties->conformanceVersion.subminor = CTS_VERSION_SUBMINOR; - pDriverProperties->conformanceVersion.patch = CTS_VERSION_PATCH; - + GetPhysicalDeviceDriverProperties( + &pDriverProperties->driverID, + pDriverProperties->driverName, + pDriverProperties->driverInfo, + &pDriverProperties->conformanceVersion); break; } @@ -4158,6 +4587,12 @@ void PhysicalDevice::GetDeviceProperties2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR: + { + GetPhysicalDeviceFloatControlsProperties(pFloatControlsProperties); + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: { const Pal::DeviceProperties& palProps = PalProperties(); @@ -4200,14 +4635,11 @@ void PhysicalDevice::GetDeviceProperties2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: { - pDepthStencilResolveProperties->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR | - VK_RESOLVE_MODE_MIN_BIT_KHR | - VK_RESOLVE_MODE_MAX_BIT_KHR; - pDepthStencilResolveProperties->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR | - VK_RESOLVE_MODE_MIN_BIT_KHR | - VK_RESOLVE_MODE_MAX_BIT_KHR; - 
pDepthStencilResolveProperties->independentResolveNone = VK_TRUE; - pDepthStencilResolveProperties->independentResolve = VK_TRUE; + GetPhysicalDeviceDepthStencilResolveProperties( + &pDepthStencilResolveProperties->supportedDepthResolveModes, + &pDepthStencilResolveProperties->supportedStencilResolveModes, + &pDepthStencilResolveProperties->independentResolveNone, + &pDepthStencilResolveProperties->independentResolve); break; } @@ -4225,6 +4657,12 @@ void PhysicalDevice::GetDeviceProperties2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: + { + pLineRasterizationProperties->lineSubPixelPrecisionBits = Pal::SubPixelBits; + break; + } + default: break; } diff --git a/icd/api/vk_physical_device_manager.cpp b/icd/api/vk_physical_device_manager.cpp index 806fc1b9..4a6a0d39 100644 --- a/icd/api/vk_physical_device_manager.cpp +++ b/icd/api/vk_physical_device_manager.cpp @@ -395,6 +395,8 @@ VkResult PhysicalDeviceManager::UpdateLockedPhysicalDeviceList(void) std::vector<PerfIndex> sortedList; sortedList.reserve(deviceCount); + constexpr float memPerfFactor = 0.1f; + // Populate the list with the physical device handles, sorted by gfxipPerfRating and other criteria. for (uint32_t currentDeviceIndex = 0; currentDeviceIndex < deviceCount; ++currentDeviceIndex) { @@ -407,8 +409,9 @@ VkResult PhysicalDeviceManager::UpdateLockedPhysicalDeviceList(void) PerfIndex perf; perf.gpuIndex = info.gpuIndex; - perf.perfRating = info.gfxipProperties.performance.gfxipPerfRating * - info.gfxipProperties.shaderCore.numShaderEngines; + perf.perfRating = info.gfxipProperties.performance.gfxipPerfRating + + static_cast<uint32_t>(info.gpuMemoryProperties.performance.memPerfRating * memPerfFactor); + perf.presentMode = 0; perf.hasAttachedScreens = info.attachedScreenCount > 0; perf.device = deviceList[currentDeviceIndex];
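The device-ordering math above replaces the old gfxipPerfRating * numShaderEngines product with a sum in which memory performance participates at one tenth of its raw value, so memory bandwidth refines the ranking without dominating it. A worked example with invented ratings (not real PAL data; PerfRatingOf is a hypothetical helper for illustration only):

    // Invented numbers, purely to illustrate the new ranking arithmetic.
    constexpr float memPerfFactor = 0.1f;

    static uint32_t PerfRatingOf(uint32_t gfxipPerfRating, uint32_t memPerfRating)
    {
        // Mirrors the perf.perfRating expression in UpdateLockedPhysicalDeviceList.
        return gfxipPerfRating + static_cast<uint32_t>(memPerfRating * memPerfFactor);
    }

    // PerfRatingOf(1000, 500)  == 1050
    // PerfRatingOf(900,  2000) == 1100, so a much faster memory system can
    // still win a close race, but cannot overcome a large gfxip deficit.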
diff --git a/icd/api/vk_pipeline.cpp b/icd/api/vk_pipeline.cpp index 84afd6c8..e0de2559 100644 --- a/icd/api/vk_pipeline.cpp +++ b/icd/api/vk_pipeline.cpp @@ -45,19 +45,20 @@ namespace vk { -// ShaderType to string conversion table. -const char* ApiShaderTypeStrings[] = +// The names of hardware shader stages used in PAL metadata, in Util::Abi::HardwareStage order. +static const char* HwStageNames[] = { - "CS", - "VS", - "HS", - "DS", - "GS", - "PS", + ".ls", + ".hs", + ".es", + ".gs", + ".vs", + ".ps", + ".cs" }; -static_assert(VK_ARRAY_SIZE(ApiShaderTypeStrings) == Pal::NumShaderTypes, - "Number of PAL/API shader types should match."); +static_assert(VK_ARRAY_SIZE(HwStageNames) == static_cast<uint32_t>(Util::Abi::HardwareStage::Count), + "Number of HwStageNames and PAL HW stages should match."); // The number of executable statistics to return through // the vkGetPipelineExecutableStatisticsKHR function @@ -276,6 +277,9 @@ VkResult Pipeline::GetShaderDisassembly( { // Copy disassemble code memcpy(pBuffer, pSymbolBase, symbolSize); + + // Null-terminate the buffer at its last char + static_cast<char*>(pBuffer)[symbolSize - 1] = '\0'; } } else @@ -498,6 +502,124 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetShaderInfoAMD( return result; } +// ===================================================================================================================== +static void BuildPipelineNameDescription( + char* pName, + char* pDescription, + Util::Abi::HardwareStage hwStage, + uint32_t palShaderMask) +{ + // Build a name and description string for the HW Shader + const char* apiString = HwStageNames[static_cast<uint32_t>(hwStage)]; + strncpy(pName, apiString, VK_MAX_DESCRIPTION_SIZE); + + // Build the description string using the VkShaderStageFlagBits + // that correspond to the HW Shader + char shaderDescription[VK_MAX_DESCRIPTION_SIZE]; + + // Beginning of the description + Util::Strncpy(shaderDescription, "Executable handles following Vulkan stages: ", VK_MAX_DESCRIPTION_SIZE); + + if (palShaderMask & Pal::ApiShaderStageCompute) + { + Util::Strncat(shaderDescription, VK_MAX_DESCRIPTION_SIZE, " VK_SHADER_STAGE_COMPUTE_BIT "); + } + + if (palShaderMask & Pal::ApiShaderStageVertex) + { + Util::Strncat(shaderDescription, VK_MAX_DESCRIPTION_SIZE, " VK_SHADER_STAGE_VERTEX_BIT "); + } + + if (palShaderMask & Pal::ApiShaderStageHull) + { + Util::Strncat(shaderDescription, VK_MAX_DESCRIPTION_SIZE, " VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT "); + } + + if (palShaderMask & Pal::ApiShaderStageDomain) + { + Util::Strncat(shaderDescription, VK_MAX_DESCRIPTION_SIZE, " VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT "); + } + + if (palShaderMask & Pal::ApiShaderStageGeometry) + { + Util::Strncat(shaderDescription, VK_MAX_DESCRIPTION_SIZE, " VK_SHADER_STAGE_GEOMETRY_BIT "); + } + + if (palShaderMask & Pal::ApiShaderStagePixel) + { + Util::Strncat(shaderDescription, VK_MAX_DESCRIPTION_SIZE, " VK_SHADER_STAGE_FRAGMENT_BIT "); + } + + // Copy built string to the description with remainder of the string \0 filled. + // Having the \0 to VK_MAX_DESCRIPTION_SIZE is a requirement to get the CTS tests to pass.
+ strncpy(pDescription, shaderDescription, VK_MAX_DESCRIPTION_SIZE); +} + +// ===================================================================================================================== +static uint32_t CountNumberOfHWStages( + uint32_t* pHwStageMask, + const Util::Abi::ApiHwShaderMapping& apiToHwShader) +{ + VK_ASSERT(pHwStageMask != nullptr); + + *pHwStageMask = 0; + for (uint32_t i = 0; i < static_cast<uint32_t>(Util::Abi::ApiShaderType::Count); i++) + { + uint32_t hwStage = 0; + if (Util::BitMaskScanForward(&hwStage, apiToHwShader.apiShaders[static_cast<uint32_t>(i)])) + { + *pHwStageMask |= (1 << hwStage); + } + } + + // The number of bits set in the HW Mask is the number of HW shaders used + return Util::CountSetBits(*pHwStageMask); +} + +// ===================================================================================================================== +// Get HW Stage for executable index +static Util::Abi::HardwareStage GetHwStageForExecutableIndex( + uint32_t executableIndex, + uint32_t hwStageMask) +{ + uint32_t hwStage = 0; + for (uint32_t i = 0; i <= executableIndex; ++i) + { + Util::BitMaskScanForward(&hwStage, hwStageMask); + hwStageMask &= ~(1 << hwStage); + } + + // HW Stage should never exceed number of available HW Stages + VK_ASSERT(hwStage < static_cast<uint32_t>(Util::Abi::HardwareStage::Count)); + + return static_cast<Util::Abi::HardwareStage>(hwStage); +} + +// ===================================================================================================================== +// Convert from the HW Shader stage back to the corresponding API Stage +static Pal::ShaderType GetApiShaderFromHwShader( + Util::Abi::HardwareStage hwStage, + const Util::Abi::ApiHwShaderMapping& apiToHwShader) +{ + Pal::ShaderType apiShaderType = Pal::ShaderType::Compute; + for (uint32_t i = 0; i < static_cast<uint32_t>(Util::Abi::ApiShaderType::Count); ++i) + { + uint32_t apiHWStage = 0; + Util::BitMaskScanForward(&apiHWStage, apiToHwShader.apiShaders[i]); + + if (apiToHwShader.apiShaders[i] & (1 << static_cast<uint32_t>(hwStage))) + { + apiShaderType = static_cast<Pal::ShaderType>(i); + break; + } + } + + // API shaders should never exceed number of shader types + VK_ASSERT(static_cast<uint32_t>(apiShaderType) < static_cast<uint32_t>(Pal::NumShaderTypes)); + + return apiShaderType; +} +
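CountNumberOfHWStages collapses the per-API-shader hardware masks into one mask of HW stages in use, and GetHwStageForExecutableIndex then resolves an executable index as "the Nth set bit" of that mask. The same lookup restated in self-contained C++, with a plain loop standing in for PAL's Util::BitMaskScanForward (assumed to scan from the least significant bit); NthSetBit is a hypothetical helper, not a PAL utility:

    // Sketch of the "executable index == Nth set bit" lookup used above.
    static uint32_t NthSetBit(uint32_t mask, uint32_t n)
    {
        for (uint32_t bit = 0; bit < 32; ++bit)
        {
            if ((mask & (1u << bit)) != 0)
            {
                if (n == 0)
                {
                    return bit;  // found the (n+1)-th set bit from the LSB
                }
                --n;
            }
        }
        return 32;  // mask has fewer than (n+1) bits set
    }

With hwStageMask == 0b110010, NthSetBit(hwStageMask, 0) is 1, NthSetBit(hwStageMask, 1) is 4, and NthSetBit(hwStageMask, 2) is 5, matching the order in which vkGetPipelineExecutablePropertiesKHR below reports executables.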
// ===================================================================================================================== VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineExecutablePropertiesKHR( VkDevice device, @@ -509,59 +631,62 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineExecutablePropertiesKHR( const Pal::IPipeline* pPalPipeline = pPipeline->PalPipeline(DefaultDeviceIndex); const Util::Abi::ApiHwShaderMapping apiToHwShader = pPalPipeline->ApiHwShaderMapping(); - uint32_t numStages = 0; - for (uint32 i = 0; i < static_cast<uint32_t>(Util::Abi::ApiShaderType::Count); i++) - { - numStages += (apiToHwShader.apiShaders[i] != 0) ? 1 : 0; - } + // Count the number of hardware stages that are used in this pipeline + uint32_t hwStageMask = 0; + uint32_t numHWStages = CountNumberOfHWStages(&hwStageMask, apiToHwShader); + // If pProperties == nullptr the call to this function is just meant to return the number of executables + // in the pipeline if (pProperties == nullptr) { - *pExecutableCount = numStages; + *pExecutableCount = numHWStages; return VK_SUCCESS; } - uint32_t outputCount = 0; + VkShaderStatisticsInfoAMD vkShaderStats = {}; + Pal::ShaderStats palStats = {}; + uint32_t outputCount = 0; + + // Return the name / description for the pExecutableCount number of executables. for (uint32 i = 0; - ((i < static_cast<uint32_t>(Util::Abi::ApiShaderType::Count)) && (outputCount < *pExecutableCount)); - i++) + Util::BitMaskScanForward(&i, hwStageMask); + (hwStageMask &= ~(1 << i)) && (outputCount < *pExecutableCount)) { - if (apiToHwShader.apiShaders[i] != 0) - { - VkShaderStatisticsInfoAMD vkShaderStats = {}; - Pal::ShaderStats palStats = {}; + // Get an api shader type for the corresponding HW Shader + Pal::ShaderType shaderType = GetApiShaderFromHwShader(static_cast<Util::Abi::HardwareStage>(i), apiToHwShader); - Pal::Result palResult = pPalPipeline->GetShaderStats( - static_cast<Pal::ShaderType>(i), - &palStats, - true); + // Get the shader stats from the shader in the pipeline + Pal::Result palResult = pPalPipeline->GetShaderStats(shaderType, &palStats, true); - ConvertShaderInfoStatistics(palStats, &vkShaderStats); + // Convert the PAL statistics to VkShaderStatisticsInfoAMD + ConvertShaderInfoStatistics(palStats, &vkShaderStats); - // API String - const char* apiString = ApiShaderTypeStrings[static_cast<uint32_t>(i)]; - strncpy(pProperties[outputCount].name, apiString, VK_MAX_DESCRIPTION_SIZE); - strncpy(pProperties[outputCount].description, apiString, VK_MAX_DESCRIPTION_SIZE); + // Set VkShaderStageFlagBits as an output property + pProperties[outputCount].stages = vkShaderStats.shaderStageMask; - // Set VkShaderStageFlagBits - pProperties[outputCount].stages = vkShaderStats.shaderStageMask; + // Build the name and description of the output property + BuildPipelineNameDescription( + pProperties[outputCount].name, + pProperties[outputCount].description, + static_cast<Util::Abi::HardwareStage>(i), + palStats.shaderStageMask); - // Add subgroup size for Compute - if (vkShaderStats.shaderStageMask & VK_SHADER_STAGE_COMPUTE_BIT) - { - pProperties[outputCount].subgroupSize = vkShaderStats.computeWorkGroupSize[0] * - vkShaderStats.computeWorkGroupSize[1] * - vkShaderStats.computeWorkGroupSize[2]; - } + // If this is a compute shader, report the workgroup size + if (vkShaderStats.shaderStageMask & VK_SHADER_STAGE_COMPUTE_BIT) + { + pProperties[outputCount].subgroupSize = vkShaderStats.computeWorkGroupSize[0] * + vkShaderStats.computeWorkGroupSize[1] * + vkShaderStats.computeWorkGroupSize[2]; + } - outputCount++; - } - } + outputCount++; + } // Write out the number of stages written *pExecutableCount = outputCount; - return (*pExecutableCount < numStages) ? VK_INCOMPLETE : VK_SUCCESS; + // If the requested number of executables was less than the available number of hw stages, return Incomplete + return (*pExecutableCount < numHWStages) ? VK_INCOMPLETE : VK_SUCCESS; } // ===================================================================================================================== @@ -575,37 +700,40 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineExecutableStatisticsKHR( const Pal::IPipeline* pPalPipeline = pPipeline->PalPipeline(DefaultDeviceIndex); const Util::Abi::ApiHwShaderMapping apiToHwShader = pPalPipeline->ApiHwShaderMapping(); + // If pStatisticCount == nullptr the call to this function is just meant to return the number of statistics + // for an executable in a pipeline.
if (pStatistics == nullptr) { - // Returning to statics value per shader executable *pStatisticCount = ExecutableStatisticsCount; return VK_SUCCESS; } - uint32_t index = 0; - uint32_t apiStages[static_cast<uint32_t>(Util::Abi::ApiShaderType::Count)] = {}; - for (uint32 i = 0; i < static_cast<uint32_t>(Util::Abi::ApiShaderType::Count); i++) - { - if (apiToHwShader.apiShaders[i] != 0) - { - apiStages[index] = i; - index++; - } - } + // Count the number of hardware stages that are used in this pipeline + uint32_t hwStageMask = 0; + uint32_t numHWStages = CountNumberOfHWStages(&hwStageMask, apiToHwShader); + + // The executable index should be less than the number of HW Stages. + VK_ASSERT(pExecutableInfo->executableIndex < numHWStages); + + // Get hwStage for executable index + Util::Abi::HardwareStage hwStage = GetHwStageForExecutableIndex(pExecutableInfo->executableIndex, hwStageMask); + + // Get an api shader type for the corresponding HW Shader + Pal::ShaderType shaderType = GetApiShaderFromHwShader(hwStage, apiToHwShader); + // Get the shader stats for the corresponding API stage VkShaderStatisticsInfoAMD vkShaderStats = {}; Pal::ShaderStats palStats = {}; - Pal::Result palResult = pPalPipeline->GetShaderStats( - static_cast<Pal::ShaderType>(apiStages[pExecutableInfo->executableIndex]), - &palStats, - true); + Pal::Result palResult = pPalPipeline->GetShaderStats(shaderType, &palStats, true); + // Return an error if there are no statistics for the stage. if (palResult != Pal::Result::Success) { return VK_ERROR_OUT_OF_HOST_MEMORY; } + // Convert from PAL to VK statistics ConvertShaderInfoStatistics(palStats, &vkShaderStats); VkPipelineExecutableStatisticKHR executableStatics[ExecutableStatisticsCount] = @@ -643,6 +771,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineExecutableStatisticsKHR( // Copy pStatisticCount number of statistics memcpy(pStatistics, executableStatics, (sizeof(VkPipelineExecutableStatisticKHR) * (*pStatisticCount))); + // If the requested number of statistics was less than the available number of statistics, + // return Incomplete return ((*pStatisticCount) < ExecutableStatisticsCount) ? VK_INCOMPLETE : VK_SUCCESS; }
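Both entry points follow the standard Vulkan enumerate-then-fill idiom. A minimal app-side sketch (assumes the pipelineExecutableInfo feature is enabled, the pipeline was created with VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR, and the extension entry point was loaded via vkGetDeviceProcAddr; error handling elided):

    #include <vector>
    #include <vulkan/vulkan.h>

    // 'device' and 'pipeline' are assumed to exist.
    static void QueryExecutableProperties(VkDevice device, VkPipeline pipeline)
    {
        VkPipelineInfoKHR pipelineInfo = {};
        pipelineInfo.sType    = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
        pipelineInfo.pipeline = pipeline;

        // First call: count only; this driver reports one executable per HW stage.
        uint32_t count = 0;
        vkGetPipelineExecutablePropertiesKHR(device, &pipelineInfo, &count, nullptr);

        // Second call: fill; name and description come from BuildPipelineNameDescription.
        std::vector<VkPipelineExecutablePropertiesKHR> props(count);
        for (auto& p : props)
        {
            p.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR;
        }
        vkGetPipelineExecutablePropertiesKHR(device, &pipelineInfo, &count, props.data());
    }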
@@ -669,33 +799,38 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineExecutableInternalRepresentationsKHR return VK_INCOMPLETE; } - uint32_t index = 0; - uint32_t apiStages[static_cast<uint32_t>(Util::Abi::ApiShaderType::Count)] = {}; - for (uint32 i = 0; i < static_cast<uint32_t>(Util::Abi::ApiShaderType::Count); i++) - { - if (apiToHwShader.apiShaders[i] != 0) - { - apiStages[index] = i; - index++; - } - } + // Count the number of hardware stages that are used in this pipeline + uint32_t hwStageMask = 0; + uint32_t numHWStages = CountNumberOfHWStages(&hwStageMask, apiToHwShader); - // Get the ABI HW Shader - uint32_t vkStage = apiStages[pExecutableInfo->executableIndex]; + // Get hwStage for executable index + Util::Abi::HardwareStage hwStage = GetHwStageForExecutableIndex(pExecutableInfo->executableIndex, hwStageMask); - // API String - const char* apiString = ApiShaderTypeStrings[static_cast<uint32_t>(vkStage)]; - strncpy(pInternalRepresentations[0].name, apiString, VK_MAX_DESCRIPTION_SIZE); - strncpy(pInternalRepresentations[0].description, apiString, VK_MAX_DESCRIPTION_SIZE); - pInternalRepresentations[0].isText = VK_TRUE; + // Convert from the HW Shader stage back to the corresponding API Stage + Pal::ShaderType apiShaderType = GetApiShaderFromHwShader(hwStage, apiToHwShader); + // Get the shader stats from the shader in the pipeline + Pal::ShaderStats palStats = {}; + Pal::Result palResult = pPalPipeline->GetShaderStats(apiShaderType, &palStats, true); + + // Build the name and description of the output property + BuildPipelineNameDescription( + pInternalRepresentations[0].name, + pInternalRepresentations[0].description, + static_cast<Util::Abi::HardwareStage>(hwStage), + palStats.shaderStageMask); + + // Get the text based disassembly of the shader VkResult result = pPipeline->GetShaderDisassembly( pDevice, pPalPipeline, - static_cast<Pal::ShaderType>(vkStage), + apiShaderType, &(pInternalRepresentations[0].dataSize), pInternalRepresentations[0].pData); + // Mark that the output disassembly is text formatted + pInternalRepresentations[0].isText = VK_TRUE; + // Update the number of representations written *pInternalRepresentationCount = 1; diff --git a/icd/api/vk_pipeline_cache.cpp b/icd/api/vk_pipeline_cache.cpp index faaacec5..32273a7e 100644 --- a/icd/api/vk_pipeline_cache.cpp +++ b/icd/api/vk_pipeline_cache.cpp @@ -32,16 +32,20 @@ #include "include/vk_pipeline_cache.h" #include "palAutoBuffer.h" +#include "include/pipeline_binary_cache.h" + namespace vk { // ===================================================================================================================== PipelineCache::PipelineCache( const Device* pDevice, - ShaderCache* pShaderCaches + ShaderCache* pShaderCaches, + PipelineBinaryCache* pBinaryCache ) : - m_pDevice(pDevice) + m_pDevice(pDevice), + m_pBinaryCache(pBinaryCache) { memcpy(m_shaderCaches, pShaderCaches, sizeof(m_shaderCaches[0]) * pDevice->NumPalDevices()); memset(m_shaderCaches + pDevice->NumPalDevices(), @@ -174,7 +178,19 @@ VkResult PipelineCache::Create( if (result == VK_SUCCESS) { - PipelineCache* pCache = VK_PLACEMENT_NEW(pMemory) PipelineCache(pDevice, shaderCaches); + PipelineBinaryCache* pBinaryCache = nullptr; + if (((settings.usePalPipelineCaching) || + (pDevice->VkPhysicalDevice(DefaultDeviceIndex)->VkInstance()->GetDevModeMgr() != nullptr)) && + (settings.allowExternalPipelineCacheObject)) + { + pBinaryCache = PipelineBinaryCache::Create(pDevice->VkPhysicalDevice(DefaultDeviceIndex)->VkInstance(), + pCreateInfo->initialDataSize, pCreateInfo->pInitialData, false,
pDevice->GetCompiler(DefaultDeviceIndex)->GetGfxIp(), pDevice->VkPhysicalDevice(DefaultDeviceIndex)); + + // This isn't a terminal failure, the device can continue without the pipeline cache if need be. + VK_ALERT(pBinaryCache == nullptr); + } + PipelineCache* pCache = VK_PLACEMENT_NEW(pMemory) PipelineCache(pDevice, shaderCaches, pBinaryCache); *pPipelineCache = PipelineCache::HandleFromVoidPointer(pMemory); } else @@ -191,6 +207,12 @@ VkResult PipelineCache::Destroy( const Device* pDevice, const VkAllocationCallbacks* pAllocator) { + if (m_pBinaryCache) + { + m_pBinaryCache->Destroy(); + pDevice->VkPhysicalDevice(DefaultDeviceIndex)->VkInstance()->FreeMem(m_pBinaryCache); + m_pBinaryCache = nullptr; + } this->~PipelineCache(); diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp index c32a1a7a..cb99ed11 100644 --- a/icd/api/vk_queue.cpp +++ b/icd/api/vk_queue.cpp @@ -473,10 +473,6 @@ VkResult Queue::PalSignalSemaphores( if (timedQueueEvents == false) { - if (pVkSemaphore->PalTemporarySemaphore(deviceIdx)) - { - pPalSemaphore = pVkSemaphore->PalTemporarySemaphore(deviceIdx); - } palResult = PalQueue(deviceIdx)->SignalQueueSemaphore(pPalSemaphore, pointValue); } else @@ -548,16 +544,9 @@ VkResult Queue::PalWaitSemaphores( VK_ASSERT(deviceIdx < m_pDevice->NumPalDevices()); - // Wait for the temporary semaphore. - if (pSemaphore->PalTemporarySemaphore(deviceIdx) != nullptr) - { - pPalSemaphore = pSemaphore->PalTemporarySemaphore(deviceIdx); - pSemaphore->ClearTemporarySemaphore(); - } - else - { - pPalSemaphore = pSemaphore->PalSemaphore(deviceIdx); - } + // Wait for the semaphore. + pPalSemaphore = pSemaphore->PalSemaphore(deviceIdx); + pSemaphore->RestoreSemaphore(); if (pPalSemaphore != nullptr) { @@ -577,7 +566,6 @@ VkResult Queue::PalWaitSemaphores( #endif } } - } return PalToVkResult(palResult); @@ -642,7 +630,7 @@ VkResult Queue::Present( for (pVkPresentInfoKHR = pPresentInfo; pHeader != nullptr; pHeader = pHeader->pNext) { - switch (static_cast(pHeader->sType)) + switch (static_cast(pHeader->sType)) { case VK_STRUCTURE_TYPE_PRESENT_INFO_KHR: pVkInfo = pVkPresentInfoKHR; diff --git a/icd/api/vk_render_pass.cpp b/icd/api/vk_render_pass.cpp index f640e5cb..03129509 100644 --- a/icd/api/vk_render_pass.cpp +++ b/icd/api/vk_render_pass.cpp @@ -165,69 +165,69 @@ static uint64_t GenerateRenderPassHash( // ===================================================================================================================== AttachmentReference::AttachmentReference() : - attachment (VK_ATTACHMENT_UNUSED), - layout (VK_IMAGE_LAYOUT_UNDEFINED), - aspectMask (VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM) + attachment (VK_ATTACHMENT_UNUSED), + layout (VK_IMAGE_LAYOUT_UNDEFINED), + aspectMask (VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM) { } // ===================================================================================================================== void AttachmentReference::Init(const VkAttachmentReference& attachRef) { - attachment = attachRef.attachment; - layout = attachRef.layout; - aspectMask = VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM; + attachment = attachRef.attachment; + layout = attachRef.layout; + aspectMask = VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM; } // ===================================================================================================================== void AttachmentReference::Init(const VkAttachmentReference2KHR& attachRef) { - attachment = attachRef.attachment; - layout = attachRef.layout; - aspectMask = attachRef.aspectMask; + attachment = attachRef.attachment; + layout = 
attachRef.layout; + aspectMask = attachRef.aspectMask; } // ===================================================================================================================== AttachmentDescription::AttachmentDescription() : - flags (0), - format (VK_FORMAT_UNDEFINED), - samples (VK_SAMPLE_COUNT_1_BIT), - loadOp (VK_ATTACHMENT_LOAD_OP_DONT_CARE), - storeOp (VK_ATTACHMENT_STORE_OP_DONT_CARE), - stencilLoadOp (VK_ATTACHMENT_LOAD_OP_DONT_CARE), - stencilStoreOp (VK_ATTACHMENT_STORE_OP_DONT_CARE), - initialLayout (VK_IMAGE_LAYOUT_UNDEFINED), - finalLayout (VK_IMAGE_LAYOUT_UNDEFINED) + flags (0), + format (VK_FORMAT_UNDEFINED), + samples (VK_SAMPLE_COUNT_1_BIT), + loadOp (VK_ATTACHMENT_LOAD_OP_DONT_CARE), + storeOp (VK_ATTACHMENT_STORE_OP_DONT_CARE), + stencilLoadOp (VK_ATTACHMENT_LOAD_OP_DONT_CARE), + stencilStoreOp (VK_ATTACHMENT_STORE_OP_DONT_CARE), + initialLayout (VK_IMAGE_LAYOUT_UNDEFINED), + finalLayout (VK_IMAGE_LAYOUT_UNDEFINED) { } // ===================================================================================================================== void AttachmentDescription::Init(const VkAttachmentDescription& attachDesc) { - flags = attachDesc.flags; - format = attachDesc.format; - samples = attachDesc.samples; - loadOp = attachDesc.loadOp; - storeOp = attachDesc.storeOp; - stencilLoadOp = attachDesc.stencilLoadOp; - stencilStoreOp = attachDesc.stencilStoreOp; - initialLayout = attachDesc.initialLayout; - finalLayout = attachDesc.finalLayout; + flags = attachDesc.flags; + format = attachDesc.format; + samples = attachDesc.samples; + loadOp = attachDesc.loadOp; + storeOp = attachDesc.storeOp; + stencilLoadOp = attachDesc.stencilLoadOp; + stencilStoreOp = attachDesc.stencilStoreOp; + initialLayout = attachDesc.initialLayout; + finalLayout = attachDesc.finalLayout; } // ===================================================================================================================== void AttachmentDescription::Init(const VkAttachmentDescription2KHR& attachDesc) { - flags = attachDesc.flags; - format = attachDesc.format; - samples = attachDesc.samples; - loadOp = attachDesc.loadOp; - storeOp = attachDesc.storeOp; - stencilLoadOp = attachDesc.stencilLoadOp; - stencilStoreOp = attachDesc.stencilStoreOp; - initialLayout = attachDesc.initialLayout; - finalLayout = attachDesc.finalLayout; + flags = attachDesc.flags; + format = attachDesc.format; + samples = attachDesc.samples; + loadOp = attachDesc.loadOp; + storeOp = attachDesc.storeOp; + stencilLoadOp = attachDesc.stencilLoadOp; + stencilStoreOp = attachDesc.stencilStoreOp; + initialLayout = attachDesc.initialLayout; + finalLayout = attachDesc.finalLayout; } // ===================================================================================================================== diff --git a/icd/api/vk_semaphore.cpp b/icd/api/vk_semaphore.cpp index d185d64f..b53f4d27 100644 --- a/icd/api/vk_semaphore.cpp +++ b/icd/api/vk_semaphore.cpp @@ -38,10 +38,10 @@ namespace vk VkResult Semaphore::PopulateInDeviceGroup( Device* pDevice, Pal::IQueueSemaphore* pPalSemaphores[MaxPalDevices], - int32_t* pSemaphoreCount) + uint32_t* pSemaphoreCount) { Pal::Result palResult = Pal::Result::Success; - int32_t count = 1; + uint32_t count = 1; // Linux don't support LDA chain. The semaphore allocated from one device cannot be used directly // on Peer devices. 
// In order to support that, we have to create the semaphore in the first device and import the payload @@ -119,10 +119,10 @@ VkResult Semaphore::PopulateInDeviceGroup( // ===================================================================================================================== // Creates a new queue semaphore object. VkResult Semaphore::Create( - Device* pDevice, - const VkSemaphoreCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkSemaphore* pSemaphore) + Device* pDevice, + const VkSemaphoreCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkSemaphore* pSemaphore) { Pal::QueueSemaphoreCreateInfo palCreateInfo = {}; palCreateInfo.maxCount = 1; @@ -130,6 +130,7 @@ VkResult Semaphore::Create( Pal::QueueSemaphoreExportInfo exportInfo = {}; // Allocate sufficient memory + VkResult vkResult = VK_SUCCESS; Pal::Result palResult; const size_t palSemaphoreSize = pDevice->PalDevice(DefaultDeviceIndex)->GetQueueSemaphoreSize(palCreateInfo, &palResult); VK_ASSERT(palResult == Pal::Result::Success); @@ -144,15 +145,15 @@ VkResult Semaphore::Create( switch (static_cast<uint32>(pHeader->sType)) { case VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO: - { - break; - } + { + break; + } case VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO: - { - // mark this semaphore as shareable. - palCreateInfo.flags.shareable = 1; - break; - } + { + // mark this semaphore as shareable. + palCreateInfo.flags.shareable = 1; + break; + } default: break; } @@ -160,59 +161,69 @@ VkResult Semaphore::Create( if (pDevice->NumPalDevices() > 1) { // mark this semaphore as shareable. - palCreateInfo.flags.shareable = 1; + palCreateInfo.flags.shareable = 1; } - - void* pMemory = pAllocator->pfnAllocation( + // Allocate memory for VK_Semaphore and palSemaphore separately + void* pVKSemaphoreMemory = pAllocator->pfnAllocation( pAllocator->pUserData, - sizeof(Semaphore) + palSemaphoreSize, + sizeof(Semaphore), VK_DEFAULT_MEM_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pMemory == nullptr) - { - return VK_ERROR_OUT_OF_HOST_MEMORY; - } - - size_t palOffset = sizeof(Semaphore); - - // Create the PAL object - Pal::IQueueSemaphore* pPalSemaphores[MaxPalDevices] = {nullptr}; - - if (palResult == Pal::Result::Success) - { - palResult = pDevice->PalDevice(DefaultDeviceIndex)->CreateQueueSemaphore( - palCreateInfo, - Util::VoidPtrInc(pMemory, palOffset), - &pPalSemaphores[0]); - } + void* pPalSemaphoreMemory = pDevice->VkInstance()->AllocMem( + palSemaphoreSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (palResult == Pal::Result::Success) + if ((pVKSemaphoreMemory != nullptr) && (pPalSemaphoreMemory != nullptr)) { - int32_t semaphoreCount = 1; + // Allocation succeeded.
Create the PAL object + Pal::IQueueSemaphore* pPalSemaphores[MaxPalDevices] = { nullptr }; - VkResult result = PopulateInDeviceGroup(pDevice, pPalSemaphores, &semaphoreCount); + if (palResult == Pal::Result::Success) + { + palResult = pDevice->PalDevice(DefaultDeviceIndex)->CreateQueueSemaphore( + palCreateInfo, + pPalSemaphoreMemory, + &pPalSemaphores[0]); + } - if (result == VK_SUCCESS) + if (palResult == Pal::Result::Success) { - Pal::OsExternalHandle handle = 0; - // On success, construct the API object and return to the caller - VK_PLACEMENT_NEW(pMemory) Semaphore(pPalSemaphores, semaphoreCount, palCreateInfo, handle); + uint32_t semaphoreCount = 1; - *pSemaphore = Semaphore::HandleFromVoidPointer(pMemory); + vkResult = PopulateInDeviceGroup(pDevice, pPalSemaphores, &semaphoreCount); - return VK_SUCCESS; - } - else - { - palResult = Pal::Result::ErrorOutOfGpuMemory; + if (vkResult == VK_SUCCESS) + { + Pal::OsExternalHandle handle = 0; + // On success, construct the API object and return to the caller + VK_PLACEMENT_NEW(pVKSemaphoreMemory) Semaphore(pPalSemaphores, semaphoreCount, palCreateInfo, handle); + *pSemaphore = Semaphore::HandleFromVoidPointer(pVKSemaphoreMemory); + + vkResult = VK_SUCCESS; + } + else + { + vkResult = PalToVkResult(Pal::Result::ErrorOutOfGpuMemory); + } } } - // Something broke. Free the memory and return error. - pAllocator->pfnFree(pAllocator->pUserData, pMemory); + else + { + // Allocation failed. + vkResult = VK_ERROR_OUT_OF_HOST_MEMORY; + } + + if (vkResult != VK_SUCCESS) + { + // Something broke. Free the memory. + pAllocator->pfnFree(pAllocator->pUserData, pVKSemaphoreMemory); + pDevice->VkInstance()->FreeMem(pPalSemaphoreMemory); + } - return PalToVkResult(palResult); + return vkResult; } // ===================================================================================================================== @@ -238,8 +249,8 @@ VkResult Semaphore::ImportSemaphore( Device* pDevice, const ImportSemaphoreInfo& importInfo) { - VkResult result = VK_SUCCESS; - Pal::Result palResult = Pal::Result::Success; + VkResult vkResult = VK_SUCCESS; + Pal::Result palResult = Pal::Result::Success; Pal::ExternalQueueSemaphoreOpenInfo palOpenInfo = {}; VkExternalSemaphoreHandleTypeFlags handleType = importInfo.handleType; @@ -277,35 +288,22 @@ VkResult Semaphore::ImportSemaphore( if (palResult == Pal::Result::Success) { - int32_t semaphoreCount = 1; + uint32_t semaphoreCount = 1; - result = PopulateInDeviceGroup(pDevice, pPalSemaphores, &semaphoreCount); + vkResult = PopulateInDeviceGroup(pDevice, pPalSemaphores, &semaphoreCount); - if (result == VK_SUCCESS) + if (vkResult == VK_SUCCESS) { + DestroyTemporarySemaphore(pDevice); if ((importInfo.importFlags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT)) { + m_useTempSemaphore = true; SetTemporarySemaphore(pPalSemaphores, semaphoreCount, palOpenInfo.externalSemaphore); } else { - m_pPalSemaphores[0]->Destroy(); - m_pPalSemaphores[0] = pPalSemaphores[0]; - m_sharedSemaphoreTempHandle = palOpenInfo.externalSemaphore; - - for (uint32_t deviceIdx = 1; deviceIdx < pDevice->NumPalDevices(); deviceIdx ++) - { - if (m_pPalSemaphores[deviceIdx] != nullptr) - { - m_pPalSemaphores[deviceIdx]->Destroy(); - pDevice->VkInstance()->FreeMem(m_pPalSemaphores[deviceIdx]); - m_pPalSemaphores[deviceIdx] = pPalSemaphores[deviceIdx]; - } - else - { - break; - } - } + DestroySemaphore(pDevice); + SetSemaphore(pPalSemaphores, semaphoreCount, palOpenInfo.externalSemaphore); } } else @@ -315,53 +313,93 @@ VkResult Semaphore::ImportSemaphore( } else { - result =
PalToVkResult(palResult); + vkResult = PalToVkResult(palResult); pDevice->VkInstance()->FreeMem(pMemory); } } else { - result = VK_ERROR_OUT_OF_HOST_MEMORY; + vkResult = VK_ERROR_OUT_OF_HOST_MEMORY; } } - return result; + + return vkResult; } // ===================================================================================================================== // vkDestroyObject entry point for queue semaphore objects. -VkResult Semaphore::Destroy( +void Semaphore::Destroy( const Device* pDevice, const VkAllocationCallbacks* pAllocator) { + DestroyTemporarySemaphore(pDevice); + DestroySemaphore(pDevice); + Util::Destructor(this); + pAllocator->pfnFree(pAllocator->pUserData, this); +} - ClearTemporarySemaphore(); +// ===================================================================================================================== +// Copy imported semaphore into m_pPalTemporarySemaphores +void Semaphore::SetTemporarySemaphore( + Pal::IQueueSemaphore* pPalImportedSemaphore[], + uint32_t semaphoreCount, + Pal::OsExternalHandle importedHandle) +{ + for (uint32_t i = 0; i < semaphoreCount; i++) + { + m_pPalTemporarySemaphores[i] = pPalImportedSemaphore[i]; + } - m_pPalSemaphores[0]->Destroy(); + m_sharedSemaphoreTempHandle = importedHandle; +} - for (uint32_t deviceIdx = 1; deviceIdx < pDevice->NumPalDevices(); deviceIdx ++) +// ===================================================================================================================== +// Copy imported semaphore into m_pPalSemaphores +void Semaphore::SetSemaphore( + Pal::IQueueSemaphore* pPalImportedSemaphore[], + uint32_t semaphoreCount, + Pal::OsExternalHandle importedHandle) +{ + for (uint32_t i = 0; i < semaphoreCount; i++) { - if (m_pPalSemaphores[deviceIdx] != nullptr) - { - m_pPalSemaphores[deviceIdx]->Destroy(); - pDevice->VkInstance()->FreeMem(m_pPalSemaphores[deviceIdx]); - } - else - { - break; - } + m_pPalSemaphores[i] = pPalImportedSemaphore[i]; } - // the sempahore is imported from external - if (Util::VoidPtrInc(this,sizeof(Semaphore)) != m_pPalSemaphores[0]) + m_sharedSemaphoreHandle = importedHandle; +} + +// ===================================================================================================================== +// Call the destructor, free the memory, and close the handle for the temporary semaphore +void Semaphore::DestroyTemporarySemaphore( + const Device* pDevice) +{ + for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) { - pDevice->VkInstance()->FreeMem(m_pPalSemaphores[0]); + if (m_pPalTemporarySemaphores[deviceIdx] != nullptr) + { + m_pPalTemporarySemaphores[deviceIdx]->Destroy(); + pDevice->VkInstance()->FreeMem(m_pPalTemporarySemaphores[deviceIdx]); + m_pPalTemporarySemaphores[deviceIdx] = nullptr; + } } - Util::Destructor(this); +} - pAllocator->pfnFree(pAllocator->pUserData, this); +// ===================================================================================================================== +// Call the destructor, free the memory, and close the handle for the semaphore +void Semaphore::DestroySemaphore( + const Device* pDevice) +{ + for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) + { + if (m_pPalSemaphores[deviceIdx] != nullptr) + { + m_pPalSemaphores[deviceIdx]->Destroy(); + pDevice->VkInstance()->FreeMem(m_pPalSemaphores[deviceIdx]); + m_pPalSemaphores[deviceIdx] = nullptr; + } } - return VK_SUCCESS; }
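The SetSemaphore/SetTemporarySemaphore split mirrors the external-semaphore model in the Vulkan spec: a payload imported with VK_SEMAPHORE_IMPORT_TEMPORARY_BIT only displaces the resident one until the next wait completes, which is why the wait paths call RestoreSemaphore. An app-side sketch of a temporary import (assumes VK_KHR_external_semaphore_fd, an fd obtained elsewhere via vkGetSemaphoreFdKHR, and the entry point loaded via vkGetDeviceProcAddr):

    // 'device', 'semaphore', and 'fd' are assumed to exist.
    VkImportSemaphoreFdInfoKHR importInfo = {};
    importInfo.sType      = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR;
    importInfo.semaphore  = semaphore;
    importInfo.flags      = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT;
    importInfo.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
    importInfo.fd         = fd;

    // With the TEMPORARY bit set, the driver parks the import in
    // m_pPalTemporarySemaphores and keeps the original payload in
    // m_pPalSemaphores, to be restored after the next wait.
    vkImportSemaphoreFdKHR(device, &importInfo);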
// ===================================================================================================================== @@ -375,14 +413,7 @@ VkResult Semaphore::GetSemaphoreCounterValue( if (pSemaphore != nullptr) { - if (pSemaphore->PalTemporarySemaphore(DefaultDeviceIndex) != nullptr) - { - pPalSemaphore = pSemaphore->PalTemporarySemaphore(DefaultDeviceIndex); - } - else - { - pPalSemaphore = pSemaphore->PalSemaphore(DefaultDeviceIndex); - } + pPalSemaphore = pSemaphore->PalSemaphore(DefaultDeviceIndex); palResult = pPalSemaphore->QuerySemaphoreValue(pValue); } @@ -397,21 +428,13 @@ VkResult Semaphore::WaitSemaphoreValue( uint64_t timeout) { Pal::Result palResult = Pal::Result::Success; Pal::IQueueSemaphore* pPalSemaphore = nullptr; if (pSemaphore != nullptr) { VK_ASSERT(pSemaphore->IsTimelineSemaphore()); - if (pSemaphore->PalTemporarySemaphore(DefaultDeviceIndex) != nullptr) - { - pPalSemaphore = pSemaphore->PalTemporarySemaphore(DefaultDeviceIndex); - pSemaphore->ClearTemporarySemaphore(); - } - else - { - pPalSemaphore = pSemaphore->PalSemaphore(DefaultDeviceIndex); - } + pPalSemaphore = pSemaphore->PalSemaphore(DefaultDeviceIndex); + pSemaphore->RestoreSemaphore(); palResult = pPalSemaphore->WaitSemaphoreValue(value, timeout); } @@ -429,14 +452,7 @@ VkResult Semaphore::SignalSemaphoreValue( if (pSemaphore != nullptr) { - if (pSemaphore->PalTemporarySemaphore(DefaultDeviceIndex) != nullptr) - { - pPalSemaphore = pSemaphore->PalTemporarySemaphore(DefaultDeviceIndex); - } - else - { - pPalSemaphore = pSemaphore->PalSemaphore(DefaultDeviceIndex); - } + pPalSemaphore = pSemaphore->PalSemaphore(DefaultDeviceIndex); palResult = pPalSemaphore->SignalSemaphoreValue(value); } diff --git a/icd/api/vk_shader.cpp b/icd/api/vk_shader.cpp index fea12d34..66c762de 100644 --- a/icd/api/vk_shader.cpp +++ b/icd/api/vk_shader.cpp @@ -172,7 +172,7 @@ VkResult ShaderModule::Create( VK_PLACEMENT_NEW(pMemory) ShaderModule(pCreateInfo->codeSize, pCode); ShaderModule* pShaderModuleObj = static_cast<ShaderModule*>(pMemory); - VkResult vkResult = pShaderModuleObj->Init(pDevice); + VkResult vkResult = pShaderModuleObj->Init(pDevice, pCreateInfo->flags); VK_ASSERT(vkResult == VK_SUCCESS); *pShaderModule = ShaderModule::HandleFromVoidPointer(pMemory); @@ -182,10 +182,11 @@ VkResult ShaderModule::Create( // ===================================================================================================================== // Initialize shader module object, performing SPIR-V to AMD IL shader binary conversion. -VkResult ShaderModule::Init(const Device* pDevice) +VkResult ShaderModule::Init(const Device* pDevice, VkShaderModuleCreateFlags flags) { PipelineCompiler* pCompiler = pDevice->GetCompiler(DefaultDeviceIndex); return pCompiler->BuildShaderModule(pDevice, + flags, m_codeSize, m_pCode, &m_handle diff --git a/icd/api/vk_swapchain.cpp b/icd/api/vk_swapchain.cpp index 90989aac..1d9220d2 100644 --- a/icd/api/vk_swapchain.cpp +++ b/icd/api/vk_swapchain.cpp @@ -961,24 +961,26 @@ bool SwapChain::IsSuboptimal(uint32_t deviceIdx) VkSurfaceCapabilitiesKHR surfaceCapabilities = {}; Pal::OsDisplayHandle displayHandle = 0; - VK_ASSERT(m_properties.pSurface != nullptr); - - const VkResult result = m_pDevice->VkPhysicalDevice(deviceIdx)->GetSurfaceCapabilities( - Surface::HandleFromObject(m_properties.pSurface), - displayHandle, - &surfaceCapabilities); - - if (result == VK_SUCCESS) + if (m_pDevice->GetRuntimeSettings().ignoreSuboptimalSwapchainSize == false) { - // Magic width/height value meaning that the surface is resized to match the swapchain's extent.
- constexpr uint32_t SwapchainBasedSize = 0xFFFFFFFF; + VK_ASSERT(m_properties.pSurface != nullptr); + + const VkResult result = m_pDevice->VkPhysicalDevice(deviceIdx)->GetSurfaceCapabilities( + Surface::HandleFromObject(m_properties.pSurface), + displayHandle, + &surfaceCapabilities); - if (((surfaceCapabilities.currentExtent.width != SwapchainBasedSize) || - (surfaceCapabilities.currentExtent.height != SwapchainBasedSize)) - ) + if (result == VK_SUCCESS) { - suboptimal = ((surfaceCapabilities.currentExtent.width != m_properties.imageCreateInfo.extent.width) || - (surfaceCapabilities.currentExtent.height != m_properties.imageCreateInfo.extent.height)); + // Magic width/height value meaning that the surface is resized to match the swapchain's extent. + constexpr uint32_t SwapchainBasedSize = 0xFFFFFFFF; + + if ((surfaceCapabilities.currentExtent.width != SwapchainBasedSize) || + (surfaceCapabilities.currentExtent.height != SwapchainBasedSize)) + { + suboptimal = ((surfaceCapabilities.currentExtent.width != m_properties.imageCreateInfo.extent.width) || + (surfaceCapabilities.currentExtent.height != m_properties.imageCreateInfo.extent.height)); + } } } diff --git a/icd/make/importdefs b/icd/make/importdefs index f1bd24ba..7c766840 100644 --- a/icd/make/importdefs +++ b/icd/make/importdefs @@ -1,7 +1,7 @@ # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. It must # be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -ICD_PAL_CLIENT_MAJOR_VERSION = 525 +ICD_PAL_CLIENT_MAJOR_VERSION = 527 ICD_PAL_CLIENT_MINOR_VERSION = 0 # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. It describes @@ -17,4 +17,4 @@ ICD_SCPC_CLIENT_MAJOR_VERSION = 49 # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1. It describes the version of the # interface version of LLPC that the ICD supports. -ICD_LLPC_CLIENT_MAJOR_VERSION = 31 +ICD_LLPC_CLIENT_MAJOR_VERSION = 32 diff --git a/icd/res/ver.h b/icd/res/ver.h index f38be6fb..2b0550aa 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 105 +#define VULKAN_ICD_BUILD_VERSION 109 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index 1c201ed7..3c17dc44 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -190,6 +190,9 @@ void VulkanSettingsLoader::OverrideProfiledSettings( // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we // can't do any better than returning a non-null function pointer for them. m_settings.lenientInstanceFuncQuery = true; + + // This works around a crash at app startup. 
+ m_settings.ignoreSuboptimalSwapchainSize = true; } if (appProfile == AppProfile::WolfensteinII) @@ -246,6 +249,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( m_settings.optimizeCmdbufMode = EnableOptimizeCmdbuf; + m_settings.usePalPipelineCaching = true; if (info.revision == Pal::AsicRevision::Vega20) { m_settings.dccBitsPerPixelThreshold = 16; @@ -285,6 +289,9 @@ void VulkanSettingsLoader::OverrideProfiledSettings( m_settings.prefetchShaders = true; m_settings.disableMsaaStencilShaderRead = true; + // Dota 2 will be the pilot for PAL pipeline caching. + m_settings.usePalPipelineCaching = true; + m_settings.shaderCacheMode = ShaderCacheForceInternalCacheOnDisk; } @@ -363,6 +370,20 @@ void VulkanSettingsLoader::OverrideProfiledSettings( } } + if (appProfile == AppProfile::DxvkEliteDangerous) + { + m_settings.disableSkipFceOptimization = true; + } + + // By allowing the enable/disable to be set by an environment variable, third-party platform owners can enable or + // disable the feature based on their internal feedback without having to wait for a driver update to catch issues. + const char* pPipelineCacheEnvVar = getenv(m_settings.pipelineCachingEnvironmentVariable); + + if (pPipelineCacheEnvVar != nullptr) + { + m_settings.usePalPipelineCaching = (atoi(pPipelineCacheEnvVar) >= 0); + } } // ===================================================================================================================== @@ -541,9 +562,7 @@ void VulkanSettingsLoader::UpdatePalSettings() pPalSettings->enableGpuEventMultiSlot = m_settings.enableGpuEventMultiSlot; } - // Setting disableSkipFceOptimization to false enables an optimization in PAL that disregards the FCE in a transition - // if one of the built in clear colors are used (white/black) and the image is TCC compatible. - pPalSettings->disableSkipFceOptimization = false; + pPalSettings->disableSkipFceOptimization = m_settings.disableSkipFceOptimization; }
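One subtlety in the environment override above: atoi returns 0 both for the string "0" and for non-numeric input, and the comparison is >= 0, so anything other than an explicitly negative value (for example "-1") enables PAL pipeline caching. A condensed restatement of the parse; the actual variable name comes from m_settings.pipelineCachingEnvironmentVariable, which this patch does not show, so the name below is a placeholder:

    #include <cstdlib>

    // "XGL_PIPELINE_CACHING" is a placeholder; the real name is read from
    // m_settings.pipelineCachingEnvironmentVariable.
    static bool ReadPipelineCachingOverride(bool currentValue)
    {
        const char* pVar = getenv("XGL_PIPELINE_CACHING");
        if (pVar == nullptr)
        {
            return currentValue;  // unset: keep the app-profile default
        }
        // atoi("0") == 0 and atoi("on") == 0, so both enable caching; only
        // an explicitly negative value such as "-1" disables it.
        return atoi(pVar) >= 0;
    }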
diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index ae5dbff5..fe84c653 100644 --- a/icd/settings/settings_xgl.json +++ b/icd/settings/settings_xgl.json @@ -192,6 +192,32 @@ "Description": "'Enable' enables pipeline binning." } ] + }, + { + "Name": "GraphicsPipelineType", + "IsEnum": true, + "Values": [ + { + "Name": "GraphicsPipelineTypeVsFs", + "Value": 1, + "Description": "VS PS pipelines" + }, + { + "Name": "GraphicsPipelineTypeGs", + "Value": 2, + "Description": "VS GS PS pipelines" + }, + { + "Name": "GraphicsPipelineTypeTess", + "Value": 4, + "Description": "Tess pipelines without GS" + }, + { + "Name": "GraphicsPipelineTypeTessGs", + "Value": 8, + "Description": "Tess pipelines with GS" + } + ] } ], "Settings": [ @@ -840,6 +866,19 @@ "Type": "bool", "VariableName": "enableAcquireBeforeSignal" }, + { + "Name": "IgnoreSuboptimalSwapchainSize", + "Description": "When true, no check is done to see if the swapchain surface size has changed since creation.", + "Tags": [ + "Present" + ], + "Defaults": { + "Default": false + }, + "Scope": "Driver", + "Type": "bool", + "VariableName": "ignoreSuboptimalSwapchainSize" + }, { "Description": "Enable pipeline dump, pipeline is stored with .pipe format. You must set AMD_DEBUG_DIR and make sure $AMD_DEBUG_DIR + pipelineDumpDir is an available directory.", "Tags": [ @@ -1134,14 +1173,21 @@ "Scope": "Driver" }, { - "Description": "Enable NGG mode, use an implicit primitive shader. Use this instead of PAL setting, NggEnableMode.", + "Description": "Enable NGG mode, use an implicit primitive shader on a per-pipeline type basis. Use this instead of PAL setting, NggEnableMode.", "Tags": [ "SPIRV Options" ], + "Flags": { + "IsHex": true, + "IsBitmask": true + }, "Defaults": { - "Default": true + "Default": 4294967295 }, - "Type": "bool", + "ValidValues": { + "Name": "GraphicsPipelineType" + }, + "Type": "uint32", "VariableName": "enableNgg", "Name": "EnableNgg", "Scope": "Driver" @@ -1192,7 +1238,7 @@ "SPIRV Options" ], "Defaults": { - "Default": "NggCompactSubgroup" + "Default": "NggCompactVertices" }, "Type": "enum", "VariableName": "nggCompactionMode", @@ -2284,42 +2330,6 @@ "VariableName": "dbgBarrierPreCacheDstMask", "Name": "DbgBarrierPreCacheDstMask" }, - { - "Name": "VulkanOverlayEnable", - "Description": "Enable specific channels of the Vulkan debug overlay through a bitmask. Only takes effect for drivers compiled with the .intdev target (VK_INTERNAL_DEVELOPER=1).", - "Tags": [ - "General" - ], - "Flags": { - "IsHex": true, - "IsBitmask": true - }, - "Defaults": { - "Default": 0 - }, - "ValidValues": { - "Values": [ - { - "Name": "OverlayPresentInfo", - "Value": 1, - "Description": "Enable information about presents" - }, - { - "Name": "OverlayBarrierInfo", - "Value": 2, - "Description": "Enable information about barriers" - }, - { - "Name": "OverlayBarrierFiltering", - "Value": 4, - "Description": "Barrier filter layer debugging information" - } - ] - }, - "Type": "uint32", - "VariableName": "overlayEnable", - "Scope": "Driver" - }, { "Description": "Instanced vertex buffer table ring size.", "Tags": [ @@ -2456,7 +2466,7 @@ "VariableName": "barrierFilterOptions" }, { - "Description": "Path to a file that contains application-specific barrier filter profiles. The contents are read on startup and used if BarrierFilterOptions SkipWithAppProfile, SkipWithAppProfileRegen, or SkipWithIntDevOverlay are set. On exit, an updated profile is written to this same file. This setting only triggers on debug builds or builds made with the VK_INTERNAL_DEVELOPER=1 option.", + "Description": "Path to a file that contains application-specific barrier filter profiles. The contents are read on startup and used if BarrierFilterOptions SkipWithAppProfile, SkipWithAppProfileRegen, or SkipWithIntDevOverlay are set. On exit, an updated profile is written to this same file. This setting only triggers on debug builds.", "Tags": [ "General" ], @@ -2603,6 +2613,22 @@ "Type": "uint32", "VariableName": "asyncComputeQueueLimit" }, + { + "Name": "LimitSampleCounts", + "Description": "Mask the sample counts returned in VkPhysicalDeviceLimits and vkGetPhysicalDeviceImageFormatProperties (OriginalValue & ThisValue). This setting does not actually affect support for sample counts, only what is returned to the application.
@@ -3522,6 +3548,32 @@
       "Type": "bool",
       "VariableName": "enableMlModelForWaveSize"
     },
+    {
+      "Description": "Enable async compile for shader modules and pipelines.",
+      "Tags": [
+        "Optimization"
+      ],
+      "Defaults": {
+        "Default": false
+      },
+      "Scope": "Driver",
+      "Type": "bool",
+      "VariableName": "enableAsyncCompile",
+      "Name": "EnableAsyncCompile"
+    },
+    {
+      "Description": "Setting this to false enables an optimization in PAL that disregards the FCE in a transition if one of the built-in clear colors (white/black) is used and the image is TCC compatible.",
+      "Tags": [
+        "Optimization"
+      ],
+      "Defaults": {
+        "Default": false
+      },
+      "Scope": "Driver",
+      "Type": "bool",
+      "VariableName": "disableSkipFceOptimization",
+      "Name": "DisableSkipFceOptimization"
+    },
     {
       "Description": "Determines the pipeline binary max size limit (in KB) used by the PipelineUriService when injecting pipeline binaries back into the driver. The limit is 256KB by default.",
       "Tags": [
@@ -3655,7 +3707,7 @@
     },
     {
       "Value": 128,
-      "Description": "Write bound shader hashes as user event markers (requires VK_INTERNAL_DEVELOPER)"
+      "Description": "Write bound shader hashes as user event markers"
     },
     {
       "Value": 256,
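The new EnableAsyncCompile setting defers expensive shader-module compilation to a worker thread
so that creation can return before compilation has finished. A conceptual sketch using std::async;
this is illustrative only, not the async layer this patch actually adds, and all names below are
hypothetical:

    #include <cstdint>
    #include <cstdio>
    #include <future>
    #include <vector>

    // Hypothetical result type; the real driver produces a shader module
    // through the pipeline compiler.
    struct ShaderModuleHandle { bool ready; };

    static ShaderModuleHandle CompileShaderModule(std::vector<uint32_t> spirv)
    {
        // ... expensive front-end compilation would happen here ...
        return ShaderModuleHandle{ spirv.empty() == false };
    }

    int main()
    {
        std::vector<uint32_t> spirv = { 0x07230203 }; // SPIR-V magic number

        // With async compile enabled, creation kicks the work off on a
        // worker thread and returns immediately ...
        std::future<ShaderModuleHandle> pending =
            std::async(std::launch::async, CompileShaderModule, spirv);

        // ... and the result is joined the first time the module is
        // actually needed (e.g. at pipeline creation).
        ShaderModuleHandle module = pending.get();

        std::printf("module ready = %d\n", module.ready ? 1 : 0);
        return 0;
    }
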
@@ -3760,74 +3812,6 @@
       "VariableName": "devModeSqttInternalUserEventPrefix",
       "Size": 512
     },
-    {
-      "Description": "If true, driver will insert user event strings of renderpass instances into RGP traces. Requires internal developer build (VK_INTERNAL_DEVELOPER).",
-      "Tags": [
-        "Developer Mode"
-      ],
-      "Defaults": {
-        "Default": false
-      },
-      "Scope": "Driver",
-      "Type": "bool",
-      "VariableName": "devModeSqttMarkRenderPasses",
-      "Name": "DevModeSqttMarkRenderPasses"
-    },
-    {
-      "Description": "If true, driver will insert user event strings when pipelines are bound (including shader hashes). Requires internal developer build (VK_INTERNAL_DEVELOPER).",
-      "Tags": [
-        "Developer Mode"
-      ],
-      "Defaults": {
-        "Default": false
-      },
-      "Scope": "Driver",
-      "Type": "bool",
-      "VariableName": "devModeSqttMarkPipelineBinds",
-      "Name": "DevModeSqttMarkPipelineBinds"
-    },
-    {
-      "Description": "If true, driver will insert user event strings when color/depth targets are bound. Requires internal developer build (VK_INTERNAL_DEVELOPER).",
-      "Tags": [
-        "Developer Mode"
-      ],
-      "Defaults": {
-        "Default": false
-      },
-      "Scope": "Driver",
-      "Type": "bool",
-      "VariableName": "devModeSqttMarkTargetBinds",
-      "Name": "DevModeSqttMarkTargetBinds"
-    },
-    {
-      "Description": "If true, driver will insert user event strings when pipeline barriers happen. Requires internal developer build (VK_INTERNAL_DEVELOPER).",
-      "Tags": [
-        "Developer Mode"
-      ],
-      "Defaults": {
-        "Default": false
-      },
-      "Scope": "Driver",
-      "Type": "bool",
-      "VariableName": "devModeSqttMarkPipelineBarriers",
-      "Name": "DevModeSqttMarkPipelineBarriers"
-    },
-    {
-      "Description": "Track metadata for object types (each bit in the mask corresponds with enum value of VkDebugReportObjectTypeEXT). Set to all f's to track all types. Setting this will cause any debug names to be included with any of the other internal markings such as pipeline binds. Requires internal developer build (VK_INTERNAL_DEVELOPER). WARNING: Will likely interfere with multithreaded command buffer recording concurrency.",
-      "Tags": [
-        "Developer Mode"
-      ],
-      "Flags": {
-        "IsHex": true
-      },
-      "Defaults": {
-        "Default": 0
-      },
-      "Scope": "Driver",
-      "Type": "uint64",
-      "VariableName": "devModeSqttTrackObjectMetaData",
-      "Name": "DevModeSqttObjectMetaData"
-    },
     {
       "Description": "If TRUE, DevModeSqttTrace[Begin|End]TagValues are used to override trace parameter begin/end command buffer debug object tag values. This is mainly used for debugging.",
       "Tags": [