diff --git a/cmake/Modules/FindAMDBoost.cmake b/cmake/Modules/FindAMDBoost.cmake deleted file mode 100644 index 0099c468..00000000 --- a/cmake/Modules/FindAMDBoost.cmake +++ /dev/null @@ -1,112 +0,0 @@ -## - ####################################################################################################################### - # - # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. - # - # Permission is hereby granted, free of charge, to any person obtaining a copy - # of this software and associated documentation files (the "Software"), to deal - # in the Software without restriction, including without limitation the rights - # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - # copies of the Software, and to permit persons to whom the Software is - # furnished to do so, subject to the following conditions: - # - # The above copyright notice and this permission notice shall be included in all - # copies or substantial portions of the Software. - # - # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - # SOFTWARE. - # - ####################################################################################################################### - -# Output -# Boost_FOUND -# Boost_ROOT_DIR -# Boost_INCLUDE_DIRS -# Boost_LIBRARY_DIRS - -# CMAKE-TODO: -# There is a built in FindBoost module: https://cmake.org/cmake/help/latest/module/FindBoost.html -# But our DK version is very inconsistent and is not structured the same way. More testing required. 
- -if(NOT DEFINED Boost_FOUND) - if(NOT DEFINED AMDBoost_FIND_VERSION) - message(FATAL_ERROR "A version to search for must be specified.") - endif() - - if(NOT DEFINED TARGET_ARCHITECTURE_BITS) - message(FATAL_ERROR "TARGET_ARCHITECTURE_BITS must be defined.") - endif() - - if(NOT DEFINED GLOBAL_ROOT_DK_DIR) - message(FATAL_ERROR "GLOBAL_ROOT_DK_DIR must be specified.") - endif() - - set(BOOST_VER ${AMDBoost_FIND_VERSION_MAJOR}.${AMDBoost_FIND_VERSION_MINOR}.${AMDBoost_FIND_VERSION_PATCH}) - - if(MSVC) - #MSVC++ 11.0 MSVC_VERSION == 1700 (Visual Studio 2012) - #MSVC++ 12.0 MSVC_VERSION == 1800 (Visual Studio 2013) - #MSVC++ 14.0 MSVC_VERSION == 1900 (Visual Studio 2015) - if(MSVC_VERSION EQUAL 1700) - set(Boost_ROOT_DIR ${GLOBAL_ROOT_DK_DIR}/boost/${BOOST_VER}/vc11 CACHE PATH "Boost root directory.") - elseif(MSVC_VERSION GREATER_EQUAL 1800) # CMAKE-TODO: Set to GREATER_EQUAL until VS projects are supported correctly. - set(Boost_ROOT_DIR ${GLOBAL_ROOT_DK_DIR}/boost/${BOOST_VER}/vc12 CACHE PATH "Boost root directory.") - else() - message(FATAL_ERROR "The MSVC Version: ${MSVC_VERSION} is currently unsopported for: ${CMAKE_PARENT_LIST_FILE}") - endif() - message(STATUS "Boost Version: ${BOOST_VER} for MSVC Version: ${MSVC_VERSION}") - elseif(CMAKE_COMPILER_IS_GNUCC) - set(Boost_ROOT_DIR ${GLOBAL_ROOT_DK_DIR}/boost/${BOOST_VER}/gcc-${CMAKE_CXX_COMPILER_VERSION} CACHE PATH "Boost root directory.") - message(STATUS "Boost Version: ${BOOST_VER} for GCC Version: ${CMAKE_CXX_COMPILER_VERSION}") - endif() - mark_as_advanced(Boost_ROOT_DIR) - - message(STATUS "Boost: ${Boost_ROOT_DIR}") - -if (Boost_ROOT_DIR) - set(Boost_INCLUDE_DIRS - ${Boost_ROOT_DIR}/include - CACHE PATH "Boost include directories." - ) - mark_as_advanced(Boost_INCLUDE_DIRS) - - if(WIN32) - if(TARGET_ARCHITECTURE_BITS EQUAL 64) - set(Boost_LIBRARY_DIRS - ${Boost_ROOT_DIR}/lib/x64 - CACHE PATH "Boost library directories." 
- ) - elseif(TARGET_ARCHITECTURE_BITS EQUAL 32) - set(Boost_LIBRARY_DIRS - ${Boost_ROOT_DIR}/lib/x86-fastcall - CACHE PATH "Boost library directories." - ) - endif() - elseif(UNIX) - if(TARGET_ARCHITECTURE_BITS EQUAL 64) - set(Boost_LIBRARY_DIRS - ${Boost_ROOT_DIR}/lib/x64-fPIC - CACHE PATH "Boost library directories." - ) - elseif(TARGET_ARCHITECTURE_BITS EQUAL 32) - set(Boost_LIBRARY_DIRS - ${Boost_ROOT_DIR}/lib/x86-fPIC - CACHE PATH "Boost library directories." - ) - endif() - endif() - mark_as_advanced(Boost_LIBRARY_DIRS) - - set(Boost_FOUND 1) - else() - set(Boost_FOUND 0) - endif() - - set(Boost_FOUND ${Boost_FOUND} CACHE STRING "Was Boost found?") - mark_as_advanced(Boost_FOUND) -endif() diff --git a/cmake/Modules/FindAMDNinja.cmake b/cmake/Modules/FindAMDNinja.cmake deleted file mode 100644 index a7a736dd..00000000 --- a/cmake/Modules/FindAMDNinja.cmake +++ /dev/null @@ -1,48 +0,0 @@ -## - ####################################################################################################################### - # - # Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. - # - # Permission is hereby granted, free of charge, to any person obtaining a copy - # of this software and associated documentation files (the "Software"), to deal - # in the Software without restriction, including without limitation the rights - # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - # copies of the Software, and to permit persons to whom the Software is - # furnished to do so, subject to the following conditions: - # - # The above copyright notice and this permission notice shall be included in all - # copies or substantial portions of the Software. - # - # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - # SOFTWARE. - # - ####################################################################################################################### - -# Output -# Ninja_FOUND -# Ninja_DIR -# Ninja_EXECUTABLE - -if(NOT DEFINED Ninja_FOUND) - if(NOT DEFINED AMDNinja_FIND_VERSION) - message(FATAL_ERROR "A version to search for must be specified.") - endif() - if(NOT DEFINED GLOBAL_ROOT_DK_DIR) - message(FATAL_ERROR "GLOBAL_ROOT_DK_DIR must be specified.") - endif() - - set(Ninja_DIR ${GLOBAL_ROOT_DK_DIR}/ninja/${AMDNinja_FIND_VERSION} CACHE FILEPATH "Ninja Direction") - mark_as_advanced(Ninja_DIR) - set(Ninja_EXECUTABLE ${Ninja_DIR}/ninja.exe CACHE FILEPATH "Ninja Executable") - mark_as_advanced(Ninja_EXECUTABLE) - - message(STATUS "Ninja: ${Ninja_EXECUTABLE}") - - set(Ninja_FOUND ${Ninja_FOUND} CACHE STRING "Was Ninja found?") - mark_as_advanced(Ninja_FOUND) -endif() diff --git a/cmake/Modules/XglSetupAmdGlobalRoots.cmake b/cmake/Modules/XglSetupAmdGlobalRoots.cmake index fa7d9ab6..7f1718f1 100644 --- a/cmake/Modules/XglSetupAmdGlobalRoots.cmake +++ b/cmake/Modules/XglSetupAmdGlobalRoots.cmake @@ -23,18 +23,6 @@ # ####################################################################################################################### -# find_dk_root must be available -if(NOT DEFINED GLOBAL_ROOT_DK_DIR) - execute_process( - COMMAND find_dk_root - OUTPUT_VARIABLE GLOBAL_ROOT_DK_DIR - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - if(NOT ("${GLOBAL_ROOT_DK_DIR}" STREQUAL "")) - set(GLOBAL_ROOT_DK_DIR ${GLOBAL_ROOT_DK_DIR} CACHE PATH "Global root dk directory..") - endif() -endif() - if(NOT DEFINED GLOBAL_ROOT_SRC_DIR) if(EXISTS ${PROJECT_SOURCE_DIR}/../../drivers) get_filename_component(GLOBAL_ROOT_SRC_DIR 
${PROJECT_SOURCE_DIR}/../.. ABSOLUTE) diff --git a/cmake/XglCompileDefinitions.cmake b/cmake/XglCompileDefinitions.cmake index 3fac14e5..45e6b4de 100644 --- a/cmake/XglCompileDefinitions.cmake +++ b/cmake/XglCompileDefinitions.cmake @@ -49,43 +49,23 @@ macro(xgl_set_compile_definitions) target_compile_definitions(xgl PRIVATE ICD_BUILD_LLPC) endif() - target_compile_definitions(xgl PRIVATE PAL_BUILD_GFX9=1) - - if(XGL_BUILD_NAVI12) - target_compile_definitions(xgl PRIVATE VKI_BUILD_NAVI12=1) - endif() - -#if VKI_BUILD_GFX11 - if(XGL_BUILD_GFX11) - target_compile_definitions(xgl PRIVATE VKI_BUILD_GFX11=1) +#if VKI_ENABLE_DEBUG_BARRIERS + if(VKI_ENABLE_DEBUG_BARRIERS) + target_compile_definitions(xgl PRIVATE VKI_ENABLE_DEBUG_BARRIERS) endif() #endif - -#if VKI_BUILD_NAVI31 - if(XGL_BUILD_NAVI31) - target_compile_definitions(xgl PRIVATE VKI_BUILD_NAVI31=1) +#if VKI_RUNTIME_APP_PROFILE + if(VKI_RUNTIME_APP_PROFILE) + target_compile_definitions(xgl PRIVATE VKI_RUNTIME_APP_PROFILE) endif() #endif - -#if VKI_BUILD_NAVI32 - if(XGL_BUILD_NAVI32) - target_compile_definitions(xgl PRIVATE VKI_BUILD_NAVI32=1) +#if VKI_DEVMODE_COMPILER_SETTINGS + if(VKI_DEVMODE_COMPILER_SETTINGS) + target_compile_definitions(xgl PRIVATE VKI_DEVMODE_COMPILER_SETTINGS) endif() #endif -#if VKI_BUILD_NAVI33 - if(XGL_BUILD_NAVI33) - target_compile_definitions(xgl PRIVATE VKI_BUILD_NAVI33=1) - endif() -#endif - - if(XGL_BUILD_PHOENIX1) - target_compile_definitions(xgl PRIVATE VKI_BUILD_PHOENIX1=1) - endif() - - if(XGL_BUILD_PHOENIX2) - target_compile_definitions(xgl PRIVATE VKI_BUILD_PHOENIX2=1) - endif() + target_compile_definitions(xgl PRIVATE PAL_BUILD_GFX9=1) #if VKI_BUILD_GFX115 if(XGL_BUILD_GFX115) @@ -99,18 +79,6 @@ macro(xgl_set_compile_definitions) endif() #endif - if(XGL_BUILD_REMBRANDT) - target_compile_definitions(xgl PRIVATE VKI_BUILD_REMBRANDT=1) - endif() - - if(XGL_BUILD_RAPHAEL) - target_compile_definitions(xgl PRIVATE VKI_BUILD_RAPHAEL=1) - endif() - - if(XGL_BUILD_MENDOCINO) - 
target_compile_definitions(xgl PRIVATE VKI_BUILD_MENDOCINO=1) - endif() - #if VKI_RAY_TRACING if (VKI_RAY_TRACING) target_compile_definitions(xgl PRIVATE VKI_RAY_TRACING=1) @@ -119,12 +87,6 @@ macro(xgl_set_compile_definitions) endif() #endif -#if VKI_KHR_DISPLAY - if(VKI_KHR_DISPLAY) - target_compile_definitions(xgl PRIVATE VKI_KHR_DISPLAY) - endif() -#endif - #if VKI_NORMALIZED_TRIG_FUNCTIONS if(VKI_NORMALIZED_TRIG_FUNCTIONS) target_compile_definitions(xgl PRIVATE VKI_NORMALIZED_TRIG_FUNCTIONS) diff --git a/cmake/XglOptions.cmake b/cmake/XglOptions.cmake index bac81e04..2cf1a891 100644 --- a/cmake/XglOptions.cmake +++ b/cmake/XglOptions.cmake @@ -29,46 +29,25 @@ macro(xgl_options) ### Cached Project Options ############################################################################################# - option(XGL_ENABLE_PRINTS_ASSERTS "Build with debug print enabled?" OFF) - - option(XGL_ENABLE_LTO "Build with LTO enabled?" ON) - - option(XGL_ENABLE_GCOV "Build with gcov source code coverage?" OFF) - - option(XGL_BUILD_GFX103 "Build vulkan for GFX103" ON) - - option(XGL_BUILD_NAVI12 "Build vulkan for Navi12" ON) - - option(XGL_BUILD_REMBRANDT "Build vulkan for REMBRANDT" ON) - - option(XGL_BUILD_RAPHAEL "Build vulkan for RAPHAEL" ON) - - option(XGL_BUILD_MENDOCINO "Build vulkan for MENDOCINO" ON) - -#if VKI_BUILD_GFX11 - option(XGL_BUILD_GFX11 "Build vulkan for GFX11" ON) +#if VKI_ENABLE_DEBUG_BARRIERS + option(VKI_ENABLE_DEBUG_BARRIERS "Build with debug barriers enabled?" OFF) #endif - -#if VKI_BUILD_NAVI31 - option(XGL_BUILD_NAVI31 "Build vulkan for Navi31" ON) +#if VKI_RUNTIME_APP_PROFILE + option(VKI_RUNTIME_APP_PROFILE "Build with runtime app profile?" OFF) #endif - -#if VKI_BUILD_NAVI32 - option(XGL_BUILD_NAVI32 "Build vulkan for Navi32" ON) +#if VKI_DEVMODE_COMPILER_SETTINGS + option(VKI_DEVMODE_COMPILER_SETTINGS "Build with devmode compiler settings?" 
OFF) #endif -#if VKI_BUILD_NAVI33 - option(XGL_BUILD_NAVI33 "Build vulkan for Navi33" ON) -#endif + option(XGL_ENABLE_PRINTS_ASSERTS "Build with debug print enabled?" OFF) - option(XGL_BUILD_PHOENIX1 "Build vulkan for PHOENIX1" ON) + option(XGL_ENABLE_LTO "Build with LTO enabled?" ON) - option(XGL_BUILD_PHOENIX2 "Build vulkan for PHOENIX2" ON) + option(XGL_ENABLE_GCOV "Build with gcov source code coverage?" OFF) #if VKI_BUILD_GFX115 option(XGL_BUILD_GFX115 "Build vulkan for GFX115" ON) #endif - #if VKI_BUILD_STRIX1 option(XGL_BUILD_STRIX1 "Build vulkan for STRIX1" ON) #endif @@ -85,7 +64,7 @@ macro(xgl_options) option(VKI_GPU_DECOMPRESS "Build vulkan with GPU_DECOMPRESS" ON) #endif - option(ICD_BUILD_LLPC "Build LLPC?" ON) + option(ICD_BUILD_LLPC "Build LLPC?" ON) option(XGL_LLVM_UPSTREAM "Build with upstreamed LLVM?" OFF) diff --git a/cmake/XglOverrides.cmake b/cmake/XglOverrides.cmake index b856086e..0f1ab516 100644 --- a/cmake/XglOverrides.cmake +++ b/cmake/XglOverrides.cmake @@ -103,11 +103,8 @@ macro(xgl_overrides_pal) set(PAL_BUILD_GPUOPEN ${ICD_GPUOPEN_DEVMODE_BUILD} CACHE BOOL "${PROJECT_NAME} override." FORCE) - if(XGL_BUILD_NAVI31 OR XGL_BUILD_NAVI32 OR XGL_BUILD_NAVI33 OR XGL_BUILD_PHOENIX1) - set(PAL_BUILD_GFX11 1 CACHE BOOL "${PROJECT_NAME} override." FORCE) - endif() - - set(PAL_BUILD_PHOENIX2 ${XGL_BUILD_PHOENIX2} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(PAL_BUILD_GFX11 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(PAL_BUILD_PHOENIX2 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) #if VKI_BUILD_GFX115 set(PAL_BUILD_GFX115 ${XGL_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE) @@ -141,33 +138,16 @@ macro(xgl_overrides_vkgc) set(LLPC_BUILD_TESTS ${XGL_BUILD_TESTS} CACHE BOOL "${PROJECT_NAME} override." FORCE) - set(LLPC_BUILD_NAVI12 ${XGL_BUILD_NAVI12} CACHE BOOL "${PROJECT_NAME} override." FORCE) - - set(LLPC_BUILD_REMBRANDT ${XGL_BUILD_REMBRANDT} CACHE BOOL "${PROJECT_NAME} override." 
FORCE) - - set(LLPC_BUILD_RAPHAEL ${XGL_BUILD_RAPHAEL} CACHE BOOL "${PROJECT_NAME} override." FORCE) - - set(LLPC_BUILD_MENDOCINO ${XGL_BUILD_MENDOCINO} CACHE BOOL "${PROJECT_NAME} override." FORCE) - -#if VKI_BUILD_GFX11 - set(LLPC_BUILD_GFX11 ${XGL_BUILD_GFX11} CACHE BOOL "${PROJECT_NAME} override." FORCE) -#endif - -#if VKI_BUILD_NAVI31 - set(LLPC_BUILD_NAVI31 ${XGL_BUILD_NAVI31} CACHE BOOL "${PROJECT_NAME} override." FORCE) -#endif - -#if VKI_BUILD_NAVI32 - set(LLPC_BUILD_NAVI32 ${XGL_BUILD_NAVI32} CACHE BOOL "${PROJECT_NAME} override." FORCE) -#endif - -#if VKI_BUILD_NAVI33 - set(LLPC_BUILD_NAVI33 ${XGL_BUILD_NAVI33} CACHE BOOL "${PROJECT_NAME} override." FORCE) -#endif - - set(LLPC_BUILD_PHOENIX1 ${XGL_BUILD_PHOENIX1} CACHE BOOL "${PROJECT_NAME} override." FORCE) - - set(LLPC_BUILD_PHOENIX2 ${XGL_BUILD_PHOENIX2} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_NAVI12 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_REMBRANDT ON CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_RAPHAEL ON CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_MENDOCINO ON CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_GFX11 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_NAVI31 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_NAVI32 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_NAVI33 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_PHOENIX1 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_PHOENIX2 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) #if VKI_BUILD_GFX115 set(LLPC_BUILD_GFX115 ${XGL_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE) @@ -187,6 +167,14 @@ macro(xgl_overrides) set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_GPUOPEN_CLIENT_MAJOR_VERSION}) endif() +#if VKI_BUILD_GFX115 +#if VKI_BUILD_STRIX1 + if(XGL_BUILD_STRIX1) + set(XGL_BUILD_GFX115 ON CACHE BOOL "XGL_BUILD_GFX115 override." 
FORCE) + endif() +#endif +#endif + xgl_get_path() if(XGL_BUILD_TESTS) diff --git a/cmake/XglVersions.cmake b/cmake/XglVersions.cmake index 7b95dbdd..e0f16371 100644 --- a/cmake/XglVersions.cmake +++ b/cmake/XglVersions.cmake @@ -28,7 +28,7 @@ include_guard() # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. # It must be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -set(ICD_PAL_CLIENT_MAJOR_VERSION "888") +set(ICD_PAL_CLIENT_MAJOR_VERSION "892") # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. # It describes the interface version of the gpuopen shared module (part of PAL) that the ICD supports. @@ -37,7 +37,7 @@ set(ICD_GPUOPEN_CLIENT_MAJOR_VERSION "42") #if VKI_RAY_TRACING # This will become the value of GPURT_CLIENT_INTERFACE_MAJOR_VERSION if VKI_RAY_TRACING=1. # It describes the interface version of the GpuRT shared module that the ICD supports. -set(ICD_GPURT_CLIENT_MAJOR_VERSION "47") +set(ICD_GPURT_CLIENT_MAJOR_VERSION "48") #endif # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1. diff --git a/icd/CMakeLists.txt b/icd/CMakeLists.txt index 52531fd9..00baae1d 100644 --- a/icd/CMakeLists.txt +++ b/icd/CMakeLists.txt @@ -135,6 +135,7 @@ target_sources(xgl PRIVATE api/vk_instance.cpp api/vk_memory.cpp api/vk_pipeline.cpp + api/vk_pipeline_binary.cpp api/vk_pipeline_layout.cpp api/vk_pipeline_cache.cpp api/vk_private_data_slot.cpp @@ -189,15 +190,6 @@ if(ICD_BUILD_LLPC) ) endif() -# vk_utils.cpp uses the __DATE__ and __TIME__ macros to generate a pipelineCacheUUID. The following -# rule forces vk_utils.cpp to be re-compiled on every build, so that an up-to-date time/date -# is always used regardless of which files were touched since the last build. 
-add_custom_command( - TARGET xgl PRE_BUILD - COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_SOURCE_DIR}/api/vk_utils.cpp - COMMENT "Touching vk_utils.cpp" -) - ### ICD Auto-generated Shader Profiles Files ################################## # ICD_GENDIR Path to the code generation tools set(ICD_GENDIR ${CMAKE_CURRENT_SOURCE_DIR}/tools/generate) diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json index ba102a99..20285065 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ -2,13 +2,13 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.293" + "api_version": "1.3.295" }, "layer": { "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@", "type": "GLOBAL", "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.293", + "api_version": "1.3.295", "implementation_version": "1", "description": "AMD switchable graphics layer", "functions": { diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp index 27e608b3..7ab4adb7 100644 --- a/icd/api/app_profile.cpp +++ b/icd/api/app_profile.cpp @@ -792,6 +792,12 @@ constexpr AppProfilePatternEntry AppEngineXenon = "xenonengine" }; +constexpr AppProfilePatternEntry AppNameHoudini = +{ + PatternAppNameLower, + "houdini" +}; + // Section END of AppProfilePatternEntry for all games // This is a table of patterns. The first matching pattern in this table will be returned. 
@@ -1397,14 +1403,6 @@ AppProfilePattern AppPatternTable[] = } }, - { - AppProfile::Source2Engine, - { - AppEngineSource2, - PatternEnd - } - }, - { AppProfile::DxvkGodOfWar, { @@ -1615,6 +1613,14 @@ AppProfilePattern AppPatternTable[] = } }, + { + AppProfile::Source2Engine, + { + AppEngineSource2, + PatternEnd + } + }, + { AppProfile::WindowKill, { @@ -1631,7 +1637,16 @@ AppProfilePattern AppPatternTable[] = AppEngineXenon, PatternEnd } - } + }, + + { + AppProfile::Houdini, + { + AppNameHoudini, + PatternEnd + } + }, + }; static char* GetExecutableName(size_t* pLength, bool includeExtension = false); diff --git a/icd/api/app_resource_optimizer.cpp b/icd/api/app_resource_optimizer.cpp index 5118967b..dd4a8644 100644 --- a/icd/api/app_resource_optimizer.cpp +++ b/icd/api/app_resource_optimizer.cpp @@ -63,7 +63,7 @@ void ResourceOptimizer::Init() BuildTuningProfile(); -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE BuildRuntimeProfile(); #endif @@ -127,7 +127,7 @@ void ResourceOptimizer::OverrideImageCreateInfo( ApplyProfileToImageCreateInfo(m_tuningProfile, resourceKey, pCreateInfo); -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE ApplyProfileToImageCreateInfo(m_runtimeProfile, resourceKey, pCreateInfo); #endif @@ -141,7 +141,7 @@ void ResourceOptimizer::OverrideImageViewCreateInfo( ApplyProfileToImageViewCreateInfo(m_tuningProfile, resourceKey, pPalViewInfo); -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE ApplyProfileToImageViewCreateInfo(m_runtimeProfile, resourceKey, pPalViewInfo); #endif } @@ -457,7 +457,7 @@ void ResourceOptimizer::BuildAppProfile() } } -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE void ResourceOptimizer::BuildRuntimeProfile() { memset(&m_runtimeProfile, 0, sizeof(m_runtimeProfile)); diff --git a/icd/api/app_shader_optimizer.cpp b/icd/api/app_shader_optimizer.cpp index 6595e97a..aca59ef2 100644 --- a/icd/api/app_shader_optimizer.cpp +++ b/icd/api/app_shader_optimizer.cpp @@ -39,7 +39,7 @@ #include 
"palDbgPrint.h" #include "palFile.h" -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE #include "utils/json_reader.h" #endif @@ -68,7 +68,7 @@ void ShaderOptimizer::Init() m_appShaderProfile.PipelineProfileToJson(m_tuningProfile, m_settings.pipelineProfileDumpFile); } -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE BuildRuntimeProfile(); #endif } @@ -136,7 +136,7 @@ bool ShaderOptimizer::HasMatchingProfileEntry( foundMatch = HasMatchingProfileEntry(m_tuningProfile, pipelineKey); } -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE if (foundMatch == false) { foundMatch = HasMatchingProfileEntry(m_runtimeProfile, pipelineKey); @@ -192,7 +192,7 @@ void ShaderOptimizer::CalculateMatchingProfileEntriesHash( { CalculateMatchingProfileEntriesHash(m_appProfile, pipelineKey, pHasher); CalculateMatchingProfileEntriesHash(m_tuningProfile, pipelineKey, pHasher); -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE CalculateMatchingProfileEntriesHash(m_runtimeProfile, pipelineKey, pHasher); #endif } @@ -393,7 +393,7 @@ void ShaderOptimizer::OverrideShaderCreateInfo( ApplyProfileToShaderCreateInfo(m_tuningProfile, pipelineKey, shaderIndex, options); -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE ApplyProfileToShaderCreateInfo(m_runtimeProfile, pipelineKey, shaderIndex, options); #endif } @@ -534,7 +534,7 @@ void ShaderOptimizer::OverrideGraphicsPipelineCreateInfo( ApplyProfileToGraphicsPipelineCreateInfo( m_tuningProfile, pipelineKey, shaderStages, pPalCreateInfo, pGraphicsShaderInfos); -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE ApplyProfileToGraphicsPipelineCreateInfo( m_runtimeProfile, pipelineKey, shaderStages, pPalCreateInfo, pGraphicsShaderInfos); #endif @@ -549,7 +549,7 @@ void ShaderOptimizer::OverrideComputePipelineCreateInfo( ApplyProfileToComputePipelineCreateInfo(m_tuningProfile, pipelineKey, pDynamicCompueShaderInfo); -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE 
ApplyProfileToComputePipelineCreateInfo(m_runtimeProfile, pipelineKey, pDynamicCompueShaderInfo); #endif } @@ -567,7 +567,7 @@ ShaderOptimizer::~ShaderOptimizer() { pAllocCB->pfnFree(pAllocCB->pUserData, m_tuningProfile.pEntries); } -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE if (m_runtimeProfile.pEntries != nullptr) { pAllocCB->pfnFree(pAllocCB->pUserData, m_runtimeProfile.pEntries); @@ -1182,6 +1182,20 @@ void ShaderOptimizer::BuildAppProfileLlpc() m_appShaderProfile.BuildAppProfileLlpc(appProfile, gfxIpLevel, asicRevision, &m_appProfile); + if ((appProfile == AppProfile::MadMax) || + (appProfile == AppProfile::SedpEngine) || + (appProfile == AppProfile::ThronesOfBritannia)) + { + i = m_appProfile.entryCount++; + PipelineProfileEntry *pEntry = &m_appProfile.pEntries[i]; + pEntry->pattern.match.always = true; + for (uint32_t stage = 0; stage < ShaderStageCount; ++stage) + { + pEntry->action.shaders[stage].shaderCreate.apply.useSiScheduler = true; + pEntry->action.shaders[stage].shaderCreate.tuningOptions.useSiScheduler = true; + } + } + if (appProfile == AppProfile::ShadowOfTheTombRaider) { i = m_appProfile.entryCount++; @@ -1224,6 +1238,14 @@ void ShaderOptimizer::BuildAppProfileLlpc() pEntry->action.shaders[ShaderStage::ShaderStageCompute].shaderCreate.apply.workaroundStorageImageFormats = true; } + if (appProfile == AppProfile::Houdini) + { + i = m_appProfile.entryCount++; + PipelineProfileEntry *pEntry = &m_appProfile.pEntries[i]; + pEntry->pattern.match.always = true; + pEntry->action.shaders[ShaderStage::ShaderStageCompute].shaderCreate.apply.workaroundStorageImageFormats = true; + } + if (appProfile == AppProfile::ELEX2) { i = m_appProfile.entryCount++; @@ -1250,7 +1272,7 @@ void ShaderOptimizer::PrintProfileEntryMatch( { pProfile = "Application"; } -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE else if (&profile == &m_runtimeProfile) { pProfile = "Runtime"; @@ -1310,7 +1332,7 @@ void ShaderOptimizer::PrintProfileEntryMatch( } 
#endif -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE // ===================================================================================================================== void ShaderOptimizer::RuntimeProfileParseError() { diff --git a/icd/api/appopt/shader_profiles/llpc/generic/Talos/profile.json b/icd/api/appopt/shader_profiles/llpc/generic/Talos/profile.json index aeefe6c8..4f10b94c 100644 --- a/icd/api/appopt/shader_profiles/llpc/generic/Talos/profile.json +++ b/icd/api/appopt/shader_profiles/llpc/generic/Talos/profile.json @@ -77,7 +77,8 @@ }, "action": { "ps": { - "useSiScheduler": true + "useSiScheduler": true, + "vgprLimit": 48 } } }, diff --git a/icd/api/appopt/shader_profiles/llpc/gfxIp11_0/Navi33/RainbowSixExtraction/profile.json b/icd/api/appopt/shader_profiles/llpc/gfxIp11_0/Navi33/RainbowSixExtraction/profile.json new file mode 100644 index 00000000..8e4586c2 --- /dev/null +++ b/icd/api/appopt/shader_profiles/llpc/gfxIp11_0/Navi33/RainbowSixExtraction/profile.json @@ -0,0 +1,54 @@ +{ + "entries": [ + { + "pattern": { + "ps": { + "codeHash": "0xdf44ae88f263605d 6d21f3936125b78b" + } + }, + "action": { + "ps": { + "disableLoopUnrolls": true, + "waveSize": 64 + } + } + }, + { + "pattern": { + "ps": { + "codeHash": "0x4dab409a1cee9aee 2b8c1d18f83bc11d" + } + }, + "action": { + "ps": { + "waveSize": 32 + } + } + }, + { + "pattern": { + "ps": { + "codeHash": "0x5df94d2774fadfee 941f5d1b215994fc" + } + }, + "action": { + "ps": { + "allowReZ": 1, + "waveSize": 64 + } + } + }, + { + "pattern": { + "ps": { + "codeHash": "0x295d1b6cd2aff9c4 9a96d6ffac16b5e9" + } + }, + "action": { + "ps": { + "aggressiveInvariantLoads": "EnableOptimization" + } + } + } + ] +} \ No newline at end of file diff --git a/icd/api/appopt/split_raytracing_layer.cpp b/icd/api/appopt/split_raytracing_layer.cpp index 0578124a..d3355922 100644 --- a/icd/api/appopt/split_raytracing_layer.cpp +++ b/icd/api/appopt/split_raytracing_layer.cpp @@ -50,63 +50,92 @@ void 
SplitRaytracingLayer::TraceRaysDispatchPerDevice( const RuntimeSettings& settings = pCmdBuffer->VkDevice()->GetRuntimeSettings(); const RayTracingPipeline* pPipeline = pCmdBuffer->RenderState()->pRayTracingPipeline; + const Pal::DispatchDims traceSize = + { + .x = width, + .y = height, + .z = depth + }; + const uint32_t splitX = settings.rtDispatchSplitX; const uint32_t splitY = settings.rtDispatchSplitY; const uint32_t splitZ = settings.rtDispatchSplitZ; - const uint32_t blockW = (width + splitX - 1) / splitX; - const uint32_t blockH = (height + splitY - 1) / splitY; - const uint32_t blockD = (depth + splitZ - 1) / splitZ; - - uint32_t dispatchSizeX = 0; - uint32_t dispatchSizeY = 0; - uint32_t dispatchSizeZ = 0; + const Pal::DispatchDims blockSize = + { + .x = (traceSize.x + splitX - 1) / splitX, + .y = (traceSize.y + splitY - 1) / splitY, + .z = (traceSize.z + splitZ - 1) / splitZ + }; - pPipeline->GetDispatchSize(&dispatchSizeX, &dispatchSizeY, &dispatchSizeZ, blockW, blockH, blockD); + const Pal::DispatchDims blockDispatchSize = pPipeline->GetDispatchSize(blockSize); - for (uint32_t z = 0; z < splitZ; z++) - { - uint32_t zOffset = z * blockD; - for (uint32_t x = 0; x < splitX; x++) + // Lambda function used to help dispatch. + auto dispatch = [pCmdBuffer, deviceIdx](Pal::DispatchDims offset, Pal::DispatchDims size) + { + pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdDispatchOffset( + offset, + size, + size); + + // To avoid TDR, the large dispatch is split into mulitple smaller sub-dispatches. However, + // when a MCBP event arrives, PFP may have already processed all dispatch commands, so mulitple + // smaller sub-dispatches cannot be interrupted by MCBP in this case. + // The Barrier below is used to stall the PFP and allow MCBP to happen between dispatches. 
+ Pal::BarrierTransition transition = {}; + transition.srcCacheMask = Pal::CoherShaderRead; + transition.dstCacheMask = Pal::CoherShaderRead; + const Pal::HwPipePoint postCs = Pal::HwPipePostCs; + Pal::BarrierInfo barrierInfo = {}; + barrierInfo.pipePointWaitCount = 1; + barrierInfo.pPipePoints = &postCs; + barrierInfo.waitPoint = Pal::HwPipeTop; + pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdBarrier(barrierInfo); + }; + + // Lambda function used to help splitting. + auto split = [](uint32_t size, uint32_t incSize, auto&& fun) { - uint32_t xOffset = x * blockW; - for (uint32_t y = 0; y < splitY; y++) + uint32_t i = 0; + for (; i <= size - incSize; i += incSize) { - uint32_t yOffset = y * blockH; - - uint32_t dispatchOffsetX = 0; - uint32_t dispatchOffsetY = 0; - uint32_t dispatchOffsetZ = 0; - - pPipeline->GetDispatchSize(&dispatchOffsetX, - &dispatchOffsetY, - &dispatchOffsetZ, - xOffset, - yOffset, - zOffset); - - pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdDispatchOffset( - { dispatchOffsetX, dispatchOffsetY, dispatchOffsetZ }, - { dispatchSizeX, dispatchSizeY, dispatchSizeZ }, - { dispatchSizeX, dispatchSizeY, dispatchSizeZ }); - - // To avoid TDR, the large dispatch is split into mulitple smaller sub-dispatches. However, - // when a MCBP event arrives, PFP may have already processed all dispatch commands, so mulitple - // smaller sub-dispatches cannot be interrupted by MCBP in this case. - // The Barrier below is used to stall the PFP and allow MCBP to happen between dispatches. 
- Pal::BarrierTransition transition = {}; - transition.srcCacheMask = Pal::CoherShaderRead; - transition.dstCacheMask = Pal::CoherShaderRead; - const Pal::HwPipePoint postCs = Pal::HwPipePostCs; - Pal::BarrierInfo barrierInfo = {}; - barrierInfo.pipePointWaitCount = 1; - barrierInfo.pPipePoints = &postCs; - barrierInfo.waitPoint = Pal::HwPipeTop; - pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdBarrier(barrierInfo); - + fun(i, incSize); + } + if (i < size) + { + fun(i, size - i); } + }; + + // Split Z axis. + split(traceSize.z, blockDispatchSize.z, + [split, traceSize, blockDispatchSize, &dispatch](uint32_t offsetZ, uint32_t sizeZ) + { + // Split Y axis. + split(traceSize.y, blockDispatchSize.y, + [split, traceSize, blockDispatchSize, &dispatch, offsetZ, sizeZ](uint32_t offsetY, uint32_t sizeY) + { + //Split X axis. + split(traceSize.x, blockDispatchSize.x, + [&dispatch, offsetZ, sizeZ, offsetY, sizeY](uint32_t offsetX, uint32_t sizeX) + { + Pal::DispatchDims offset = + { + .x = offsetX, + .y = offsetY, + .z = offsetZ + }; + Pal::DispatchDims size = + { + .x = sizeX, + .y = sizeY, + .z = sizeZ + }; + dispatch(offset, size); + }); + }); } - } + ); } // ===================================================================================================================== diff --git a/icd/api/compiler_solution.cpp b/icd/api/compiler_solution.cpp index eb43bcdd..84f8929c 100644 --- a/icd/api/compiler_solution.cpp +++ b/icd/api/compiler_solution.cpp @@ -369,4 +369,29 @@ uint32_t CompilerSolution::GetRayTracingVgprLimit( } #endif +bool CompilerSolution::ClonePipelineBinary( + const Vkgc::BinaryData* pProvidedBinary, + Vkgc::BinaryData* pNewBinary) +{ + bool success = false; + + // Create memory, to be freed later, just as StoreShaderBinaryToCache does. The VkInstance allocation callbacks + // are used here for consistency with the PipelineBinaryCache that backs the PipelineCache. 
+ void* pBinaryData = m_pPhysicalDevice->Manager()->VkInstance()->AllocMem( + pProvidedBinary->codeSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (pBinaryData != nullptr) + { + memcpy(pBinaryData, pProvidedBinary->pCode, pProvidedBinary->codeSize); + + pNewBinary->pCode = pBinaryData; + pNewBinary->codeSize = pProvidedBinary->codeSize; + success = true; + } + + return success; +} + } diff --git a/icd/api/compiler_solution_llpc.cpp b/icd/api/compiler_solution_llpc.cpp index c9928c39..a5707b60 100644 --- a/icd/api/compiler_solution_llpc.cpp +++ b/icd/api/compiler_solution_llpc.cpp @@ -131,8 +131,7 @@ void CompilerSolutionLlpc::Destroy() // Builds shader module from SPIR-V binary code. VkResult CompilerSolutionLlpc::BuildShaderModule( const Device* pDevice, - VkShaderModuleCreateFlags flags, - VkShaderModuleCreateFlags internalShaderFlags, + ShaderModuleFlags flags, const Vkgc::BinaryData& shaderBinary, ShaderModuleHandle* pShaderModule, const PipelineOptimizerKey& profileKey) @@ -155,7 +154,7 @@ VkResult CompilerSolutionLlpc::BuildShaderModule( ); #if VKI_RAY_TRACING - if ((internalShaderFlags & VK_INTERNAL_SHADER_FLAGS_RAY_TRACING_INTERNAL_SHADER_BIT) != 0) + if ((flags & ShaderModuleInternalRayTracingShader) != 0) { moduleInfo.options.pipelineOptions.internalRtShaders = true; } @@ -442,6 +441,8 @@ VkResult CompilerSolutionLlpc::CreateGraphicsShaderBinary( PipelineCache* pPipelineCache, GraphicsLibraryType gplType, GraphicsPipelineBinaryCreateInfo* pCreateInfo, + const Vkgc::BinaryData* pProvidedBinary, + const Util::MetroHash::Hash* pProvidedBinaryHash, void* pPipelineDumpHandle, GplModuleState* pModuleState) { @@ -458,6 +459,8 @@ VkResult CompilerSolutionLlpc::CreateGraphicsShaderBinary( int64_t startTime = Util::GetPerfCpuTime(); bool binaryProvided = false; + binaryProvided = (pProvidedBinary != nullptr) && (pProvidedBinary->codeSize > 0); + if (binaryProvided == false) { Util::MetroHash128 hasher; @@ -466,6 +469,10 @@ VkResult 
CompilerSolutionLlpc::CreateGraphicsShaderBinary( hasher.Update(m_pPhysicalDevice->GetSettingsLoader()->GetSettingsHash()); hasher.Finalize(cacheId.bytes); } + else + { + cacheId = *pProvidedBinaryHash; + } Vkgc::BinaryData finalBinary = {}; if ((pDevice->GetRuntimeSettings().shaderReplaceMode == ShaderReplacePipelineBinaryHash) || @@ -502,6 +509,18 @@ VkResult CompilerSolutionLlpc::CreateGraphicsShaderBinary( { LoadShaderBinaryFromCache(pPipelineCache, &cacheId, &shaderLibraryBinary, &hitCache, &hitAppCache); } + else + { + if (ClonePipelineBinary(pProvidedBinary, &shaderLibraryBinary)) + { + hitCache = true; + hitAppCache = true; + } + else + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + } if (pPipelineCache != nullptr) { @@ -981,7 +1000,6 @@ VkResult CompilerSolutionLlpc::CreateLlpcCompiler( const uint32_t MaxLlpcOptions = 32; Llpc::ICompiler* pCompiler = nullptr; const RuntimeSettings& settings = m_pPhysicalDevice->GetRuntimeSettings(); - AppProfile appProfile = m_pPhysicalDevice->GetAppProfile(); // Get the executable name and path char executableNameBuffer[PATH_MAX]; char* pExecutablePtr; @@ -1046,26 +1064,9 @@ VkResult CompilerSolutionLlpc::CreateLlpcCompiler( // NOTE: For testing consistency, these options should be kept the same as those of // "amdllpc" (Init()). - // WARNING: Do not conditionally add options based on GFXIP version as these will - // break support for systems with a mixture of ASICs. GFXIP dependent options - // should be subtarget features or handled in LLVM backend. 
- - if ((appProfile == AppProfile::SeriousSamFusion) || - (appProfile == AppProfile::Talos)) - { - llpcOptions[numOptions++] = "-unroll-partial-threshold=700"; - } - - ShaderCacheMode shaderCacheMode = settings.shaderCacheMode; - if ((appProfile == AppProfile::MadMax) || - (appProfile == AppProfile::SedpEngine) || - (appProfile == AppProfile::ThronesOfBritannia)) - { - llpcOptions[numOptions++] = "-enable-si-scheduler"; - // si-scheduler interacts badly with SIFormMemoryClauses pass, so - // disable the effect of that pass by limiting clause length to 1. - llpcOptions[numOptions++] = "-amdgpu-max-memory-clause=1"; - } + // WARNING: Do not conditionally add options! + // GFXIP or AppProfile dependent options should be set via pipeline options structure in LLPC + // or subtarget features handled in LLVM backend. #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 66 optionLength = Util::Snprintf(pOptionBuffer, bufSize, "-executable-name=%s", pExecutablePtr); @@ -1081,6 +1082,7 @@ VkResult CompilerSolutionLlpc::CreateLlpcCompiler( pOptionBuffer += optionLength; bufSize -= optionLength; + ShaderCacheMode shaderCacheMode = settings.shaderCacheMode; optionLength = Util::Snprintf(pOptionBuffer, bufSize, "-shader-cache-mode=%d", shaderCacheMode); ++optionLength; llpcOptions[numOptions++] = pOptionBuffer; diff --git a/icd/api/debug_printf.cpp b/icd/api/debug_printf.cpp index 866ce8fa..2976d8cb 100644 --- a/icd/api/debug_printf.cpp +++ b/icd/api/debug_printf.cpp @@ -114,8 +114,8 @@ void DebugPrintf::BindPipeline( { m_pPipeline = pPipeline; - const size_t bufferSrdSize = - pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gfxipProperties.srdSizes.bufferView; + const size_t bufferSrdSize = pDevice->VkPhysicalDevice(DefaultDeviceIndex)-> + PalProperties().gfxipProperties.srdSizes.untypedBufferView; void* pTable = pCmdBuffer->CmdAllocateEmbeddedData( bufferSrdSize, bufferSrdSize, &tableVa); @@ -142,6 +142,7 @@ void DebugPrintf::BindPipeline( pSubSections->Reserve(1); 
ParseFormatStringsToSubSection(it.Get()->value.printStr, pSubSections); } + constexpr VkSemaphoreTypeCreateInfo semaphoreTypeInfo { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, @@ -174,7 +175,7 @@ void DebugPrintf::Init( const Device* pDevice) { const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); - if ((settings.enableDebugPrintf) && (m_state == Uninitialized)) + if ((pDevice->GetEnabledFeatures().enableDebugPrintf) && (m_state == Uninitialized)) { m_state = Enabled; m_pPipeline = nullptr; @@ -280,6 +281,7 @@ uint64_t DebugPrintf::ProcessDebugPrintfBuffer( outputDecodedSpecifiers.Reserve(5); // Set pPtr point to the head of the system memory pPtr = pPrintBuffer; + while ((bufferSize - decodeOffset) > 1) { // Decode entry @@ -329,11 +331,16 @@ uint64_t DebugPrintf::ProcessDebugPrintfBuffer( varIndex, &outputDecodedSpecifiers[varIndex]); } + OutputBufferString(formatString, *pSubSections, &outputBufferStr); + decodeOffset += outputsInDwords; } + WriteToFile(pFile, outputBufferStr); + pDevice->VkInstance()->FreeMem(pPrintBuffer); + m_frame++; } } @@ -739,6 +746,7 @@ void DebugPrintf::DecodeFormatStringsFromElf( } } } + bool found = true; PrintfElfString* pElfString = nullptr; result = pFormatStrings->FindAllocate(hashValue, &found, &pElfString); @@ -770,3 +778,4 @@ void DebugPrintf::DecodeFormatStringsFromElf( } } } + diff --git a/icd/api/graphics_pipeline_common.cpp b/icd/api/graphics_pipeline_common.cpp index cfe52317..cf510311 100644 --- a/icd/api/graphics_pipeline_common.cpp +++ b/icd/api/graphics_pipeline_common.cpp @@ -379,18 +379,6 @@ static VkFormat GetDepthFormat( return format; } -// ===================================================================================================================== -static uint32_t GetColorAttachmentCount( - const RenderPass* pRenderPass, - const uint32_t subpassIndex, - const VkPipelineRenderingCreateInfo* pPipelineRenderingCreateInfo -) -{ - return (pRenderPass != nullptr) ? 
pRenderPass->GetSubpassColorReferenceCount(subpassIndex) : - (pPipelineRenderingCreateInfo != nullptr) ? pPipelineRenderingCreateInfo->colorAttachmentCount : - 0u; -} - // ===================================================================================================================== static VkShaderStageFlagBits GetLibraryActiveShaderStages( const VkGraphicsPipelineLibraryFlagsEXT libFlags) @@ -1518,7 +1506,7 @@ static void BuildMultisampleState( pInfo->immedInfo.msaaCreateInfo.shaderExportMaskSamples = subpassCoverageSampleCount; pInfo->immedInfo.msaaCreateInfo.sampleMask = (pMs->pSampleMask != nullptr) ? pMs->pSampleMask[0] - : 0xffffffff; + : 0xffff; pInfo->immedInfo.msaaCreateInfo.sampleClusters = subpassCoverageSampleCount; pInfo->immedInfo.msaaCreateInfo.alphaToCoverageSamples = subpassCoverageSampleCount; pInfo->immedInfo.msaaCreateInfo.occlusionQuerySamples = subpassDepthSampleCount; @@ -1705,7 +1693,8 @@ static void BuildColorBlendState( pInfo->staticStateMask |= 1ULL << static_cast(DynamicStatesInternal::LogicOpEnable); } - if (GetColorAttachmentCount(pRenderPass, subpass, pRendering) != 0) + const uint32 numColorTargets = GraphicsPipelineCommon::GetColorAttachmentCount(pRenderPass, subpass, pRendering); + if (numColorTargets != 0) { if (pCb != nullptr) { @@ -1713,11 +1702,18 @@ static void BuildColorBlendState( pInfo->immedInfo.logicOpEnable = pCb->logicOpEnable; } - uint32_t numColorTargets = 0; + bool useBlendAttachments = false; const VkPipelineColorWriteCreateInfoEXT* pColorWriteCreateInfo = nullptr; if (pCb != nullptr) { - numColorTargets = Min(pCb->attachmentCount, Pal::MaxColorTargets); + // If the pipeline is created with these 3 states as dynamic, the attachmentCount from the + // VkPipelineColorBlendStateCreateInfo is ignored. 
+ if ((IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::ColorBlendEnable) == false) || + (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::ColorBlendEquation) == false) || + (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::ColorWriteMask) == false)) + { + useBlendAttachments = true; + } const void* pNext = static_cast(pCb->pNext); @@ -1746,11 +1742,6 @@ static void BuildColorBlendState( } } - if (pRendering != nullptr) - { - numColorTargets = Min(pRendering->colorAttachmentCount, Pal::MaxColorTargets); - } - pInfo->immedInfo.colorWriteEnable = 0; pInfo->immedInfo.colorWriteMask = 0; @@ -1788,8 +1779,13 @@ static void BuildColorBlendState( // disable shader writes through that target. if (pCbDst->swizzledFormat.format != Pal::ChNumFormat::Undefined) { - const VkPipelineColorBlendAttachmentState* pSrc = - (pCb != nullptr) ? &pCb->pAttachments[i] : nullptr; + const VkPipelineColorBlendAttachmentState* pSrc = nullptr; + + if (useBlendAttachments && (i < pCb->attachmentCount)) + { + pSrc = &pCb->pAttachments[i]; + } + VkColorComponentFlags colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | @@ -1945,14 +1941,15 @@ static void BuildPreRasterizationShaderState( #endif GraphicsPipelineObjectCreateInfo* pInfo) { - if (pIn->pTessellationState != nullptr) + // Set patch control points only if tessellation shader is enabled. 
+ pInfo->immedInfo.inputAssemblyState.patchControlPoints = 0; + if (pInfo->activeStages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) { - pInfo->immedInfo.inputAssemblyState.patchControlPoints = static_cast( - pIn->pTessellationState->patchControlPoints); - } - else - { - pInfo->immedInfo.inputAssemblyState.patchControlPoints = 0; + if (pIn->pTessellationState != nullptr) + { + pInfo->immedInfo.inputAssemblyState.patchControlPoints = static_cast( + pIn->pTessellationState->patchControlPoints); + } } // Build states via VkPipelineRasterizationStateCreateInfo @@ -3240,4 +3237,15 @@ void GraphicsPipelineCommon::HandleExtensionStructs( } } +// ===================================================================================================================== +uint32_t GraphicsPipelineCommon::GetColorAttachmentCount( + const RenderPass* pRenderPass, + const uint32_t subpassIndex, + const VkPipelineRenderingCreateInfo* pPipelineRenderingCreateInfo) +{ + return (pRenderPass != nullptr) ? pRenderPass->GetSubpassColorReferenceCount(subpassIndex) : + (pPipelineRenderingCreateInfo != nullptr) ? 
pPipelineRenderingCreateInfo->colorAttachmentCount : + 0u; +} + } diff --git a/icd/api/icd_main.cpp b/icd/api/icd_main.cpp index 5f42745a..6b845c21 100644 --- a/icd/api/icd_main.cpp +++ b/icd/api/icd_main.cpp @@ -29,6 +29,8 @@ ************************************************************************************************************************ */ +#include + #if defined(__unix__) & (__GNUC__ == 5) #include @@ -73,3 +75,10 @@ namespace std { } #endif + +extern "C" unsigned int GetSettingsBlobsAll( + unsigned char* pBuffer, + size_t bufferSize) +{ + return DevDriver::SettingsBlobNode::GetAllSettingsBlobs(pBuffer, bufferSize); +} diff --git a/icd/api/include/app_profile.h b/icd/api/include/app_profile.h index 86d1af44..3e04a17a 100644 --- a/icd/api/include/app_profile.h +++ b/icd/api/include/app_profile.h @@ -148,6 +148,7 @@ enum class AppProfile : uint32_t DXVK, // DXVK WindowKill, // Windowkill by torcado Archean, // Archean by batcholi + Houdini, // Houdini }; struct ProfileSettings diff --git a/icd/api/include/app_resource_optimizer.h b/icd/api/include/app_resource_optimizer.h index c98fa0aa..314f7b06 100644 --- a/icd/api/include/app_resource_optimizer.h +++ b/icd/api/include/app_resource_optimizer.h @@ -185,7 +185,7 @@ class ResourceOptimizer void BuildTuningProfile(); void BuildAppProfile(); -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE void BuildRuntimeProfile(); #endif @@ -195,7 +195,7 @@ class ResourceOptimizer ResourceProfile m_tuningProfile; ResourceProfile m_appProfile; -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE ResourceProfile m_runtimeProfile; #endif diff --git a/icd/api/include/app_shader_optimizer.h b/icd/api/include/app_shader_optimizer.h index 4ac850b5..cd83e834 100644 --- a/icd/api/include/app_shader_optimizer.h +++ b/icd/api/include/app_shader_optimizer.h @@ -205,7 +205,7 @@ class ShaderOptimizer void BuildAppProfileLlpc(); -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE void BuildRuntimeProfile(); void 
RuntimeProfileParseError(); #endif @@ -225,7 +225,7 @@ class ShaderOptimizer ShaderProfile m_appShaderProfile; -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE PipelineProfile m_runtimeProfile; #endif diff --git a/icd/api/include/compiler_solution.h b/icd/api/include/compiler_solution.h index 5493be7c..04429172 100644 --- a/icd/api/include/compiler_solution.h +++ b/icd/api/include/compiler_solution.h @@ -57,6 +57,8 @@ struct GraphicsPipelineLibraryInfo; struct DeferredWorkload; #endif +typedef uint32_t ShaderModuleFlags; + enum FreeCompilerBinary : uint32_t { FreeWithCompiler = 0, @@ -279,8 +281,7 @@ class CompilerSolution virtual VkResult BuildShaderModule( const Device* pDevice, - VkShaderModuleCreateFlags flags, - VkShaderModuleCreateFlags internalShaderFlags, + ShaderModuleFlags flags, const Vkgc::BinaryData& shaderBinary, ShaderModuleHandle* pShaderModule, const PipelineOptimizerKey& profileKey) = 0; @@ -304,6 +305,8 @@ class CompilerSolution PipelineCache* pPipelineCache, GraphicsLibraryType gplType, GraphicsPipelineBinaryCreateInfo* pCreateInfo, + const Vkgc::BinaryData* pProvidedBinary, + const Util::MetroHash::Hash* pProvidedBinaryHash, void* pPipelineDumpHandle, GplModuleState* pModuleState) = 0; @@ -389,6 +392,10 @@ class CompilerSolution bool hitAppCache, Vkgc::BinaryData* pCacheBinary); + bool ClonePipelineBinary( + const Vkgc::BinaryData* pProvidedBinary, + Vkgc::BinaryData* pNewBinary); + PhysicalDevice* m_pPhysicalDevice; // Vulkan physical device object Vkgc::GfxIpVersion m_gfxIp; // Graphics IP version info, used by Vkgc Pal::GfxIpLevel m_gfxIpLevel; // Graphics IP level diff --git a/icd/api/include/compiler_solution_llpc.h b/icd/api/include/compiler_solution_llpc.h index 9e49dae7..63028cf0 100644 --- a/icd/api/include/compiler_solution_llpc.h +++ b/icd/api/include/compiler_solution_llpc.h @@ -86,8 +86,7 @@ class CompilerSolutionLlpc final : public CompilerSolution virtual VkResult BuildShaderModule( const Device* pDevice, - 
VkShaderModuleCreateFlags flags, - VkShaderModuleCreateFlags internalShaderFlags, + ShaderModuleFlags flags, const Vkgc::BinaryData& shaderBinary, ShaderModuleHandle* pShaderModule, const PipelineOptimizerKey& profileKey) override; @@ -111,6 +110,8 @@ class CompilerSolutionLlpc final : public CompilerSolution PipelineCache* pPipelineCache, GraphicsLibraryType gplType, GraphicsPipelineBinaryCreateInfo* pCreateInfo, + const Vkgc::BinaryData* pProvidedBinary, + const Util::MetroHash::Hash* pProvidedBinaryHash, void* pPipelineDumpHandle, GplModuleState* pModuleState) override; diff --git a/icd/api/include/debug_printf.h b/icd/api/include/debug_printf.h index d9f1d918..d010f106 100644 --- a/icd/api/include/debug_printf.h +++ b/icd/api/include/debug_printf.h @@ -51,8 +51,8 @@ typedef Util::Vector PrintfBit; // Printf Elf string and bits position struct PrintfElfString { - PrintfString printStr; // Printf format string - PrintfBit bit64s; // Bit positions of output variables + PrintfString printStr; // Printf format string + PrintfBit bit64s; // Bit positions of output variables PrintfElfString() : printStr(nullptr), bit64s(nullptr) { } diff --git a/icd/api/include/graphics_pipeline_common.h b/icd/api/include/graphics_pipeline_common.h index bc7cab99..be1ef9a8 100644 --- a/icd/api/include/graphics_pipeline_common.h +++ b/icd/api/include/graphics_pipeline_common.h @@ -284,6 +284,12 @@ class GraphicsPipelineCommon : public Pipeline const VkGraphicsPipelineCreateInfo* pCreateInfo, GraphicsPipelineExtStructs* pExtStructs); + // Gets the color attachment count from either the renderpass or the pipeline rendering + static uint32_t GetColorAttachmentCount( + const RenderPass* pRenderPass, + const uint32_t subpassIndex, + const VkPipelineRenderingCreateInfo* pPipelineRenderingCreateInfo); + protected: // Convert API information into internal create info used to create internal pipeline object static void BuildPipelineObjectCreateInfo( diff --git 
a/icd/api/include/khronos/devext/vk_amd_gpa_interface.h b/icd/api/include/khronos/devext/vk_amd_gpa_interface.h index fd5a24b8..5aa89396 100644 --- a/icd/api/include/khronos/devext/vk_amd_gpa_interface.h +++ b/icd/api/include/khronos/devext/vk_amd_gpa_interface.h @@ -109,10 +109,8 @@ typedef enum VkGpaPerfBlockAMD VK_GPA_PERF_BLOCK_GE_DIST_AMD = 46, VK_GPA_PERF_BLOCK_GE_SE_AMD = 47, VK_GPA_PERF_BLOCK_DF_MALL_AMD = 48, -#if VKI_BUILD_GFX11 VK_GPA_PERF_BLOCK_SQ_WGP_AMD = 49, VK_GPA_PERF_BLOCK_PC_AMD = 50, -#endif VK_GPA_PERF_BLOCK_MAX_ENUM_AMD = 0x7FFFFFFF } VkGpaPerfBlockAMD; diff --git a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h index 49916033..6f1c17f2 100644 --- a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h +++ b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h @@ -69,7 +69,7 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 293 +#define VK_HEADER_VERSION 295 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 3, VK_HEADER_VERSION) @@ -189,6 +189,8 @@ typedef enum VkResult { VK_ERROR_INVALID_VIDEO_STD_PARAMETERS_KHR = -1000299000, VK_ERROR_COMPRESSION_EXHAUSTED_EXT = -1000338000, VK_INCOMPATIBLE_SHADER_BINARY_EXT = 1000482000, + VK_PIPELINE_BINARY_MISSING_KHR = 1000483000, + VK_ERROR_NOT_ENOUGH_SPACE_KHR = -1000483000, VK_ERROR_OUT_OF_POOL_MEMORY_KHR = VK_ERROR_OUT_OF_POOL_MEMORY, VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR = VK_ERROR_INVALID_EXTERNAL_HANDLE, VK_ERROR_FRAGMENTATION_EXT = VK_ERROR_FRAGMENTATION, @@ -694,7 +696,6 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD = 1000189000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT = 1000190000, VK_STRUCTURE_TYPE_PRESENT_FRAME_TOKEN_GGP = 1000191000, - 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV = 1000201000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV = 1000202000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV = 1000202001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_FOOTPRINT_FEATURES_NV = 1000204000, @@ -1043,6 +1044,16 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT = 1000482000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_PROPERTIES_EXT = 1000482001, VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT = 1000482002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_BINARY_FEATURES_KHR = 1000483000, + VK_STRUCTURE_TYPE_PIPELINE_BINARY_CREATE_INFO_KHR = 1000483001, + VK_STRUCTURE_TYPE_PIPELINE_BINARY_INFO_KHR = 1000483002, + VK_STRUCTURE_TYPE_PIPELINE_BINARY_KEY_KHR = 1000483003, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_BINARY_PROPERTIES_KHR = 1000483004, + VK_STRUCTURE_TYPE_RELEASE_CAPTURED_PIPELINE_DATA_INFO_KHR = 1000483005, + VK_STRUCTURE_TYPE_PIPELINE_BINARY_DATA_INFO_KHR = 1000483006, + VK_STRUCTURE_TYPE_PIPELINE_CREATE_INFO_KHR = 1000483007, + VK_STRUCTURE_TYPE_DEVICE_PIPELINE_BINARY_INTERNAL_CACHE_CONTROL_KHR = 1000483008, + VK_STRUCTURE_TYPE_PIPELINE_BINARY_HANDLES_INFO_KHR = 1000483009, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TILE_PROPERTIES_FEATURES_QCOM = 1000484000, VK_STRUCTURE_TYPE_TILE_PROPERTIES_QCOM = 1000484001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_AMIGO_PROFILING_FEATURES_SEC = 1000485000, @@ -1075,6 +1086,8 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR = 1000506002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PER_VIEW_RENDER_AREAS_FEATURES_QCOM = 1000510000, VK_STRUCTURE_TYPE_MULTIVIEW_PER_VIEW_RENDER_AREAS_RENDER_PASS_BEGIN_INFO_QCOM = 1000510001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_KHR = 1000201000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_PROPERTIES_KHR = 1000511000, 
VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_CAPABILITIES_KHR = 1000512000, VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PICTURE_INFO_KHR = 1000512001, VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_PROFILE_INFO_KHR = 1000512003, @@ -1243,6 +1256,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES, VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_KHR, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_NV = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES, @@ -1421,6 +1435,7 @@ typedef enum VkObjectType { VK_OBJECT_TYPE_MICROMAP_EXT = 1000396000, VK_OBJECT_TYPE_OPTICAL_FLOW_SESSION_NV = 1000464000, VK_OBJECT_TYPE_SHADER_EXT = 1000482000, + VK_OBJECT_TYPE_PIPELINE_BINARY_KHR = 1000483000, VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR = VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE, VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR = VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION, VK_OBJECT_TYPE_PRIVATE_DATA_SLOT_EXT = VK_OBJECT_TYPE_PRIVATE_DATA_SLOT, @@ -7606,6 +7621,7 @@ typedef enum VkColorSpaceKHR { VK_COLOR_SPACE_BT709_NONLINEAR_EXT = 1000104006, VK_COLOR_SPACE_BT2020_LINEAR_EXT = 1000104007, VK_COLOR_SPACE_HDR10_ST2084_EXT = 1000104008, + // VK_COLOR_SPACE_DOLBYVISION_EXT is deprecated, but no reason was given in the API XML 
VK_COLOR_SPACE_DOLBYVISION_EXT = 1000104009, VK_COLOR_SPACE_HDR10_HLG_EXT = 1000104010, VK_COLOR_SPACE_ADOBERGB_LINEAR_EXT = 1000104011, @@ -11184,6 +11200,7 @@ static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_NO_PROTECTED_ACCE static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_PROTECTED_ACCESS_ONLY_BIT_EXT = 0x40000000ULL; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_RAY_TRACING_DISPLACEMENT_MICROMAP_BIT_NV = 0x10000000ULL; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_DESCRIPTOR_BUFFER_BIT_EXT = 0x20000000ULL; +static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR = 0x80000000ULL; typedef VkFlags64 VkBufferUsageFlags2KHR; @@ -11318,6 +11335,128 @@ typedef struct VkPhysicalDeviceRayTracingPositionFetchFeaturesKHR { +// VK_KHR_pipeline_binary is a preprocessor guard. Do not pass it to API calls. +#define VK_KHR_pipeline_binary 1 +VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkPipelineBinaryKHR) +#define VK_MAX_PIPELINE_BINARY_KEY_SIZE_KHR 32U +#define VK_KHR_PIPELINE_BINARY_SPEC_VERSION 1 +#define VK_KHR_PIPELINE_BINARY_EXTENSION_NAME "VK_KHR_pipeline_binary" +typedef struct VkPhysicalDevicePipelineBinaryFeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 pipelineBinaries; +} VkPhysicalDevicePipelineBinaryFeaturesKHR; + +typedef struct VkPhysicalDevicePipelineBinaryPropertiesKHR { + VkStructureType sType; + void* pNext; + VkBool32 pipelineBinaryInternalCache; + VkBool32 pipelineBinaryInternalCacheControl; + VkBool32 pipelineBinaryPrefersInternalCache; + VkBool32 pipelineBinaryPrecompiledInternalCache; + VkBool32 pipelineBinaryCompressedData; +} VkPhysicalDevicePipelineBinaryPropertiesKHR; + +typedef struct VkDevicePipelineBinaryInternalCacheControlKHR { + VkStructureType sType; + const void* pNext; + VkBool32 disableInternalCache; +} VkDevicePipelineBinaryInternalCacheControlKHR; + +typedef struct VkPipelineBinaryKeyKHR { + VkStructureType sType; + void* pNext; + uint32_t 
keySize; + uint8_t key[VK_MAX_PIPELINE_BINARY_KEY_SIZE_KHR]; +} VkPipelineBinaryKeyKHR; + +typedef struct VkPipelineBinaryDataKHR { + size_t dataSize; + void* pData; +} VkPipelineBinaryDataKHR; + +typedef struct VkPipelineBinaryKeysAndDataKHR { + uint32_t binaryCount; + const VkPipelineBinaryKeyKHR* pPipelineBinaryKeys; + const VkPipelineBinaryDataKHR* pPipelineBinaryData; +} VkPipelineBinaryKeysAndDataKHR; + +typedef struct VkPipelineCreateInfoKHR { + VkStructureType sType; + void* pNext; +} VkPipelineCreateInfoKHR; + +typedef struct VkPipelineBinaryCreateInfoKHR { + VkStructureType sType; + const void* pNext; + const VkPipelineBinaryKeysAndDataKHR* pKeysAndDataInfo; + VkPipeline pipeline; + const VkPipelineCreateInfoKHR* pPipelineCreateInfo; +} VkPipelineBinaryCreateInfoKHR; + +typedef struct VkPipelineBinaryInfoKHR { + VkStructureType sType; + const void* pNext; + uint32_t binaryCount; + const VkPipelineBinaryKHR* pPipelineBinaries; +} VkPipelineBinaryInfoKHR; + +typedef struct VkReleaseCapturedPipelineDataInfoKHR { + VkStructureType sType; + void* pNext; + VkPipeline pipeline; +} VkReleaseCapturedPipelineDataInfoKHR; + +typedef struct VkPipelineBinaryDataInfoKHR { + VkStructureType sType; + void* pNext; + VkPipelineBinaryKHR pipelineBinary; +} VkPipelineBinaryDataInfoKHR; + +typedef struct VkPipelineBinaryHandlesInfoKHR { + VkStructureType sType; + const void* pNext; + uint32_t pipelineBinaryCount; + VkPipelineBinaryKHR* pPipelineBinaries; +} VkPipelineBinaryHandlesInfoKHR; + +typedef VkResult (VKAPI_PTR *PFN_vkCreatePipelineBinariesKHR)(VkDevice device, const VkPipelineBinaryCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipelineBinaryHandlesInfoKHR* pBinaries); +typedef void (VKAPI_PTR *PFN_vkDestroyPipelineBinaryKHR)(VkDevice device, VkPipelineBinaryKHR pipelineBinary, const VkAllocationCallbacks* pAllocator); +typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineKeyKHR)(VkDevice device, const VkPipelineCreateInfoKHR* pPipelineCreateInfo, 
VkPipelineBinaryKeyKHR* pPipelineKey); +typedef VkResult (VKAPI_PTR *PFN_vkGetPipelineBinaryDataKHR)(VkDevice device, const VkPipelineBinaryDataInfoKHR* pInfo, VkPipelineBinaryKeyKHR* pPipelineBinaryKey, size_t* pPipelineBinaryDataSize, void* pPipelineBinaryData); +typedef VkResult (VKAPI_PTR *PFN_vkReleaseCapturedPipelineDataKHR)(VkDevice device, const VkReleaseCapturedPipelineDataInfoKHR* pInfo, const VkAllocationCallbacks* pAllocator); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineBinariesKHR( + VkDevice device, + const VkPipelineBinaryCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineBinaryHandlesInfoKHR* pBinaries); + +VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineBinaryKHR( + VkDevice device, + VkPipelineBinaryKHR pipelineBinary, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineKeyKHR( + VkDevice device, + const VkPipelineCreateInfoKHR* pPipelineCreateInfo, + VkPipelineBinaryKeyKHR* pPipelineKey); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineBinaryDataKHR( + VkDevice device, + const VkPipelineBinaryDataInfoKHR* pInfo, + VkPipelineBinaryKeyKHR* pPipelineBinaryKey, + size_t* pPipelineBinaryDataSize, + void* pPipelineBinaryData); + +VKAPI_ATTR VkResult VKAPI_CALL vkReleaseCapturedPipelineDataKHR( + VkDevice device, + const VkReleaseCapturedPipelineDataInfoKHR* pInfo, + const VkAllocationCallbacks* pAllocator); +#endif + + // VK_KHR_cooperative_matrix is a preprocessor guard. Do not pass it to API calls. #define VK_KHR_cooperative_matrix 1 #define VK_KHR_COOPERATIVE_MATRIX_SPEC_VERSION 2 @@ -11397,6 +11536,25 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR #endif +// VK_KHR_compute_shader_derivatives is a preprocessor guard. Do not pass it to API calls. 
+#define VK_KHR_compute_shader_derivatives 1 +#define VK_KHR_COMPUTE_SHADER_DERIVATIVES_SPEC_VERSION 1 +#define VK_KHR_COMPUTE_SHADER_DERIVATIVES_EXTENSION_NAME "VK_KHR_compute_shader_derivatives" +typedef struct VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 computeDerivativeGroupQuads; + VkBool32 computeDerivativeGroupLinear; +} VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR; + +typedef struct VkPhysicalDeviceComputeShaderDerivativesPropertiesKHR { + VkStructureType sType; + void* pNext; + VkBool32 meshAndTaskShaderDerivatives; +} VkPhysicalDeviceComputeShaderDerivativesPropertiesKHR; + + + // VK_KHR_video_decode_av1 is a preprocessor guard. Do not pass it to API calls. #define VK_KHR_video_decode_av1 1 #include "vk_video/vulkan_video_codec_av1std.h" @@ -14265,12 +14423,7 @@ typedef VkPipelineCreationFeedback VkPipelineCreationFeedbackEXT; #define VK_NV_compute_shader_derivatives 1 #define VK_NV_COMPUTE_SHADER_DERIVATIVES_SPEC_VERSION 1 #define VK_NV_COMPUTE_SHADER_DERIVATIVES_EXTENSION_NAME "VK_NV_compute_shader_derivatives" -typedef struct VkPhysicalDeviceComputeShaderDerivativesFeaturesNV { - VkStructureType sType; - void* pNext; - VkBool32 computeDerivativeGroupQuads; - VkBool32 computeDerivativeGroupLinear; -} VkPhysicalDeviceComputeShaderDerivativesFeaturesNV; +typedef VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR VkPhysicalDeviceComputeShaderDerivativesFeaturesNV; diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index dcd3acb1..8e6f6732 100644 --- a/icd/api/include/pipeline_compiler.h +++ b/icd/api/include/pipeline_compiler.h @@ -157,8 +157,7 @@ class PipelineCompiler VkResult BuildShaderModule( const Device* pDevice, - const VkShaderModuleCreateFlags flags, - const VkShaderModuleCreateFlags internalShaderFlags, + const ShaderModuleFlags flags, const Vkgc::BinaryData& shaderBinary, ShaderModuleHandle* pShaderModule); @@ -182,6 +181,8 @@ class 
PipelineCompiler PipelineCache* pPipelineCache, GraphicsLibraryType gplType, GraphicsPipelineBinaryCreateInfo* pCreateInfo, + const Vkgc::BinaryData* pProvidedBinary, + const Util::MetroHash::Hash* pProvidedBinaryHash, GplModuleState* pModuleState); VkResult CreateColorExportShaderLibrary( @@ -345,34 +346,10 @@ class PipelineCompiler bool needCache, GraphicsPipelineBinaryCreateInfo* pCreateInfo); - void GetComputePipelineCacheId( - uint32_t deviceIdx, - ComputePipelineBinaryCreateInfo* pCreateInfo, - uint64_t pipelineHash, - const Util::MetroHash::Hash& settingsHash, - Util::MetroHash::Hash* pCacheId); - - void GetGraphicsPipelineCacheId( - uint32_t deviceIdx, - GraphicsPipelineBinaryCreateInfo* pCreateInfo, - uint64_t pipelineHash, - const Util::MetroHash::Hash& settingsHash, - Util::MetroHash::Hash* pCacheId); - void GetColorExportShaderCacheId( GraphicsPipelineBinaryCreateInfo* pCreateInfo, Util::MetroHash::Hash* pCacheId); -#if VKI_RAY_TRACING - void GetRayTracingPipelineCacheId( - uint32_t deviceIdx, - uint32_t numDevices, - RayTracingPipelineBinaryCreateInfo* pCreateInfo, - uint64_t pipelineHash, - const Util::MetroHash::Hash& settingsHash, - Util::MetroHash::Hash* pCacheId); -#endif - static void BuildNggState( const Device* pDevice, const VkShaderStageFlagBits activeStages, @@ -520,21 +497,19 @@ class PipelineCompiler #endif VkResult LoadShaderModuleFromCache( - const VkShaderModuleCreateFlags flags, - const VkShaderModuleCreateFlags internalShaderFlags, + const ShaderModuleFlags flags, const uint32_t compilerMask, const Util::MetroHash::Hash& uniqueHash, ShaderModuleHandle* pShaderModule); void StoreShaderModuleToCache( - const VkShaderModuleCreateFlags flags, - const VkShaderModuleCreateFlags internalShaderFlags, + const ShaderModuleFlags flags, const uint32_t compilerMask, const Util::MetroHash::Hash& uniqueHash, ShaderModuleHandle* pShaderModule); Util::MetroHash::Hash GetShaderModuleCacheHash( - const VkShaderModuleCreateFlags flags, + const 
ShaderModuleFlags flags, const uint32_t compilerMask, const Util::MetroHash::Hash& uniqueHash); diff --git a/icd/api/include/vk_buffer_view.h b/icd/api/include/vk_buffer_view.h index 7375e7ec..5b385f7e 100644 --- a/icd/api/include/vk_buffer_view.h +++ b/icd/api/include/vk_buffer_view.h @@ -56,7 +56,6 @@ class BufferView final : public NonDispatchable const Pal::gpusize* bufferAddress, const VkFormat format, const uint32_t deviceNum, - const size_t srdSize, void* pSrdMemory); VkResult Destroy( diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index 694098bf..8755dff7 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -923,7 +923,8 @@ class CmdBuffer template void PushDescriptorSetKHR( VkPipelineBindPoint pipelineBindPoint, @@ -1105,8 +1106,7 @@ class CmdBuffer void PalCmdAcquire( Pal::AcquireReleaseInfo* pAcquireReleaseInfo, - uint32_t eventCount, - const VkEvent* pEvents, + const VkEvent event, Pal::MemBarrier* const pBufferBarriers, const Buffer** const ppBuffers, Pal::ImgBarrier* const pImageBarriers, @@ -1116,8 +1116,7 @@ class CmdBuffer void PalCmdRelease( Pal::AcquireReleaseInfo* pAcquireReleaseInfo, - uint32_t eventCount, - const VkEvent* pEvents, + const VkEvent event, Pal::MemBarrier* const pBufferBarriers, const Buffer** const ppBuffers, Pal::ImgBarrier* const pImageBarriers, @@ -1316,7 +1315,7 @@ class CmdBuffer uint32_t NumDeviceEvents(uint32_t numEvents) const { return m_numPalDevices * numEvents; } -#if VK_ENABLE_DEBUG_BARRIERS +#if VKI_ENABLE_DEBUG_BARRIERS void DbgBarrierPreCmd(uint64_t cmd) { if (m_dbgBarrierPreCmdMask & (cmd)) @@ -1488,9 +1487,10 @@ class CmdBuffer const VkPushConstantsInfoKHR* pPushConstantsInfo); template + size_t samplerDescSize, + size_t typedBufferDescSize, + size_t untypedBufferDescSize, + uint32_t numPalDevices> void PushDescriptorSet2KHR( const VkPushDescriptorSetInfoKHR* pPushDescriptorSetInfo); @@ -1514,6 +1514,16 @@ class CmdBuffer bool isBegin); private: + + 
void BatchedLoadOpClears( + uint32_t clearCount, + const ImageView** pImageViews, + const Pal::ClearColor* pClearColors, + const Pal::ImageLayout* pClearLayouts, + const Pal::SubresRange* pRanges, + const Pal::SwizzledFormat* pClearFormats, + uint32_t viewMask); + PAL_DISALLOW_COPY_AND_ASSIGN(CmdBuffer); void ValidateGraphicsStates(); @@ -1585,9 +1595,8 @@ class CmdBuffer const VkImageMemoryBarrier* pImageMemoryBarriers); void ExecuteAcquireRelease( - uint32_t eventCount, - const VkEvent* pEvents, uint32_t dependencyCount, + const VkEvent* pEvents, const VkDependencyInfoKHR* pDependencyInfos, AcquireReleaseMode acquireReleaseMode, uint32_t rgpBarrierReasonType); @@ -1687,7 +1696,7 @@ class CmdBuffer #endif void ReleaseResources(); -#if VK_ENABLE_DEBUG_BARRIERS +#if VKI_ENABLE_DEBUG_BARRIERS void DbgCmdBarrier(bool preCmd); #endif @@ -1742,7 +1751,8 @@ class CmdBuffer template static VKAPI_ATTR void VKAPI_CALL CmdPushDescriptorSetKHR( VkCommandBuffer commandBuffer, @@ -1773,7 +1783,8 @@ class CmdBuffer template static VKAPI_ATTR void VKAPI_CALL CmdPushDescriptorSet2KHR( VkCommandBuffer commandBuffer, @@ -1955,8 +1966,7 @@ class CmdBuffer uint32_t preBindDefaultState : 1; uint32_t useReleaseAcquire : 1; uint32_t useSplitReleaseAcquire : 1; - uint32_t useBackupBuffer : 1; - uint32_t reserved2 : 3; + uint32_t useBackupBuffer : 1; uint32_t isRenderingSuspended : 1; #if VKI_RAY_TRACING uint32_t hasRayTracing : 1; @@ -1964,7 +1974,7 @@ class CmdBuffer uint32_t reserved4 : 1; #endif uint32_t offsetMode : 1; - uint32_t reserved : 13; + uint32_t reserved : 16; }; }; @@ -1996,7 +2006,7 @@ class CmdBuffer RenderPassInstanceState m_renderPassInstance; TransformFeedbackState* m_pTransformFeedbackState; -#if VK_ENABLE_DEBUG_BARRIERS +#if VKI_ENABLE_DEBUG_BARRIERS uint64_t m_dbgBarrierPreCmdMask; uint64_t m_dbgBarrierPostCmdMask; #endif diff --git a/icd/api/include/vk_compute_pipeline.h b/icd/api/include/vk_compute_pipeline.h index c2c0bddc..5f01590f 100644 --- 
a/icd/api/include/vk_compute_pipeline.h +++ b/icd/api/include/vk_compute_pipeline.h @@ -112,6 +112,7 @@ class ComputePipeline final : public Pipeline, public NonDispatchable(VK_GPA_PERF_BLOCK_GE1_AMD) == static_cast(Pal::GpuBlock::Ge1)) && (static_cast(VK_GPA_PERF_BLOCK_GE_DIST_AMD) == static_cast(Pal::GpuBlock::GeDist)) && (static_cast(VK_GPA_PERF_BLOCK_GE_SE_AMD) == static_cast(Pal::GpuBlock::GeSe)) && - (static_cast(VK_GPA_PERF_BLOCK_DF_MALL_AMD) == static_cast(Pal::GpuBlock::DfMall)) -#if VKI_BUILD_GFX11 - && (static_cast(VK_GPA_PERF_BLOCK_SQ_WGP_AMD) == static_cast(Pal::GpuBlock::SqWgp)) && + (static_cast(VK_GPA_PERF_BLOCK_DF_MALL_AMD) == static_cast(Pal::GpuBlock::DfMall)) && + (static_cast(VK_GPA_PERF_BLOCK_SQ_WGP_AMD) == static_cast(Pal::GpuBlock::SqWgp)) && (static_cast(VK_GPA_PERF_BLOCK_PC_AMD) == static_cast(Pal::GpuBlock::Pc)) -#endif , "Need to update function convert::GpuBlock"); diff --git a/icd/api/include/vk_defines.h b/icd/api/include/vk_defines.h index 849d5e60..ad7448b7 100644 --- a/icd/api/include/vk_defines.h +++ b/icd/api/include/vk_defines.h @@ -188,6 +188,9 @@ namespace vk // The maximum number of sets that can appear in a pipeline layout static const uint32_t MaxDescriptorSets = 32; + // The maximum size of a buffer SRD + static const uint32_t MaxBufferSrdSize = 8; + // The maximum size of push constants in bytes static const uint32_t MaxPushConstants = 256; diff --git a/icd/api/include/vk_descriptor_set.h b/icd/api/include/vk_descriptor_set.h index a51cccba..939bb2cf 100644 --- a/icd/api/include/vk_descriptor_set.h +++ b/icd/api/include/vk_descriptor_set.h @@ -324,7 +324,8 @@ class DescriptorUpdate template static VKAPI_ATTR void VKAPI_CALL UpdateDescriptorSets( VkDevice device, @@ -336,7 +337,8 @@ class DescriptorUpdate template static void WriteDescriptorSets( const Device* pDevice, diff --git a/icd/api/include/vk_descriptor_update_template.h b/icd/api/include/vk_descriptor_update_template.h index b23c2a88..84e6bbe4 100644 --- 
a/icd/api/include/vk_descriptor_update_template.h +++ b/icd/api/include/vk_descriptor_update_template.h @@ -104,7 +104,8 @@ class DescriptorUpdateTemplate final : public NonDispatchable static PfnUpdateEntry GetUpdateEntryFunc( VkDescriptorType descriptorType, diff --git a/icd/api/include/vk_device.h b/icd/api/include/vk_device.h index 9ce4d34f..d9038d27 100644 --- a/icd/api/include/vk_device.h +++ b/icd/api/include/vk_device.h @@ -168,7 +168,9 @@ class Device uint32 reserved2 : 1; uint32 deviceGeneratedCommands : 1; uint32 robustVertexBufferExtend : 1; - uint32 reserved : 11; + uint32 enableDebugPrintf : 1; + uint32 reserved3 : 1; + uint32 reserved : 9; }; uint32 u32All; @@ -193,7 +195,8 @@ class Device struct { - uint32_t bufferView; + uint32_t typedBufferView; + uint32_t untypedBufferView; uint32_t imageView; uint32_t fmaskView; uint32_t sampler; @@ -809,7 +812,7 @@ class Device const uint8_t* pCode, uint32_t numUserDataNodes, Vkgc::ResourceMappingRootNode* pUserDataNodes, - VkShaderModuleCreateFlags internalShaderFlags, + ShaderModuleFlags flags, bool forceWave64, const VkSpecializationInfo* pSpecializationInfo, InternalPipeline* pInternalPipeline); @@ -1444,6 +1447,34 @@ VKAPI_ATTR void VKAPI_CALL vkGetGeneratedCommandsMemoryRequirementsNV( const VkGeneratedCommandsMemoryRequirementsInfoNV* pInfo, VkMemoryRequirements2* pMemoryRequirements); +VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineBinariesKHR( + VkDevice device, + const VkPipelineBinaryCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineBinaryHandlesInfoKHR* pBinaries); + +VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineBinaryKHR( + VkDevice device, + VkPipelineBinaryKHR pipelineBinary, + const VkAllocationCallbacks* pAllocator); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineKeyKHR( + VkDevice device, + const VkPipelineCreateInfoKHR* pPipelineCreateInfo, + VkPipelineBinaryKeyKHR* pPipelineBinaryKey); + +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineBinaryDataKHR( + VkDevice 
device, + const VkPipelineBinaryDataInfoKHR* pInfo, + VkPipelineBinaryKeyKHR* pPipelineBinaryKey, + size_t* pPipelineBinaryDataSize, + void* pPipelineBinaryData); + +VKAPI_ATTR VkResult VKAPI_CALL vkReleaseCapturedPipelineDataKHR( + VkDevice device, + const VkReleaseCapturedPipelineDataInfoKHR* pInfo, + const VkAllocationCallbacks* pAllocator); + } // namespace entry } // namespace vk diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index 0424401f..a1a7549d 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -323,6 +323,7 @@ class DeviceExtensions final : public Extensions KHR_MAINTENANCE7, KHR_MAP_MEMORY2, KHR_MULTIVIEW, + KHR_PIPELINE_BINARY, KHR_PIPELINE_EXECUTABLE_PROPERTIES, KHR_PIPELINE_LIBRARY, KHR_PUSH_DESCRIPTOR, diff --git a/icd/api/include/vk_formats.h b/icd/api/include/vk_formats.h index 7d3a2497..452b5bd3 100755 --- a/icd/api/include/vk_formats.h +++ b/icd/api/include/vk_formats.h @@ -283,13 +283,19 @@ bool Formats::IsEtc2Format( // Returns true if the given format is a valid RT Vertex Buffer format. 
bool Formats::IsRTVertexFormat(VkFormat format) { - return (VK_FORMAT_R32G32_SFLOAT == format) || - (VK_FORMAT_R32G32B32_SFLOAT == format) || - (VK_FORMAT_R16G16_SFLOAT == format) || - (VK_FORMAT_R16G16B16A16_SFLOAT == format) || - (VK_FORMAT_R16G16_SNORM == format) || - (VK_FORMAT_R16G16B16A16_SNORM == format) || - (VK_FORMAT_R16G16B16A16_UNORM == format); + return (VK_FORMAT_R32G32_SFLOAT == format) || + (VK_FORMAT_R32G32B32_SFLOAT == format) || + (VK_FORMAT_R16G16_SFLOAT == format) || + (VK_FORMAT_R16G16B16A16_SFLOAT == format) || + (VK_FORMAT_R16G16_SNORM == format) || + (VK_FORMAT_R16G16B16A16_SNORM == format) || + (VK_FORMAT_R16G16B16A16_UNORM == format) || + (VK_FORMAT_R16G16_UNORM == format) || + (VK_FORMAT_A2B10G10R10_UNORM_PACK32 == format) || + (VK_FORMAT_R8G8B8A8_UNORM == format) || + (VK_FORMAT_R8G8_UNORM == format) || + (VK_FORMAT_R8G8B8A8_SNORM == format) || + (VK_FORMAT_R8G8_SNORM == format); } // ===================================================================================================================== diff --git a/icd/api/include/vk_graphics_pipeline.h b/icd/api/include/vk_graphics_pipeline.h index a0e9e55d..796d965a 100644 --- a/icd/api/include/vk_graphics_pipeline.h +++ b/icd/api/include/vk_graphics_pipeline.h @@ -227,6 +227,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch Device* const pDevice, Pal::IPipeline** pPalPipeline, const PipelineLayout* pLayout, + PipelineBinaryStorage* pBinaryStorage, const GraphicsPipelineObjectImmedInfo& immedInfo, uint64_t staticStateMask, GraphicsPipelineObjectFlags flags, @@ -277,6 +278,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch PipelineCache* pPipelineCache, const Util::MetroHash::Hash* pCacheIds, uint64_t apiPsoHash, + const PipelineBinaryStorage& binaryStorage, GraphicsPipelineObjectCreateInfo* pObjectCreateInfo, VkPipeline* pPipeline); diff --git a/icd/api/include/vk_graphics_pipeline_library.h 
b/icd/api/include/vk_graphics_pipeline_library.h index 7647f357..2569b1c8 100644 --- a/icd/api/include/vk_graphics_pipeline_library.h +++ b/icd/api/include/vk_graphics_pipeline_library.h @@ -96,6 +96,7 @@ class GraphicsPipelineLibrary final : public GraphicsPipelineCommon, public NonD const Util::MetroHash::Hash& elfHash, const uint64_t apiHash, const GplModuleState* pGplModuleStates, + PipelineBinaryStorage* pBinaryStorage, const PipelineLayout* pPipelineLayout); static VkResult CreatePartialPipelineBinary( diff --git a/icd/api/include/vk_indirect_commands_layout.h b/icd/api/include/vk_indirect_commands_layout.h index dc80025c..eadcb646 100644 --- a/icd/api/include/vk_indirect_commands_layout.h +++ b/icd/api/include/vk_indirect_commands_layout.h @@ -64,7 +64,8 @@ enum IndirectCommandsActionType Draw = 0, DrawIndexed, Dispatch, - DrawMeshTask + DrawMeshTask, + TraceRay }; struct IndirectCommandsInfo @@ -72,6 +73,7 @@ struct IndirectCommandsInfo IndirectCommandsActionType actionType; IndirectCommandsLayoutType layoutType; uint32_t strideInBytes; + uint32_t preActionArgSizeInBytes; }; // ===================================================================================================================== diff --git a/icd/api/include/vk_physical_device.h b/icd/api/include/vk_physical_device.h index f85f9206..98f19642 100644 --- a/icd/api/include/vk_physical_device.h +++ b/icd/api/include/vk_physical_device.h @@ -387,6 +387,26 @@ class PhysicalDevice void GetDevicePropertiesMaxBufferSize( VkDeviceSize* pMaxBufferSize) const; + void GetPhysicalDeviceLineSubPixelPrecisionBits( + uint32_t* pLineSubPixelPrecisionBits) const; + + void GetPhysicalDeviceVertexAttributeDivisorProperties( + uint32_t* pMaxVertexAttribDivisor, + VkBool32* pSupportsNonZeroFirstInstance) const; + + void GetPhysicalDeviceMaintenance5Properties( + VkBool32* pEarlyFragmentMultisampleCoverageAfterSampleCounting, + VkBool32* pEarlyFragmentSampleMaskTestBeforeSampleCounting, + VkBool32* 
pDepthStencilSwizzleOneSupport, + VkBool32* pPolygonModePointSize, + VkBool32* pNonStrictSinglePixelWideLinesUseParallelogram, + VkBool32* pNonStrictWideLinesUseParallelogram) const; + + void GetPhysicalDeviceMaintenance6Properties( + VkBool32* pBlockTexelViewCompatibleMultipleLayers, + uint32_t* pMaxCombinedImageSamplerDescriptorCount, + VkBool32* pFragmentShadingRateClampCombinerInputs) const; + void GetPhysicalDeviceDriverProperties( VkDriverId* pDriverID, char* pDriverName, @@ -522,6 +542,14 @@ template VkBool32* pVulkanMemoryModelDeviceScope, VkBool32* pVulkanMemoryModelAvailabilityVisibilityChains) const; + void GetPhysicalDeviceLineRasterizationFeatures( + VkBool32* pRectangularLines, + VkBool32* pBresenhamLines, + VkBool32* pSmoothLines, + VkBool32* pStippledRectangularLines, + VkBool32* pStippledBresenhamLines, + VkBool32* pStippledSmoothLines) const; + VkResult GetPhysicalDeviceCalibrateableTimeDomainsEXT( uint32_t* pTimeDomainCount, VkTimeDomainEXT* pTimeDomains); diff --git a/icd/api/include/vk_pipeline.h b/icd/api/include/vk_pipeline.h index c6919bc0..9118d278 100644 --- a/icd/api/include/vk_pipeline.h +++ b/icd/api/include/vk_pipeline.h @@ -72,6 +72,20 @@ struct PipelineBinaryInfo Util::MetroHash::Hash binaryHash; }; +constexpr uint32 MaxPipelineBinaryInfoCount = Util::Max(MaxPalDevices, static_cast(GraphicsLibraryCount)); + +// If a pipeline is created with VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR set, it must retain its binaries so that we +// can create VkPipelineBinaryKHR objects from it at any time. We can't rely on our in-memory cache, because it can be +// disabled or have its entries evicted. This struct lets the pipeline store up to MaxPalDevices binaries and retrieve +// them by key or device index. +struct PipelineBinaryStorage +{ + // For monolithic pipelines this stores a single packed blob per device (same as how caching works). For graphics + // pipeline libraries, this stores an elf binary blob per graphics library type. 
+ PipelineBinaryInfo binaryInfo[MaxPipelineBinaryInfoCount]; + uint32 binaryCount; +}; + enum class DynamicStatesInternal : uint32_t { Viewport = 0, @@ -133,6 +147,7 @@ enum class DynamicStatesInternal : uint32_t struct PipelineExtStructs { const VkPipelineCreationFeedbackCreateInfoEXT* pPipelineCreationFeedbackCreateInfoEXT; + const VkPipelineBinaryInfoKHR* pPipelineBinaryInfoKHR; }; // ===================================================================================================================== @@ -243,6 +258,24 @@ class Pipeline Util::MetroHash::Hash* pCacheId ); + const PipelineBinaryStorage* GetBinaryStorage() const + { return m_pBinaryStorage; } + + // See the implementation note about memory ownership behavior. + static void InsertBinaryData( + PipelineBinaryStorage* pBinaryStorage, + const uint32 binaryIndex, + const Util::MetroHash::Hash& key, + const size_t dataSize, + const void* pData); + + VkResult FreeBinaryStorage( + const VkAllocationCallbacks* pAllocator); + + static void FreeBinaryStorage( + PipelineBinaryStorage* pBinaryStorage, + const VkAllocationCallbacks* pAllocator); + static void FreeTempModules( const Device* pDevice, const uint32_t maxStageCount, @@ -259,6 +292,7 @@ class Pipeline void Init( Pal::IPipeline** pPalPipeline, const PipelineLayout* pLayout, + PipelineBinaryStorage* pBinaryStorage, uint64_t staticStateMask, #if VKI_RAY_TRACING uint32_t dispatchRaysUserDataOffset, @@ -310,6 +344,7 @@ class Pipeline private: PAL_DISALLOW_COPY_AND_ASSIGN(Pipeline); + PipelineBinaryStorage* m_pBinaryStorage; PrintfFormatMap* m_pFormatStrings; }; diff --git a/icd/api/include/vk_pipeline_binary.h b/icd/api/include/vk_pipeline_binary.h new file mode 100644 index 00000000..ec3ade3b --- /dev/null +++ b/icd/api/include/vk_pipeline_binary.h @@ -0,0 +1,106 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************************************************************************/ + +#ifndef __VK_PIPELINE_BINARY_H__ +#define __VK_PIPELINE_BINARY_H__ + +#pragma once + +#include "include/vk_dispatch.h" +#include "include/vk_pipeline.h" + +#include "palMetroHash.h" + +namespace vk +{ +class Device; + +class PipelineBinary final : public NonDispatchable +{ +public: + static VkResult CreatePipelineBinaries( + Device* pDevice, + const VkPipelineBinaryCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineBinaryHandlesInfoKHR* pBinaries); + + VkResult DestroyPipelineBinary( + Device* pDevice, + const VkAllocationCallbacks* pAllocator); + + static VkResult GetPipelineKey( + const Device* pDevice, + const VkPipelineCreateInfoKHR* pPipelineCreateInfo, + VkPipelineBinaryKeyKHR* pPipelineBinaryKey); + + VkResult GetPipelineBinaryData( + VkPipelineBinaryKeyKHR* pPipelineBinaryKey, + size_t* pPipelineBinaryDataSize, + void* pPipelineBinaryData); + + static VkResult ReleaseCapturedPipelineData( + Device* pDevice, + Pipeline* pPipeline, + const VkAllocationCallbacks* pAllocator); + + static void ReadFromPipelineBinaryKey( + const VkPipelineBinaryKeyKHR& inKey, + Util::MetroHash::Hash* pOutKey); + + const Util::MetroHash::Hash& BinaryKey() const + { return m_binaryKey; } + + const Vkgc::BinaryData& BinaryData() const + { return m_binaryData; } + +protected: + +private: + PAL_DISALLOW_COPY_AND_ASSIGN(PipelineBinary); + + PipelineBinary( + const Util::MetroHash::Hash& binaryKey, + const Vkgc::BinaryData& binaryData); + + // Pipeline binary doesn't contain the key itself. 
+ static VkResult Create( + Device* pDevice, + const Util::MetroHash::Hash& binaryKey, + const Vkgc::BinaryData& binaryData, + const VkAllocationCallbacks* pAllocator, + VkPipelineBinaryKHR* pPipelineBinary); + + static void WriteToPipelineBinaryKey( + const void* pSrcData, + const size_t dataSize, + VkPipelineBinaryKeyKHR* pDstKey); + + const Util::MetroHash::Hash m_binaryKey; + const Vkgc::BinaryData m_binaryData; +}; + +} // namespace vk + +#endif /* __VK_PIPELINE_BINARY_H__ */ diff --git a/icd/api/include/vk_pipeline_layout.h b/icd/api/include/vk_pipeline_layout.h index f7c58f78..b78a2b06 100644 --- a/icd/api/include/vk_pipeline_layout.h +++ b/icd/api/include/vk_pipeline_layout.h @@ -1,4 +1,4 @@ -/* +/* *********************************************************************************************************************** * * Copyright (c) 2014-2024 Advanced Micro Devices, Inc. All Rights Reserved. @@ -256,6 +256,12 @@ class PipelineLayout final : public NonDispatchable @@ -97,6 +99,13 @@ class ShaderModule final : public NonDispatchable static void* GetFirstValidShaderData(const ShaderModuleHandle* pHandle); + static ShaderModuleFlags ConvertVkShaderModuleCreateFlags(VkShaderModuleCreateFlags flags) + { + // There aren't any VkShaderModuleCreateFlags yet, but this function should be implemented when one is added. 
+ VK_ASSERT(flags == 0); + return 0; + } + protected: ShaderModule(size_t codeSize, const void* pCode, VkShaderModuleCreateFlags flags); VkResult Init(Device* pDevice); @@ -105,7 +114,7 @@ class ShaderModule final : public NonDispatchable const void* m_pCode; ShaderModuleHandle m_handle; Pal::ShaderHash m_codeHash; - VkShaderModuleCreateFlags m_flags; + ShaderModuleFlags m_flags; private: PAL_DISALLOW_COPY_AND_ASSIGN(ShaderModule); diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index a0236cb1..3e8e7927 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -196,11 +196,11 @@ static void ApplyProfileOptions( static bool SupportInternalModuleCache( const PhysicalDevice* pDevice, const uint32_t compilerMask, - const VkShaderModuleCreateFlags internalShaderFlags) + const ShaderModuleFlags flags) { bool supportInternalModuleCache = false; - if (Util::TestAnyFlagSet(internalShaderFlags, VK_INTERNAL_SHADER_FLAGS_FORCE_UNCACHED_BIT)) + if (Util::TestAnyFlagSet(flags, ShaderModuleForceUncached)) { supportInternalModuleCache = false; } @@ -330,12 +330,10 @@ VkResult PipelineCompiler::Initialize() m_gfxIp.major = 10; m_gfxIp.minor = 3; break; -#if VKI_BUILD_GFX11 case Pal::GfxIpLevel::GfxIp11_0: m_gfxIp.major = 11; m_gfxIp.minor = 0; break; -#endif #if VKI_BUILD_GFX115 case Pal::GfxIpLevel::GfxIp11_5: m_gfxIp.major = 11; @@ -481,15 +479,24 @@ bool PipelineCompiler::LoadReplaceShaderBinary( // ===================================================================================================================== // Generates shader module cache hash ID Util::MetroHash::Hash PipelineCompiler::GetShaderModuleCacheHash( - const VkShaderModuleCreateFlags flags, + const ShaderModuleFlags flags, const uint32_t compilerMask, const Util::MetroHash::Hash& uniqueHash) { + // None of the internal flags require hashing, but any new API provided one might. 
+ constexpr ShaderModuleFlags AllInternalFlags = +#if VKI_RAY_TRACING + ShaderModuleInternalRayTracingShader | +#endif + ShaderModuleInternalShader | + ShaderModuleAllowDelayConversion | + ShaderModuleForceUncached; + VK_ASSERT(Util::TestAnyFlagSet(flags, ~AllInternalFlags) == false); + Util::MetroHash128 hasher; Util::MetroHash::Hash hash; hasher.Update(compilerMask); hasher.Update(uniqueHash); - hasher.Update(flags); hasher.Update(m_pPhysicalDevice->GetSettingsLoader()->GetSettingsHash()); hasher.Finalize(hash.bytes); return hash; @@ -498,8 +505,7 @@ Util::MetroHash::Hash PipelineCompiler::GetShaderModuleCacheHash( // ===================================================================================================================== // Loads shader module from cache, include both run-time cache and binary cache VkResult PipelineCompiler::LoadShaderModuleFromCache( - const VkShaderModuleCreateFlags flags, - const VkShaderModuleCreateFlags internalShaderFlags, + const ShaderModuleFlags flags, const uint32_t compilerMask, const Util::MetroHash::Hash& uniqueHash, ShaderModuleHandle* pShaderModule) @@ -507,7 +513,7 @@ VkResult PipelineCompiler::LoadShaderModuleFromCache( VkResult result = VK_ERROR_INITIALIZATION_FAILED; const bool supportInternalModuleCache = - SupportInternalModuleCache(m_pPhysicalDevice, compilerMask, internalShaderFlags); + SupportInternalModuleCache(m_pPhysicalDevice, compilerMask, flags); const bool delayConversion = false; VK_ASSERT(pShaderModule->pRefCount == nullptr); @@ -585,8 +591,7 @@ VkResult PipelineCompiler::LoadShaderModuleFromCache( // ===================================================================================================================== // Stores shader module to cache, include both run-time cache and binary cache void PipelineCompiler::StoreShaderModuleToCache( - const VkShaderModuleCreateFlags flags, - const VkShaderModuleCreateFlags internalShaderFlags, + const ShaderModuleFlags flags, const uint32_t compilerMask, 
const Util::MetroHash::Hash& uniqueHash, ShaderModuleHandle* pShaderModule) @@ -594,7 +599,7 @@ void PipelineCompiler::StoreShaderModuleToCache( VK_ASSERT(pShaderModule->pRefCount == nullptr); const bool supportInternalModuleCache = - SupportInternalModuleCache(m_pPhysicalDevice, compilerMask, internalShaderFlags); + SupportInternalModuleCache(m_pPhysicalDevice, compilerMask, flags); if (supportInternalModuleCache) { @@ -633,8 +638,7 @@ void PipelineCompiler::StoreShaderModuleToCache( // Builds shader module from SPIR-V binary code. VkResult PipelineCompiler::BuildShaderModule( const Device* pDevice, - const VkShaderModuleCreateFlags flags, - const VkShaderModuleCreateFlags internalShaderFlags, + const ShaderModuleFlags flags, const Vkgc::BinaryData& shaderBinary, ShaderModuleHandle* pShaderModule) { @@ -652,6 +656,8 @@ VkResult PipelineCompiler::BuildShaderModule( bool findReplaceShader = false; + ShaderModuleFlags shaderFlags = flags; + Vkgc::BinaryData finalData = shaderBinary; if ((pSettings->shaderReplaceMode == ShaderReplaceShaderHash) || (pSettings->shaderReplaceMode == ShaderReplaceShaderHashPipelineBinaryHash)) @@ -664,11 +670,11 @@ VkResult PipelineCompiler::BuildShaderModule( finalData = replaceBinary; Util::MetroHash64::Hash( reinterpret_cast(replaceBinary.pCode), replaceBinary.codeSize, uniqueHash.bytes); + } } - result = LoadShaderModuleFromCache( - flags, internalShaderFlags, compilerMask, uniqueHash, pShaderModule); + result = LoadShaderModuleFromCache(shaderFlags, compilerMask, uniqueHash, pShaderModule); if (result != VK_SUCCESS) { @@ -676,15 +682,14 @@ VkResult PipelineCompiler::BuildShaderModule( { result = m_compilerSolutionLlpc.BuildShaderModule( pDevice, - flags, - internalShaderFlags, + shaderFlags, finalData, pShaderModule, PipelineOptimizerKey{}); } - StoreShaderModuleToCache(flags, internalShaderFlags, compilerMask, uniqueHash, pShaderModule); + StoreShaderModuleToCache(shaderFlags, compilerMask, uniqueHash, pShaderModule); } else if 
((pSettings->enablePipelineDump) ) @@ -800,7 +805,7 @@ bool PipelineCompiler::ReplacePipelineShaderModule( if (LoadReplaceShaderBinary(hash64, &shaderBinary)) { VkResult result = - BuildShaderModule(pDevice, 0, 0, shaderBinary, pShaderModule); + BuildShaderModule(pDevice, 0, shaderBinary, pShaderModule); if (result == VK_SUCCESS) { @@ -1194,6 +1199,8 @@ VkResult PipelineCompiler::CreateGraphicsShaderBinary( PipelineCache* pPipelineCache, GraphicsLibraryType gplType, GraphicsPipelineBinaryCreateInfo* pCreateInfo, + const Vkgc::BinaryData* pProvidedBinary, + const Util::MetroHash::Hash* pProvidedBinaryHash, GplModuleState* pModuleState) { VkResult result = VK_SUCCESS; @@ -1226,6 +1233,8 @@ VkResult PipelineCompiler::CreateGraphicsShaderBinary( pPipelineCache, gplType, pCreateInfo, + pProvidedBinary, + pProvidedBinaryHash, pPipelineDumpHandle, pModuleState); @@ -1655,7 +1664,6 @@ void BuildLlpcVertexInputDescriptors( { VK_ASSERT(pVbInfo != nullptr); - const uint32_t srdDwSize = pDevice->GetProperties().descriptorSizes.bufferView / sizeof(uint32_t); uint32_t activeBindings = 0; // Sort the strides by binding slot @@ -1823,9 +1831,7 @@ static void MergePipelineOptions( pDst->extendedRobustness.nullDescriptor |= src.extendedRobustness.nullDescriptor; pDst->extendedRobustness.robustBufferAccess |= src.extendedRobustness.robustBufferAccess; pDst->extendedRobustness.robustImageAccess |= src.extendedRobustness.robustImageAccess; -#if VKI_BUILD_GFX11 pDst->optimizeTessFactor |= src.optimizeTessFactor; -#endif pDst->enableInterpModePatch |= src.enableInterpModePatch; pDst->pageMigrationEnabled |= src.pageMigrationEnabled; pDst->optimizationLevel |= src.optimizationLevel; @@ -2079,11 +2085,11 @@ void PipelineCompiler::BuildNggState( // NOTE: To support unrestrict dynamic primtive topology, we need full disable NGG on gfx10. 
bool disallowNgg = unrestrictedPrimitiveTopology; -#if VKI_BUILD_GFX11 + // On gfx11, we needn't program GS output primitive type on VsPs pipeline, so we can support unrestrict dynamic // primtive topology with NGG. disallowNgg = (disallowNgg && (deviceProp.gfxLevel < Pal::GfxIpLevel::GfxIp11_0)); -#endif + if (disallowNgg) { pCreateInfo->pipelineInfo.nggState.enableNgg = false; @@ -2399,11 +2405,18 @@ static void BuildColorBlendState( { auto pRendering = extStructs.pPipelineRenderingCreateInfo; + const uint32 numColorTargets = GraphicsPipelineCommon::GetColorAttachmentCount(pRenderPass, subpass, pRendering); if ((pCb != nullptr) || (pRendering != nullptr)) { - const uint32_t numColorTargets = (pRendering != nullptr) ? - Util::Min(pRendering->colorAttachmentCount, Pal::MaxColorTargets) : - Util::Min(pCb->attachmentCount, Pal::MaxColorTargets); + bool useBlendAttachments = false; + // If the pipeline is created with these 3 states as dynamic, the attachmentCount from the + // VkPipelineColorBlendStateCreateInfo is ignored. 
+ if ((IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::ColorBlendEnable) == false) || + (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::ColorBlendEquation) == false) || + (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::ColorWriteMask) == false)) + { + useBlendAttachments = true; + } if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::ColorBlendEquation) || IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::ColorBlendEnable)) @@ -2458,7 +2471,7 @@ static void BuildColorBlendState( VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; - if ((pCb != nullptr) && (i < pCb->attachmentCount)) + if (useBlendAttachments && (i < pCb->attachmentCount)) { const VkPipelineColorBlendAttachmentState& src = pCb->pAttachments[i]; if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::ColorWriteMask) == false) @@ -3066,6 +3079,12 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( result = BuildPipelineResourceMapping(pDevice, pPipelineLayout, availableStageMask, pCreateInfo); } + + // Primitive Generated Query is only used for traditional shaders, disable it for mesh shader. 
+ if (pCreateInfo->pipelineInfo.mesh.pModuleData != nullptr) + { + pCreateInfo->pipelineInfo.options.enablePrimGeneratedQuery = false; + } } if (result == VK_SUCCESS) @@ -3295,9 +3314,7 @@ void PipelineCompiler::ApplyPipelineOptions( pOptions->enableRelocatableShaderElf = settings.enableRelocatableShaders; pOptions->disableImageResourceCheck = settings.disableImageResourceTypeCheck; -#if VKI_BUILD_GFX11 pOptions->optimizeTessFactor = settings.optimizeTessFactor != OptimizeTessFactorDisable; -#endif pOptions->forceCsThreadIdSwizzling = settings.forceCsThreadIdSwizzling; pOptions->overrideThreadGroupSizeX = settings.overrideThreadGroupSizeX; pOptions->overrideThreadGroupSizeY = settings.overrideThreadGroupSizeY; @@ -4322,9 +4339,7 @@ void PipelineCompiler::SetRayTracingState( bvhInfo.boxSortHeuristic = Pal::BoxSortHeuristic::ClosestFirst; bvhInfo.flags.useZeroOffset = 1; bvhInfo.flags.returnBarycentrics = 1; -#if VKI_BUILD_GFX11 bvhInfo.flags.pointerFlags = settings.rtEnableNodePointerFlags; -#endif // Bypass Mall cache read/write if no alloc policy is set for SRDs. // This global setting applies to every BVH SRD. 
@@ -4409,7 +4424,6 @@ void PipelineCompiler::SetRayTracingState( auto rtCounterMode = pDevice->RayTrace()->TraceRayCounterMode(DefaultDeviceIndex); pRtState->enableRayTracingCounters = (rtCounterMode != GpuRt::TraceRayCounterMode::TraceRayCounterDisable); -#if VKI_BUILD_GFX11 // Enable hardware traversal stack on RTIP 2.0+ if (settings.emulatedRtIpLevel > EmulatedRtIpLevel1_1) { @@ -4427,7 +4441,6 @@ void PipelineCompiler::SetRayTracingState( pRtState->enableRayTracingHwTraversalStack = 1; } } -#endif Pal::RayTracingIpLevel rayTracingIp = pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gfxipProperties.rayTracingIp; @@ -4441,11 +4454,9 @@ void PipelineCompiler::SetRayTracingState( case EmulatedRtIpLevel1_1: rayTracingIp = Pal::RayTracingIpLevel::RtIp1_1; break; -#if VKI_BUILD_GFX11 case EmulatedRtIpLevel2_0: rayTracingIp = Pal::RayTracingIpLevel::RtIp2_0; break; -#endif default: VK_ASSERT(false); break; @@ -4460,11 +4471,9 @@ void PipelineCompiler::SetRayTracingState( case Pal::RayTracingIpLevel::RtIp1_1: pRtState->rtIpVersion = Vkgc::RtIpVersion({ 1, 1 }); break; -#if VKI_BUILD_GFX11 case Pal::RayTracingIpLevel::RtIp2_0: pRtState->rtIpVersion = Vkgc::RtIpVersion({ 2, 0 }); break; -#endif default: VK_NEVER_CALLED(); break; @@ -4472,7 +4481,7 @@ void PipelineCompiler::SetRayTracingState( pRtState->gpurtFeatureFlags = GpuRtShaderLibraryFlags(pDevice); - const GpuRt::PipelineShaderCode codePatch = GpuRt::GetShaderLibraryCode(pRtState->gpurtFeatureFlags); + const GpuRt::PipelineShaderCode codePatch = GpuRt::GetShaderLibraryCode(rayTracingIp, pRtState->gpurtFeatureFlags); VK_ASSERT(codePatch.dxilSize > 0); pRtState->gpurtShaderLibrary.pCode = codePatch.pSpvCode; @@ -4809,94 +4818,6 @@ static VkPipelineCreateFlags2KHR GetCacheIdControlFlags( return in & (~CacheIdIgnoreFlags); } -// ===================================================================================================================== -// The pipeline cache ID contains additional inputs outside 
the shader creation information for pipeline executable -// properties as well as options to avoid user error when changing performance tuning, compiler, or any other settings. -static void GetCommonPipelineCacheId( - uint32_t deviceIdx, - VkPipelineCreateFlags2KHR flags, - const PipelineOptimizerKey* pPipelineProfileKey, - PipelineCompilerType compilerType, - uint64_t pipelineHash, - const Util::MetroHash::Hash& settingsHash, - Util::MetroHash128* pHash) -{ - pHash->Update(pipelineHash); - pHash->Update(deviceIdx); - pHash->Update(GetCacheIdControlFlags(flags)); - pHash->Update(compilerType); - pHash->Update(settingsHash); - pHash->Update(pPipelineProfileKey->shaderCount); - - for (uint32_t shaderIdx = 0; shaderIdx < pPipelineProfileKey->shaderCount; ++shaderIdx) - { - pHash->Update(pPipelineProfileKey->pShaders[shaderIdx]); - } -} - -// ===================================================================================================================== -void PipelineCompiler::GetComputePipelineCacheId( - uint32_t deviceIdx, - ComputePipelineBinaryCreateInfo* pCreateInfo, - uint64_t pipelineHash, - const Util::MetroHash::Hash& settingsHash, - Util::MetroHash::Hash* pCacheId) -{ - Util::MetroHash128 hash = {}; - - GetCommonPipelineCacheId( - deviceIdx, - pCreateInfo->flags, - pCreateInfo->pPipelineProfileKey, - pCreateInfo->compilerType, - pipelineHash, - settingsHash, - &hash); - - hash.Update(pCreateInfo->pipelineInfo.cs.options); - hash.Update(pCreateInfo->pipelineInfo.options); - - hash.Finalize(pCacheId->bytes); -} - -// ===================================================================================================================== -void PipelineCompiler::GetGraphicsPipelineCacheId( - uint32_t deviceIdx, - GraphicsPipelineBinaryCreateInfo* pCreateInfo, - uint64_t pipelineHash, - const Util::MetroHash::Hash& settingsHash, - Util::MetroHash::Hash* pCacheId) -{ - Util::MetroHash128 hash = {}; - - GetCommonPipelineCacheId( - deviceIdx, - pCreateInfo->flags, - 
pCreateInfo->pPipelineProfileKey, - pCreateInfo->compilerType, - pipelineHash, - settingsHash, - &hash); - - hash.Update(pCreateInfo->pipelineInfo.task.options); - hash.Update(pCreateInfo->pipelineInfo.vs.options); - hash.Update(pCreateInfo->pipelineInfo.tes.options); - hash.Update(pCreateInfo->pipelineInfo.tcs.options); - hash.Update(pCreateInfo->pipelineInfo.gs.options); - hash.Update(pCreateInfo->pipelineInfo.mesh.options); - hash.Update(pCreateInfo->pipelineInfo.fs.options); - hash.Update(pCreateInfo->pipelineInfo.options); - hash.Update(pCreateInfo->pipelineInfo.nggState); - hash.Update(pCreateInfo->dbFormat); - hash.Update(pCreateInfo->pipelineInfo.dynamicVertexStride); - hash.Update(pCreateInfo->pipelineInfo.enableUberFetchShader); - hash.Update(pCreateInfo->pipelineInfo.rsState); - - hash.Update(pCreateInfo->pBinaryMetadata->pointSizeUsed); - - hash.Finalize(pCacheId->bytes); -} - // ===================================================================================================================== void PipelineCompiler::GetColorExportShaderCacheId( GraphicsPipelineBinaryCreateInfo* pCreateInfo, @@ -4915,34 +4836,6 @@ void PipelineCompiler::GetColorExportShaderCacheId( hash.Finalize(pCacheId->bytes); } -#if VKI_RAY_TRACING -// ===================================================================================================================== -void PipelineCompiler::GetRayTracingPipelineCacheId( - uint32_t deviceIdx, - uint32_t numDevices, - RayTracingPipelineBinaryCreateInfo* pCreateInfo, - uint64_t pipelineHash, - const Util::MetroHash::Hash& settingsHash, - Util::MetroHash::Hash* pCacheId) -{ - Util::MetroHash128 hash = {}; - - GetCommonPipelineCacheId( - deviceIdx, - pCreateInfo->flags, - pCreateInfo->pPipelineProfileKey, - pCreateInfo->compilerType, - pipelineHash, - settingsHash, - &hash); - - hash.Update(numDevices); - hash.Update(pCreateInfo->pipelineInfo.options); - - hash.Finalize(pCacheId->bytes); -} -#endif - // 
===================================================================================================================== void PipelineCompiler::BuildPipelineInternalBufferData( const PipelineLayout* pPipelineLayout, diff --git a/icd/api/raytrace/ray_tracing_device.cpp b/icd/api/raytrace/ray_tracing_device.cpp index a01de905..3a097064 100644 --- a/icd/api/raytrace/ray_tracing_device.cpp +++ b/icd/api/raytrace/ray_tracing_device.cpp @@ -35,6 +35,7 @@ #include "sqtt/sqtt_rgp_annotations.h" #include "palAutoBuffer.h" #include "palVectorImpl.h" +#include "palArchiveFile.h" #include "gpurt/gpurtLib.h" #include "g_gpurtOptions.h" @@ -70,6 +71,7 @@ RayTracingDevice::~RayTracingDevice() VkResult RayTracingDevice::Init() { VkResult result = VK_SUCCESS; + const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); if (InitAccelStructTracker() != VK_SUCCESS) { @@ -106,7 +108,7 @@ VkResult RayTracingDevice::Init() initInfo.accelStructTrackerGpuAddr = GetAccelStructTrackerGpuVa(deviceIdx); initInfo.deviceSettings.emulatedRtIpLevel = Pal::RayTracingIpLevel::None; - switch (m_pDevice->GetRuntimeSettings().emulatedRtIpLevel) + switch (settings.emulatedRtIpLevel) { case EmulatedRtIpLevelNone: break; @@ -114,11 +116,9 @@ VkResult RayTracingDevice::Init() case EmulatedRtIpLevel1_1: initInfo.deviceSettings.emulatedRtIpLevel = Pal::RayTracingIpLevel::RtIp1_1; break; -#if VKI_BUILD_GFX11 case EmulatedRtIpLevel2_0: initInfo.deviceSettings.emulatedRtIpLevel = Pal::RayTracingIpLevel::RtIp2_0; break; -#endif default: break; } @@ -252,7 +252,6 @@ void RayTracingDevice::CreateGpuRtDeviceSettings( pDeviceSettings->accelerationStructureUUID = GetAccelerationStructureUUID( m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties()); pDeviceSettings->enableMergeSort = settings.enableMergeSort; - pDeviceSettings->fastBuildThreshold = settings.fastBuildThreshold; pDeviceSettings->lbvhBuildThreshold = settings.lbvhBuildThreshold; pDeviceSettings->enableBVHBuildDebugCounters = 
settings.enableBvhBuildDebugCounters; pDeviceSettings->enableInsertBarriersInBuildAS = settings.enableInsertBarriersInBuildAs; @@ -278,6 +277,8 @@ void RayTracingDevice::CreateGpuRtDeviceSettings( pDeviceSettings->enableMergedEncodeBuild = settings.enableMergedEncodeBuild; pDeviceSettings->enableMergedEncodeUpdate = settings.enableMergedEncodeUpdate; + pDeviceSettings->checkBufferOverlapsInBatch = settings.rtCheckBufferOverlapsInBatch; + pDeviceSettings->disableCompaction = settings.rtDisableAccelStructCompaction; } // ===================================================================================================================== @@ -511,7 +512,7 @@ VkResult RayTracingDevice::InitAccelStructTracker() // Ensure the SRD size matches with the size reported by PAL VK_ASSERT(sizeof(pTracker->srd) >= - m_pDevice->VkPhysicalDevice(deviceIdx)->PalProperties().gfxipProperties.srdSizes.bufferView); + m_pDevice->VkPhysicalDevice(deviceIdx)->PalProperties().gfxipProperties.srdSizes.untypedBufferView); pPalDevice->CreateUntypedBufferViewSrds(1, &viewInfo, &pTracker->srd); } @@ -545,7 +546,7 @@ VkResult RayTracingDevice::InitAccelStructTracker() // Create null view if tracking is disabled. memcpy(&m_accelStructTrackerResources[deviceIdx].srd[0], props.gfxipProperties.nullSrds.pNullBufferView, - props.gfxipProperties.srdSizes.bufferView); + props.gfxipProperties.srdSizes.untypedBufferView); } } @@ -684,9 +685,7 @@ void RayTracingDevice::SetDispatchInfo( dispatchInfo.stateObjectHash = apiHash; dispatchInfo.boxSortMode = settings.boxSortingHeuristic; -#if VKI_BUILD_GFX11 dispatchInfo.usesNodePtrFlags = settings.rtEnableNodePointerFlags ? 
1 : 0; -#endif if (pipelineType == GpuRt::RtPipelineType::RayTracing) { @@ -821,10 +820,19 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( const void* pPipelineBinary = nullptr; size_t pipelineBinarySize = 0; + Vkgc::BinaryData spvBin = + { + .codeSize = buildInfo.code.spvSize, + .pCode = buildInfo.code.pSpvCode + }; + Vkgc::ResourceMappingRootNode nodes[GpuRt::MaxInternalPipelineNodes] = {}; Vkgc::ResourceMappingNode subNodes[GpuRt::MaxInternalPipelineNodes] = {}; uint32_t subNodeIndex = 0; - const uint32_t bufferSrdSizeDw = pDevice->GetProperties().descriptorSizes.bufferView / sizeof(uint32_t); + const uint32_t typedBufferSrdSizeDw = + pDevice->GetProperties().descriptorSizes.typedBufferView / sizeof(uint32_t); + const uint32_t untypedBufferSrdSizeDw = + pDevice->GetProperties().descriptorSizes.untypedBufferView / sizeof(uint32_t); for (uint32_t nodeIndex = 0; nodeIndex < buildInfo.nodeCount; ++nodeIndex) { @@ -863,8 +871,17 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( } else if (node.type == GpuRt::NodeType::Srv) { - nodes[nodeIndex].node.type = - Vkgc::ResourceMappingNodeType::DescriptorResource; + nodes[nodeIndex].node.type = Vkgc::ResourceMappingNodeType::DescriptorResource; + + if (node.srdStride == 2) + { + nodes[nodeIndex].node.type = Vkgc::ResourceMappingNodeType::DescriptorBufferCompact; + } + else if (node.srdStride == 4) + { + nodes[nodeIndex].node.type = Vkgc::ResourceMappingNodeType::DescriptorBuffer; + } + nodes[nodeIndex].node.sizeInDwords = node.dwSize; nodes[nodeIndex].node.offsetInDwords = node.dwOffset; nodes[nodeIndex].node.srdRange.set = node.descSet; @@ -888,22 +905,28 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( { case GpuRt::NodeType::UavTable: pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorBuffer; + pSubNode->sizeInDwords = untypedBufferSrdSizeDw; break; case GpuRt::NodeType::TypedUavTable: pSubNode->type = 
Vkgc::ResourceMappingNodeType::DescriptorTexelBuffer; + pSubNode->sizeInDwords = typedBufferSrdSizeDw; break; case GpuRt::NodeType::ConstantBufferTable: pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorConstBuffer; + pSubNode->sizeInDwords = untypedBufferSrdSizeDw; break; case GpuRt::NodeType::SrvTable: + pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorResource; + pSubNode->sizeInDwords = untypedBufferSrdSizeDw; + break; case GpuRt::NodeType::TypedSrvTable: pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorResource; + pSubNode->sizeInDwords = typedBufferSrdSizeDw; break; default: VK_NEVER_CALLED(); } pSubNode->offsetInDwords = 0; - pSubNode->sizeInDwords = bufferSrdSizeDw; pSubNode->srdRange.set = node.descSet; pSubNode->srdRange.binding = node.binding; } @@ -934,8 +957,6 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( compileConstants.pConstants }; - Vkgc::BinaryData spvBin = { buildInfo.code.spvSize, buildInfo.code.pSpvCode }; - bool forceWave64 = false; // Overide wave size for these GpuRT shader types @@ -947,11 +968,11 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( forceWave64 = true; } - result = pDevice->CreateInternalComputePipeline(buildInfo.code.spvSize, - static_cast(buildInfo.code.pSpvCode), + result = pDevice->CreateInternalComputePipeline(spvBin.codeSize, + static_cast(spvBin.pCode), buildInfo.nodeCount, nodes, - VK_INTERNAL_SHADER_FLAGS_RAY_TRACING_INTERNAL_SHADER_BIT, + ShaderModuleInternalRayTracingShader, forceWave64, &specializationInfo, &pDevice->GetInternalRayTracingPipeline()); diff --git a/icd/api/raytrace/ray_tracing_device.h b/icd/api/raytrace/ray_tracing_device.h index 246489eb..75789250 100644 --- a/icd/api/raytrace/ray_tracing_device.h +++ b/icd/api/raytrace/ray_tracing_device.h @@ -132,16 +132,6 @@ class RayTracingDevice void* pConstants); private: - Device* m_pDevice; - - GpuRt::IDevice* m_pGpuRtDevice[MaxPalDevices]; - GpuRt::DeviceSettings m_gpurtDeviceSettings; 
- GpurtOptions m_gpurtOptions; - - uint32_t m_profileRayFlags; // Ray flag override for profiling - uint32_t m_profileMaxIterations; // Max traversal iterations - - CmdContext m_cmdContext[MaxPalDevices]; // GPURT Callback Functions static Pal::Result ClientAllocateGpuMemory( @@ -211,6 +201,17 @@ class RayTracingDevice void CollectGpurtOptions(GpurtOptions* const pGpurtOptions) const; + Device* m_pDevice; + + GpuRt::IDevice* m_pGpuRtDevice[MaxPalDevices]; + GpuRt::DeviceSettings m_gpurtDeviceSettings; + GpurtOptions m_gpurtOptions; + + uint32_t m_profileRayFlags; // Ray flag override for profiling + uint32_t m_profileMaxIterations; // Max traversal iterations + + CmdContext m_cmdContext[MaxPalDevices]; + BvhBatchLayer* m_pBvhBatchLayer; SplitRaytracingLayer* m_pSplitRaytracingLayer; diff --git a/icd/api/raytrace/vk_acceleration_structure.cpp b/icd/api/raytrace/vk_acceleration_structure.cpp index 3295d5c7..bfeb83d4 100644 --- a/icd/api/raytrace/vk_acceleration_structure.cpp +++ b/icd/api/raytrace/vk_acceleration_structure.cpp @@ -556,6 +556,24 @@ GpuRt::Geometry AccelerationStructure::ClientConvertAccelStructBuildGeometryKHR( case VK_FORMAT_R16G16_SNORM: pTriangles->vertexFormat = GpuRt::VertexFormat::R16G16_Snorm; break; + case VK_FORMAT_R16G16_UNORM: + pTriangles->vertexFormat = GpuRt::VertexFormat::R16G16_Unorm; + break; + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: + pTriangles->vertexFormat = GpuRt::VertexFormat::R10G10B10A2_Unorm; + break; + case VK_FORMAT_R8G8B8A8_UNORM: + pTriangles->vertexFormat = GpuRt::VertexFormat::R8G8B8A8_Unorm; + break; + case VK_FORMAT_R8G8B8A8_SNORM: + pTriangles->vertexFormat = GpuRt::VertexFormat::R8G8B8A8_Snorm; + break; + case VK_FORMAT_R8G8_UNORM: + pTriangles->vertexFormat = GpuRt::VertexFormat::R8G8_Unorm; + break; + case VK_FORMAT_R8G8_SNORM: + pTriangles->vertexFormat = GpuRt::VertexFormat::R8G8_Snorm; + break; default: VK_NEVER_CALLED(); pTriangles->vertexFormat = GpuRt::VertexFormat::R32G32B32_Float; diff --git 
a/icd/api/raytrace/vk_ray_tracing_pipeline.cpp b/icd/api/raytrace/vk_ray_tracing_pipeline.cpp index d2a65adb..69e2db0a 100644 --- a/icd/api/raytrace/vk_ray_tracing_pipeline.cpp +++ b/icd/api/raytrace/vk_ray_tracing_pipeline.cpp @@ -30,6 +30,7 @@ #include "include/vk_shader.h" #include "include/vk_device.h" #include "include/vk_instance.h" +#include "include/vk_pipeline_binary.h" #include "include/vk_pipeline_cache.h" #include "include/vk_pipeline_layout.h" #include "include/vk_memory.h" @@ -330,6 +331,7 @@ void RayTracingPipeline::Init( uint32_t shaderLibraryCount, Pal::IShaderLibrary** ppPalShaderLibrary, const PipelineLayout* pPipelineLayout, + PipelineBinaryStorage* pBinaryStorage, const ShaderOptimizerKey* pShaderOptKeys, const ImmedInfo& immedInfo, uint64_t staticStateMask, @@ -350,6 +352,7 @@ void RayTracingPipeline::Init( Pipeline::Init( ppPalPipeline, pPipelineLayout, + pBinaryStorage, staticStateMask, dispatchRaysUserDataOffset, cacheHash, @@ -482,6 +485,29 @@ VkResult RayTracingPipeline::CreateImpl( PipelineCompiler::InitPipelineCreationFeedback(pPipelineCreationFeedbackCreateInfo); bool binariesProvided = false; Util::MetroHash::Hash cacheId[MaxPalDevices] = {}; + Vkgc::BinaryData providedBinaries[MaxPalDevices] = {}; + + auto pPipelineBinaryInfoKHR = extStructs.pPipelineBinaryInfoKHR; + + if (pPipelineBinaryInfoKHR != nullptr) + { + if (pPipelineBinaryInfoKHR->binaryCount > 0) + { + VK_ASSERT(pPipelineBinaryInfoKHR->binaryCount == m_pDevice->NumPalDevices()); + binariesProvided = true; + } + + for (uint32_t binaryIndex = 0; + (binaryIndex < pPipelineBinaryInfoKHR->binaryCount) && (result == VK_SUCCESS); + ++binaryIndex) + { + const auto pBinary = PipelineBinary::ObjectFromHandle( + pPipelineBinaryInfoKHR->pPipelineBinaries[binaryIndex]); + + cacheId[binaryIndex] = pBinary->BinaryKey(); + providedBinaries[binaryIndex] = pBinary->BinaryData(); + } + } RayTracingPipelineShaderStageInfo shaderInfo = {}; PipelineOptimizerKey optimizerKey = {}; @@ -693,6 
+719,10 @@ VkResult RayTracingPipeline::CreateImpl( bool storeBinaryToPipeline = false; bool storeBinaryToCache = true; + PipelineBinaryStorage binaryStorage = {}; + + storeBinaryToPipeline = (flags & VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR) != 0; + storeBinaryToCache = (flags & VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR) == 0; for (uint32_t deviceIdx = 0; (result == VK_SUCCESS) && (deviceIdx < m_pDevice->NumPalDevices()); deviceIdx++) { @@ -732,10 +762,40 @@ VkResult RayTracingPipeline::CreateImpl( isInternalCacheHit); } + if (storeBinaryToPipeline) + { + // Store single packed blob of binaries from cache instead of separate binaries. + void* pMemory = pAllocator->pfnAllocation( + pAllocator->pUserData, + cachedBinData.codeSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); // retained in the pipeline object + + if (pMemory != nullptr) + { + memcpy( + pMemory, + cachedBinData.pCode, + cachedBinData.codeSize); + + InsertBinaryData( + &binaryStorage, + deviceIdx, + cacheId[deviceIdx], + cachedBinData.codeSize, + pMemory); + } + else + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + } } } else { + cachedBinData = providedBinaries[deviceIdx]; + cacheResult = Util::Result::Success; } if (cacheResult == Util::Result::Success) @@ -836,6 +896,35 @@ VkResult RayTracingPipeline::CreateImpl( isInternalCacheHit); } + if (storeBinaryToPipeline) + { + // Store compiled binaries packed into a single blob instead of separately. 
+ void* pMemory = pAllocator->pfnAllocation( + pAllocator->pUserData, + cachedBinData.codeSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); // retained in the pipeline object + + if (pMemory != nullptr) + { + memcpy( + pMemory, + cachedBinData.pCode, + cachedBinData.codeSize); + + InsertBinaryData( + &binaryStorage, + deviceIdx, + cacheId[deviceIdx], + cachedBinData.codeSize, + pMemory); + } + else + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + else { m_pDevice->VkInstance()->FreeMem(const_cast(cachedBinData.pCode)); } @@ -925,6 +1014,7 @@ VkResult RayTracingPipeline::CreateImpl( // Create the PAL pipeline object. Pal::IShaderLibrary** ppShaderLibraries = nullptr; ShaderGroupInfo* pShaderGroupInfos = nullptr; + PipelineBinaryStorage* pPermBinaryStorage = nullptr; Pal::IPipeline* pPalPipeline [MaxPalDevices] = {}; ShaderGroupStackSizes* pShaderGroupStackSizes[MaxPalDevices] = {}; gpusize traceRayGpuVas [MaxPalDevices] = {}; @@ -934,6 +1024,7 @@ VkResult RayTracingPipeline::CreateImpl( size_t shaderLibraryPalMemSize = 0; size_t shaderGroupStackSizesMemSize = 0; size_t shaderGroupInfosMemSize = 0; + size_t binaryStorageSize = 0; const size_t shaderOptKeysSize = optimizerKey.shaderCount * sizeof(ShaderOptimizerKey); @@ -966,6 +1057,7 @@ VkResult RayTracingPipeline::CreateImpl( shaderGroupInfosMemSize = sizeof(ShaderGroupInfo) * totalGroupCount; shaderGroupStackSizesMemSize = (((funcCount > 0) || hasLibraries) ? 1 : 0) * sizeof(ShaderGroupStackSizes) * totalGroupCount * m_pDevice->NumPalDevices(); + binaryStorageSize = (storeBinaryToPipeline ? 
1 : 0 ) * sizeof(PipelineBinaryStorage); const size_t totalSize = pipelineMemSize + @@ -973,6 +1065,7 @@ VkResult RayTracingPipeline::CreateImpl( shaderLibraryPalMemSize + shaderGroupStackSizesMemSize + shaderGroupInfosMemSize + + binaryStorageSize + shaderOptKeysSize; pSystemMem = pAllocator->pfnAllocation( @@ -1007,6 +1100,15 @@ VkResult RayTracingPipeline::CreateImpl( PopulateShaderGroupInfos(pCreateInfo, pShaderGroupInfos, totalGroupCount); + if (storeBinaryToPipeline) + { + pPermBinaryStorage = static_cast( + Util::VoidPtrInc(pShaderGroupsStackSizesMem, shaderGroupStackSizesMemSize)); + + // Simply copy the existing allocations to the new struct. + *pPermBinaryStorage = binaryStorage; + } + // Transfer shader optimizer keys to permanent storage. memcpy(pShaderOptKeys, optimizerKey.pShaders, shaderOptKeysSize); optimizerKey.pShaders = static_cast(pShaderOptKeys); @@ -1526,6 +1628,7 @@ VkResult RayTracingPipeline::CreateImpl( funcCount * m_pDevice->NumPalDevices(), ppShaderLibraries, localPipelineInfo.pLayout, + pPermBinaryStorage, optimizerKey.pShaders, localPipelineInfo.immedInfo, localPipelineInfo.staticStateMask, @@ -1542,7 +1645,7 @@ VkResult RayTracingPipeline::CreateImpl( cacheId[DefaultDeviceIndex], apiPsoHash, elfHash); - if (settings.enableDebugPrintf) + if (m_pDevice->GetEnabledFeatures().enableDebugPrintf) { ClearFormatString(); for (uint32_t i = 0; i < pipelineBinaries[DefaultDeviceIndex].pipelineBinCount; ++i) @@ -1557,6 +1660,8 @@ VkResult RayTracingPipeline::CreateImpl( } else { + // Free the binaries only if we failed to create the pipeline. 
+ FreeBinaryStorage(&binaryStorage, pAllocator); for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) { @@ -2262,15 +2367,10 @@ uint32_t RayTracingPipeline::UpdateShaderGroupIndex( } // ===================================================================================================================== -void RayTracingPipeline::GetDispatchSize( - uint32_t* pDispatchSizeX, - uint32_t* pDispatchSizeY, - uint32_t* pDispatchSizeZ, - uint32_t width, - uint32_t height, - uint32_t depth) const +Pal::DispatchDims RayTracingPipeline::GetDispatchSize( + Pal::DispatchDims size) const { - VK_ASSERT((pDispatchSizeX != nullptr) && (pDispatchSizeY != nullptr) && (pDispatchSizeZ != nullptr)); + Pal::DispatchDims dispatchSize = {}; const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); @@ -2280,31 +2380,33 @@ void RayTracingPipeline::GetDispatchSize( if (flattenThreadGroupSize == 0) { - *pDispatchSizeX = Util::RoundUpQuotient(width, settings.rtThreadGroupSizeX); - *pDispatchSizeY = Util::RoundUpQuotient(height, settings.rtThreadGroupSizeY); - *pDispatchSizeZ = Util::RoundUpQuotient(depth, settings.rtThreadGroupSizeZ); + dispatchSize.x = Util::RoundUpQuotient(size.x, settings.rtThreadGroupSizeX); + dispatchSize.y = Util::RoundUpQuotient(size.y, settings.rtThreadGroupSizeY); + dispatchSize.z = Util::RoundUpQuotient(size.z, settings.rtThreadGroupSizeZ); } else { - uint32_t dispatchSize = 0; + uint32_t x = 0; - if ((width > 1) && (height > 1)) + if ((size.x > 1) && (size.y > 1)) { const uint32_t tileHeight = flattenThreadGroupSize / RayTracingTileWidth; - const uint32_t paddedWidth = Util::Pow2Align(width, RayTracingTileWidth); - const uint32_t paddedHeight = Util::Pow2Align(height, tileHeight); + const uint32_t paddedWidth = Util::Pow2Align(size.x, RayTracingTileWidth); + const uint32_t paddedHeight = Util::Pow2Align(size.y, tileHeight); - dispatchSize = Util::RoundUpQuotient(paddedWidth * paddedHeight, flattenThreadGroupSize); + x = 
Util::RoundUpQuotient(paddedWidth * paddedHeight, flattenThreadGroupSize); } else { - dispatchSize = Util::RoundUpQuotient(width * height, flattenThreadGroupSize); + x = Util::RoundUpQuotient(size.x * size.y, flattenThreadGroupSize); } - *pDispatchSizeX = dispatchSize; - *pDispatchSizeY = depth; - *pDispatchSizeZ = 1; + dispatchSize.x = x; + dispatchSize.y = size.z; + dispatchSize.z = 1; } + + return dispatchSize; } // ===================================================================================================================== diff --git a/icd/api/raytrace/vk_ray_tracing_pipeline.h b/icd/api/raytrace/vk_ray_tracing_pipeline.h index 394c7f68..19a4c5e7 100644 --- a/icd/api/raytrace/vk_ray_tracing_pipeline.h +++ b/icd/api/raytrace/vk_ray_tracing_pipeline.h @@ -30,6 +30,7 @@ #include "include/vk_pipeline.h" #include "include/internal_mem_mgr.h" +#include "include/vk_pipeline_binary.h" #include "palPipeline.h" #include "palVector.h" @@ -256,12 +257,7 @@ class RayTracingPipeline final : public Pipeline, public NonDispatchablesrcStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR; pBarrier->dstStageMask |= VK_PIPELINE_STAGE_2_RESOLVE_BIT_KHR; - pBarrier->implicitSrcCacheMask |= pBarrier->flags.preColorResolveSync ? Pal::CoherColorTarget : - Pal::CoherDepthStencilTarget; - pBarrier->implicitDstCacheMask |= Pal::CoherResolveDst; + // It's possible that color and DS are both included in a single barrier for resolves. As a result of that we + // cannot rely on preColorResolveSync and preDsResolveSync to determine the cache mask here. Instead, include + // both here and then in RPSyncPoint we use excludeAccessMask to filter out unnecessary mask. 
+ pBarrier->implicitSrcCacheMask |= Pal::CoherColorTarget | Pal::CoherDepthStencilTarget; + + // Ideally, we should specify CoherResolveSrc here but since this preResolveSync barrier can specify multiple + // image transitions it's possible that different images in this barrier are used as src and dst for the + // resolve operation. Thus, it's better to specify just CoherResolve here. + pBarrier->implicitDstCacheMask |= Pal::CoherResolve; } // Wait for (non-auto-synced) pre-clear if necessary. No need to augment the pipe point because the prior work falls @@ -1009,13 +1015,14 @@ static void ConvertImplicitSyncs( // Augment the active source pipeline stages for resolves if we need to wait for prior resolves to complete if (pBarrier->flags.postResolveSync) { - // TopOfPipe causes a stall at PFP which is not really needed for images. As an optimization for Acq-Rel - // barriers we instead set dstStage to Blt here. + // Wait until the prior resolves complete pBarrier->srcStageMask |= VK_PIPELINE_STAGE_2_RESOLVE_BIT_KHR; - pBarrier->dstStageMask |= VK_PIPELINE_STAGE_2_BLIT_BIT_KHR; + pBarrier->dstStageMask |= VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR | + VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR | + VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR; - pBarrier->implicitSrcCacheMask |= Pal::CoherResolveSrc; - pBarrier->implicitDstCacheMask |= Pal::CoherResolveDst; + pBarrier->implicitSrcCacheMask |= Pal::CoherResolve; + pBarrier->implicitDstCacheMask |= Pal::CoherColorTarget | Pal::CoherDepthStencilTarget; } if (pBarrier->flags.implicitExternalOutgoing && settings.implicitExternalSynchronization) diff --git a/icd/api/renderpass/renderpass_types.h b/icd/api/renderpass/renderpass_types.h index 6c7d4a92..ac6859d7 100644 --- a/icd/api/renderpass/renderpass_types.h +++ b/icd/api/renderpass/renderpass_types.h @@ -173,8 +173,7 @@ union SubpassStateFlags uint32_t hasExternalIncoming : 1; // True if an explicit VkSubpassDependency exists with src = // 
VK_SUBPASS_EXTERNAL and dst = this. uint32_t hasExternalOutgoing : 1; // Same as above, but src and dst reversed. - uint32_t reserved1 : 2; - uint32_t reserved : 26; + uint32_t reserved : 28; }; uint32_t u32All; }; diff --git a/icd/api/sqtt/sqtt_layer.cpp b/icd/api/sqtt/sqtt_layer.cpp index b9c020ba..ad814674 100644 --- a/icd/api/sqtt/sqtt_layer.cpp +++ b/icd/api/sqtt/sqtt_layer.cpp @@ -1142,12 +1142,12 @@ void SqttCmdBufferState::DebugMarkerInsert( void SqttCmdBufferState::DebugLabelBegin( const VkDebugUtilsLabelEXT* pMarkerInfo) { - DevUserMarkerString userMarkerString; + DevUserMarkerString userMarkerString = {}; userMarkerString.length = static_cast(strlen(pMarkerInfo->pLabelName)) + 1; Util::Strncpy(userMarkerString.string, pMarkerInfo->pLabelName, sizeof(userMarkerString.string)); m_userMarkerStrings.PushBack(userMarkerString); - Pal::Developer::UserMarkerOpInfo opInfo; + Pal::Developer::UserMarkerOpInfo opInfo = {}; opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Push); opInfo.strIndex = static_cast(m_userMarkerStrings.size()); m_userMarkerOpHistory.PushBack(opInfo.u32All); @@ -1158,7 +1158,7 @@ void SqttCmdBufferState::DebugLabelBegin( // ===================================================================================================================== void SqttCmdBufferState::DebugLabelEnd() { - Pal::Developer::UserMarkerOpInfo opInfo; + Pal::Developer::UserMarkerOpInfo opInfo = {}; opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Pop); m_userMarkerOpHistory.PushBack(opInfo.u32All); @@ -1477,6 +1477,50 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirectCountKHR( pSqtt->EndEntryPoint(); } +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirectCount( + VkCommandBuffer cmdBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countOffset, + uint32_t maxDrawCount, + uint32_t stride) +{ 
+ SQTT_SETUP(); + + pSqtt->BeginEntryPoint(RgpSqttMarkerGeneralApiType::CmdDrawIndirectCount); + pSqtt->BeginEventMarkers(RgpSqttMarkerEventType::CmdDrawIndirectCount); + + SQTT_CALL_NEXT_LAYER(vkCmdDrawIndirectCount)(cmdBuffer, buffer, offset, countBuffer, countOffset, maxDrawCount, + stride); + + pSqtt->EndEventMarkers(); + pSqtt->EndEntryPoint(); +} + +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirectCount( + VkCommandBuffer cmdBuffer, + VkBuffer buffer, + VkDeviceSize offset, + VkBuffer countBuffer, + VkDeviceSize countOffset, + uint32_t maxDrawCount, + uint32_t stride) +{ + SQTT_SETUP(); + + pSqtt->BeginEntryPoint(RgpSqttMarkerGeneralApiType::CmdDrawIndexedIndirectCount); + pSqtt->BeginEventMarkers(RgpSqttMarkerEventType::CmdDrawIndexedIndirectCount); + + SQTT_CALL_NEXT_LAYER(vkCmdDrawIndexedIndirectCount)(cmdBuffer, buffer, offset, countBuffer, countOffset, + maxDrawCount, stride); + + pSqtt->EndEventMarkers(); + pSqtt->EndEntryPoint(); +} + // ===================================================================================================================== VKAPI_ATTR void VKAPI_CALL vkCmdDrawMeshTasksEXT( VkCommandBuffer cmdBuffer, @@ -2924,6 +2968,8 @@ void SqttOverrideDispatchTable( SQTT_OVERRIDE_ENTRY(vkCmdDrawIndexedIndirectCountAMD); SQTT_OVERRIDE_ENTRY(vkCmdDrawIndirectCountKHR); SQTT_OVERRIDE_ENTRY(vkCmdDrawIndexedIndirectCountKHR); + SQTT_OVERRIDE_ENTRY(vkCmdDrawIndirectCount); + SQTT_OVERRIDE_ENTRY(vkCmdDrawIndexedIndirectCount); SQTT_OVERRIDE_ENTRY(vkCmdDrawMeshTasksEXT); SQTT_OVERRIDE_ENTRY(vkCmdDrawMeshTasksIndirectCountEXT); SQTT_OVERRIDE_ENTRY(vkCmdDrawMeshTasksIndirectEXT); diff --git a/icd/api/sqtt/sqtt_rgp_annotations.h b/icd/api/sqtt/sqtt_rgp_annotations.h index dcc1d146..0c3cf3a2 100644 --- a/icd/api/sqtt/sqtt_rgp_annotations.h +++ b/icd/api/sqtt/sqtt_rgp_annotations.h @@ -217,6 +217,8 @@ enum class 
RgpSqttMarkerEventType : uint32_t CmdDrawMeshTasksEXT = 41, // vkCmdDrawMeshTasksEXT CmdDrawMeshTasksIndirectCountEXT = 42, // vkCmdDrawMeshTasksIndirectCountEXT CmdDrawMeshTasksIndirectEXT = 43, // vkCmdDrawMeshTasksIndirectEXT + CmdDrawIndirectCount = 44, // vkCmdDrawIndirectCount + CmdDrawIndexedIndirectCount = 45, // vkCmdDrawIndexedIndirectCount #if VKI_RAY_TRACING ShaderIndirectModeMask = 0x800000, // Used to mark whether the shader is compiled in indirect mode or not // This mask can only be used with CmdTraceRaysKHR and CmdTraceRaysIndirectKHR @@ -509,6 +511,8 @@ enum class RgpSqttMarkerGeneralApiType : uint32_t CmdDrawMeshTasksEXT = 47, CmdDrawMeshTasksIndirectCountEXT = 48, CmdDrawMeshTasksIndirectEXT = 49, + CmdDrawIndirectCount = 50, + CmdDrawIndexedIndirectCount = 51, Invalid = 0xffffffff }; diff --git a/icd/api/strings/entry_points.txt b/icd/api/strings/entry_points.txt index 92260482..75362511 100644 --- a/icd/api/strings/entry_points.txt +++ b/icd/api/strings/entry_points.txt @@ -583,5 +583,11 @@ vkCmdBindDescriptorBufferEmbeddedSamplers2EXT @device @dext(KHR_main vkCmdSetRenderingAttachmentLocationsKHR @device @dext(KHR_dynamic_rendering_local_read) vkCmdSetRenderingInputAttachmentIndicesKHR @device @dext(KHR_dynamic_rendering_local_read) +vkCreatePipelineBinariesKHR @device @dext(KHR_pipeline_binary) +vkDestroyPipelineBinaryKHR @device @dext(KHR_pipeline_binary) +vkGetPipelineKeyKHR @device @dext(KHR_pipeline_binary) +vkGetPipelineBinaryDataKHR @device @dext(KHR_pipeline_binary) +vkReleaseCapturedPipelineDataKHR @device @dext(KHR_pipeline_binary) + vkCmdSetDepthBias2EXT @device @dext(EXT_depth_bias_control) diff --git a/icd/api/strings/extensions.txt b/icd/api/strings/extensions.txt index ef9fdbe4..ed839be3 100644 --- a/icd/api/strings/extensions.txt +++ b/icd/api/strings/extensions.txt @@ -196,6 +196,7 @@ VK_EXT_image_2d_view_of_3d VK_EXT_depth_clamp_zero_one VK_EXT_primitives_generated_query VK_EXT_non_seamless_cube_map +VK_KHR_pipeline_binary 
VK_EXT_image_sliced_view_of_3d VK_KHR_shader_maximal_reconvergence VK_EXT_shader_module_identifier diff --git a/icd/api/vk_buffer.cpp b/icd/api/vk_buffer.cpp index 0d35fa27..5c314096 100644 --- a/icd/api/vk_buffer.cpp +++ b/icd/api/vk_buffer.cpp @@ -127,7 +127,16 @@ VkResult Buffer::Create( VK_ASSERT(pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetPrtFeatures() & Pal::PrtFeatureBuffer); } - if ((pCreateInfo->flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT) != 0) + // Use the descriptor table VA range for descriptor buffers because we need to program descriptors + // with a single (32-bit) user data entry and there is no such guarentee with the default VA range. + if ((Device::GetBufferUsageFlagBits(pCreateInfo) & + (VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_PUSH_DESCRIPTORS_DESCRIPTOR_BUFFER_BIT_EXT)) != 0) + { + gpuMemoryCreateInfo.vaRange = Pal::VaRange::DescriptorTable; + } + else if ((pCreateInfo->flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT) != 0) { gpuMemoryCreateInfo.vaRange = Pal::VaRange::CaptureReplay; } diff --git a/icd/api/vk_buffer_view.cpp b/icd/api/vk_buffer_view.cpp index f1196a96..ab2e2f62 100644 --- a/icd/api/vk_buffer_view.cpp +++ b/icd/api/vk_buffer_view.cpp @@ -46,9 +46,11 @@ VkResult BufferView::Create( // Allocate memory for the buffer view const size_t apiSize = sizeof(BufferView); - const size_t bufferSrdSize = - pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gfxipProperties.srdSizes.bufferView; - size_t srdSize = bufferSrdSize; + const size_t srdSize = (pCreateInfo->format == VK_FORMAT_UNDEFINED) ? 
+ pDevice->VkPhysicalDevice(DefaultDeviceIndex)-> + PalProperties().gfxipProperties.srdSizes.untypedBufferView : + pDevice->VkPhysicalDevice(DefaultDeviceIndex)-> + PalProperties().gfxipProperties.srdSizes.typedBufferView; const size_t objSize = apiSize + (srdSize * pDevice->NumPalDevices()); @@ -87,7 +89,6 @@ VkResult BufferView::Create( bufferAddress, pCreateInfo->format, pDevice->NumPalDevices(), - srdSize, pSrdMemory); VK_PLACEMENT_NEW(pMemory) BufferView(pDevice, static_cast(srdSize), pSrdMemory); @@ -105,7 +106,6 @@ void BufferView::BuildSrd( const Pal::gpusize* bufferAddress, const VkFormat format, const uint32_t deviceNum, - const size_t srdSize, void* pSrdMemory) { // Build the SRD @@ -129,19 +129,20 @@ void BufferView::BuildSrd( if (format != VK_FORMAT_UNDEFINED) { + const uint32_t srdSize = + pDevice->VkPhysicalDevice(deviceIdx)->PalProperties().gfxipProperties.srdSizes.typedBufferView; pDevice->PalDevice(deviceIdx)->CreateTypedBufferViewSrds( 1, &info, Util::VoidPtrInc(pSrdMemory, srdSize * deviceIdx)); } else { - info.stride = 0; // Raw buffers have a zero byte stride + const uint32_t srdSize = + pDevice->VkPhysicalDevice(deviceIdx)->PalProperties().gfxipProperties.srdSizes.untypedBufferView; + info.stride = 0; // Raw buffers have a zero byte stride pDevice->PalDevice(deviceIdx)->CreateUntypedBufferViewSrds( 1, &info, Util::VoidPtrInc(pSrdMemory, srdSize * deviceIdx)); } - - VK_ASSERT(srdSize >= - pDevice->VkPhysicalDevice(deviceIdx)->PalProperties().gfxipProperties.srdSizes.bufferView); } } diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index 869c08a9..c4b2c71e 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -620,7 +620,7 @@ CmdBuffer::CmdBuffer( m_optimizeCmdbufMode = settings.optimizeCmdbufMode; m_asyncComputeQueueMaxWavesPerCu = settings.asyncComputeQueueMaxWavesPerCu; -#if VK_ENABLE_DEBUG_BARRIERS +#if VKI_ENABLE_DEBUG_BARRIERS m_dbgBarrierPreCmdMask = settings.dbgBarrierPreCmdEnable; 
m_dbgBarrierPostCmdMask = settings.dbgBarrierPostCmdEnable; #endif @@ -1417,17 +1417,7 @@ VkResult CmdBuffer::Begin( VK_ASSERT(m_flags.is2ndLvl); pInheritanceRenderingInfo = static_cast(pNext); - - inheritedStateParams.colorTargetCount = pInheritanceRenderingInfo->colorAttachmentCount; inheritedStateParams.stateFlags.targetViewState = 1; - - for (uint32_t i = 0; i < inheritedStateParams.colorTargetCount; i++) - { - inheritedStateParams.colorTargetSwizzledFormats[i] = - VkToPalFormat(pInheritanceRenderingInfo->pColorAttachmentFormats[i], settings); - - inheritedStateParams.sampleCount[i] = pInheritanceRenderingInfo->rasterizationSamples; - } } else if (pHeader->sType == VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_LOCATION_INFO_KHR) { @@ -1473,15 +1463,7 @@ VkResult CmdBuffer::Begin( { VK_ASSERT(m_flags.is2ndLvl); - inheritedStateParams.colorTargetCount = pRenderPass->GetSubpassColorReferenceCount(currentSubPass); inheritedStateParams.stateFlags.targetViewState = 1; - - for (uint32_t i = 0; i < inheritedStateParams.colorTargetCount; i++) - { - inheritedStateParams.colorTargetSwizzledFormats[i] = - VkToPalFormat(pRenderPass->GetColorAttachmentFormat(currentSubPass, i), settings); - inheritedStateParams.sampleCount[i] = pRenderPass->GetColorAttachmentSamples(currentSubPass, i); - } } Pal::Result result = PalCmdBufferBegin(cmdInfo); @@ -2805,7 +2787,8 @@ PFN_vkCmdBindDescriptorSets CmdBuffer::GetCmdBindDescriptorSetsFunc( // ===================================================================================================================== template VKAPI_ATTR void VKAPI_CALL CmdBuffer::CmdPushDescriptorSetKHR( VkCommandBuffer commandBuffer, @@ -2817,7 +2800,8 @@ VKAPI_ATTR void VKAPI_CALL CmdBuffer::CmdPushDescriptorSetKHR( { CmdBuffer* pCmdBuffer = ApiCmdBuffer::ObjectFromHandle(commandBuffer); - pCmdBuffer->PushDescriptorSetKHR( + pCmdBuffer->PushDescriptorSetKHR + ( pipelineBindPoint, layout, set, @@ -2830,19 +2814,34 @@ template PFN_vkCmdPushDescriptorSetKHR 
CmdBuffer::GetCmdPushDescriptorSetKHRFunc( const Device* pDevice) { - const size_t imageDescSize = pDevice->GetProperties().descriptorSizes.imageView; - const size_t samplerDescSize = pDevice->GetProperties().descriptorSizes.sampler; - const size_t bufferDescSize = pDevice->GetProperties().descriptorSizes.bufferView; + const size_t imageDescSize = pDevice->GetProperties().descriptorSizes.imageView; + const size_t samplerDescSize = pDevice->GetProperties().descriptorSizes.sampler; + const size_t typedBufferDescSize = pDevice->GetProperties().descriptorSizes.typedBufferView; + const size_t untypedBufferDescSize = pDevice->GetProperties().descriptorSizes.untypedBufferView; PFN_vkCmdPushDescriptorSetKHR pFunc = nullptr; - if ((imageDescSize == 32) && - (samplerDescSize == 16) && - (bufferDescSize == 16)) + if ((imageDescSize == 32) && + (samplerDescSize == 16) && + (typedBufferDescSize == 16) && + (untypedBufferDescSize == 16)) + { + pFunc = &CmdPushDescriptorSetKHR< + 32, + 16, + 16, + 16, + numPalDevices>; + } + else if ((imageDescSize == 32) && + (samplerDescSize == 16) && + (typedBufferDescSize == 24) && + (untypedBufferDescSize == 16)) { pFunc = &CmdPushDescriptorSetKHR< 32, 16, + 24, 16, numPalDevices>; } @@ -4659,7 +4658,6 @@ void CmdBuffer::SetEvent2( { ExecuteAcquireRelease(1, &event, - 1, pDependencyInfo, Release, RgpBarrierExternalCmdWaitEvents); @@ -4725,6 +4723,20 @@ void CmdBuffer::LoadOpClearColor( const Pal::Rect* pDeviceGroupRenderArea, const VkRenderingInfo* pRenderingInfo) { + if (m_pSqttState != nullptr) + { + m_pSqttState->BeginRenderPassColorClear(); + } + + const ImageView* pImageViews[Pal::MaxColorTargets] = {}; + Pal::ClearColor clearColors[Pal::MaxColorTargets] = {}; + Pal::ImageLayout imageLayouts[Pal::MaxColorTargets] = {}; + Pal::SubresRange ranges[Pal::MaxColorTargets] = {}; + Pal::SwizzledFormat clearFormats[Pal::MaxColorTargets] = {}; + + uint32_t clearCount = 0; + + // Collect information on the number of clears to decide if we need to 
batch. for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; ++i) { const VkRenderingAttachmentInfo& attachmentInfo = pRenderingInfo->pColorAttachments[i]; @@ -4733,63 +4745,80 @@ void CmdBuffer::LoadOpClearColor( { // Get the image view from the attachment info const ImageView* const pImageView = ImageView::ObjectFromHandle(attachmentInfo.imageView); - if (pImageView != VK_NULL_HANDLE) + if (pImageView != nullptr) { - // Get the attachment image + pImageViews[clearCount] = pImageView; + const Image* pImage = pImageView->GetImage(); // Convert the clear color to the format of the attachment view - Pal::SwizzledFormat clearFormat = VkToPalFormat( + clearFormats[clearCount] = VkToPalFormat( pImageView->GetViewFormat(), m_pDevice->GetRuntimeSettings()); - Pal::ClearColor clearColor = VkToPalClearColor( + + clearColors[clearCount] = VkToPalClearColor( attachmentInfo.clearValue.color, - clearFormat); + clearFormats[clearCount]); // Get subres range from the image view - Pal::SubresRange subresRange = {}; - pImageView->GetFrameBufferAttachmentSubresRange(&subresRange); + pImageView->GetFrameBufferAttachmentSubresRange(&ranges[clearCount]); // Override the number of slices with layerCount from pBeginRendering - subresRange.numSlices = pRenderingInfo->layerCount; - - const auto clearSubresRanges = LoadOpClearSubresRanges( - pRenderingInfo->viewMask, - subresRange); + ranges[clearCount].numSlices = pRenderingInfo->layerCount; // Clear Layout - const Pal::ImageLayout clearLayout = pImage->GetBarrierPolicy().GetAspectLayout( + imageLayouts[clearCount] = pImage->GetBarrierPolicy().GetAspectLayout( attachmentInfo.imageLayout, - subresRange.startSubres.plane, + ranges[clearCount].startSubres.plane, GetQueueFamilyIndex(), pImage->GetFormat()); - utils::IterateMask deviceGroup(GetDeviceMask()); - - do - { - const uint32_t deviceIdx = deviceGroup.Index(); - - // Clear Box - Pal::Box clearBox = BuildClearBox( - pDeviceGroupRenderArea[deviceIdx], - *pImageView); - - 
PalCmdBuffer(deviceIdx)->CmdClearColorImage( - *pImage->PalImage(deviceIdx), - clearLayout, - clearColor, - clearFormat, - clearSubresRanges.NumElements(), - clearSubresRanges.Data(), - 1, - &clearBox, - Pal::ColorClearAutoSync); - } - while (deviceGroup.IterateNext()); + clearCount++; } } } + + if (clearCount > 1) + { + BatchedLoadOpClears(clearCount, + pImageViews, + clearColors, + imageLayouts, + ranges, + clearFormats, + pRenderingInfo->viewMask); + } + else if (clearCount == 1) + { + VK_ASSERT(pImageViews[0] != nullptr); + const auto clearSubresRanges = LoadOpClearSubresRanges(pRenderingInfo->viewMask, ranges[0]); + + utils::IterateMask deviceGroup(GetDeviceMask()); + + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + + // Clear Box + Pal::Box clearBox = BuildClearBox(pDeviceGroupRenderArea[deviceIdx], *(pImageViews[0])); + + PalCmdBuffer(deviceIdx)->CmdClearColorImage( + *(pImageViews[0]->GetImage()->PalImage(deviceIdx)), + imageLayouts[0], + clearColors[0], + clearFormats[0], + clearSubresRanges.NumElements(), + clearSubresRanges.Data(), + 1, + &clearBox, + Pal::ColorClearAutoSync); + } while (deviceGroup.IterateNext()); + } + + if (m_pSqttState != nullptr) + { + m_pSqttState->EndRenderPassColorClear(); + } } // ===================================================================================================================== @@ -4798,6 +4827,11 @@ void CmdBuffer::LoadOpClearDepthStencil( const Pal::Rect* pDeviceGroupRenderArea, const VkRenderingInfo* pRenderingInfo) { + if (m_pSqttState != nullptr) + { + m_pSqttState->BeginRenderPassDepthStencilClear(); + } + // Note that no allocation will be performed, so Util::Vector allocator is nullptr. 
Util::Vector clearSubresRanges{ nullptr }; @@ -4886,6 +4920,11 @@ void CmdBuffer::LoadOpClearDepthStencil( } while (deviceGroup.IterateNext()); } + + if (m_pSqttState != nullptr) + { + m_pSqttState->EndRenderPassDepthStencilClear(); + } } // ===================================================================================================================== @@ -5062,7 +5101,7 @@ void CmdBuffer::BeginRendering( } while (deviceGroup.IterateNext()); - if (!skipClears) + if (skipClears == false) { PalCmdSuspendPredication(true); @@ -5112,6 +5151,11 @@ void CmdBuffer::ResolveImage( VkImageAspectFlags aspectMask, const DynamicRenderingAttachments& dynamicRenderingAttachments) { + if (m_pSqttState != nullptr) + { + m_pSqttState->BeginRenderPassResolve(); + } + Pal::ImageResolveRegion regions[MaxPalDevices] = {}; for (uint32_t idx = 0; idx < m_allGpuState.dynamicRenderingInstance.renderAreaCount; idx++) @@ -5186,27 +5230,58 @@ void CmdBuffer::ResolveImage( m_allGpuState.dynamicRenderingInstance.renderAreaCount, regions, m_curDeviceMask); + + if (m_pSqttState != nullptr) + { + m_pSqttState->EndRenderPassResolve(); + } } // ===================================================================================================================== // For Dynamic Rendering we need to wait for draws to finish before we do resolves. 
void CmdBuffer::PostDrawPreResolveSync() { - Pal::BarrierInfo barrierInfo = {}; - barrierInfo.waitPoint = Pal::HwPipePreCs; + if (m_flags.useReleaseAcquire) + { + Pal::AcquireReleaseInfo barrierInfo = + { + .srcGlobalStageMask = Pal::PipelineStageColorTarget | Pal::PipelineStageDsTarget, + .dstGlobalStageMask = Pal::PipelineStageBlt, + .srcGlobalAccessMask = Pal::CoherColorTarget | Pal::CoherDepthStencilTarget, + .dstGlobalAccessMask = Pal::CoherResolveSrc, + .memoryBarrierCount = 0, + .pMemoryBarriers = nullptr, + .imageBarrierCount = 0, + .pImageBarriers = nullptr, + .reason = RgpBarrierExternalRenderPassSync + }; - const Pal::HwPipePoint pipePoint = Pal::HwPipePostPs; - barrierInfo.pipePointWaitCount = 1; - barrierInfo.pPipePoints = &pipePoint; + PalCmdReleaseThenAcquire( + &barrierInfo, + nullptr, + nullptr, + nullptr, + nullptr, + m_curDeviceMask); + } + else + { + Pal::BarrierInfo barrierInfo = {}; + barrierInfo.waitPoint = Pal::HwPipePreCs; - Pal::BarrierTransition transition = {}; - transition.srcCacheMask = Pal::CoherColorTarget | Pal::CoherDepthStencilTarget; - transition.dstCacheMask = Pal::CoherShader; + const Pal::HwPipePoint pipePoint = Pal::HwPipePostPs; + barrierInfo.pipePointWaitCount = 1; + barrierInfo.pPipePoints = &pipePoint; - barrierInfo.transitionCount = 1; - barrierInfo.pTransitions = &transition; + Pal::BarrierTransition transition = {}; + transition.srcCacheMask = Pal::CoherColorTarget | Pal::CoherDepthStencilTarget; + transition.dstCacheMask = Pal::CoherShader; - PalCmdBarrier(barrierInfo, m_curDeviceMask); + barrierInfo.transitionCount = 1; + barrierInfo.pTransitions = &transition; + + PalCmdBarrier(barrierInfo, m_curDeviceMask); + } } // ===================================================================================================================== @@ -5682,7 +5757,6 @@ void CmdBuffer::WaitEvents2( ExecuteAcquireRelease(eventRangeCount, pEvents + i, - eventRangeCount, pDependencyInfos + i, Acquire, RgpBarrierExternalCmdWaitEvents); 
@@ -5835,13 +5909,14 @@ void CmdBuffer::WaitEventsSync2ToSync1( // ===================================================================================================================== // Based on Dependency Info, execute Acquire or Release according to the mode. void CmdBuffer::ExecuteAcquireRelease( - uint32_t eventCount, - const VkEvent* pEvents, uint32_t dependencyCount, + const VkEvent* pEvents, const VkDependencyInfoKHR* pDependencyInfos, AcquireReleaseMode acquireReleaseMode, uint32_t rgpBarrierReasonType) { + VK_ASSERT((acquireReleaseMode == ReleaseThenAcquire) || (pEvents != nullptr)); + const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); uint32_t barrierCount = 0; @@ -5858,7 +5933,7 @@ void CmdBuffer::ExecuteAcquireRelease( maxImageMemoryBarriers = Util::Max(pDependencyInfos[i].imageMemoryBarrierCount, maxImageMemoryBarriers); } - if ((eventCount > 0) || (barrierCount > 0)) + if ((pEvents != nullptr) || (barrierCount > 0)) { VirtualStackFrame virtStackFrame(m_pStackAllocator); @@ -6139,8 +6214,7 @@ void CmdBuffer::ExecuteAcquireRelease( PalCmdRelease( &acquireReleaseInfo, - eventCount, - pEvents, + pEvents[j], pPalBufferMemoryBarriers, ppBuffers, pPalImageBarriers, @@ -6166,8 +6240,7 @@ void CmdBuffer::ExecuteAcquireRelease( PalCmdAcquire( &acquireReleaseInfo, - eventCount, - pEvents, + pEvents[j], pPalBufferMemoryBarriers, ppBuffers, pPalImageBarriers, @@ -6574,9 +6647,8 @@ void CmdBuffer::PipelineBarrier2( if (m_flags.useReleaseAcquire) { - ExecuteAcquireRelease(0, + ExecuteAcquireRelease(1, nullptr, - 1, pDependencyInfo, ReleaseThenAcquire, RgpBarrierExternalCmdPipelineBarrier); @@ -7286,8 +7358,7 @@ void CmdBuffer::PalCmdReleaseThenAcquire( // ===================================================================================================================== void CmdBuffer::PalCmdAcquire( Pal::AcquireReleaseInfo* pAcquireReleaseInfo, - uint32_t eventCount, - const VkEvent* pEvents, + const VkEvent event, Pal::MemBarrier* const 
pBufferBarriers, const Buffer** const ppBuffers, Pal::ImgBarrier* const pImageBarriers, @@ -7300,7 +7371,7 @@ void CmdBuffer::PalCmdAcquire( // in the header, but temporarily you may use the generic "unknown" reason so as not to block you. VK_ASSERT(pAcquireReleaseInfo->reason != 0); - Event* pEvent = Event::ObjectFromHandle(pEvents[0]); + Event* pEvent = Event::ObjectFromHandle(event); utils::IterateMask deviceGroup(deviceMask); do @@ -7314,53 +7385,23 @@ void CmdBuffer::PalCmdAcquire( pImageBarriers[i].pImage = ppImages[i]->PalImage(deviceIdx); } } - pAcquireReleaseInfo->pImageBarriers = pImageBarriers; + pAcquireReleaseInfo->pImageBarriers = pImageBarriers; pAcquireReleaseInfo->pMemoryBarriers = pBufferBarriers; if (pEvent->IsUseToken()) { - // Allocate space to store sync token values (automatically rewound on unscope) - Pal::ReleaseToken* pSyncTokens = eventCount > 0 ? - pVirtStackFrame->AllocArray(eventCount) : nullptr; + Pal::ReleaseToken syncToken = {}; - if (pSyncTokens != nullptr) - { - for (uint32_t i = 0; i < eventCount; ++i) - { - pSyncTokens[i] = Event::ObjectFromHandle(pEvents[i])->GetSyncToken(); - } - - PalCmdBuffer(deviceIdx)->CmdAcquire(*pAcquireReleaseInfo, eventCount, pSyncTokens); - - pVirtStackFrame->FreeArray(pSyncTokens); - } - else - { - m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; - } + syncToken = pEvent->GetSyncToken(); + PalCmdBuffer(deviceIdx)->CmdAcquire(*pAcquireReleaseInfo, 1u, &syncToken); } else { - // Allocate space to store signaled event pointers (automatically rewound on unscope) - const Pal::IGpuEvent** ppGpuEvents = eventCount > 0 ? 
- pVirtStackFrame->AllocArray(eventCount) : nullptr; - - if (ppGpuEvents != nullptr) - { - for (uint32_t i = 0; i < eventCount; ++i) - { - ppGpuEvents[i] = Event::ObjectFromHandle(pEvents[i])->PalEvent(deviceIdx); - } + const Pal::IGpuEvent* pGpuEvent = {}; - PalCmdBuffer(deviceIdx)->CmdAcquireEvent(*pAcquireReleaseInfo, eventCount, ppGpuEvents); - - pVirtStackFrame->FreeArray(ppGpuEvents); - } - else - { - m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; - } + pGpuEvent = pEvent->PalEvent(deviceIdx); + PalCmdBuffer(deviceIdx)->CmdAcquireEvent(*pAcquireReleaseInfo, 1u, &pGpuEvent); } } while (deviceGroup.IterateNext()); @@ -7369,8 +7410,7 @@ void CmdBuffer::PalCmdAcquire( // ===================================================================================================================== void CmdBuffer::PalCmdRelease( Pal::AcquireReleaseInfo* pAcquireReleaseInfo, - uint32_t eventCount, - const VkEvent* pEvents, + const VkEvent event, Pal::MemBarrier* const pBufferBarriers, const Buffer** const ppBuffers, Pal::ImgBarrier* const pImageBarriers, @@ -7382,9 +7422,7 @@ void CmdBuffer::PalCmdRelease( // in the header, but temporarily you may use the generic "unknown" reason so as not to block you. 
VK_ASSERT(pAcquireReleaseInfo->reason != 0); - VK_ASSERT(eventCount == 1); - - Event* pEvent = Event::ObjectFromHandle(*pEvents); + Event* pEvent = Event::ObjectFromHandle(event); utils::IterateMask deviceGroup(deviceMask); do @@ -7398,8 +7436,8 @@ void CmdBuffer::PalCmdRelease( pImageBarriers[i].pImage = ppImages[i]->PalImage(deviceIdx); } } - pAcquireReleaseInfo->pImageBarriers = pImageBarriers; + pAcquireReleaseInfo->pImageBarriers = pImageBarriers; pAcquireReleaseInfo->pMemoryBarriers = pBufferBarriers; if (pEvent->IsUseToken()) @@ -7814,7 +7852,6 @@ void CmdBuffer::BeginRenderPass( while (deviceGroup.IterateNext()); RPBeginSubpass(); - } else { @@ -7834,7 +7871,6 @@ void CmdBuffer::NextSubPass( if (m_renderPassInstance.subpass != VK_SUBPASS_EXTERNAL) { - // End the previous subpass RPEndSubpass(); @@ -7843,7 +7879,6 @@ void CmdBuffer::NextSubPass( // Begin the next subpass RPBeginSubpass(); - } DbgBarrierPostCmd(DbgBarrierNextSubpass); @@ -8074,7 +8109,6 @@ void CmdBuffer::RPBeginSubpass() // Set view instance mask, on devices in render pass instance's device mask SetViewInstanceMask(GetRpDeviceMask()); - } // ===================================================================================================================== @@ -8296,6 +8330,19 @@ void CmdBuffer::RPSyncPoint( Pal::BarrierTransition imageTransition = { }; + // Remove depth stencil related stage/access masks for color attachment transitions and remove color + // target related stage/access mask for depth stencils. + const uint32_t excludeStageMask = + attachment.pImage->IsColorFormat() ? (~Pal::PipelineStageDsTarget) : + (attachment.pImage->IsDepthStencilFormat() ? + (~Pal::PipelineStageColorTarget) : + (Pal::PipelineStageAllStages)); + const uint32_t excludeAccessMask = + attachment.pImage->IsColorFormat() ? (~Pal::CoherDepthStencilTarget) : + (attachment.pImage->IsDepthStencilFormat() ? 
+ (~Pal::CoherColorTarget) : + (Pal::CoherAllUsages)); + for (uint32_t sr = 0; sr < attachment.subresRangeCount; ++sr) { const uint32_t plane = attachment.subresRange[sr].startSubres.plane; @@ -8325,10 +8372,14 @@ void CmdBuffer::RPSyncPoint( ppImages[acquireReleaseInfo.imageBarrierCount] = attachment.pImage; - pPalTransitions[acquireReleaseInfo.imageBarrierCount].srcStageMask = srcStageMask; - pPalTransitions[acquireReleaseInfo.imageBarrierCount].dstStageMask = dstStageMask; - pPalTransitions[acquireReleaseInfo.imageBarrierCount].srcAccessMask = srcAccessMask; - pPalTransitions[acquireReleaseInfo.imageBarrierCount].dstAccessMask = dstAccessMask; + pPalTransitions[acquireReleaseInfo.imageBarrierCount].srcStageMask = srcStageMask & + excludeStageMask; + pPalTransitions[acquireReleaseInfo.imageBarrierCount].dstStageMask = dstStageMask & + excludeStageMask; + pPalTransitions[acquireReleaseInfo.imageBarrierCount].srcAccessMask = srcAccessMask & + excludeAccessMask; + pPalTransitions[acquireReleaseInfo.imageBarrierCount].dstAccessMask = dstAccessMask & + excludeAccessMask; // We set the pImage to nullptr by default here. But, this will be computed correctly later for // each device including DefaultDeviceIndex based on the deviceId. 
pPalTransitions[acquireReleaseInfo.imageBarrierCount].pImage = nullptr; @@ -8336,6 +8387,9 @@ void CmdBuffer::RPSyncPoint( pPalTransitions[acquireReleaseInfo.imageBarrierCount].newLayout = newLayout; pPalTransitions[acquireReleaseInfo.imageBarrierCount].subresRange = attachment.subresRange[sr]; + VK_ASSERT((pPalTransitions[acquireReleaseInfo.imageBarrierCount].srcStageMask != 0) && + (pPalTransitions[acquireReleaseInfo.imageBarrierCount].dstStageMask != 0)); + const Pal::MsaaQuadSamplePattern* pQuadSamplePattern = nullptr; if (attachment.pImage->IsSampleLocationsCompatibleDepth() && @@ -8695,12 +8749,6 @@ void CmdBuffer::RPResolveAttachments( uint32_t count, const RPResolveInfo* pResolves) { - // Notify SQTT annotator that we are doing a render pass resolve operation - if (m_pSqttState != nullptr) - { - m_pSqttState->BeginRenderPassResolve(); - } - for (uint32_t i = 0; i < count; ++i) { const RPResolveInfo& params = pResolves[i]; @@ -8709,11 +8757,6 @@ void CmdBuffer::RPResolveAttachments( RPResolveMsaa(params); } } - - if (m_pSqttState != nullptr) - { - m_pSqttState->EndRenderPassResolve(); - } } // ===================================================================================================================== @@ -8721,6 +8764,11 @@ void CmdBuffer::RPResolveAttachments( void CmdBuffer::RPResolveMsaa( const RPResolveInfo& params) { + if (m_pSqttState != nullptr) + { + m_pSqttState->BeginRenderPassResolve(); + } + const Framebuffer::Attachment& srcAttachment = m_allGpuState.pFramebuffer->GetAttachment(params.src.attachment); const Framebuffer::Attachment& dstAttachment = @@ -8849,6 +8897,11 @@ void CmdBuffer::RPResolveMsaa( regions, GetRpDeviceMask()); } + + if (m_pSqttState != nullptr) + { + m_pSqttState->EndRenderPassResolve(); + } } // ===================================================================================================================== @@ -9307,7 +9360,8 @@ VkDescriptorSet CmdBuffer::InitPushDescriptorSet( // 
===================================================================================================================== template void CmdBuffer::PushDescriptorSetKHR( VkPipelineBindPoint pipelineBindPoint, @@ -9430,7 +9484,7 @@ void CmdBuffer::PushDescriptorSetKHR( break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - DescriptorUpdate::WriteBufferDescriptors( + DescriptorUpdate::WriteBufferDescriptors( params.pTexelBufferView, deviceIdx, pDestAddr, @@ -9439,7 +9493,7 @@ void CmdBuffer::PushDescriptorSetKHR( break; case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - DescriptorUpdate::WriteBufferDescriptors( + DescriptorUpdate::WriteBufferDescriptors( params.pTexelBufferView, deviceIdx, pDestAddr, @@ -9448,7 +9502,7 @@ void CmdBuffer::PushDescriptorSetKHR( break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - DescriptorUpdate::WriteBufferInfoDescriptors( + DescriptorUpdate::WriteBufferInfoDescriptors( m_pDevice, params.pBufferInfo, deviceIdx, @@ -9458,7 +9512,7 @@ void CmdBuffer::PushDescriptorSetKHR( break; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - DescriptorUpdate::WriteBufferInfoDescriptors( + DescriptorUpdate::WriteBufferInfoDescriptors( m_pDevice, params.pBufferInfo, deviceIdx, @@ -10123,7 +10177,7 @@ void CmdBuffer::SetRenderingInputAttachmentIndices( } -#if VK_ENABLE_DEBUG_BARRIERS +#if VKI_ENABLE_DEBUG_BARRIERS // ===================================================================================================================== // This function inserts a command before or after a particular Vulkan command if the given runtime settings are asking // for it. 
@@ -10949,7 +11003,7 @@ void CmdBuffer::GetRayTracingDispatchArgs( memcpy(pConstants->descriptorTable.accelStructTrackerSrd, m_pDevice->RayTrace()->GetAccelStructTrackerSrd(deviceIdx), - m_pDevice->GetProperties().descriptorSizes.bufferView); + m_pDevice->GetProperties().descriptorSizes.untypedBufferView); if (pPipeline->CheckIsCps()) { @@ -11093,6 +11147,8 @@ void CmdBuffer::TraceRaysIndirect( const VkStridedDeviceAddressRegionKHR& callableShaderBindingTable, VkDeviceAddress indirectDeviceAddress) { + DbgBarrierPreCmd(DbgTraceRays); + utils::IterateMask deviceGroup(m_curDeviceMask); do @@ -11110,6 +11166,8 @@ void CmdBuffer::TraceRaysIndirect( GetUserMarkerContextValue()); } while (deviceGroup.IterateNext()); + + DbgBarrierPostCmd(DbgTraceRays); } // ===================================================================================================================== @@ -11162,12 +11220,8 @@ void CmdBuffer::TraceRaysDispatchPerDevice( uint32_t depth) { const RayTracingPipeline* pPipeline = pCmdBuffer->m_allGpuState.pRayTracingPipeline; - uint32_t dispatchSizeX = 0; - uint32_t dispatchSizeY = 0; - uint32_t dispatchSizeZ = 0; - - pPipeline->GetDispatchSize(&dispatchSizeX, &dispatchSizeY, &dispatchSizeZ, width, height, depth); - pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdDispatch({ dispatchSizeX, dispatchSizeY, dispatchSizeZ }); + const Pal::DispatchDims dispatchSize = pPipeline->GetDispatchSize({ .x = width, .y = height, .z = depth }); + pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdDispatch(dispatchSize); } // ===================================================================================================================== @@ -11181,8 +11235,6 @@ void CmdBuffer::TraceRaysIndirectPerDevice( VkDeviceAddress indirectDeviceAddress, uint64_t userMarkerContext) { - DbgBarrierPreCmd(DbgTraceRays); - const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); const RayTracingPipeline* pPipeline = m_allGpuState.pRayTracingPipeline; @@ -11201,7 +11253,8 @@ void 
CmdBuffer::TraceRaysIndirectPerDevice( // Pre-pass gpusize initConstantsVa = 0; - const gpusize scratchBufferSize = sizeof(VkTraceRaysIndirectCommandKHR); + const gpusize scratchBufferSize = + sizeof(VkTraceRaysIndirectCommandKHR); InternalMemory* pScratchMemory = nullptr; VkResult result = GetRayTracingIndirectMemory(scratchBufferSize, &pScratchMemory); @@ -11213,6 +11266,7 @@ void CmdBuffer::TraceRaysIndirectPerDevice( 2, &initConstantsVa)); + memset(pInitConstants, 0, sizeof(GpuRt::InitExecuteIndirectConstants)); pInitConstants->maxIterations = m_pDevice->RayTrace()->GetProfileMaxIterations(); pInitConstants->profileRayFlags = m_pDevice->RayTrace()->GetProfileRayFlags(); @@ -11240,6 +11294,12 @@ void CmdBuffer::TraceRaysIndirectPerDevice( pInitConstants->rtThreadGroupSizeZ = 1; } + pInitConstants->bindingArgsSize = + 0; + + pInitConstants->inputBytesPerDispatch = 0; + pInitConstants->outputBytesPerDispatch = 0; + GpuRt::InitExecuteIndirectUserData initUserData = {}; initUserData.constantsVa = initConstantsVa; @@ -11299,9 +11359,9 @@ void CmdBuffer::TraceRaysIndirectPerDevice( 1, &constGpuAddrLow); - PalCmdBuffer(deviceIdx)->CmdDispatchIndirect(pScratchMemory->GpuVirtAddr(deviceIdx)); - - DbgBarrierPostCmd(DbgTraceRays); + { + PalCmdBuffer(deviceIdx)->CmdDispatchIndirect(pScratchMemory->GpuVirtAddr(deviceIdx)); + } } // ===================================================================================================================== @@ -11453,7 +11513,7 @@ void CmdBuffer::BindRayQueryConstants( { memcpy(constants.descriptorTable.accelStructTrackerSrd, VkDevice()->RayTrace()->GetAccelStructTrackerSrd(deviceIdx), - VkDevice()->GetProperties().descriptorSizes.bufferView); + VkDevice()->GetProperties().descriptorSizes.untypedBufferView); } if (rtCountersEnabled) @@ -11705,6 +11765,98 @@ void CmdBuffer::BindDescriptorBufferEmbeddedSamplers( } } +// ===================================================================================================================== 
+// Batch LoadOp clears on multiple color attachments instead of using PAL's ColorClearAutoSync, this will reduce the +// amount of barriers from 2 per clear to 2 for the entire batch. This is currently only used for Dynamic Rendering +// as the renderpass code has it's own version of this. +void CmdBuffer::BatchedLoadOpClears( + uint32_t clearCount, + const ImageView** pImageViews, + const Pal::ClearColor* pClearColors, + const Pal::ImageLayout* pClearLayouts, + const Pal::SubresRange* pRanges, + const Pal::SwizzledFormat* pClearFormats, + uint32_t viewMask) +{ + VK_ASSERT_MSG(clearCount > 1, "Pal::ColorClearAutoSync is recommended for single clears"); + + Pal::ImgBarrier imageBarriers[Pal::MaxColorTargets] = {}; + const Image* images[Pal::MaxColorTargets] = {}; + + for (uint32_t i = 0; i < clearCount; i++) + { + Pal::ImgBarrier* pPreSyncBarrier = &imageBarriers[i]; + + pPreSyncBarrier->srcStageMask = Pal::PipelineStageColorTarget; + pPreSyncBarrier->dstStageMask = Pal::PipelineStageBlt; + pPreSyncBarrier->srcAccessMask = Pal::CoherColorTarget; + pPreSyncBarrier->dstAccessMask = Pal::CoherClear; + pPreSyncBarrier->oldLayout = pClearLayouts[i]; + pPreSyncBarrier->newLayout = pClearLayouts[i]; + + pImageViews[i]->GetFrameBufferAttachmentSubresRange(&pPreSyncBarrier->subresRange); + + // This is filled out later in PalCmdReleaseThenAcquire() + pPreSyncBarrier->pImage = nullptr; + + images[i] = pImageViews[i]->GetImage(); + } + + // Issue the pre sync barrier + Pal::AcquireReleaseInfo acqRelInfo = {}; + + acqRelInfo.reason = Pal::Developer::BarrierReason::BarrierReasonPreSyncClear; + acqRelInfo.imageBarrierCount = clearCount; + acqRelInfo.pImageBarriers = imageBarriers; + + PalCmdReleaseThenAcquire(&acqRelInfo, nullptr, nullptr, imageBarriers, images, m_curDeviceMask); + + // Issue the actual clear + for (uint32_t i = 0; i < clearCount; i++) + { + //Modify the barriers for postSync clear + Pal::ImgBarrier* pPostSyncBarrier = &imageBarriers[i]; + + 
pPostSyncBarrier->srcStageMask = Pal::PipelineStageBlt; + pPostSyncBarrier->dstStageMask = Pal::PipelineStageColorTarget; + pPostSyncBarrier->srcAccessMask = Pal::CoherClear; + pPostSyncBarrier->dstAccessMask = Pal::CoherColorTarget; + + const auto clearSubresRanges = LoadOpClearSubresRanges(viewMask, pRanges[i]); + + utils::IterateMask deviceGroup(GetDeviceMask()); + + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + + // Clear Box + Pal::Box clearBox = BuildClearBox( + m_allGpuState.dynamicRenderingInstance.renderArea[deviceIdx], + *(pImageViews[i])); + + PalCmdBuffer(deviceIdx)->CmdClearColorImage( + *(images[i]->PalImage(deviceIdx)), + pClearLayouts[i], + pClearColors[i], + pClearFormats[i], + clearSubresRanges.NumElements(), + clearSubresRanges.Data(), + 1, + &clearBox, + 0); + } + while (deviceGroup.IterateNext()); + } + + //Issue the post sync barrier + acqRelInfo.reason = Pal::Developer::BarrierReason::BarrierReasonPostSyncClear; + acqRelInfo.imageBarrierCount = clearCount; + acqRelInfo.pImageBarriers = imageBarriers; + + PalCmdReleaseThenAcquire(&acqRelInfo, nullptr, nullptr, imageBarriers, images, m_curDeviceMask); +} + // ===================================================================================================================== void CmdBuffer::ValidateGraphicsStates() { @@ -12811,7 +12963,8 @@ void CmdBuffer::PushConstants2KHR( // ===================================================================================================================== template VKAPI_ATTR void VKAPI_CALL CmdBuffer::CmdPushDescriptorSet2KHR( VkCommandBuffer commandBuffer, @@ -12819,21 +12972,24 @@ VKAPI_ATTR void VKAPI_CALL CmdBuffer::CmdPushDescriptorSet2KHR( { CmdBuffer* pCmdBuffer = ApiCmdBuffer::ObjectFromHandle(commandBuffer); - pCmdBuffer->PushDescriptorSet2KHR( + pCmdBuffer->PushDescriptorSet2KHR + ( pPushDescriptorSetInfo); } // ===================================================================================================================== 
template + size_t samplerDescSize, + size_t typedBufferDescSize, + size_t untypedBufferDescSize, + uint32_t numPalDevices> void CmdBuffer::PushDescriptorSet2KHR( const VkPushDescriptorSetInfoKHR* pPushDescriptorSetInfo) { if ((pPushDescriptorSetInfo->stageFlags & ShaderStageAllGraphics) != 0) { - PushDescriptorSetKHR( + PushDescriptorSetKHR + ( VK_PIPELINE_BIND_POINT_GRAPHICS, pPushDescriptorSetInfo->layout, pPushDescriptorSetInfo->set, @@ -12843,7 +12999,8 @@ void CmdBuffer::PushDescriptorSet2KHR( if ((pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) != 0) { - PushDescriptorSetKHR( + PushDescriptorSetKHR + ( VK_PIPELINE_BIND_POINT_COMPUTE, pPushDescriptorSetInfo->layout, pPushDescriptorSetInfo->set, @@ -12853,7 +13010,8 @@ void CmdBuffer::PushDescriptorSet2KHR( #if VKI_RAY_TRACING if ((pPushDescriptorSetInfo->stageFlags & ShaderStageAllRayTracing) != 0) { - PushDescriptorSetKHR( + PushDescriptorSetKHR + ( VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, pPushDescriptorSetInfo->layout, pPushDescriptorSetInfo->set, @@ -13015,19 +13173,34 @@ template PFN_vkCmdPushDescriptorSet2KHR CmdBuffer::GetCmdPushDescriptorSet2KHRFunc( const Device* pDevice) { - const size_t imageDescSize = pDevice->GetProperties().descriptorSizes.imageView; - const size_t samplerDescSize = pDevice->GetProperties().descriptorSizes.sampler; - const size_t bufferDescSize = pDevice->GetProperties().descriptorSizes.bufferView; + const size_t imageDescSize = pDevice->GetProperties().descriptorSizes.imageView; + const size_t samplerDescSize = pDevice->GetProperties().descriptorSizes.sampler; + const size_t typedBufferDescSize = pDevice->GetProperties().descriptorSizes.typedBufferView; + const size_t untypedBufferDescSize = pDevice->GetProperties().descriptorSizes.untypedBufferView; PFN_vkCmdPushDescriptorSet2KHR pFunc = nullptr; - if ((imageDescSize == 32) && - (samplerDescSize == 16) && - (bufferDescSize == 16)) + if ((imageDescSize == 32) && + (samplerDescSize == 16) && + 
(typedBufferDescSize == 16) && + (untypedBufferDescSize == 16)) + { + pFunc = &CmdPushDescriptorSet2KHR< + 32, + 16, + 16, + 16, + numPalDevices>; + } + else if ((imageDescSize == 32) && + (samplerDescSize == 16) && + (typedBufferDescSize == 24) && + (untypedBufferDescSize == 16)) { pFunc = &CmdPushDescriptorSet2KHR< 32, 16, + 24, 16, numPalDevices>; } diff --git a/icd/api/vk_cmdbuffer_transfer.cpp b/icd/api/vk_cmdbuffer_transfer.cpp index 610636f3..9b03c795 100644 --- a/icd/api/vk_cmdbuffer_transfer.cpp +++ b/icd/api/vk_cmdbuffer_transfer.cpp @@ -747,8 +747,8 @@ void CmdBuffer::CopyQueryPoolResults( // =================================================================================================================== // Command to write a timestamp value to a location in a Timestamp query pool void CmdBuffer::QueryCopy( - const QueryPool* pBasePool, - const Buffer* pDestBuffer, + const QueryPool* pBasePool, + const Buffer* pDestBuffer, uint32_t firstQuery, uint32_t queryCount, VkDeviceSize destOffset, @@ -799,19 +799,23 @@ void CmdBuffer::QueryCopy( uint32_t userData[16]; // Figure out which user data registers should contain what compute constants - const uint32_t storageViewSize = m_pDevice->GetProperties().descriptorSizes.bufferView; + const uint32_t untypedViewSize = m_pDevice->GetProperties().descriptorSizes.untypedBufferView; + const uint32_t typedViewSize = m_pDevice->GetProperties().descriptorSizes.typedBufferView; + + const uint32_t storageViewSize = m_pDevice->UseStridedCopyQueryResults() ? 
untypedViewSize : typedViewSize; const uint32_t storageViewDwSize = storageViewSize / sizeof(uint32_t); - const uint32_t viewOffset = 0; - const uint32_t bufferViewOffset = storageViewDwSize; - const uint32_t queryCountOffset = bufferViewOffset + storageViewDwSize; - const uint32_t copyFlagsOffset = queryCountOffset + 1; - const uint32_t copyStrideOffset = copyFlagsOffset + 1; - const uint32_t firstQueryOffset = copyStrideOffset + 1; - const uint32_t ptrQueryOffset = firstQueryOffset + 1; - const uint32_t userDataCount = ptrQueryOffset + 1; + const uint32_t bufferViewDwSize = untypedViewSize / sizeof(uint32_t); + const uint32_t viewOffset = 0; + const uint32_t bufferViewOffset = storageViewDwSize; + const uint32_t queryCountOffset = bufferViewOffset + bufferViewDwSize; + const uint32_t copyFlagsOffset = queryCountOffset + 1; + const uint32_t copyStrideOffset = copyFlagsOffset + 1; + const uint32_t firstQueryOffset = copyStrideOffset + 1; + const uint32_t ptrQueryOffset = firstQueryOffset + 1; + const uint32_t userDataCount = ptrQueryOffset + 1; // Make sure they agree with pipeline mapping - VK_ASSERT(viewOffset == pipeline.userDataNodeOffsets[0]); + VK_ASSERT(viewOffset == pipeline.userDataNodeOffsets[0]); VK_ASSERT(bufferViewOffset == pipeline.userDataNodeOffsets[1]); VK_ASSERT(queryCountOffset == pipeline.userDataNodeOffsets[2]); VK_ASSERT(userDataCount <= VK_ARRAY_SIZE(userData)); diff --git a/icd/api/vk_compute_pipeline.cpp b/icd/api/vk_compute_pipeline.cpp index dca14a6e..0fbe3289 100644 --- a/icd/api/vk_compute_pipeline.cpp +++ b/icd/api/vk_compute_pipeline.cpp @@ -29,6 +29,7 @@ #include "include/vk_shader.h" #include "include/vk_device.h" #include "include/vk_instance.h" +#include "include/vk_pipeline_binary.h" #include "include/vk_pipeline_cache.h" #include "include/vk_pipeline_layout.h" #include "include/vk_memory.h" @@ -113,6 +114,8 @@ VkResult ComputePipeline::CreatePipelineBinaries( PipelineCompiler* pDefaultCompiler = 
pDevice->GetCompiler(DefaultDeviceIndex); bool storeBinaryToCache = true; + storeBinaryToCache = (flags & VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR) == 0; + // Load or create the pipeline binary PipelineBinaryCache* pPipelineBinaryCache = (pPipelineCache != nullptr) ? pPipelineCache->GetPipelineCache() : nullptr; @@ -319,6 +322,7 @@ ComputePipeline::ComputePipeline( Device* const pDevice, Pal::IPipeline** pPalPipeline, const PipelineLayout* pPipelineLayout, + PipelineBinaryStorage* pBinaryStorage, const ImmedInfo& immedInfo, #if VKI_RAY_TRACING bool hasRayTracing, @@ -340,6 +344,7 @@ ComputePipeline::ComputePipeline( Pipeline::Init( pPalPipeline, pPipelineLayout, + pBinaryStorage, staticStateMask, #if VKI_RAY_TRACING dispatchRaysUserDataOffset, @@ -388,6 +393,36 @@ VkResult ComputePipeline::Create( HandleExtensionStructs(pCreateInfo, &extStructs); + auto pPipelineBinaryInfoKHR = extStructs.pPipelineBinaryInfoKHR; + + if (pPipelineBinaryInfoKHR != nullptr) + { + if (pPipelineBinaryInfoKHR->binaryCount > 0) + { + VK_ASSERT(pPipelineBinaryInfoKHR->binaryCount == pDevice->NumPalDevices()); + binariesProvided = true; + } + + for (uint32_t deviceIdx = 0; + (deviceIdx < pPipelineBinaryInfoKHR->binaryCount) && (result == VK_SUCCESS); + ++deviceIdx) + { + const auto pBinary = PipelineBinary::ObjectFromHandle( + pPipelineBinaryInfoKHR->pPipelineBinaries[deviceIdx]); + + cacheId[deviceIdx] = pBinary->BinaryKey(); + pipelineBinaries[deviceIdx] = pBinary->BinaryData(); + + if (deviceIdx == DefaultDeviceIndex) + { + pDefaultCompiler->ReadBinaryMetadata( + pDevice, + pipelineBinaries[deviceIdx], + &binaryMetadata); + } + } + } + ComputePipelineShaderStageInfo shaderInfo = {}; uint64_t apiPsoHash = {}; @@ -395,6 +430,9 @@ VkResult ComputePipeline::Create( PipelineCompiler::InitPipelineCreationFeedback(pPipelineCreationFeedbackCreateInfo); + PipelineBinaryStorage binaryStorage = {}; + bool storeBinaryToPipeline = (flags & VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR) != 0; + if 
((result == VK_SUCCESS) && (binariesProvided == false)) { // 1. Create Cache IDs @@ -429,6 +467,37 @@ VkResult ComputePipeline::Create( &binaryMetadata); } + // 3. Store created binaries for pipeline_binary + if ((result == VK_SUCCESS) && storeBinaryToPipeline) + { + for (uint32_t deviceIdx = 0; (deviceIdx < pDevice->NumPalDevices()) && (result == VK_SUCCESS); ++deviceIdx) + { + void* pMemory = pAllocator->pfnAllocation( + pAllocator->pUserData, + pipelineBinaries[deviceIdx].codeSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); // retained in the pipeline object + + if (pMemory != nullptr) + { + memcpy( + pMemory, + pipelineBinaries[deviceIdx].pCode, + pipelineBinaries[deviceIdx].codeSize); + + InsertBinaryData( + &binaryStorage, + deviceIdx, + cacheId[deviceIdx], + pipelineBinaries[deviceIdx].codeSize, + pMemory); + } + else + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + } } CreateInfo localPipelineInfo = {}; @@ -445,6 +514,7 @@ VkResult ComputePipeline::Create( // Get the pipeline and shader size from PAL and allocate memory. size_t pipelineSize = 0; + PipelineBinaryStorage* pPermBinaryStorage = nullptr; void* pSystemMem = nullptr; Pal::Result palResult = Pal::Result::Success; @@ -460,6 +530,10 @@ VkResult ComputePipeline::Create( VK_ASSERT(palResult == Pal::Result::Success); size_t allocationSize = sizeof(ComputePipeline) + (pipelineSize * pDevice->NumPalDevices()); + if (storeBinaryToPipeline) + { + allocationSize += sizeof(PipelineBinaryStorage); + } pSystemMem = pDevice->AllocApiObject( pAllocator, @@ -534,6 +608,15 @@ VkResult ComputePipeline::Create( result = PalToVkResult(palResult); + if ((result == VK_SUCCESS) && storeBinaryToPipeline) + { + size_t pipelineBinaryOffset = sizeof(ComputePipeline) + (pipelineSize * pDevice->NumPalDevices()); + pPermBinaryStorage = static_cast(Util::VoidPtrInc(pSystemMem, + pipelineBinaryOffset)); + + // Simply copy the existing allocations to the new struct. 
+ *pPermBinaryStorage = binaryStorage; + } } if (result == VK_SUCCESS) @@ -552,6 +635,7 @@ VkResult ComputePipeline::Create( VK_PLACEMENT_NEW(pSystemMem) ComputePipeline(pDevice, pPalPipeline, localPipelineInfo.pLayout, + pPermBinaryStorage, localPipelineInfo.immedInfo, #if VKI_RAY_TRACING hasRayTracing, @@ -563,7 +647,7 @@ VkResult ComputePipeline::Create( apiPsoHash); *pPipeline = ComputePipeline::HandleFromVoidPointer(pSystemMem); - if (settings.enableDebugPrintf) + if (pDevice->GetEnabledFeatures().enableDebugPrintf) { ComputePipeline* pComputePipeline = static_cast(pSystemMem); pComputePipeline->ClearFormatString(); @@ -577,6 +661,8 @@ VkResult ComputePipeline::Create( } else { + // Free the binaries only if we failed to create the pipeline. + FreeBinaryStorage(&binaryStorage, pAllocator); for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) { diff --git a/icd/api/vk_conv.cpp b/icd/api/vk_conv.cpp index e89d0650..48f54089 100644 --- a/icd/api/vk_conv.cpp +++ b/icd/api/vk_conv.cpp @@ -1122,20 +1122,24 @@ static uint32_t GetBufferSrdFormatInfo( } else { - VK_ASSERT(pPhysicalDevice->PalProperties().gfxipProperties.srdSizes.bufferView == 4 * sizeof(uint32_t)); + const Pal::DeviceProperties& palProperties = pPhysicalDevice->PalProperties(); + VK_ASSERT(palProperties.gfxipProperties.srdSizes.typedBufferView <= MaxBufferSrdSize * sizeof(uint32_t)); - uint32_t result[4] = {}; - Pal::BufferViewInfo bufferInfo = {}; - bufferInfo.gpuAddr = 0x300000000ull; - bufferInfo.swizzledFormat = swizzledFormat; - bufferInfo.range = UINT32_MAX; - bufferInfo.stride = Pal::Formats::BytesPerPixel(swizzledFormat.format); + uint32_t result[MaxBufferSrdSize] = {}; + Pal::BufferViewInfo bufferInfo = {}; + bufferInfo.gpuAddr = 0x300000000ull; + bufferInfo.swizzledFormat = swizzledFormat; + bufferInfo.range = UINT32_MAX; + bufferInfo.stride = Pal::Formats::BytesPerPixel(swizzledFormat.format); pPhysicalDevice->PalDevice()->CreateTypedBufferViewSrds(1, &bufferInfo, 
result); - // NOTE: Until now, all buffer format info is stored the fourth DWORD of buffer SRD. please modify + // NOTE: Until now, all buffer format info is stored the last DWORD of buffer SRD. please modify // both BilVertexFetchManager::IssueUberFetchInst and UberFetchShaderFormatInfo once it is changed. - return result[3]; + + { + return result[3]; + } } } @@ -1366,7 +1370,7 @@ VkResult InitializeUberFetchShaderFormatTable( // to avoid access the exact bit in buffer SRD, we create untypeded buffer twice with different stride, // and record the modified bits. - VK_ASSERT(pPhysicalDevice->PalProperties().gfxipProperties.srdSizes.bufferView == 4 * sizeof(uint32_t)); + VK_ASSERT(pPhysicalDevice->PalProperties().gfxipProperties.srdSizes.untypedBufferView == 4 * sizeof(uint32_t)); uint32_t defaultSrd[4] = {}; uint32_t zeroStrideSrd[4] = {}; diff --git a/icd/api/vk_descriptor_buffer.cpp b/icd/api/vk_descriptor_buffer.cpp index 705fca54..e54054e7 100644 --- a/icd/api/vk_descriptor_buffer.cpp +++ b/icd/api/vk_descriptor_buffer.cpp @@ -86,10 +86,6 @@ VKAPI_ATTR void VKAPI_CALL vkGetDescriptorEXT( const Device* pDevice = ApiDevice::ObjectFromHandle(device); const Device::Properties& props = pDevice->GetProperties(); - VK_ASSERT((props.descriptorSizes.imageView == 32) && - (props.descriptorSizes.sampler == 16) && - (props.descriptorSizes.bufferView == 16)); - switch (static_cast(pDescriptorInfo->type)) { case VK_DESCRIPTOR_TYPE_SAMPLER: @@ -198,6 +194,7 @@ VKAPI_ATTR void VKAPI_CALL vkGetDescriptorEXT( { if (pDescriptorInfo->data.pUniformTexelBuffer != nullptr) { + VK_ASSERT(pDescriptorInfo->data.pUniformTexelBuffer->format != VK_FORMAT_UNDEFINED); BufferView::BuildSrd( pDevice, 0, @@ -205,12 +202,11 @@ VKAPI_ATTR void VKAPI_CALL vkGetDescriptorEXT( static_cast (&pDescriptorInfo->data.pUniformTexelBuffer->address), pDescriptorInfo->data.pUniformTexelBuffer->format, 1, - props.descriptorSizes.bufferView, pDescriptor); } else { - memset(pDescriptor, 0, 
props.descriptorSizes.bufferView); + memset(pDescriptor, 0, props.descriptorSizes.typedBufferView); } break; } @@ -231,7 +227,7 @@ VKAPI_ATTR void VKAPI_CALL vkGetDescriptorEXT( } else { - memset(pDescriptor, 0, props.descriptorSizes.bufferView); + memset(pDescriptor, 0, props.descriptorSizes.untypedBufferView); } break; @@ -249,12 +245,11 @@ VKAPI_ATTR void VKAPI_CALL vkGetDescriptorEXT( static_cast (&pDescriptorInfo->data.pUniformBuffer->address), VK_FORMAT_UNDEFINED, 1, - props.descriptorSizes.bufferView, pDescriptor); } else { - memset(pDescriptor, 0, props.descriptorSizes.bufferView); + memset(pDescriptor, 0, props.descriptorSizes.untypedBufferView); } break; } diff --git a/icd/api/vk_descriptor_set.cpp b/icd/api/vk_descriptor_set.cpp index 14576653..9386be8e 100644 --- a/icd/api/vk_descriptor_set.cpp +++ b/icd/api/vk_descriptor_set.cpp @@ -519,7 +519,8 @@ void DescriptorUpdate::WriteInlineUniformBlock( template void DescriptorUpdate::WriteDescriptorSets( const Device* pDevice, @@ -639,7 +640,7 @@ void DescriptorUpdate::WriteDescriptorSets( break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - WriteBufferDescriptors( + WriteBufferDescriptors( params.pTexelBufferView, deviceIdx, pDestAddr, @@ -648,7 +649,7 @@ void DescriptorUpdate::WriteDescriptorSets( break; case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - WriteBufferDescriptors( + WriteBufferDescriptors( params.pTexelBufferView, deviceIdx, pDestAddr, @@ -657,7 +658,7 @@ void DescriptorUpdate::WriteDescriptorSets( break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - WriteBufferInfoDescriptors( + WriteBufferInfoDescriptors( pDevice, params.pBufferInfo, deviceIdx, @@ -667,7 +668,7 @@ void DescriptorUpdate::WriteDescriptorSets( break; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - WriteBufferInfoDescriptors( + WriteBufferInfoDescriptors( pDevice, params.pBufferInfo, deviceIdx, @@ -681,7 +682,7 @@ void DescriptorUpdate::WriteDescriptorSets( pDestAddr = pDestSet->DynamicDescriptorData(deviceIdx) + 
pDestSet->Layout()->GetDstDynOffset(destBinding, params.dstArrayElement); - WriteBufferInfoDescriptors( + WriteBufferInfoDescriptors( pDevice, params.pBufferInfo, deviceIdx, @@ -695,7 +696,7 @@ void DescriptorUpdate::WriteDescriptorSets( pDestAddr = pDestSet->DynamicDescriptorData(deviceIdx) + pDestSet->Layout()->GetDstDynOffset(destBinding, params.dstArrayElement); - WriteBufferInfoDescriptors( + WriteBufferInfoDescriptors( pDevice, params.pBufferInfo, deviceIdx, @@ -923,7 +924,8 @@ void DescriptorUpdate::CopyDescriptorSets( template VKAPI_ATTR void VKAPI_CALL DescriptorUpdate::UpdateDescriptorSets( VkDevice device, @@ -936,7 +938,8 @@ VKAPI_ATTR void VKAPI_CALL DescriptorUpdate::UpdateDescriptorSets( for (uint32_t deviceIdx = 0; deviceIdx < numPalDevices; deviceIdx++) { - WriteDescriptorSets( + WriteDescriptorSets + ( pDevice, deviceIdx, descriptorWriteCount, @@ -988,39 +991,70 @@ template PFN_vkUpdateDescriptorSets DescriptorUpdate::GetUpdateDescriptorSetsFunc( const Device* pDevice) { - const size_t imageDescSize = pDevice->GetProperties().descriptorSizes.imageView; - const size_t fmaskDescSize = pDevice->GetProperties().descriptorSizes.fmaskView; - const size_t samplerDescSize = pDevice->GetProperties().descriptorSizes.sampler; - const size_t bufferDescSize = pDevice->GetProperties().descriptorSizes.bufferView; + const size_t imageDescSize = pDevice->GetProperties().descriptorSizes.imageView; + const size_t fmaskDescSize = pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead ? 
+ pDevice->GetProperties().descriptorSizes.fmaskView : 0; + const size_t samplerDescSize = pDevice->GetProperties().descriptorSizes.sampler; + const size_t typedBufferDescSize = pDevice->GetProperties().descriptorSizes.typedBufferView; + const size_t untypedBufferDescSize = pDevice->GetProperties().descriptorSizes.untypedBufferView; + PFN_vkUpdateDescriptorSets pFunc = nullptr; - if ((imageDescSize == 32) && - (samplerDescSize == 16) && - (bufferDescSize == 16)) + if ((imageDescSize == 32) && + (fmaskDescSize == 0) && + (samplerDescSize == 16) && + (typedBufferDescSize == 16) && + (untypedBufferDescSize == 16)) { - if ((pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead == false) || (fmaskDescSize == 0)) - { - pFunc = &UpdateDescriptorSets< - 32, - 0, - 16, - 16, - numPalDevices>; - } - else if (fmaskDescSize == 32) - { - pFunc = &UpdateDescriptorSets< - 32, - 32, - 16, - 16, - numPalDevices>; - } - else - { - VK_NEVER_CALLED(); - pFunc = nullptr; - } + pFunc = &UpdateDescriptorSets< + 32, + 0, + 16, + 16, + 16, + numPalDevices>; + } + else if ((imageDescSize == 32) && + (fmaskDescSize == 32) && + (samplerDescSize == 16) && + (typedBufferDescSize == 16) && + (untypedBufferDescSize == 16)) + { + pFunc = &UpdateDescriptorSets< + 32, + 32, + 16, + 16, + 16, + numPalDevices>; + } + else if ((imageDescSize == 32) && + (fmaskDescSize == 0) && + (samplerDescSize == 16) && + (typedBufferDescSize == 24) && + (untypedBufferDescSize == 16)) + { + pFunc = &UpdateDescriptorSets< + 32, + 0, + 16, + 24, + 16, + numPalDevices>; + } + else if ((imageDescSize == 32) && + (fmaskDescSize == 32) && + (samplerDescSize == 16) && + (typedBufferDescSize == 24) && + (untypedBufferDescSize == 16)) + { + pFunc = &UpdateDescriptorSets< + 32, + 32, + 16, + 24, + 16, + numPalDevices>; } else { @@ -1143,6 +1177,24 @@ void DescriptorUpdate::WriteBufferDescriptors<16, VK_DESCRIPTOR_TYPE_STORAGE_TEX uint32_t dwStride, size_t descriptorStrideInBytes); +template +void 
DescriptorUpdate::WriteBufferDescriptors<24, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER>( + const VkBufferView* pDescriptors, + uint32_t deviceIdx, + uint32_t* pDestAddr, + uint32_t count, + uint32_t dwStride, + size_t descriptorStrideInBytes); + +template +void DescriptorUpdate::WriteBufferDescriptors<24, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER>( + const VkBufferView* pDescriptors, + uint32_t deviceIdx, + uint32_t* pDestAddr, + uint32_t count, + uint32_t dwStride, + size_t descriptorStrideInBytes); + template void DescriptorUpdate::WriteBufferInfoDescriptors<16, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER>( const Device* pDevice, diff --git a/icd/api/vk_descriptor_set_layout.cpp b/icd/api/vk_descriptor_set_layout.cpp index 4227d2b3..2fdea471 100644 --- a/icd/api/vk_descriptor_set_layout.cpp +++ b/icd/api/vk_descriptor_set_layout.cpp @@ -171,12 +171,14 @@ uint32_t DescriptorSetLayout::GetSingleDescStaticSize( case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + size = props.descriptorSizes.typedBufferView; + break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: #if VKI_RAY_TRACING case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: #endif - size = props.descriptorSizes.bufferView; + size = props.descriptorSizes.untypedBufferView; break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: @@ -268,7 +270,7 @@ uint32_t DescriptorSetLayout::GetDynamicBufferDescDwSize( } else { - size = pDevice->GetProperties().descriptorSizes.bufferView; + size = pDevice->GetProperties().descriptorSizes.untypedBufferView; } VK_ASSERT(Util::IsPow2Aligned(size, sizeof(uint32_t))); diff --git a/icd/api/vk_descriptor_update_template.cpp b/icd/api/vk_descriptor_update_template.cpp index 87cfba51..28abf320 100644 --- a/icd/api/vk_descriptor_update_template.cpp +++ b/icd/api/vk_descriptor_update_template.cpp @@ -124,7 +124,8 @@ VkResult DescriptorUpdateTemplate::Create( template 
DescriptorUpdateTemplate::PfnUpdateEntry DescriptorUpdateTemplate::GetUpdateEntryFunc( VkDescriptorType descriptorType, @@ -165,22 +166,22 @@ DescriptorUpdateTemplate::PfnUpdateEntry DescriptorUpdateTemplate::GetUpdateEntr pFunc = &UpdateEntrySampledImage; break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - pFunc = &UpdateEntryTexelBuffer; + pFunc = &UpdateEntryTexelBuffer; break; case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - pFunc = &UpdateEntryTexelBuffer; + pFunc = &UpdateEntryTexelBuffer; break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - pFunc = &UpdateEntryBuffer; + pFunc = &UpdateEntryBuffer; break; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - pFunc = &UpdateEntryBuffer; + pFunc = &UpdateEntryBuffer; break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - pFunc = &UpdateEntryBuffer; + pFunc = &UpdateEntryBuffer; break; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: - pFunc = &UpdateEntryBuffer; + pFunc = &UpdateEntryBuffer; break; case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: pFunc = &UpdateEntryInlineUniformBlock; @@ -205,41 +206,74 @@ DescriptorUpdateTemplate::PfnUpdateEntry DescriptorUpdateTemplate::GetUpdateEntr VkDescriptorType descriptorType, const DescriptorSetLayout::BindingInfo& dstBinding) { - const size_t imageDescSize = pDevice->GetProperties().descriptorSizes.imageView; - const size_t fmaskDescSize = pDevice->GetProperties().descriptorSizes.fmaskView; - const size_t samplerDescSize = pDevice->GetProperties().descriptorSizes.sampler; - const size_t bufferDescSize = pDevice->GetProperties().descriptorSizes.bufferView; + const size_t imageDescSize = pDevice->GetProperties().descriptorSizes.imageView; + const size_t fmaskDescSize = pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead ? 
+ pDevice->GetProperties().descriptorSizes.fmaskView : 0; + const size_t samplerDescSize = pDevice->GetProperties().descriptorSizes.sampler; + const size_t typedBufferDescSize = pDevice->GetProperties().descriptorSizes.typedBufferView; + const size_t untypedBufferDescSize = pDevice->GetProperties().descriptorSizes.untypedBufferView; DescriptorUpdateTemplate::PfnUpdateEntry pFunc = nullptr; - if ((imageDescSize == 32) && - (samplerDescSize == 16) && - (bufferDescSize == 16)) + if ((imageDescSize == 32) && + (fmaskDescSize == 0) && + (samplerDescSize == 16) && + (typedBufferDescSize == 16) && + (untypedBufferDescSize == 16)) + + { + pFunc = GetUpdateEntryFunc< + 32, + 0, + 16, + 16, + 16, + numPalDevices>(descriptorType, dstBinding); + } + else if ((imageDescSize == 32) && + (fmaskDescSize == 32) && + (samplerDescSize == 16) && + (typedBufferDescSize == 16) && + (untypedBufferDescSize == 16)) + + { + pFunc = GetUpdateEntryFunc< + 32, + 32, + 16, + 16, + 16, + numPalDevices>(descriptorType, dstBinding); + } + else if ((imageDescSize == 32) && + (fmaskDescSize == 0) && + (samplerDescSize == 16) && + (typedBufferDescSize == 24) && + (untypedBufferDescSize == 16)) { - if ((pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead == false) || (fmaskDescSize == 0)) - { pFunc = GetUpdateEntryFunc< 32, 0, 16, + 24, 16, numPalDevices>(descriptorType, dstBinding); - } - else if (fmaskDescSize == 32) - { + } + else if ((imageDescSize == 32) && + (fmaskDescSize == 32) && + (samplerDescSize == 16) && + (typedBufferDescSize == 24) && + (untypedBufferDescSize == 16)) + + { pFunc = GetUpdateEntryFunc< 32, 32, 16, + 24, 16, numPalDevices>(descriptorType, dstBinding); - } - else - { - VK_NEVER_CALLED(); - pFunc = nullptr; - } } else { diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index 943a80d2..b09fbc8b 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -44,7 +44,7 @@ #include "include/vk_fence.h" #include "include/vk_formats.h" #include 
"include/vk_framebuffer.h" - +#include "include/vk_pipeline_binary.h" #include "include/vk_pipeline_layout.h" #include "include/vk_physical_device.h" #include "include/vk_image.h" @@ -742,6 +742,12 @@ VkResult Device::Create( { deviceFeatures.strictImageSizeRequirements = false; } + + if (pPhysicalDevice->GetRuntimeSettings().enableDebugPrintf + ) + { + deviceFeatures.enableDebugPrintf = true; + } } if (palResult == Pal::Result::Success) @@ -1154,15 +1160,16 @@ VkResult Device::Initialize( } #if VKI_RAY_TRACING - m_properties.rayTracingIpLevel = deviceProps.gfxipProperties.rayTracingIp; + m_properties.rayTracingIpLevel = deviceProps.gfxipProperties.rayTracingIp; #endif - m_properties.virtualMemAllocGranularity = deviceProps.gpuMemoryProperties.virtualMemAllocGranularity; - m_properties.virtualMemPageSize = deviceProps.gpuMemoryProperties.virtualMemPageSize; - m_properties.descriptorSizes.bufferView = deviceProps.gfxipProperties.srdSizes.bufferView; - m_properties.descriptorSizes.imageView = deviceProps.gfxipProperties.srdSizes.imageView; - m_properties.descriptorSizes.fmaskView = deviceProps.gfxipProperties.srdSizes.fmaskView; - m_properties.descriptorSizes.sampler = deviceProps.gfxipProperties.srdSizes.sampler; - m_properties.descriptorSizes.bvh = deviceProps.gfxipProperties.srdSizes.bvh; + m_properties.virtualMemAllocGranularity = deviceProps.gpuMemoryProperties.virtualMemAllocGranularity; + m_properties.virtualMemPageSize = deviceProps.gpuMemoryProperties.virtualMemPageSize; + m_properties.descriptorSizes.typedBufferView = deviceProps.gfxipProperties.srdSizes.typedBufferView; + m_properties.descriptorSizes.untypedBufferView = deviceProps.gfxipProperties.srdSizes.untypedBufferView; + m_properties.descriptorSizes.imageView = deviceProps.gfxipProperties.srdSizes.imageView; + m_properties.descriptorSizes.fmaskView = deviceProps.gfxipProperties.srdSizes.fmaskView; + m_properties.descriptorSizes.sampler = deviceProps.gfxipProperties.srdSizes.sampler; + 
m_properties.descriptorSizes.bvh = deviceProps.gfxipProperties.srdSizes.bvh; // Size of combined image samplers is the sum of the image and sampler SRD sizes (8DW + 4DW) m_properties.descriptorSizes.combinedImageSampler = m_properties.descriptorSizes.imageView + @@ -1849,7 +1856,7 @@ VkResult Device::CreateInternalComputePipeline( const uint8_t* pCode, uint32_t numUserDataNodes, Vkgc::ResourceMappingRootNode* pUserDataNodes, - VkShaderModuleCreateFlags internalShaderFlags, + ShaderModuleFlags flags, bool forceWave64, const VkSpecializationInfo* pSpecializationInfo, InternalPipeline* pInternalPipeline) @@ -1879,10 +1886,9 @@ VkResult Device::CreateInternalComputePipeline( // Build shader module Vkgc::BinaryData spvBin = { codeByteSize, pCode }; - internalShaderFlags |= VK_INTERNAL_SHADER_FLAGS_INTERNAL_SHADER_BIT; + ShaderModuleFlags internalShaderFlags = flags | ShaderModuleInternalShader; result = pCompiler->BuildShaderModule( this, - 0, internalShaderFlags, spvBin, &shaderModule); @@ -2103,34 +2109,38 @@ VkResult Device::CreateInternalPipelines() Vkgc::ResourceMappingRootNode userDataNodes[3] = {}; - const uint32_t uavViewSize = m_properties.descriptorSizes.bufferView / sizeof(uint32_t); + const uint32_t untypedViewDwSize = m_properties.descriptorSizes.untypedBufferView / sizeof(uint32_t); + const uint32_t typedViewDwSize = m_properties.descriptorSizes.typedBufferView / sizeof(uint32_t); + uint32_t offset = 0; // Timestamp counter storage view - userDataNodes[0].node.type = useStridedShader ? + userDataNodes[0].node.type = useStridedShader ? Vkgc::ResourceMappingNodeType::DescriptorBuffer : Vkgc::ResourceMappingNodeType::DescriptorTexelBuffer; - userDataNodes[0].node.offsetInDwords = 0; - userDataNodes[0].node.sizeInDwords = uavViewSize; - userDataNodes[0].node.srdRange.set = 0; - userDataNodes[0].node.srdRange.binding = 0; + userDataNodes[0].node.offsetInDwords = 0; + userDataNodes[0].node.sizeInDwords = useStridedShader ? 
untypedViewDwSize : typedViewDwSize; + userDataNodes[0].node.srdRange.set = 0; + userDataNodes[0].node.srdRange.binding = 0; userDataNodes[0].node.srdRange.strideInDwords = 0; - userDataNodes[0].visibility = Vkgc::ShaderStageComputeBit; + userDataNodes[0].visibility = Vkgc::ShaderStageComputeBit; + offset += userDataNodes[0].node.sizeInDwords; // Copy destination storage view - userDataNodes[1].node.type = Vkgc::ResourceMappingNodeType::DescriptorBuffer; - userDataNodes[1].node.offsetInDwords = uavViewSize; - userDataNodes[1].node.sizeInDwords = uavViewSize; - userDataNodes[1].node.srdRange.set = 0; - userDataNodes[1].node.srdRange.binding = 1; + userDataNodes[1].node.type = Vkgc::ResourceMappingNodeType::DescriptorBuffer; + userDataNodes[1].node.offsetInDwords = offset; + userDataNodes[1].node.sizeInDwords = untypedViewDwSize; + userDataNodes[1].node.srdRange.set = 0; + userDataNodes[1].node.srdRange.binding = 1; userDataNodes[1].node.srdRange.strideInDwords = 0; - userDataNodes[1].visibility = Vkgc::ShaderStageComputeBit; + userDataNodes[1].visibility = Vkgc::ShaderStageComputeBit; + offset += userDataNodes[1].node.sizeInDwords; // Inline constant data - userDataNodes[2].node.type = Vkgc::ResourceMappingNodeType::PushConst; - userDataNodes[2].node.offsetInDwords = 2 * uavViewSize; - userDataNodes[2].node.sizeInDwords = 4; - userDataNodes[2].node.srdRange.set = Vkgc::InternalDescriptorSetId; + userDataNodes[2].node.type = Vkgc::ResourceMappingNodeType::PushConst; + userDataNodes[2].node.offsetInDwords = offset; + userDataNodes[2].node.sizeInDwords = 4; + userDataNodes[2].node.srdRange.set = Vkgc::InternalDescriptorSetId; userDataNodes[2].node.srdRange.strideInDwords = 0; - userDataNodes[2].visibility = Vkgc::ShaderStageComputeBit; + userDataNodes[2].visibility = Vkgc::ShaderStageComputeBit; result = CreateInternalComputePipeline( spvCodeSize, @@ -4176,6 +4186,7 @@ VkResult Device::GetDeviceFaultInfoEXT( { m_retrievedFaultData = true; } + } if 
(m_pageFaultStatus.flags.pageFault == false) @@ -4218,6 +4229,7 @@ VkResult Device::GetDeviceFaultInfoEXT( VK_DEVICE_FAULT_ADDRESS_TYPE_WRITE_INVALID_EXT; pAddressInfo->reportedAddress = static_cast(m_pageFaultStatus.faultAddress); pAddressInfo->addressPrecision = 4096; + } return result; @@ -5404,6 +5416,69 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetDeviceFaultInfoEXT( return pDevice->GetDeviceFaultInfoEXT(pFaultCounts, pFaultInfo); } +// ===================================================================================================================== +VKAPI_ATTR VkResult VKAPI_CALL vkCreatePipelineBinariesKHR( + VkDevice device, + const VkPipelineBinaryCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineBinaryHandlesInfoKHR* pBinaries) +{ + const auto pDevice = ApiDevice::ObjectFromHandle(device); + const auto pAllocCB = (pAllocator != nullptr) ? pAllocator : pDevice->VkInstance()->GetAllocCallbacks(); + + return PipelineBinary::CreatePipelineBinaries(pDevice, pCreateInfo, pAllocCB, pBinaries); +} + +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkDestroyPipelineBinaryKHR( + VkDevice device, + VkPipelineBinaryKHR pipelineBinary, + const VkAllocationCallbacks* pAllocator) +{ + const auto pDevice = ApiDevice::ObjectFromHandle(device); + const auto pAllocCB = (pAllocator != nullptr) ? 
pAllocator : pDevice->VkInstance()->GetAllocCallbacks(); + const auto pBinary = PipelineBinary::ObjectFromHandle(pipelineBinary); + + pBinary->DestroyPipelineBinary(pDevice, pAllocCB); +} + +// ===================================================================================================================== +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineKeyKHR( + VkDevice device, + const VkPipelineCreateInfoKHR* pPipelineCreateInfo, + VkPipelineBinaryKeyKHR* pPipelineBinaryKey) +{ + const auto pDevice = ApiDevice::ObjectFromHandle(device); + + return PipelineBinary::GetPipelineKey(pDevice, pPipelineCreateInfo, pPipelineBinaryKey); +} + +// ===================================================================================================================== +VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineBinaryDataKHR( + VkDevice device, + const VkPipelineBinaryDataInfoKHR* pInfo, + VkPipelineBinaryKeyKHR* pPipelineBinaryKey, + size_t* pPipelineBinaryDataSize, + void* pPipelineBinaryData) +{ + const auto pBinary = PipelineBinary::ObjectFromHandle(pInfo->pipelineBinary); + + return pBinary->GetPipelineBinaryData(pPipelineBinaryKey, pPipelineBinaryDataSize, pPipelineBinaryData); +} + +// ===================================================================================================================== +VKAPI_ATTR VkResult VKAPI_CALL vkReleaseCapturedPipelineDataKHR( + VkDevice device, + const VkReleaseCapturedPipelineDataInfoKHR* pInfo, + const VkAllocationCallbacks* pAllocator) +{ + const auto pDevice = ApiDevice::ObjectFromHandle(device); + const auto pPipeline = Pipeline::BaseObjectFromHandle(pInfo->pipeline); + const auto pAllocCB = (pAllocator != nullptr) ? 
pAllocator : pDevice->VkInstance()->GetAllocCallbacks(); + + return PipelineBinary::ReleaseCapturedPipelineData(pDevice, pPipeline, pAllocCB); +} + // ===================================================================================================================== VKAPI_ATTR void VKAPI_CALL vkGetDeviceImageSubresourceLayoutKHR( VkDevice device, diff --git a/icd/api/vk_dispatch.cpp b/icd/api/vk_dispatch.cpp index 1e00c8e2..50ad9c55 100644 --- a/icd/api/vk_dispatch.cpp +++ b/icd/api/vk_dispatch.cpp @@ -387,6 +387,7 @@ void DispatchTable::Init() INIT_DISPATCH_ENTRY(vkGetQueryPoolResults ); INIT_DISPATCH_ENTRY(vkGetRenderAreaGranularity ); INIT_DISPATCH_ENTRY(vkGetRenderingAreaGranularityKHR ); + INIT_DISPATCH_ENTRY(vkGetPhysicalDeviceSurfaceCapabilitiesKHR ); INIT_DISPATCH_ENTRY(vkGetPhysicalDeviceSurfaceCapabilities2KHR ); INIT_DISPATCH_ENTRY(vkGetPhysicalDeviceSurfaceFormatsKHR ); @@ -625,7 +626,6 @@ void DispatchTable::Init() INIT_DISPATCH_ENTRY(vkCmdSetLineStippleEXT ); INIT_DISPATCH_ALIAS(vkCmdSetLineStippleKHR , vkCmdSetLineStippleEXT ); - INIT_DISPATCH_ENTRY(vkSetDeviceMemoryPriorityEXT ); INIT_DISPATCH_ENTRY(vkGetDeviceFaultInfoEXT ); INIT_DISPATCH_ENTRY(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ); @@ -771,6 +771,7 @@ void DispatchTable::Init() vkCmdResolveImage2 ); INIT_DISPATCH_ENTRY(vkCmdPushDescriptorSetKHR ); INIT_DISPATCH_ENTRY(vkCmdPushDescriptorSetWithTemplateKHR ); + INIT_DISPATCH_ENTRY(vkGetDeviceBufferMemoryRequirements ); INIT_DISPATCH_ENTRY(vkGetDeviceImageMemoryRequirements ); INIT_DISPATCH_ENTRY(vkGetDeviceImageSparseMemoryRequirements ); @@ -840,12 +841,19 @@ void DispatchTable::Init() INIT_DISPATCH_ENTRY(vkCmdPushConstants2KHR ); INIT_DISPATCH_ENTRY(vkCmdPushDescriptorSet2KHR ); INIT_DISPATCH_ENTRY(vkCmdPushDescriptorSetWithTemplate2KHR ); + INIT_DISPATCH_ENTRY(vkCmdSetDescriptorBufferOffsets2EXT ); INIT_DISPATCH_ENTRY(vkCmdBindDescriptorBufferEmbeddedSamplers2EXT ); INIT_DISPATCH_ENTRY(vkCmdSetRenderingAttachmentLocationsKHR ); 
INIT_DISPATCH_ENTRY(vkCmdSetRenderingInputAttachmentIndicesKHR ); + INIT_DISPATCH_ENTRY(vkCreatePipelineBinariesKHR ); + INIT_DISPATCH_ENTRY(vkDestroyPipelineBinaryKHR ); + INIT_DISPATCH_ENTRY(vkGetPipelineKeyKHR ); + INIT_DISPATCH_ENTRY(vkGetPipelineBinaryDataKHR ); + INIT_DISPATCH_ENTRY(vkReleaseCapturedPipelineDataKHR ); + INIT_DISPATCH_ENTRY(vkCmdSetDepthBias2EXT ); } diff --git a/icd/api/vk_gpa_session.cpp b/icd/api/vk_gpa_session.cpp index ea63e919..17e076b8 100644 --- a/icd/api/vk_gpa_session.cpp +++ b/icd/api/vk_gpa_session.cpp @@ -240,17 +240,13 @@ VkResult GpaSession::CmdBeginSample( sampleConfig.flags.sampleInternalOperations = pGpaSampleBeginInfo->sampleInternalOperations; sampleConfig.flags.cacheFlushOnCounterCollection = pGpaSampleBeginInfo->cacheFlushOnCounterCollection; sampleConfig.flags.sqShaderMask = pGpaSampleBeginInfo->sqShaderMaskEnable; - -#if VKI_BUILD_GFX11 sampleConfig.flags.sqWgpShaderMask = pGpaSampleBeginInfo->sqShaderMaskEnable; -#endif + sampleConfig.sqShaderMask = static_cast( VkToPalPerfExperimentShaderFlags(pGpaSampleBeginInfo->sqShaderMask)); -#if VKI_BUILD_GFX11 sampleConfig.sqWgpShaderMask = static_cast( VkToPalPerfExperimentShaderFlags(pGpaSampleBeginInfo->sqShaderMask)); -#endif VirtualStackFrame virtStackFrame(pCmdbuf->GetStackAllocator()); diff --git a/icd/api/vk_graphics_pipeline.cpp b/icd/api/vk_graphics_pipeline.cpp index 61a497ea..e4a6f094 100644 --- a/icd/api/vk_graphics_pipeline.cpp +++ b/icd/api/vk_graphics_pipeline.cpp @@ -30,6 +30,7 @@ #include "include/vk_graphics_pipeline_library.h" #include "include/vk_instance.h" #include "include/vk_memory.h" +#include "include/vk_pipeline_binary.h" #include "include/vk_pipeline_cache.h" #include "include/vk_pipeline_layout.h" #include "include/vk_render_pass.h" @@ -80,6 +81,8 @@ VkResult GraphicsPipeline::CreatePipelineBinaries( PipelineCompiler* pDefaultCompiler = pDevice->GetCompiler(DefaultDeviceIndex); bool storeBinaryToCache = true; + storeBinaryToCache = (flags & 
VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR) == 0; + // Load or create the pipeline binary PipelineBinaryCache* pPipelineBinaryCache = (pPipelineCache != nullptr) ? pPipelineCache->GetPipelineCache() : nullptr; @@ -280,9 +283,6 @@ VkResult GraphicsPipeline::CreatePalPipelineObjects( { size_t palSize = 0; - pObjectCreateInfo->pipeline.pipelineBinarySize = pPipelineBinaries[DefaultDeviceIndex].codeSize; - pObjectCreateInfo->pipeline.pPipelineBinary = pPipelineBinaries[DefaultDeviceIndex].pCode; - Pal::Result palResult = Pal::Result::Success; palSize = pDevice->PalDevice(DefaultDeviceIndex)->GetGraphicsPipelineSize(pObjectCreateInfo->pipeline, &palResult); @@ -367,6 +367,7 @@ VkResult GraphicsPipeline::CreatePipelineObjects( PipelineCache* pPipelineCache, const Util::MetroHash::Hash* pCacheIds, uint64_t apiPsoHash, + const PipelineBinaryStorage& binaryStorage, GraphicsPipelineObjectCreateInfo* pObjectCreateInfo, VkPipeline* pPipeline) { @@ -386,12 +387,20 @@ VkResult GraphicsPipeline::CreatePipelineObjects( // Get the pipeline size from PAL and allocate memory. 
void* pSystemMem = nullptr; size_t palSize = 0; + pObjectCreateInfo->pipeline.pipelineBinarySize = pPipelineBinaries[DefaultDeviceIndex].codeSize; + pObjectCreateInfo->pipeline.pPipelineBinary = pPipelineBinaries[DefaultDeviceIndex].pCode; palSize = pDevice->PalDevice(DefaultDeviceIndex)->GetGraphicsPipelineSize(pObjectCreateInfo->pipeline, &palResult); VK_ASSERT(palResult == Pal::Result::Success); size_t allocationSize = sizeof(GraphicsPipeline) + (palSize * numPalDevices); + const bool storeBinaryToPipeline = (flags & VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR) != 0; + + if (storeBinaryToPipeline) + { + allocationSize += sizeof(PipelineBinaryStorage); + } pSystemMem = pDevice->AllocApiObject( pAllocator, @@ -415,6 +424,8 @@ VkResult GraphicsPipeline::CreatePipelineObjects( pPalPipeline); } + PipelineBinaryStorage* pPermBinaryStorage = nullptr; + if (result == VK_SUCCESS) { bool sampleShadingEnable = pObjectCreateInfo->flags.sampleShadingEnable; @@ -507,6 +518,15 @@ VkResult GraphicsPipeline::CreatePipelineObjects( result = PalToVkResult(palResult); + if ((result == VK_SUCCESS) && storeBinaryToPipeline) + { + size_t pipelineBinaryOffset = sizeof(GraphicsPipeline) + (palSize * numPalDevices); + pPermBinaryStorage = static_cast(Util::VoidPtrInc(pSystemMem, + pipelineBinaryOffset)); + + // Simply copy the existing allocations to the new struct. + memcpy(pPermBinaryStorage, &binaryStorage, sizeof(PipelineBinaryStorage)); + } } // On success, wrap it up in a Vulkan object. 
@@ -516,6 +536,7 @@ VkResult GraphicsPipeline::CreatePipelineObjects( pDevice, pPalPipeline, pPipelineLayout, + pPermBinaryStorage, pObjectCreateInfo->immedInfo, pObjectCreateInfo->staticStateMask, pObjectCreateInfo->flags, @@ -535,7 +556,7 @@ VkResult GraphicsPipeline::CreatePipelineObjects( &palPipelineHasher); *pPipeline = GraphicsPipeline::HandleFromVoidPointer(pSystemMem); - if (pDevice->GetRuntimeSettings().enableDebugPrintf) + if (pDevice->GetEnabledFeatures().enableDebugPrintf) { GraphicsPipeline* pGraphicsPipeline = static_cast(pSystemMem); pGraphicsPipeline->ClearFormatString(); @@ -759,6 +780,39 @@ VkResult GraphicsPipeline::Create( Util::MetroHash::Hash gplCacheId[GraphicsLibraryCount] = {}; uint32_t numShaderLibraries = 0; + auto pPipelineBinaryInfoKHR = extStructs.pPipelineBinaryInfoKHR; + + if (pPipelineBinaryInfoKHR != nullptr) + { + if (pPipelineBinaryInfoKHR->binaryCount > 0) + { + VK_ASSERT(pPipelineBinaryInfoKHR->binaryCount == pDevice->NumPalDevices()); + binariesProvided = true; + } + + for (uint32_t deviceIdx = 0; + deviceIdx < pPipelineBinaryInfoKHR->binaryCount; + ++deviceIdx) + { + const auto pBinary = PipelineBinary::ObjectFromHandle( + pPipelineBinaryInfoKHR->pPipelineBinaries[deviceIdx]); + + cacheId[deviceIdx] = pBinary->BinaryKey(); + pipelineBinaries[deviceIdx] = pBinary->BinaryData(); + + if (deviceIdx == DefaultDeviceIndex) + { + pDefaultCompiler->ReadBinaryMetadata( + pDevice, + pipelineBinaries[deviceIdx], + &binaryMetadata); + } + } + } + + PipelineBinaryStorage binaryStorage = {}; + const bool storeBinaryToPipeline = (flags & VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR) != 0; + VK_ASSERT(pCreateInfo->layout != VK_NULL_HANDLE); pPipelineLayout = PipelineLayout::ObjectFromHandle(pCreateInfo->layout); @@ -905,6 +959,77 @@ VkResult GraphicsPipeline::Create( } } + // 4. 
Store created binaries for pipeline_binary + if ((result == VK_SUCCESS) && storeBinaryToPipeline) + { + if (gplProvided) + { + for (uint32_t gplType = 0; + (gplType < GraphicsLibraryCount) && + (shaderLibraries[gplType] != nullptr) && + Util::TestAnyFlagSet(gplMask, 1 << gplType) && + (result == VK_SUCCESS); + ++gplType) + { + uint32 codeSize = 0; + + shaderLibraries[gplType]->GetCodeObject(&codeSize, nullptr); + + void* pMemory = pAllocator->pfnAllocation( + pAllocator->pUserData, + codeSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); // retained in the pipeline object + + if (pMemory != nullptr) + { + shaderLibraries[gplType]->GetCodeObject(&codeSize, pMemory); + + InsertBinaryData( + &binaryStorage, + gplType, + gplCacheId[gplType], + codeSize, + pMemory); + } + else + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + } + else + { + for (uint32_t deviceIdx = 0; (deviceIdx < pDevice->NumPalDevices()) && (result == VK_SUCCESS); ++deviceIdx) + { + void* pMemory = pAllocator->pfnAllocation( + pAllocator->pUserData, + pipelineBinaries[deviceIdx].codeSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); // retained in the pipeline object + + if (pMemory != nullptr) + { + memcpy( + pMemory, + pipelineBinaries[deviceIdx].pCode, + pipelineBinaries[deviceIdx].codeSize); + + InsertBinaryData( + &binaryStorage, + deviceIdx, + cacheId[deviceIdx], + pipelineBinaries[deviceIdx].codeSize, + pMemory); + } + else + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + } + } + if (result == VK_SUCCESS) { // 5. Build pipeline object create info @@ -947,9 +1072,15 @@ VkResult GraphicsPipeline::Create( pPipelineCache, cacheId, apiPsoHash, + binaryStorage, &objectCreateInfo, pPipeline); + if (result != VK_SUCCESS) + { + // Free the binaries only if we failed to create the pipeline objects. 
+ FreeBinaryStorage(&binaryStorage, pAllocator); + } } } @@ -1195,6 +1326,7 @@ GraphicsPipeline::GraphicsPipeline( Device* const pDevice, Pal::IPipeline** pPalPipeline, const PipelineLayout* pLayout, + PipelineBinaryStorage* pBinaryStorage, const GraphicsPipelineObjectImmedInfo& immedInfo, uint64_t staticStateMask, GraphicsPipelineObjectFlags flags, @@ -1228,6 +1360,7 @@ GraphicsPipeline::GraphicsPipeline( Pipeline::Init( pPalPipeline, pLayout, + pBinaryStorage, staticStateMask, #if VKI_RAY_TRACING dispatchRaysUserDataOffset, @@ -1725,6 +1858,7 @@ void GraphicsPipeline::BindToCmdBuffer( pRenderState->msaaCreateInfo.sampleClusters = m_info.msaaCreateInfo.sampleClusters; pRenderState->msaaCreateInfo.alphaToCoverageSamples = m_info.msaaCreateInfo.alphaToCoverageSamples; pRenderState->msaaCreateInfo.occlusionQuerySamples = m_info.msaaCreateInfo.occlusionQuerySamples; + if (m_flags.customSampleLocations) { pRenderState->msaaCreateInfo.flags.enable1xMsaaSampleLocations = @@ -2003,7 +2137,8 @@ void GraphicsPipeline::BindToCmdBuffer( } if (ContainsStaticState(DynamicStatesInternal::SampleLocations) && - ContainsStaticState(DynamicStatesInternal::SampleLocationsEnable)) + ContainsStaticState(DynamicStatesInternal::SampleLocationsEnable) && + ContainsStaticState(DynamicStatesInternal::RasterizationSamples)) { if ((pRenderState->sampleLocationsEnable != m_flags.customSampleLocations) || (memcmp(&pRenderState->samplePattern, &m_info.samplePattern, sizeof(SamplePattern)) != 0)) @@ -2019,13 +2154,15 @@ void GraphicsPipeline::BindToCmdBuffer( { if (ContainsStaticState(DynamicStatesInternal::SampleLocations)) { - if (memcmp(&pRenderState->samplePattern, &m_info.samplePattern, sizeof(SamplePattern)) != 0) + if (memcmp(&pRenderState->samplePattern.locations, + &m_info.samplePattern.locations, + sizeof(Pal::MsaaQuadSamplePattern)) != 0) { - pRenderState->samplePattern = m_info.samplePattern; + pRenderState->samplePattern.locations = m_info.samplePattern.locations; 
pRenderState->dirtyGraphics.samplePattern = 1; } } - else if (ContainsStaticState(DynamicStatesInternal::SampleLocationsEnable)) + if (ContainsStaticState(DynamicStatesInternal::SampleLocationsEnable)) { if (pRenderState->sampleLocationsEnable != m_flags.customSampleLocations) { @@ -2033,6 +2170,14 @@ void GraphicsPipeline::BindToCmdBuffer( pRenderState->dirtyGraphics.samplePattern = 1; } } + if (ContainsStaticState(DynamicStatesInternal::RasterizationSamples)) + { + if (pRenderState->samplePattern.sampleCount != m_info.samplePattern.sampleCount) + { + pRenderState->samplePattern.sampleCount = m_info.samplePattern.sampleCount; + pRenderState->dirtyGraphics.samplePattern = 1; + } + } } // Only set the Fragment Shading Rate if the dynamic state is not set. diff --git a/icd/api/vk_graphics_pipeline_library.cpp b/icd/api/vk_graphics_pipeline_library.cpp index 6a9d4700..30cc392e 100644 --- a/icd/api/vk_graphics_pipeline_library.cpp +++ b/icd/api/vk_graphics_pipeline_library.cpp @@ -24,6 +24,7 @@ **********************************************************************************************************************/ #include "include/vk_graphics_pipeline_library.h" +#include "include/vk_pipeline_binary.h" #include "include/vk_pipeline_layout.h" #include "palVectorImpl.h" @@ -335,6 +336,26 @@ VkResult GraphicsPipelineLibrary::CreatePartialPipelineBinary( &pBinaryCreateInfo->pipelineInfo.fs, }; + auto pPipelineBinaryInfoKHR = extStructs.pPipelineBinaryInfoKHR; + PipelineBinaryInfo providedBinaries[GraphicsLibraryCount] = {}; + + if (pPipelineBinaryInfoKHR != nullptr) + { + for (uint32_t binaryIndex = 0; (binaryIndex < pPipelineBinaryInfoKHR->binaryCount); ++binaryIndex) + { + const auto pBinary = PipelineBinary::ObjectFromHandle( + pPipelineBinaryInfoKHR->pPipelineBinaries[binaryIndex]); + + // Retrieve the GraphicsLibraryType identifier from the binary + GraphicsLibraryType gplType = *static_cast(pBinary->BinaryData().pCode); + + providedBinaries[gplType].binaryHash = 
pBinary->BinaryKey(); + providedBinaries[gplType].pipelineBinary.codeSize = pBinary->BinaryData().codeSize; + providedBinaries[gplType].pipelineBinary.pCode = + Util::VoidPtrInc(pBinary->BinaryData().pCode, sizeof(GraphicsLibraryType)); + } + } + uint32_t gplMask = 0; for (uint32_t i = 0; i < ShaderStage::ShaderStageGfxCount; ++i) { @@ -364,6 +385,8 @@ VkResult GraphicsPipelineLibrary::CreatePartialPipelineBinary( pPipelineCache, gplType, pBinaryCreateInfo, + &providedBinaries[gplType].pipelineBinary, + &providedBinaries[gplType].binaryHash, &pTempModuleStages[i]); gplMask |= (1 << gplType); } @@ -393,6 +416,8 @@ VkResult GraphicsPipelineLibrary::CreatePartialPipelineBinary( pPipelineCache, GraphicsLibraryPreRaster, pBinaryCreateInfo, + nullptr, + nullptr, &pTempModuleStages[TempIdx]); } @@ -409,6 +434,8 @@ VkResult GraphicsPipelineLibrary::CreatePartialPipelineBinary( pPipelineCache, GraphicsLibraryFragment, pBinaryCreateInfo, + nullptr, + nullptr, &pTempModuleStages[TempIdx]); } } @@ -551,6 +578,9 @@ VkResult GraphicsPipelineLibrary::Create( &binaryCreateInfo); } + PipelineBinaryStorage binaryStorage = {}; + const bool storeBinaryToPipeline = (flags & VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR) != 0; + if (result == VK_SUCCESS) { // 5. Create partial pipeline binary for fast-link @@ -565,6 +595,53 @@ VkResult GraphicsPipelineLibrary::Create( pAllocator, tempModuleStates); + // 6. 
Store created binaries for pipeline_binary + if ((result == VK_SUCCESS) && storeBinaryToPipeline) + { + uint32 binaryIndex = 0; + + for (uint32_t gplType = 0; gplType < GraphicsLibraryCount; ++gplType) + { + if ((binaryCreateInfo.earlyElfPackage[gplType].codeSize != 0) && + (binaryCreateInfo.earlyElfPackage[gplType].pCode != nullptr) && + (result == VK_SUCCESS)) + { + const size_t storageSize = sizeof(GraphicsLibraryType) + + binaryCreateInfo.earlyElfPackage[gplType].codeSize; + + void* pMemory = pAllocator->pfnAllocation( + pAllocator->pUserData, + storageSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); // retained in the GPL pipeline object + + if (pMemory != nullptr) + { + // Store the GraphicsLibraryType identifier with the binary + *static_cast(pMemory) = static_cast(gplType); + + memcpy( + Util::VoidPtrInc(pMemory, sizeof(GraphicsLibraryType)), + binaryCreateInfo.earlyElfPackage[gplType].pCode, + binaryCreateInfo.earlyElfPackage[gplType].codeSize); + + InsertBinaryData( + &binaryStorage, + binaryIndex, + binaryCreateInfo.earlyElfPackageHash[gplType], + storageSize, + pMemory); + + ++binaryIndex; + } + else + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + } + } + // Clean up temporary storage for (uint32_t stage = 0; stage < ShaderStage::ShaderStageGfxCount; ++stage) { @@ -585,6 +662,7 @@ VkResult GraphicsPipelineLibrary::Create( GraphicsPipelineObjectCreateInfo objectCreateInfo = {}; size_t auxiliarySize = 0; + PipelineBinaryStorage* pPermBinaryStorage = nullptr; if (result == VK_SUCCESS) { @@ -607,6 +685,11 @@ VkResult GraphicsPipelineLibrary::Create( size_t objSize = apiSize + auxiliarySize; + if (storeBinaryToPipeline) + { + objSize += sizeof(PipelineBinaryStorage); + } + // Allocate memory pSysMem = pDevice->AllocApiObject(pAllocator, objSize); @@ -621,6 +704,17 @@ VkResult GraphicsPipelineLibrary::Create( GraphicsPipelineBinaryCreateInfo* pBinInfo = DumpGraphicsPipelineBinaryCreateInfo(&binaryCreateInfo, Util::VoidPtrInc(pSysMem, 
apiSize), nullptr); + if (storeBinaryToPipeline) + { + size_t pipelineBinaryOffset = apiSize + auxiliarySize; + + pPermBinaryStorage = static_cast(Util::VoidPtrInc(pSysMem, + pipelineBinaryOffset)); + + // Simply copy the existing allocations to the new struct. + memcpy(pPermBinaryStorage, &binaryStorage, sizeof(PipelineBinaryStorage)); + } + VK_PLACEMENT_NEW(pSysMem) GraphicsPipelineLibrary( pDevice, objectCreateInfo, @@ -629,6 +723,7 @@ VkResult GraphicsPipelineLibrary::Create( elfHash, apiPsoHash, tempModuleStates, + pPermBinaryStorage, pPipelineLayout); *pPipeline = GraphicsPipelineLibrary::HandleFromVoidPointer(pSysMem); @@ -773,6 +868,7 @@ GraphicsPipelineLibrary::GraphicsPipelineLibrary( const Util::MetroHash::Hash& elfHash, const uint64_t apiHash, const GplModuleState* pGplModuleStates, + PipelineBinaryStorage* pBinaryStorage, const PipelineLayout* pPipelineLayout) : GraphicsPipelineCommon( #if VKI_RAY_TRACING @@ -789,6 +885,7 @@ GraphicsPipelineLibrary::GraphicsPipelineLibrary( Pipeline::Init( nullptr, pPipelineLayout, + pBinaryStorage, objectInfo.staticStateMask, #if VKI_RAY_TRACING 0, diff --git a/icd/api/vk_indirect_commands_layout.cpp b/icd/api/vk_indirect_commands_layout.cpp index a05c1799..1a49ad9e 100644 --- a/icd/api/vk_indirect_commands_layout.cpp +++ b/icd/api/vk_indirect_commands_layout.cpp @@ -235,22 +235,22 @@ void IndirectCommandsLayoutNV::BuildPalCreateInfo( Pal::IndirectParam* pIndirectParams, Pal::IndirectCmdGeneratorCreateInfo* pPalCreateInfo) { - uint32_t paramCount = 0; - uint32_t expectedOffset = 0; - uint32_t bindingArgsSize = 0; + uint32_t paramCount = 0u; + uint32_t expectedOffset = 0u; + uint32_t bindingArgsSize = 0u; bool useNativeIndexType = true; - const bool isDispatch = (pCreateInfo->pTokens[pCreateInfo->tokenCount - 1].tokenType - == VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NV); + const bool isDispatch = (pCreateInfo->pTokens[pCreateInfo->tokenCount - 1].tokenType == + VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NV); for 
(uint32_t i = 0; i < pCreateInfo->tokenCount; ++i) { const VkIndirectCommandsLayoutTokenNV& token = pCreateInfo->pTokens[i]; -#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 889 // Set a padding operation to handle non tightly packed indirect arguments buffers VK_ASSERT(token.offset >= expectedOffset); + if (token.offset > expectedOffset) { pIndirectParams[paramCount].type = Pal::IndirectParamType::Padding; @@ -259,7 +259,6 @@ void IndirectCommandsLayoutNV::BuildPalCreateInfo( bindingArgsSize += pIndirectParams[paramCount].sizeInBytes; paramCount++; } -#endif switch (token.tokenType) { @@ -359,9 +358,12 @@ void IndirectCommandsLayoutNV::BuildPalCreateInfo( constexpr uint32_t DxgiIndexTypeUint16 = 57; constexpr uint32_t DxgiIndexTypeUint32 = 42; - pPalCreateInfo->indexTypeTokens[0] = useNativeIndexType ? VK_INDEX_TYPE_UINT8_KHR : DxgiIndexTypeUint8; - pPalCreateInfo->indexTypeTokens[1] = useNativeIndexType ? VK_INDEX_TYPE_UINT16 : DxgiIndexTypeUint16; - pPalCreateInfo->indexTypeTokens[2] = useNativeIndexType ? VK_INDEX_TYPE_UINT32 : DxgiIndexTypeUint32; + pPalCreateInfo->indexTypeTokens[0] = useNativeIndexType ? + static_cast(VK_INDEX_TYPE_UINT8_KHR) : DxgiIndexTypeUint8; + pPalCreateInfo->indexTypeTokens[1] = useNativeIndexType ? + static_cast(VK_INDEX_TYPE_UINT16) : DxgiIndexTypeUint16; + pPalCreateInfo->indexTypeTokens[2] = useNativeIndexType ? 
+ static_cast(VK_INDEX_TYPE_UINT32) : DxgiIndexTypeUint32; } // ===================================================================================================================== diff --git a/icd/api/vk_memory.cpp b/icd/api/vk_memory.cpp index 171c6504..a016df9b 100644 --- a/icd/api/vk_memory.cpp +++ b/icd/api/vk_memory.cpp @@ -304,7 +304,8 @@ VkResult Memory::Create( pNext = pHeader->pNext; } - // For the descriptor table VA range for descriptor buffers + // Use the descriptor table VA range for descriptor buffers because we need to program descriptors + // with a single (32-bit) user data entry and there is no such guarantee with the default VA range. if (pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetMemoryTypeMaskForDescriptorBuffers() & (1 << pAllocInfo->memoryTypeIndex)) { diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index 01d28850..bf67f464 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -726,7 +726,7 @@ void PhysicalDevice::InitializePlatformKey( // - Be a valid UUID generated using normal means // // Settings: -// - markPipelineCacheWithBuildTimestamp: decides whether to mix in __DATE__ __TIME__ from compiler to UUID +// - markPipelineCacheWithBuildTimestamp: decides whether to mix in current library BuildId from compiler to UUID // - useGlobalCacheId : decides if UUID should be portable between machines // static void GenerateCacheUuid( @@ -1667,14 +1667,7 @@ size_t PhysicalDevice::GetFeatures( pFeatures->shaderInt64 = (PalProperties().gfxipProperties.flags.support64BitInstructions ? 
VK_TRUE : VK_FALSE); - if (Is16BitInstructionsSupported()) - { - pFeatures->shaderInt16 = VK_TRUE; - } - else - { - pFeatures->shaderInt16 = VK_FALSE; - } + pFeatures->shaderInt16 = VK_TRUE; if (settings.optEnablePrt) { @@ -4399,12 +4392,10 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_BORDER_COLOR_SWIZZLE)); } -#if VKI_BUILD_GFX11 if ((pPhysicalDevice == nullptr) || (pPhysicalDevice->PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp11_0)) { availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_PRIMITIVES_GENERATED_QUERY)); } -#endif if (IsKhrCooperativeMatrixSupported(pPhysicalDevice)) { @@ -4462,6 +4453,8 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_NON_SEAMLESS_CUBE_MAP)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_SHADER_MODULE_IDENTIFIER)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_PIPELINE_BINARY)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SHADER_MAXIMAL_RECONVERGENCE)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_EXTENDED_DYNAMIC_STATE3)); @@ -5138,14 +5131,12 @@ void PhysicalDevice::GetPhysicalDeviceDotProduct8Properties( *pIntegerDotProductAccumulatingSaturating8BitUnsignedAccelerated = int8DotSupport; *pIntegerDotProductAccumulatingSaturating8BitSignedAccelerated = int8DotSupport; -#if VKI_BUILD_GFX11 if (PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp11_0) { *pIntegerDotProduct8BitMixedSignednessAccelerated = VK_TRUE; *pIntegerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = VK_TRUE; } else -#endif { *pIntegerDotProduct8BitMixedSignednessAccelerated = VK_FALSE; *pIntegerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = VK_FALSE; @@ -5170,14 +5161,12 @@ void PhysicalDevice::GetPhysicalDeviceDotProduct4x8Properties( *pIntegerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = 
int8DotSupport; *pIntegerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = int8DotSupport; -#if VKI_BUILD_GFX11 - if (PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp11_0) + if (PalProperties().gfxipProperties.flags.supportMixedSignIntDot) { *pIntegerDotProduct4x8BitPackedMixedSignednessAccelerated = VK_TRUE; *pIntegerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = VK_TRUE; } else -#endif { *pIntegerDotProduct4x8BitPackedMixedSignednessAccelerated = VK_FALSE; *pIntegerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = VK_FALSE; @@ -5195,9 +5184,7 @@ void PhysicalDevice::GetPhysicalDeviceDotProduct16Properties( ) const { const VkBool32 int16DotSupport = (Is16BitInstructionsSupported() -#if VKI_BUILD_GFX11 && (PalProperties().gfxLevel < Pal::GfxIpLevel::GfxIp11_0) -#endif #if VKI_BUILD_GFX115 && (PalProperties().gfxLevel < Pal::GfxIpLevel::GfxIp11_5) #endif @@ -5269,6 +5256,54 @@ void PhysicalDevice::GetDevicePropertiesMaxBufferSize( *pMaxBufferSize = 2u * 1024u * 1024u * 1024u; // TODO: replace with actual size } +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceLineSubPixelPrecisionBits( + uint32_t* pLineSubPixelPrecisionBits +) const +{ + *pLineSubPixelPrecisionBits = Pal::SubPixelBits; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceVertexAttributeDivisorProperties( + uint32_t* pMaxVertexAttribDivisor, + VkBool32* pSupportsNonZeroFirstInstance +) const +{ + *pMaxVertexAttribDivisor = UINT32_MAX; + *pSupportsNonZeroFirstInstance = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceMaintenance5Properties( + VkBool32* 
pEarlyFragmentMultisampleCoverageAfterSampleCounting, + VkBool32* pEarlyFragmentSampleMaskTestBeforeSampleCounting, + VkBool32* pDepthStencilSwizzleOneSupport, + VkBool32* pPolygonModePointSize, + VkBool32* pNonStrictSinglePixelWideLinesUseParallelogram, + VkBool32* pNonStrictWideLinesUseParallelogram +) const +{ + *pEarlyFragmentMultisampleCoverageAfterSampleCounting = VK_TRUE; + *pEarlyFragmentSampleMaskTestBeforeSampleCounting = VK_TRUE; + *pDepthStencilSwizzleOneSupport = VK_TRUE; + *pPolygonModePointSize = VK_TRUE; + *pNonStrictSinglePixelWideLinesUseParallelogram = VK_TRUE; + *pNonStrictWideLinesUseParallelogram = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceMaintenance6Properties( + VkBool32* pBlockTexelViewCompatibleMultipleLayers, + uint32_t* pMaxCombinedImageSamplerDescriptorCount, + VkBool32* pFragmentShadingRateClampCombinerInputs +) const +{ + *pBlockTexelViewCompatibleMultipleLayers = VK_TRUE; + *pMaxCombinedImageSamplerDescriptorCount = MaxCombinedImageSamplerDescriptorCount; + *pFragmentShadingRateClampCombinerInputs = VK_TRUE; +} + // ===================================================================================================================== void PhysicalDevice::GetPhysicalDeviceDriverProperties( VkDriverId* pDriverID, @@ -5544,14 +5579,7 @@ void PhysicalDevice::GetPhysicalDeviceSamplerYcbcrConversionFeatures( VkBool32* pSamplerYcbcrConversion ) const { - if (IsExtensionSupported(DeviceExtensions::KHR_SAMPLER_YCBCR_CONVERSION)) - { - *pSamplerYcbcrConversion = VK_TRUE; - } - else - { - *pSamplerYcbcrConversion = VK_FALSE; - } + *pSamplerYcbcrConversion = VK_TRUE; } // ===================================================================================================================== @@ -5723,6 +5751,23 @@ void PhysicalDevice::GetPhysicalDeviceVulkanMemoryModelFeatures( } +void 
PhysicalDevice::GetPhysicalDeviceLineRasterizationFeatures( + VkBool32* pRectangularLines, + VkBool32* pBresenhamLines, + VkBool32* pSmoothLines, + VkBool32* pStippledRectangularLines, + VkBool32* pStippledBresenhamLines, + VkBool32* pStippledSmoothLines +) const +{ + *pRectangularLines = VK_FALSE; + *pBresenhamLines = VK_TRUE; + *pSmoothLines = VK_FALSE; + *pStippledRectangularLines = VK_FALSE; + *pStippledBresenhamLines = VK_TRUE; + *pStippledSmoothLines = VK_FALSE; +} + // ===================================================================================================================== // Retrieve device feature support. Called in response to vkGetPhysicalDeviceFeatures2 // NOTE: Don't memset here. Otherwise, VerifyRequestedPhysicalDeviceFeatures needs to compare member by member @@ -6230,13 +6275,12 @@ size_t PhysicalDevice::GetFeatures2( if (updateFeatures) { - pExtInfo->rectangularLines = VK_FALSE; - pExtInfo->bresenhamLines = VK_TRUE; - pExtInfo->smoothLines = VK_FALSE; - - pExtInfo->stippledRectangularLines = VK_FALSE; - pExtInfo->stippledBresenhamLines = VK_TRUE; - pExtInfo->stippledSmoothLines = VK_FALSE; + GetPhysicalDeviceLineRasterizationFeatures(&pExtInfo->rectangularLines, + &pExtInfo->bresenhamLines, + &pExtInfo->smoothLines, + &pExtInfo->stippledRectangularLines, + &pExtInfo->stippledBresenhamLines, + &pExtInfo->stippledSmoothLines); } structSize = sizeof(*pExtInfo); @@ -6851,7 +6895,6 @@ size_t PhysicalDevice::GetFeatures2( structSize = sizeof(*pExtInfo); break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT: { auto* pExtInfo = reinterpret_cast(pHeader); @@ -7470,6 +7513,19 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_BINARY_FEATURES_KHR: + { + auto* pExtInfo = reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->pipelineBinaries = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + break; + } + default: { // skip any unsupported extension 
structures @@ -7973,7 +8029,8 @@ void PhysicalDevice::GetDeviceProperties2( { auto* pProps = static_cast(pNext); - pProps->lineSubPixelPrecisionBits = Pal::SubPixelBits; + GetPhysicalDeviceLineSubPixelPrecisionBits( + &pProps->lineSubPixelPrecisionBits); break; } @@ -8264,25 +8321,26 @@ void PhysicalDevice::GetDeviceProperties2( pProps->samplerCaptureReplayDescriptorDataSize = sizeof(uint32_t); pProps->accelerationStructureCaptureReplayDescriptorDataSize = sizeof(uint32_t); - VK_ASSERT(palProps.gfxipProperties.srdSizes.sampler <= 32); - VK_ASSERT(palProps.gfxipProperties.srdSizes.imageView <= 64); - VK_ASSERT(palProps.gfxipProperties.srdSizes.bufferView <= 64); + VK_ASSERT(palProps.gfxipProperties.srdSizes.sampler <= 32); + VK_ASSERT(palProps.gfxipProperties.srdSizes.imageView <= 64); + VK_ASSERT(palProps.gfxipProperties.srdSizes.typedBufferView <= 64); + VK_ASSERT(palProps.gfxipProperties.srdSizes.untypedBufferView <= 64); pProps->samplerDescriptorSize = palProps.gfxipProperties.srdSizes.sampler; pProps->combinedImageSamplerDescriptorSize = palProps.gfxipProperties.srdSizes.sampler + palProps.gfxipProperties.srdSizes.imageView; pProps->sampledImageDescriptorSize = palProps.gfxipProperties.srdSizes.imageView; pProps->storageImageDescriptorSize = palProps.gfxipProperties.srdSizes.imageView; - pProps->uniformTexelBufferDescriptorSize = palProps.gfxipProperties.srdSizes.bufferView; - pProps->robustUniformTexelBufferDescriptorSize = palProps.gfxipProperties.srdSizes.bufferView; - pProps->storageTexelBufferDescriptorSize = palProps.gfxipProperties.srdSizes.bufferView; - pProps->robustStorageTexelBufferDescriptorSize = palProps.gfxipProperties.srdSizes.bufferView; - pProps->uniformBufferDescriptorSize = palProps.gfxipProperties.srdSizes.bufferView; - pProps->robustUniformBufferDescriptorSize = palProps.gfxipProperties.srdSizes.bufferView; - pProps->storageBufferDescriptorSize = palProps.gfxipProperties.srdSizes.bufferView; - pProps->robustStorageBufferDescriptorSize = 
palProps.gfxipProperties.srdSizes.bufferView; + pProps->uniformTexelBufferDescriptorSize = palProps.gfxipProperties.srdSizes.typedBufferView; + pProps->robustUniformTexelBufferDescriptorSize = palProps.gfxipProperties.srdSizes.typedBufferView; + pProps->storageTexelBufferDescriptorSize = palProps.gfxipProperties.srdSizes.typedBufferView; + pProps->robustStorageTexelBufferDescriptorSize = palProps.gfxipProperties.srdSizes.typedBufferView; + pProps->uniformBufferDescriptorSize = palProps.gfxipProperties.srdSizes.untypedBufferView; + pProps->robustUniformBufferDescriptorSize = palProps.gfxipProperties.srdSizes.untypedBufferView; + pProps->storageBufferDescriptorSize = palProps.gfxipProperties.srdSizes.untypedBufferView; + pProps->robustStorageBufferDescriptorSize = palProps.gfxipProperties.srdSizes.untypedBufferView; pProps->inputAttachmentDescriptorSize = palProps.gfxipProperties.srdSizes.imageView; - pProps->accelerationStructureDescriptorSize = palProps.gfxipProperties.srdSizes.bufferView; + pProps->accelerationStructureDescriptorSize = palProps.gfxipProperties.srdSizes.untypedBufferView; pProps->maxSamplerDescriptorBufferRange = UINT_MAX; pProps->maxResourceDescriptorBufferRange = UINT_MAX; pProps->resourceDescriptorBufferAddressSpaceSize = UINT_MAX; @@ -8334,9 +8392,9 @@ void PhysicalDevice::GetDeviceProperties2( { auto* pProps = static_cast(pNext); - pProps->blockTexelViewCompatibleMultipleLayers = VK_TRUE; - pProps->maxCombinedImageSamplerDescriptorCount = MaxCombinedImageSamplerDescriptorCount; - pProps->fragmentShadingRateClampCombinerInputs = VK_TRUE; + GetPhysicalDeviceMaintenance6Properties(&pProps->blockTexelViewCompatibleMultipleLayers, + &pProps->maxCombinedImageSamplerDescriptorCount, + &pProps->fragmentShadingRateClampCombinerInputs); break; } @@ -8424,13 +8482,11 @@ void PhysicalDevice::GetDeviceProperties2( pProps->maxMeshOutputVertices = 256; pProps->maxMeshOutputPrimitives = 256; - #if VKI_BUILD_GFX11 if (palProps.gfxLevel >= 
Pal::GfxIpLevel::GfxIp11_0) { pProps->maxMeshOutputLayers = m_limits.maxFramebufferLayers; } else -#endif { pProps->maxMeshOutputLayers = 8; } @@ -8487,8 +8543,8 @@ void PhysicalDevice::GetDeviceProperties2( { auto* pProps = static_cast(pNext); - pProps->maxVertexAttribDivisor = UINT32_MAX; - pProps->supportsNonZeroFirstInstance = VK_TRUE; + GetPhysicalDeviceVertexAttributeDivisorProperties(&pProps->maxVertexAttribDivisor, + &pProps->supportsNonZeroFirstInstance); break; } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_PROPERTIES_EXT: @@ -8513,6 +8569,27 @@ void PhysicalDevice::GetDeviceProperties2( } #endif + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_BINARY_PROPERTIES_KHR: + { + auto* pProps = static_cast(pNext); + pProps->pipelineBinaryInternalCache = VK_TRUE; + pProps->pipelineBinaryInternalCacheControl = VK_FALSE; + pProps->pipelineBinaryPrefersInternalCache = VK_FALSE; + pProps->pipelineBinaryCompressedData = VK_FALSE; + + if ((getenv(PipelineBinaryCache::EnvVarPath) != nullptr) || + (getenv(PipelineBinaryCache::EnvVarReadOnlyFileName) != nullptr)) + { + pProps->pipelineBinaryPrecompiledInternalCache = VK_TRUE; + } + else + { + pProps->pipelineBinaryPrecompiledInternalCache = VK_FALSE; + } + + break; + } + #if VKI_COPY_MEMORY_INDIRECT case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COPY_MEMORY_INDIRECT_PROPERTIES_KHR: { @@ -8816,21 +8893,9 @@ static void VerifyLimits( VK_ASSERT(limits.maxImageDimension3D >= 256); VK_ASSERT(limits.maxImageDimensionCube >= 4096); VK_ASSERT(limits.maxImageArrayLayers >= 256); - VK_ASSERT(limits.maxTexelBufferElements >= 65536); VK_ASSERT(limits.maxUniformBufferRange >= 16384); - VK_ASSERT(limits.maxStorageBufferRange >= (1UL << 27)); - VK_ASSERT(limits.maxPushConstantsSize >= 128); - VK_ASSERT(limits.maxMemoryAllocationCount >= 4096); - VK_ASSERT(limits.maxSamplerAllocationCount >= 4000); VK_ASSERT(limits.bufferImageGranularity <= 131072); - VK_ASSERT(limits.sparseAddressSpaceSize >= (features.sparseBinding ? 
(1ULL << 31) : 0)); - VK_ASSERT(limits.maxBoundDescriptorSets >= 4); - VK_ASSERT(limits.maxPerStageDescriptorSamplers >= 16); VK_ASSERT(limits.maxPerStageDescriptorUniformBuffers >= 12); - VK_ASSERT(limits.maxPerStageDescriptorStorageBuffers >= 4); - VK_ASSERT(limits.maxPerStageDescriptorSampledImages >= 16); - VK_ASSERT(limits.maxPerStageDescriptorStorageImages >= 4); - VK_ASSERT(limits.maxPerStageDescriptorInputAttachments >= 4); const uint64_t reqMaxPerStageResources = Util::Min( static_cast(limits.maxPerStageDescriptorUniformBuffers) + @@ -8841,14 +8906,59 @@ static void VerifyLimits( static_cast(limits.maxColorAttachments), static_cast(128)); + VK_ASSERT(limits.maxDescriptorSetUniformBuffers >= 72); + VK_ASSERT(limits.maxDescriptorSetStorageBuffers >= 24); + VK_ASSERT(limits.maxDescriptorSetStorageImages >= 24); + VK_ASSERT(limits.maxFragmentCombinedOutputResources >= 4); + VK_ASSERT(limits.maxComputeWorkGroupInvocations >= 128); + VK_ASSERT(limits.maxComputeWorkGroupSize[0] >= 128); + VK_ASSERT(limits.maxComputeWorkGroupSize[1] >= 128); + VK_ASSERT(limits.maxComputeWorkGroupSize[2] >= 64); + VK_ASSERT(limits.subTexelPrecisionBits >= 4); + VK_ASSERT(limits.mipmapPrecisionBits >= 4); + VK_ASSERT(limits.maxSamplerLodBias >= 2); + VK_ASSERT(limits.maxBoundDescriptorSets >= 4); + VK_ASSERT(limits.maxColorAttachments >= 4); + VK_ASSERT(limits.maxPushConstantsSize >= 128); + + if (features.largePoints) + { + VK_ASSERT(limits.pointSizeRange[0] <= 1.0f); + VK_ASSERT(limits.pointSizeRange[1] >= 64.0f - limits.pointSizeGranularity); + + VK_ASSERT(limits.pointSizeGranularity <= 1.0f); + } + else + { + VK_ASSERT(limits.pointSizeRange[0] == 1.0f); + VK_ASSERT(limits.pointSizeRange[1] == 1.0f); + VK_ASSERT(limits.pointSizeGranularity == 0.0f); + } + + if (features.wideLines) + { + VK_ASSERT(limits.lineWidthGranularity <= 1.0f); + } + else + { + VK_ASSERT(limits.lineWidthGranularity == 0.0f); + } + VK_ASSERT(limits.maxTexelBufferElements >= 65536); + 
VK_ASSERT(limits.maxStorageBufferRange >= (1UL << 27)); + VK_ASSERT(limits.maxMemoryAllocationCount >= 4096); + VK_ASSERT(limits.maxSamplerAllocationCount >= 4000); + VK_ASSERT(limits.sparseAddressSpaceSize >= (features.sparseBinding ? (1ULL << 31) : 0)); + VK_ASSERT(limits.maxPerStageDescriptorSamplers >= 16); + VK_ASSERT(limits.maxPerStageDescriptorStorageBuffers >= 4); + VK_ASSERT(limits.maxPerStageDescriptorSampledImages >= 16); + VK_ASSERT(limits.maxPerStageDescriptorStorageImages >= 4); + VK_ASSERT(limits.maxPerStageDescriptorInputAttachments >= 4); + VK_ASSERT(limits.maxPerStageResources >= reqMaxPerStageResources); VK_ASSERT(limits.maxDescriptorSetSamplers >= 96); - VK_ASSERT(limits.maxDescriptorSetUniformBuffers >= 72); VK_ASSERT(limits.maxDescriptorSetUniformBuffersDynamic >= 8); - VK_ASSERT(limits.maxDescriptorSetStorageBuffers >= 24); VK_ASSERT(limits.maxDescriptorSetStorageBuffersDynamic >= 4); VK_ASSERT(limits.maxDescriptorSetSampledImages >= 96); - VK_ASSERT(limits.maxDescriptorSetStorageImages >= 24); VK_ASSERT(limits.maxDescriptorSetInputAttachments >= 4); VK_ASSERT(limits.maxVertexInputAttributes >= 16); VK_ASSERT(limits.maxVertexInputBindings >= 16); @@ -8914,18 +9024,11 @@ static void VerifyLimits( VK_ASSERT(limits.maxFragmentDualSrcAttachments == 0); } - VK_ASSERT(limits.maxFragmentCombinedOutputResources >= 4); VK_ASSERT(limits.maxComputeSharedMemorySize >= 16384); VK_ASSERT(limits.maxComputeWorkGroupCount[0] >= 65535); VK_ASSERT(limits.maxComputeWorkGroupCount[1] >= 65535); VK_ASSERT(limits.maxComputeWorkGroupCount[2] >= 65535); - VK_ASSERT(limits.maxComputeWorkGroupInvocations >= 128); - VK_ASSERT(limits.maxComputeWorkGroupSize[0] >= 128); - VK_ASSERT(limits.maxComputeWorkGroupSize[1] >= 128); - VK_ASSERT(limits.maxComputeWorkGroupSize[2] >= 64); VK_ASSERT(limits.subPixelPrecisionBits >= 4); - VK_ASSERT(limits.subTexelPrecisionBits >= 4); - VK_ASSERT(limits.mipmapPrecisionBits >= 4); VK_ASSERT(features.fullDrawIndexUint32); @@ -8947,8 
+9050,6 @@ static void VerifyLimits( VK_ASSERT(limits.maxDrawIndirectCount == 1); } - VK_ASSERT(limits.maxSamplerLodBias >= 2); - VK_ASSERT(features.samplerAnisotropy); if (features.samplerAnisotropy) @@ -9030,7 +9131,6 @@ static void VerifyLimits( VK_ASSERT(limits.framebufferStencilSampleCounts & VK_SAMPLE_COUNT_4_BIT); VK_ASSERT(limits.framebufferNoAttachmentsSampleCounts & VK_SAMPLE_COUNT_1_BIT); VK_ASSERT(limits.framebufferNoAttachmentsSampleCounts & VK_SAMPLE_COUNT_4_BIT); - VK_ASSERT(limits.maxColorAttachments >= 4); VK_ASSERT(limits.sampledImageColorSampleCounts & VK_SAMPLE_COUNT_1_BIT); VK_ASSERT(limits.sampledImageColorSampleCounts & VK_SAMPLE_COUNT_4_BIT); VK_ASSERT(limits.sampledImageIntegerSampleCounts & VK_SAMPLE_COUNT_1_BIT); @@ -9079,21 +9179,6 @@ static void VerifyLimits( VK_ASSERT(limits.discreteQueuePriorities >= 2); - VK_ASSERT(features.largePoints); - - if (features.largePoints) - { - const float ULP = limits.pointSizeGranularity; - - VK_ASSERT(limits.pointSizeRange[0] <= 1.0f); - VK_ASSERT(limits.pointSizeRange[1] >= 64.0f - limits.pointSizeGranularity); - } - else - { - VK_ASSERT(limits.pointSizeRange[0] == 1.0f); - VK_ASSERT(limits.pointSizeRange[1] == 1.0f); - } - VK_ASSERT(features.wideLines); if (features.wideLines) @@ -9109,24 +9194,6 @@ static void VerifyLimits( VK_ASSERT(limits.lineWidthRange[1] == 1.0f); } - if (features.largePoints) - { - VK_ASSERT(limits.pointSizeGranularity <= 1.0f); - } - else - { - VK_ASSERT(limits.pointSizeGranularity == 0.0f); - } - - if (features.wideLines) - { - VK_ASSERT(limits.lineWidthGranularity <= 1.0f); - } - else - { - VK_ASSERT(limits.lineWidthGranularity == 0.0f); - } - VK_ASSERT(limits.nonCoherentAtomSize >= 128); } @@ -9322,6 +9389,7 @@ static void VerifyExtensions( && dev.IsExtensionSupported(DeviceExtensions::KHR_SYNCHRONIZATION2) && dev.IsExtensionSupported(DeviceExtensions::KHR_ZERO_INITIALIZE_WORKGROUP_MEMORY)); } + } // 
===================================================================================================================== @@ -9482,10 +9550,14 @@ VkResult PhysicalDevice::GetDisplayModeProperties( properties[i].displayMode = reinterpret_cast(pDisplayMode); properties[i].parameters.visibleRegion.width = pScreenMode[i]->extent.width; properties[i].parameters.visibleRegion.height = pScreenMode[i]->extent.height; - // The refresh rate returned by pal is HZ. // Spec requires refresh rate to be "the number of times the display is refreshed each second // multiplied by 1000", in other words, HZ * 1000 +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 894 properties[i].parameters.refreshRate = pScreenMode[i]->refreshRate * 1000; +#else + properties[i].parameters.refreshRate = + pScreenMode[i]->refreshRate.numerator * 1000 / pScreenMode[i]->refreshRate.denominator; +#endif } *pPropertyCount = loopCount; @@ -9549,7 +9621,12 @@ VkResult PhysicalDevice::CreateDisplayMode( // The modes are considered as identical if the dimension as well as the refresh rate are the same. 
if ((pCreateInfo->parameters.visibleRegion.width == pScreenMode[i]->extent.width) && (pCreateInfo->parameters.visibleRegion.height == pScreenMode[i]->extent.height) && +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 894 (pCreateInfo->parameters.refreshRate == pScreenMode[i]->refreshRate * 1000)) +#else + (pCreateInfo->parameters.refreshRate == + pScreenMode[i]->refreshRate.numerator * 1000 / pScreenMode[i]->refreshRate.denominator)) +#endif { isValidMode = true; break; @@ -9580,7 +9657,12 @@ VkResult PhysicalDevice::CreateDisplayMode( { pNewMode->palScreenMode.extent.width = pCreateInfo->parameters.visibleRegion.width; pNewMode->palScreenMode.extent.height = pCreateInfo->parameters.visibleRegion.height; - pNewMode->palScreenMode.refreshRate = pCreateInfo->parameters.refreshRate; +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 894 + pNewMode->palScreenMode.refreshRate = pCreateInfo->parameters.refreshRate / 1000; +#else + pNewMode->palScreenMode.refreshRate.numerator = pCreateInfo->parameters.refreshRate; + pNewMode->palScreenMode.refreshRate.denominator = 1000; +#endif pNewMode->palScreenMode.flags.u32All = 0; pNewMode->pScreen = pScreen; *pMode = reinterpret_cast(pNewMode); @@ -9781,8 +9863,8 @@ VkResult PhysicalDevice::GetPhysicalDeviceCooperativeMatrixPropertiesKHR( if (IsKhrCooperativeMatrixSupported(this)) { - constexpr uint32_t totalCount = CooperativeMatrixTypesCount + CooperativeMatrixSaturatingTypesCount; - + const uint32_t basicTypeCount = CooperativeMatrixTypesCount + CooperativeMatrixSaturatingTypesCount; + uint32_t totalCount = basicTypeCount; if (pProperties == nullptr) { *pPropertyCount = totalCount; @@ -9798,17 +9880,24 @@ VkResult PhysicalDevice::GetPhysicalDeviceCooperativeMatrixPropertiesKHR( for (uint32_t i = 0; i < *pPropertyCount; ++i) { - const bool sat = (i >= CooperativeMatrixTypesCount); - const uint32_t n = sat ? i - CooperativeMatrixTypesCount : i; - const CooperativeMatrixType* types = sat ? 
CooperativeMatrixSaturatingTypes : CooperativeMatrixTypes; - + bool sat = false; + const CooperativeMatrixType* pType = nullptr; + if (i < CooperativeMatrixTypesCount) + { + pType = CooperativeMatrixTypes + i; + } + else if (i < basicTypeCount) + { + sat = true; + pType = CooperativeMatrixSaturatingTypes + i - CooperativeMatrixTypesCount; + } pProperties[i].MSize = CooperativeMatrixDimension; pProperties[i].NSize = CooperativeMatrixDimension; pProperties[i].KSize = CooperativeMatrixDimension; - pProperties[i].AType = types[n].a; - pProperties[i].BType = types[n].b; - pProperties[i].CType = types[n].c; - pProperties[i].ResultType = types[n].c; + pProperties[i].AType = pType->a; + pProperties[i].BType = pType->b; + pProperties[i].CType = pType->c; + pProperties[i].ResultType = pType->c; pProperties[i].scope = VK_SCOPE_SUBGROUP_KHR; pProperties[i].saturatingAccumulation = sat ? VK_TRUE : VK_FALSE; } diff --git a/icd/api/vk_pipeline.cpp b/icd/api/vk_pipeline.cpp index 6c250699..f9134898 100644 --- a/icd/api/vk_pipeline.cpp +++ b/icd/api/vk_pipeline.cpp @@ -101,6 +101,69 @@ static_assert(VK_ARRAY_SIZE(HwStageNames) == static_cast(Util::Abi::Ha // the vkGetPipelineExecutableStatisticsKHR function static constexpr uint32_t ExecutableStatisticsCount = 5; +// ===================================================================================================================== +// Add binary data to this storage. +// To avoid redundant copies and memory allocation, it's expected that the calling code will allocate and prepare +// the binary. A Vulkan allocator must be used to allocate the memory at pData pointer. +// PipelineBinaryStorage will take ownership of the pointer and later free it in Free() call. 
+void Pipeline::InsertBinaryData( + PipelineBinaryStorage* pBinaryStorage, + const uint32 binaryIndex, + const Util::MetroHash::Hash& key, + const size_t dataSize, + const void* pData) +{ + VK_ASSERT(pBinaryStorage != nullptr); + VK_ASSERT(binaryIndex < VK_ARRAY_SIZE(pBinaryStorage->binaryInfo)); + // Expect that each entry is added only once + VK_ASSERT((pBinaryStorage->binaryInfo[binaryIndex].binaryHash.qwords[0] == 0) && + (pBinaryStorage->binaryInfo[binaryIndex].binaryHash.qwords[1] == 0)); + + pBinaryStorage->binaryInfo[binaryIndex].binaryHash = key; + pBinaryStorage->binaryInfo[binaryIndex].pipelineBinary.codeSize = dataSize; + pBinaryStorage->binaryInfo[binaryIndex].pipelineBinary.pCode = pData; + + ++pBinaryStorage->binaryCount; +} + +// ===================================================================================================================== +// Frees the previously inserted pipeline binaries. +VkResult Pipeline::FreeBinaryStorage( + const VkAllocationCallbacks* pAllocator) +{ + VkResult result = VK_SUCCESS; + + if (m_pBinaryStorage != nullptr) + { + Pipeline::FreeBinaryStorage(m_pBinaryStorage, pAllocator); + m_pBinaryStorage = nullptr; + } + else + { + result = VK_ERROR_UNKNOWN; + } + + return result; +} + +// ===================================================================================================================== +// Frees the pipeline binaries. 
+void Pipeline::FreeBinaryStorage( + PipelineBinaryStorage* pBinaryStorage, + const VkAllocationCallbacks* pAllocator) +{ + VK_ASSERT(pBinaryStorage != nullptr); + + for (uint32_t binaryIndex = 0; binaryIndex < VK_ARRAY_SIZE(pBinaryStorage->binaryInfo); ++binaryIndex) + { + if (pBinaryStorage->binaryInfo[binaryIndex].pipelineBinary.pCode != nullptr) + { + auto pMemory = const_cast(pBinaryStorage->binaryInfo[binaryIndex].pipelineBinary.pCode); + pAllocator->pfnFree(pAllocator->pUserData, pMemory); + } + } +} + // ===================================================================================================================== // Filter VkPipelineCreateFlags2KHR to only values used for pipeline caching VkPipelineCreateFlags2KHR Pipeline::GetCacheIdControlFlags( @@ -317,7 +380,7 @@ VkResult Pipeline::BuildShaderStageInfo( // creation of pipeline. VK_ASSERT(pTempModules != nullptr); - VkShaderModuleCreateFlags flags = 0; + ShaderModuleFlags flags = 0; Vkgc::BinaryData shaderBinary = {}; Pal::ShaderHash codeHash = {}; PipelineCreationFeedback* pShaderFeedback = (pFeedbacks == nullptr) ? 
nullptr : pFeedbacks + outIdx; @@ -335,7 +398,7 @@ VkResult Pipeline::BuildShaderStageInfo( if (pShaderModuleCreateInfo != nullptr) { - flags = pShaderModuleCreateInfo->flags; + flags = ShaderModule::ConvertVkShaderModuleCreateFlags(pShaderModuleCreateInfo->flags); shaderBinary.codeSize = pShaderModuleCreateInfo->codeSize; shaderBinary.pCode = pShaderModuleCreateInfo->pCode; @@ -347,8 +410,7 @@ VkResult Pipeline::BuildShaderStageInfo( { result = pCompiler->BuildShaderModule( pDevice, - flags, - VK_INTERNAL_SHADER_FLAGS_FORCE_UNCACHED_BIT, + flags | ShaderModuleForceUncached, shaderBinary, &pTempModules[outIdx]); @@ -435,6 +497,11 @@ void Pipeline::HandleExtensionStructs( static_cast(pNext); break; } + case VK_STRUCTURE_TYPE_PIPELINE_BINARY_INFO_KHR: + { + pExtStructs->pPipelineBinaryInfoKHR = static_cast(pNext); + break; + } default: break; } @@ -460,6 +527,7 @@ Pipeline::Pipeline( m_hasRayTracing(hasRayTracing), m_dispatchRaysUserDataOffset(0), #endif + m_pBinaryStorage(nullptr), m_pFormatStrings(nullptr) { memset(m_pPalPipeline, 0, sizeof(m_pPalPipeline)); @@ -468,6 +536,7 @@ Pipeline::Pipeline( void Pipeline::Init( Pal::IPipeline** pPalPipeline, const PipelineLayout* pLayout, + PipelineBinaryStorage* pBinaryStorage, uint64_t staticStateMask, #if VKI_RAY_TRACING uint32_t dispatchRaysUserDataOffset, @@ -475,6 +544,7 @@ void Pipeline::Init( const Util::MetroHash::Hash& cacheHash, uint64_t apiHash) { + m_pBinaryStorage = pBinaryStorage; m_staticStateMask = staticStateMask; m_cacheHash = cacheHash; m_apiHash = apiHash; @@ -523,6 +593,11 @@ VkResult Pipeline::Destroy( m_pPalPipeline[deviceIdx]->Destroy(); } + if (m_pBinaryStorage != nullptr) + { + FreeBinaryStorage(m_pBinaryStorage, pAllocator); + } + if (m_pFormatStrings != nullptr) { Util::Destructor(m_pFormatStrings); diff --git a/icd/api/vk_pipeline_binary.cpp b/icd/api/vk_pipeline_binary.cpp new file mode 100644 index 00000000..699959b0 --- /dev/null +++ b/icd/api/vk_pipeline_binary.cpp @@ -0,0 +1,558 @@ +/* + 
*********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************************************************************************/ + +#include "include/vk_pipeline_binary.h" +#include "include/vk_device.h" +#include "include/vk_pipeline.h" +#include "include/vk_compute_pipeline.h" +#include "include/vk_graphics_pipeline.h" +#include "include/graphics_pipeline_common.h" +#if VKI_RAY_TRACING +#include "raytrace/vk_ray_tracing_pipeline.h" +#endif + +#include "palPlatformKey.h" + +namespace vk +{ +// ===================================================================================================================== +PipelineBinary::PipelineBinary( + const Util::MetroHash::Hash& binaryKey, + const Vkgc::BinaryData& binaryData) + : + m_binaryKey(binaryKey), + m_binaryData(binaryData) +{ +} + +// ===================================================================================================================== +// Create a pipeline binary object. +VkResult PipelineBinary::Create( + Device* pDevice, + const Util::MetroHash::Hash& binaryKey, + const Vkgc::BinaryData& binaryData, + const VkAllocationCallbacks* pAllocator, + VkPipelineBinaryKHR* pPipelineBinary) +{ + VK_ASSERT(pPipelineBinary != nullptr); + + VkResult result = VK_SUCCESS; + + PipelineBinary* pObject = nullptr; + uint8_t* pCode = nullptr; + + auto placement = utils::PlacementHelper<2>( + nullptr, + utils::PlacementElement{&pObject}, + utils::PlacementElement {&pCode, binaryData.codeSize}); + + void* pMemory = pDevice->AllocApiObject(pAllocator, placement.SizeOf()); + + if (pMemory == nullptr) + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + *pPipelineBinary = VK_NULL_HANDLE; + } + else + { + placement.FixupPtrs(pMemory); + VK_ASSERT(pObject == pMemory); + + memcpy(pCode, binaryData.pCode, binaryData.codeSize); + + Vkgc::BinaryData objectBinaryData + { + .codeSize = binaryData.codeSize, + .pCode = pCode + }; + + VK_PLACEMENT_NEW(pObject) PipelineBinary(binaryKey, objectBinaryData); + + *pPipelineBinary = 
PipelineBinary::HandleFromVoidPointer(pObject); + } + + return result; +} + +// ===================================================================================================================== +VkResult PipelineBinary::CreatePipelineBinaries( + Device* pDevice, + const VkPipelineBinaryCreateInfoKHR* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipelineBinaryHandlesInfoKHR* pBinaries) +{ + VK_ASSERT((pCreateInfo != nullptr) && (pBinaries != nullptr)); + + VkResult finalResult = VK_SUCCESS; + + if (pCreateInfo->pKeysAndDataInfo != nullptr) + { + if (pBinaries->pPipelineBinaries == nullptr) + { + pBinaries->pipelineBinaryCount = pCreateInfo->pKeysAndDataInfo->binaryCount; + } + else + { + uint32 binariesCopiedCount = 0; + + for (uint32_t binaryIndex = 0; + (binaryIndex < pCreateInfo->pKeysAndDataInfo->binaryCount); + ++binaryIndex) + { + Util::MetroHash::Hash binaryKey = {}; + ReadFromPipelineBinaryKey(pCreateInfo->pKeysAndDataInfo->pPipelineBinaryKeys[binaryIndex], &binaryKey); + + const auto binaryData = Vkgc::BinaryData + { + .codeSize = pCreateInfo->pKeysAndDataInfo->pPipelineBinaryData[binaryIndex].dataSize, + .pCode = pCreateInfo->pKeysAndDataInfo->pPipelineBinaryData[binaryIndex].pData + }; + + VkResult result = PipelineBinary::Create( + pDevice, + binaryKey, + binaryData, + pAllocator, + &pBinaries->pPipelineBinaries[binaryIndex]); + + if (result == VK_SUCCESS) + { + ++binariesCopiedCount; + } + else if (finalResult == VK_SUCCESS) + { + // Keep the first failed result, but attempt to create the remaining pipeline binaries + finalResult = result; + } + } + + pBinaries->pipelineBinaryCount = binariesCopiedCount; + + } + } + else if (pCreateInfo->pipeline != VK_NULL_HANDLE) + { + const auto pBinaryStorage = Pipeline::BaseObjectFromHandle(pCreateInfo->pipeline)->GetBinaryStorage(); + + if (pBinaryStorage != nullptr) + { + if (pBinaries->pPipelineBinaries == nullptr) + { + pBinaries->pipelineBinaryCount = pBinaryStorage->binaryCount; + } + else 
+ { + uint32 binariesCopiedCount = 0; + + for (uint32_t binaryIndex = 0; + (binaryIndex < pBinaries->pipelineBinaryCount); + ++binaryIndex) + { + VkResult result = PipelineBinary::Create( + pDevice, + pBinaryStorage->binaryInfo[binaryIndex].binaryHash, + pBinaryStorage->binaryInfo[binaryIndex].pipelineBinary, + pAllocator, + &pBinaries->pPipelineBinaries[binaryIndex]); + + if (result == VK_SUCCESS) + { + ++binariesCopiedCount; + } + else if (finalResult == VK_SUCCESS) + { + // Keep the first failed result, but attempt to create the remaining pipeline binaries + finalResult = result; + } + } + + pBinaries->pipelineBinaryCount = binariesCopiedCount; + } + } + else + { + // Pipeline didn't enable VK_PIPELINE_CREATE_2_CAPTURE_DATA_BIT_KHR. + finalResult = VK_ERROR_INITIALIZATION_FAILED; + } + } + else if (pCreateInfo->pPipelineCreateInfo != nullptr) + { + // Generate the key for the provided pipeline create info + VkPipelineBinaryKeyKHR binaryKey = {}; + PipelineBinary::GetPipelineKey(pDevice, pCreateInfo->pPipelineCreateInfo, &binaryKey); + + // Query the pipeline binary cache using the generated key. 
+ bool isUserCacheHit = false; + bool isInternalCacheHit = false; + Util::MetroHash::Hash key = {}; + Vkgc::BinaryData pipelineBinary = {}; + FreeCompilerBinary freeCompilerBinary = FreeWithCompiler; + + ReadFromPipelineBinaryKey(binaryKey, &key); + + Util::Result cacheResult = pDevice->GetCompiler(DefaultDeviceIndex)->GetCachedPipelineBinary( + &key, + nullptr, // pPipelineBinaryCache + &pipelineBinary, + &isUserCacheHit, + &isInternalCacheHit, + &freeCompilerBinary, + nullptr); // pPipelineFeedback + + if (cacheResult == Util::Result::Success) + { + if (pBinaries->pPipelineBinaries == nullptr) + { + // Cached binaries are monolithic, not GPL libraries + pBinaries->pipelineBinaryCount = pDevice->NumPalDevices(); + } + else + { + uint32 binariesCopiedCount = 0; + + for (uint32_t binaryIndex = 0; + (binaryIndex < pBinaries->pipelineBinaryCount); + ++binaryIndex) + { + VkResult result = PipelineBinary::Create( + pDevice, + key, + pipelineBinary, + pAllocator, + &pBinaries->pPipelineBinaries[binaryIndex]); + + if (result == VK_SUCCESS) + { + ++binariesCopiedCount; + } + else if (finalResult == VK_SUCCESS) + { + // Keep the first failed result, but attempt to create the remaining pipeline binaries + finalResult = result; + } + } + + pBinaries->pipelineBinaryCount = binariesCopiedCount; + } + } + else + { + finalResult = VK_PIPELINE_BINARY_MISSING_KHR; + } + } + else + { + finalResult = VK_ERROR_INITIALIZATION_FAILED; + VK_NEVER_CALLED(); + } + + return finalResult; +} + +// ===================================================================================================================== +VkResult PipelineBinary::DestroyPipelineBinary( + Device* pDevice, + const VkAllocationCallbacks* pAllocator) +{ + Util::Destructor(this); + + pDevice->FreeApiObject(pAllocator, this); + + return VK_SUCCESS; +} + +// ===================================================================================================================== +VkResult PipelineBinary::GetPipelineKey( + const 
Device* pDevice, + const VkPipelineCreateInfoKHR* pPipelineCreateInfo, + VkPipelineBinaryKeyKHR* pPipelineBinaryKey) +{ + VkResult result = VK_SUCCESS; + + if (pPipelineCreateInfo == nullptr) + { + // Return a common key that applies to all pipelines. If it's changed, it invalidates all other + // pipeline-specific keys. + const auto pPlatformKey = pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetPlatformKey(); + + // If this fails, then we probably need to compute the key in some other way, or we risk collisions. + VK_ASSERT(pPlatformKey->GetKeySize() <= VK_MAX_PIPELINE_BINARY_KEY_SIZE_KHR); + + WriteToPipelineBinaryKey( + pPlatformKey->GetKey(), + pPlatformKey->GetKeySize(), + pPipelineBinaryKey); + } + else + { + Util::MetroHash::Hash cacheId[MaxPipelineBinaryInfoCount] = {}; + + switch (static_cast(pPipelineCreateInfo->pNext)->sType) + { + case VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO: + { + const auto pComputeCreateInfo = static_cast(pPipelineCreateInfo->pNext); + const auto flags = Device::GetPipelineCreateFlags(pComputeCreateInfo); + + ComputePipelineBinaryCreateInfo binaryCreateInfo = {}; + PipelineOptimizerKey pipelineOptimizerKey = {}; + ShaderOptimizerKey shaderOptimizerKey = {}; + ShaderModuleHandle tempModule = {}; + ComputePipelineShaderStageInfo shaderInfo = {}; + uint64 apiPsoHash = 0; + + result = ComputePipeline::CreateCacheId( + pDevice, + pComputeCreateInfo, + flags, + &shaderInfo, + &binaryCreateInfo, + &shaderOptimizerKey, + &pipelineOptimizerKey, + &apiPsoHash, + &tempModule, + cacheId); + + break; + } + case VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO: + { + const auto pGraphicsCreateInfo = + static_cast(pPipelineCreateInfo->pNext); + + const auto flags = Device::GetPipelineCreateFlags(pGraphicsCreateInfo); + + GraphicsPipelineExtStructs extStructs = {}; + GraphicsPipelineLibraryInfo libInfo = {}; + GraphicsPipelineBinaryCreateInfo binaryCreateInfo = {}; + PipelineOptimizerKey pipelineOptimizerKey = {}; + ShaderOptimizerKey 
shaderOptimizerKeys[ShaderStage::ShaderStageGfxCount] = {}; + ShaderModuleHandle tempModules[ShaderStage::ShaderStageGfxCount] = {}; + GraphicsPipelineShaderStageInfo shaderStageInfo = {}; + uint64 apiPsoHash = 0; + + GraphicsPipelineCommon::HandleExtensionStructs(pGraphicsCreateInfo, &extStructs); + GraphicsPipelineCommon::ExtractLibraryInfo(pDevice, pGraphicsCreateInfo, extStructs, flags, &libInfo); + + result = GraphicsPipelineCommon::CreateCacheId( + pDevice, + pGraphicsCreateInfo, + extStructs, + libInfo, + flags, + &shaderStageInfo, + &binaryCreateInfo, + shaderOptimizerKeys, + &pipelineOptimizerKey, + &apiPsoHash, + tempModules, + cacheId); + + break; + } +#if VKI_RAY_TRACING + case VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR: + { + const auto pRayTracingCreateInfo = + static_cast(pPipelineCreateInfo->pNext); + + const auto flags = Device::GetPipelineCreateFlags(pRayTracingCreateInfo); + + RayTracingPipelineShaderStageInfo shaderInfo = {}; + PipelineOptimizerKey optimizerKey = {}; + ShaderModuleHandle* pTempModules = nullptr; + uint64 apiPsoHash = 0; + Util::MetroHash::Hash elfHash = {}; + + // If rtEnableCompilePipelineLibrary is false, the library shaders are included in pRayTracingCreateInfo. 
+ const bool hasLibraries = + pDevice->GetRuntimeSettings().rtEnableCompilePipelineLibrary && + ((pRayTracingCreateInfo->pLibraryInfo != nullptr) && + (pRayTracingCreateInfo->pLibraryInfo->libraryCount > 0)); + + void* pShaderTempBuffer = nullptr; + const uint32_t nativeShaderCount = pRayTracingCreateInfo->stageCount; + uint32_t totalShaderCount = pRayTracingCreateInfo->stageCount; + + if (hasLibraries) + { + for (uint32_t libraryIdx = 0; + libraryIdx < pRayTracingCreateInfo->pLibraryInfo->libraryCount; + ++libraryIdx) + { + auto pLibrary = RayTracingPipeline::ObjectFromHandle( + pRayTracingCreateInfo->pLibraryInfo->pLibraries[libraryIdx]); + + VK_ASSERT(pLibrary->GetType() == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR); + + totalShaderCount += pLibrary->GetTotalShaderCount(); + } + } + + if (totalShaderCount > 0) + { + auto placement = utils::PlacementHelper<3>( + nullptr, + utils::PlacementElement {&shaderInfo.pStages, nativeShaderCount}, + utils::PlacementElement{&pTempModules, nativeShaderCount}, + utils::PlacementElement{&optimizerKey.pShaders, totalShaderCount}); + + pShaderTempBuffer = pDevice->VkInstance()->AllocMem( + placement.SizeOf(), + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + + if (pShaderTempBuffer != nullptr) + { + memset(pShaderTempBuffer, 0, placement.SizeOf()); + placement.FixupPtrs(pShaderTempBuffer); + + shaderInfo.stageCount = nativeShaderCount; + } + else + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + + if (result == VK_SUCCESS) + { + optimizerKey.shaderCount = totalShaderCount; + + result = RayTracingPipeline::CreateCacheId( + pDevice, + pRayTracingCreateInfo, + flags, + hasLibraries, + &shaderInfo, + &optimizerKey, + &apiPsoHash, + &elfHash, + pTempModules, + cacheId); + + // Free the temporary memory for shader modules + Pipeline::FreeTempModules(pDevice, nativeShaderCount, pTempModules); + + // Free the temporary memory for creating cacheId + if (pShaderTempBuffer != nullptr) + { + 
pDevice->VkInstance()->FreeMem(pShaderTempBuffer); + } + } + } + + break; + } +#endif + default: + // Unexpected header + result = VK_ERROR_UNKNOWN; + + VK_NEVER_CALLED(); + break; + } + + if (result == VK_SUCCESS) + { + WriteToPipelineBinaryKey( + cacheId[0].bytes, + sizeof(cacheId[0].bytes), + pPipelineBinaryKey); + } + } + + return result; +} + +// ===================================================================================================================== +VkResult PipelineBinary::GetPipelineBinaryData( + VkPipelineBinaryKeyKHR* pPipelineBinaryKey, + size_t* pPipelineBinaryDataSize, + void* pPipelineBinaryData) +{ + VK_ASSERT(pPipelineBinaryDataSize != nullptr); + + VkResult result = VK_SUCCESS; + + if (pPipelineBinaryData != nullptr) + { + if (*pPipelineBinaryDataSize < m_binaryData.codeSize) + { + result = VK_ERROR_NOT_ENOUGH_SPACE_KHR; + } + else + { + WriteToPipelineBinaryKey(&m_binaryKey, sizeof(m_binaryKey), pPipelineBinaryKey); + + memcpy(pPipelineBinaryData, m_binaryData.pCode, m_binaryData.codeSize); + } + } + + // Must be written in all cases + *pPipelineBinaryDataSize = m_binaryData.codeSize; + + return result; +} + +// ===================================================================================================================== +VkResult PipelineBinary::ReleaseCapturedPipelineData( + Device* pDevice, + Pipeline* pPipeline, + const VkAllocationCallbacks* pAllocator) +{ + return pPipeline->FreeBinaryStorage(pAllocator); +} + +// ===================================================================================================================== +// A helper to write a pipeline binary key +void PipelineBinary::WriteToPipelineBinaryKey( + const void* pSrcData, + const size_t dataSize, + VkPipelineBinaryKeyKHR* pDstKey) +{ + VK_ASSERT(pDstKey != nullptr); + VK_ASSERT(dataSize <= sizeof(pDstKey->key)); + + pDstKey->keySize = static_cast(dataSize); + memcpy(pDstKey->key, pSrcData, dataSize); + memset(&pDstKey->key[dataSize], 0, 
sizeof(pDstKey->key) - dataSize); +} + +// ===================================================================================================================== +// A helper to convert a pipeline binary key to MetroHash::Hash. +void PipelineBinary::ReadFromPipelineBinaryKey( + const VkPipelineBinaryKeyKHR& inKey, + Util::MetroHash::Hash* pOutKey) +{ + VK_ASSERT(pOutKey != nullptr); + + constexpr auto OutKeySize = static_cast(sizeof(pOutKey->bytes)); + + VK_ASSERT(inKey.keySize >= OutKeySize); + + memcpy(pOutKey->bytes, inKey.key, OutKeySize); +} + +} // namespace vk diff --git a/icd/api/vk_pipeline_layout.cpp b/icd/api/vk_pipeline_layout.cpp index 68f3de3a..3c029508 100644 --- a/icd/api/vk_pipeline_layout.cpp +++ b/icd/api/vk_pipeline_layout.cpp @@ -333,7 +333,7 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( gfxReservedCount++; } - if (pDevice->GetRuntimeSettings().enableDebugPrintf) + if (pDevice->GetEnabledFeatures().enableDebugPrintf) { gfxReservedCount++; } @@ -386,7 +386,7 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( pPipelineInfo->numUserDataNodes += 1; } - if (pDevice->GetRuntimeSettings().enableDebugPrintf) + if (pDevice->GetEnabledFeatures().enableDebugPrintf) { pPipelineInfo->numUserDataNodes += 1; pUserDataLayout->debugPrintfRegBase = pInfo->userDataRegCount; @@ -633,7 +633,7 @@ VkResult PipelineLayout::BuildIndirectSchemeInfo( pInfo->userDataRegCount += 1; } - if (settings.enableDebugPrintf) + if (pDevice->GetEnabledFeatures().enableDebugPrintf) { pPipelineInfo->numUserDataNodes += 1; pUserDataLayout->debugPrintfRegBase = pInfo->userDataRegCount; @@ -886,6 +886,54 @@ VkResult PipelineLayout::Create( return result; } +// ===================================================================================================================== +// Extract user data layout based on createInfo with no pipeline layout object being actually created +VkResult PipelineLayout::GenerateUserDataLayout( + const Device* pDevice, + const 
VkPipelineLayoutCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + UserDataLayout* pUserDataLayout) +{ + VkResult result = VK_SUCCESS; + + Info info = {}; + PipelineInfo pipelineInfo = {}; + SetUserDataLayout* pSetUserDataLayout = nullptr; + + const size_t setUserDataLayoutSize = + Util::Pow2Align((pCreateInfo->setLayoutCount * sizeof(SetUserDataLayout)), ExtraDataAlignment()); + + void* pMemory = pAllocator->pfnAllocation(pAllocator->pUserData, + setUserDataLayoutSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + + if (pMemory != nullptr) + { + pSetUserDataLayout = static_cast(pMemory); + + result = ConvertCreateInfo( + pDevice, + pCreateInfo, + &info, + &pipelineInfo, + pSetUserDataLayout); + + if (result == VK_SUCCESS) + { + *pUserDataLayout = info.userDataLayout; + } + + pAllocator->pfnFree(pAllocator->pUserData, pMemory); + } + else + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + + return result; +} + // ===================================================================================================================== // Translates VkDescriptorType to VKGC ResourceMappingNodeType Vkgc::ResourceMappingNodeType PipelineLayout::MapLlpcResourceNodeType( @@ -1037,7 +1085,7 @@ void PipelineLayout::BuildLlpcVertexBufferTableMapping( if (pVbInfo != nullptr) { // Build the table description itself - const uint32_t srdDwSize = m_pDevice->GetProperties().descriptorSizes.bufferView / sizeof(uint32_t); + const uint32_t srdDwSize = m_pDevice->GetProperties().descriptorSizes.untypedBufferView / sizeof(uint32_t); const uint32_t vbTableSize = pVbInfo->bindingTableSize * srdDwSize; // Add the set pointer node pointing to this table @@ -1346,7 +1394,7 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( &userDataNodeCount); } - if (m_pDevice->GetRuntimeSettings().enableDebugPrintf) + if (m_pDevice->GetEnabledFeatures().enableDebugPrintf) { BuildLlpcDebugPrintfMapping( stageMask, @@ -1640,7 +1688,7 @@ void 
PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( } #endif - if (m_pDevice->GetRuntimeSettings().enableDebugPrintf) + if (m_pDevice->GetEnabledFeatures().enableDebugPrintf) { BuildLlpcDebugPrintfMapping( stageMask, diff --git a/icd/api/vk_query.cpp b/icd/api/vk_query.cpp index d16685b6..af16c0fe 100644 --- a/icd/api/vk_query.cpp +++ b/icd/api/vk_query.cpp @@ -445,7 +445,9 @@ VkResult TimestampQueryPool::Create( // Allocate system memory size_t apiSize = sizeof(TimestampQueryPool); - size_t viewSize = pDevice->GetProperties().descriptorSizes.bufferView; + size_t viewSize = pDevice->UseStridedCopyQueryResults() ? + pDevice->GetProperties().descriptorSizes.untypedBufferView : + pDevice->GetProperties().descriptorSizes.typedBufferView; size_t totalSize = apiSize + (viewSize * pDevice->NumPalDevices()); void* pMemory = nullptr; const uint32_t slotSize = pDevice->GetProperties().timestampQueryPoolSlotSize; @@ -642,7 +644,7 @@ VkResult QueryPoolWithStorageView::Initialize( } else { - memset(pViewMem, 0, m_pDevice->GetProperties().descriptorSizes.bufferView); + memset(pViewMem, 0, viewSize * m_pDevice->NumPalDevices()); } } @@ -822,7 +824,9 @@ VkResult AccelerationStructureQueryPool::Create( // Allocate system memory size_t apiSize = sizeof(AccelerationStructureQueryPool); - size_t viewSize = pDevice->GetProperties().descriptorSizes.bufferView; + size_t viewSize = pDevice->UseStridedCopyQueryResults() ? 
+ pDevice->GetProperties().descriptorSizes.untypedBufferView : + pDevice->GetProperties().descriptorSizes.typedBufferView; size_t totalSize = apiSize + (viewSize * pDevice->NumPalDevices()); void* pMemory = nullptr; diff --git a/icd/api/vk_shader.cpp b/icd/api/vk_shader.cpp index c6378aef..9fd7e4dd 100644 --- a/icd/api/vk_shader.cpp +++ b/icd/api/vk_shader.cpp @@ -158,7 +158,7 @@ ShaderModule::ShaderModule( : m_codeSize(codeSize), m_pCode(pCode), - m_flags(flags) + m_flags(ConvertVkShaderModuleCreateFlags(flags)) { m_codeHash = BuildCodeHash(pCode, codeSize); @@ -211,7 +211,6 @@ VkResult ShaderModule::Init( VkResult result = pCompiler->BuildShaderModule( pDevice, m_flags, - 0, shaderBinary, &m_handle); diff --git a/icd/api/vk_utils.cpp b/icd/api/vk_utils.cpp index 097adbe2..94cbf36c 100644 --- a/icd/api/vk_utils.cpp +++ b/icd/api/vk_utils.cpp @@ -41,7 +41,10 @@ namespace utils // Get driver build time hash uint32_t GetBuildTimeHash() { - return Util::HashLiteralString(__DATE__ __TIME__); + Util::BuildId buildId; + Util::GetCurrentLibraryBuildId(&buildId); + + return Util::HashString((const char*)(buildId.data), sizeof(buildId.data)); } // ===================================================================================================================== diff --git a/icd/imported/gputexdecoder/gpuTexDecoder.cpp b/icd/imported/gputexdecoder/gpuTexDecoder.cpp index 8da4cc88..0c4d95c5 100755 --- a/icd/imported/gputexdecoder/gpuTexDecoder.cpp +++ b/icd/imported/gputexdecoder/gpuTexDecoder.cpp @@ -389,7 +389,7 @@ void Device::Init( { m_info = info; m_imageViewSizeInDwords = m_info.pDeviceProperties->gfxipProperties.srdSizes.imageView / sizeof(uint32); - m_bufferViewSizeInDwords = m_info.pDeviceProperties->gfxipProperties.srdSizes.bufferView / sizeof(uint32); + m_bufferViewSizeInDwords = m_info.pDeviceProperties->gfxipProperties.srdSizes.typedBufferView / sizeof(uint32); // 3 Table and 1 TexBuffer, and 2 Image resource 
m_srdDwords[static_cast(InternalTexConvertCsType::ConvertASTCToRGBA8)] diff --git a/icd/make/amdicd.so.def b/icd/make/amdicd.so.def index 769208a3..76fce96f 100644 --- a/icd/make/amdicd.so.def +++ b/icd/make/amdicd.so.def @@ -37,6 +37,7 @@ global: vkEnumerateInstanceExtensionProperties; vkEnumerateInstanceLayerProperties; vkEnumerateInstanceVersion; + GetSettingsBlobsAll; local: *; }; diff --git a/icd/res/ver.h b/icd/res/ver.h index dd4a4df1..46a9acb7 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 318 +#define VULKAN_ICD_BUILD_VERSION 321 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION @@ -45,7 +45,7 @@ // These values specify the driver ID and driver info string #define VULKAN_DRIVER_ID VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR // "AMDOPEN" #define VULKAN_DRIVER_NAME_STR "AMD open-source driver" -#define VULKAN_DRIVER_INFO_STR "2024.Q3.2" +#define VULKAN_DRIVER_INFO_STR "2024.Q3.3" #define VULKAN_DRIVER_INFO_STR_LLPC "(LLPC)" // These values tell which version of the conformance test the driver is compliant against diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index 04cd3b66..6133823d 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -203,7 +203,7 @@ void VulkanSettingsLoader::OverrideSettingsBySystemInfo() pRootPath, m_settings.appProfileDumpDir); MakeAbsolutePath(m_settings.pipelineProfileDumpFile, sizeof(m_settings.pipelineProfileDumpFile), pRootPath, m_settings.pipelineProfileDumpFile); -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE MakeAbsolutePath(m_settings.pipelineProfileRuntimeFile, sizeof(m_settings.pipelineProfileRuntimeFile), pRootPath, m_settings.pipelineProfileRuntimeFile); #endif @@ -225,6 +225,8 @@ void 
VulkanSettingsLoader::OverrideDefaultsExperimentInfo() VK_SET_VAL_IF_EXPERIMENT_ENABLED(RayTracingSupport, enableRaytracingSupport, false); #endif + VK_SET_VAL_IF_EXPERIMENT_ENABLED(VariableRateShadingSupport, enableVariableRateShading, false); + VK_SET_VAL_IF_EXPERIMENT_ENABLED(Native16BitTypesSupport, enableNative16BitTypes, false); VK_SET_VAL_IF_EXPERIMENT_ENABLED(AmdVendorExtensions, disableAmdVendorExtensions, true); @@ -247,7 +249,6 @@ void VulkanSettingsLoader::OverrideDefaultsExperimentInfo() { m_settings.rtEnableTreeRebraid = RebraidTypeOff; m_settings.rtEnableTriangleSplitting = false; - m_settings.rtEnableTopDownBuild = false; m_settings.rtBvhBuildModeFastBuild = BvhBuildModeLinear; m_settings.enablePairCompressionCostCheck = true; } @@ -455,7 +456,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( { m_settings.maxUnifiedNonRayGenShaders = static_cast(atoi(pMaxInlinedShadersEnvVar)); } -#if VKI_BUILD_GFX11 + // Default optimized RT settings for Navi31 / 32, // which has physical VGPR 1536 per SIMD if (pInfo->gfxipProperties.shaderCore.vgprsPerSimd == 1536) @@ -467,7 +468,10 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.indirectCallTargetOccupancyPerSimd = 0.75; } #endif -#endif + + { + m_settings.disableImplicitInvariantExports = false; + } if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { @@ -479,13 +483,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.nggCompactVertex = false; } - - { - m_settings.disableImplicitInvariantExports = false; - } - -#if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { // Enable NGG compactionless mode for Navi3x m_settings.nggCompactVertex = false; @@ -494,18 +492,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.csWaveSize = 64; m_settings.fsWaveSize = 64; } -#endif - switch (pInfo->revision) - { -#if VKI_BUILD_STRIX1 - case Pal::AsicRevision::Strix1: - // 
Remove this when displayDcc corruption issue is fixed on Strix. - m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabled; - break; -#endif - default: - break; - } // Put command buffers in local for large/resizable BAR systems with > 7 GBs of local heap constexpr gpusize _1GB = 1024ull * 1024ull * 1024ull; @@ -608,11 +594,8 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( { } } -#if VKI_BUILD_GFX11 else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { - -#if VKI_BUILD_NAVI31 if (pInfo->revision == Pal::AsicRevision::Navi31) { { @@ -620,9 +603,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; } } -#endif } -#endif } if ((appProfile == AppProfile::WolfensteinII) || @@ -722,12 +703,10 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( } } -#if VKI_BUILD_GFX11 if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { m_settings.resourceBarrierOptions &= ~ResourceBarrierOptions::SkipDstCacheInv; } -#endif m_settings.implicitExternalSynchronization = false; } @@ -800,7 +779,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( } } -#if VKI_BUILD_GFX11 if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; @@ -812,28 +790,21 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( RpmViewBypassMall::RpmViewBypassMallOnRead; } -#if VKI_BUILD_NAVI31 if (pInfo->revision == Pal::AsicRevision::Navi31) { m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; } -#endif -#if VKI_BUILD_NAVI32 if (pInfo->revision == Pal::AsicRevision::Navi32) { m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; } -#endif -#if VKI_BUILD_NAVI33 if (pInfo->revision == Pal::AsicRevision::Navi33) { m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrPolicy::MallNoAllocCtSsrAsSnsr; } -#endif } -#endif m_settings.enableUberFetchShader = true; } @@ -859,6 +830,8 @@ VkResult 
VulkanSettingsLoader::OverrideProfiledSettings( m_settings.forceDepthClampBasedOnZExport = true; m_settings.clampMaxImageSize = 16384u; + + m_settings.ac01WaNotNeeded = true; } if (appProfile == AppProfile::SeriousSamFusion) @@ -868,6 +841,8 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.anisoThreshold = 1.0f; m_settings.clampMaxImageSize = 16384u; + + m_settings.ac01WaNotNeeded = true; } if ((appProfile == AppProfile::TalosVR) || @@ -943,13 +918,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( // Originally applied to QuakeRemastered - this setting applies to QuakeEnhanced now since it's an update // to the same game. m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabled; - -#if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; - } -#endif } if (appProfile == AppProfile::SedpEngine) @@ -989,18 +957,15 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( ForceDccForColorAttachments | ForceDccFor32BppShaderStorage); } -#if VKI_BUILD_GFX11 + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { -#if VKI_BUILD_NAVI31 if (pInfo->revision == Pal::AsicRevision::Navi31) { m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; m_settings.pipelineBinningMode = PipelineBinningModeEnable; } -#endif } -#endif } if (appProfile == AppProfile::ZombieArmy4) @@ -1108,19 +1073,34 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( if (appProfile == AppProfile::RainbowSixExtraction) { -#if VKI_BUILD_GFX11 if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { - m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | - ForceDccForColorAttachments | - ForceDccForNonColorAttachmentShaderStorage | - ForceDccFor32BppShaderStorage | - ForceDccFor64BppShaderStorage); + if (pInfo->revision == Pal::AsicRevision::Navi31) + { + m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccForColorAttachments | + 
ForceDccForNonColorAttachmentShaderStorage | + ForceDccFor32BppShaderStorage | + ForceDccFor64BppShaderStorage); + + m_settings.disableLoopUnrolls = true; + m_settings.forceCsThreadIdSwizzling = true; + } + else if (pInfo->revision == Pal::AsicRevision::Navi33) + { + m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccFor64BppShaderStorage); - m_settings.disableLoopUnrolls = true; - m_settings.forceCsThreadIdSwizzling = true; + m_settings.pipelineBinningMode = PipelineBinningModeDisable; + m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; + } } -#endif } if (appProfile == AppProfile::Rage2) @@ -1169,8 +1149,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.fsWaveSize = 64; } } - -#if VKI_BUILD_GFX11 else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { m_settings.pipelineBinningMode = PipelineBinningModeDisable; @@ -1183,7 +1161,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( ForceDccFor32BppShaderStorage | ForceDccFor64BppShaderStorage); } -#endif } if (appProfile == AppProfile::RedDeadRedemption2) @@ -1212,12 +1189,11 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( ForceDccForColorAttachments | ForceDccFor64BppShaderStorage); -#if VKI_BUILD_GFX11 if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { m_settings.forceEnableDcc |= ForceDccForNonColorAttachmentShaderStorage; } -#endif + } m_settings.ac01WaNotNeeded = true; @@ -1284,12 +1260,9 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( } } -#if VKI_BUILD_GFX11 if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { - m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; } -#endif } #if VKI_RAY_TRACING @@ -1314,13 +1287,11 @@ VkResult 
VulkanSettingsLoader::OverrideProfiledSettings( } -#if VKI_BUILD_GFX11 if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp11_0) { // Gives ~0.5% gain at 4k m_settings.enableAceShaderPrefetch = false; } -#endif } if (appProfile == AppProfile::ControlDX12) @@ -1330,17 +1301,17 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.rtEnableCompilePipelineLibrary = false; } -#if VKI_BUILD_GFX11 if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp11_0) { // Gives ~2.22% gain at 1080p m_settings.enableAceShaderPrefetch = false; } -#endif } if (appProfile == AppProfile::RayTracingWeekends) { + m_settings.rtEnableTopDownBuild = true; + if (pInfo->gfxipProperties.shaderCore.vgprsPerSimd == 1024) { { @@ -1388,7 +1359,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( { #if VKI_RAY_TRACING m_settings.rtBvhBuildModeFastTrace = BvhBuildModeLinear; - m_settings.rtEnableTopDownBuild = false; m_settings.plocRadius = 4; // 13% Gain @ 4k - Allows overlapping builds @@ -1425,15 +1395,10 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.csWaveSize = 64; } -#if VKI_BUILD_GFX11 else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { // Navi31 Mall and Tiling Settings - if ((pInfo->revision == Pal::AsicRevision::Navi31) -#if VKI_BUILD_NAVI32 - || (pInfo->revision == Pal::AsicRevision::Navi32) -#endif - ) + if ((pInfo->revision == Pal::AsicRevision::Navi31) || (pInfo->revision == Pal::AsicRevision::Navi32)) { // Mall no alloc settings give a ~1% gain m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; @@ -1444,7 +1409,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.imageTilingPreference3dGpuWritable = Pal::ImageTilingPattern::YMajor; } } -#endif } if (appProfile == AppProfile::IdTechLauncher) @@ -1512,13 +1476,11 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( // A larger minImageCount can get a performance gain for game Metro Exodus. 
m_settings.forceMinImageCount = 3; -#if VKI_BUILD_GFX11 if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp11_0) { // Gives ~0.9% gain at 1080p m_settings.enableAceShaderPrefetch = false; } -#endif } if (appProfile == AppProfile::X4Foundations) @@ -1594,27 +1556,22 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; } } -#if VKI_BUILD_GFX11 else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { -#if VKI_BUILD_NAVI31 if (pInfo->revision == Pal::AsicRevision::Navi31) { // This provides ~4.2% gain at 4k m_settings.imageTilingPreference3dGpuWritable = Pal::ImageTilingPattern::YMajor; m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; } -#endif -#if VKI_BUILD_NAVI33 + if (pInfo->revision == Pal::AsicRevision::Navi33) { { m_settings.forceCsThreadIdSwizzling = true; } } -#endif } -#endif } if (appProfile == AppProfile::MetalGearSolid5) @@ -1660,27 +1617,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( } - if ((appProfile == AppProfile::HalfLifeAlyx) || - (appProfile == AppProfile::Satisfactory)) - { -#if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; - } -#endif - } - - if (appProfile == AppProfile::RomeRemastered) - { -#if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; - } -#endif - } - if (appProfile == AppProfile::SpidermanRemastered) { m_settings.supportMutableDescriptors = false; @@ -1695,7 +1631,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( #if VKI_RAY_TRACING m_settings.plocRadius = 4; m_settings.rtBvhBuildModeFastTrace = BvhBuildModeLinear; - m_settings.rtEnableTopDownBuild = false; if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { @@ -1715,7 +1650,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( { OverrideVkd3dCommonSettings(&m_settings); -#if VKI_BUILD_GFX11 if ((pInfo->gfxLevel == 
Pal::GfxIpLevel::GfxIp11_0) #if VKI_BUILD_GFX115 || (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_5) @@ -1724,7 +1658,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( { m_settings.fsWaveSize = 32; } -#endif } if (appProfile == AppProfile::Vkd3dEngine) @@ -1969,11 +1902,9 @@ void VulkanSettingsLoader::ValidateSettings() m_settings.enableRaytracingSupport = false; } -#if VKI_BUILD_GFX11 // RTIP 2.0+ is always expected to support hardware traversal stack VK_ASSERT((rayTracingIpLevel <= Pal::RayTracingIpLevel::RtIp1_1) || (deviceProps.gfxipProperties.flags.supportRayTraversalStack == 1)); -#endif // Clamp target occupancy to [0.0, 1.0] m_settings.indirectCallTargetOccupancyPerSimd = Util::Clamp(m_settings.indirectCallTargetOccupancyPerSimd, 0.0f, 1.0f); diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index acc08788..2ca1ff94 100644 --- a/icd/settings/settings_xgl.json +++ b/icd/settings/settings_xgl.json @@ -1074,7 +1074,7 @@ }, { "Name": "PipelineProfileRuntimeFile", - "Description": "Relative Path to a JSON file that describes a shader app profile that is parsed at runtime. This setting only triggers on debug builds or builds made with the ICD_RUNTIME_APP_PROFILE=1 option. This file has the same format as the JSON files used to build production shader app profiles. Root directory is determined by AMD_DEBUG_DIR environment variable", + "Description": "Relative Path to a JSON file that describes a shader app profile that is parsed at runtime. This setting only triggers on debug builds or builds made with the VKI_RUNTIME_APP_PROFILE=1 option. This file has the same format as the JSON files used to build production shader app profiles. 
Root directory is determined by AMD_DEBUG_DIR environment variable", "Tags": [ "Pipeline Options" ], @@ -1129,9 +1129,6 @@ "Tags": [ "Pipeline Options" ], - "BuildTypes": [ - "VKI_BUILD_GFX11" - ], "Defaults": { "Default": false }, @@ -3184,7 +3181,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": true + "Default": false }, "Type": "bool", "Name": "RtEnableTopDownBuild", @@ -3331,6 +3328,36 @@ "Type": "bool", "Scope": "Driver" }, + { + "Name": "RtCheckBufferOverlapsInBatch", + "Description": "Check for scratch and result buffer overlaps in a batch of BVH builds/updates.", + "Tags": [ + "Ray Tracing" + ], + "BuildTypes": [ + "VKI_RAY_TRACING" + ], + "Defaults": { + "Default": false + }, + "Type": "bool", + "Scope": "Driver" + }, + { + "Name": "RtDisableAccelStructCompaction", + "Description": "Disables compaction of the Acceleration Structure build and performs a copy instead.", + "Tags": [ + "Ray Tracing" + ], + "BuildTypes": [ + "VKI_RAY_TRACING" + ], + "Defaults": { + "Default": false + }, + "Type": "bool", + "Scope": "Driver" + }, { "Name": "RtEnableMortonCode30", "Description": "Enable Morton Code 30 bits", @@ -3341,7 +3368,7 @@ "VKI_RAY_TRACING" ], "Defaults": { - "Default": true + "Default": false }, "Type": "bool", "Scope": "Driver" @@ -4625,7 +4652,7 @@ }, { "Name": "DbgBarrierPostCmdEnable", - "Description": "Triggers a CmdBarrier call after any command in the given mask. The barrier behavior is controlled by the other DbgBarrierPost* settings in this category. Requires VK_ENABLE_DEBUG_BARRIERS=1 to take effect. 0x8FFFFFFF: All commands (heavyweight option)", + "Description": "Triggers a CmdBarrier call after any command in the given mask. The barrier behavior is controlled by the other DbgBarrierPost* settings in this category. Requires VKI_ENABLE_DEBUG_BARRIERS=1 to take effect. 
0x8FFFFFFF: All commands (heavyweight option)", "Tags": [ "Debugging" ], @@ -4905,7 +4932,7 @@ "Name": "DbgBarrierPostCacheDstMask" }, { - "Description": "Triggers a CmdBarrier call before any command in the given mask. The barrier behavior is controlled by the other DbgBarrierPre* settings in this category. Requires VK_ENABLE_DEBUG_BARRIERS=1 to take effect. For further documentation, consult the corresponding DbgBarrierPostCmdEnable command.", + "Description": "Triggers a CmdBarrier call before any command in the given mask. The barrier behavior is controlled by the other DbgBarrierPre* settings in this category. Requires VKI_ENABLE_DEBUG_BARRIERS=1 to take effect. For further documentation, consult the corresponding DbgBarrierPostCmdEnable command.", "Tags": [ "Debugging" ], @@ -5371,21 +5398,6 @@ "Name": "RebraidQualityHeuristicType", "Scope": "Driver" }, - { - "Description": "Fast BVH Build with no Morton Code sorting. Applies to BVHs with up to a wave size number of primitives.", - "Tags": [ - "Ray Tracing" - ], - "BuildTypes": [ - "VKI_RAY_TRACING" - ], - "Defaults": { - "Default": 0 - }, - "Type": "uint32", - "Name": "FastBuildThreshold", - "Scope": "Driver" - }, { "Description": "Enable pair compression in early build stage, i.e., During Encode phase.", "Tags": [ @@ -5640,7 +5652,7 @@ "General" ], "Defaults": { - "Default": false + "Default": true }, "Scope": "Driver", "Type": "bool" diff --git a/icd/tools/generate/shaderProfileTemplate.py b/icd/tools/generate/shaderProfileTemplate.py index 9f3b449c..2218bb76 100644 --- a/icd/tools/generate/shaderProfileTemplate.py +++ b/icd/tools/generate/shaderProfileTemplate.py @@ -71,7 +71,7 @@ #include \"utils/json_writer.h\" #include \"palJsonWriter.h\" -#if ICD_RUNTIME_APP_PROFILE +#if VKI_RUNTIME_APP_PROFILE #include \"utils/json_reader.h\" #endif """ @@ -2762,16 +2762,7 @@ def json_enum_reader_template(values, prefix=""): BuildTypesTemplate = { "llpc": "ICD_BUILD_LLPC", -#if VKI_BUILD_NAVI31 - "Navi31": 
"VKI_BUILD_NAVI31", -#endif -#if VKI_BUILD_NAVI33 - "Navi33": "VKI_BUILD_NAVI33", -#endif -#if VKI_BUILD_GFX11 - "gfxIp11_0": "VKI_BUILD_GFX11", -#endif - "icdRuntimeAppProfile": "ICD_RUNTIME_APP_PROFILE" + "icdRuntimeAppProfile": "VKI_RUNTIME_APP_PROFILE" } ###################################################################################################################