diff --git a/CMakeLists.txt b/CMakeLists.txt index 40cd61cf..0d941189 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,8 +28,8 @@ cmake_policy(SET CMP0091 NEW) # This part set before "project(XGL VERSION 1 LANGUAGES C CXX)". # In a system has both gcc and clang compiler. -option(XGL_USE_CLANG "Build with clang?" OFF) -if(UNIX AND XGL_USE_CLANG) +option(VKI_USE_CLANG "Build with clang?" OFF) +if(UNIX AND VKI_USE_CLANG) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) include(XglCompilerOptions) xgl_use_clang_compiler() @@ -108,75 +108,75 @@ xgl_set_compiler() ### Sanitizers ######################################################################################################## # Temporary variables -set(ICD_SANITIZER_LINK_FLAGS "") -set(ICD_SANITIZER_COMPILE_FLAGS "") +set(VKI_SANITIZER_LINK_FLAGS "") +set(VKI_SANITIZER_COMPILE_FLAGS "") -if(XGL_USE_SANITIZER) +if(VKI_USE_SANITIZER) # -pthread is needed sometimes to fix a cmake bug: https://gitlab.kitware.com/cmake/cmake/issues/16609 - string(APPEND ICD_SANITIZER_LINK_FLAGS " -pthread") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -pthread") if(UNIX) - if(XGL_USE_SANITIZER MATCHES "Address") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -shared-libasan") + if(VKI_USE_SANITIZER MATCHES "Address") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -shared-libasan") endif() - if(XGL_USE_SANITIZER STREQUAL "Address") + if(VKI_USE_SANITIZER STREQUAL "Address") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=address") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=address") - elseif (XGL_USE_SANITIZER MATCHES "Memory(WithOrigins)?") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=address") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=address") + elseif (VKI_USE_SANITIZER MATCHES "Memory(WithOrigins)?") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=memory") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=memory") - if(XGL_USE_SANITIZER STREQUAL "MemoryWithOrigins") - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize-memory-track-origins") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize-memory-track-origins") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=memory") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=memory") + if(VKI_USE_SANITIZER STREQUAL "MemoryWithOrigins") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize-memory-track-origins") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize-memory-track-origins") endif() - elseif(XGL_USE_SANITIZER STREQUAL "Undefined") + elseif(VKI_USE_SANITIZER STREQUAL "Undefined") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=undefined -fno-sanitize=vptr,function \ + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=undefined -fno-sanitize=vptr,function \ -fno-sanitize-recover=all") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=undefined") - elseif(XGL_USE_SANITIZER STREQUAL "Thread") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=undefined") + elseif(VKI_USE_SANITIZER STREQUAL "Thread") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=thread") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=thread") - elseif(XGL_USE_SANITIZER STREQUAL "Address;Undefined" OR - XGL_USE_SANITIZER STREQUAL "Undefined;Address") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=thread") + string(APPEND VKI_SANITIZER_LINK_FLAGS " 
-fsanitize=thread") + elseif(VKI_USE_SANITIZER STREQUAL "Address;Undefined" OR + VKI_USE_SANITIZER STREQUAL "Undefined;Address") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=address,undefined -fno-sanitize=vptr,function \ + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=address,undefined -fno-sanitize=vptr,function \ -fno-sanitize-recover=all") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=address,undefined") - elseif(XGL_USE_SANITIZER STREQUAL "Leaks") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=address,undefined") + elseif(VKI_USE_SANITIZER STREQUAL "Leaks") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=leak") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=leak") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=leak") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=leak") else() - message(FATAL_ERROR "Unsupported value of XGL_USE_SANITIZER: ${XGL_USE_SANITIZER}") + message(FATAL_ERROR "Unsupported value of VKI_USE_SANITIZER: ${VKI_USE_SANITIZER}") endif() elseif(MSVC) - if(XGL_USE_SANITIZER STREQUAL "Address") + if(VKI_USE_SANITIZER STREQUAL "Address") xgl_append_common_sanitizer_flags() - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize=address") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -fsanitize=address") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize=address") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -fsanitize=address") else() - message(FATAL_ERROR "This sanitizer not yet supported in the MSVC environment: ${XGL_USE_SANITIZER}") + message(FATAL_ERROR "This sanitizer not yet supported in the MSVC environment: ${VKI_USE_SANITIZER}") endif() else() - message(FATAL_ERROR "XGL_USE_SANITIZER is not supported on this platform.") + message(FATAL_ERROR "VKI_USE_SANITIZER is not supported on this platform.") endif() - if(XGL_USE_SANITIZER MATCHES "(Undefined;)?Address(;Undefined)?") - string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fsanitize-address-use-after-scope") + if(VKI_USE_SANITIZER MATCHES "(Undefined;)?Address(;Undefined)?") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fsanitize-address-use-after-scope") endif() endif() -string(APPEND CMAKE_EXE_LINKER_FLAGS "${ICD_SANITIZER_LINK_FLAGS}") -string(APPEND CMAKE_SHARED_LINKER_FLAGS "${ICD_SANITIZER_LINK_FLAGS}") -string(APPEND CMAKE_C_FLAGS "${ICD_SANITIZER_COMPILE_FLAGS}") -string(APPEND CMAKE_CXX_FLAGS "${ICD_SANITIZER_COMPILE_FLAGS}") +string(APPEND CMAKE_EXE_LINKER_FLAGS "${VKI_SANITIZER_LINK_FLAGS}") +string(APPEND CMAKE_SHARED_LINKER_FLAGS "${VKI_SANITIZER_LINK_FLAGS}") +string(APPEND CMAKE_C_FLAGS "${VKI_SANITIZER_COMPILE_FLAGS}") +string(APPEND CMAKE_CXX_FLAGS "${VKI_SANITIZER_COMPILE_FLAGS}") # LLVM libc++ -if(XGL_ENABLE_LIBCXX) +if(VKI_ENABLE_LIBCXX) if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") string(APPEND CMAKE_EXE_LINKER_FLAGS " -stdlib=libc++") string(APPEND CMAKE_SHARED_LINKER_FLAGS " -stdlib=libc++") @@ -189,24 +189,24 @@ endif() ### GCOV source code coverage ######################################################################################### # Temporary variables -set(ICD_GCOV_LINK_FLAGS "") -set(ICD_GCOV_COMPILE_FLAGS "") +set(VKI_GCOV_LINK_FLAGS "") +set(VKI_GCOV_COMPILE_FLAGS "") -if(XGL_ENABLE_GCOV) +if(VKI_ENABLE_GCOV) if(UNIX) - message(STATUS "This gcov is supported on the UNIX environment: ${XGL_ENABLE_GCOV}") + message(STATUS "This gcov is supported on the UNIX environment: ${VKI_ENABLE_GCOV}") xgl_append_gcov_coverage_flags() elseif(MSVC) - 
message(FATAL_ERROR "This gcov not yet supported in the MSVC environment: ${XGL_ENABLE_GCOV}") + message(FATAL_ERROR "This gcov not yet supported in the MSVC environment: ${VKI_ENABLE_GCOV}") else() - message(FATAL_ERROR "XGL_ENABLE_GCOV is not supported on this platform.") + message(FATAL_ERROR "VKI_ENABLE_GCOV is not supported on this platform.") endif() endif() -string(APPEND CMAKE_EXE_LINKER_FLAGS "${ICD_GCOV_LINK_FLAGS}") -string(APPEND CMAKE_SHARED_LINKER_FLAGS "${ICD_GCOV_LINK_FLAGS}") -string(APPEND CMAKE_C_FLAGS "${ICD_GCOV_COMPILE_FLAGS}") -string(APPEND CMAKE_CXX_FLAGS "${ICD_GCOV_COMPILE_FLAGS}") +string(APPEND CMAKE_EXE_LINKER_FLAGS "${VKI_GCOV_LINK_FLAGS}") +string(APPEND CMAKE_SHARED_LINKER_FLAGS "${VKI_GCOV_LINK_FLAGS}") +string(APPEND CMAKE_C_FLAGS "${VKI_GCOV_COMPILE_FLAGS}") +string(APPEND CMAKE_CXX_FLAGS "${VKI_GCOV_COMPILE_FLAGS}") ### Generator Dependencies ############################################################################################ # Python3 @@ -228,7 +228,7 @@ endif() set(PERL_CMD ${PERL_EXECUTABLE}) # Wayland required -if (BUILD_WAYLAND_SUPPORT) +if (VKI_BUILD_WAYLAND) find_package(PkgConfig REQUIRED) pkg_check_modules(WAYLAND REQUIRED wayland-client) endif() diff --git a/cmake/XglCompileDefinitions.cmake b/cmake/XglCompileDefinitions.cmake index 797ac5ed..fbf82015 100644 --- a/cmake/XglCompileDefinitions.cmake +++ b/cmake/XglCompileDefinitions.cmake @@ -30,20 +30,13 @@ macro(xgl_set_compile_definitions) target_compile_definitions(xgl PRIVATE ${TARGET_ARCHITECTURE_ENDIANESS}ENDIAN_CPU) if(TARGET_ARCHITECTURE_BITS EQUAL 32) - target_compile_definitions(xgl PRIVATE ICD_X86_BUILD) + target_compile_definitions(xgl PRIVATE VKI_X86_BUILD) elseif(TARGET_ARCHITECTURE_BITS EQUAL 64) - target_compile_definitions(xgl PRIVATE ICD_X64_BUILD) + target_compile_definitions(xgl PRIVATE VKI_X64_BUILD) endif() # Turn on the memory tracker if enabled. 
-    if(ICD_MEMTRACK)
-        target_compile_definitions(xgl PRIVATE ICD_MEMTRACK)
-    endif()
-
-    # Enable relevant GPUOpen preprocessor definitions
-    if(ICD_GPUOPEN_DEVMODE_BUILD)
-        target_compile_definitions(xgl PRIVATE ICD_GPUOPEN_DEVMODE_BUILD)
-    endif()
+    target_compile_definitions(xgl PRIVATE $<$<BOOL:${VKI_MEMTRACK}>:VKI_MEMTRACK>)

     if(ICD_BUILD_LLPC)
         target_compile_definitions(xgl PRIVATE ICD_BUILD_LLPC)
@@ -68,13 +61,13 @@ macro(xgl_set_compile_definitions)
     target_compile_definitions(xgl PRIVATE PAL_BUILD_GFX9=1)

 #if VKI_BUILD_GFX115
-    if(XGL_BUILD_GFX115)
+    if(VKI_BUILD_GFX115)
         target_compile_definitions(xgl PRIVATE VKI_BUILD_GFX115=1)
     endif()
 #endif

 #if VKI_BUILD_STRIX1
-    if(XGL_BUILD_STRIX1)
+    if(VKI_BUILD_STRIX1)
         target_compile_definitions(xgl PRIVATE VKI_BUILD_STRIX1=1)
     endif()
 #endif
@@ -87,17 +80,11 @@ macro(xgl_set_compile_definitions)
     endif()
 #endif

-#if VKI_NORMALIZED_TRIG_FUNCTIONS
-    if(VKI_NORMALIZED_TRIG_FUNCTIONS)
-        target_compile_definitions(xgl PRIVATE VKI_NORMALIZED_TRIG_FUNCTIONS)
-    endif()
-#endif
-
 #if VKI_RAY_TRACING
 #endif

-    if (XGL_ENABLE_GCOV)
-        target_compile_definitions(xgl PRIVATE ICD_ENABLE_GCOV)
+    if (VKI_ENABLE_GCOV)
+        target_compile_definitions(xgl PRIVATE VKI_ENABLE_GCOV)
     endif()

 #if VKI_GPU_DECOMPRESS
@@ -109,7 +96,7 @@ macro(xgl_set_compile_definitions)
 #if VKI_RAY_TRACING
 #endif

-    if(BUILD_WAYLAND_SUPPORT)
+    if(VKI_BUILD_WAYLAND)
         target_compile_definitions(xgl PRIVATE VK_USE_PLATFORM_WAYLAND_KHR)
     endif()

diff --git a/cmake/XglCompilerOptions.cmake b/cmake/XglCompilerOptions.cmake
index 293d5c0e..0195a326 100644
--- a/cmake/XglCompilerOptions.cmake
+++ b/cmake/XglCompilerOptions.cmake
@@ -156,7 +156,7 @@ macro(xgl_set_compiler)
     endif()

     # Assertions
-    if(XGL_ENABLE_ASSERTIONS)
+    if(VKI_ENABLE_ASSERTIONS)
         # MSVC doesn't like _DEBUG on release builds.
         if(NOT MSVC)
             add_definitions(-D_DEBUG)
@@ -221,7 +221,7 @@ function(xgl_compiler_options TARGET)
             -Wno-unused-parameter
         )

-        if(ICD_ANALYSIS_WARNINGS_AS_ERRORS)
+        if(VKI_ANALYSIS_WARNINGS_AS_ERRORS)
             target_compile_options(${TARGET} PRIVATE
                 -Werror
                 -Wno-error=comment
@@ -304,7 +304,7 @@ function(xgl_compiler_options TARGET)
         if(CMAKE_BUILD_TYPE_RELEASE)
             target_compile_options(${TARGET} PRIVATE -O3)

-            if(XGL_ENABLE_LTO)
+            if(VKI_ENABLE_LTO)
                 if(${CMAKE_CXX_COMPILER_ID} MATCHES "GNU")
                     execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
                     if(GCC_VERSION VERSION_GREATER 5.3 OR GCC_VERSION VERSION_EQUAL 5.3)
diff --git a/cmake/XglHelper.cmake b/cmake/XglHelper.cmake
index d8afa3f4..0cfcbf62 100644
--- a/cmake/XglHelper.cmake
+++ b/cmake/XglHelper.cmake
@@ -28,19 +28,19 @@ include_guard()
 macro(xgl_append_common_sanitizer_flags)
     if(NOT MSVC)
         # Append -fno-omit-frame-pointer and turn on debug info to get better stack traces.
-        string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -fno-omit-frame-pointer")
+        string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -fno-omit-frame-pointer")
         if (NOT CMAKE_BUILD_TYPE_DEBUG)
-            string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -gline-tables-only")
+            string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -gline-tables-only")
         else()
             # Use -O1 even in debug mode, otherwise sanitizers slowdown is too large.
-            string(APPEND ICD_SANITIZER_COMPILE_FLAGS " -O1")
+            string(APPEND VKI_SANITIZER_COMPILE_FLAGS " -O1")
         endif()
     elseif(CLANG_CL)
         # Keep frame pointers around.
-        string(APPEND ICD_SANITIZER_COMPILE_FLAGS " /Oy-")
+        string(APPEND VKI_SANITIZER_COMPILE_FLAGS " /Oy-")
         # Always ask the linker to produce symbols with asan.
- string(APPEND ICD_SANITIZER_COMPILE_FLAGS " /Z7") - string(APPEND ICD_SANITIZER_LINK_FLAGS " -debug") + string(APPEND VKI_SANITIZER_COMPILE_FLAGS " /Z7") + string(APPEND VKI_SANITIZER_LINK_FLAGS " -debug") endif() endmacro() @@ -49,18 +49,18 @@ macro(xgl_append_gcov_coverage_flags) # This option is used to compile and link code instrumented for coverage analysis. # The option --coverage is a synonym for -fprofile-arcs -ftest-coverage (when compiling) and -lgcov (when linking) # Ref link: https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html#Instrumentation-Options - string(APPEND ICD_GCOV_COMPILE_FLAGS " --coverage") - string(APPEND ICD_GCOV_LINK_FLAGS " --coverage") + string(APPEND VKI_GCOV_COMPILE_FLAGS " --coverage") + string(APPEND VKI_GCOV_LINK_FLAGS " --coverage") if (NOT CMAKE_BUILD_TYPE_DEBUG) # Use -O0 even in not debug mode, otherwise code coverage is not accurate. - string(APPEND ICD_GCOV_COMPILE_FLAGS " -O0") + string(APPEND VKI_GCOV_COMPILE_FLAGS " -O0") endif() if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - string(APPEND ICD_GCOV_COMPILE_FLAGS " -Xclang -coverage-cfg-checksum") - string(APPEND ICD_GCOV_COMPILE_FLAGS " -Xclang -coverage-no-function-names-in-data") - string(APPEND ICD_GCOV_COMPILE_FLAGS " -Xclang -coverage-version='408*'") + string(APPEND VKI_GCOV_COMPILE_FLAGS " -Xclang -coverage-cfg-checksum") + string(APPEND VKI_GCOV_COMPILE_FLAGS " -Xclang -coverage-no-function-names-in-data") + string(APPEND VKI_GCOV_COMPILE_FLAGS " -Xclang -coverage-version='408*'") endif() else() message(FATAL_ERROR "Unknown compiler ID: ${CMAKE_CXX_COMPILER_ID}") diff --git a/cmake/XglOptions.cmake b/cmake/XglOptions.cmake index e018fe7c..9b950304 100644 --- a/cmake/XglOptions.cmake +++ b/cmake/XglOptions.cmake @@ -39,21 +39,18 @@ macro(xgl_options) option(VKI_DEVMODE_COMPILER_SETTINGS "Build with devmode compiler settings?" OFF) #endif - option(XGL_ENABLE_PRINTS_ASSERTS "Build with debug print enabled?" OFF) + option(VKI_ENABLE_PRINTS_ASSERTS "Build with debug print enabled?" OFF) - option(XGL_ENABLE_LTO "Build with LTO enabled?" ON) + option(VKI_ENABLE_LTO "Build with LTO enabled?" ON) - option(XGL_ENABLE_GCOV "Build with gcov source code coverage?" OFF) -#if VKI_BUILD_GFX115 - option(XGL_BUILD_GFX115 "Build vulkan for GFX115" ON) -#endif + option(VKI_ENABLE_GCOV "Build with gcov source code coverage?" OFF) #if VKI_BUILD_STRIX1 - option(XGL_BUILD_STRIX1 "Build vulkan for STRIX1" ON) + option(VKI_BUILD_STRIX1 "Build vulkan for STRIX1" ON) #endif - option(XGL_BUILD_TESTS "Build tests?" OFF) + option(VKI_BUILD_TESTS "Build tests?" OFF) - option(XGL_BUILD_TOOLS "Build tools?" OFF) + option(VKI_BUILD_TOOLS "Build tools?" OFF) #if VKI_RAY_TRACING option(VKI_RAY_TRACING "Build vulkan with RAY_TRACING" ON) @@ -65,22 +62,16 @@ macro(xgl_options) option(ICD_BUILD_LLPC "Build LLPC?" ON) - option(XGL_LLVM_UPSTREAM "Build with upstreamed LLVM?" OFF) - - option(XGL_ENABLE_ASSERTIONS "Enable assertions in release builds" OFF) - - option(XGL_ENABLE_LIBCXX "Use libc++. This is intended for MemorySanitizer support only." OFF) - - option(ICD_GPUOPEN_DEVMODE_BUILD "Build ${PROJECT_NAME} with GPU Open Developer Mode driver support?" ON) + option(VKI_ENABLE_ASSERTIONS "Enable assertions in release builds" OFF) - option(ICD_MEMTRACK "Turn on memory tracking?" ${CMAKE_BUILD_TYPE_DEBUG}) + option(VKI_ENABLE_LIBCXX "Use libc++. This is intended for MemorySanitizer support only." 
OFF) if(UNIX AND (NOT ANDROID)) - option(BUILD_WAYLAND_SUPPORT "Build XGL with Wayland support" ON) + option(VKI_BUILD_WAYLAND "Build XGL with Wayland support" ON) - option(BUILD_DRI3_SUPPORT "Build XGL with Dri3 support" ON) + option(VKI_BUILD_DRI3 "Build XGL with Dri3 support" ON) endif() - option(ICD_ANALYSIS_WARNINGS_AS_ERRORS "Warnings as errors?" OFF) + option(VKI_ANALYSIS_WARNINGS_AS_ERRORS "Warnings as errors?" OFF) endmacro() diff --git a/cmake/XglOverrides.cmake b/cmake/XglOverrides.cmake index e4ee602d..e717a0bb 100644 --- a/cmake/XglOverrides.cmake +++ b/cmake/XglOverrides.cmake @@ -46,7 +46,7 @@ macro(xgl_get_path) set(GPURT_DEVELOPER_MODE ON CACHE BOOL "GPURT_DEVELOPER_MODE override." FORCE) set(GPURT_CLIENT_API "VULKAN" CACHE STRING "GPURT_CLIENT_API_VULKAN override." FORCE) - set(GPURT_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_GPURT_CLIENT_MAJOR_VERSION} CACHE STRING "GPURT_CLIENT_INTERFACE_MAJOR_VERSION override." FORCE) + set(GPURT_CLIENT_INTERFACE_MAJOR_VERSION ${VKI_GPURT_CLIENT_MAJOR_VERSION} CACHE STRING "GPURT_CLIENT_INTERFACE_MAJOR_VERSION override." FORCE) endif() #endif @@ -90,34 +90,32 @@ macro(xgl_overrides_pal) ### For PAL ########################################################################################################### set(PAL_BUILD_JEMALLOC OFF CACHE BOOL "Force jemalloc off" FORCE) - set(PAL_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_PAL_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE) + set(PAL_CLIENT_INTERFACE_MAJOR_VERSION ${VKI_PAL_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE) set(PAL_CLIENT "VULKAN" CACHE STRING "${PROJECT_NAME} override." FORCE) - set(PAL_ENABLE_PRINTS_ASSERTS ${XGL_ENABLE_PRINTS_ASSERTS} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(PAL_ENABLE_PRINTS_ASSERTS ${VKI_ENABLE_PRINTS_ASSERTS} CACHE BOOL "${PROJECT_NAME} override." FORCE) - set(PAL_ENABLE_LTO ${XGL_ENABLE_LTO} CACHE BOOL "XGL override to build PAL with LTO support" FORCE) + set(PAL_ENABLE_LTO ${VKI_ENABLE_LTO} CACHE BOOL "XGL override to build PAL with LTO support" FORCE) - set(PAL_MEMTRACK ${ICD_MEMTRACK} CACHE BOOL "${PROJECT_NAME} override." FORCE) - - set(PAL_BUILD_GPUOPEN ${ICD_GPUOPEN_DEVMODE_BUILD} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(PAL_MEMTRACK ${VKI_MEMTRACK} CACHE BOOL "${PROJECT_NAME} override." FORCE) set(PAL_BUILD_GFX11 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) set(PAL_BUILD_PHOENIX2 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) #if VKI_BUILD_GFX115 - set(PAL_BUILD_GFX115 ${XGL_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(PAL_BUILD_GFX115 ${VKI_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE) #endif #if VKI_BUILD_STRIX1 - set(PAL_BUILD_STRIX1 ${XGL_BUILD_STRIX1} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(PAL_BUILD_STRIX1 ${VKI_BUILD_STRIX1} CACHE BOOL "${PROJECT_NAME} override." FORCE) #endif # Wayland - set(PAL_BUILD_WAYLAND ${BUILD_WAYLAND_SUPPORT} CACHE BOOL "Build PAL with Wayland support" FORCE) + set(PAL_BUILD_WAYLAND ${VKI_BUILD_WAYLAND} CACHE BOOL "Build PAL with Wayland support" FORCE) # Dri3 - set(PAL_BUILD_DRI3 ${BUILD_DRI3_SUPPORT} CACHE BOOL "PAL build with Dri3 enabled" FORCE) + set(PAL_BUILD_DRI3 ${VKI_BUILD_DRI3} CACHE BOOL "PAL build with Dri3 enabled" FORCE) if(EXISTS ${XGL_METROHASH_PATH}) set(PAL_METROHASH_PATH ${XGL_METROHASH_PATH} CACHE PATH "${PROJECT_NAME} override." 
FORCE)
@@ -129,86 +127,86 @@ macro(xgl_overrides_pal)

 endmacro()

-macro(xgl_overrides_vkgc)
+macro(xgl_overrides_llpc)
     ### For LLPC ##########################################################################################################
-    set(LLPC_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_LLPC_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE)
+    if(VKI_ENABLE_LIBCXX)
+        set(LLVM_ENABLE_LIBCXX ${VKI_ENABLE_LIBCXX} CACHE BOOL "LLVM_ENABLE_LIBCXX is overridden." FORCE)
+    endif()
+
+    if(VKI_ENABLE_ASSERTIONS)
+        set(LLVM_ENABLE_ASSERTIONS "${VKI_ENABLE_ASSERTIONS}" CACHE BOOL "LLVM_ENABLE_ASSERTIONS is overridden." FORCE)
+    endif()
+
+    set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "LLVM_INCLUDE_BENCHMARKS is overridden." FORCE)
+
+    set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "LLVM_INCLUDE_DOCS is overridden." FORCE)

-    set(LLPC_BUILD_TOOLS ${XGL_BUILD_TOOLS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "LLVM_INCLUDE_EXAMPLES is overridden." FORCE)

-    set(LLPC_BUILD_TESTS ${XGL_BUILD_TESTS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    if(VKI_USE_SANITIZER)
+        set(LLPC_USE_SANITIZER "${VKI_USE_SANITIZER}" CACHE STRING "LLPC_USE_SANITIZER is overridden." FORCE)
+    endif()
+
+    set(LLPC_ENABLE_LTO ${VKI_ENABLE_LTO} CACHE BOOL "XGL override to build LLPC with LTO support" FORCE)
+
+    set(LLPC_MEMTRACK ${VKI_MEMTRACK} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+
+    # LLPC still uses the build options below; remove them once LLPC finishes updating to the VKI_ names and promotes.
+    set(XGL_ENABLE_LTO ${VKI_ENABLE_LTO})
+    set(ICD_MEMTRACK ${VKI_MEMTRACK})
+    set(XGL_USE_SANITIZER "${VKI_USE_SANITIZER}")

-    set(LLPC_BUILD_NAVI12 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_REMBRANDT ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_RAPHAEL ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_MENDOCINO ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_GFX11 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_NAVI31 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_NAVI32 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_NAVI33 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_PHOENIX1 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
-    set(LLPC_BUILD_PHOENIX2 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(LLPC_BUILD_GFX11 ON CACHE BOOL "${PROJECT_NAME} override." FORCE)

 #if VKI_BUILD_GFX115
-    set(LLPC_BUILD_GFX115 ${XGL_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    if(VKI_BUILD_GFX115)
+        set(LLPC_BUILD_GFX115 ${VKI_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    endif()
 #endif

 #if VKI_BUILD_STRIX1
-    set(LLPC_BUILD_STRIX1 ${XGL_BUILD_STRIX1} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(LLPC_BUILD_STRIX1 ${VKI_BUILD_STRIX1} CACHE BOOL "${PROJECT_NAME} override." FORCE)
 #endif

-    set(LLPC_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+    set(LLPC_CLIENT_INTERFACE_MAJOR_VERSION ${VKI_LLPC_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE)
+
+    set(LLPC_BUILD_TOOLS ${VKI_BUILD_TOOLS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+
+    set(LLPC_BUILD_TESTS ${VKI_BUILD_TESTS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+
+    set(LLPC_ENABLE_WERROR ${VKI_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+
+#if VKI_RAY_TRACING
+    set(LLPC_RAY_TRACING ${VKI_RAY_TRACING} CACHE BOOL "${PROJECT_NAME} override."
FORCE) +#endif endmacro() macro(xgl_overrides) - if(ICD_GPUOPEN_DEVMODE_BUILD) - set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_GPUOPEN_CLIENT_MAJOR_VERSION}) - endif() + set(GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION ${VKI_GPUOPEN_CLIENT_MAJOR_VERSION}) #if VKI_BUILD_GFX115 + set(VKI_BUILD_GFX115 OFF) #if VKI_BUILD_STRIX1 - if(XGL_BUILD_STRIX1) - set(XGL_BUILD_GFX115 ON CACHE BOOL "XGL_BUILD_GFX115 override." FORCE) + if(VKI_BUILD_STRIX1) + set(VKI_BUILD_GFX115 ON) endif() #endif #endif xgl_get_path() - if(XGL_BUILD_TESTS) - set(XGL_BUILD_TOOLS ON CACHE BOOL "XGL_BUILD_TOOLS override by XGL_BUILD_TESTS." FORCE) - endif() - - if(NOT ICD_BUILD_LLPC) - set(XGL_LLVM_UPSTREAM OFF CACHE BOOL "XGL_LLVM_UPSTREAM is overrided to false." FORCE) - endif() - - set(XGL_USE_SANITIZER "" CACHE STRING "Build with sanitizers, e.g. Address;Undefined") - - if(XGL_USE_SANITIZER) - set(LLVM_USE_SANITIZER "${XGL_USE_SANITIZER}" CACHE STRING "LLVM_USE_SANITIZER is overridden." FORCE) - endif() + set(VKI_MEMTRACK ${CMAKE_BUILD_TYPE_DEBUG}) - if(XGL_ENABLE_LIBCXX) - set(LLVM_ENABLE_LIBCXX "${XGL_ENABLE_LIBCXX}" CACHE BOOL "LLVM_ENABLE_LIBCXX is overridden." FORCE) + if(VKI_BUILD_TESTS) + set(VKI_BUILD_TOOLS ON CACHE BOOL "VKI_BUILD_TOOLS override by VKI_BUILD_TESTS." FORCE) endif() - if(XGL_ENABLE_ASSERTIONS) - set(LLVM_ENABLE_ASSERTIONS "${XGL_ENABLE_ASSERTIONS}" CACHE BOOL "LLVM_ENABLE_ASSERTIONS is overridden." FORCE) - endif() - - set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "LLVM_INCLUDE_BENCHMARKS is overriden." FORCE) - - set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "LLVM_INCLUDE_DOCS is overriden." FORCE) - - set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "LLVM_INCLUDE_EXAMPLES is overriden." FORCE) + set(VKI_USE_SANITIZER "" CACHE STRING "Build with sanitizers, e.g. Address;Undefined") - set(VAM_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(VAM_ENABLE_WERROR ${VKI_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) - set(ADDR_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(ADDR_ENABLE_WERROR ${VKI_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) - set(METROHASH_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(METROHASH_ENABLE_WERROR ${VKI_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) ### XCB required ###################################################################################################### set(XCB_REQUIRED OFF) @@ -218,6 +216,6 @@ macro(xgl_overrides) xgl_overrides_pal() - xgl_overrides_vkgc() + xgl_overrides_llpc() endmacro() diff --git a/cmake/XglVersions.cmake b/cmake/XglVersions.cmake index 52f9a3b9..e731978b 100644 --- a/cmake/XglVersions.cmake +++ b/cmake/XglVersions.cmake @@ -30,18 +30,18 @@ include_guard() # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. # It must be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -set(ICD_PAL_CLIENT_MAJOR_VERSION "905") +set(VKI_PAL_CLIENT_MAJOR_VERSION "909") -# This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. +# This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION. 
# It describes the interface version of the gpuopen shared module (part of PAL) that the ICD supports. -set(ICD_GPUOPEN_CLIENT_MAJOR_VERSION "42") +set(VKI_GPUOPEN_CLIENT_MAJOR_VERSION "42") #if VKI_RAY_TRACING # This will become the value of GPURT_CLIENT_INTERFACE_MAJOR_VERSION if VKI_RAY_TRACING=1. # It describes the interface version of the GpuRT shared module that the ICD supports. -set(ICD_GPURT_CLIENT_MAJOR_VERSION "49") +set(VKI_GPURT_CLIENT_MAJOR_VERSION "51") #endif # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1. # It describes the version of the interface version of LLPC that the ICD supports. -set(ICD_LLPC_CLIENT_MAJOR_VERSION "75") +set(VKI_LLPC_CLIENT_MAJOR_VERSION "75") diff --git a/icd/CMakeLists.txt b/icd/CMakeLists.txt index e800d879..e6fe9515 100644 --- a/icd/CMakeLists.txt +++ b/icd/CMakeLists.txt @@ -26,8 +26,8 @@ ### Create XGL Library ################################################################################################# add_library(xgl SHARED) -set(ICD_TARGET amdvlk${TARGET_ARCHITECTURE_BITS}) -set_target_properties(xgl PROPERTIES OUTPUT_NAME ${ICD_TARGET}) +set(VKI_TARGET amdvlk${TARGET_ARCHITECTURE_BITS}) +set_target_properties(xgl PROPERTIES OUTPUT_NAME ${VKI_TARGET}) set_target_properties(xgl PROPERTIES PREFIX "") install(TARGETS xgl DESTINATION ${CMAKE_SOURCE_DIR}/lib/${CMAKE_BUILD_TYPE}) @@ -44,7 +44,7 @@ endif() xgl_set_compile_definitions() ### Include Directories ################################################################################################ -if (BUILD_WAYLAND_SUPPORT) +if (VKI_BUILD_WAYLAND) target_include_directories(xgl PUBLIC ${WAYLAND_INCLUDE_DIRS}) endif() @@ -155,7 +155,6 @@ target_sources(xgl PRIVATE api/appopt/barrier_filter_layer.cpp api/appopt/strange_brigade_layer.cpp api/appopt/baldurs_gate3_layer.cpp - api/appopt/gravity_mark_layer.cpp api/appopt/g_shader_profile.cpp api/render_state_cache.cpp api/renderpass/renderpass_builder.cpp @@ -193,65 +192,65 @@ if(ICD_BUILD_LLPC) endif() ### ICD Auto-generated Shader Profiles Files ################################## -# ICD_GENDIR Path to the code generation tools -set(ICD_GENDIR ${CMAKE_CURRENT_SOURCE_DIR}/tools/generate) +# VKI_GENDIR Path to the code generation tools +set(VKI_GENDIR ${CMAKE_CURRENT_SOURCE_DIR}/tools/generate) # ICD shader profile code generation main script -set(ICD_GEN_SHADER_PROFILE_SCRIPTS ${ICD_GENDIR}/genShaderProfile.py ${ICD_GENDIR}/shaderProfileTemplate.py) +set(VKI_GEN_SHADER_PROFILE_SCRIPTS ${VKI_GENDIR}/genShaderProfile.py ${VKI_GENDIR}/shaderProfileTemplate.py) -set(ICD_SHADER_PROFILE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/api/appopt) +set(VKI_SHADER_PROFILE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/api/appopt) -file(GLOB_RECURSE ICD_ALL_SHADER_PROFILE_FILES - ${ICD_SHADER_PROFILE_DIR}/shader_profiles/profile.json +file(GLOB_RECURSE VKI_ALL_SHADER_PROFILE_FILES + ${VKI_SHADER_PROFILE_DIR}/shader_profiles/profile.json ) add_custom_command( - OUTPUT ${ICD_SHADER_PROFILE_DIR}/g_shader_profile.cpp ${ICD_SHADER_PROFILE_DIR}/g_shader_profile.h - COMMAND ${PYTHON_CMD} ${ICD_GENDIR}/genShaderProfile.py ${ICD_SHADER_PROFILE_DIR}/shader_profiles - DEPENDS ${ICD_GEN_SHADER_PROFILE_SCRIPTS} ${ICD_ALL_SHADER_PROFILE_FILES} + OUTPUT ${VKI_SHADER_PROFILE_DIR}/g_shader_profile.cpp ${VKI_SHADER_PROFILE_DIR}/g_shader_profile.h + COMMAND ${PYTHON_CMD} ${VKI_GENDIR}/genShaderProfile.py ${VKI_SHADER_PROFILE_DIR}/shader_profiles + DEPENDS ${VKI_GEN_SHADER_PROFILE_SCRIPTS} ${VKI_ALL_SHADER_PROFILE_FILES} COMMENT "Generating shader profiles code 
from all profile.json files" ) add_custom_target( GenerateShaderProfiles - DEPENDS ${ICD_GEN_SHADER_PROFILE_SCRIPTS} ${ICD_ALL_SHADER_PROFILE_FILES} + DEPENDS ${VKI_GEN_SHADER_PROFILE_SCRIPTS} ${VKI_ALL_SHADER_PROFILE_FILES} COMMENT "Checking if re-generation is required for shader profiles" ) add_dependencies(xgl GenerateShaderProfiles) ### ICD Auto-generated String Files ########################################### -set(ICD_STRING_DIR ${CMAKE_CURRENT_SOURCE_DIR}/api/strings) +set(VKI_STRING_DIR ${CMAKE_CURRENT_SOURCE_DIR}/api/strings) # ICD settings code generation main script -set(ICD_GEN_STRINGS ${ICD_STRING_DIR}/generate_strings.py) +set(VKI_GEN_STRINGS ${VKI_STRING_DIR}/generate_strings.py) -set(ICD_GEN_STRINGS_FILES ${ICD_GEN_STRINGS} ${ICD_STRING_DIR}/func_table_template.py) +set(VKI_GEN_STRINGS_FILES ${VKI_GEN_STRINGS} ${VKI_STRING_DIR}/func_table_template.py) -set(ICD_STRING_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/api/strings) +set(VKI_STRING_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/api/strings) -set(ICD_GEN_STRINGS_OPT -w ${ICD_STRING_DIR} -d ${ICD_STRING_OUTPUT_DIR}) +set(VKI_GEN_STRINGS_OPT -w ${VKI_STRING_DIR} -d ${VKI_STRING_OUTPUT_DIR}) -set(ICD_STRING_OUTPUT_FILES ${ICD_STRING_OUTPUT_DIR}/g_entry_points_decl.h - ${ICD_STRING_OUTPUT_DIR}/g_entry_points_impl.h - ${ICD_STRING_OUTPUT_DIR}/g_extensions_decl.h - ${ICD_STRING_OUTPUT_DIR}/g_extensions_impl.h - ${ICD_STRING_OUTPUT_DIR}/g_func_table.h +set(VKI_STRING_OUTPUT_FILES ${VKI_STRING_OUTPUT_DIR}/g_entry_points_decl.h + ${VKI_STRING_OUTPUT_DIR}/g_entry_points_impl.h + ${VKI_STRING_OUTPUT_DIR}/g_extensions_decl.h + ${VKI_STRING_OUTPUT_DIR}/g_extensions_impl.h + ${VKI_STRING_OUTPUT_DIR}/g_func_table.h ) -set(ICD_STRING_SOURCE_FILES ${ICD_STRING_DIR}/entry_points.txt - ${ICD_STRING_DIR}/extensions.txt +set(VKI_STRING_SOURCE_FILES ${VKI_STRING_DIR}/entry_points.txt + ${VKI_STRING_DIR}/extensions.txt ) add_custom_command( - OUTPUT ${ICD_STRING_OUTPUT_FILES} - COMMAND ${PYTHON_CMD} ${ICD_GEN_STRINGS} ${ICD_GEN_STRINGS_OPT} - DEPENDS ${ICD_GEN_STRINGS_FILES} ${ICD_STRING_SOURCE_FILES} - COMMENT "Generating Vulkan api strings ${ICD_GEN_STRINGS_OPT}" + OUTPUT ${VKI_STRING_OUTPUT_FILES} + COMMAND ${PYTHON_CMD} ${VKI_GEN_STRINGS} ${VKI_GEN_STRINGS_OPT} + DEPENDS ${VKI_GEN_STRINGS_FILES} ${VKI_STRING_SOURCE_FILES} + COMMENT "Generating Vulkan api strings ${VKI_GEN_STRINGS_OPT}" ) add_custom_target( RunVKStringsGenerator - DEPENDS ${ICD_STRING_OUTPUT_FILES} + DEPENDS ${VKI_STRING_OUTPUT_FILES} COMMENT "Checking if re-generation is required for strings" ) @@ -263,31 +262,31 @@ target_sources(xgl PRIVATE api/strings/strings.cpp) # ICD settings code generation main script set(XGL_DEVDRIVER_PATH ${XGL_PAL_PATH}/shared/devdriver) -set(ICD_DD_GENDIR ${XGL_DEVDRIVER_PATH}/apis/settings/codegen) +set(VKI_DD_GENDIR ${XGL_DEVDRIVER_PATH}/apis/settings/codegen) -set(ICD_GEN_SETTINGS ${ICD_DD_GENDIR}/settings_codegen.py) +set(VKI_GEN_SETTINGS ${VKI_DD_GENDIR}/settings_codegen.py) -set(ICD_GEN_SETTINGS_FILES ${ICD_GEN_SETTINGS}) +set(VKI_GEN_SETTINGS_FILES ${VKI_GEN_SETTINGS}) -set(ICD_SETTINGS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/settings) +set(VKI_SETTINGS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/settings) add_custom_command( - OUTPUT ${ICD_SETTINGS_DIR}/g_settings.cpp ${ICD_SETTINGS_DIR}/g_settings.h - COMMAND ${PYTHON_CMD} ${ICD_GEN_SETTINGS} - -i ${ICD_SETTINGS_DIR}/settings_xgl.json - -o ${ICD_SETTINGS_DIR} + OUTPUT ${VKI_SETTINGS_DIR}/g_settings.cpp ${VKI_SETTINGS_DIR}/g_settings.h + COMMAND ${PYTHON_CMD} ${VKI_GEN_SETTINGS} + -i 
${VKI_SETTINGS_DIR}/settings_xgl.json
+        -o ${VKI_SETTINGS_DIR}
         -g settings
         -s settings/settings.h
         --namespaces vk
         --settings-struct-name RuntimeSettings
         --include-headers pal.h palImage.h
-    DEPENDS ${ICD_GEN_SETTINGS_FILES} ${ICD_SETTINGS_DIR}/settings_xgl.json
+    DEPENDS ${VKI_GEN_SETTINGS_FILES} ${VKI_SETTINGS_DIR}/settings_xgl.json
     COMMENT "Generating Vulkan settings code from settings_xgl.json"
 )

 add_custom_target(
     RunVKSettingsGenerator
-    DEPENDS ${ICD_GEN_SETTINGS_FILES} ${ICD_SETTINGS_DIR}/settings_xgl.json
+    DEPENDS ${VKI_GEN_SETTINGS_FILES} ${VKI_SETTINGS_DIR}/settings_xgl.json
     COMMENT "Checking if re-generation is required for settings"
 )
@@ -300,22 +299,22 @@ target_sources(xgl PRIVATE
 )

 add_custom_command(
-    OUTPUT ${ICD_SETTINGS_DIR}/g_experiments.cpp ${ICD_SETTINGS_DIR}/g_experiments.h
-    COMMAND ${PYTHON_CMD} ${ICD_GEN_SETTINGS}
-        -i ${ICD_SETTINGS_DIR}/experiments_settings_xgl.json
-        -o ${ICD_SETTINGS_DIR}
+    OUTPUT ${VKI_SETTINGS_DIR}/g_experiments.cpp ${VKI_SETTINGS_DIR}/g_experiments.h
+    COMMAND ${PYTHON_CMD} ${VKI_GEN_SETTINGS}
+        -i ${VKI_SETTINGS_DIR}/experiments_settings_xgl.json
+        -o ${VKI_SETTINGS_DIR}
         -g experiments
         -s settings/experimentsLoader.h
         --namespaces vk
         --settings-struct-name ExpSettings
         --classname ExperimentsLoader
-    DEPENDS ${ICD_GEN_SETTINGS_FILES} ${ICD_SETTINGS_DIR}/experiments_settings_xgl.json
+    DEPENDS ${VKI_GEN_SETTINGS_FILES} ${VKI_SETTINGS_DIR}/experiments_settings_xgl.json
     COMMENT "Generating Vulkan settings code from experiments_settings_xgl.json"
 )

 add_custom_target(
     RunVKExperimentsGenerator
-    DEPENDS ${ICD_GEN_SETTINGS_FILES} ${ICD_SETTINGS_DIR}/experiments_settings_xgl.json
+    DEPENDS ${VKI_GEN_SETTINGS_FILES} ${VKI_SETTINGS_DIR}/experiments_settings_xgl.json
     COMMENT "Checking if re-generation is required for settings"
 )
@@ -335,12 +334,10 @@ target_sources(xgl PRIVATE
 )

 ### ICD api/devmode ###########################################################
-if(ICD_GPUOPEN_DEVMODE_BUILD)
-    target_sources(xgl PRIVATE
-        api/devmode/devmode_rgp.cpp
-        api/devmode/devmode_ubertrace.cpp
-    )
-endif()
+target_sources(xgl PRIVATE
+    api/devmode/devmode_rgp.cpp
+    api/devmode/devmode_ubertrace.cpp
+)

 ### ICD layer ##################################################################
 target_sources(xgl PRIVATE
@@ -362,7 +359,7 @@ if (UNIX)
         target_link_libraries(xgl PRIVATE -fabi-version=0 -static-intel)
     endif()

-    if(CMAKE_BUILD_TYPE_RELEASE AND XGL_ENABLE_LTO)
+    if(CMAKE_BUILD_TYPE_RELEASE AND VKI_ENABLE_LTO)
         if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
             target_link_options(xgl PRIVATE -Wno-stringop-overflow)
             execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
@@ -379,8 +376,8 @@ if (UNIX)
         target_link_options(xgl PRIVATE
             -Wl,-Bdynamic
             -Wl,-z,noexecstack
-            -Wl,-Map=$<TARGET_FILE_DIR:xgl>/${ICD_TARGET}.map
-            -Wl,-soname=${ICD_TARGET}.so.1
+            -Wl,-Map=$<TARGET_FILE_DIR:xgl>/${VKI_TARGET}.map
+            -Wl,-soname=${VKI_TARGET}.so.1
         )

         target_link_options(xgl PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/make/amdicd.so.def)
@@ -390,7 +387,7 @@ if (UNIX)
         target_link_options(xgl PRIVATE -static-libgcc -static-libstdc++)
     endif()

-    if(NOT XGL_USE_SANITIZER)
+    if(NOT VKI_USE_SANITIZER)
         # -Wl,--no-undefined is incompatible with asan
         target_link_libraries(xgl PRIVATE
             -Wl,--no-undefined
@@ -440,9 +437,9 @@ if(UNIX)
     endif()
     add_custom_command(
         TARGET xgl POST_BUILD
-        COMMAND ${TOOLCHAIN_TARGET_PREFIX}objcopy --only-keep-debug ${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so ${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so.debug
-        COMMAND ${TOOLCHAIN_TARGET_PREFIX}strip
${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so - COMMAND ${TOOLCHAIN_TARGET_PREFIX}objcopy --add-gnu-debuglink=${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so.debug ${CMAKE_CURRENT_BINARY_DIR}/${ICD_TARGET}.so + COMMAND ${TOOLCHAIN_TARGET_PREFIX}objcopy --only-keep-debug ${CMAKE_CURRENT_BINARY_DIR}/${VKI_TARGET}.so ${CMAKE_CURRENT_BINARY_DIR}/${VKI_TARGET}.so.debug + COMMAND ${TOOLCHAIN_TARGET_PREFIX}strip ${CMAKE_CURRENT_BINARY_DIR}/${VKI_TARGET}.so + COMMAND ${TOOLCHAIN_TARGET_PREFIX}objcopy --add-gnu-debuglink=${CMAKE_CURRENT_BINARY_DIR}/${VKI_TARGET}.so.debug ${CMAKE_CURRENT_BINARY_DIR}/${VKI_TARGET}.so ) endif() endif() diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json index 9248602a..9adea9cf 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ -2,13 +2,13 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.297" + "api_version": "1.3.301" }, "layer": { "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@", "type": "GLOBAL", "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.297", + "api_version": "1.3.301", "implementation_version": "1", "description": "AMD switchable graphics layer", "functions": { diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp index 44c1f3ef..beae8f8e 100644 --- a/icd/api/app_profile.cpp +++ b/icd/api/app_profile.cpp @@ -780,12 +780,6 @@ constexpr AppProfilePatternEntry AppNameHoudini = "houdini" }; -constexpr AppProfilePatternEntry AppNameGravityMark = -{ - PatternAppNameLower, - "clayapp" -}; - constexpr AppProfilePatternEntry AppNameSevenDaysToDie = { PatternAppNameLower, @@ -798,6 +792,12 @@ constexpr AppProfilePatternEntry AppNameGgmlVulkan = "ggml-vulkan" }; +constexpr AppProfilePatternEntry AppNameBlender = +{ + PatternAppNameLower, + "blender" +}; + // Section END of AppProfilePatternEntry for all games // This is a table of patterns. The first matching pattern in this table will be returned. 
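Aside on how AppPatternTable is consumed, since the hunks below remove the GravityMark ("clayapp") entry and add a Blender one: the table is first-match-wins, so entry order matters whenever name patterns overlap. A minimal, self-contained sketch of that lookup follows; the stand-in types, field names, and the SelectProfile helper are simplifications for illustration only, not the driver's actual definitions.

```cpp
#include <cstring>

// Simplified stand-ins for the driver types named in this diff; the real
// AppProfilePatternEntry/AppProfilePattern fields are richer than shown.
enum class AppProfile { Default, Blender };
enum AppProfilePatternType { PatternNone, PatternAppNameLower, PatternEnd };

struct AppProfilePatternEntry
{
    AppProfilePatternType type;
    const char*           pattern;
};

struct AppProfilePattern
{
    AppProfile             profile;
    AppProfilePatternEntry entries[4];
};

static const AppProfilePattern AppPatternTable[] =
{
    { AppProfile::Blender, { { PatternAppNameLower, "blender" }, { PatternEnd, nullptr } } },
};

// The first table entry whose patterns all match wins, so more specific
// patterns must be listed before more general ones.
static AppProfile SelectProfile(const char* pAppNameLower)
{
    for (const AppProfilePattern& pattern : AppPatternTable)
    {
        bool matches = true;

        for (const AppProfilePatternEntry& entry : pattern.entries)
        {
            if (entry.type == PatternEnd)
            {
                break; // Sentinel: every preceding entry matched.
            }

            if ((entry.type == PatternAppNameLower) &&
                (std::strcmp(entry.pattern, pAppNameLower) != 0))
            {
                matches = false;
                break;
            }
        }

        if (matches)
        {
            return pattern.profile;
        }
    }

    return AppProfile::Default;
}

int main()
{
    return (SelectProfile("blender") == AppProfile::Blender) ? 0 : 1;
}
```
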
@@ -1326,14 +1326,6 @@ AppProfilePattern AppPatternTable[] = } }, - { - AppProfile::GravityMark, - { - AppNameGravityMark, - PatternEnd - } - }, - { AppProfile::SOTTR, { @@ -1646,6 +1638,14 @@ AppProfilePattern AppPatternTable[] = } }, + { + AppProfile::Blender, + { + AppNameBlender, + PatternEnd + } + }, + }; static char* GetExecutableName(size_t* pLength, bool includeExtension = false); diff --git a/icd/api/app_resource_optimizer.cpp b/icd/api/app_resource_optimizer.cpp index dd4a8644..6b89ebfb 100644 --- a/icd/api/app_resource_optimizer.cpp +++ b/icd/api/app_resource_optimizer.cpp @@ -115,6 +115,7 @@ void ResourceOptimizer::ApplyProfileToImageViewCreateInfo( pViewInfo->flags.bypassMallRead = 1; pViewInfo->flags.bypassMallWrite = 1; } + } } } diff --git a/icd/api/appopt/bvh_batch_layer.cpp b/icd/api/appopt/bvh_batch_layer.cpp index 9fc0d5d7..dd217926 100644 --- a/icd/api/appopt/bvh_batch_layer.cpp +++ b/icd/api/appopt/bvh_batch_layer.cpp @@ -118,7 +118,7 @@ VkResult BvhBatchLayer::CreateLayer( } } - if (result == VK_SUCCESS) + if ((result == VK_SUCCESS) && (pLayer != nullptr)) { result = pLayer->Init(pDevice); } diff --git a/icd/api/appopt/gpu_decode_layer.cpp b/icd/api/appopt/gpu_decode_layer.cpp index afb6cedc..bcb81dfb 100755 --- a/icd/api/appopt/gpu_decode_layer.cpp +++ b/icd/api/appopt/gpu_decode_layer.cpp @@ -122,7 +122,7 @@ Pal::Result ClientCreateInternalComputePipeline( rootNodeCount, &rootNode[0], 0, - false, // forceWave64, + vk::ShaderWaveSize::WaveSizeAuto, &specializationInfo, &pDevice->GetInternalTexDecodePipeline()); diff --git a/icd/api/appopt/gravity_mark_layer.cpp b/icd/api/appopt/gravity_mark_layer.cpp deleted file mode 100644 index 8aeea3d6..00000000 --- a/icd/api/appopt/gravity_mark_layer.cpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************************************************************************/ -/** -*********************************************************************************************************************** -* @file gravity_mark_layer.cpp -* @brief Implementation Gravity Mark Layer. 
-*********************************************************************************************************************** -*/ - -#include "gravity_mark_layer.h" - -#include "include/vk_image.h" -#include "include/vk_cmdbuffer.h" -#include "include/vk_device.h" - -namespace vk -{ - -namespace entry -{ - -namespace gravity_mark_layer -{ - -// ===================================================================================================================== -VKAPI_ATTR void VKAPI_CALL vkCmdPipelineBarrier( - VkCommandBuffer cmdBuffer, - VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - VkDependencyFlags dependencyFlags, - uint32_t memoryBarrierCount, - const VkMemoryBarrier* pMemoryBarriers, - uint32_t bufferMemoryBarrierCount, - const VkBufferMemoryBarrier* pBufferMemoryBarriers, - uint32_t imageMemoryBarrierCount, - const VkImageMemoryBarrier* pImageMemoryBarriers) -{ - CmdBuffer* pCmdBuffer = ApiCmdBuffer::ObjectFromHandle(cmdBuffer); - OptLayer* pLayer = pCmdBuffer->VkDevice()->GetAppOptLayer(); - - // - corruption caused by incorrect barrier between CmdDispatch and CmdDrawIndexed calls which access the same - // R16G16B16A16_SFLOAT image - // - existing barrier from app specifies srcStageMask = TOP_OF_PIPE which is equivalent to VK_PIPELINE_STAGE_2_NONE - // - changing this to BOTTOM_OF_PIPE will correctly sync between the dispatch and draw calls, resolving corruption - - if ((imageMemoryBarrierCount == 1) && - (srcStageMask == VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT) && - (dstStageMask == (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT - | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT - | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT - | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) && - (pImageMemoryBarriers != nullptr) && - (Image::ObjectFromHandle(pImageMemoryBarriers[0].image)->GetFormat() == VK_FORMAT_R16G16B16A16_SFLOAT) && - (Image::ObjectFromHandle(pImageMemoryBarriers[0].image)->GetImageSamples() == VK_SAMPLE_COUNT_1_BIT) && - (pImageMemoryBarriers[0].srcAccessMask == VK_ACCESS_NONE) && - (pImageMemoryBarriers[0].dstAccessMask == VK_ACCESS_SHADER_READ_BIT) && - (pImageMemoryBarriers[0].oldLayout == VK_IMAGE_LAYOUT_GENERAL) && - (pImageMemoryBarriers[0].newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL)) - { - srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; - } - - // Pass the barrier call on to the Vulkan driver - pLayer->GetNextLayer()->GetEntryPoints().vkCmdPipelineBarrier( - cmdBuffer, - srcStageMask, - dstStageMask, - dependencyFlags, - memoryBarrierCount, - pMemoryBarriers, - bufferMemoryBarrierCount, - pBufferMemoryBarriers, - imageMemoryBarrierCount, - pImageMemoryBarriers); -} - -} // namespace gravity_mark_layer - -} // namespace entry - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#define GRAVITY_MARK_OVERRIDE_ALIAS(entry_name, func_name) \ - pDispatchTable->OverrideEntryPoints()->entry_name = vk::entry::gravity_mark_layer::func_name - -#define GRAVITY_MARK_OVERRIDE_ENTRY(entry_name) GRAVITY_MARK_OVERRIDE_ALIAS(entry_name, entry_name) - -// ===================================================================================================================== -void GravityMarkLayer::OverrideDispatchTable( - DispatchTable* pDispatchTable) -{ - // Save current device dispatch table to use as the next layer. 
- m_nextLayer = *pDispatchTable; - - GRAVITY_MARK_OVERRIDE_ENTRY(vkCmdPipelineBarrier); -} - -} // namespace vk diff --git a/icd/api/appopt/gravity_mark_layer.h b/icd/api/appopt/gravity_mark_layer.h deleted file mode 100644 index 725b0654..00000000 --- a/icd/api/appopt/gravity_mark_layer.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - **********************************************************************************************************************/ -/** -*********************************************************************************************************************** -* @file gravity_mark_layer.h -* @brief Contains shadowed entry points related to Gravity Mark. 
-***********************************************************************************************************************
-*/
-
-#ifndef __GRAVITY_MARK_LAYER_H__
-#define __GRAVITY_MARK_LAYER_H__
-
-#pragma once
-
-#include "opt_layer.h"
-
-namespace vk
-{
-// =====================================================================================================================
-// Class for the Gravity Mark Layer to simplify calls to the overriden dispatch table from the layer's entrypoints
-class GravityMarkLayer final : public OptLayer
-{
-public:
-    GravityMarkLayer() {}
-    virtual ~GravityMarkLayer() {}
-
-    virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) override;
-
-private:
-    PAL_DISALLOW_COPY_AND_ASSIGN(GravityMarkLayer);
-};
-
-}; // namespace vk
-
-#endif /* __GRAVITY_MARK_LAYER_H__ */
diff --git a/icd/api/appopt/shader_profiles/llpc/generic/SniperElite5/profile.json b/icd/api/appopt/shader_profiles/llpc/generic/SniperElite5/profile.json
new file mode 100644
index 00000000..32fd85c2
--- /dev/null
+++ b/icd/api/appopt/shader_profiles/llpc/generic/SniperElite5/profile.json
@@ -0,0 +1,23 @@
+{
+    "entries": [
+        {
+            "pattern": {
+                "always": true
+            },
+            "action": {
+                "cs": {
+                    "nsaThreshold": 3
+                },
+                "gs": {
+                    "nsaThreshold": 3
+                },
+                "vs": {
+                    "nsaThreshold": 3
+                },
+                "ps": {
+                    "nsaThreshold": 3
+                }
+            }
+        }
+    ]
+}
\ No newline at end of file
diff --git a/icd/api/appopt/shader_profiles/llpc/generic/WorldWarZ/profile.json b/icd/api/appopt/shader_profiles/llpc/generic/WorldWarZ/profile.json
index 51303768..d2ca1014 100644
--- a/icd/api/appopt/shader_profiles/llpc/generic/WorldWarZ/profile.json
+++ b/icd/api/appopt/shader_profiles/llpc/generic/WorldWarZ/profile.json
@@ -6,17 +6,18 @@
             },
             "action": {
                 "cs": {
+                    "nsaThreshold": 3,
                     "unrollThreshold": 5000
-                }
-            }
-        },
-        {
-            "pattern": {
-                "always": true
-            },
-            "action": {
+                },
                 "gs": {
+                    "nsaThreshold": 3,
                     "unrollThreshold": 5000
+                },
+                "vs": {
+                    "nsaThreshold": 3
+                },
+                "ps": {
+                    "nsaThreshold": 3
                 }
             }
         },
diff --git a/icd/api/barrier_policy.cpp b/icd/api/barrier_policy.cpp
index 3e6519b1..92b733fa 100644
--- a/icd/api/barrier_policy.cpp
+++ b/icd/api/barrier_policy.cpp
@@ -110,11 +110,15 @@ class LayoutUsageHelper
         InitEntry(VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
                   Pal::LayoutDepthStencilTarget | Pal::LayoutShaderRead);

+        // Disable metadata to avoid corruption when one image is sampled and rendered
+        // in the same draw.
         InitEntry(VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
-                  Pal::LayoutShaderRead | Pal::LayoutShaderWrite);
+                  Pal::LayoutShaderRead | Pal::LayoutShaderWrite | Pal::LayoutUncompressed);

+        // Disable metadata to avoid corruption when one image is read and rendered
+        // in the same draw.
         InitEntry(VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR,
-                  Pal::LayoutShaderRead);
+                  Pal::LayoutShaderRead | Pal::LayoutShaderWrite | Pal::LayoutUncompressed);
     }

     // Return layout usage index corresponding to the specified layout.
@@ -918,6 +922,11 @@ void ImageBarrierPolicy::InitImageLayoutUsagePolicy(
         m_supportedLayoutUsageMask |= Pal::LayoutSampleRate;
     }

+    if (usage & VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT)
+    {
+        m_supportedLayoutUsageMask |= Pal::LayoutUncompressed;
+    }
+
     // We don't do anything special in case of transient attachment images
 }

@@ -1086,7 +1095,7 @@ Pal::ImageLayout ImageBarrierPolicy::GetTransferLayout(

     // Mask determined layout usage flags by the supported layout usage mask on the given queue family index.
     result.usages = g_LayoutUsageHelper.GetLayoutUsage(0, usageIndex)
-                    & GetSupportedLayoutUsageMask(queueFamilyIndex);
+                    & GetSupportedLayoutUsageMask(queueFamilyIndex, layout);

     // If the layout usage is 0, it likely means that an application is trying to transition to an image layout that
     // is not supported by that image's usage flags.
@@ -1112,7 +1121,7 @@ Pal::ImageLayout ImageBarrierPolicy::GetAspectLayout(

     // Mask determined layout usage flags by the supported layout usage mask on the given queue family index.
     result.usages = g_LayoutUsageHelper.GetLayoutUsage(plane, usageIndex)
-                    & GetSupportedLayoutUsageMask(queueFamilyIndex);
+                    & GetSupportedLayoutUsageMask(queueFamilyIndex, layout);

     // If the layout usage is 0, it likely means that an application is trying to transition to an image layout that
     // is not supported by that image's usage flags.
@@ -1135,7 +1144,7 @@ void ImageBarrierPolicy::GetLayouts(
     uint32_t usageIndex = g_LayoutUsageHelper.GetLayoutUsageIndex(layout, format);

     // Mask determined layout usage flags by the supported layout usage mask on the corresponding queue family index.
-    const uint32_t supportedLayoutUsageMask = GetSupportedLayoutUsageMask(queueFamilyIndex);
+    const uint32_t supportedLayoutUsageMask = GetSupportedLayoutUsageMask(queueFamilyIndex, layout);
     results[0].usages = g_LayoutUsageHelper.GetLayoutUsage(0, usageIndex) & supportedLayoutUsageMask;
     results[1].usages = g_LayoutUsageHelper.GetLayoutUsage(1, usageIndex) & supportedLayoutUsageMask;
     results[2].usages = g_LayoutUsageHelper.GetLayoutUsage(2, usageIndex) & supportedLayoutUsageMask;
diff --git a/icd/api/debug_printf.cpp b/icd/api/debug_printf.cpp
index 2976d8cb..5529c67c 100644
--- a/icd/api/debug_printf.cpp
+++ b/icd/api/debug_printf.cpp
@@ -673,108 +673,116 @@ const void* GetMetaData(
 // Retrieve the formatstring section from elf
 void DebugPrintf::DecodeFormatStringsFromElf(
     const Device*     pDevice,
-    uint32_t          code,
+    uint32_t          codeSize,
     const char*       pCode,
     PrintfFormatMap*  pFormatStrings)
 {
-    Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(), pCode);
-    auto& elfReader = abiReader.GetElfReader();
-    auto noteId = abiReader.GetElfReader().FindSection(".note");
-    auto& noteSection = abiReader.GetElfReader().GetSection(noteId);
-    VK_ASSERT(noteId != 0);
-    VK_ASSERT(noteSection.sh_type == static_cast<uint32>(Elf::SectionHeaderType::Note));
-    ElfReader::Notes notes(elfReader, noteId);
-    unsigned noteLength = 0;
-    auto noteData = GetMetaData(notes, Abi::MetadataNoteType, &noteLength);
-    MsgPackReader docReader;
-    Result result = docReader.InitFromBuffer(noteData, noteLength);
-    VK_ASSERT(docReader.Type() == CWP_ITEM_MAP);
-    const auto hashFormatStr = HashLiteralString("amdpal.format_strings");
-    const auto hashIndex = HashLiteralString(".index");
-    const auto hashString = HashLiteralString(".string");
-    const auto hashVarsCount = HashLiteralString(".argument_count");
-    const auto hashBitsPos = HashLiteralString(".64bit_arguments");
-    const auto hashStrings = HashLiteralString(".strings");
-
-    Util::StringView<char> key;
-    uint32_t palmetaSize = docReader.Get().as.map.size;
-    for (uint32 i = 0; i < palmetaSize; ++i)
+    // The ELF code size is zero when a graphics shader library is used, so return early.
+    if (codeSize == 0)
     {
-        result = docReader.Next(CWP_ITEM_STR);
-        const char* itemString = static_cast<const char*>(docReader.Get().as.str.start);
-        if (Util::HashString(itemString, docReader.Get().as.str.length) == hashFormatStr)
+        return;
+    }
+
+    Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(),
+                                           Util::Span<const void>{ pCode, static_cast<size_t>(codeSize) });
+    if (abiReader.Init() == Result::Success)
+    {
+        auto& elfReader = abiReader.GetElfReader();
+        auto noteId = abiReader.GetElfReader().FindSection(".note");
+        auto& noteSection = abiReader.GetElfReader().GetSection(noteId);
+        VK_ASSERT(noteId != 0);
+        VK_ASSERT(noteSection.sh_type == static_cast<uint32>(Elf::SectionHeaderType::Note));
+        ElfReader::Notes notes(elfReader, noteId);
+        unsigned noteLength = 0;
+        auto noteData = GetMetaData(notes, Abi::MetadataNoteType, &noteLength);
+        MsgPackReader docReader;
+        Result result = docReader.InitFromBuffer(noteData, noteLength);
+        VK_ASSERT(docReader.Type() == CWP_ITEM_MAP);
+        const auto hashFormatStr = HashLiteralString("amdpal.format_strings");
+        const auto hashIndex = HashLiteralString(".index");
+        const auto hashString = HashLiteralString(".string");
+        const auto hashVarsCount = HashLiteralString(".argument_count");
+        const auto hashBitsPos = HashLiteralString(".64bit_arguments");
+        const auto hashStrings = HashLiteralString(".strings");
+
+        Util::StringView<char> key;
+        uint32_t palmetaSize = docReader.Get().as.map.size;
+        for (uint32 i = 0; i < palmetaSize; ++i)
         {
-            result = docReader.Next(CWP_ITEM_MAP);
-            VK_ASSERT(docReader.Get().as.map.size == 2);
-            uint32_t formatStringsMap = docReader.Get().as.map.size;
-            for (uint32 j = 0; j < formatStringsMap; ++j)
+            result = docReader.Next(CWP_ITEM_STR);
+            const char* itemString = static_cast<const char*>(docReader.Get().as.str.start);
+            if (Util::HashString(itemString, docReader.Get().as.str.length) == hashFormatStr)
             {
-                result = docReader.UnpackNext(&key);
-                itemString = static_cast<const char*>(docReader.Get().as.str.start);
-                if (Util::HashString(key) == hashStrings)
+                result = docReader.Next(CWP_ITEM_MAP);
+                VK_ASSERT(docReader.Get().as.map.size == 2);
+                uint32_t formatStringsMap = docReader.Get().as.map.size;
+                for (uint32 j = 0; j < formatStringsMap; ++j)
                 {
-                    result = docReader.Next(CWP_ITEM_ARRAY);
-                    uint32_t stringsSize = docReader.Get().as.array.size;
-                    for (uint32 k = 0; k < stringsSize; ++k)
+                    result = docReader.UnpackNext(&key);
+                    itemString = static_cast<const char*>(docReader.Get().as.str.start);
+                    if (Util::HashString(key) == hashStrings)
                     {
-                        result = docReader.Next(CWP_ITEM_MAP);
-                        uint64_t hashValue = 0;
-                        uint64_t outputCount = 0;
-                        StringView<char> formatString;
-                        Vector<uint64_t, 8, GenericAllocator> bitPos(nullptr);
-                        uint32_t stringMap = docReader.Get().as.map.size;
-                        for (uint32 l = 0; l < stringMap; ++l)
+                        result = docReader.Next(CWP_ITEM_ARRAY);
+                        uint32_t stringsSize = docReader.Get().as.array.size;
+                        for (uint32 k = 0; k < stringsSize; ++k)
                         {
-                            result = docReader.UnpackNext(&key);
-                            auto hashKey = Util::HashString(key);
-                            switch (hashKey)
-                            {
-                            case hashIndex:
-                                docReader.UnpackNext(&hashValue);
-                                break;
-                            case hashString:
-                                docReader.UnpackNext(&formatString);
-                                break;
-                            case hashVarsCount:
-                                docReader.UnpackNext(&outputCount);
-                                break;
-                            default:
+                            result = docReader.Next(CWP_ITEM_MAP);
+                            uint64_t hashValue = 0;
+                            uint64_t outputCount = 0;
+                            StringView<char> formatString;
+                            Vector<uint64_t, 8, GenericAllocator> bitPos(nullptr);
+                            uint32_t stringMap = docReader.Get().as.map.size;
+                            for (uint32 l = 0; l < stringMap; ++l)
                             {
-                                VK_ASSERT(hashKey == hashBitsPos);
-                                docReader.UnpackNext(&bitPos);
-                                break;
+                                result = docReader.UnpackNext(&key);
+                                auto
+                                switch (hashKey)
+                                {
+                                case hashIndex:
+                                    docReader.UnpackNext(&hashValue);
+                                    break;
+                                case hashString:
+                                    docReader.UnpackNext(&formatString);
+                                    break;
+                                case hashVarsCount:
+                                    docReader.UnpackNext(&outputCount);
+                                    break;
+                                default:
+                                    VK_ASSERT(hashKey == hashBitsPos);
+                                    docReader.UnpackNext(&bitPos);
+                                    break;
+                                }
                             }
-                    }
-                    bool found = true;
-                    PrintfElfString* pElfString = nullptr;
-                    result = pFormatStrings->FindAllocate(hashValue, &found, &pElfString);
-                    if ((result == Pal::Result::Success) && (found == false))
-                    {
-                        pElfString->printStr.Reserve(formatString.Length());
-                        for (auto& elem : formatString)
-                        {
-                            pElfString->printStr.PushBack(elem);
-                        }
-                        pElfString->bit64s.Reserve(outputCount);
-                        for (uint32 bitIndex = 0; bitIndex < outputCount; ++bitIndex)
+                            bool found = true;
+                            PrintfElfString* pElfString = nullptr;
+                            result = pFormatStrings->FindAllocate(hashValue, &found, &pElfString);
+                            if ((result == Pal::Result::Success) && (found == false))
                             {
-                            bool bitValue = (bitPos[bitIndex / 64] >> (bitIndex % 64)) & 1;
-                            pElfString->bit64s.PushBack(bitValue);
+                                pElfString->printStr.Reserve(formatString.Length());
+                                for (auto& elem : formatString)
+                                {
+                                    pElfString->printStr.PushBack(elem);
+                                }
+                                pElfString->bit64s.Reserve(outputCount);
+                                for (uint32 bitIndex = 0; bitIndex < outputCount; ++bitIndex)
+                                {
+                                    bool bitValue = (bitPos[bitIndex / 64] >> (bitIndex % 64)) & 1;
+                                    pElfString->bit64s.PushBack(bitValue);
+                                }
                             }
                         }
                     }
-            }
-            else
-            {
-                docReader.Skip(1);
+                    else
+                    {
+                        docReader.Skip(1);
+                    }
                 }
             }
-    }
-    else
-    {
-        docReader.Skip(1);
+            else
+            {
+                docReader.Skip(1);
+            }
         }
     }
 }
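The decode loop above stores, for every format string, one flag per printf argument telling whether that argument is 64 bits wide, packed least-significant-bit first into 64-bit words (the ".64bit_arguments" note entry). A self-contained sketch of that unpacking, with illustrative names rather than driver code:

#include <cstdint>
#include <vector>

// Expands a packed per-argument width mask into one flag per argument.
// words[i / 64] holds bit (i % 64) for argument i, LSB-first; callers must
// supply enough words to cover argCount bits.
std::vector<bool> UnpackArgumentWidths(const std::vector<uint64_t>& words, uint32_t argCount)
{
    std::vector<bool> is64Bit;
    is64Bit.reserve(argCount);
    for (uint32_t i = 0; i < argCount; ++i)
    {
        is64Bit.push_back(((words[i / 64] >> (i % 64)) & 1) != 0);
    }
    return is64Bit;
}

For example, with argCount == 65, the flag for argument 64 comes from bit 0 of words[1], matching the bitPos[bitIndex / 64] indexing in the hunk above.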
diff --git a/icd/api/devmode/devmode_mgr.h b/icd/api/devmode/devmode_mgr.h
index 634533dc..ff8f213a 100644
--- a/icd/api/devmode/devmode_mgr.h
+++ b/icd/api/devmode/devmode_mgr.h
@@ -69,7 +69,6 @@ struct AccelStructUserMarkerString
 // of the driver.
 class IDevMode
 {
-#if ICD_GPUOPEN_DEVMODE_BUILD
 public:
     // Pipeline hash used for instruction tracing whenever no pipeline is being targeted.
     static constexpr uint64_t InvalidTargetPipelineHash = 0;
@@ -160,8 +159,6 @@ class IDevMode
     virtual void LabelAccelStruct(
         uint64_t    deviceAddress,
         const char* pString) {}
-
-#endif
 };
 }
diff --git a/icd/api/devmode/devmode_rgp.cpp b/icd/api/devmode/devmode_rgp.cpp
index bab86493..055e7bb0 100644
--- a/icd/api/devmode/devmode_rgp.cpp
+++ b/icd/api/devmode/devmode_rgp.cpp
@@ -29,7 +29,6 @@
 ***********************************************************************************************************************
 */
-#if ICD_GPUOPEN_DEVMODE_BUILD
 // Vulkan headers
 #include "devmode/devmode_rgp.h"
 #include "include/vk_cmdbuffer.h"
@@ -121,7 +120,7 @@ class DevModeRgpStringTableTraceSource : public GpuUtil::StringTableTraceSource
             stringIdx++;
         }

-        uint32_t tableId = m_pDevMode->AcquireStringTableId();
+        uint32_t tableId = AcquireTableId();
         AddStringTable(tableId, numStrings, stringOffsets.Data(), stringData.Data(), stringData.size());
     }
@@ -401,7 +400,6 @@ DevModeRgp::DevModeRgp(
     m_crashAnalysisEnabled(false),
     m_perfCounterIds(pInstance->Allocator()),
     m_pipelineCaches(pInstance->Allocator()),
-    m_stringTableId(0),
     m_pStringTableTraceSource(nullptr),
     m_pUserMarkerHistoryTraceSource(nullptr),
     m_accelStructNames(64, m_pInstance->Allocator())
@@ -2929,7 +2927,7 @@ void DevModeRgp::ProcessMarkerTable(
     uint32      markerStringDataSize,
     const char* pMarkerStringData)
 {
-    uint32_t tableId = AcquireStringTableId();
+    uint32_t tableId = m_pStringTableTraceSource->AcquireTableId();
     m_pStringTableTraceSource->AddStringTable(tableId,
                                               numMarkerStrings,
                                               pMarkerStringOffsets,
@@ -2963,5 +2961,3 @@ void DevModeRgp::LabelAccelStruct(
 }
 }; // namespace vk
-
-#endif
diff --git a/icd/api/devmode/devmode_rgp.h b/icd/api/devmode/devmode_rgp.h
index 8a83e478..b5d14c89 100644
--- a/icd/api/devmode/devmode_rgp.h
+++ b/icd/api/devmode/devmode_rgp.h
@@ -42,10 +42,8 @@
 // gpuutil headers
 #include "gpuUtil/palGpaSession.h"

-#if ICD_GPUOPEN_DEVMODE_BUILD
 // gpuopen headers
 #include "gpuopen.h"
-#endif

 #include
@@ -84,7 +82,6 @@ namespace vk
 // of the driver.
 class DevModeRgp final : public IDevMode
 {
-#if ICD_GPUOPEN_DEVMODE_BUILD
 public:
     // Number of frames to wait before collecting a hardware trace.
     // Note: This will be replaced in the future by a remotely configurable value provided by the RGP server.
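The devmode_rgp.cpp hunks above move string-table ID allocation out of the DevMode manager and into the trace source itself, deleting the manager's std::atomic counter. A minimal sketch of the resulting pattern, assuming each GpuUtil trace source now owns a monotonic counter (the class and member names below are illustrative, not PAL's):

#include <atomic>
#include <cstdint>

// Each trace source hands out its own unique table IDs; callers no longer
// route through the DevMode manager to get one.
class StringTableTraceSourceSketch
{
public:
    // Pre-increment so that 0 stays reserved as an "invalid" table ID.
    uint32_t AcquireTableId() { return ++m_nextTableId; }

private:
    std::atomic<uint32_t> m_nextTableId{ 0 };
};

Keeping the counter next to the tables it identifies removes a cross-object dependency while staying thread-safe, which is presumably why both the RGP and UberTrace paths switch to AcquireTableId() in this patch.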
@@ -183,8 +180,6 @@ class DevModeRgp final : public IDevMode const AccelStructUserMarkerTable& GetAccelStructUserMarkerTable() const { return m_accelStructNames; } - uint32_t AcquireStringTableId() { return ++m_stringTableId; } - private: static constexpr uint32_t MaxTraceQueueFamilies = Queue::MaxQueueFamilies; static constexpr uint32_t MaxTraceQueues = MaxTraceQueueFamilies * Queue::MaxQueuesPerFamily; @@ -352,13 +347,11 @@ class DevModeRgp final : public IDevMode PipelineCacheList m_pipelineCaches; Util::RWLock m_pipelineReinjectionLock; - std::atomic m_stringTableId; GpuUtil::StringTableTraceSource* m_pStringTableTraceSource; GpuUtil::UserMarkerHistoryTraceSource* m_pUserMarkerHistoryTraceSource; AccelStructUserMarkerTable m_accelStructNames; Util::Mutex m_mutex; -#endif }; } diff --git a/icd/api/devmode/devmode_ubertrace.cpp b/icd/api/devmode/devmode_ubertrace.cpp index 1111b597..63d8fcfa 100644 --- a/icd/api/devmode/devmode_ubertrace.cpp +++ b/icd/api/devmode/devmode_ubertrace.cpp @@ -29,7 +29,6 @@ *********************************************************************************************************************** */ -#if ICD_GPUOPEN_DEVMODE_BUILD // Vulkan headers #include "devmode/devmode_ubertrace.h" #include "include/vk_cmdbuffer.h" @@ -118,7 +117,7 @@ class DevModeUberTraceStringTableTraceSource : public GpuUtil::StringTableTraceS stringIdx++; } - uint32_t tableId = m_pDevMode->AcquireStringTableId(); + uint32_t tableId = AcquireTableId(); AddStringTable(tableId, numStrings, stringOffsets.Data(), stringData.Data(), stringData.size()); } @@ -144,7 +143,6 @@ DevModeUberTrace::DevModeUberTrace( m_pStringTableTraceSource(nullptr), m_pUserMarkerHistoryTraceSource(nullptr), m_pRenderOpTraceController(nullptr), - m_stringTableId(0), m_accelStructNames(64, m_pInstance->Allocator()) { m_accelStructNames.Init(); @@ -216,8 +214,6 @@ void DevModeUberTrace::NotifyFrameBegin( { // Wait for the driver to be resumed in case it's been paused. WaitForDriverResume(); - - m_pInstance->PalPlatform()->UpdateFrameTraceController(pQueue->PalQueue(DefaultDeviceIndex)); } // ===================================================================================================================== @@ -726,7 +722,7 @@ void DevModeUberTrace::ProcessMarkerTable( uint32 markerStringDataSize, const char* pMarkerStringData) { - uint32_t tableId = AcquireStringTableId(); + uint32_t tableId = m_pStringTableTraceSource->AcquireTableId(); m_pStringTableTraceSource->AddStringTable(tableId, numMarkerStrings, pMarkerStringOffsets, @@ -760,5 +756,3 @@ void DevModeUberTrace::LabelAccelStruct( } } // namespace vk - -#endif diff --git a/icd/api/devmode/devmode_ubertrace.h b/icd/api/devmode/devmode_ubertrace.h index 0f81b0c6..ce06f8aa 100644 --- a/icd/api/devmode/devmode_ubertrace.h +++ b/icd/api/devmode/devmode_ubertrace.h @@ -64,7 +64,6 @@ namespace vk // of the driver. 
class DevModeUberTrace final : public IDevMode { -#if ICD_GPUOPEN_DEVMODE_BUILD public: ~DevModeUberTrace(); @@ -154,9 +153,6 @@ class DevModeUberTrace final : public IDevMode const AccelStructUserMarkerTable& GetAccelStructUserMarkerTable() const { return m_accelStructNames; } - uint32_t AcquireStringTableId() - { return ++m_stringTableId; } - private: DevModeUberTrace(Instance* pInstance); @@ -177,11 +173,9 @@ class DevModeUberTrace final : public IDevMode GpuUtil::StringTableTraceSource* m_pStringTableTraceSource; GpuUtil::UserMarkerHistoryTraceSource* m_pUserMarkerHistoryTraceSource; GpuUtil::RenderOpTraceController* m_pRenderOpTraceController; - std::atomic m_stringTableId; AccelStructUserMarkerTable m_accelStructNames; Util::Mutex m_mutex; -#endif }; } diff --git a/icd/api/include/app_profile.h b/icd/api/include/app_profile.h index b6c51aef..fdd41153 100644 --- a/icd/api/include/app_profile.h +++ b/icd/api/include/app_profile.h @@ -112,7 +112,6 @@ enum class AppProfile : uint32_t BaldursGate3, // Baldur's Gate by Larian Studios Enshrouded, // Enshrouded by Keen Games HolisticEngine, // Holistic Engine by Keen Games - GravityMark, // Tellusim GravityMark IdTechEngine, // id Tech Engine (Default) #if VKI_RAY_TRACING ControlDX12, // VKD3D Control Ultimate Edition @@ -148,7 +147,8 @@ enum class AppProfile : uint32_t Archean, // Archean by batcholi Houdini, // Houdini SevenDaysToDie, // 7 Days to Die - GgmlVulkan // ggml-vulkan + GgmlVulkan, // ggml-vulkan + Blender // Blender }; struct ProfileSettings diff --git a/icd/api/include/barrier_policy.h b/icd/api/include/barrier_policy.h index 7316e418..730b01a4 100644 --- a/icd/api/include/barrier_policy.h +++ b/icd/api/include/barrier_policy.h @@ -261,13 +261,17 @@ class ImageBarrierPolicy final : public ResourceBarrierPolicy { return m_supportedLayoutUsageMask; } VK_FORCEINLINE uint32_t GetSupportedLayoutUsageMask( - uint32_t queueFamilyIndex) const + uint32_t queueFamilyIndex, + VkImageLayout layout) const { + const uint32_t extraLayoutUsages = (layout == VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR) ? + Pal::LayoutUncompressed : 0u; + // This version of the function returns the supported layout usage masks in the scope of the specified queue // family. Accordingly, the image's supported layout usage mask is limited to the layout usage mask that // is supported by the specified queue family or by other queue families that are allowed to concurrently // access the image. 
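The comment block above describes the masking that the return statement just below implements: the image's own layout-usage mask, optionally widened for VK_IMAGE_LAYOUT_RENDERING_LOCAL_READ_KHR, is filtered by what the queried queue family (or concurrently sharing families) supports. A hedged, self-contained restatement with made-up mask values, not driver code:

#include <cstdint>

// image:      layout usages the image itself supports
// family:     layout usages supported by the queried queue family
// concurrent: layout usages granted to other families with concurrent access
// extra:      Pal::LayoutUncompressed when transitioning to RENDERING_LOCAL_READ, else 0
uint32_t SupportedLayoutUsage(uint32_t image, uint32_t family, uint32_t concurrent, uint32_t extra)
{
    return (image | extra) & (family | concurrent);
}

For example, image = 0b0011, extra = 0b0100, and family | concurrent = 0b0110 gives (0b0111 & 0b0110) = 0b0110, whereas without the local-read extra bit the result would only be 0b0010.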
-        return m_supportedLayoutUsageMask &
+        return (m_supportedLayoutUsageMask | extraLayoutUsages) &
                (GetQueueFamilyPolicy(queueFamilyIndex).supportedLayoutUsageMask | m_concurrentLayoutUsageMask);
     }
diff --git a/icd/api/include/graphics_pipeline_common.h b/icd/api/include/graphics_pipeline_common.h
index 931985b9..d0b92988 100644
--- a/icd/api/include/graphics_pipeline_common.h
+++ b/icd/api/include/graphics_pipeline_common.h
@@ -120,15 +120,15 @@ union GraphicsPipelineObjectFlags
         uint32_t isPointSizeUsed : 1;
         uint32_t bindColorBlendObject : 1;
         uint32_t bindMsaaObject : 1;
-        uint32_t viewIndexFromDeviceIndex : 1;
         uint32_t perpLineEndCapsEnable : 1;
         uint32_t shadingRateUsedInShader : 1;
         uint32_t fragmentShadingRateEnable : 1;
+        uint32_t viewIndexFromDeviceIndex : 2;
 #if VKI_RAY_TRACING
         uint32_t hasRayTracing : 1;
-        uint32_t reserved : 15;
+        uint32_t reserved : 14;
 #else
-        uint32_t reserved : 16;
+        uint32_t reserved : 15;
 #endif
     };
     uint32_t value;
diff --git a/icd/api/include/khronos/GLSL.ext.AMD.h b/icd/api/include/khronos/GLSL.ext.AMD.h
index 297a6f98..8d60d679 100644
--- a/icd/api/include/khronos/GLSL.ext.AMD.h
+++ b/icd/api/include/khronos/GLSL.ext.AMD.h
@@ -84,41 +84,4 @@ enum GcnShaderAMD {
   GcnShaderCountAMD
 };

-#if VKI_TEXEL_BUFFER_EXPLICIT_FORMAT_SUPPORT
-// SPV_AMD_shader_texel_buffer_explicit_format
-static const Capability CapabilityImageBufferReadWriteWithFormatAMD = static_cast<Capability>(5024);
-
-static const Op OpImageBufferReadAMD = static_cast<Op>(5025);
-static const Op OpImageBufferWriteAMD = static_cast<Op>(5026);
-
-static const ImageFormat ImageFormatRgb32fAMD = static_cast<ImageFormat>(5028);
-static const ImageFormat ImageFormatRgb32uiAMD = static_cast<ImageFormat>(5029);
-static const ImageFormat ImageFormatRgb32iAMD = static_cast<ImageFormat>(5030);
-static const ImageFormat ImageFormatR10G11B11fAMD = static_cast<ImageFormat>(5031);
-static const ImageFormat ImageFormatRgb10A2SnormAMD = static_cast<ImageFormat>(5032);
-static const ImageFormat ImageFormatRgb10A2iAMD = static_cast<ImageFormat>(5033);
-static const ImageFormat ImageFormatRgba16SscaledAMD = static_cast<ImageFormat>(5034);
-static const ImageFormat ImageFormatRgb10A2SscaledAMD = static_cast<ImageFormat>(5035);
-static const ImageFormat ImageFormatRg16SscaledAMD = static_cast<ImageFormat>(5036);
-static const ImageFormat ImageFormatRgba8SscaledAMD = static_cast<ImageFormat>(5037);
-static const ImageFormat ImageFormatRg8SscaledAMD = static_cast<ImageFormat>(5038);
-static const ImageFormat ImageFormatR16SscaledAMD = static_cast<ImageFormat>(5039);
-static const ImageFormat ImageFormatR8SscaledAMD = static_cast<ImageFormat>(5040);
-static const ImageFormat ImageFormatRgba16UscaledAMD = static_cast<ImageFormat>(5041);
-static const ImageFormat ImageFormatRgb10A2UscaledAMD = static_cast<ImageFormat>(5042);
-static const ImageFormat ImageFormatRg16UscaledAMD = static_cast<ImageFormat>(5043);
-static const ImageFormat ImageFormatRgba8USscaledAMD = static_cast<ImageFormat>(5044);
-static const ImageFormat ImageFormatRg8UscaledAMD = static_cast<ImageFormat>(5045);
-static const ImageFormat ImageFormatR16UscaledAMD = static_cast<ImageFormat>(5046);
-static const ImageFormat ImageFormatR8UscaledAMD = static_cast<ImageFormat>(5047);
-#endif
-
-#if VKI_NORMALIZED_TRIG_FUNCTIONS
-// SPV_AMD_normalized_trig - Internal Use Only
-static const Capability CapabilityTrigNormalizedAMD = static_cast<Capability>(5058);
-
-static const Op OpSinNormalizedAMD = static_cast<Op>(5059);
-static const Op OpCosNormalizedAMD = static_cast<Op>(5060);
-#endif
-
 #endif
diff --git a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h
index df18b404..f5e94750 100644
--- a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h
+++ 
b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h @@ -53,13 +53,14 @@ typedef struct VkPhysicalDevicePortabilitySubsetPropertiesKHR { // VK_AMDX_shader_enqueue is a preprocessor guard. Do not pass it to API calls. #define VK_AMDX_shader_enqueue 1 -#define VK_AMDX_SHADER_ENQUEUE_SPEC_VERSION 1 +#define VK_AMDX_SHADER_ENQUEUE_SPEC_VERSION 2 #define VK_AMDX_SHADER_ENQUEUE_EXTENSION_NAME "VK_AMDX_shader_enqueue" #define VK_SHADER_INDEX_UNUSED_AMDX (~0U) typedef struct VkPhysicalDeviceShaderEnqueueFeaturesAMDX { VkStructureType sType; void* pNext; VkBool32 shaderEnqueue; + VkBool32 shaderMeshEnqueue; } VkPhysicalDeviceShaderEnqueueFeaturesAMDX; typedef struct VkPhysicalDeviceShaderEnqueuePropertiesAMDX { @@ -70,12 +71,16 @@ typedef struct VkPhysicalDeviceShaderEnqueuePropertiesAMDX { uint32_t maxExecutionGraphShaderPayloadSize; uint32_t maxExecutionGraphShaderPayloadCount; uint32_t executionGraphDispatchAddressAlignment; + uint32_t maxExecutionGraphWorkgroupCount[3]; + uint32_t maxExecutionGraphWorkgroups; } VkPhysicalDeviceShaderEnqueuePropertiesAMDX; typedef struct VkExecutionGraphPipelineScratchSizeAMDX { VkStructureType sType; void* pNext; - VkDeviceSize size; + VkDeviceSize minSize; + VkDeviceSize maxSize; + VkDeviceSize sizeGranularity; } VkExecutionGraphPipelineScratchSizeAMDX; typedef struct VkExecutionGraphPipelineCreateInfoAMDX { @@ -116,12 +121,12 @@ typedef struct VkPipelineShaderStageNodeCreateInfoAMDX { } VkPipelineShaderStageNodeCreateInfoAMDX; typedef VkResult (VKAPI_PTR *PFN_vkCreateExecutionGraphPipelinesAMDX)(VkDevice device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkExecutionGraphPipelineCreateInfoAMDX* pCreateInfos, const VkAllocationCallbacks* pAllocator, VkPipeline* pPipelines); -typedef VkResult (VKAPI_PTR *PFN_vkGetExecutionGraphPipelineScratchSizeAMDX)(VkDevice device, VkPipeline executionGraph, VkExecutionGraphPipelineScratchSizeAMDX* pSizeInfo); -typedef VkResult (VKAPI_PTR *PFN_vkGetExecutionGraphPipelineNodeIndexAMDX)(VkDevice device, VkPipeline executionGraph, const VkPipelineShaderStageNodeCreateInfoAMDX* pNodeInfo, uint32_t* pNodeIndex); -typedef void (VKAPI_PTR *PFN_vkCmdInitializeGraphScratchMemoryAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch); -typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, const VkDispatchGraphCountInfoAMDX* pCountInfo); -typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphIndirectAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, const VkDispatchGraphCountInfoAMDX* pCountInfo); -typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphIndirectCountAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, VkDeviceAddress countInfo); +typedef VkResult (VKAPI_PTR *PFN_vkGetExecutionGraphPipelineScratchSizeAMDX)(VkDevice device, VkPipeline executionGraph, VkExecutionGraphPipelineScratchSizeAMDX* pSizeInfo); +typedef VkResult (VKAPI_PTR *PFN_vkGetExecutionGraphPipelineNodeIndexAMDX)(VkDevice device, VkPipeline executionGraph, const VkPipelineShaderStageNodeCreateInfoAMDX* pNodeInfo, uint32_t* pNodeIndex); +typedef void (VKAPI_PTR *PFN_vkCmdInitializeGraphScratchMemoryAMDX)(VkCommandBuffer commandBuffer, VkPipeline executionGraph, VkDeviceAddress scratch, VkDeviceSize scratchSize); +typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, VkDeviceSize scratchSize, const VkDispatchGraphCountInfoAMDX* pCountInfo); +typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphIndirectAMDX)(VkCommandBuffer 
commandBuffer, VkDeviceAddress scratch, VkDeviceSize scratchSize, const VkDispatchGraphCountInfoAMDX* pCountInfo); +typedef void (VKAPI_PTR *PFN_vkCmdDispatchGraphIndirectCountAMDX)(VkCommandBuffer commandBuffer, VkDeviceAddress scratch, VkDeviceSize scratchSize, VkDeviceAddress countInfo); #ifndef VK_NO_PROTOTYPES VKAPI_ATTR VkResult VKAPI_CALL vkCreateExecutionGraphPipelinesAMDX( @@ -145,21 +150,26 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetExecutionGraphPipelineNodeIndexAMDX( VKAPI_ATTR void VKAPI_CALL vkCmdInitializeGraphScratchMemoryAMDX( VkCommandBuffer commandBuffer, - VkDeviceAddress scratch); + VkPipeline executionGraph, + VkDeviceAddress scratch, + VkDeviceSize scratchSize); VKAPI_ATTR void VKAPI_CALL vkCmdDispatchGraphAMDX( VkCommandBuffer commandBuffer, VkDeviceAddress scratch, + VkDeviceSize scratchSize, const VkDispatchGraphCountInfoAMDX* pCountInfo); VKAPI_ATTR void VKAPI_CALL vkCmdDispatchGraphIndirectAMDX( VkCommandBuffer commandBuffer, VkDeviceAddress scratch, + VkDeviceSize scratchSize, const VkDispatchGraphCountInfoAMDX* pCountInfo); VKAPI_ATTR void VKAPI_CALL vkCmdDispatchGraphIndirectCountAMDX( VkCommandBuffer commandBuffer, VkDeviceAddress scratch, + VkDeviceSize scratchSize, VkDeviceAddress countInfo); #endif diff --git a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h index 1dc7e23e..d9cdd137 100644 --- a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h +++ b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h @@ -69,7 +69,7 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 297 +#define VK_HEADER_VERSION 301 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 3, VK_HEADER_VERSION) @@ -508,10 +508,6 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_KHR = 1000040005, VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR = 1000040006, VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD = 1000041000, - VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR = 1000044006, - VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_INFO_EXT = 1000044007, - VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD = 1000044008, - VK_STRUCTURE_TYPE_MULTIVIEW_PER_VIEW_ATTRIBUTES_INFO_NVX = 1000044009, VK_STRUCTURE_TYPE_STREAM_DESCRIPTOR_SURFACE_CREATE_INFO_GGP = 1000049000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CORNER_SAMPLED_IMAGE_FEATURES_NV = 1000050000, VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_NV = 1000056000, @@ -553,6 +549,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_SWAPCHAIN_COUNTER_CREATE_INFO_EXT = 1000091003, VK_STRUCTURE_TYPE_PRESENT_TIMES_INFO_GOOGLE = 1000092000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PER_VIEW_ATTRIBUTES_PROPERTIES_NVX = 1000097000, + VK_STRUCTURE_TYPE_MULTIVIEW_PER_VIEW_ATTRIBUTES_INFO_NVX = 1000044009, VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV = 1000098000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT = 1000099000, VK_STRUCTURE_TYPE_PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT = 1000099001, @@ -612,6 +609,7 @@ typedef enum VkStructureType { #ifdef VK_ENABLE_BETA_EXTENSIONS VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_NODE_CREATE_INFO_AMDX = 1000134004, #endif + VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD = 1000044008, VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT = 1000143000, 
VK_STRUCTURE_TYPE_RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT = 1000143001, VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT = 1000143002, @@ -703,6 +701,8 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXCLUSIVE_SCISSOR_FEATURES_NV = 1000205002, VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV = 1000206000, VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_NV = 1000206001, + VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_2_NV = 1000314008, + VK_STRUCTURE_TYPE_CHECKPOINT_DATA_2_NV = 1000314009, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL = 1000209000, VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_QUERY_CREATE_INFO_INTEL = 1000210000, VK_STRUCTURE_TYPE_INITIALIZE_PERFORMANCE_API_INFO_INTEL = 1000210001, @@ -718,11 +718,13 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT = 1000218000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT = 1000218001, VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT = 1000218002, + VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_INFO_EXT = 1000044007, VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR = 1000226000, VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR = 1000226001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR = 1000226002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR = 1000226003, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_KHR = 1000226004, + VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR = 1000044006, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD = 1000227000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD = 1000229000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_LOCAL_READ_FEATURES_KHR = 1000232000, @@ -848,8 +850,6 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_IMPORT_METAL_IO_SURFACE_INFO_EXT = 1000311009, VK_STRUCTURE_TYPE_EXPORT_METAL_SHARED_EVENT_INFO_EXT = 1000311010, VK_STRUCTURE_TYPE_IMPORT_METAL_SHARED_EVENT_INFO_EXT = 1000311011, - VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_2_NV = 1000314008, - VK_STRUCTURE_TYPE_CHECKPOINT_DATA_2_NV = 1000314009, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT = 1000316000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_DENSITY_MAP_PROPERTIES_EXT = 1000316001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT = 1000316002, @@ -1163,6 +1163,11 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_IMAGE_ALIGNMENT_CONTROL_CREATE_INFO_MESA = 1000575002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLAMP_CONTROL_FEATURES_EXT = 1000582000, VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_DEPTH_CLAMP_CONTROL_CREATE_INFO_EXT = 1000582001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HDR_VIVID_FEATURES_HUAWEI = 1000590000, + VK_STRUCTURE_TYPE_HDR_VIVID_DYNAMIC_METADATA_HUAWEI = 1000590001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_2_FEATURES_NV = 1000593000, + VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_FLEXIBLE_DIMENSIONS_PROPERTIES_NV = 1000593001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_2_PROPERTIES_NV = 1000593002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES, // VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT is a deprecated alias @@ 
-1172,7 +1177,6 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO_KHR = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO, - VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_NV = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES, @@ -1246,6 +1250,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2_KHR = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2_KHR = VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2, VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO, + VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_NV = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO, VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO_KHR = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO, VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO_KHR = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO, @@ -2647,8 +2652,6 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_DISPATCH_BASE_BIT = 0x00000010, VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT = 0x00000100, VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT = 0x00000200, - VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = 0x00200000, - VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = 0x00400000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 0x00004000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 0x00008000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 0x00010000, @@ -2657,6 +2660,8 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_RAY_TRACING_SKIP_AABBS_BIT_KHR = 0x00002000, VK_PIPELINE_CREATE_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR = 0x00080000, VK_PIPELINE_CREATE_DEFER_COMPILE_BIT_NV = 0x00000020, + VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = 0x00400000, + VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = 0x00200000, VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR = 0x00000040, VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR = 0x00000080, VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV = 0x00040000, @@ -2674,12 +2679,12 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_NO_PROTECTED_ACCESS_BIT_EXT = 0x08000000, VK_PIPELINE_CREATE_PROTECTED_ACCESS_ONLY_BIT_EXT = 0x40000000, VK_PIPELINE_CREATE_DISPATCH_BASE = VK_PIPELINE_CREATE_DISPATCH_BASE_BIT, - // VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR is a deprecated alias - VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR, - // VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT is a deprecated alias - 
VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR = VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT, VK_PIPELINE_CREATE_DISPATCH_BASE_KHR = VK_PIPELINE_CREATE_DISPATCH_BASE, + // VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT is a deprecated alias + VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT, + // VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR is a deprecated alias + VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR, VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT = VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT, VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT = VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT, VK_PIPELINE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF @@ -8903,38 +8908,6 @@ typedef VkPhysicalDeviceDynamicRenderingFeatures VkPhysicalDeviceDynamicRenderin typedef VkCommandBufferInheritanceRenderingInfo VkCommandBufferInheritanceRenderingInfoKHR; -typedef struct VkRenderingFragmentShadingRateAttachmentInfoKHR { - VkStructureType sType; - const void* pNext; - VkImageView imageView; - VkImageLayout imageLayout; - VkExtent2D shadingRateAttachmentTexelSize; -} VkRenderingFragmentShadingRateAttachmentInfoKHR; - -typedef struct VkRenderingFragmentDensityMapAttachmentInfoEXT { - VkStructureType sType; - const void* pNext; - VkImageView imageView; - VkImageLayout imageLayout; -} VkRenderingFragmentDensityMapAttachmentInfoEXT; - -typedef struct VkAttachmentSampleCountInfoAMD { - VkStructureType sType; - const void* pNext; - uint32_t colorAttachmentCount; - const VkSampleCountFlagBits* pColorAttachmentSamples; - VkSampleCountFlagBits depthStencilAttachmentSamples; -} VkAttachmentSampleCountInfoAMD; - -typedef VkAttachmentSampleCountInfoAMD VkAttachmentSampleCountInfoNV; - -typedef struct VkMultiviewPerViewAttributesInfoNVX { - VkStructureType sType; - const void* pNext; - VkBool32 perViewAttributes; - VkBool32 perViewAttributesPositionXOnly; -} VkMultiviewPerViewAttributesInfoNVX; - typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderingKHR)(VkCommandBuffer commandBuffer, const VkRenderingInfo* pRenderingInfo); typedef void (VKAPI_PTR *PFN_vkCmdEndRenderingKHR)(VkCommandBuffer commandBuffer); @@ -10309,6 +10282,14 @@ typedef struct VkPhysicalDeviceFragmentShadingRateKHR { VkExtent2D fragmentSize; } VkPhysicalDeviceFragmentShadingRateKHR; +typedef struct VkRenderingFragmentShadingRateAttachmentInfoKHR { + VkStructureType sType; + const void* pNext; + VkImageView imageView; + VkImageLayout imageLayout; + VkExtent2D shadingRateAttachmentTexelSize; +} VkRenderingFragmentShadingRateAttachmentInfoKHR; + typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceFragmentShadingRatesKHR)(VkPhysicalDevice physicalDevice, uint32_t* pFragmentShadingRateCount, VkPhysicalDeviceFragmentShadingRateKHR* pFragmentShadingRates); typedef void (VKAPI_PTR *PFN_vkCmdSetFragmentShadingRateKHR)(VkCommandBuffer commandBuffer, const VkExtent2D* pFragmentSize, const VkFragmentShadingRateCombinerOpKHR combinerOps[2]); @@ -10890,27 +10871,12 @@ typedef VkCommandBufferSubmitInfo VkCommandBufferSubmitInfoKHR; typedef VkPhysicalDeviceSynchronization2Features 
VkPhysicalDeviceSynchronization2FeaturesKHR; -typedef struct VkQueueFamilyCheckpointProperties2NV { - VkStructureType sType; - void* pNext; - VkPipelineStageFlags2 checkpointExecutionStageMask; -} VkQueueFamilyCheckpointProperties2NV; - -typedef struct VkCheckpointData2NV { - VkStructureType sType; - void* pNext; - VkPipelineStageFlags2 stage; - void* pCheckpointMarker; -} VkCheckpointData2NV; - typedef void (VKAPI_PTR *PFN_vkCmdSetEvent2KHR)(VkCommandBuffer commandBuffer, VkEvent event, const VkDependencyInfo* pDependencyInfo); typedef void (VKAPI_PTR *PFN_vkCmdResetEvent2KHR)(VkCommandBuffer commandBuffer, VkEvent event, VkPipelineStageFlags2 stageMask); typedef void (VKAPI_PTR *PFN_vkCmdWaitEvents2KHR)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent* pEvents, const VkDependencyInfo* pDependencyInfos); typedef void (VKAPI_PTR *PFN_vkCmdPipelineBarrier2KHR)(VkCommandBuffer commandBuffer, const VkDependencyInfo* pDependencyInfo); typedef void (VKAPI_PTR *PFN_vkCmdWriteTimestamp2KHR)(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkQueryPool queryPool, uint32_t query); typedef VkResult (VKAPI_PTR *PFN_vkQueueSubmit2KHR)(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence); -typedef void (VKAPI_PTR *PFN_vkCmdWriteBufferMarker2AMD)(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker); -typedef void (VKAPI_PTR *PFN_vkGetQueueCheckpointData2NV)(VkQueue queue, uint32_t* pCheckpointDataCount, VkCheckpointData2NV* pCheckpointData); #ifndef VK_NO_PROTOTYPES VKAPI_ATTR void VKAPI_CALL vkCmdSetEvent2KHR( @@ -10944,18 +10910,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit2KHR( uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence); - -VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarker2AMD( - VkCommandBuffer commandBuffer, - VkPipelineStageFlags2 stage, - VkBuffer dstBuffer, - VkDeviceSize dstOffset, - uint32_t marker); - -VKAPI_ATTR void VKAPI_CALL vkGetQueueCheckpointData2NV( - VkQueue queue, - uint32_t* pCheckpointDataCount, - VkCheckpointData2NV* pCheckpointData); #endif @@ -11198,6 +11152,9 @@ typedef VkFlags64 VkPipelineCreateFlagBits2KHR; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_DISABLE_OPTIMIZATION_BIT_KHR = 0x00000001ULL; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_ALLOW_DERIVATIVES_BIT_KHR = 0x00000002ULL; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_DERIVATIVE_BIT_KHR = 0x00000004ULL; +#ifdef VK_ENABLE_BETA_EXTENSIONS +static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_EXECUTION_GRAPH_BIT_AMDX = 0x100000000ULL; +#endif static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_ENABLE_LEGACY_DITHERING_BIT_EXT = 0x400000000ULL; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR = 0x00000008ULL; static const VkPipelineCreateFlagBits2KHR VK_PIPELINE_CREATE_2_DISPATCH_BASE_BIT_KHR = 0x00000010ULL; @@ -11243,7 +11200,9 @@ static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_STORAGE_BUFFER_BIT_KHR static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_INDEX_BUFFER_BIT_KHR = 0x00000040ULL; static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_VERTEX_BUFFER_BIT_KHR = 0x00000080ULL; static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_INDIRECT_BUFFER_BIT_KHR = 0x00000100ULL; +#ifdef VK_ENABLE_BETA_EXTENSIONS static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_EXECUTION_GRAPH_SCRATCH_BIT_AMDX = 0x02000000ULL; +#endif 
static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_CONDITIONAL_RENDERING_BIT_EXT = 0x00000200ULL; static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_SHADER_BINDING_TABLE_BIT_KHR = 0x00000400ULL; static const VkBufferUsageFlagBits2KHR VK_BUFFER_USAGE_2_RAY_TRACING_BIT_NV = 0x00000400ULL; @@ -13008,6 +12967,13 @@ typedef struct VkPhysicalDeviceMultiviewPerViewAttributesPropertiesNVX { VkBool32 perViewPositionAllComponents; } VkPhysicalDeviceMultiviewPerViewAttributesPropertiesNVX; +typedef struct VkMultiviewPerViewAttributesInfoNVX { + VkStructureType sType; + const void* pNext; + VkBool32 perViewAttributes; + VkBool32 perViewAttributesPositionXOnly; +} VkMultiviewPerViewAttributesInfoNVX; + // VK_NV_viewport_swizzle is a preprocessor guard. Do not pass it to API calls. @@ -13376,6 +13342,14 @@ typedef VkPhysicalDeviceSamplerFilterMinmaxProperties VkPhysicalDeviceSamplerFil #define VK_AMD_mixed_attachment_samples 1 #define VK_AMD_MIXED_ATTACHMENT_SAMPLES_SPEC_VERSION 1 #define VK_AMD_MIXED_ATTACHMENT_SAMPLES_EXTENSION_NAME "VK_AMD_mixed_attachment_samples" +typedef struct VkAttachmentSampleCountInfoAMD { + VkStructureType sType; + const void* pNext; + uint32_t colorAttachmentCount; + const VkSampleCountFlagBits* pColorAttachmentSamples; + VkSampleCountFlagBits depthStencilAttachmentSamples; +} VkAttachmentSampleCountInfoAMD; + // VK_AMD_shader_fragment_mask is a preprocessor guard. Do not pass it to API calls. @@ -13555,6 +13529,8 @@ typedef struct VkPipelineCoverageModulationStateCreateInfoNV { const float* pCoverageModulationTable; } VkPipelineCoverageModulationStateCreateInfoNV; +typedef VkAttachmentSampleCountInfoAMD VkAttachmentSampleCountInfoNV; + // VK_NV_fill_rectangle is a preprocessor guard. Do not pass it to API calls. @@ -14312,6 +14288,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetMemoryHostPointerPropertiesEXT( #define VK_AMD_BUFFER_MARKER_SPEC_VERSION 1 #define VK_AMD_BUFFER_MARKER_EXTENSION_NAME "VK_AMD_buffer_marker" typedef void (VKAPI_PTR *PFN_vkCmdWriteBufferMarkerAMD)(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage, VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker); +typedef void (VKAPI_PTR *PFN_vkCmdWriteBufferMarker2AMD)(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker); #ifndef VK_NO_PROTOTYPES VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarkerAMD( @@ -14320,6 +14297,13 @@ VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarkerAMD( VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker); + +VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarker2AMD( + VkCommandBuffer commandBuffer, + VkPipelineStageFlags2 stage, + VkBuffer dstBuffer, + VkDeviceSize dstOffset, + uint32_t marker); #endif @@ -14590,8 +14574,22 @@ typedef struct VkCheckpointDataNV { void* pCheckpointMarker; } VkCheckpointDataNV; +typedef struct VkQueueFamilyCheckpointProperties2NV { + VkStructureType sType; + void* pNext; + VkPipelineStageFlags2 checkpointExecutionStageMask; +} VkQueueFamilyCheckpointProperties2NV; + +typedef struct VkCheckpointData2NV { + VkStructureType sType; + void* pNext; + VkPipelineStageFlags2 stage; + void* pCheckpointMarker; +} VkCheckpointData2NV; + typedef void (VKAPI_PTR *PFN_vkCmdSetCheckpointNV)(VkCommandBuffer commandBuffer, const void* pCheckpointMarker); typedef void (VKAPI_PTR *PFN_vkGetQueueCheckpointDataNV)(VkQueue queue, uint32_t* pCheckpointDataCount, VkCheckpointDataNV* pCheckpointData); +typedef void (VKAPI_PTR *PFN_vkGetQueueCheckpointData2NV)(VkQueue queue, 
uint32_t* pCheckpointDataCount, VkCheckpointData2NV* pCheckpointData); #ifndef VK_NO_PROTOTYPES VKAPI_ATTR void VKAPI_CALL vkCmdSetCheckpointNV( @@ -14602,6 +14600,11 @@ VKAPI_ATTR void VKAPI_CALL vkGetQueueCheckpointDataNV( VkQueue queue, uint32_t* pCheckpointDataCount, VkCheckpointDataNV* pCheckpointData); + +VKAPI_ATTR void VKAPI_CALL vkGetQueueCheckpointData2NV( + VkQueue queue, + uint32_t* pCheckpointDataCount, + VkCheckpointData2NV* pCheckpointData); #endif @@ -14823,6 +14826,13 @@ typedef struct VkRenderPassFragmentDensityMapCreateInfoEXT { VkAttachmentReference fragmentDensityMapAttachment; } VkRenderPassFragmentDensityMapCreateInfoEXT; +typedef struct VkRenderingFragmentDensityMapAttachmentInfoEXT { + VkStructureType sType; + const void* pNext; + VkImageView imageView; + VkImageLayout imageLayout; +} VkRenderingFragmentDensityMapAttachmentInfoEXT; + // VK_EXT_scalar_block_layout is a preprocessor guard. Do not pass it to API calls. @@ -19641,7 +19651,7 @@ typedef struct VkPhysicalDeviceDeviceGeneratedCommandsPropertiesEXT { typedef struct VkGeneratedCommandsMemoryRequirementsInfoEXT { VkStructureType sType; - void* pNext; + const void* pNext; VkIndirectExecutionSetEXT indirectExecutionSet; VkIndirectCommandsLayoutEXT indirectCommandsLayout; uint32_t maxSequenceCount; @@ -19893,6 +19903,74 @@ typedef struct VkPipelineViewportDepthClampControlCreateInfoEXT { +// VK_HUAWEI_hdr_vivid is a preprocessor guard. Do not pass it to API calls. +#define VK_HUAWEI_hdr_vivid 1 +#define VK_HUAWEI_HDR_VIVID_SPEC_VERSION 1 +#define VK_HUAWEI_HDR_VIVID_EXTENSION_NAME "VK_HUAWEI_hdr_vivid" +typedef struct VkPhysicalDeviceHdrVividFeaturesHUAWEI { + VkStructureType sType; + void* pNext; + VkBool32 hdrVivid; +} VkPhysicalDeviceHdrVividFeaturesHUAWEI; + +typedef struct VkHdrVividDynamicMetadataHUAWEI { + VkStructureType sType; + const void* pNext; + size_t dynamicMetadataSize; + const void* pDynamicMetadata; +} VkHdrVividDynamicMetadataHUAWEI; + + + +// VK_NV_cooperative_matrix2 is a preprocessor guard. Do not pass it to API calls. 
+#define VK_NV_cooperative_matrix2 1 +#define VK_NV_COOPERATIVE_MATRIX_2_SPEC_VERSION 1 +#define VK_NV_COOPERATIVE_MATRIX_2_EXTENSION_NAME "VK_NV_cooperative_matrix2" +typedef struct VkCooperativeMatrixFlexibleDimensionsPropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t MGranularity; + uint32_t NGranularity; + uint32_t KGranularity; + VkComponentTypeKHR AType; + VkComponentTypeKHR BType; + VkComponentTypeKHR CType; + VkComponentTypeKHR ResultType; + VkBool32 saturatingAccumulation; + VkScopeKHR scope; + uint32_t workgroupInvocations; +} VkCooperativeMatrixFlexibleDimensionsPropertiesNV; + +typedef struct VkPhysicalDeviceCooperativeMatrix2FeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 cooperativeMatrixWorkgroupScope; + VkBool32 cooperativeMatrixFlexibleDimensions; + VkBool32 cooperativeMatrixReductions; + VkBool32 cooperativeMatrixConversions; + VkBool32 cooperativeMatrixPerElementOperations; + VkBool32 cooperativeMatrixTensorAddressing; + VkBool32 cooperativeMatrixBlockLoads; +} VkPhysicalDeviceCooperativeMatrix2FeaturesNV; + +typedef struct VkPhysicalDeviceCooperativeMatrix2PropertiesNV { + VkStructureType sType; + void* pNext; + uint32_t cooperativeMatrixWorkgroupScopeMaxWorkgroupSize; + uint32_t cooperativeMatrixFlexibleDimensionsMaxDimension; + uint32_t cooperativeMatrixWorkgroupScopeReservedSharedMemory; +} VkPhysicalDeviceCooperativeMatrix2PropertiesNV; + +typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixFlexibleDimensionsPropertiesNV* pProperties); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV( + VkPhysicalDevice physicalDevice, + uint32_t* pPropertyCount, + VkCooperativeMatrixFlexibleDimensionsPropertiesNV* pProperties); +#endif + + // VK_KHR_acceleration_structure is a preprocessor guard. Do not pass it to API calls. 
#define VK_KHR_acceleration_structure 1 #define VK_KHR_ACCELERATION_STRUCTURE_SPEC_VERSION 13 diff --git a/icd/api/include/khronos/vulkan.h b/icd/api/include/khronos/vulkan.h index 11faf725..b81cc1b3 100644 --- a/icd/api/include/khronos/vulkan.h +++ b/icd/api/include/khronos/vulkan.h @@ -61,9 +61,6 @@ // Internal (under development) extension definitions #include "devext/vk_amd_gpa_interface.h" -#if VKI_TEXEL_BUFFER_EXPLICIT_FORMAT_SUPPORT -#include "devext/vk_amd_shader_texel_buffer_explicit_format.h" -#endif #if VKI_RAY_TRACING #endif diff --git a/icd/api/include/pipeline_binary_cache.h b/icd/api/include/pipeline_binary_cache.h index 372476e4..6e8c619f 100644 --- a/icd/api/include/pipeline_binary_cache.h +++ b/icd/api/include/pipeline_binary_cache.h @@ -41,9 +41,7 @@ namespace Util { class IPlatformKey; -#if ICD_GPUOPEN_DEVMODE_BUILD class IDevMode; -#endif } // namespace Util namespace vk @@ -63,9 +61,7 @@ class PipelineBinaryCache const Vkgc::GfxIpVersion& gfxIp, const vk::RuntimeSettings& settings, const char* pDefaultCacheFilePath, -#if ICD_GPUOPEN_DEVMODE_BUILD vk::IDevMode* pDevMode, -#endif uint32_t expectedEntries, size_t initDataSize, const void* pInitData, @@ -128,7 +124,6 @@ class PipelineBinaryCache uint32_t srcCacheCount, const PipelineBinaryCache** ppSrcCaches); -#if ICD_GPUOPEN_DEVMODE_BUILD Util::Result LoadReinjectionBinary( const CacheId* pInternalPipelineHash, size_t* pPipelineBinarySize, @@ -153,7 +148,6 @@ class PipelineBinaryCache Util::RWLock* GetHashMappingLock() { return &m_hashMappingLock; } -#endif void FreePipelineBinary(const void* pPipelineBinary); @@ -198,13 +192,11 @@ class PipelineBinaryCache bool createArchiveLayers, const RuntimeSettings& settings); -#if ICD_GPUOPEN_DEVMODE_BUILD VkResult InitReinjectionLayer( const RuntimeSettings& settings); Util::Result InjectBinariesFromDirectory( const RuntimeSettings& settings); -#endif VkResult InitMemoryCacheLayer( const RuntimeSettings& settings); @@ -237,13 +229,11 @@ class PipelineBinaryCache Util::ICacheLayer* m_pTopLayer; // Top layer of the cache chain where queries are submitted -#if ICD_GPUOPEN_DEVMODE_BUILD vk::IDevMode* m_pDevMode; Util::ICacheLayer* m_pReinjectionLayer; // Reinjection interface layer HashMapping m_hashMapping; // Maps the internalPipelineHash to the appropriate CacheId Util::RWLock m_hashMappingLock; // Prevents collisions during writes to the map -#endif Util::ICacheLayer* m_pMemoryLayer; diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index 91ca3f51..681428fd 100644 --- a/icd/api/include/pipeline_compiler.h +++ b/icd/api/include/pipeline_compiler.h @@ -315,14 +315,12 @@ class PipelineCompiler #endif -#if ICD_GPUOPEN_DEVMODE_BUILD Util::Result RegisterAndLoadReinjectionBinary( const Pal::PipelineHash* pInternalPipelineHash, const Util::MetroHash::Hash* pCacheId, size_t* pBinarySize, const void** ppPipelineBinary, PipelineCache* pPipelineCache = nullptr); -#endif template PipelineCompilerType CheckCompilerType( diff --git a/icd/api/include/vk_alloccb.h b/icd/api/include/vk_alloccb.h index 05584969..83be9620 100644 --- a/icd/api/include/vk_alloccb.h +++ b/icd/api/include/vk_alloccb.h @@ -84,7 +84,7 @@ class PalAllocator private: PAL_DISALLOW_COPY_AND_ASSIGN(PalAllocator); -#if PAL_MEMTRACK +#if VKI_MEMTRACK // NOTE: Memory leak tracking requires an allocator in order to perform the actual allocations. We can't provide // this platform because that would result in a stack overflow. 
Instead, we define this simple allocator
// structure which contains the necessary methods to allocate and free system memory.
diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h
index 1860b0ec..a1e443fa 100644
--- a/icd/api/include/vk_cmdbuffer.h
+++ b/icd/api/include/vk_cmdbuffer.h
@@ -279,9 +279,10 @@ struct AllGpuRenderState
    // changed for all GPUs if it is changed for any GPU. Putting DirtyGraphicsState management here makes it easier to manage.
    DirtyGraphicsState dirtyGraphics;

-    // Value of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT
-    // defined by the last bound GraphicsPipeline, which was not nullptr.
-    bool viewIndexFromDeviceIndex;
+    // A bit mask indicating which shader stages make use of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT.
+    // 1: pre-raster stages
+    // 2: fragment stage
+    uint32_t viewIndexFromDeviceIndex;

    DynamicRenderingInstance dynamicRenderingInstance;

@@ -1118,7 +1119,8 @@ class CmdBuffer
    void PalCmdAcquire(
        Pal::AcquireReleaseInfo* pAcquireReleaseInfo,
-        const VkEvent            event,
+        uint32_t                 eventCount,
+        const VkEvent*           pEvents,
        Pal::MemBarrier* const   pBufferBarriers,
        const Buffer** const     ppBuffers,
        Pal::ImgBarrier* const   pImageBarriers,
@@ -1602,7 +1604,21 @@ class CmdBuffer
        const VkImageMemoryBarrier* pImageMemoryBarriers,
        Pal::BarrierInfo*           pBarrier);

-    void ExecuteReleaseThenAcquire(
+    void FlushAcquireReleaseBarriers(
+        Pal::AcquireReleaseInfo*    pAcquireReleaseInfo,
+        uint32_t                    eventCount,
+        const VkEvent*              pEvents,
+        Pal::MemBarrier* const      pBufferBarriers,
+        const Buffer** const        ppBuffers,
+        Pal::ImgBarrier* const      pImageBarriers,
+        const Image** const         ppImages,
+        VirtualStackFrame*          pVirtStackFrame,
+        const AcquireReleaseMode    acquireReleaseMode,
+        uint32_t                    deviceMask);
+
+    void ExecuteAcquireRelease(
+        uint32_t                    eventCount,
+        const VkEvent*              pEvents,
        PipelineStageFlags          srcStageMask,
        PipelineStageFlags          dstStageMask,
        uint32_t                    memBarrierCount,
@@ -1610,13 +1626,15 @@ class CmdBuffer
        uint32_t                    bufferMemoryBarrierCount,
        const VkBufferMemoryBarrier* pBufferMemoryBarriers,
        uint32_t                    imageMemoryBarrierCount,
-        const VkImageMemoryBarrier* pImageMemoryBarriers);
+        const VkImageMemoryBarrier* pImageMemoryBarriers,
+        const AcquireReleaseMode    acquireReleaseMode,
+        uint32_t                    rgpBarrierReasonType);

-    void ExecuteAcquireRelease(
+    void ExecuteAcquireRelease2(
        uint32_t                    dependencyCount,
        const VkEvent*              pEvents,
        const VkDependencyInfoKHR*  pDependencyInfos,
-        AcquireReleaseMode          acquireReleaseMode,
+        const AcquireReleaseMode    acquireReleaseMode,
        uint32_t                    rgpBarrierReasonType);

    enum RebindUserDataFlag : uint32_t
diff --git a/icd/api/include/vk_compute_pipeline.h b/icd/api/include/vk_compute_pipeline.h
index 5f01590f..72f983f9 100644
--- a/icd/api/include/vk_compute_pipeline.h
+++ b/icd/api/include/vk_compute_pipeline.h
@@ -161,6 +161,7 @@ class ComputePipeline final : public Pipeline, public NonDispatchable(handle);
#else
    return handle;
diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h
index f3ea14ed..7b77073d 100644
--- a/icd/api/include/vk_extensions.h
+++ b/icd/api/include/vk_extensions.h
@@ -446,6 +446,7 @@ class DeviceExtensions final : public Extensions
        EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION,
        EXT_SHADER_IMAGE_ATOMIC_INT64,
        EXT_SHADER_MODULE_IDENTIFIER,
+        EXT_SHADER_REPLICATED_COMPOSITES,
        EXT_SHADER_STENCIL_EXPORT,
        EXT_SHADER_SUBGROUP_BALLOT,
        EXT_SHADER_SUBGROUP_VOTE,
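The AllGpuRenderState change above widens viewIndexFromDeviceIndex from a bool to a two-bit per-stage mask. With graphics pipeline libraries, the pre-raster and fragment libraries can be created with different VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT settings, which is presumably why a single bool no longer suffices. A hedged sketch of testing such a mask (constant names are illustrative, not the driver's):

#include <cstdint>

// Bit 0: pre-raster stages opted in; bit 1: the fragment stage opted in,
// matching the "1: pre-raster stages / 2: fragment stage" comment above.
constexpr uint32_t ViewIndexFromDeviceIndexPreRaster = 0x1;
constexpr uint32_t ViewIndexFromDeviceIndexFragment  = 0x2;

bool PreRasterUsesDeviceIndex(uint32_t mask) { return (mask & ViewIndexFromDeviceIndexPreRaster) != 0; }
bool FragmentUsesDeviceIndex(uint32_t mask)  { return (mask & ViewIndexFromDeviceIndexFragment) != 0; }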
diff --git a/icd/api/include/vk_graphics_pipeline.h b/icd/api/include/vk_graphics_pipeline.h
index 796d965a..96e45ab3 100644
--- a/icd/api/include/vk_graphics_pipeline.h
+++ b/icd/api/include/vk_graphics_pipeline.h
@@ -215,9 +215,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch
    bool IsPointSizeUsed() const
        { return m_flags.isPointSizeUsed; }

-    // Returns value of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT
-    // defined by flags member of VkGraphicsPipelineCreateInfo.
-    bool ViewIndexFromDeviceIndex() const
+    uint32_t StageMaskForViewIndexUseDeviceIndex() const
        { return m_flags.viewIndexFromDeviceIndex; }

    GraphicsPipelineObjectFlags GetPipelineFlags() const
diff --git a/icd/api/include/vk_physical_device.h b/icd/api/include/vk_physical_device.h
index 7cb9ea74..f7d2940c 100644
--- a/icd/api/include/vk_physical_device.h
+++ b/icd/api/include/vk_physical_device.h
@@ -481,6 +481,12 @@ template
        uint32_t*        pPropertyCount,
        utils::ArrayView properties) const;

+    void GetPhysicalDevicePipelineRobustnessProperties(
+        VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessStorageBuffers,
+        VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessUniformBuffers,
+        VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessVertexInputs,
+        VkPipelineRobustnessImageBehaviorEXT*  defaultRobustnessImages) const;
+
    void GetPhysicalDevice16BitStorageFeatures(
        VkBool32* pStorageBuffer16BitAccess,
        VkBool32* pUniformAndStorageBuffer16BitAccess,
diff --git a/icd/api/include/vk_pipeline_layout.h b/icd/api/include/vk_pipeline_layout.h
index b78a2b06..a4978d00 100644
--- a/icd/api/include/vk_pipeline_layout.h
+++ b/icd/api/include/vk_pipeline_layout.h
@@ -66,6 +66,15 @@ struct UserDataLayout
        // Number of user data registers used for push constants
        uint32_t pushConstRegCount;

+#if VKI_RAY_TRACING
+        // Base user data register index to use for buffer storing ray tracing dispatch arguments
+        // The number of user data registers used is always 1
+        uint32_t dispatchRaysArgsPtrRegBase;
+
+        // Base user data register index to use for ray tracing capture replay VA mapping internal buffer
+        uint32_t rtCaptureReplayConstBufRegBase;
+#endif
+
    } common;

    union
@@ -90,11 +99,6 @@ struct UserDataLayout
            // Base user data register for debug printf
            uint32_t debugPrintfRegBase;

-#if VKI_RAY_TRACING
-            // Base user data register index to use for ray tracing capture replay VA mapping internal buffer
-            uint32_t rtCaptureReplayConstBufRegBase;
-#endif
-
            // Base user data register index to use for thread group order reversal state
            uint32_t threadGroupReversalRegBase;
@@ -115,21 +119,10 @@ struct UserDataLayout
            // Base user data register for debug printf
            uint32_t debugPrintfRegBase;

-#if VKI_RAY_TRACING
-            // Base user data register index to use for buffer storing ray tracing dispatch arguments
-            // The number of user data registers used is always 1
-            uint32_t dispatchRaysArgsPtrRegBase;
-#endif
-
            // Base user data register index to use for the constant buffer used in uber-fetch shader
            // The number of user data register used is always 2
            uint32_t uberFetchConstBufRegBase;

-#if VKI_RAY_TRACING
-            // Base user data register index to use for ray tracing capture replay VA mapping internal buffer
-            uint32_t rtCaptureReplayConstBufRegBase;
-#endif
-
            // Base user data register index to use for thread group order reversal state
            uint32_t threadGroupReversalRegBase;
diff --git a/icd/api/include/vk_queue.h b/icd/api/include/vk_queue.h
index 87add35f..03a2a7ee 100644
--- a/icd/api/include/vk_queue.h
+++ b/icd/api/include/vk_queue.h
@@ -209,15 +209,9 @@ class Queue
    VkResult CreateSqttState(
        void* pMemory);

-    enum
-    {
-        MaxQueueFamilies =
Pal::QueueTypeCount // Maximum number of queue families - , - MaxQueuesPerFamily = 8, // Maximum number of queues per family - MaxMultiQueues = 4, - - MaxSubQueuesInGroup = MaxQueueFamilies * MaxQueuesPerFamily // Maximum number of queues per group - }; + static constexpr uint32_t MaxQueueFamilies = Pal::QueueTypeCount // Maximum number of queue families + ; + static constexpr uint32_t MaxQueuesPerFamily = 8; // Maximum number of queues per family VK_FORCEINLINE Pal::IQueue* PalQueue(int32_t idx) const { diff --git a/icd/api/include/vk_utils.h b/icd/api/include/vk_utils.h index cda14997..25a0d182 100644 --- a/icd/api/include/vk_utils.h +++ b/icd/api/include/vk_utils.h @@ -59,6 +59,7 @@ // Reuse some PAL macros here #define VK_ASSERT PAL_ASSERT #define VK_ASSERT_MSG PAL_ASSERT_MSG +#define VK_ASSERT_ALWAYS_MSG PAL_ASSERT_ALWAYS_MSG #define VK_DEBUG_BUILD_ONLY_ASSERT PAL_DEBUG_BUILD_ONLY_ASSERT #define VK_ALERT PAL_ALERT #define VK_ALERT_ALWAYS_MSG PAL_ALERT_ALWAYS_MSG @@ -85,9 +86,6 @@ #define VK_FORCEINLINE inline #endif -// Wrap _malloca and _freea for compilers other than MSVS -#define VK_ALLOC_A(_numBytes) alloca(_numBytes) - // Default alignment for memory allocation #define VK_DEFAULT_MEM_ALIGN 16 @@ -131,6 +129,17 @@ constexpr uint32_t RayTraceShaderStages = typedef VkPipelineStageFlags2KHR PipelineStageFlags; typedef VkAccessFlags2KHR AccessFlags; +// ===================================================================================================================== +constexpr bool IsGfx11( + Pal::GfxIpLevel gfxLevel) +{ + return ((gfxLevel == Pal::GfxIpLevel::GfxIp11_0) +#if VKI_BUILD_GFX115 + || (gfxLevel == Pal::GfxIpLevel::GfxIp11_5) +#endif + ); +} + namespace utils { diff --git a/icd/api/pipeline_binary_cache.cpp b/icd/api/pipeline_binary_cache.cpp index bd438d7e..32ffb253 100644 --- a/icd/api/pipeline_binary_cache.cpp +++ b/icd/api/pipeline_binary_cache.cpp @@ -39,11 +39,8 @@ #include "palHashMapImpl.h" #include "palFile.h" #include "palLiterals.h" - -#if ICD_GPUOPEN_DEVMODE_BUILD #include "palPipelineAbiReader.h" #include "devmode/devmode_mgr.h" -#endif #include using namespace Util::Literals; @@ -62,9 +59,7 @@ static constexpr size_t ElfTypeStringLen = sizeof(ElfTypeString); const uint32_t PipelineBinaryCache::ArchiveType = Util::HashString(ArchiveTypeString, ArchiveTypeStringLen); const uint32_t PipelineBinaryCache::ElfType = Util::HashString(ElfTypeString, ElfTypeStringLen); -#if ICD_GPUOPEN_DEVMODE_BUILD static Util::Hash128 ParseHash128(const char* str); -#endif bool PipelineBinaryCache::IsValidBlob( VkAllocationCallbacks* pAllocationCallbacks, @@ -108,9 +103,7 @@ PipelineBinaryCache* PipelineBinaryCache::Create( const Vkgc::GfxIpVersion& gfxIp, const RuntimeSettings& settings, const char* pDefaultCacheFilePath, -#if ICD_GPUOPEN_DEVMODE_BUILD vk::IDevMode* pDevMode, -#endif uint32_t expectedEntries, size_t initDataSize, const void* pInitData, @@ -127,10 +120,7 @@ PipelineBinaryCache* PipelineBinaryCache::Create( if (pMem != nullptr) { pObj = VK_PLACEMENT_NEW(pMem) PipelineBinaryCache(pAllocationCallbacks, gfxIp, expectedEntries); - -#if ICD_GPUOPEN_DEVMODE_BUILD pObj->m_pDevMode = pDevMode; -#endif if (pObj->Initialize(settings, createArchiveLayers, pDefaultCacheFilePath, pKey) != VK_SUCCESS) { @@ -192,11 +182,9 @@ PipelineBinaryCache::PipelineBinaryCache( m_palAllocator { pAllocationCallbacks }, m_pPlatformKey { nullptr }, m_pTopLayer { nullptr }, -#if ICD_GPUOPEN_DEVMODE_BUILD m_pDevMode { nullptr }, m_pReinjectionLayer { nullptr }, m_hashMapping { 32, 
    &m_palAllocator },
-#endif
    m_pMemoryLayer { nullptr },
    m_pCompressingLayer { nullptr },
    m_expectedEntries { expectedEntries },
@@ -247,12 +235,10 @@ PipelineBinaryCache::~PipelineBinaryCache()
        FreeMem(m_pCompressingLayer);
    }

-#if ICD_GPUOPEN_DEVMODE_BUILD
    if (m_pReinjectionLayer != nullptr)
    {
        m_pReinjectionLayer->Destroy();
    }
-#endif
 }

// =====================================================================================================================
@@ -405,7 +391,6 @@ Util::Result PipelineBinaryCache::GetPipelineBinary(
    return m_pTopLayer->Load(pQeuryId, pPipelineBinary);
 }

-#if ICD_GPUOPEN_DEVMODE_BUILD
// =====================================================================================================================
// Introduces a mapping from an internal pipeline hash to a cache ID
void PipelineBinaryCache::RegisterHashMapping(
@@ -495,7 +480,8 @@ Util::Result PipelineBinaryCache::StoreReinjectionBinary(
    uint32_t gfxIpMinor = 0u;
    uint32_t gfxIpStepping = 0u;

-    Util::Abi::PipelineAbiReader reader(&m_palAllocator, pPipelineBinary);
+    Util::Abi::PipelineAbiReader reader(&m_palAllocator,
+                                        Util::Span<const void>{ pPipelineBinary, pipelineBinarySize });
    reader.GetGfxIpVersion(&gfxIpMajor, &gfxIpMinor, &gfxIpStepping);

    if (gfxIpMajor == m_gfxIp.major &&
@@ -516,7 +502,6 @@ Util::Result PipelineBinaryCache::StoreReinjectionBinary(
    return result;
 }
-#endif

// =====================================================================================================================
// Free memory allocated by our allocator
void PipelineBinaryCache::FreePipelineBinary(
@@ -529,12 +514,10 @@
// Destroy PipelineBinaryCache itself
void PipelineBinaryCache::Destroy()
 {
-#if ICD_GPUOPEN_DEVMODE_BUILD
    if (m_pDevMode != nullptr)
    {
        m_pDevMode->DeregisterPipelineCache(this);
    }
-#endif

    VkAllocationCallbacks* pAllocationCallbacks = m_pAllocationCallbacks;
    void* pMem = this;
@@ -571,7 +554,6 @@ VkResult PipelineBinaryCache::Initialize(
        result = OrderLayers(settings);
    }

-#if ICD_GPUOPEN_DEVMODE_BUILD
    if ((result == VK_SUCCESS) &&
        (m_pReinjectionLayer != nullptr))
    {
@@ -593,7 +575,6 @@ VkResult PipelineBinaryCache::Initialize(
            PAL_ASSERT_ALWAYS();
        }
    }
-#endif

    if (result == VK_SUCCESS)
    {
@@ -604,7 +585,6 @@ VkResult PipelineBinaryCache::Initialize(
    return result;
 }

-#if ICD_GPUOPEN_DEVMODE_BUILD
// =====================================================================================================================
// Initialize reinjection cache layer
VkResult PipelineBinaryCache::InitReinjectionLayer(
@@ -805,7 +785,6 @@ Util::Result PipelineBinaryCache::InjectBinariesFromDirectory(
    return result;
 }
-#endif

// =====================================================================================================================
// Initialize memory layer
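The reinjection path above keys everything on a mapping from internal pipeline hashes to cache IDs, guarded by a reader/writer lock. A rough sketch of that pattern using standard-library stand-ins for PAL's HashMap and RWLock (all names here are illustrative, not the driver's):

#include <cstdint>
#include <optional>
#include <shared_mutex>
#include <unordered_map>

struct CacheId { uint8_t bytes[16]; };

class HashMappingSketch
{
public:
    void Register(uint64_t internalPipelineHash, const CacheId& id)
    {
        std::unique_lock lock(m_lock); // exclusive: concurrent writes must not collide
        m_map[internalPipelineHash] = id;
    }

    std::optional<CacheId> Find(uint64_t internalPipelineHash) const
    {
        std::shared_lock lock(m_lock); // shared: lookups may proceed in parallel
        auto it = m_map.find(internalPipelineHash);
        return (it == m_map.end()) ? std::nullopt : std::optional<CacheId>(it->second);
    }

private:
    mutable std::shared_mutex m_lock;
    std::unordered_map<uint64_t, CacheId> m_map;
};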
-#ifdef ICD_X86_BUILD +#ifdef VKI_X86_BUILD createInfo.maxMemorySize = 192_MiB; #else createInfo.maxMemorySize = 4_GiB; @@ -929,14 +908,7 @@ Util::IArchiveFile* PipelineBinaryCache::OpenReadOnlyArchive( { Util::Result openResult = Util::OpenArchiveFile(&info, pMem, &pFile); - if (openResult == Util::Result::Success) - { - if (info.useBufferedReadMemory) - { - pFile->Preload(0, info.maxReadBufferMem); - } - } - else + if (openResult != Util::Result::Success) { FreeMem(pMem); pFile = nullptr; @@ -992,14 +964,7 @@ Util::IArchiveFile* PipelineBinaryCache::OpenWritableArchive( } } - if (openResult == Util::Result::Success) - { - if (info.useBufferedReadMemory) - { - pFile->Preload(0, info.maxReadBufferMem); - } - } - else + if (openResult != Util::Result::Success) { FreeMem(pMem); pFile = nullptr; @@ -1275,11 +1240,7 @@ VkResult PipelineBinaryCache::InitLayers( bool createArchiveLayers, const RuntimeSettings& settings) { -#if ICD_GPUOPEN_DEVMODE_BUILD bool injectionLayerOnline = (InitReinjectionLayer(settings) >= VK_SUCCESS); -#else - bool injectionLayerOnline = false; -#endif bool memoryLayerOnline = (InitMemoryCacheLayer(settings) >= VK_SUCCESS); diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index 168832b3..69fbc4d2 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -206,7 +206,7 @@ static bool SupportInternalModuleCache( supportInternalModuleCache = false; } -#if ICD_X86_BUILD +#if VKI_X86_BUILD supportInternalModuleCache = false; #endif @@ -363,9 +363,7 @@ VkResult PipelineCompiler::Initialize() m_gfxIp, settings, m_pPhysicalDevice->PalDevice()->GetCacheFilePath(), -#if ICD_GPUOPEN_DEVMODE_BUILD m_pPhysicalDevice->VkInstance()->GetDevModeMgr(), -#endif 0, 0, nullptr, @@ -890,7 +888,8 @@ void PipelineCompiler::ReplacePipelineIsaCode( return; } - Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(), pipelineBinary.pCode); + Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(), + Util::Span{ pipelineBinary.pCode, pipelineBinary.codeSize}); Pal::Result palResult = abiReader.Init(); if (palResult != Pal::Result::Success) { @@ -919,7 +918,7 @@ void PipelineCompiler::ReplacePipelineIsaCode( }; for (const auto& simbolTypeEntry : stageSymbolTypes) { - const Util::Elf::SymbolTableEntry* pEntry = abiReader.GetPipelineSymbol(simbolTypeEntry); + const Util::Elf::SymbolTableEntry* pEntry = abiReader.GetSymbolHeader(simbolTypeEntry); if (pEntry != nullptr) { shaderStageSymbols.push_back(pEntry); @@ -1772,6 +1771,10 @@ static void CopyPipelineShadersInfo( if ((shaderMask & (1 << stage)) != 0) { *pShaderInfosDst[stage] = *pShaderInfosSrc[stage]; + if (libInfo.flags & VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT) + { + pShaderInfosDst[stage]->options.viewIndexFromDeviceIndex = true; + } } } @@ -1837,6 +1840,7 @@ static void MergePipelineOptions( pDst->pageMigrationEnabled |= src.pageMigrationEnabled; pDst->optimizationLevel |= src.optimizationLevel; pDst->glState.disableTruncCoordForGather |= src.glState.disableTruncCoordForGather; + pDst->optimizePointSizeWrite |= src.optimizePointSizeWrite; pDst->shadowDescriptorTableUsage = src.shadowDescriptorTableUsage; pDst->shadowDescriptorTablePtrHigh = src.shadowDescriptorTablePtrHigh; pDst->overrideThreadGroupSizeX = src.overrideThreadGroupSizeX; @@ -1899,10 +1903,7 @@ static void CopyFragmentOutputInterfaceState( { const GraphicsPipelineBinaryCreateInfo& libInfo = pLibrary->GetPipelineBinaryCreateInfo(); - for (uint32_t i = 0; i < Vkgc::MaxColorTargets; 
++i) - { - pCreateInfo->pipelineInfo.cbState.target[i] = libInfo.pipelineInfo.cbState.target[i]; - } + pCreateInfo->pipelineInfo.cbState = libInfo.pipelineInfo.cbState; pCreateInfo->pipelineInfo.rsState.perSampleShading = libInfo.pipelineInfo.rsState.perSampleShading; pCreateInfo->pipelineInfo.rsState.dynamicSampleInfo = libInfo.pipelineInfo.rsState.dynamicSampleInfo; @@ -1911,9 +1912,6 @@ static void CopyFragmentOutputInterfaceState( pCreateInfo->pipelineInfo.rsState.pixelShaderSamples = libInfo.pipelineInfo.rsState.pixelShaderSamples; pCreateInfo->dbFormat = libInfo.dbFormat; - pCreateInfo->pipelineInfo.cbState.alphaToCoverageEnable = libInfo.pipelineInfo.cbState.alphaToCoverageEnable; - pCreateInfo->pipelineInfo.cbState.dualSourceBlendEnable = libInfo.pipelineInfo.cbState.dualSourceBlendEnable; - pCreateInfo->pipelineInfo.cbState.dualSourceBlendDynamic = libInfo.pipelineInfo.cbState.dualSourceBlendDynamic; pCreateInfo->pipelineInfo.iaState.enableMultiView = libInfo.pipelineInfo.iaState.enableMultiView; pCreateInfo->cbStateHash = libInfo.cbStateHash; } @@ -3319,6 +3317,7 @@ void PipelineCompiler::ApplyPipelineOptions( pOptions->enableRelocatableShaderElf = settings.enableRelocatableShaders; pOptions->disableImageResourceCheck = settings.disableImageResourceTypeCheck; pOptions->optimizeTessFactor = settings.optimizeTessFactor != OptimizeTessFactorDisable; + pOptions->optimizePointSizeWrite = true; pOptions->forceCsThreadIdSwizzling = settings.forceCsThreadIdSwizzling; pOptions->overrideThreadGroupSizeX = settings.overrideThreadGroupSizeX; pOptions->overrideThreadGroupSizeY = settings.overrideThreadGroupSizeY; @@ -4444,6 +4443,7 @@ void PipelineCompiler::SetRayTracingState( pRtState->enableRayQueryCsSwizzle = settings.rtEnableRayQueryCsSwizzle; pRtState->enableDispatchRaysInnerSwizzle = settings.rtEnableDispatchRaysInnerSwizzle; pRtState->enableDispatchRaysOuterSwizzle = settings.rtEnableDispatchRaysOuterSwizzle; + pRtState->forceInvalidAccelStruct = settings.forceInvalidAccelStruct; pRtState->ldsStackSize = settings.ldsStackSize; pRtState->enableOptimalLdsStackSizeForIndirect = settings.enableOptimalLdsStackSizeForIndirect; pRtState->enableOptimalLdsStackSizeForUnified = settings.enableOptimalLdsStackSizeForUnified; @@ -4788,7 +4788,6 @@ bool PipelineCompiler::BuildRayTracingPipelineBinary( #endif // ===================================================================================================================== -#if ICD_GPUOPEN_DEVMODE_BUILD Util::Result PipelineCompiler::RegisterAndLoadReinjectionBinary( const Pal::PipelineHash* pInternalPipelineHash, const Util::MetroHash::Hash* pCacheId, @@ -4829,7 +4828,6 @@ Util::Result PipelineCompiler::RegisterAndLoadReinjectionBinary( return result; } -#endif // ===================================================================================================================== // Filter VkPipelineCreateFlags2KHR to only values used for pipeline caching @@ -5236,8 +5234,25 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalDataImp( case VK_FORMAT_R8G8_SINT: case VK_FORMAT_R8G8B8A8_UINT: case VK_FORMAT_R8G8B8A8_SNORM: + case VK_FORMAT_R16G16_UNORM: + case VK_FORMAT_R16G16_SNORM: + case VK_FORMAT_R16G16_USCALED: + case VK_FORMAT_R16G16_SSCALED: + case VK_FORMAT_R16G16_UINT: + case VK_FORMAT_R16G16_SINT: case VK_FORMAT_R16G16_SFLOAT: + case VK_FORMAT_R16G16B16_UNORM: + case VK_FORMAT_R16G16B16_SNORM: + case VK_FORMAT_R16G16B16_USCALED: + case VK_FORMAT_R16G16B16_SSCALED: + case VK_FORMAT_R16G16B16_UINT: + case VK_FORMAT_R16G16B16_SINT: + 
case VK_FORMAT_R16G16B16A16_UNORM: + case VK_FORMAT_R16G16B16A16_SNORM: case VK_FORMAT_R16G16B16A16_USCALED: + case VK_FORMAT_R16G16B16A16_SSCALED: + case VK_FORMAT_R16G16B16A16_UINT: + case VK_FORMAT_R16G16B16A16_SINT: stride = 1; break; default: diff --git a/icd/api/raytrace/ray_tracing_device.cpp b/icd/api/raytrace/ray_tracing_device.cpp index 06e85c1f..65d0cab0 100644 --- a/icd/api/raytrace/ray_tracing_device.cpp +++ b/icd/api/raytrace/ray_tracing_device.cpp @@ -38,10 +38,7 @@ #include "palArchiveFile.h" #include "gpurt/gpurtLib.h" #include "g_gpurtOptions.h" - -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" -#endif namespace vk { @@ -233,7 +230,6 @@ void RayTracingDevice::CreateGpuRtDeviceSettings( pDeviceSettings->enableParallelUpdate = settings.rtEnableUpdateParallel; pDeviceSettings->enableParallelBuild = settings.rtEnableBuildParallel; pDeviceSettings->parallelBuildWavesPerSimd = settings.buildParallelWavesPerSimd; - pDeviceSettings->enableAcquireReleaseInterface = settings.rtEnableAcquireReleaseInterface; pDeviceSettings->bvhCpuBuildModeFastTrace = static_cast(settings.rtBvhCpuBuildMode); pDeviceSettings->bvhCpuBuildModeDefault = static_cast(settings.rtBvhCpuBuildMode); pDeviceSettings->bvhCpuBuildModeFastBuild = static_cast(settings.rtBvhCpuBuildMode); @@ -257,6 +253,7 @@ void RayTracingDevice::CreateGpuRtDeviceSettings( pDeviceSettings->enableInsertBarriersInBuildAS = settings.enableInsertBarriersInBuildAs; pDeviceSettings->numMortonSizeBits = settings.numMortonSizeBits; pDeviceSettings->allowFp16BoxNodesInUpdatableBvh = settings.rtAllowFp16BoxNodesInUpdatableBvh; + pDeviceSettings->fp16BoxNodesRequireCompaction = settings.fp16BoxNodesRequireCompactionFlag; // Enable AS stats based on panel setting pDeviceSettings->enableBuildAccelStructStats = settings.rtEnableBuildAccelStructStats; @@ -308,6 +305,9 @@ void RayTracingDevice::CollectGpurtOptions( } *optionMap.FindKey(GpuRt::ThreadTraceEnabledOptionNameHash) = threadTraceEnabled; + *optionMap.FindKey(GpuRt::PersistentLaunchEnabledOptionNameHash) = + (settings.rtPersistentDispatchRaysFactor > 0.0f) ? 1 : 0; + pGpurtOptions->Clear(); for (auto it = optionMap.Begin(); it.Get() != nullptr; it.Next()) { @@ -574,6 +574,15 @@ Pal::Result RayTracingDevice::InitCmdContext( cmdBufInfo.queueType = Pal::QueueTypeUniversal; queueHandle = m_pDevice->GetQueue(cmdBufInfo.engineType, cmdBufInfo.queueType); + + if (queueHandle == VK_NULL_HANDLE) + { + // Could not find a universal queue, try transfer + cmdBufInfo.engineType = Pal::EngineTypeDma; + cmdBufInfo.queueType = Pal::QueueTypeDma; + + queueHandle = m_pDevice->GetQueue(cmdBufInfo.engineType, cmdBufInfo.queueType); + } } Pal::Result result = (queueHandle != VK_NULL_HANDLE) ? 
Pal::Result::Success : Pal::Result::ErrorUnknown; @@ -914,8 +923,12 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorConstBuffer; break; case GpuRt::NodeType::SrvTable: + pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorResource; + pSubNode->srdRange.strideInDwords = untypedBufferSrdSizeDw; + break; case GpuRt::NodeType::TypedSrvTable: pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorResource; + pSubNode->srdRange.strideInDwords = typedBufferSrdSizeDw; break; default: VK_NEVER_CALLED(); @@ -952,15 +965,31 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( compileConstants.pConstants }; - bool forceWave64 = false; + constexpr uint32_t CompilerOptionWaveSize = Util::HashLiteralString("waveSize"); + constexpr uint32_t CompilerOptionValueWave32 = Util::HashLiteralString("Wave32"); + constexpr uint32_t CompilerOptionValueWave64 = Util::HashLiteralString("Wave64"); - // Overide wave size for these GpuRT shader types - if (((buildInfo.shaderType == GpuRt::InternalRayTracingCsType::BuildBVHTD) || - (buildInfo.shaderType == GpuRt::InternalRayTracingCsType::BuildBVHTDTR) || - (buildInfo.shaderType == GpuRt::InternalRayTracingCsType::BuildParallel) || - (buildInfo.shaderType == GpuRt::InternalRayTracingCsType::BuildQBVH))) + ShaderWaveSize waveSize = ShaderWaveSize::WaveSizeAuto; + + for (uint32_t i = 0; i < buildInfo.hashedCompilerOptionCount; ++i) { - forceWave64 = true; + const GpuRt::PipelineCompilerOption& compilerOption = buildInfo.pHashedCompilerOptions[i]; + + switch (compilerOption.hashedOptionName) + { + case CompilerOptionWaveSize: + if (compilerOption.value == CompilerOptionValueWave32) + { + waveSize = ShaderWaveSize::WaveSize32; + } + else if (compilerOption.value == CompilerOptionValueWave64) + { + waveSize = ShaderWaveSize::WaveSize64; + } + break; + default: + VK_ASSERT_ALWAYS_MSG("Unknown GPURT setting! Handle it!"); + } } result = pDevice->CreateInternalComputePipeline(spvBin.codeSize, @@ -968,7 +997,7 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( buildInfo.nodeCount, nodes, ShaderModuleInternalRayTracingShader, - forceWave64, + waveSize, &specializationInfo, &pDevice->GetInternalRayTracingPipeline()); diff --git a/icd/api/raytrace/vk_acceleration_structure.h b/icd/api/raytrace/vk_acceleration_structure.h index a245a7e6..7391efd7 100644 --- a/icd/api/raytrace/vk_acceleration_structure.h +++ b/icd/api/raytrace/vk_acceleration_structure.h @@ -39,7 +39,6 @@ class Buffer; class DeferredHostOperation; class Device; struct GeometryConvertHelper; -class VirtualStackFrame; // ===================================================================================================================== // VkAccelerationStructureKHR (VK_KHR_acceleration_structure) diff --git a/icd/api/raytrace/vk_ray_tracing_pipeline.cpp b/icd/api/raytrace/vk_ray_tracing_pipeline.cpp index e75aad8d..cdbb344a 100644 --- a/icd/api/raytrace/vk_ray_tracing_pipeline.cpp +++ b/icd/api/raytrace/vk_ray_tracing_pipeline.cpp @@ -1708,7 +1708,6 @@ VkResult RayTracingPipeline::CreateImpl( pAllocator); } } -#if ICD_GPUOPEN_DEVMODE_BUILD // Temporarily reinject post Pal pipeline creation (when the internal pipeline hash is available). // The reinjection cache layer can be linked back into the pipeline cache chain once the // Vulkan pipeline cache key can be stored (and read back) inside the ELF as metadata. 
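Looking back at the wave-size plumbing earlier in this hunk: GPURT now hands the driver compiler options as pre-hashed (name, value) pairs, and the driver matches them against compile-time hashes of known strings such as "waveSize". A self-contained sketch of that matching pattern follows; the FNV-1a hash here is purely illustrative, and Util::HashLiteralString may use a different algorithm:

    #include <cstdint>

    // Illustrative compile-time string hash (FNV-1a); stands in for
    // Util::HashLiteralString, whose actual algorithm may differ.
    constexpr uint32_t HashLiteral(const char* s, uint32_t h = 2166136261u)
    {
        return (*s == '\0') ? h : HashLiteral(s + 1, (h ^ static_cast<uint32_t>(*s)) * 16777619u);
    }

    struct HashedOption { uint32_t hashedOptionName; uint32_t value; };

    // Mirrors the switch over buildInfo.pHashedCompilerOptions above:
    // returns 32, 64, or 0 (auto) for the requested wave size.
    uint32_t ResolveWaveSize(const HashedOption* pOptions, uint32_t count)
    {
        constexpr uint32_t OptWaveSize = HashLiteral("waveSize");
        constexpr uint32_t ValWave32   = HashLiteral("Wave32");
        constexpr uint32_t ValWave64   = HashLiteral("Wave64");

        uint32_t waveSize = 0;
        for (uint32_t i = 0; i < count; ++i)
        {
            if (pOptions[i].hashedOptionName == OptWaveSize)
            {
                waveSize = (pOptions[i].value == ValWave32) ? 32u :
                           (pOptions[i].value == ValWave64) ? 64u : 0u;
            }
        }
        return waveSize;
    }

The benefit of this shape is that no string comparisons happen at pipeline-build time; both sides of every comparison are plain 32-bit constants.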
@@ -1739,7 +1738,6 @@ VkResult RayTracingPipeline::CreateImpl(
palResult = Util::Result::Success;
}
}
-#endif
}
result = PalToVkResult(palResult);
@@ -1985,7 +1983,6 @@ static int32_t DeferredCreateRayTracingPipelineCallback(
pState->pAllocator,
pOperation->Workload(index));
-#if ICD_GPUOPEN_DEVMODE_BUILD
if (localResult == VK_SUCCESS)
{
IDevMode* pDevMode = pDevice->VkInstance()->GetDevModeMgr();
@@ -2000,7 +1997,6 @@
}
}
}
-#endif
}
if (localResult != VK_SUCCESS)
@@ -2244,6 +2240,7 @@ VkResult RayTracingPipeline::GetPipelineExecutableProperties(
// =====================================================================================================================
VkResult RayTracingPipeline::GetRayTracingShaderDisassembly(
Util::Abi::PipelineSymbolType pipelineSymbolType,
+ size_t binarySize,
const void* pBinaryCode,
size_t* pBufferSize,
void* pBuffer
@@ -2251,7 +2248,8 @@
{
// To extract the shader code, we can re-parse the saved ELF binary and look up the shader's program
// instructions by examining the symbol table entry for that shader's entrypoint.
- Util::Abi::PipelineAbiReader abiReader(m_pDevice->VkInstance()->Allocator(), pBinaryCode);
+ Util::Abi::PipelineAbiReader abiReader(m_pDevice->VkInstance()->Allocator(),
+ Util::Span{pBinaryCode, binarySize});
VkResult result = VK_SUCCESS;
Pal::Result palResult = abiReader.Init();
@@ -2264,32 +2262,34 @@
VK_ASSERT((pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderDisassembly) ||
(pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderAmdIl));
- const Util::Elf::SymbolTableEntry* pSymbolEntry = nullptr;
const char* pSectionName = nullptr;
if (pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderDisassembly)
{
- pSymbolEntry = abiReader.GetPipelineSymbol(
- Util::Abi::GetSymbolForStage(
- Util::Abi::PipelineSymbolType::ShaderDisassembly,
- Util::Abi::HardwareStage::Cs));
+ palResult = abiReader.CopySymbol(
+ Util::Abi::GetSymbolForStage(
+ Util::Abi::PipelineSymbolType::ShaderDisassembly,
+ Util::Abi::HardwareStage::Cs),
+ pBufferSize,
+ pBuffer);
+ pSectionName = Util::Abi::AmdGpuDisassemblyName;
+ symbolValid = palResult == Util::Result::Success;
}
else if (pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderAmdIl)
{
- pSymbolEntry = abiReader.GetPipelineSymbol(
- Util::Abi::GetSymbolForStage(
- Util::Abi::PipelineSymbolType::ShaderAmdIl,
- Util::Abi::ApiShaderType::Cs));
+ palResult = abiReader.CopySymbol(
+ Util::Abi::GetSymbolForStage(
+ Util::Abi::PipelineSymbolType::ShaderAmdIl,
+ Util::Abi::ApiShaderType::Cs),
+ pBufferSize,
+ pBuffer);
+ pSectionName = Util::Abi::AmdGpuCommentLlvmIrName;
+ symbolValid = palResult == Util::Result::Success;
}
- if (pSymbolEntry != nullptr)
- {
- palResult = abiReader.GetElfReader().CopySymbol(*pSymbolEntry, pBufferSize, pBuffer);
- symbolValid = palResult == Util::Result::Success;
- }
- else if (pSectionName != nullptr)
+ if ((symbolValid == false) && (pSectionName != nullptr))
{
const auto& elfReader = abiReader.GetElfReader();
Util::ElfReader::SectionId disassemblySectionId = elfReader.FindSection(pSectionName);
@@ -2406,6 +2406,7 @@ VkResult RayTracingPipeline::GetPipelineExecutableInternalRepresentations(
// Get the text based ISA disassembly of the shader
VkResult result = GetRayTracingShaderDisassembly(
Util::Abi::PipelineSymbolType::ShaderDisassembly,
+ static_cast<size_t>(binarySize),
pBinaryCode,
&(pInternalRepresentations[entry].dataSize),
pInternalRepresentations[entry].pData);
@@ -2608,9 +2609,7 @@ void RayTracingPipeline::BindToCmdBuffer(
uint32_t* pCpuAddr = pPalCmdBuf->CmdAllocateEmbeddedData(dwordSize, 1, &gpuAddress);
memcpy(pCpuAddr, m_captureReplayVaMappingBufferInfo.pData, m_captureReplayVaMappingBufferInfo.dataSize);
- uint32_t rtCaptureReplayConstBufRegBase = (m_userDataLayout.scheme == PipelineLayoutScheme::Compact) ?
- m_userDataLayout.compact.rtCaptureReplayConstBufRegBase :
- m_userDataLayout.indirect.rtCaptureReplayConstBufRegBase;
+ const uint32_t rtCaptureReplayConstBufRegBase = m_userDataLayout.common.rtCaptureReplayConstBufRegBase;
pPalCmdBuf->CmdSetUserData(Pal::PipelineBindPoint::Compute,
rtCaptureReplayConstBufRegBase,
@@ -2802,6 +2801,29 @@ uint32_t RayTracingPipeline::UpdateShaderGroupIndex(
return (shader == VK_SHADER_UNUSED_KHR) ? VK_SHADER_UNUSED_KHR : idx;
}
+// =====================================================================================================================
+uint32_t RayTracingPipeline::PersistentDispatchSize(
+ uint32_t width,
+ uint32_t height,
+ uint32_t depth
+ ) const
+{
+ const Pal::DispatchDims dispatchSize = GetDispatchSize({ .x = width, .y = height, .z = depth });
+
+ // Groups needed to cover the x, y, and z dimensions of a persistent dispatch
+ // For large dispatches, this will be limited by the size of the GPU because we want just enough groups to fill it
+ // For small dispatches, there will be even fewer groups; don't launch groups that will have nothing to do
+ const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings();
+ const Pal::DeviceProperties& deviceProp = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties();
+ const auto& props = deviceProp.gfxipProperties.shaderCore;
+ const uint32 rayDispatchMaxGroups = settings.rtPersistentDispatchRaysFactor *
+ (props.numAvailableCus * props.numSimdsPerCu * props.numWavefrontsPerSimd);
+ const uint32 persistentDispatchSize = Util::Min(rayDispatchMaxGroups,
+ (dispatchSize.x * dispatchSize.y * dispatchSize.z));
+
+ return persistentDispatchSize;
+}
+
// =====================================================================================================================
Pal::DispatchDims RayTracingPipeline::GetDispatchSize(
Pal::DispatchDims size) const
diff --git a/icd/api/raytrace/vk_ray_tracing_pipeline.h b/icd/api/raytrace/vk_ray_tracing_pipeline.h
index 2cf2d6ed..d7148fab 100644
--- a/icd/api/raytrace/vk_ray_tracing_pipeline.h
+++ b/icd/api/raytrace/vk_ray_tracing_pipeline.h
@@ -296,11 +296,17 @@ class RayTracingPipeline final : public Pipeline, public NonDispatchableVkInstance()->Allocator()),
m_userMarkerOpHistory(pCmdBuf->VkInstance()->Allocator()),
m_userMarkerStrings(pCmdBuf->VkInstance()->Allocator())
@@ -325,12 +323,10 @@ void SqttCmdBufferState::Begin(
m_userMarkerOpHistory.Clear();
m_userMarkerStrings.Clear();
-#if ICD_GPUOPEN_DEVMODE_BUILD
if (m_pDevMode != nullptr)
{
m_instructionTrace.targetHash = m_pDevMode->GetInstructionTraceTargetHash();
}
-#endif
m_cbId = m_pSqttMgr->GetNextCmdBufID(m_pCmdBuf->GetQueueFamilyIndex(), pBeginInfo);
@@ -349,7 +345,6 @@
// Inserts a CbEnd marker when command buffer building has finished.
void SqttCmdBufferState::End()
{
-#if ICD_GPUOPEN_DEVMODE_BUILD
// If instruction tracing was enabled for this Command List,
// insert a barrier used to wait for all trace data to finish writing.
if (m_instructionTrace.started && m_settings.rgpInstTraceBarrierEnabled) @@ -378,18 +373,15 @@ void SqttCmdBufferState::End() m_pCmdBuf->PalCmdBuffer(DefaultDeviceIndex)->CmdBarrier(barrierInfo); } -#endif WriteCbEndMarker(); -#if ICD_GPUOPEN_DEVMODE_BUILD if ((m_pDevMode != nullptr) && (m_instructionTrace.started)) { m_pDevMode->StopInstructionTrace(m_pCmdBuf); m_instructionTrace.started = false; } -#endif } // ===================================================================================================================== @@ -415,10 +407,25 @@ void SqttCmdBufferState::WriteMarker( ) const { VK_ASSERT(m_enabledMarkers != 0); + + WriteMarker(m_pCmdBuf->PalCmdBuffer(DefaultDeviceIndex), + pData, + dataSize, + subQueueFlags); +} + +// ===================================================================================================================== +void SqttCmdBufferState::WriteMarker( + Pal::ICmdBuffer* pPalCmdBuffer, + const void* pData, + size_t dataSize, + Pal::RgpMarkerSubQueueFlags subQueueFlags + ) +{ VK_ASSERT((dataSize % sizeof(uint32_t)) == 0); VK_ASSERT((dataSize / sizeof(uint32_t)) > 0); - m_pCmdBuf->PalCmdBuffer(DefaultDeviceIndex)->CmdInsertRgpTraceMarker( + pPalCmdBuffer->CmdInsertRgpTraceMarker( subQueueFlags, static_cast(dataSize / sizeof(uint32_t)), pData); @@ -554,6 +561,28 @@ void SqttCmdBufferState::WriteUserEventMarker( } } +// ===================================================================================================================== +// Insert an event marker for a PAL internal event such as a dispatch initiated from PAL. +void SqttCmdBufferState::WritePalInternalEventMarker( + Pal::ICmdBuffer* pPalCmdBuffer, + Pal::DispatchInfoFlags infoFlags, + Pal::RgpMarkerSubQueueFlags subQueueFlags) +{ + RgpSqttMarkerEventType apiType = RgpSqttMarkerEventType::CmdUnknown; + + if (infoFlags.devDriverOverlay) + { + apiType = RgpSqttMarkerEventType::CmdDispatchDevDriverOverlay; + } + + RgpSqttMarkerEvent marker = {}; + + marker.identifier = RgpSqttMarkerIdentifierEvent; + marker.apiType = static_cast(apiType); + + WriteMarker(pPalCmdBuffer, &marker, sizeof(marker), subQueueFlags); +} + // ==================================================================================================================== void SqttCmdBufferState::RgdAnnotateCmdBuf() { @@ -1055,7 +1084,6 @@ void SqttCmdBufferState::PipelineBound( { const Pipeline* pPipeline = Pipeline::BaseObjectFromHandle(pipeline); -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevMode != nullptr) { if ((m_instructionTrace.started == false) && @@ -1066,7 +1094,6 @@ void SqttCmdBufferState::PipelineBound( m_instructionTrace.started = true; } } -#endif } } @@ -1143,27 +1170,33 @@ void SqttCmdBufferState::DebugMarkerInsert( void SqttCmdBufferState::DebugLabelBegin( const VkDebugUtilsLabelEXT* pMarkerInfo) { - DevUserMarkerString userMarkerString = {}; - userMarkerString.length = static_cast(strlen(pMarkerInfo->pLabelName)) + 1; - Util::Strncpy(userMarkerString.string, pMarkerInfo->pLabelName, sizeof(userMarkerString.string)); - m_userMarkerStrings.PushBack(userMarkerString); + if (m_pDevMode->IsCrashAnalysisEnabled() == false) + { + DevUserMarkerString userMarkerString = {}; + userMarkerString.length = static_cast(strlen(pMarkerInfo->pLabelName)) + 1; + Util::Strncpy(userMarkerString.string, pMarkerInfo->pLabelName, sizeof(userMarkerString.string)); + m_userMarkerStrings.PushBack(userMarkerString); - Pal::Developer::UserMarkerOpInfo opInfo = {}; - opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Push); - 
opInfo.strIndex = static_cast(m_userMarkerStrings.size());
- m_userMarkerOpHistory.PushBack(opInfo.u32All);
+ Pal::Developer::UserMarkerOpInfo opInfo = {};
+ opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Push);
+ opInfo.strIndex = static_cast(m_userMarkerStrings.size());
+ m_userMarkerOpHistory.PushBack(opInfo.u32All);
- WriteUserEventMarker(RgpSqttMarkerUserEventPush, pMarkerInfo->pLabelName);
+ WriteUserEventMarker(RgpSqttMarkerUserEventPush, pMarkerInfo->pLabelName);
+ }
}
// =====================================================================================================================
void SqttCmdBufferState::DebugLabelEnd()
{
- Pal::Developer::UserMarkerOpInfo opInfo = {};
- opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Pop);
- m_userMarkerOpHistory.PushBack(opInfo.u32All);
+ if (m_pDevMode->IsCrashAnalysisEnabled() == false)
+ {
+ Pal::Developer::UserMarkerOpInfo opInfo = {};
+ opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Pop);
+ m_userMarkerOpHistory.PushBack(opInfo.u32All);
- WriteUserEventMarker(RgpSqttMarkerUserEventPop, nullptr);
+ WriteUserEventMarker(RgpSqttMarkerUserEventPop, nullptr);
+ }
}
// =====================================================================================================================
@@ -2347,9 +2380,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateGraphicsPipelines(
}
}
-#if ICD_GPUOPEN_DEVMODE_BUILD
pDevMode->PipelineCreated(pDevice, pPipeline);
-#endif
}
}
}
@@ -2395,9 +2426,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateComputePipelines(
pCreateInfos[i].stage.module;
}
-#if ICD_GPUOPEN_DEVMODE_BUILD
pDevMode->PipelineCreated(pDevice, pPipeline);
-#endif
}
}
}
@@ -2449,7 +2478,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateRayTracingPipelinesKHR(
}
}
-#if ICD_GPUOPEN_DEVMODE_BUILD
if (result != VK_OPERATION_DEFERRED_KHR)
{
pDevMode->PipelineCreated(pDevice, pPipeline);
@@ -2459,7 +2487,6 @@
pDevMode->ShaderLibrariesCreated(pDevice, pPipeline);
}
}
-#endif
}
}
}
@@ -2621,7 +2648,6 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyPipeline(
SqttMgr* pSqtt = pDevice->GetSqttMgr();
IDevMode* pDevMode = pDevice->VkInstance()->GetDevModeMgr();
-#if ICD_GPUOPEN_DEVMODE_BUILD
if (pDevice->GetRuntimeSettings().devModeShaderIsaDbEnable && (pDevMode != nullptr))
{
if (VK_NULL_HANDLE != pipeline)
@@ -2643,7 +2669,6 @@
#endif
}
}
-#endif
return SQTT_CALL_NEXT_LAYER(vkDestroyPipeline)(device, pipeline, pAllocator);
}
@@ -2799,7 +2824,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkSetDebugUtilsObjectTagEXT(
return SQTT_CALL_NEXT_LAYER(vkSetDebugUtilsObjectTagEXT)(device, pTagInfo);
}
-#if ICD_GPUOPEN_DEVMODE_BUILD
// =====================================================================================================================
// This function looks for specific tags in a submit's command buffers to identify when to force an RGP trace start
// rather than doing it during vkQueuePresent().
// This is done for applications that explicitly do not make present calls.
@@ -2871,7 +2895,6 @@ static void CheckRGPFrameEnd(
}
}
}
-#endif
// =====================================================================================================================
VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit(
@@ -2884,11 +2907,9 @@
SqttMgr* pSqtt = pQueue->VkDevice()->GetSqttMgr();
IDevMode* pDevMode = pQueue->VkDevice()->VkInstance()->GetDevModeMgr();
-#if ICD_GPUOPEN_DEVMODE_BUILD
pDevMode->NotifyPreSubmit();
CheckRGPFrameBegin(pQueue, pDevMode, submitCount, pSubmits);
-#endif
if (pDevMode->IsTraceRunning())
{
@@ -2935,9 +2956,7 @@
VkResult result = SQTT_CALL_NEXT_LAYER(vkQueueSubmit)(queue, submitCount, pSubmits, fence);
-#if ICD_GPUOPEN_DEVMODE_BUILD
CheckRGPFrameEnd(pQueue, pDevMode, submitCount, pSubmits);
-#endif
return result;
}
diff --git a/icd/api/sqtt/sqtt_layer.h b/icd/api/sqtt/sqtt_layer.h
index e105e193..0378ab5a 100644
--- a/icd/api/sqtt/sqtt_layer.h
+++ b/icd/api/sqtt/sqtt_layer.h
@@ -187,6 +187,11 @@ class SqttCmdBufferState
void WriteUserEventMarker(RgpSqttMarkerUserEventType eventType,
const char* pString) const;
+ static void WritePalInternalEventMarker(
+ Pal::ICmdBuffer* pPalCmdBuffer,
+ Pal::DispatchInfoFlags infoFlags,
+ Pal::RgpMarkerSubQueueFlags subQueueFlags);
+
void AddDebugTag(uint64_t tag);
bool HasDebugTag(uint64_t tag) const;
@@ -200,6 +205,11 @@
void WriteCbEndMarker() const;
void WritePipelineBindMarker(const Pal::Developer::BindPipelineData& data) const;
void WriteMarker(const void* pData, size_t dataSize, Pal::RgpMarkerSubQueueFlags subQueueFlags) const;
+ static void WriteMarker(
+ Pal::ICmdBuffer* pPalCmdBuffer,
+ const void* pData,
+ size_t dataSize,
+ Pal::RgpMarkerSubQueueFlags subQueueFlags);
void WriteBeginGeneralApiMarker(RgpSqttMarkerGeneralApiType apiType) const;
void WriteEndGeneralApiMarker(RgpSqttMarkerGeneralApiType apiType) const;
void WriteBarrierStartMarker(const Pal::Developer::BarrierData& data) const;
@@ -233,14 +243,12 @@
RgpSqttMarkerEventType m_currentEventType; // Current API type for pre-draw/dispatch event markers
uint32_t m_enabledMarkers;
-#if ICD_GPUOPEN_DEVMODE_BUILD
struct
{
bool started; // True if a pipeline is currently being traced
uint64_t targetHash; // Determines target pipeline used to trigger instruction tracing
VkPipelineBindPoint bindPoint; // Bind point of the target pipeline
} m_instructionTrace;
-#endif
RgpSqttMarkerUserEventWithString* m_pUserEvent;
diff --git a/icd/api/sqtt/sqtt_mgr.cpp b/icd/api/sqtt/sqtt_mgr.cpp
index 02005042..cb6b8e29 100644
--- a/icd/api/sqtt/sqtt_mgr.cpp
+++ b/icd/api/sqtt/sqtt_mgr.cpp
@@ -199,6 +199,17 @@ void SqttMgr::PalDeveloperCallback(
pSqtt->PalDrawDispatchCallback(drawDispatch);
}
}
+ else if (drawDispatch.dispatch.infoFlags.u32All != 0)
+ {
+ // Handle a dispatch initiated by PAL.
+ // For this dispatch we have a command buffer created by PAL,
+ // so we depend on the PAL-provided context and information
+ // to instrument the dispatch.
+ SqttCmdBufferState::WritePalInternalEventMarker( + drawDispatch.pCmdBuffer, + drawDispatch.dispatch.infoFlags, + drawDispatch.subQueueFlags); + } } break; diff --git a/icd/api/sqtt/sqtt_rgp_annotations.h b/icd/api/sqtt/sqtt_rgp_annotations.h index 0c3cf3a2..e2d3083f 100644 --- a/icd/api/sqtt/sqtt_rgp_annotations.h +++ b/icd/api/sqtt/sqtt_rgp_annotations.h @@ -219,6 +219,7 @@ enum class RgpSqttMarkerEventType : uint32_t CmdDrawMeshTasksIndirectEXT = 43, // vkCmdDrawMeshTasksIndirectEXT CmdDrawIndirectCount = 44, // vkCmdDrawIndirectCount CmdDrawIndexedIndirectCount = 45, // vkCmdDrawIndexedIndirectCount + CmdDispatchDevDriverOverlay = 46, // DevDriverOverlay dispatch #if VKI_RAY_TRACING ShaderIndirectModeMask = 0x800000, // Used to mark whether the shader is compiled in indirect mode or not // This mask can only be used with CmdTraceRaysKHR and CmdTraceRaysIndirectKHR diff --git a/icd/api/strings/extensions.txt b/icd/api/strings/extensions.txt index 50f32292..21745df1 100644 --- a/icd/api/strings/extensions.txt +++ b/icd/api/strings/extensions.txt @@ -229,3 +229,4 @@ VK_EXT_depth_bias_control VK_MESA_image_alignment_control VK_EXT_pipeline_protected_access VK_EXT_pipeline_robustness +VK_EXT_shader_replicated_composites diff --git a/icd/api/strings/strings.h b/icd/api/strings/strings.h index 230de578..fd3bfb0d 100644 --- a/icd/api/strings/strings.h +++ b/icd/api/strings/strings.h @@ -42,6 +42,7 @@ namespace vk namespace strings { + namespace entry { #include "strings/g_entry_points_decl.h" diff --git a/icd/api/vk_alloccb.cpp b/icd/api/vk_alloccb.cpp index c652b750..4225a3ff 100644 --- a/icd/api/vk_alloccb.cpp +++ b/icd/api/vk_alloccb.cpp @@ -189,7 +189,7 @@ void PAL_STDCALL PalFreeFuncDelegator( PalAllocator::PalAllocator( VkAllocationCallbacks* pCallbacks) : -#if PAL_MEMTRACK +#if VKI_MEMTRACK m_memTrackerAlloc(pCallbacks), m_memTracker(&m_memTrackerAlloc), #endif @@ -200,7 +200,7 @@ PalAllocator::PalAllocator( // ===================================================================================================================== void PalAllocator::Init() { -#if PAL_MEMTRACK +#if VKI_MEMTRACK m_memTracker.Init(); #endif } @@ -211,7 +211,7 @@ void* PalAllocator::Alloc( { void* pMem = nullptr; -#if PAL_MEMTRACK +#if VKI_MEMTRACK pMem = m_memTracker.Alloc(allocInfo); #else pMem = allocator::PalAllocFuncDelegator( @@ -234,7 +234,7 @@ void PalAllocator::Free( { if (freeInfo.pClientMem != nullptr) { -#if PAL_MEMTRACK +#if VKI_MEMTRACK m_memTracker.Free(freeInfo); #else allocator::PalFreeFuncDelegator(m_pCallbacks, freeInfo.pClientMem); @@ -242,7 +242,7 @@ void PalAllocator::Free( } } -#if PAL_MEMTRACK +#if VKI_MEMTRACK // ===================================================================================================================== void PalAllocator::MemTrackerAllocator::Free( const Util::FreeInfo& freeInfo) diff --git a/icd/api/vk_cmd_pool.cpp b/icd/api/vk_cmd_pool.cpp index 44643bcc..70c2af88 100644 --- a/icd/api/vk_cmd_pool.cpp +++ b/icd/api/vk_cmd_pool.cpp @@ -40,9 +40,7 @@ #include "palIntrusiveListImpl.h" #include "palVectorImpl.h" -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" -#endif namespace vk { diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index 4f223b9d..ea7b5db2 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -71,9 +71,7 @@ #include -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" -#endif namespace vk { @@ -358,6 +356,10 @@ Pal::Result CreateClearSubresRanges( { subresRange.startSubres.plane = 1; } + 
else if (clearInfo.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) + { + subresRange.startSubres.plane = 0; + } else { hasPlaneDepthAndStencil = (clearInfo.aspectMask == @@ -814,7 +816,6 @@ VkResult CmdBuffer::Initialize( VK_ASSERT(palSize == pPalDevice->GetCmdBufferSize(groupCreateInfo, &result)); VK_ASSERT(result == Pal::Result::Success); } - } if (result == Pal::Result::Success) @@ -1267,7 +1268,7 @@ void CmdBuffer::PalCmdDispatch( utils::IterateMask deviceGroup(m_curDeviceMask); do { - PalCmdBuffer(deviceGroup.Index())->CmdDispatch({ x, y, z }); + PalCmdBuffer(deviceGroup.Index())->CmdDispatch({ x, y, z }, {}); } while (deviceGroup.IterateNext()); } @@ -2453,7 +2454,6 @@ void CmdBuffer::ReleaseResources() m_pStackAllocator = nullptr; } - } // ===================================================================================================================== @@ -3081,70 +3081,60 @@ void CmdBuffer::BindVertexBuffers( { if (bindingCount > 0) { + VK_ASSERT((firstBinding + bindingCount) <= VK_ARRAY_SIZE(PerGpuRenderState::vbBindings)); DbgBarrierPreCmd(DbgBarrierBindIndexVertexBuffer); - constexpr uint32_t MaxLowBindings = VK_ARRAY_SIZE(PerGpuRenderState::vbBindings); - - const uint32_t lowBindingCount = - (firstBinding < MaxLowBindings) ? Util::Min(bindingCount, MaxLowBindings - firstBinding) : 0u; - utils::IterateMask deviceGroup(GetDeviceMask()); do { const uint32_t deviceIdx = deviceGroup.Index(); - if (lowBindingCount > 0) - { - Pal::BufferViewInfo* const pBinding = &PerGpuState(deviceIdx)->vbBindings[firstBinding]; + Pal::BufferViewInfo* const pBinding = &PerGpuState(deviceIdx)->vbBindings[firstBinding]; - BindVertexBuffersUpdateBindingRange( - deviceIdx, - pBinding, - pBinding + lowBindingCount, - 0, - pBuffers, - pOffsets, - pSizes, - pStrides); + BindVertexBuffersUpdateBindingRange( + deviceIdx, + pBinding, + pBinding + bindingCount, + 0, + pBuffers, + pOffsets, + pSizes, + pStrides); - if (m_flags.offsetMode) + if (m_flags.offsetMode) + { + Pal::VertexBufferView vertexViews[Pal::MaxVertexBuffers] = {}; + for (uint32_t idx = 0; idx < bindingCount; idx++) { - Pal::VertexBufferView vertexViews[Pal::MaxVertexBuffers] = {}; - for (uint32_t idx = 0; idx < lowBindingCount; idx++) - { - vertexViews[idx].gpuva = pBinding[idx].gpuAddr; - vertexViews[idx].sizeInBytes = pBinding[idx].range; - vertexViews[idx].strideInBytes = pBinding[idx].stride; - } - - const Pal::VertexBufferViews bufferViews = - { - .firstBuffer = firstBinding, - .bufferCount = lowBindingCount, - .offsetMode = true, - .pVertexBufferViews = vertexViews - }; - PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); + vertexViews[idx].gpuva = pBinding[idx].gpuAddr; + vertexViews[idx].sizeInBytes = pBinding[idx].range; + vertexViews[idx].strideInBytes = pBinding[idx].stride; } - else + + const Pal::VertexBufferViews bufferViews = { - const Pal::VertexBufferViews bufferViews = - { - .firstBuffer = firstBinding, - .bufferCount = lowBindingCount, - .offsetMode = false, - .pBufferViewInfos = pBinding - }; - PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); - } + .firstBuffer = firstBinding, + .bufferCount = bindingCount, + .offsetMode = true, + .pVertexBufferViews = vertexViews + }; + PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); + } + else + { + const Pal::VertexBufferViews bufferViews = + { + .firstBuffer = firstBinding, + .bufferCount = bindingCount, + .offsetMode = false, + .pBufferViewInfos = pBinding + }; + PalCmdBuffer(deviceIdx)->CmdSetVertexBuffers(bufferViews); } - } while (deviceGroup.IterateNext()); 
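A side note on the do/while loop that just closed: this patch, like the rest of the driver, walks the device-group mask one set bit at a time via utils::IterateMask. A minimal stand-in that honors the same usage contract (construct, read Index(), call IterateNext() until it returns false) is sketched below; the real helper in vk_utils.h may differ in detail:

    #include <cstdint>

    // Minimal stand-in for utils::IterateMask: yields the bit position of each
    // set bit, matching the do { ... } while (IterateNext()) usage above.
    class IterateMask
    {
    public:
        explicit IterateMask(uint32_t mask) : m_mask(mask), m_index(0) { Advance(); }

        uint32_t Index() const { return m_index; }

        bool IterateNext()
        {
            m_mask &= ~(1u << m_index); // consume the current bit
            Advance();
            return m_mask != 0;
        }

    private:
        void Advance()
        {
            while ((m_mask != 0) && ((m_mask & (1u << m_index)) == 0))
            {
                ++m_index;
            }
        }

        uint32_t m_mask;
        uint32_t m_index;
    };

With a deviceMask of 0b101, the loop body runs once for Index() == 0 and once for Index() == 2, i.e. one iteration per active device in the group.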
- m_vbWatermark = Util::Max( - m_vbWatermark, - Util::Min(firstBinding + bindingCount, MaxLowBindings)); + m_vbWatermark = Util::Max(m_vbWatermark, firstBinding + bindingCount); DbgBarrierPostCmd(DbgBarrierBindIndexVertexBuffer); } @@ -3822,7 +3812,8 @@ void CmdBuffer::ClearColorImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRanges = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(*pRanges)), MaxPalColorAspectsPerMask); + const auto maxRanges = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::SubresRange)), + MaxPalColorAspectsPerMask); auto rangeBatch = Util::Min(rangeCount * MaxPalColorAspectsPerMask, maxRanges); // Allocate space to store image subresource ranges @@ -3890,7 +3881,8 @@ void CmdBuffer::ClearDepthStencilImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRanges = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(*pRanges)), MaxPalDepthAspectsPerMask); + const auto maxRanges = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::SubresRange)), + MaxPalDepthAspectsPerMask); auto rangeBatch = Util::Min(rangeCount * MaxPalDepthAspectsPerMask, maxRanges); // Allocate space to store image subresource ranges (we need a separate region per PAL aspect) @@ -3992,7 +3984,7 @@ void CmdBuffer::ClearDynamicRenderingImages( // Note: Bound target clears are pipelined by the HW, so we do not have to insert any barriers VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)); + constexpr uint32 MinRects = 8; for (uint32_t idx = 0; idx < attachmentCount; ++idx) { @@ -4012,9 +4004,12 @@ void CmdBuffer::ClearDynamicRenderingImages( const Pal::SwizzledFormat palFormat = VkToPalFormat(attachment.attachmentFormat, m_pDevice->GetRuntimeSettings()); - Util::Vector clearBoxes{ &virtStackFrame }; - Util::Vector clearSubresRanges{ &virtStackFrame }; + Util::Vector clearBoxes { &virtStackFrame }; + Util::Vector clearSubresRanges{ &virtStackFrame }; + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::Box) + + sizeof(Pal::SubresRange)), + MinRects); auto rectBatch = Util::Min(rectCount, maxRects); const auto palResult1 = clearBoxes.Reserve(rectBatch); const auto palResult2 = clearSubresRanges.Reserve(rectBatch); @@ -4091,10 +4086,13 @@ void CmdBuffer::ClearDynamicRenderingImages( // Clear only if the referenced attachment index is active if (pDepthStencilView != nullptr) { - Util::Vector clearRects{ &virtStackFrame }; - Util::Vector clearSubresRanges{ &virtStackFrame }; + Util::Vector clearRects { &virtStackFrame }; + Util::Vector clearSubresRanges{ &virtStackFrame }; - auto rectBatch = Util::Min((rectCount * MaxPalDepthAspectsPerMask), maxRects); + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::Rect) + + sizeof(Pal::SubresRange)), + MinRects); + auto rectBatch = Util::Min((rectCount * MaxPalDepthAspectsPerMask), maxRects); const auto palResult1 = clearRects.Reserve(rectBatch); const auto palResult2 = clearSubresRanges.Reserve(rectBatch); @@ -4157,10 +4155,14 @@ void CmdBuffer::ClearDynamicRenderingBoundAttachments( // Note: Bound target clears are pipelined by the HW, so we do not have to insert any barriers VirtualStackFrame virtStackFrame(m_pStackAllocator); - Util::Vector clearRegions{ &virtStackFrame }; - Util::Vector colorTargets{ &virtStackFrame }; + constexpr uint32 MinRects = 8; + + Util::Vector clearRegions{ &virtStackFrame }; + Util::Vector colorTargets{ &virtStackFrame }; - const auto maxRects = 
EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)); + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ClearBoundTargetRegion) + + sizeof(Pal::BoundColorTarget)), + MinRects); auto rectBatch = Util::Min(rectCount, maxRects); const auto palResult1 = clearRegions.Reserve(rectBatch); const auto palResult2 = colorTargets.Reserve(attachmentCount); @@ -4287,10 +4289,14 @@ void CmdBuffer::ClearBoundAttachments( const RenderPass* pRenderPass = m_allGpuState.pRenderPass; const uint32_t subpass = m_renderPassInstance.subpass; - Util::Vector clearRegions { &virtStackFrame }; - Util::Vector colorTargets { &virtStackFrame }; + constexpr uint32 MinRects = 8; - const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)); + Util::Vector clearRegions { &virtStackFrame }; + Util::Vector colorTargets { &virtStackFrame }; + + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ClearBoundTargetRegion) + + sizeof(Pal::BoundColorTarget)), + MinRects); auto rectBatch = Util::Min(rectCount, maxRects); const auto palResult1 = clearRegions.Reserve(rectBatch); const auto palResult2 = colorTargets.Reserve(attachmentCount); @@ -4550,10 +4556,11 @@ void CmdBuffer::ClearImageAttachments( { VirtualStackFrame virtStackFrame(m_pStackAllocator); + constexpr uint32 MinRects = 8; + // Get the current renderpass and subpass const RenderPass* pRenderPass = m_allGpuState.pRenderPass; const uint32_t subpass = m_renderPassInstance.subpass; - const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)); // Go through each of the clear attachment infos for (uint32_t idx = 0; idx < attachmentCount; ++idx) @@ -4581,9 +4588,12 @@ void CmdBuffer::ClearImageAttachments( // Get the layout that this color attachment is currently in within the render pass const Pal::ImageLayout targetLayout = RPGetAttachmentLayout(attachmentIdx, 0); - Util::Vector clearBoxes { &virtStackFrame }; - Util::Vector clearSubresRanges { &virtStackFrame }; + Util::Vector clearBoxes { &virtStackFrame }; + Util::Vector clearSubresRanges { &virtStackFrame }; + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::Box) + + sizeof(Pal::SubresRange)), + MinRects); auto rectBatch = Util::Min(rectCount, maxRects); const auto palResult1 = clearBoxes.Reserve(rectBatch); const auto palResult2 = clearSubresRanges.Reserve(rectBatch); @@ -4652,9 +4662,12 @@ void CmdBuffer::ClearImageAttachments( const Pal::ImageLayout depthLayout = RPGetAttachmentLayout(attachmentIdx, 0); const Pal::ImageLayout stencilLayout = RPGetAttachmentLayout(attachmentIdx, 1); - Util::Vector clearRects { &virtStackFrame }; - Util::Vector clearSubresRanges { &virtStackFrame }; + Util::Vector clearRects { &virtStackFrame }; + Util::Vector clearSubresRanges { &virtStackFrame }; + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::Rect) + + sizeof(Pal::SubresRange)), + MinRects); auto rectBatch = Util::Min(rectCount, maxRects); const auto palResult1 = clearRects.Reserve(rectBatch); const auto palResult2 = clearSubresRanges.Reserve(rectBatch); @@ -4716,7 +4729,8 @@ void CmdBuffer::ResolveImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)), MaxRangePerAttachment); + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ImageResolveRegion)), + MaxRangePerAttachment); auto rectBatch = Util::Min(rectCount * MaxRangePerAttachment, maxRects); // Allocate space to store image resolve 
regions (we need a separate region per PAL aspect) @@ -4807,17 +4821,17 @@ void CmdBuffer::SetEvent2( if (m_flags.useSplitReleaseAcquire) { - ExecuteAcquireRelease(1, - &event, - pDependencyInfo, - Release, - RgpBarrierExternalCmdWaitEvents); + ExecuteAcquireRelease2(1, + &event, + pDependencyInfo, + Release, + RgpBarrierExternalCmdWaitEvents); } else { PipelineStageFlags stageMask = 0; - for(uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) + for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) { stageMask |= pDependencyInfo->pMemoryBarriers[i].srcStageMask; } @@ -5809,47 +5823,87 @@ void CmdBuffer::WaitEvents( { DbgBarrierPreCmd(DbgBarrierPipelineBarrierWaitEvents); - VirtualStackFrame virtStackFrame(m_pStackAllocator); + if (m_flags.useSplitReleaseAcquire) + { + uint32_t eventRangeCount = 0; - // Allocate space to store signaled event pointers (automatically rewound on unscope) - const Pal::IGpuEvent** ppGpuEvents = virtStackFrame.AllocArray(NumDeviceEvents(eventCount)); + for (uint32_t i = 0; i < eventCount; i += eventRangeCount) + { + eventRangeCount = 1; - if (ppGpuEvents != nullptr) + bool usesToken = Event::ObjectFromHandle(pEvents[i])->IsUseToken(); + + for (uint32_t j = i + 1; j < eventCount; j++) + { + if (Event::ObjectFromHandle(pEvents[j])->IsUseToken() == usesToken) + { + eventRangeCount++; + } + else + { + break; + } + } + + ExecuteAcquireRelease(eventRangeCount, + pEvents + i, + srcStageMask, + dstStageMask, + memoryBarrierCount, + pMemoryBarriers, + bufferMemoryBarrierCount, + pBufferMemoryBarriers, + imageMemoryBarrierCount, + pImageMemoryBarriers, + Acquire, + RgpBarrierExternalCmdWaitEvents); + } + } + else { - const uint32_t multiDeviceStride = eventCount; + VirtualStackFrame virtStackFrame(m_pStackAllocator); - for (uint32_t i = 0; i < eventCount; ++i) + // Allocate space to store signaled event pointers (automatically rewound on unscope) + const Pal::IGpuEvent** ppGpuEvents = + virtStackFrame.AllocArray(NumDeviceEvents(eventCount)); + + if (ppGpuEvents != nullptr) { - const Event* pEvent = Event::ObjectFromHandle(pEvents[i]); + const uint32_t multiDeviceStride = eventCount; - InsertDeviceEvents(ppGpuEvents, pEvent, i, multiDeviceStride); - } + for (uint32_t i = 0; i < eventCount; ++i) + { + const Event* pEvent = Event::ObjectFromHandle(pEvents[i]); - Pal::BarrierInfo barrier = {}; + InsertDeviceEvents(ppGpuEvents, pEvent, i, multiDeviceStride); + } - // Tell PAL to wait at a specific point until the given set of GpuEvent objects is signaled. - // We intentionally ignore the source stage flags (srcStagemask) as they are irrelevant in the - // presence of event objects + Pal::BarrierInfo barrier = {}; - barrier.reason = RgpBarrierExternalCmdWaitEvents; - barrier.waitPoint = VkToPalWaitPipePoint(dstStageMask); - barrier.gpuEventWaitCount = eventCount; - barrier.ppGpuEvents = ppGpuEvents; + // Tell PAL to wait at a specific point until the given set of GpuEvent objects is signaled. 
+ // We intentionally ignore the source stage flags (srcStageMask) as they are irrelevant in the
+ // presence of event objects
- ExecuteBarriers(&virtStackFrame,
- memoryBarrierCount,
- pMemoryBarriers,
- bufferMemoryBarrierCount,
- pBufferMemoryBarriers,
- imageMemoryBarrierCount,
- pImageMemoryBarriers,
- &barrier);
+ barrier.reason = RgpBarrierExternalCmdWaitEvents;
+ barrier.waitPoint = VkToPalWaitPipePoint(dstStageMask);
+ barrier.gpuEventWaitCount = eventCount;
+ barrier.ppGpuEvents = ppGpuEvents;
- virtStackFrame.FreeArray(ppGpuEvents);
- }
- else
- {
- m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY;
+ ExecuteBarriers(&virtStackFrame,
+ memoryBarrierCount,
+ pMemoryBarriers,
+ bufferMemoryBarrierCount,
+ pBufferMemoryBarriers,
+ imageMemoryBarrierCount,
+ pImageMemoryBarriers,
+ &barrier);
+
+ virtStackFrame.FreeArray(ppGpuEvents);
+ }
+ else
+ {
+ m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
}
DbgBarrierPostCmd(DbgBarrierPipelineBarrierWaitEvents);
@@ -5877,40 +5931,25 @@ void CmdBuffer::WaitEvents2(
{
eventRangeCount = 1;
- if (Event::ObjectFromHandle(pEvents[i])->IsUseToken())
+ bool usesToken = Event::ObjectFromHandle(pEvents[i])->IsUseToken();
+
+ for (uint32_t j = i + 1; j < eventCount; j++)
{
- for (uint32_t j = i + 1; j < eventCount; j++)
+ if (Event::ObjectFromHandle(pEvents[j])->IsUseToken() == usesToken)
{
- if (Event::ObjectFromHandle(pEvents[j])->IsUseToken())
- {
- eventRangeCount++;
- }
- else
- {
- break;
- }
+ eventRangeCount++;
}
- }
- else
- {
- for (uint32_t j = i + 1; j < eventCount; j++)
+ else
{
- if (Event::ObjectFromHandle(pEvents[j])->IsUseToken())
- {
- break;
- }
- else
- {
- eventRangeCount++;
- }
+ break;
}
}
- ExecuteAcquireRelease(eventRangeCount,
- pEvents + i,
- pDependencyInfos + i,
- Acquire,
- RgpBarrierExternalCmdWaitEvents);
+ ExecuteAcquireRelease2(eventRangeCount,
+ pEvents + i,
+ pDependencyInfos + i,
+ Acquire,
+ RgpBarrierExternalCmdWaitEvents);
}
}
else
@@ -6058,12 +6097,106 @@ void CmdBuffer::WaitEventsSync2ToSync1(
}
// =====================================================================================================================
-// Based on Dependency Info, execute Acquire or Release according to the mode.
-void CmdBuffer::ExecuteAcquireRelease(
+// Helper function called from ExecuteAcquireRelease* to route barrier calls based on AcquireReleaseMode
+void CmdBuffer::FlushAcquireReleaseBarriers(
+ Pal::AcquireReleaseInfo* pAcquireReleaseInfo,
+ uint32_t eventCount,
+ const VkEvent* pEvents,
+ Pal::MemBarrier* const pBufferBarriers,
+ const Buffer** const ppBuffers,
+ Pal::ImgBarrier* const pImageBarriers,
+ const Image** const ppImages,
+ VirtualStackFrame* pVirtStackFrame,
+ const AcquireReleaseMode acquireReleaseMode,
+ uint32_t deviceMask)
+{
+ if (acquireReleaseMode == Release)
+ {
+ pAcquireReleaseInfo->dstGlobalStageMask = 0;
+ pAcquireReleaseInfo->dstGlobalAccessMask = 0;
+
+ // If memoryBarrierCount is 0, set srcStageMask to Pal::PipelineStageTopOfPipe.
+ if (pAcquireReleaseInfo->srcGlobalStageMask == 0)
+ {
+ pAcquireReleaseInfo->srcGlobalStageMask |= Pal::PipelineStageTopOfPipe;
+ }
+
+ for (uint32 i = 0; i < pAcquireReleaseInfo->memoryBarrierCount; i++)
+ {
+ pBufferBarriers[i].dstStageMask = 0;
+ pBufferBarriers[i].dstAccessMask = 0;
+ }
+
+ for (uint32 i = 0; i < pAcquireReleaseInfo->imageBarrierCount; i++)
+ {
+ pImageBarriers[i].dstStageMask = 0;
+ pImageBarriers[i].dstAccessMask = 0;
+ }
+
+ // The only way we can get here is as a result of vkCmdSetEvent2, in which case eventCount must be 1
+ VK_ASSERT(eventCount == 1);
+
+ PalCmdRelease(
+ pAcquireReleaseInfo,
+ pEvents[0],
+ pBufferBarriers,
+ ppBuffers,
+ pImageBarriers,
+ ppImages,
+ deviceMask);
+ }
+ else if (acquireReleaseMode == Acquire)
+ {
+ pAcquireReleaseInfo->srcGlobalStageMask = 0;
+ pAcquireReleaseInfo->srcGlobalAccessMask = 0;
+
+ for (uint32 i = 0; i < pAcquireReleaseInfo->memoryBarrierCount; i++)
+ {
+ pBufferBarriers[i].srcStageMask = 0;
+ pBufferBarriers[i].srcAccessMask = 0;
+ }
+
+ for (uint32 i = 0; i < pAcquireReleaseInfo->imageBarrierCount; i++)
+ {
+ pImageBarriers[i].srcStageMask = 0;
+ pImageBarriers[i].srcAccessMask = 0;
+ }
+
+ // The only way we can get here is as a result of vkCmdWaitEvents*, in which case eventCount
+ // must be non-zero
+ VK_ASSERT(eventCount != 0);
+
+ PalCmdAcquire(
+ pAcquireReleaseInfo,
+ eventCount,
+ pEvents,
+ pBufferBarriers,
+ ppBuffers,
+ pImageBarriers,
+ ppImages,
+ pVirtStackFrame,
+ deviceMask);
+ }
+ else
+ {
+ PalCmdReleaseThenAcquire(
+ pAcquireReleaseInfo,
+ pBufferBarriers,
+ ppBuffers,
+ pImageBarriers,
+ ppImages,
+ deviceMask);
+ }
+}
+
+// =====================================================================================================================
+// Based on Dependency Info, execute Acquire or Release according to the mode. This function handles the
+// VK_KHR_synchronization2 barrier API calls
+void CmdBuffer::ExecuteAcquireRelease2(
uint32_t dependencyCount,
const VkEvent* pEvents,
const VkDependencyInfoKHR* pDependencyInfos,
- AcquireReleaseMode acquireReleaseMode,
+ const AcquireReleaseMode acquireReleaseMode,
uint32_t rgpBarrierReasonType)
{
VK_ASSERT((acquireReleaseMode == ReleaseThenAcquire) || (pEvents != nullptr));
@@ -6138,7 +6271,7 @@
uint32_t bufferMemoryBarrierCount = pThisDependencyInfo->bufferMemoryBarrierCount;
uint32_t imageMemoryBarrierCount = pThisDependencyInfo->imageMemoryBarrierCount;
- while ((memoryBarrierIdx < memBarrierCount) ||
+ while ((memoryBarrierIdx < memBarrierCount) ||
(bufferMemoryBarrierIdx < bufferMemoryBarrierCount) ||
(imageMemoryBarrierIdx < imageMemoryBarrierCount))
{
@@ -6213,8 +6346,8 @@
// Accounting for the max sub ranges, if we do not have enough space left for another image,
// break from this loop. The info for remaining barriers will be passed to PAL in subsequent calls.
while (((MaxPalAspectsPerMask + acquireReleaseInfo.imageBarrierCount) < maxImageBarrierCount) && - (locationIndex < maxLocationCount) && - (imageMemoryBarrierIdx < imageMemoryBarrierCount)) + (locationIndex < maxLocationCount) && + (imageMemoryBarrierIdx < imageMemoryBarrierCount)) { Pal::BarrierTransition tempTransition = {}; @@ -6317,7 +6450,7 @@ void CmdBuffer::ExecuteAcquireRelease( else if (pLocations != nullptr) // Could be null due to an OOM error { VK_ASSERT(static_cast(pSampleLocationsInfoEXT->sType) == - VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT); + VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT); VK_ASSERT(pImage->IsSampleLocationsCompatibleDepth()); ConvertToPalMsaaQuadSamplePattern(pSampleLocationsInfoEXT, &pLocations[locationIndex]); @@ -6340,75 +6473,17 @@ void CmdBuffer::ExecuteAcquireRelease( imageMemoryBarrierIdx++; } - if (acquireReleaseMode == Release) - { - acquireReleaseInfo.dstGlobalStageMask = 0; - acquireReleaseInfo.dstGlobalAccessMask = 0; - - // If memoryBarrierCount is 0, set srcStageMask to Pal::PipelineStageTopOfPipe. - if (acquireReleaseInfo.srcGlobalStageMask == 0) - { - acquireReleaseInfo.srcGlobalStageMask |= Pal::PipelineStageTopOfPipe; - } - - for (uint32 i = 0; i < acquireReleaseInfo.memoryBarrierCount; i++) - { - pPalBufferMemoryBarriers[i].dstStageMask = 0; - pPalBufferMemoryBarriers[i].dstAccessMask = 0; - } - - for (uint32 i = 0; i < acquireReleaseInfo.imageBarrierCount; i++) - { - pPalImageBarriers[i].dstStageMask = 0; - pPalImageBarriers[i].dstAccessMask = 0; - } - - PalCmdRelease( - &acquireReleaseInfo, - pEvents[j], - pPalBufferMemoryBarriers, - ppBuffers, - pPalImageBarriers, - ppImages, - m_curDeviceMask); - } - else if (acquireReleaseMode == Acquire) - { - acquireReleaseInfo.srcGlobalStageMask = 0; - acquireReleaseInfo.srcGlobalAccessMask = 0; - - for (uint32 i = 0; i < acquireReleaseInfo.memoryBarrierCount; i++) - { - pPalBufferMemoryBarriers[i].srcStageMask = 0; - pPalBufferMemoryBarriers[i].srcAccessMask = 0; - } - - for (uint32 i = 0; i < acquireReleaseInfo.imageBarrierCount; i++) - { - pPalImageBarriers[i].srcStageMask = 0; - pPalImageBarriers[i].srcAccessMask = 0; - } - - PalCmdAcquire( - &acquireReleaseInfo, - pEvents[j], - pPalBufferMemoryBarriers, - ppBuffers, - pPalImageBarriers, - ppImages, - &virtStackFrame, - m_curDeviceMask); - } - else - { - PalCmdReleaseThenAcquire( - &acquireReleaseInfo, - pPalBufferMemoryBarriers, - ppBuffers, - pPalImageBarriers, - ppImages, - m_curDeviceMask); - } + FlushAcquireReleaseBarriers( + &acquireReleaseInfo, + ((pEvents != nullptr) ? 1u : 0u), + ((pEvents != nullptr) ? &pEvents[j] : nullptr), + pPalBufferMemoryBarriers, + ppBuffers, + pPalImageBarriers, + ppImages, + &virtStackFrame, + acquireReleaseMode, + m_curDeviceMask); } } } @@ -6445,8 +6520,11 @@ void CmdBuffer::ExecuteAcquireRelease( } // ===================================================================================================================== -// Execute Release then acquire mode -void CmdBuffer::ExecuteReleaseThenAcquire( +// Records acquire-release barriers into PAL structures and passes them to PAL. 
This function handles the
+// Synchronization_1 barrier API calls
+void CmdBuffer::ExecuteAcquireRelease(
+ uint32_t eventCount,
+ const VkEvent* pEvents,
PipelineStageFlags srcStageMask,
PipelineStageFlags dstStageMask,
uint32_t memBarrierCount,
@@ -6454,9 +6532,13 @@
uint32_t bufferMemoryBarrierCount,
const VkBufferMemoryBarrier* pBufferMemoryBarriers,
uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier* pImageMemoryBarriers)
+ const VkImageMemoryBarrier* pImageMemoryBarriers,
+ const AcquireReleaseMode acquireReleaseMode,
+ uint32_t rgpBarrierReasonType)
{
- if ((memBarrierCount + bufferMemoryBarrierCount + imageMemoryBarrierCount) > 0)
+ VK_ASSERT((acquireReleaseMode == ReleaseThenAcquire) || (pEvents != nullptr));
+
+ if ((memBarrierCount + bufferMemoryBarrierCount + imageMemoryBarrierCount + eventCount) > 0)
{
VirtualStackFrame virtStackFrame(m_pStackAllocator);
@@ -6471,6 +6553,7 @@
uint32_t bufferMemoryBarrierIdx = 0;
uint32_t imageMemoryBarrierIdx = 0;
+ uint32_t gpuEventCount = eventCount;
uint32_t maxLocationCount = Util::Min(imageMemoryBarrierCount, MaxSampleLocationCount);
uint32_t maxBufferBarrierCount = Util::Min(bufferMemoryBarrierCount, MaxTransitionCount);
uint32_t maxImageBarrierCount = Util::Min((MaxPalAspectsPerMask * imageMemoryBarrierCount) + 1,
@@ -6504,15 +6587,16 @@
if (bufferAllocSuccess && imageAllocSuccess)
{
- while ((memoryBarrierIdx < memBarrierCount) ||
+ while ((memoryBarrierIdx < memBarrierCount) ||
(bufferMemoryBarrierIdx < bufferMemoryBarrierCount) ||
- (imageMemoryBarrierIdx < imageMemoryBarrierCount))
+ (imageMemoryBarrierIdx < imageMemoryBarrierCount) ||
+ (gpuEventCount > 0))
{
Pal::AcquireReleaseInfo acquireReleaseInfo = {};
acquireReleaseInfo.pMemoryBarriers = pPalBufferMemoryBarriers;
acquireReleaseInfo.pImageBarriers = pPalImageBarriers;
- acquireReleaseInfo.reason = RgpBarrierExternalCmdPipelineBarrier;
+ acquireReleaseInfo.reason = rgpBarrierReasonType;
uint32_t palSrcStageMask = VkToPalPipelineStageFlags(srcStageMask, true);
uint32_t palDstStageMask = VkToPalPipelineStageFlags(dstStageMask, false);
@@ -6694,13 +6778,19 @@
imageMemoryBarrierIdx++;
}
- PalCmdReleaseThenAcquire(
+ FlushAcquireReleaseBarriers(
&acquireReleaseInfo,
+ gpuEventCount,
+ pEvents,
pPalBufferMemoryBarriers,
ppBuffers,
pPalImageBarriers,
ppImages,
+ &virtStackFrame,
+ acquireReleaseMode,
m_curDeviceMask);
+
+ gpuEventCount = 0;
}
}
else
@@ -6749,16 +6839,30 @@ void CmdBuffer::PipelineBarrier(
{
DbgBarrierPreCmd(DbgBarrierPipelineBarrierWaitEvents);
+ const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings();
+
+ if (settings.syncPreviousDrawForTransferStage &&
+ (srcStageMask == VK_PIPELINE_STAGE_TRANSFER_BIT) &&
+ (destStageMask == VK_PIPELINE_STAGE_TRANSFER_BIT))
+ {
+ srcStageMask |= (VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR |
+ VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR);
+ }
+
if (m_flags.useReleaseAcquire)
{
- ExecuteReleaseThenAcquire(srcStageMask,
- destStageMask,
- memBarrierCount,
- pMemoryBarriers,
- bufferMemoryBarrierCount,
- pBufferMemoryBarriers,
- imageMemoryBarrierCount,
- pImageMemoryBarriers);
+ ExecuteAcquireRelease(0,
+ nullptr,
+ srcStageMask,
+ destStageMask,
+ memBarrierCount,
+ pMemoryBarriers,
+ bufferMemoryBarrierCount,
+ pBufferMemoryBarriers,
+ imageMemoryBarrierCount,
+ pImageMemoryBarriers,
+ ReleaseThenAcquire,
+
RgpBarrierExternalCmdPipelineBarrier); } else { @@ -6798,11 +6902,11 @@ void CmdBuffer::PipelineBarrier2( if (m_flags.useReleaseAcquire) { - ExecuteAcquireRelease(1, - nullptr, - pDependencyInfo, - ReleaseThenAcquire, - RgpBarrierExternalCmdPipelineBarrier); + ExecuteAcquireRelease2(1, + nullptr, + pDependencyInfo, + ReleaseThenAcquire, + RgpBarrierExternalCmdPipelineBarrier); } else { @@ -7509,7 +7613,8 @@ void CmdBuffer::PalCmdReleaseThenAcquire( // ===================================================================================================================== void CmdBuffer::PalCmdAcquire( Pal::AcquireReleaseInfo* pAcquireReleaseInfo, - const VkEvent event, + uint32_t eventCount, + const VkEvent* pEvents, Pal::MemBarrier* const pBufferBarriers, const Buffer** const ppBuffers, Pal::ImgBarrier* const pImageBarriers, @@ -7522,7 +7627,7 @@ void CmdBuffer::PalCmdAcquire( // in the header, but temporarily you may use the generic "unknown" reason so as not to block you. VK_ASSERT(pAcquireReleaseInfo->reason != 0); - Event* pEvent = Event::ObjectFromHandle(event); + Event* pEvent = Event::ObjectFromHandle(pEvents[0]); utils::IterateMask deviceGroup(deviceMask); do @@ -7540,19 +7645,50 @@ void CmdBuffer::PalCmdAcquire( pAcquireReleaseInfo->pImageBarriers = pImageBarriers; pAcquireReleaseInfo->pMemoryBarriers = pBufferBarriers; + // Whether syncToken is used or not is decided by the setting 'SyncTokenEnabled' if (pEvent->IsUseToken()) { - Pal::ReleaseToken syncToken = {}; + // Allocate space to store sync token values (automatically rewound on unscope) + Pal::ReleaseToken* pSyncTokens = (eventCount > 0) ? + pVirtStackFrame->AllocArray(eventCount) : nullptr; + + if (pSyncTokens != nullptr) + { + for (uint32_t i = 0; i < eventCount; ++i) + { + pSyncTokens[i] = Event::ObjectFromHandle(pEvents[i])->GetSyncToken(); + } - syncToken = pEvent->GetSyncToken(); - PalCmdBuffer(deviceIdx)->CmdAcquire(*pAcquireReleaseInfo, 1u, &syncToken); + PalCmdBuffer(deviceIdx)->CmdAcquire(*pAcquireReleaseInfo, eventCount, pSyncTokens); + + pVirtStackFrame->FreeArray(pSyncTokens); + } + else + { + m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; + } } else { - const Pal::IGpuEvent* pGpuEvent = {}; + // Allocate space to store signaled event pointers (automatically rewound on unscope) + const Pal::IGpuEvent** ppGpuEvents = (eventCount > 0) ? 
+ pVirtStackFrame->AllocArray(eventCount) : nullptr; + + if (ppGpuEvents != nullptr) + { + for (uint32_t i = 0; i < eventCount; ++i) + { + ppGpuEvents[i] = Event::ObjectFromHandle(pEvents[i])->PalEvent(deviceIdx); + } + + PalCmdBuffer(deviceIdx)->CmdAcquireEvent(*pAcquireReleaseInfo, eventCount, ppGpuEvents); - pGpuEvent = pEvent->PalEvent(deviceIdx); - PalCmdBuffer(deviceIdx)->CmdAcquireEvent(*pAcquireReleaseInfo, 1u, &pGpuEvent); + pVirtStackFrame->FreeArray(ppGpuEvents); + } + else + { + m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; + } } } while (deviceGroup.IterateNext()); @@ -8131,6 +8267,16 @@ void CmdBuffer::RPSyncPostLoadOpColorClear( pPalTransitions, ppImages, GetRpDeviceMask()); + + if (pPalTransitions != nullptr) + { + virtStack.FreeArray(pPalTransitions); + } + + if (ppImages != nullptr) + { + virtStack.FreeArray(ppImages); + } } else { @@ -8630,9 +8776,11 @@ void CmdBuffer::RPLoadOpClearColor( VirtualStackFrame virtStackFrame(m_pStackAllocator); - Util::Vector clearRegions{ &virtStackFrame }; + constexpr uint32 MinRects = 8; - const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(VkClearRect)); + Util::Vector clearRegions{ &virtStackFrame }; + + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ClearBoundTargetRegion)), MinRects); auto rectBatch = Util::Min(count, maxRects); const auto palResult = clearRegions.Reserve(rectBatch); @@ -8779,9 +8927,11 @@ void CmdBuffer::RPLoadOpClearDepthStencil( VirtualStackFrame virtStackFrame(m_pStackAllocator); - Util::Vector clearRegions{ &virtStackFrame }; + constexpr uint32 MinRects = 8; + + Util::Vector clearRegions{ &virtStackFrame }; - const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(VkClearRect)); + const auto maxRects = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ClearBoundTargetRegion)), MinRects); auto rectBatch = Util::Min(count, maxRects); for (uint32_t i = 0; i < count; ++i) @@ -9237,8 +9387,7 @@ void CmdBuffer::SetViewInstanceMask( const uint32_t deviceViewMask = uint32_t { 0x1 } << deviceIdx; uint32_t viewMask = 0x0; - - if (m_allGpuState.viewIndexFromDeviceIndex) + if (m_allGpuState.viewIndexFromDeviceIndex && (Util::CountSetBits(deviceMask) > 1)) { // VK_KHR_multiview interaction with VK_KHR_device_group. // When GraphicsPipeline is created with flag @@ -9266,7 +9415,6 @@ void CmdBuffer::SetViewInstanceMask( // Basically each device renders all views. 
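As a worked illustration of the rule described in these comments: with a 2-GPU device group, subpassViewMask = 0x3, and VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT set, device 0 renders view 0 and device 1 renders view 1; without the flag, or with only one active device, every device renders both views. A sketch under the diff's names, not the driver's exact code:

    // Sketch: per-device view mask selection for multiview + device group.
    const uint32_t deviceViewMask = uint32_t{ 0x1 } << deviceIdx;

    const uint32_t viewMask =
        (viewIndexFromDeviceIndex && (Util::CountSetBits(deviceMask) > 1))
            ? (subpassViewMask & deviceViewMask)  // one view per device
            : subpassViewMask;                    // each device renders all views
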
viewMask = subpassViewMask; } - PalCmdBuffer(deviceIdx)->CmdSetViewInstanceMask(viewMask); } while (deviceGroup.IterateNext()); @@ -11152,6 +11300,8 @@ void CmdBuffer::GetRayTracingDispatchArgs( pConstants->constData.rayDispatchWidth = width; pConstants->constData.rayDispatchHeight = height; pConstants->constData.rayDispatchDepth = depth; + pConstants->constData.rayDispatchMaxGroups = pPipeline->PersistentDispatchSize(width, height, depth); + pConstants->constData.missTableBaseAddressLo = Util::LowPart(missSbt.deviceAddress); pConstants->constData.missTableBaseAddressHi = Util::HighPart(missSbt.deviceAddress); pConstants->constData.missTableStrideInBytes = static_cast(missSbt.stride); @@ -11393,7 +11543,7 @@ void CmdBuffer::TraceRaysDispatchPerDevice( { const RayTracingPipeline* pPipeline = pCmdBuffer->m_allGpuState.pRayTracingPipeline; const Pal::DispatchDims dispatchSize = pPipeline->GetDispatchSize({ .x = width, .y = height, .z = depth }); - pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdDispatch(dispatchSize); + pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdDispatch(dispatchSize, {}); } // ===================================================================================================================== @@ -11791,7 +11941,6 @@ void CmdBuffer::InsertDebugMarker( const char* pLabelName, bool isBegin) { -#if ICD_GPUOPEN_DEVMODE_BUILD constexpr uint8 MarkerSourceApplication = 0; const IDevMode* pDevMode = m_pDevice->VkInstance()->GetDevModeMgr(); @@ -11806,7 +11955,6 @@ void CmdBuffer::InsertDebugMarker( Util::StringLength(pLabelName) : 0); } -#endif } // ===================================================================================================================== diff --git a/icd/api/vk_cmdbuffer_transfer.cpp b/icd/api/vk_cmdbuffer_transfer.cpp index 9b03c795..7a641e13 100644 --- a/icd/api/vk_cmdbuffer_transfer.cpp +++ b/icd/api/vk_cmdbuffer_transfer.cpp @@ -279,7 +279,7 @@ void CmdBuffer::CopyBuffer( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(*pRegions)); + const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(Pal::MemoryCopyRegion)); auto regionBatch = Util::Min(regionCount, maxRegions); // Allocate space to store memory copy regions @@ -332,7 +332,8 @@ void CmdBuffer::CopyImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRegions = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(*pRegions)), MaxPalAspectsPerMask); + const auto maxRegions = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ImageCopyRegion)), + MaxPalAspectsPerMask); auto regionBatch = Util::Min(regionCount * MaxPalAspectsPerMask, maxRegions); Pal::ImageCopyRegion* pPalRegions = @@ -392,7 +393,8 @@ void CmdBuffer::BlitImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRegions = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(*pRegions)), MaxPalAspectsPerMask); + const auto maxRegions = Util::Max(EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ImageScaledCopyRegion)), + MaxPalAspectsPerMask); auto regionBatch = Util::Min(regionCount * MaxPalAspectsPerMask, maxRegions); // Allocate space to store scaled image copy regions (we need a separate region per PAL aspect) @@ -521,7 +523,7 @@ void CmdBuffer::CopyBufferToImage( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(*pRegions)); + const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(Pal::MemoryImageCopyRegion)); auto regionBatch = Util::Min(regionCount, 
maxRegions); // Allocate space to store memory image copy regions @@ -587,7 +589,7 @@ void CmdBuffer::CopyImageToBuffer( VirtualStackFrame virtStackFrame(m_pStackAllocator); - const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(*pRegions)); + const auto maxRegions = EstimateMaxObjectsOnVirtualStack(sizeof(Pal::MemoryImageCopyRegion)); auto regionBatch = Util::Min(regionCount, maxRegions); // Allocate space to store memory image copy regions @@ -889,7 +891,7 @@ void CmdBuffer::QueryCopy( uint32_t threadGroupCount = Util::Max(1U, (queryCount + ThreadsPerGroup - 1) / ThreadsPerGroup); - PalCmdBuffer(deviceIdx)->CmdDispatch({ threadGroupCount, 1, 1 }); + PalCmdBuffer(deviceIdx)->CmdDispatch({ threadGroupCount, 1, 1 }, {}); // Restore compute state PalCmdBuffer(deviceIdx)->CmdRestoreComputeState(Pal::ComputeStatePipelineAndUserData); diff --git a/icd/api/vk_compute_pipeline.cpp b/icd/api/vk_compute_pipeline.cpp index 312583b6..bbf447ee 100644 --- a/icd/api/vk_compute_pipeline.cpp +++ b/icd/api/vk_compute_pipeline.cpp @@ -266,9 +266,10 @@ void ComputePipeline::ConvertComputePipelineInfo( void ComputePipeline::FetchPalMetadata( PalAllocator* pAllocator, const void* pBinary, + size_t binarySize, uint32_t* pOrigThreadgroupDims) { - Util::Abi::PipelineAbiReader abiReader(pAllocator, pBinary); + Util::Abi::PipelineAbiReader abiReader(pAllocator, Util::Span{pBinary, binarySize}); Util::Result result = abiReader.Init(); if (result == Util::Result::Success) @@ -593,7 +594,6 @@ VkResult ComputePipeline::Create( Util::VoidPtrInc(pPalMem, deviceIdx * pipelineSize), &pPalPipeline[deviceIdx]); -#if ICD_GPUOPEN_DEVMODE_BUILD // Temporarily reinject post Pal pipeline creation (when the internal pipeline hash is available). // The reinjection cache layer can be linked back into the pipeline cache chain once the // Vulkan pipeline cache key can be stored (and read back) inside the ELF as metadata. @@ -624,7 +624,6 @@ VkResult ComputePipeline::Create( palResult = Util::Result::Success; } } -#endif } result = PalToVkResult(palResult); @@ -650,6 +649,7 @@ VkResult ComputePipeline::Create( uint32_t origThreadgroupDims[3]; FetchPalMetadata(pDevice->VkInstance()->Allocator(), pipelineBinaries[DefaultDeviceIndex].pCode, + pipelineBinaries[DefaultDeviceIndex].codeSize, origThreadgroupDims); // On success, wrap it up in a Vulkan object and return. 
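The two hunks above switch PAL's pipeline-ABI reader from a bare code pointer to a (pointer, size) pair. A minimal sketch of the new pattern, assuming the span's element type is const void (the diff text lost its template arguments):

    // Sketch only: construct the ABI reader over (pointer, size) so ELF
    // parsing can be bounds-checked against the real binary size.
    Util::Abi::PipelineAbiReader abiReader(
        pAllocator,
        Util::Span<const void>{ pBinary, binarySize });

    if (abiReader.Init() == Util::Result::Success)
    {
        // ... query PAL metadata, e.g. the original threadgroup dimensions ...
    }

This is also why FetchPalMetadata gains a binarySize parameter: callers now have to thread the binary size through alongside the code pointer.
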
diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index 66237a87..32438c8d 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -89,12 +89,9 @@ #include "appopt/barrier_filter_layer.h" #include "appopt/strange_brigade_layer.h" -#include "appopt/gravity_mark_layer.h" #include "appopt/baldurs_gate3_layer.h" -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" -#endif #include "palCmdBuffer.h" #include "palCmdAllocator.h" @@ -354,8 +351,7 @@ VkResult Device::Create( } // Dedicated Compute Units - static constexpr uint32_t MaxEngineCount = 8; - uint32_t dedicatedComputeUnits[Queue::MaxQueueFamilies][MaxEngineCount] = {}; + uint32_t dedicatedComputeUnits[Queue::MaxQueueFamilies][Queue::MaxQueuesPerFamily] = {}; VkResult vkResult = VK_SUCCESS; void* pMemory = nullptr; @@ -1311,21 +1307,6 @@ VkResult Device::Initialize( break; } - case AppProfile::GravityMark: - { - void* pMemory = VkInstance()->AllocMem(sizeof(GravityMarkLayer), VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - if (pMemory != nullptr) - { - m_pAppOptLayer = VK_PLACEMENT_NEW(pMemory) GravityMarkLayer(); - } - else - { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - } - - break; - } case AppProfile::BaldursGate3: { void* pMemory = VkInstance()->AllocMem(sizeof(BaldursGate3Layer), VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); @@ -1453,12 +1434,10 @@ VkResult Device::Initialize( } } -#if ICD_GPUOPEN_DEVMODE_BUILD if ((result == VK_SUCCESS) && (VkInstance()->GetDevModeMgr() != nullptr)) { VkInstance()->GetDevModeMgr()->PostDeviceCreate(this); } -#endif if (result == VK_SUCCESS) { @@ -1766,12 +1745,10 @@ VkResult Device::Destroy(const VkAllocationCallbacks* pAllocator) VK_ALERT(powerRes != VK_SUCCESS); } -#if ICD_GPUOPEN_DEVMODE_BUILD if (VkInstance()->GetDevModeMgr() != nullptr) { VkInstance()->GetDevModeMgr()->PreDeviceDestroy(this); } -#endif #if VKI_RAY_TRACING if (m_pRayTrace != nullptr) @@ -1924,7 +1901,7 @@ VkResult Device::CreateInternalComputePipeline( uint32_t numUserDataNodes, Vkgc::ResourceMappingRootNode* pUserDataNodes, ShaderModuleFlags flags, - bool forceWave64, + ShaderWaveSize waveSize, const VkSpecializationInfo* pSpecializationInfo, InternalPipeline* pInternalPipeline) { @@ -1979,14 +1956,30 @@ VkResult Device::CreateInternalComputePipeline( pCompiler->ApplyDefaultShaderOptions(ShaderStage::ShaderStageCompute, 0, &pShaderInfo->options); // forceWave64 is currently true for only GpuRT shaders and shouldForceWave32 should not affect GpuRT shaders - bool shouldForceWave32 = (((GetRuntimeSettings().deprecateWave64 & DeprecateWave64::DeprecateWave64Cs) || - (GetRuntimeSettings().deprecateWave64 & DeprecateWave64::DeprecateWave64All)) && - (forceWave64 == false)); + bool shouldForceWave32 = ((GetRuntimeSettings().deprecateWave64 & DeprecateWave64::DeprecateWave64Cs) || + (GetRuntimeSettings().deprecateWave64 & DeprecateWave64::DeprecateWave64All)); - if (forceWave64) + switch (waveSize) { + case ShaderWaveSize::WaveSizeAuto: + pShaderInfo->options.allowVaryWaveSize = true; + + // Only apply if the wave size was not already specified. 
+ if (shouldForceWave32) + { + pShaderInfo->options.waveSize = 32; + } + break; + case ShaderWaveSize::WaveSize32: + pShaderInfo->options.waveSize = 32; + pShaderInfo->options.subgroupSize = 32; + break; + case ShaderWaveSize::WaveSize64: pShaderInfo->options.waveSize = 64; pShaderInfo->options.subgroupSize = 64; + break; + default: + VK_NEVER_CALLED(); } Pal::ShaderHash codeHash = ShaderModule::GetCodeHash( @@ -2012,8 +2005,6 @@ VkResult Device::CreateInternalComputePipeline( 0, options); - options.pOptions->waveSize = (shouldForceWave32) ? 32 : options.pOptions->waveSize; - // PAL Pipeline caching Util::Result cacheResult = Util::Result::NotFound; Util::MetroHash::Hash cacheId = {}; @@ -2215,7 +2206,7 @@ VkResult Device::CreateInternalPipelines() VK_ARRAY_SIZE(userDataNodes), userDataNodes, 0, - false, + ShaderWaveSize::WaveSizeAuto, nullptr, &m_timestampQueryCopyPipeline); @@ -2230,7 +2221,7 @@ VkResult Device::CreateInternalPipelines() VK_ARRAY_SIZE(userDataNodes), userDataNodes, 0, - false, + ShaderWaveSize::WaveSizeAuto, nullptr, &m_accelerationStructureQueryCopyPipeline); } @@ -2478,54 +2469,77 @@ VkResult Device::WaitForFences( VkBool32 waitAll, uint64_t timeout) { - Pal::Result palResult = Pal::Result::Success; + VirtualStackAllocator* pStackAllocator = nullptr; - Pal::IFence** ppPalFences = static_cast(VK_ALLOC_A(sizeof(Pal::IFence*) * fenceCount)); + Pal::Result palResult = m_pInstance->StackMgr()->AcquireAllocator(&pStackAllocator); - if (IsMultiGpu() == false) + if (palResult == Pal::Result::Success) { - for (uint32_t i = 0; i < fenceCount; ++i) + VirtualStackFrame virtStackFrame(pStackAllocator); + + Pal::IFence** ppPalFences = virtStackFrame.AllocArray(fenceCount); + + if (ppPalFences == nullptr) { - ppPalFences[i] = Fence::ObjectFromHandle(pFences[i])->PalFence(DefaultDeviceIndex); + palResult = Pal::Result::ErrorOutOfMemory; } - palResult = PalDevice(DefaultDeviceIndex)->WaitForFences(fenceCount, - ppPalFences, - waitAll != VK_FALSE, - Uint64ToChronoNano(timeout)); - } - else - { - for (uint32_t deviceIdx = 0; - (deviceIdx < NumPalDevices()) && (palResult == Pal::Result::Success); - deviceIdx++) + if (IsMultiGpu() == false) { - const uint32_t currentDeviceMask = 1 << deviceIdx; - - uint32_t perDeviceFenceCount = 0; - for (uint32_t i = 0; i < fenceCount; ++i) + if (palResult == Pal::Result::Success) { - Fence* pFence = Fence::ObjectFromHandle(pFences[i]); + for (uint32_t i = 0; i < fenceCount; ++i) + { + ppPalFences[i] = Fence::ObjectFromHandle(pFences[i])->PalFence(DefaultDeviceIndex); + } - // Some conformance tests will wait on fences that were never submitted, so use only the first device - // for these cases. - const bool forceWait = (pFence->GetActiveDeviceMask() == 0) && (deviceIdx == DefaultDeviceIndex); + palResult = PalDevice(DefaultDeviceIndex)->WaitForFences(fenceCount, + ppPalFences, + waitAll != VK_FALSE, + Uint64ToChronoNano(timeout)); + } + } + else + { + for (uint32_t deviceIdx = 0; + (deviceIdx < NumPalDevices()) && (palResult == Pal::Result::Success); + deviceIdx++) + { + const uint32_t currentDeviceMask = 1 << deviceIdx; - if (forceWait || ((currentDeviceMask & pFence->GetActiveDeviceMask()) != 0)) + uint32_t perDeviceFenceCount = 0; + for (uint32_t i = 0; i < fenceCount; ++i) { - ppPalFences[perDeviceFenceCount++] = pFence->PalFence(deviceIdx); + Fence* pFence = Fence::ObjectFromHandle(pFences[i]); + + // Some conformance tests will wait on fences that were never submitted, so use only the first + // device for these cases. 
+ const bool forceWait = (pFence->GetActiveDeviceMask() == 0) && (deviceIdx == DefaultDeviceIndex); + + if (forceWait || ((currentDeviceMask & pFence->GetActiveDeviceMask()) != 0)) + { + ppPalFences[perDeviceFenceCount++] = pFence->PalFence(deviceIdx); + } } - } - if (perDeviceFenceCount > 0) - { - palResult = PalDevice(deviceIdx)->WaitForFences(perDeviceFenceCount, - ppPalFences, - waitAll != VK_FALSE, - Uint64ToChronoNano(timeout)); + if (perDeviceFenceCount > 0) + { + palResult = PalDevice(deviceIdx)->WaitForFences(perDeviceFenceCount, + ppPalFences, + waitAll != VK_FALSE, + Uint64ToChronoNano(timeout)); + } } } + + virtStackFrame.FreeArray(ppPalFences); } + + if (pStackAllocator != nullptr) + { + m_pInstance->StackMgr()->ReleaseAllocator(pStackAllocator); + } + return PalToVkResult(palResult); } @@ -2535,28 +2549,52 @@ VkResult Device::ResetFences( uint32_t fenceCount, const VkFence* pFences) { - Pal::IFence** ppPalFences = static_cast(VK_ALLOC_A(sizeof(Pal::IFence*) * fenceCount)); + VirtualStackAllocator* pStackAllocator = nullptr; - Pal::Result palResult = Pal::Result::Success; + Pal::Result palResult = m_pInstance->StackMgr()->AcquireAllocator(&pStackAllocator); - // Clear the wait masks for each fence - for (uint32_t i = 0; i < fenceCount; ++i) + if (palResult == Pal::Result::Success) { - Fence::ObjectFromHandle(pFences[i])->ClearActiveDeviceMask(); - Fence::ObjectFromHandle(pFences[i])->RestoreFence(this); - } + VirtualStackFrame virtStackFrame(pStackAllocator); - for (uint32_t deviceIdx = 0; - (deviceIdx < NumPalDevices()) && (palResult == Pal::Result::Success); - deviceIdx++) - { - for (uint32_t i = 0; i < fenceCount; ++i) + Pal::IFence** ppPalFences = virtStackFrame.AllocArray(fenceCount); + + if (ppPalFences == nullptr) + { + palResult = Pal::Result::ErrorOutOfMemory; + } + else + { + // Clear the wait masks for each fence + for (uint32_t i = 0; i < fenceCount; ++i) + { + Fence::ObjectFromHandle(pFences[i])->ClearActiveDeviceMask(); + Fence::ObjectFromHandle(pFences[i])->RestoreFence(this); + } + } + + for (uint32_t deviceIdx = 0; + (deviceIdx < NumPalDevices()) && (palResult == Pal::Result::Success); + deviceIdx++) + { + for (uint32_t i = 0; i < fenceCount; ++i) + { + Fence* pFence = Fence::ObjectFromHandle(pFences[i]); + ppPalFences[i] = pFence->PalFence(deviceIdx); + } + + palResult = PalDevice(deviceIdx)->ResetFences(fenceCount, ppPalFences); + } + + if (ppPalFences != nullptr) { - Fence* pFence = Fence::ObjectFromHandle(pFences[i]); - ppPalFences[i] = pFence->PalFence(deviceIdx); + virtStackFrame.FreeArray(ppPalFences); } + } - palResult = PalDevice(deviceIdx)->ResetFences(fenceCount, ppPalFences); + if (pStackAllocator != nullptr) + { + m_pInstance->StackMgr()->ReleaseAllocator(pStackAllocator); } return PalToVkResult(palResult); @@ -3251,27 +3289,47 @@ VkResult Device::WaitSemaphores( const VkSemaphoreWaitInfo* pWaitInfo, uint64_t timeout) { - Pal::Result palResult = Pal::Result::Success; - uint32_t flags = 0; + VirtualStackAllocator* pStackAllocator = nullptr; - Pal::IQueueSemaphore** ppPalSemaphores = static_cast(VK_ALLOC_A( - sizeof(Pal::IQueueSemaphore*) * pWaitInfo->semaphoreCount)); + Pal::Result palResult = m_pInstance->StackMgr()->AcquireAllocator(&pStackAllocator); - for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) + if (palResult == Pal::Result::Success) { - Semaphore* currentSemaphore = Semaphore::ObjectFromHandle(pWaitInfo->pSemaphores[i]); - ppPalSemaphores[i] = currentSemaphore->PalSemaphore(DefaultDeviceIndex); - 
currentSemaphore->RestoreSemaphore(); + VirtualStackFrame virtStackFrame(pStackAllocator); + + Pal::IQueueSemaphore** ppPalSemaphores = + virtStackFrame.AllocArray(pWaitInfo->semaphoreCount); + + if (ppPalSemaphores == nullptr) + { + palResult = Pal::Result::ErrorOutOfMemory; + } + else + { + for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) + { + Semaphore* currentSemaphore = Semaphore::ObjectFromHandle(pWaitInfo->pSemaphores[i]); + ppPalSemaphores[i] = currentSemaphore->PalSemaphore(DefaultDeviceIndex); + currentSemaphore->RestoreSemaphore(); + } + + const uint32 flags = (pWaitInfo->flags == VK_SEMAPHORE_WAIT_ANY_BIT) ? Pal::HostWaitFlags::HostWaitAny : 0; + + palResult = PalDevice(DefaultDeviceIndex)->WaitForSemaphores(pWaitInfo->semaphoreCount, ppPalSemaphores, + pWaitInfo->pValues, flags, Uint64ToChronoNano(timeout)); + } + + if (ppPalSemaphores != nullptr) + { + virtStackFrame.FreeArray(ppPalSemaphores); + } } - if (pWaitInfo->flags == VK_SEMAPHORE_WAIT_ANY_BIT) + if (pStackAllocator != nullptr) { - flags |= Pal::HostWaitFlags::HostWaitAny; + m_pInstance->StackMgr()->ReleaseAllocator(pStackAllocator); } - palResult = PalDevice(DefaultDeviceIndex)->WaitForSemaphores(pWaitInfo->semaphoreCount, ppPalSemaphores, - pWaitInfo->pValues, flags, Uint64ToChronoNano(timeout)); - return PalToVkResult(palResult); } diff --git a/icd/api/vk_graphics_pipeline.cpp b/icd/api/vk_graphics_pipeline.cpp index 478c1c80..89cad731 100644 --- a/icd/api/vk_graphics_pipeline.cpp +++ b/icd/api/vk_graphics_pipeline.cpp @@ -311,7 +311,6 @@ VkResult GraphicsPipeline::CreatePalPipelineObjects( Util::VoidPtrInc(pSystemMem, palOffset), &pPalPipeline[deviceIdx]); -#if ICD_GPUOPEN_DEVMODE_BUILD // Temporarily reinject post Pal pipeline creation (when the internal pipeline hash is available). // The reinjection cache layer can be linked back into the pipeline cache chain once the // Vulkan pipeline cache key can be stored (and read back) inside the ELF as metadata. @@ -342,7 +341,6 @@ VkResult GraphicsPipeline::CreatePalPipelineObjects( palResult = Util::Result::Success; } } -#endif VK_ASSERT(palSize == pPalDevice->GetGraphicsPipelineSize(pObjectCreateInfo->pipeline, nullptr)); palOffset += palSize; @@ -444,10 +442,9 @@ VkResult GraphicsPipeline::CreatePipelineObjects( const auto& palProperties = pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties(); const auto& info = pPalPipeline[deviceIdx]->GetInfo(); - if ((info.ps.flags.perSampleShading == 1) || - (info.ps.flags.enablePops == 1)) + if (info.ps.flags.perSampleShading == 1) { - // Override the shader rate to 1x1 if SampleId used in shader, or POPS is enabled. + // Override the shader rate to 1x1 if SampleId used in shader. Device::SetDefaultVrsRateParams(&pObjectCreateInfo->immedInfo.vrsRateParams); pObjectCreateInfo->flags.force1x1ShaderRate = true; @@ -480,6 +477,14 @@ VkResult GraphicsPipeline::CreatePipelineObjects( pObjectCreateInfo->flags.force1x1ShaderRate = true; pObjectCreateInfo->immedInfo.msaaCreateInfo.pixelShaderSamples = 1; } + else if (info.ps.flags.enablePops == 1) + { + // Override the shader rate to 1x1 if POPS is enabled and + // fragmentShadingRateWithFragmentShaderInterlock is not supported. 
+ Device::SetDefaultVrsRateParams(&pObjectCreateInfo->immedInfo.vrsRateParams); + + pObjectCreateInfo->flags.force1x1ShaderRate = true; + } } if (pObjectCreateInfo->flags.bindMsaaObject) @@ -1051,8 +1056,30 @@ VkResult GraphicsPipeline::Create( #endif objectCreateInfo.flags.isPointSizeUsed = binaryMetadata.pointSizeUsed; objectCreateInfo.flags.shadingRateUsedInShader = binaryMetadata.shadingRateUsedInShader; - objectCreateInfo.flags.viewIndexFromDeviceIndex = Util::TestAnyFlagSet(flags, - VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT); + + if (libInfo.pPreRasterizationShaderLib != nullptr) + { + if (libInfo.pPreRasterizationShaderLib->GetPipelineBinaryCreateInfo().flags & + VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT) + { + objectCreateInfo.flags.viewIndexFromDeviceIndex |= 1 << GraphicsLibraryPreRaster; + } + } + + if (libInfo.pFragmentShaderLib != nullptr) + { + if (libInfo.pFragmentShaderLib->GetPipelineBinaryCreateInfo().flags & + VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT) + { + objectCreateInfo.flags.viewIndexFromDeviceIndex |= 1 << GraphicsLibraryFragment; + } + } + + if (Util::TestAnyFlagSet(flags, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT)) + { + objectCreateInfo.flags.viewIndexFromDeviceIndex |= + ((1 << GraphicsLibraryPreRaster) | (1 << GraphicsLibraryFragment)); + } #if VKI_RAY_TRACING objectCreateInfo.dispatchRaysUserDataOffset = pPipelineLayout->GetDispatchRaysUserData(); @@ -2232,8 +2259,8 @@ void GraphicsPipeline::BindToCmdBuffer( // because when VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT is specified // ViewMask for each VkPhysicalDevice is defined by DeviceIndex // not by current subpass during a render pass instance. - const bool oldViewIndexFromDeviceIndex = pRenderState->viewIndexFromDeviceIndex; - const bool newViewIndexFromDeviceIndex = ViewIndexFromDeviceIndex(); + const uint32_t oldViewIndexFromDeviceIndex = pRenderState->viewIndexFromDeviceIndex; + const uint32_t newViewIndexFromDeviceIndex = StageMaskForViewIndexUseDeviceIndex(); if (oldViewIndexFromDeviceIndex != newViewIndexFromDeviceIndex) { diff --git a/icd/api/vk_graphics_pipeline_library.cpp b/icd/api/vk_graphics_pipeline_library.cpp index 30cc392e..6f512ffd 100644 --- a/icd/api/vk_graphics_pipeline_library.cpp +++ b/icd/api/vk_graphics_pipeline_library.cpp @@ -500,6 +500,21 @@ VkResult GraphicsPipelineLibrary::Create( GplModuleState tempModuleStates[ShaderStage::ShaderStageGfxCount] = {}; binaryCreateInfo.pipelineInfo.iaState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + if (pCreateInfo->pInputAssemblyState != nullptr) + { + binaryCreateInfo.pipelineInfo.iaState.topology = pCreateInfo->pInputAssemblyState->topology; + } + else if (pCreateInfo->stageCount > 0) + { + for (uint32_t stage = 0; stage < pCreateInfo->stageCount; ++stage) + { + if ((pCreateInfo->pStages[stage].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) || + (pCreateInfo->pStages[stage].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) + { + binaryCreateInfo.pipelineInfo.iaState.topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + } + } + } if ((internalFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FORCE_LLPC) != 0) { diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index e69d1743..a023a61b 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -432,6 +432,9 @@ void Image::ConvertImageCreateInfo( const Pal::GfxIpLevel gfxLevel = palProperties.gfxLevel; + const uint32_t forceEnableDccMask = settings.forceEnableDcc; + const uint32_t forceDisableCompressionMask = 
settings.forceDisableCompression; + { // Don't force DCC to be enabled for performance reasons unless the image is larger than the minimum size set for // compression, another performance optimization. @@ -439,14 +442,11 @@ void Image::ConvertImageCreateInfo( (settings.disableSmallSurfColorCompressionSize * settings.disableSmallSurfColorCompressionSize)) && (Formats::IsColorFormat(createInfoFormat))) { - const uint32_t forceEnableDccMask = settings.forceEnableDcc; - const uint32_t bpp = Pal::Formats::BitsPerPixel(pPalCreateInfo->swizzledFormat.format); const bool isShaderStorage = (pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT); - if (isShaderStorage && ((forceEnableDccMask & (ForceDccDefault | - ForceDisableCompression | - ForceDisableCompressionForColor)) == 0)) + if (isShaderStorage && (forceEnableDccMask != 0) && + ((forceDisableCompressionMask & DisableCompressionForColor) == 0)) { const bool isColorAttachment = (pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT); @@ -520,25 +520,20 @@ void Image::ConvertImageCreateInfo( pPalCreateInfo->metadataTcCompatMode = Pal::MetadataTcCompatMode::Disabled; } - const uint32_t disableBits = - ForceDisableCompression | - ((Formats::IsColorFormat(createInfoFormat)) ? ForceDisableCompressionForColor : 0) | - ((Formats::IsDepthStencilFormat(createInfoFormat)) ? ForceDisableCompressionForDepthStencil : 0) | - (externalFlags.externallyShareable ? ForceDisableCompressionForSharedImages : 0); - - // We must not use any metadata if sparse aliasing is enabled or - // settings.forceEnableDcc matches any of the disableBits. - if ((pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) || - ((settings.forceEnableDcc & disableBits) != 0)) + // We must not use any metadata if sparse aliasing is enabled + if ((pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) != 0) { pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; } - // Disable metadata for avoiding corruption if one image is sampled and rendered - // in the same draw. - if ((pCreateInfo->usage & VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0) + const uint32_t disableBits = + (externalFlags.externallyShareable ? DisableCompressionForSharedImages : 0) | + ((Formats::IsColorFormat(createInfoFormat)) ? DisableCompressionForColor : 0) | + ((Formats::IsDepthStencilFormat(createInfoFormat)) ? 
DisableCompressionForDepthStencil : 0); + + if ((forceDisableCompressionMask & disableBits) != 0) { - pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; + pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; } // Apply per application (or run-time) options @@ -550,12 +545,17 @@ void Image::ConvertImageCreateInfo( if ((extStructs.pImageCompressionControl->sType == VK_STRUCTURE_TYPE_IMAGE_COMPRESSION_CONTROL_EXT) && (extStructs.pImageCompressionControl->flags == VK_IMAGE_COMPRESSION_DISABLED_EXT)) { - pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; - pPalCreateInfo->metadataTcCompatMode = Pal::MetadataTcCompatMode::Disabled; + pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; + pPalCreateInfo->metadataTcCompatMode = Pal::MetadataTcCompatMode::Disabled; } } #if defined(__unix__) + if (pPalCreateInfo->flags.optimalShareable && pPalCreateInfo->usageFlags.depthStencil) + { + pPalCreateInfo->metadataMode = Pal::MetadataMode::Disabled; + } + pPalCreateInfo->modifier = DRM_FORMAT_MOD_INVALID; if (pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) diff --git a/icd/api/vk_instance.cpp b/icd/api/vk_instance.cpp index d963de9d..9bb64749 100644 --- a/icd/api/vk_instance.cpp +++ b/icd/api/vk_instance.cpp @@ -44,11 +44,9 @@ #include "include/internal_layer_hooks.h" -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" #include "devmode/devmode_rgp.h" #include "devmode/devmode_ubertrace.h" -#endif #include "res/ver.h" @@ -374,9 +372,7 @@ VkResult Instance::Init( m_nullGpuId = createInfo.nullGpuId; } -#if ICD_GPUOPEN_DEVMODE_BUILD createInfo.flags.supportRgpTraces = 1; -#endif //Check the KHR_DISPALY extension, and then determine whether to open the primaryNode. if (IsExtensionEnabled(InstanceExtensions::KHR_DISPLAY) == false) @@ -628,13 +624,11 @@ VkResult Instance::LoadAndCommitSettings( } } -#if ICD_GPUOPEN_DEVMODE_BUILD // Inform developer mode manager of settings. This also finalizes the developer mode manager. if (m_pDevMode != nullptr) { m_pDevMode->Finalize(deviceCount, settingsLoaders); } -#endif // After all of the settings have been finalized, initialize each device for (uint32_t deviceIdx = 0; ((deviceIdx < deviceCount) && (result == VK_SUCCESS)); ++deviceIdx) @@ -670,7 +664,6 @@ VkResult Instance::Destroy(void) { AmdvlkLog(m_logTagIdMask, GeneralPrint, "%s End ********\n", GetApplicationName()); -#if ICD_GPUOPEN_DEVMODE_BUILD // Pipeline binary cache is required to be freed before destroying DevMode // because DevMode manages the state of pipeline binary cache. uint32_t deviceCount = PhysicalDeviceManager::MaxPhysicalDevices; @@ -685,7 +678,6 @@ VkResult Instance::Destroy(void) { m_pDevMode->Destroy(); } -#endif // Destroy physical device manager if (m_pPhysicalDeviceManager != nullptr) @@ -784,6 +776,8 @@ const InstanceExtensions::Supported& Instance::GetSupportedExtensions() supportedExtensions.AddExtension(VK_INSTANCE_EXTENSION(KHR_GET_SURFACE_CAPABILITIES2)); + supportedExtensions.AddExtension(VK_INSTANCE_EXTENSION(EXT_SWAPCHAIN_COLORSPACE)); + supportedExtensions.AddExtension(VK_INSTANCE_EXTENSION(KHR_DEVICE_GROUP_CREATION)); supportedExtensions.AddExtension(VK_INSTANCE_EXTENSION(KHR_EXTERNAL_SEMAPHORE_CAPABILITIES)); @@ -1028,7 +1022,6 @@ void Instance::EnableCrashAnalysisSupport() // PAL devices (before physical device manager is created). 
void Instance::DevModeEarlyInitialize() { -#if ICD_GPUOPEN_DEVMODE_BUILD VK_ASSERT(m_pPhysicalDeviceManager == nullptr); VK_ASSERT(m_pDevMode == nullptr); @@ -1051,7 +1044,6 @@ void Instance::DevModeEarlyInitialize() VK_ASSERT(result == VK_SUCCESS); } -#endif } // ===================================================================================================================== @@ -1059,7 +1051,6 @@ void Instance::DevModeEarlyInitialize() // PAL devices (after physical device manager is created). void Instance::DevModeLateInitialize() { -#if ICD_GPUOPEN_DEVMODE_BUILD VK_ASSERT(m_pPhysicalDeviceManager != nullptr); VK_ASSERT(m_pDevMode != nullptr); @@ -1074,7 +1065,6 @@ void Instance::DevModeLateInitialize() { EnableCrashAnalysisSupport(); } -#endif } // ===================================================================================================================== diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index 5ec83b79..0fb34279 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -427,6 +427,8 @@ PhysicalDevice::PhysicalDevice( memset(&m_limits, 0, sizeof(m_limits)); memset(m_formatFeatureMsaaTarget, 0, sizeof(m_formatFeatureMsaaTarget)); memset(&m_queueFamilies, 0, sizeof(m_queueFamilies)); + memset(&m_compQueueEnginesNdx, 0, sizeof(m_compQueueEnginesNdx)); + memset(&m_universalQueueEnginesNdx, 0, sizeof(m_universalQueueEnginesNdx)); memset(&m_memoryProperties, 0, sizeof(m_memoryProperties)); memset(&m_gpaProps, 0, sizeof(m_gpaProps)); @@ -2568,6 +2570,20 @@ void PhysicalDevice::GetSparseImageFormatProperties( } } +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDevicePipelineRobustnessProperties( + VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessStorageBuffers, + VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessUniformBuffers, + VkPipelineRobustnessBufferBehaviorEXT* defaultRobustnessVertexInputs, + VkPipelineRobustnessImageBehaviorEXT* defaultRobustnessImages +) const +{ + *defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT; + *defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT; + *defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT; + *defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT; +} + // ===================================================================================================================== VkResult PhysicalDevice::GetPhysicalDeviceCalibrateableTimeDomainsEXT( uint32_t* pTimeDomainCount, @@ -4436,6 +4452,8 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_COOPERATIVE_MATRIX)); } + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_COMPUTE_SHADER_DERIVATIVES)); + bool exposeNvComputeShaderDerivatives = false; if ((pPhysicalDevice == nullptr) || (pPhysicalDevice->GetRuntimeSettings().exportNvComputeShaderDerivatives)) { @@ -4677,7 +4695,7 @@ void PhysicalDevice::PopulateQueueFamilies() enabledQueueFlags |= VK_QUEUE_PROTECTED_BIT; } - // find out the sub engine index of VrHighPriority and indices for compute engines that aren't exclusive. + // find out the sub engine index of VrHighPriority and indices for compute engines that are exclusive. 
{ const auto& computeProps = m_properties.engineProperties[Pal::EngineTypeCompute]; uint32_t engineIndex = 0u; @@ -4704,23 +4722,6 @@ void PhysicalDevice::PopulateQueueFamilies() m_vrHighPrioritySubEngineIndex = subEngineIndex; } } - else if (IsNormalQueue(computeProps.capabilities[subEngineIndex])) - { - m_compQueueEnginesNdx[engineIndex++] = subEngineIndex; - } - } - } - - // find out universal engines that aren't exclusive. - { - const auto& universalProps = m_properties.engineProperties[Pal::EngineTypeUniversal]; - uint32_t engineIndex = 0u; - for (uint32_t subEngineIndex = 0; subEngineIndex < universalProps.engineCount; subEngineIndex++) - { - if (IsNormalQueue(universalProps.capabilities[subEngineIndex])) - { - m_universalQueueEnginesNdx[engineIndex++] = subEngineIndex; - } } } @@ -4808,9 +4809,64 @@ void PhysicalDevice::PopulateQueueFamilies() pQueueFamilyProps->queueCount++; } } - pQueueFamilyProps->queueCount = (engineType == Pal::EngineTypeCompute) - ? Util::Min(settings.asyncComputeQueueLimit, pQueueFamilyProps->queueCount) - : pQueueFamilyProps->queueCount; + + // if the engineType is Universal or Compute, adjust the queue count based on the settings. + // and find pal engine indices for the queues + if (pQueueFamilyProps->queueCount != 0) + { + switch (engineType) + { + case Pal::EngineTypeUniversal: + { + if (settings.forceGraphicsQueueCount != UINT32_MAX) + { + VK_ASSERT(settings.forceGraphicsQueueCount <= Queue::MaxQueuesPerFamily); + pQueueFamilyProps->queueCount = settings.forceGraphicsQueueCount; + } + + // find out pal engine indices for universal queues that aren't exclusive. + uint32_t index = 0; + while (index < pQueueFamilyProps->queueCount) + { + for (uint32_t engineIndex = 0u; engineIndex < engineProps.engineCount; ++engineIndex) + { + if (IsNormalQueue(engineProps.capabilities[engineIndex])) + { + m_universalQueueEnginesNdx[index] = engineIndex; + index++; + } + } + } + break; + } + + case Pal::EngineTypeCompute: + { + if (settings.forceComputeQueueCount != UINT32_MAX) + { + VK_ASSERT(settings.forceComputeQueueCount <= Queue::MaxQueuesPerFamily); + pQueueFamilyProps->queueCount = settings.forceComputeQueueCount; + } + + // find out pal engine indices for compute queues that aren't exclusive. + uint32_t index = 0; + while (index < pQueueFamilyProps->queueCount) + { + for (uint32_t engineIndex = 0u; engineIndex < engineProps.engineCount; ++engineIndex) + { + if (IsNormalQueue(engineProps.capabilities[engineIndex])) + { + m_compQueueEnginesNdx[index] = engineIndex; + index++; + } + } + } + break; + } + default: + break; // no-op + } + } pQueueFamilyProps->timestampValidBits = (engineProps.flags.supportsTimestamps != 0) ? 64 : 0; pQueueFamilyProps->minImageTransferGranularity = PalToVkExtent3d(engineProps.minTiledImageCopyAlignment); @@ -6949,9 +7005,8 @@ size_t PhysicalDevice::GetFeatures2( if (updateFeatures) { - const bool captureReplay = PalProperties().gfxipProperties.flags.supportCaptureReplay; pExtInfo->descriptorBuffer = VK_TRUE; - pExtInfo->descriptorBufferCaptureReplay = captureReplay ? 
VK_TRUE : VK_FALSE; + pExtInfo->descriptorBufferCaptureReplay = VK_FALSE; pExtInfo->descriptorBufferImageLayoutIgnored = VK_FALSE; pExtInfo->descriptorBufferPushDescriptors = VK_TRUE; } @@ -7711,6 +7766,18 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_REPLICATED_COMPOSITES_FEATURES_EXT: + { + auto* pExtInfo = reinterpret_cast(pHeader); + if (updateFeatures) + { + pExtInfo->shaderReplicatedComposites = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + break; + } + default: { // skip any unsupported extension structures @@ -7856,12 +7923,11 @@ VkResult PhysicalDevice::GetImageFormatProperties2( pImageCompressionProps->imageCompressionFixedRateFlags = VK_IMAGE_COMPRESSION_FIXED_RATE_NONE_EXT; const uint32_t disableBits = - ForceDisableCompression | - ((Formats::IsColorFormat(createInfoFormat)) ? ForceDisableCompressionForColor : 0) | - ((Formats::IsDepthStencilFormat(createInfoFormat)) ? ForceDisableCompressionForDepthStencil : 0); + ((Formats::IsColorFormat(createInfoFormat)) ? DisableCompressionForColor : 0) | + ((Formats::IsDepthStencilFormat(createInfoFormat)) ? DisableCompressionForDepthStencil : 0); pImageCompressionProps->imageCompressionFlags = - ((GetRuntimeSettings().forceEnableDcc & disableBits) == 0) ? + ((GetRuntimeSettings().forceDisableCompression & disableBits) == 0) ? VK_IMAGE_COMPRESSION_DEFAULT_EXT : VK_IMAGE_COMPRESSION_DISABLED_EXT; } @@ -8479,10 +8545,11 @@ void PhysicalDevice::GetDeviceProperties2( { auto* pProps = static_cast(pNext); - pProps->defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT; - pProps->defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT; - pProps->defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT; - pProps->defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT; + GetPhysicalDevicePipelineRobustnessProperties(&pProps->defaultRobustnessStorageBuffers, + &pProps->defaultRobustnessUniformBuffers, + &pProps->defaultRobustnessVertexInputs, + &pProps->defaultRobustnessImages); + break; } diff --git a/icd/api/vk_pipeline.cpp b/icd/api/vk_pipeline.cpp index 5a3c4ba2..ec1379d8 100644 --- a/icd/api/vk_pipeline.cpp +++ b/icd/api/vk_pipeline.cpp @@ -796,7 +796,8 @@ VkResult Pipeline::GetShaderDisassembly( // To extract the shader code, we can re-parse the saved ELF binary and lookup the shader's program // instructions by examining the symbol table entry for that shader's entrypoint. 
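A condensed sketch of that lookup using the reader's CopySymbol helper, which the hunk below switches to (the static_cast target types are assumptions; the diff elides them):

    // Map the API shader stage to its HW stage, then copy the disassembly
    // symbol straight out of the pipeline ELF into the caller's buffer.
    uint32_t hwStage = 0;
    if (Util::BitMaskScanForward(&hwStage,
            apiToHwShader.apiShaders[static_cast<uint32_t>(apiShaderType)]))
    {
        const Util::Result copyResult = abiReader.CopySymbol(
            Util::Abi::GetSymbolForStage(
                Util::Abi::PipelineSymbolType::ShaderDisassembly,
                static_cast<Util::Abi::HardwareStage>(hwStage)),
            pBufferSize,
            pBuffer);

        symbolValid = (copyResult == Util::Result::Success);
    }
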
- Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(), binaryInfo.pipelineBinary.pCode); + Util::Abi::PipelineAbiReader abiReader(pDevice->VkInstance()->Allocator(), + Util::Span{binaryInfo.pipelineBinary.pCode, binaryInfo.pipelineBinary.codeSize}); VkResult result = VK_SUCCESS; Pal::Result palResult = abiReader.Init(); @@ -846,32 +847,34 @@ VkResult Pipeline::GetShaderDisassembly( uint32_t hwStage = 0; if (Util::BitMaskScanForward(&hwStage, apiToHwShader.apiShaders[static_cast(apiShaderType)])) { - const Util::Elf::SymbolTableEntry* pSymbolEntry = nullptr; const char* pSectionName = nullptr; if (pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderDisassembly) { - pSymbolEntry = abiReader.GetPipelineSymbol( - Util::Abi::GetSymbolForStage( - Util::Abi::PipelineSymbolType::ShaderDisassembly, - static_cast(hwStage))); + palResult = abiReader.CopySymbol( + Util::Abi::GetSymbolForStage( + Util::Abi::PipelineSymbolType::ShaderDisassembly, + static_cast(hwStage)), + pBufferSize, + pBuffer); + pSectionName = Util::Abi::AmdGpuDisassemblyName; + symbolValid = palResult == Util::Result::Success; } else if (pipelineSymbolType == Util::Abi::PipelineSymbolType::ShaderAmdIl) { - pSymbolEntry = abiReader.GetPipelineSymbol( - Util::Abi::GetSymbolForStage( - Util::Abi::PipelineSymbolType::ShaderAmdIl, - apiShaderType)); + palResult = abiReader.CopySymbol( + Util::Abi::GetSymbolForStage( + Util::Abi::PipelineSymbolType::ShaderAmdIl, + apiShaderType), + pBufferSize, + pBuffer); + pSectionName = Util::Abi::AmdGpuCommentLlvmIrName; + symbolValid = palResult == Util::Result::Success; } - if (pSymbolEntry != nullptr) - { - palResult = abiReader.GetElfReader().CopySymbol(*pSymbolEntry, pBufferSize, pBuffer); - symbolValid = palResult == Util::Result::Success; - } - else if (pSectionName != nullptr) + if ((symbolValid == false) && (pSectionName != nullptr)) { // NOTE: LLVM doesn't add disassemble symbol in ELF disassemble section, instead, it contains // the entry name in disassemble section. 
so we have to search the entry name to split per @@ -1016,7 +1019,8 @@ uint32_t Pipeline::GetAvailableAmdIlSymbol( bool hasBinary = GetBinary(shaderType, &binaryInfo); if (hasBinary) { - Util::Abi::PipelineAbiReader abiReader(m_pDevice->VkInstance()->Allocator(), binaryInfo.pipelineBinary.pCode); + Util::Abi::PipelineAbiReader abiReader(m_pDevice->VkInstance()->Allocator(), + Util::Span{binaryInfo.pipelineBinary.pCode, binaryInfo.pipelineBinary.codeSize}); Pal::Result result = abiReader.Init(); if (result == Pal::Result::Success) @@ -1036,7 +1040,7 @@ uint32_t Pipeline::GetAvailableAmdIlSymbol( const Util::Elf::SymbolTableEntry* pSymbolEntry = nullptr; const char* pSectionName = nullptr; - pSymbolEntry = abiReader.GetPipelineSymbol( + pSymbolEntry = abiReader.GetSymbolHeader( Util::Abi::GetSymbolForStage( Util::Abi::PipelineSymbolType::ShaderAmdIl, abiShaderType)); diff --git a/icd/api/vk_pipeline_cache.cpp b/icd/api/vk_pipeline_cache.cpp index 98938439..5fc28f46 100644 --- a/icd/api/vk_pipeline_cache.cpp +++ b/icd/api/vk_pipeline_cache.cpp @@ -129,9 +129,7 @@ VkResult PipelineCache::Create( pDevice->GetCompiler(DefaultDeviceIndex)->GetGfxIp(), pDefaultPhysicalDevice->GetRuntimeSettings(), pDefaultPhysicalDevice->PalDevice()->GetCacheFilePath(), -#if ICD_GPUOPEN_DEVMODE_BUILD pDefaultPhysicalDevice->VkInstance()->GetDevModeMgr(), -#endif expectedEntries, initialDataSize, pInitialData, diff --git a/icd/api/vk_pipeline_layout.cpp b/icd/api/vk_pipeline_layout.cpp index 3c029508..0d166d00 100644 --- a/icd/api/vk_pipeline_layout.cpp +++ b/icd/api/vk_pipeline_layout.cpp @@ -361,9 +361,9 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( // Reserve user data nodes for vertex buffer table pPipelineInfo->numUserDataNodes += 1; - pInfo->userDataRegCount += VbTablePtrRegCount; + pInfo->userDataRegCount += VbTablePtrRegCount; // In case we need an internal vertex buffer table, add nodes required for its entries, and its set pointer. - pPipelineInfo->numRsrcMapNodes += Pal::MaxVertexBuffers; + pPipelineInfo->numRsrcMapNodes += Pal::MaxVertexBuffers; // If uber-fetch shader is not enabled for early compile, the user data entries for uber-fetch shader const // buffer is appended at the bottom of user data table. Just following vertex buffer table. @@ -371,27 +371,27 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( { VK_ASSERT(pUserDataLayout->uberFetchConstBufRegBase == InvalidReg); - pUserDataLayout->uberFetchConstBufRegBase = pInfo->userDataRegCount; - pInfo->userDataRegCount += 1; - pPipelineInfo->numUserDataNodes += 1; - pPipelineInfo->numRsrcMapNodes += 1; + pUserDataLayout->uberFetchConstBufRegBase = pInfo->userDataRegCount; + pInfo->userDataRegCount += 1; + pPipelineInfo->numUserDataNodes += 1; + pPipelineInfo->numRsrcMapNodes += 1; } // Reserve an user-data to store the VA of buffer for transform feedback. 
if (ReserveXfbNode(pDevice)) { - pUserDataLayout->transformFeedbackRegBase = pInfo->userDataRegCount; - pUserDataLayout->transformFeedbackRegCount = 1; - pInfo->userDataRegCount += pUserDataLayout->transformFeedbackRegCount; - pPipelineInfo->numUserDataNodes += 1; + pUserDataLayout->transformFeedbackRegBase = pInfo->userDataRegCount; + pUserDataLayout->transformFeedbackRegCount = 1; + pInfo->userDataRegCount += pUserDataLayout->transformFeedbackRegCount; + pPipelineInfo->numUserDataNodes += 1; } if (pDevice->GetEnabledFeatures().enableDebugPrintf) { - pPipelineInfo->numUserDataNodes += 1; - pUserDataLayout->debugPrintfRegBase = pInfo->userDataRegCount; - pInfo->userDataRegCount += 1; - pPipelineInfo->numRsrcMapNodes += 1; + pPipelineInfo->numUserDataNodes += 1; + pUserDataLayout->debugPrintfRegBase = pInfo->userDataRegCount; + pInfo->userDataRegCount += 1; + pPipelineInfo->numRsrcMapNodes += 1; } // Allocate user data for the thread group reversal state @@ -402,29 +402,34 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( &pInfo->userDataRegCount, &pUserDataLayout->threadGroupReversalRegBase); + // Allocate user data for push constants + pPipelineInfo->numUserDataNodes += pushConstantsUserDataNodeCount; + + pCommonUserDataLayout->pushConstRegBase = pInfo->userDataRegCount; + pCommonUserDataLayout->pushConstRegCount = pushConstRegCount; + pInfo->userDataRegCount += pushConstRegCount; + #if VKI_RAY_TRACING if (HasRayTracing(pDevice, pIn)) { // Reserve one node for indirect RT capture replay. - pPipelineInfo->numUserDataNodes += 1; - pUserDataLayout->rtCaptureReplayConstBufRegBase = pInfo->userDataRegCount; - pInfo->userDataRegCount += InternalConstBufferRegCount; - - // Dispatch ray args - pInfo->userDataRegCount += MaxTraceRayUserDataRegCount; - pPipelineInfo->numUserDataNodes += MaxTraceRayUserDataNodeCount; - pPipelineInfo->numRsrcMapNodes += MaxTraceRayResourceNodeCount; - pPipelineInfo->hasRayTracing = true; + pPipelineInfo->numUserDataNodes += 1; + pCommonUserDataLayout->rtCaptureReplayConstBufRegBase = pInfo->userDataRegCount; + pInfo->userDataRegCount += InternalConstBufferRegCount; + + // NOTE: In certain Proton games, the dispatchRaysArgsPtrRegBase must be positioned carefully within the user + // data entry list. Experimental results indicate that these games work without a GPU hang when + // dispatchRaysArgsPtrRegBase is placed after the pushConst user data entry. The root cause of this behavior is + // currently unknown and may be due to a potential bug in Proton. Exercise caution when changing the location of + // dispatchRaysArgsPtrRegBase. 
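To make the ordering constraint in that NOTE concrete, a simplified sketch of the compact-scheme register order after this change (illustrative only; the exact bases depend on which optional features reserve entries first):

    // Compact-scheme user data, low registers to high (simplified):
    //   vertex-buffer table ptr
    //   uber-fetch shader const buffer    (optional)
    //   transform-feedback buffer VA      (optional)
    //   debug-printf buffer               (optional)
    //   thread-group reversal state       (optional)
    //   push constants                    <- now allocated before the RT block
    //   RT capture-replay const buffer    (ray tracing only)
    //   dispatch-rays args ptr            <- kept after push constants (see NOTE)
    //   descriptor set pointers ...
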
+ pCommonUserDataLayout->dispatchRaysArgsPtrRegBase = pInfo->userDataRegCount; + pInfo->userDataRegCount += MaxTraceRayUserDataRegCount; + pPipelineInfo->numUserDataNodes += MaxTraceRayUserDataNodeCount; + pPipelineInfo->numRsrcMapNodes += MaxTraceRayResourceNodeCount; + pPipelineInfo->hasRayTracing = true; } #endif - // Allocate user data for push constants - pPipelineInfo->numUserDataNodes += pushConstantsUserDataNodeCount; - - pCommonUserDataLayout->pushConstRegBase = pInfo->userDataRegCount; - pCommonUserDataLayout->pushConstRegCount = pushConstRegCount; - pInfo->userDataRegCount += pushConstRegCount; - // Populate user data layouts for each descriptor set that is active pUserDataLayout->setBindingRegBase = pInfo->userDataRegCount; @@ -656,16 +661,16 @@ VkResult PipelineLayout::BuildIndirectSchemeInfo( #if VKI_RAY_TRACING if (HasRayTracing(pDevice, pIn)) { - pUserDataLayout->dispatchRaysArgsPtrRegBase = pInfo->userDataRegCount; - pPipelineInfo->numUserDataNodes += MaxTraceRayUserDataNodeCount; - pPipelineInfo->numRsrcMapNodes += MaxTraceRayResourceNodeCount; - pInfo->userDataRegCount += MaxTraceRayUserDataRegCount; - pPipelineInfo->hasRayTracing = true; + pCommonUserDataLayout->dispatchRaysArgsPtrRegBase = pInfo->userDataRegCount; + pPipelineInfo->numUserDataNodes += MaxTraceRayUserDataNodeCount; + pPipelineInfo->numRsrcMapNodes += MaxTraceRayResourceNodeCount; + pInfo->userDataRegCount += MaxTraceRayUserDataRegCount; + pPipelineInfo->hasRayTracing = true; // Reserve one node for indirect RT capture replay. - pUserDataLayout->rtCaptureReplayConstBufRegBase = pInfo->userDataRegCount; - pPipelineInfo->numUserDataNodes += 1; - pInfo->userDataRegCount += InternalConstBufferRegCount; + pCommonUserDataLayout->rtCaptureReplayConstBufRegBase = pInfo->userDataRegCount; + pPipelineInfo->numUserDataNodes += 1; + pInfo->userDataRegCount += InternalConstBufferRegCount; } #endif @@ -1191,21 +1196,7 @@ uint32_t PipelineLayout::GetDispatchRaysUserData() const if (m_pipelineInfo.hasRayTracing) { - if (userDataLayout.scheme == PipelineLayoutScheme::Compact) - { - // The dispatch rays args is always the last entry - // TODO #raytracing: This means it spills first. Probably bad for perf. 
- dispatchRaysUserData = m_info.userDataRegCount; - } - else if (userDataLayout.scheme == PipelineLayoutScheme::Indirect) - { - dispatchRaysUserData = userDataLayout.indirect.dispatchRaysArgsPtrRegBase; - } - else - { - VK_NEVER_CALLED(); - dispatchRaysUserData = 0; - } + dispatchRaysUserData = userDataLayout.common.dispatchRaysArgsPtrRegBase; } return dispatchRaysUserData; @@ -1366,7 +1357,7 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( { BuildLlpcInternalConstantBufferMapping( stageMask, - userDataLayout.rtCaptureReplayConstBufRegBase, + commonUserDataLayout.rtCaptureReplayConstBufRegBase, Vkgc::RtCaptureReplayInternalBufferBinding, &pUserDataNodes[userDataNodeCount], &userDataNodeCount); @@ -1374,7 +1365,7 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( BuildLlpcRayTracingDispatchArgumentsMapping( stageMask, - m_info.userDataRegCount, + commonUserDataLayout.dispatchRaysArgsPtrRegBase, MaxTraceRayUserDataRegCount, &pUserDataNodes[userDataNodeCount], &userDataNodeCount, @@ -1677,7 +1668,7 @@ void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( if (appendRtCaptureReplayCb) { - VK_ASSERT(rtCaptureReplayCbRegBase == userDataLayout.rtCaptureReplayConstBufRegBase); + VK_ASSERT(rtCaptureReplayCbRegBase == commonUserDataLayout.rtCaptureReplayConstBufRegBase); BuildLlpcInternalConstantBufferMapping( stageMask, rtCaptureReplayCbRegBase, diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp index 291b6bae..3e941fb0 100644 --- a/icd/api/vk_queue.cpp +++ b/icd/api/vk_queue.cpp @@ -43,9 +43,7 @@ #include "include/vk_swapchain.h" #include "include/vk_utils.h" -#if ICD_GPUOPEN_DEVMODE_BUILD #include "devmode/devmode_mgr.h" -#endif #if VKI_RAY_TRACING #include "raytrace/ray_tracing_device.h" @@ -1089,13 +1087,10 @@ VkResult Queue::Submit( const SubmitInfoType* pSubmits, VkFence fence) { -#if ICD_GPUOPEN_DEVMODE_BUILD IDevMode* pDevMode = m_pDevice->VkInstance()->GetDevModeMgr(); bool timedQueueEvents = ((pDevMode != nullptr) && pDevMode->IsQueueTimingActive(m_pDevice)); -#else - bool timedQueueEvents = false; -#endif + Fence* pFence = Fence::ObjectFromHandle(fence); VirtualStackFrame virtStackFrame(m_pStackAllocator); @@ -1469,8 +1464,9 @@ VkResult Queue::Submit( pDevMode->RecordRenderOps(deviceIdx, this, drawCallCount, dispatchCallCount); } - Pal::IFence* iFence[2] = {nullptr, nullptr}; + Pal::IFence* iFence[2] = { nullptr, nullptr }; palSubmitInfo.ppFences = iFence; + palSubmitInfo.fenceCount = 0; #if VKI_RAY_TRACING if (pCpsMemFence != nullptr) @@ -1573,7 +1569,6 @@ VkResult Queue::Submit( } else { -#if ICD_GPUOPEN_DEVMODE_BUILD // TMZ is NOT supported for GPUOPEN path. 
VK_ASSERT((*pCommandBuffers[0])->IsProtected() == false); @@ -1584,9 +1579,6 @@ VkResult Queue::Submit( pCmdBuffers, palSubmitInfo, &virtStackFrame); -#else - VK_NEVER_CALLED(); -#endif } result = PalToVkResult(palResult); @@ -1696,14 +1688,10 @@ VkResult Queue::PalSignalSemaphores( const uint32_t semaphoreDeviceIndicesCount, const uint32_t* pSemaphoreDeviceIndices) { -#if ICD_GPUOPEN_DEVMODE_BUILD IDevMode* pDevMode = m_pDevice->VkInstance()->GetDevModeMgr(); bool timedQueueEvents = ((pDevMode != nullptr) && pDevMode->IsQueueTimingActive(m_pDevice)); -#else - bool timedQueueEvents = false; -#endif Pal::Result palResult = Pal::Result::Success; uint32_t deviceIdx = DefaultDeviceIndex; @@ -1744,14 +1732,8 @@ VkResult Queue::PalSignalSemaphores( } else { -#if ICD_GPUOPEN_DEVMODE_BUILD palResult = pDevMode->TimedSignalQueueSemaphore(deviceIdx, this, pSemaphores[i], pointValue, pPalSemaphore); -#else - VK_NEVER_CALLED(); - - palResult = Pal::Result::ErrorUnknown; -#endif } } } @@ -1773,14 +1755,10 @@ VkResult Queue::PalWaitSemaphores( Pal::Result palResult = Pal::Result::Success; uint32_t deviceIdx = DefaultDeviceIndex; -#if ICD_GPUOPEN_DEVMODE_BUILD IDevMode* pDevMode = m_pDevice->VkInstance()->GetDevModeMgr(); bool timedQueueEvents = ((pDevMode != nullptr) && pDevMode->IsQueueTimingActive(m_pDevice)); -#else - bool timedQueueEvents = false; -#endif for (uint32_t i = 0; (i < semaphoreCount) && (palResult == Pal::Result::Success); ++i) { @@ -1823,14 +1801,8 @@ VkResult Queue::PalWaitSemaphores( } else { -#if ICD_GPUOPEN_DEVMODE_BUILD palResult = pDevMode->TimedWaitQueueSemaphore(deviceIdx, this, pSemaphores[i], pointValue, pPalSemaphore); -#else - VK_NEVER_CALLED(); - - palResult = Pal::Result::ErrorUnknown; -#endif } } } @@ -1889,7 +1861,6 @@ VkResult Queue::Present( if (pPresentInfo == nullptr) { -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevice->VkInstance()->GetDevModeMgr() != nullptr) { m_pDevice->VkInstance()->GetDevModeMgr()->NotifyFrameEnd(this, @@ -1897,7 +1868,6 @@ VkResult Queue::Present( m_pDevice->VkInstance()->GetDevModeMgr()->NotifyFrameBegin(this, IDevMode::FrameDelimiterType::QueuePresent); } -#endif return VK_ERROR_INITIALIZATION_FAILED; } @@ -2048,13 +2018,11 @@ VkResult Queue::Present( m_pDevice->VkInstance()->PalPlatform()->UpdateFrameTraceController(pPresentQueue); // Notify gpuopen developer mode that we're about to present (frame-end boundary) -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevice->VkInstance()->GetDevModeMgr() != nullptr) { m_pDevice->VkInstance()->GetDevModeMgr()->NotifyFrameEnd(this, IDevMode::FrameDelimiterType::QueuePresent); } -#endif bool syncFlip = false; bool postFrameTimerSubmission = false; @@ -2092,13 +2060,11 @@ VkResult Queue::Present( pSwapChain->PostPresent(presentInfo, &palResult); // Notify gpuopen developer mode that a present occurred (frame-begin boundary) -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevice->VkInstance()->GetDevModeMgr() != nullptr) { m_pDevice->VkInstance()->GetDevModeMgr()->NotifyFrameBegin(this, IDevMode::FrameDelimiterType::QueuePresent); } -#endif VkResult curResult = PalToVkResult(palResult); @@ -2736,7 +2702,6 @@ void Queue::InsertDebugUtilsLabel( if (strcmp(pLabelInfo->pLabelName, settings.devModeEndFrameDebugUtilsLabel) == 0) { -#if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevice->VkInstance()->GetDevModeMgr() != nullptr) { m_pDevice->VkInstance()->GetDevModeMgr()->NotifyFrameEnd(this, IDevMode::FrameDelimiterType::QueueLabel); @@ -2749,18 +2714,15 @@ void Queue::InsertDebugUtilsLabel( VK_ASSERT(tempResult == VK_SUCCESS); } } -#endif } 
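These two label checks let an application delimit frames for the developer-mode tooling without presenting. A hypothetical app-side trigger; the literal string is a placeholder and must match whatever devModeEndFrameDebugUtilsLabel / devModeStartFrameDebugUtilsLabel are configured to:

    // Hypothetical usage: fires the strcmp checks above/below via
    // VK_EXT_debug_utils.
    VkDebugUtilsLabelEXT label = {};
    label.sType      = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
    label.pLabelName = "FrameEnd";  // placeholder; must equal the runtime setting

    vkQueueInsertDebugUtilsLabelEXT(queue, &label);
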
diff --git a/icd/api/vk_swapchain.cpp b/icd/api/vk_swapchain.cpp
index 7b936c3b..046189f7 100644
--- a/icd/api/vk_swapchain.cpp
+++ b/icd/api/vk_swapchain.cpp
@@ -668,7 +668,7 @@ VkResult SwapChain::SetupAutoStereo(
        3,
        userDataNodes,
        0,
-        false,
+        ShaderWaveSize::WaveSizeAuto,
        nullptr,
        &m_pAutoStereoPipeline);
@@ -963,7 +963,6 @@ bool SwapChain::BuildPostProcessingCommands(
    imageViewInfo[0].subresRange.startSubres.plane = 0;
    imageViewInfo[0].possibleLayouts.usages = Pal::LayoutShaderRead | Pal::LayoutShaderWrite;
    imageViewInfo[0].possibleLayouts.engines = Pal::ImageLayoutEngineFlags::LayoutUniversalEngine;
-
    // Update array slice for right eye SRD
    imageViewInfo[1] = imageViewInfo[0];
    imageViewInfo[1].subresRange.startSubres.arraySlice = 1;
@@ -1009,7 +1008,7 @@ bool SwapChain::BuildPostProcessingCommands(
    dispatchDimensions.y = Util::RoundUpToMultiple(imageCreateInfo.extent.width, workGroupSize[1]) / workGroupSize[1];
    dispatchDimensions.z = 1;

-    pCmdBuf->CmdDispatch(dispatchDimensions);
+    pCmdBuf->CmdDispatch(dispatchDimensions, {});

    Pal::AcquireReleaseInfo acquireRelInfo = {};
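Every CmdDispatch call site touched by this patch, here and in gpuTexDecoder.cpp below, gains a second argument; passing an empty braced initializer keeps the previous behavior while satisfying the widened PAL interface. Illustrative call shape only; the new parameter's type is not named anywhere in this diff:

    // Before: the grid is described only by thread-group counts.
    pCmdBuf->CmdDispatch(dispatchDimensions);

    // After: a default-initialized second argument opts out of whatever new
    // dispatch options the extended interface carries.
    pCmdBuf->CmdDispatch(dispatchDimensions, {});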
diff --git a/icd/imported/gputexdecoder/gpuTexDecoder.cpp b/icd/imported/gputexdecoder/gpuTexDecoder.cpp
index 0c4d95c5..40265842 100755
--- a/icd/imported/gputexdecoder/gpuTexDecoder.cpp
+++ b/icd/imported/gputexdecoder/gpuTexDecoder.cpp
@@ -515,7 +515,7 @@ Pal::Result Device::GpuDecodeImage(
                const uint32 threadGroupsZ = Util::Max(pPalImageRegions[idx].extent.depth,
                                                       pPalImageRegions[idx].numSlices);
-                m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ });
+                m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }, {});
            }
        }
        else if ((type == InternalTexConvertCsType::ConvertETC2ToRGBA8) ||
@@ -589,7 +589,7 @@ Pal::Result Device::GpuDecodeImage(
                const uint32 threadGroupsZ = Util::Max(pPalImageRegions[idx].extent.depth,
                                                       pPalImageRegions[idx].numSlices);
-                m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ });
+                m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }, {});
            }
        }
        else
@@ -641,7 +641,7 @@ Pal::Result Device::GpuDecodeImage(
                uint32 height = pPalImageRegions[idx].extent.height * 4;
                const uint32 threadGroupsX = (width * height + 63) / 64;
-                m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, 1, 1 });
+                m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, 1, 1 }, {});
            }
        }
@@ -743,7 +743,7 @@ Pal::Result Device::GpuDecodeBuffer(
                const uint32 threadGroupsZ = Util::Max(pPalBufferRegionsIn[idx].imageExtent.depth,
                                                       pPalBufferRegionsIn[idx].numSlices);
-                m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ });
+                m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }, {});
            }
        }
        else
@@ -843,7 +843,7 @@ Pal::Result Device::GpuDecodeBuffer(
                const uint32 threadGroupsZ = Util::Max(pPalBufferRegionsIn[idx].imageExtent.depth,
                                                       pPalBufferRegionsIn[idx].numSlices);
-                m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ });
+                m_pPalCmdBuffer->CmdDispatch({ threadGroupsX, threadGroupsY, threadGroupsZ }, {});
            }
        }
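The decode paths above size their grids with round-up integer division; for example, (width * height + 63) / 64 launches enough 64-thread groups to cover every texel. A worked example, assuming a hypothetical 52x28 region:

    // Round-up division: ceil(a / b) == (a + b - 1) / b for positive integers.
    uint32 width  = 52;
    uint32 height = 28;                                // 52 * 28 = 1456 texels
    uint32 threadGroupsX = (width * height + 63) / 64;
    // (1456 + 63) / 64 = 23 groups = 1472 threads: all 1456 texels are
    // covered, with 16 idle lanes in the final group.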
diff --git a/icd/res/ver.h b/icd/res/ver.h
index 52eb7fb5..911aa4c2 100644
--- a/icd/res/ver.h
+++ b/icd/res/ver.h
@@ -36,7 +36,7 @@
 #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0"

 // Bump up after each promotion to mainline
-#define VULKAN_ICD_BUILD_VERSION 325
+#define VULKAN_ICD_BUILD_VERSION 328

 // String version is needed with leading zeros and extra termination (unicode)
 #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION
@@ -45,11 +45,11 @@
 // These values specify the driver ID and driver info string
 #define VULKAN_DRIVER_ID VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR // "AMDOPEN"
 #define VULKAN_DRIVER_NAME_STR "AMD open-source driver"
-#define VULKAN_DRIVER_INFO_STR "2024.Q4.1"
+#define VULKAN_DRIVER_INFO_STR "2024.Q4.2"
 #define VULKAN_DRIVER_INFO_STR_LLPC "(LLPC)"

 // These values tell which version of the conformance test the driver is compliant against
 #define CTS_VERSION_MAJOR 1
 #define CTS_VERSION_MINOR 3
-#define CTS_VERSION_SUBMINOR 5
+#define CTS_VERSION_SUBMINOR 9
 #define CTS_VERSION_PATCH 2
diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp
index 0c663cf9..66211c4d 100644
--- a/icd/settings/settings.cpp
+++ b/icd/settings/settings.cpp
@@ -232,7 +232,7 @@ void VulkanSettingsLoader::OverrideDefaultsExperimentInfo()

    VK_SET_VAL_IF_EXPERIMENT_ENABLED(AmdVendorExtensions, disableAmdVendorExtensions, true);

-    VK_SET_VAL_IF_EXPERIMENT_ENABLED(ComputeQueueSupport, asyncComputeQueueLimit, 0);
+    VK_SET_VAL_IF_EXPERIMENT_ENABLED(ComputeQueueSupport, forceComputeQueueCount, 0);

    if (pExpSettings->expBarrierOptimizations.ValueOr(false))
    {
@@ -304,7 +304,7 @@ void VulkanSettingsLoader::OverrideDefaultsExperimentInfo()
        m_settings.allowExternalPipelineCacheObject = false;
    }

-    VK_SET_VAL_IF_EXPERIMENT_ENABLED(TextureColorCompression, forceEnableDcc, ForceDisableCompressionForColor);
+    VK_SET_VAL_IF_EXPERIMENT_ENABLED(TextureColorCompression, forceDisableCompression, DisableCompressionForColor);

    PAL_SET_VAL_IF_EXPERIMENT_ENABLED(ZeroUnboundDescriptors, zeroUnboundDescDebugSrd, true);
@@ -348,7 +348,7 @@ void VulkanSettingsLoader::FinalizeExperiments()

    pExpSettings->expAmdVendorExtensions = m_settings.disableAmdVendorExtensions;

-    pExpSettings->expComputeQueueSupport = (m_settings.asyncComputeQueueLimit == 0);
+    pExpSettings->expComputeQueueSupport = (m_settings.forceComputeQueueCount == 0);

    pExpSettings->expBarrierOptimizations = ((pPalSettings->pwsMode == Pal::PwsMode::Disabled) &&
                                             (m_settings.useAcquireReleaseInterface == false));
@@ -371,7 +371,7 @@ void VulkanSettingsLoader::FinalizeExperiments()
    pExpSettings->expRayTracingPipelineCompilationMode = (m_settings.rtCompileMode == RtCompileModeIndirect);
 #endif

-    pExpSettings->expTextureColorCompression = m_settings.forceEnableDcc == ForceDisableCompressionForColor;
+    pExpSettings->expTextureColorCompression = m_settings.forceDisableCompression == DisableCompressionForColor;

    pExpSettings->expZeroUnboundDescriptors = pPalSettings->zeroUnboundDescDebugSrd;
@@ -484,7 +484,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(

            m_settings.nggCompactVertex = false;
        }
-        else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+        else if (IsGfx11(pInfo->gfxLevel))
        {
            // Enable NGG compactionless mode for Navi3x
            m_settings.nggCompactVertex = false;
        }
@@ -528,12 +528,14 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
            m_settings.optColorTargetUsageDoesNotContainResolveLayout = true;

            m_settings.barrierFilterOptions = SkipStrayExecutionDependencies |
-                SkipImageLayoutUndefined |
-                SkipDuplicateResourceBarriers;
+                                              SkipImageLayoutUndefined |
+                                              SkipDuplicateResourceBarriers;

            m_settings.modifyResourceKeyForAppProfile = true;
            m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusive;

+            m_settings.asyncComputeQueueMaxWavesPerCu = 20;
+
            // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we
            // can't do any better than returning a non-null function pointer for them.
            m_settings.lenientInstanceFuncQuery = true;
@@ -595,7 +597,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
            {
            }
        }
-        else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+        else if (IsGfx11(pInfo->gfxLevel))
        {
            if (pInfo->revision == Pal::AsicRevision::Navi31)
            {
@@ -615,25 +617,26 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
            m_settings.optColorTargetUsageDoesNotContainResolveLayout = true;

            m_settings.barrierFilterOptions = SkipStrayExecutionDependencies |
-                SkipImageLayoutUndefined;
+                                              SkipImageLayoutUndefined;

            m_settings.modifyResourceKeyForAppProfile = true;
            m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusive;

-            m_settings.asyncComputeQueueLimit = 1;
+            m_settings.forceComputeQueueCount = 1;
+
+            m_settings.asyncComputeQueueMaxWavesPerCu = 20;

            // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we
            // can't do any better than returning a non-null function pointer for them.
            m_settings.lenientInstanceFuncQuery = true;
        }

-        if (((appProfile == AppProfile::WolfensteinII) ||
-             (appProfile == AppProfile::WolfensteinYoungblood) ||
-             (appProfile == AppProfile::Doom)) &&
+        if (((appProfile == AppProfile::WolfensteinII)         ||
+             (appProfile == AppProfile::WolfensteinYoungblood) ||
+             (appProfile == AppProfile::Doom))                 &&
            ((pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) ||
-             (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3)))
+             (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3)))
        {
-            m_settings.asyncComputeQueueMaxWavesPerCu = 20;
            m_settings.nggSubgroupSizing = NggSubgroupExplicit;
            m_settings.nggVertsPerSubgroup = 254;
            m_settings.nggPrimsPerSubgroup = 128;
@@ -704,7 +707,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
            }
        }

-        if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+        if (IsGfx11(pInfo->gfxLevel))
        {
            m_settings.resourceBarrierOptions &= ~ResourceBarrierOptions::SkipDstCacheInv;
        }
@@ -780,7 +783,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
            }
        }

-        if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+        if (IsGfx11(pInfo->gfxLevel))
        {
            m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr;
            m_settings.ac01WaNotNeeded = true;
@@ -970,8 +973,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
                                             ForceDccForColorAttachments |
                                             ForceDccFor32BppShaderStorage);
            }
-
-            else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+            else if (IsGfx11(pInfo->gfxLevel))
            {
                if (pInfo->revision == Pal::AsicRevision::Navi31)
                {
@@ -1086,7 +1088,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(

        if (appProfile == AppProfile::RainbowSixExtraction)
        {
-            if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+            if (IsGfx11(pInfo->gfxLevel))
            {
                if (pInfo->revision == Pal::AsicRevision::Navi31)
                {
@@ -1162,7 +1164,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
                m_settings.fsWaveSize = 64;
            }
        }
-        else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+        else if (IsGfx11(pInfo->gfxLevel))
        {
            m_settings.pipelineBinningMode = PipelineBinningModeDisable;
            m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr;
@@ -1202,7 +1204,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
                                         ForceDccForColorAttachments |
                                         ForceDccFor64BppShaderStorage);

-            if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+            if (IsGfx11(pInfo->gfxLevel))
            {
                m_settings.forceEnableDcc |= ForceDccForNonColorAttachmentShaderStorage;
            }
@@ -1273,7 +1275,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
            }
        }

-        if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+        if (IsGfx11(pInfo->gfxLevel))
        {
        }
    }
@@ -1331,6 +1333,9 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
                m_settings.rtUnifiedVgprLimit = 64;
            }
        }
+
+        // Turn off FP16 box nodes for this application to fix a 5% perf drop
+        m_settings.rtFp16BoxNodesInBlasMode = Fp16BoxNodesInBlasMode::Fp16BoxNodesInBlasModeNone;
    }
 #endif
@@ -1356,6 +1361,8 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(

        m_settings.alwaysReportHdrFormats = true;

+        m_settings.asyncComputeQueueMaxWavesPerCu = 20;
+
        if (pInfo->gpuType == Pal::GpuType::Discrete)
        {
            m_settings.cmdAllocatorDataHeap = Pal::GpuHeapLocal;
@@ -1389,15 +1396,12 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
            // triangle culling and there are no options in the game to turn it off making NGG somewhat redundant.
            m_settings.enableNgg = false;

-            m_settings.asyncComputeQueueMaxWavesPerCu = 20;
-
            m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit;

            m_settings.csWaveSize = 64;
        }
        else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3)
        {
-            m_settings.asyncComputeQueueMaxWavesPerCu = 20;
            m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr;

            if (pInfo->revision != Pal::AsicRevision::Navi21)
@@ -1408,7 +1412,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(

            m_settings.csWaveSize = 64;
        }
-        else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+        else if (IsGfx11(pInfo->gfxLevel))
        {
            // Navi31 Mall and Tiling Settings
            if ((pInfo->revision == Pal::AsicRevision::Navi31) || (pInfo->revision == Pal::AsicRevision::Navi32))
@@ -1436,7 +1440,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(

        if ((appProfile == AppProfile::DxvkHaloInfiniteLauncher) ||
            (appProfile == AppProfile::DxvkTf2)
-#ifndef ICD_X64_BUILD
+#ifndef VKI_X64_BUILD
            || (appProfile == AppProfile::DXVK)
 #endif
            )
@@ -1569,7 +1573,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
                m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr;
            }
        }
-        else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+        else if (IsGfx11(pInfo->gfxLevel))
        {
            if (pInfo->revision == Pal::AsicRevision::Navi31)
            {
@@ -1663,11 +1667,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
    {
        OverrideVkd3dCommonSettings(&m_settings);

-        if ((pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
-#if VKI_BUILD_GFX115
-            || (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_5)
-#endif
-            )
+        if (IsGfx11(pInfo->gfxLevel))
        {
            m_settings.fsWaveSize = 32;
        }
@@ -1695,6 +1695,12 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
        m_settings.memoryDeviceOverallocationAllowed = true;
    }

+    if (appProfile == AppProfile::Blender)
+    {
+        m_settings.memoryDeviceOverallocationAllowed = true;
+        m_settings.syncPreviousDrawForTransferStage = true;
+    }
+
    if (appProfile == AppProfile::SevenDaysToDie)
    {
        m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabled;
@@ -1762,7 +1768,7 @@ VkResult VulkanSettingsLoader::ProcessSettings(
        *pAppProfile = static_cast<AppProfile>(m_settings.forceAppProfileValue);
    }

-#if ICD_X86_BUILD
+#if VKI_X86_BUILD
    if (m_settings.shaderCacheMode == ShaderCacheEnableRuntimeOnly)
    {
        m_settings.shaderCacheMode = ShaderCacheDisable;
@@ -1966,10 +1972,10 @@ void VulkanSettingsLoader::UpdatePalSettings()

    switch (m_settings.disableBinningPsKill)
    {
-    case DisableBinningPsKillEnable:
+    case DisableBinningPsKillTrue:
        pPalSettings->disableBinningPsKill = Pal::OverrideMode::Enabled;
        break;
-    case DisableBinningPsKillDisable:
+    case DisableBinningPsKillFalse:
        pPalSettings->disableBinningPsKill = Pal::OverrideMode::Disabled;
        break;
    case DisableBinningPsKillDefault:
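Throughout settings.cpp the explicit GfxIp11_0 comparisons become IsGfx11() calls, which also folds in Gfx11.5 parts; note the deleted VKI_BUILD_GFX115 branch in the vkd3d block above. The helper's definition is not part of this diff; a plausible shape, assuming it mirrors the branch it replaced:

    // Assumed helper: true for any Gfx11 IP level, so per-app tuning code no
    // longer needs a guarded GfxIp11_5 check beside every GfxIp11_0 test.
    inline bool IsGfx11(Pal::GfxIpLevel level)
    {
        return (level == Pal::GfxIpLevel::GfxIp11_0)
    #if VKI_BUILD_GFX115
            || (level == Pal::GfxIpLevel::GfxIp11_5)
    #endif
            ;
    }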
diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json
index b50e462d..ae52cc05 100644
--- a/icd/settings/settings_xgl.json
+++ b/icd/settings/settings_xgl.json
@@ -885,6 +885,18 @@
      "Scope": "Driver",
      "Type": "bool"
    },
+    {
+      "Name": "SyncPreviousDrawForTransferStage",
+      "Description": "Whether to synchronize against the previous draw for a transfer-stage pipeline barrier.",
+      "Tags": [
+        "Pipeline Options"
+      ],
+      "Defaults": {
+        "Default": false
+      },
+      "Scope": "Driver",
+      "Type": "bool"
+    },
    {
      "Name": "PipelineBinningMode",
      "Description": "Specifies whether to override binning setting for pipeline.",
@@ -930,19 +942,19 @@
        "IsEnum": true,
        "Values": [
          {
-            "Name": "DisableBinningPsKillDisable",
+            "Name": "DisableBinningPsKillDefault",
            "Value": 0,
-            "Description": "Enable Binning."
+            "Description": "Default PAL values"
          },
          {
-            "Name": "DisableBinningPsKillEnable",
+            "Name": "DisableBinningPsKillFalse",
            "Value": 1,
-            "Description": "Disable Binning"
+            "Description": "Enable Binning."
          },
          {
-            "Name": "DisableBinningPsKillDefault",
+            "Name": "DisableBinningPsKillTrue",
            "Value": 2,
-            "Description": "Default PAL values"
+            "Description": "Disable Binning"
          }
        ],
        "Name": "DisableBinningPsKill"
@@ -3095,7 +3107,7 @@
        "VKI_RAY_TRACING"
      ],
      "Defaults": {
-        "Default": "Fp16BoxNodesInBlasModeNone"
+        "Default": "Fp16BoxNodesInBlasModeMixed"
      },
      "ValidValues": {
        "IsEnum": true,
@@ -3166,7 +3178,7 @@
        "VKI_RAY_TRACING"
      ],
      "Defaults": {
-        "Default": 0.0
+        "Default": 1.1
      },
      "Type": "float",
      "Name": "RtFp16BoxNodesInBlasModeMixedThreshold",
@@ -3277,7 +3289,7 @@
        "VKI_RAY_TRACING"
      ],
      "Defaults": {
-        "Default": 0
+        "Default": 4
      },
      "Type": "uint32",
      "Name": "RtTriangleSplittingBudgetPerTriangle",
@@ -3382,7 +3394,7 @@
        "VKI_RAY_TRACING"
      ],
      "Defaults": {
-        "Default": false
+        "Default": true
      },
      "Type": "bool",
      "Name": "EnableVariableBitsMortonCodes",
@@ -3398,7 +3410,7 @@
        "VKI_RAY_TRACING"
      ],
      "Defaults": {
-        "Default": false
+        "Default": true
      },
      "Type": "bool",
      "Scope": "Driver"
@@ -3742,7 +3754,7 @@
        "VKI_RAY_TRACING"
      ],
      "Defaults": {
-        "Default": true
+        "Default": false
      },
      "Type": "bool",
      "Name": "EnablePairCompressionCostCheck",
@@ -4081,7 +4093,7 @@
        "VKI_RAY_TRACING"
      ],
      "Defaults": {
-        "Default": false
+        "Default": true
      },
      "Type": "bool",
      "Name": "RtEnableFastLBVH",
@@ -4102,21 +4114,6 @@
      "Name": "BuildParallelWavesPerSimd",
      "Scope": "Driver"
    },
-    {
-      "Name": "RtEnableAcquireReleaseInterface",
-      "Description": "Enable Acquire/release-based barrier interface if PAL reports the ASIC supports it.",
-      "Tags": [
-        "Ray Tracing"
-      ],
-      "BuildTypes": [
-        "VKI_RAY_TRACING"
-      ],
-      "Defaults": {
-        "Default": false
-      },
-      "Type": "bool",
-      "Scope": "Driver"
-    },
    {
      "Name": "EnableFusedInstanceNode",
      "Description": "Enable fused instance node for BVH builder",
@@ -4144,6 +4141,18 @@
        "Default": 32
      }
    },
+    {
+      "Name": "RtPersistentDispatchRaysFactor",
+      "Type": "float",
+      "Description": "Controls the number of groups launched for a persistent DispatchRays; 0.0 disables persistent launch.",
+      "Scope": "Driver",
+      "Tags": [
+        "Ray Tracing"
+      ],
+      "Defaults": {
+        "Default": 0.0
+      }
+    },
    {
      "Name": "RtEnableBuildAccelStructStats",
      "Description": "Dump built acceleration stats. (Pending implementation)",
@@ -5401,7 +5410,7 @@
    {
      "Description": "Enable pair compression in early build stage, i.e., during Encode phase.",
      "Tags": [
-        "RayTracing"
+        "Ray Tracing"
      ],
      "BuildTypes": [
        "VKI_RAY_TRACING"
@@ -5416,7 +5425,7 @@
    {
      "Description": "Triangle pair search radius during EarlyPairCompression.",
      "Tags": [
-        "RayTracing"
+        "Ray Tracing"
      ],
      "BuildTypes": [
        "VKI_RAY_TRACING"
@@ -5580,16 +5589,25 @@
      "Type": "uint32"
    },
    {
-      "Name": "AsyncComputeQueueLimit",
-      "Description": "Limit the number of async compute queues that are reported.",
+      "Name": "ForceGraphicsQueueCount",
+      "Description": "Sets the number of graphics/universal queues reported by the driver. If set to UINT32_MAX, this setting will not be used. The maximum allowable queue count is currently 8, as defined by 'MaxQueuesPerFamily' in the driver.",
      "Tags": [
        "General"
      ],
      "Defaults": {
        "Default": 4294967295
      },
-      "Flags": {
-        "IsHex": true
+      "Scope": "Driver",
+      "Type": "uint32"
+    },
+    {
+      "Name": "ForceComputeQueueCount",
+      "Description": "Sets the number of compute queues reported by the driver. If set to UINT32_MAX, this setting will not be used. The maximum allowable queue count is currently 8, as defined by 'MaxQueuesPerFamily' in the driver.",
+      "Tags": [
+        "General"
+      ],
+      "Defaults": {
+        "Default": 4294967295
      },
      "Scope": "Driver",
      "Type": "uint32"
@@ -7050,7 +7068,7 @@
    },
    {
      "Name": "ForceEnableDcc",
-      "Description": "If not default, force enables/disables compression on the basis of resource and/or BPP. NOTE: To force enable shader storage DCC, at least one of 2D/3D and one of CA/non-CA need to be set",
+      "Description": "If not default, force enables compression on the basis of resource and/or BPP. NOTE: To force enable shader storage DCC, at least one of 2D/3D and one of CA/non-CA need to be set",
      "Tags": [
        "Optimization"
      ],
@@ -7094,29 +7112,51 @@
            "Name": "ForceDccFor64BppShaderStorage",
            "Value": 32,
            "Description": "Force enable DCC for shader storage resources with 64 BPP or deeper."
-          },
+          }
+        ],
+        "Name": "ForceEnableDcc"
+      },
+      "Flags": {
+        "IsHex": true,
+        "IsBitmask": true
+      },
+      "Scope": "Driver",
+      "Type": "uint32"
+    },
+    {
+      "Name": "ForceDisableCompression",
+      "Description": "If not default, force disables metadata and compression for appropriate resource types",
+      "Tags": [
+        "Optimization"
+      ],
+      "Defaults": {
+        "Default": "DisableCompressionDefault"
+      },
+      "ValidValues": {
+        "Name": "ForceDisableCompression",
+        "IsEnum": true,
+        "Values": [
          {
-            "Name": "ForceDisableCompression",
-            "Value": 64,
-            "Description": "Force disable compression for every resource irrespective of PAL heuristics."
+            "Name": "DisableCompressionDefault",
+            "Value": 0,
+            "Description": "Don't force anything. Let PAL heuristics decide what's best."
          },
          {
-            "Name": "ForceDisableCompressionForSharedImages",
-            "Value": 128,
+            "Name": "DisableCompressionForSharedImages",
+            "Value": 1,
            "Description": "Force disable compression for externally sharable resources."
          },
          {
-            "Name": "ForceDisableCompressionForColor",
-            "Value": 256,
+            "Name": "DisableCompressionForColor",
+            "Value": 2,
            "Description": "Force disable compression for all color format images."
          },
          {
-            "Name": "ForceDisableCompressionForDepthStencil",
-            "Value": 512,
+            "Name": "DisableCompressionForDepthStencil",
+            "Value": 4,
            "Description": "Force disable compression for all depth and stencil format images."
          }
-        ],
-        "Name": "ForceEnableDcc"
+        ]
      },
      "Flags": {
        "IsHex": true,
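The new ForceDisableCompression setting keeps the IsHex/IsBitmask flags, so its 1/2/4 values are intended to combine, even though the experiment hook in settings.cpp only compares against a single value. A hedged usage sketch using the enum names introduced above:

    // The DisableCompression* values OR together, e.g. to disable compression
    // for both color and depth/stencil images with one setting value.
    uint32_t mask = DisableCompressionForColor |          // 2
                    DisableCompressionForDepthStencil;    // 4

    bool skipColorDcc = ((mask & DisableCompressionForColor) != 0);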
diff --git a/icd/tools/generate/genShaderProfile.py b/icd/tools/generate/genShaderProfile.py
index 325ffb3a..d26e88db 100644
--- a/icd/tools/generate/genShaderProfile.py
+++ b/icd/tools/generate/genShaderProfile.py
@@ -52,7 +52,7 @@
 FUN_DEC_CLASS_SHADER_PROFILE_PUBLIC, FUNC_DEC_PARSE_JSON_PROFILE, FUNC_DEC_BUILD_APP_PROFILE_LLPC, \
 BUILD_APP_PROFILE_LLPC_FUNC, JSON_WRITER_GENERIC_DEF, JSON_READER_GENERIC_DEF, NAMESPACE_VK, CPP_INCLUDE, \
 CopyrightAndWarning, CONDITION_DYNAMIC_SHADER_INFO_APPLY, CLASS_TEMPLATE, ShaderTuningStructsAndVars, \
-HEADER_INCLUDES, PARSE_DWORD_ARRAY_FUNC, CONDITION_SHADER_CREATE_TUNING_OPTION_FLAGS
+HEADER_INCLUDES, PARSE_DWORD_ARRAY_FUNC, CONDITION_SHADER_CREATE_TUNING_OPTION_FLAGS, CONDITION_GFX_IP_11

 OUTPUT_FILE = "g_shader_profile"
 CONFIG_FILE_NAME = "profile.json"
@@ -341,6 +341,8 @@ def gen_profile(input_json, compiler):
            if not success:
                raise ValueError("JSON parsing failed")
            action_result, cpp_action = parse_json_profile_entry_action(action)
+            if not action_result['success']:
+                raise ValueError("JSON parsing failed")
            for branch, result in action_result.items():
                if result:
                    result_ret[branch] = True
@@ -1186,7 +1188,11 @@ def main():
                    if_gfxip_body = indent(if_asic_group_dict[title] + if_asic_generic_dict[title])
                else:
                    if_gfxip_body = indent(if_asic_group_dict[title])
-                if_gfxip = CONDITION_GFX_IP.replace("%Gfxip%", gfxip[0].upper() + gfxip[1:])
+                if gfxip == "gfxIp11_0":
+                    # Use the IsGfx11 method instead of the explicit CONDITION_GFX_IP.
+                    if_gfxip = CONDITION_GFX_IP_11
+                else:
+                    if_gfxip = CONDITION_GFX_IP.replace("%Gfxip%", gfxip[0].upper() + gfxip[1:])
                if_gfxip = if_gfxip.replace("%Defs%", if_gfxip_body)
                if gfxip in BuildTypesTemplate:
                    if_gfxip = wrap_with_directive(if_gfxip, BuildTypesTemplate[gfxip])
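With the gfxIp11_0 special case above, the generator stops emitting a literal IP-level compare for Gfx11 and instead uses the CONDITION_GFX_IP_11 template defined below. The generated C++ comes out roughly as:

    // Previously generated: if (gfxIpLevel == Pal::GfxIpLevel::GfxIp11_0)
    if (IsGfx11(gfxIpLevel))
    {
        // %Defs% expands to the per-ASIC and ASIC-generic profile entries.
    }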
diff --git a/icd/tools/generate/shaderProfileTemplate.py b/icd/tools/generate/shaderProfileTemplate.py
index c2bab504..b2da7372 100644
--- a/icd/tools/generate/shaderProfileTemplate.py
+++ b/icd/tools/generate/shaderProfileTemplate.py
@@ -417,6 +417,12 @@ class JsonOutputStream;
 }
 """

+CONDITION_GFX_IP_11 = """if (IsGfx11(gfxIpLevel))
+{
+%Defs%\
+}
+"""
+
 CONDITION_ASIC = """if (asicRevision == Pal::AsicRevision::%Asic%)
 {
     SetAppProfile%FuncName%(pPipelineProfile);