From 9478a913fc7d2ceb4b6eae84bb87fa3e64f08521 Mon Sep 17 00:00:00 2001 From: wenqinli Date: Wed, 18 May 2022 10:37:29 +0800 Subject: [PATCH] Update xgl from commit: 5a1db64d * BasemarkGPU1.2: amdvlk performs 5-10% lower than RADV * Force default sample pattern with preBindDefaultState setting * Fix angle failures: add support for flag lowZplanePolyOffsetBits * Consistently touch vk_physical_device.cpp on each build * Change setting default to disable acquire-release interface * Update Khronos Vulkan Headers to 1.3.212 * Fix for blit-copy test failures on gfx9 * Add missing CmdSetMsaaQuadSamplePattern calls * Update PAL Version to 729 * Re-enable shader prefetching * move m_sharedCmdAllocator to be a bit in m_flags * Break Barrier infos into chunks and call PAL multiple times * Disable fragmentShadingRateWithShaderSampleMask --- cmake/XglCompileDefinitions.cmake | 6 - cmake/XglOptions.cmake | 15 +- cmake/XglOverrides.cmake | 26 +- icd/CMakeLists.txt | 24 +- icd/Loader/LunarG/Lnx/amd-icd.json | 4 +- icd/api/app_profile.cpp | 6 - .../llpc/gfxIp10_3/generic/DiRT4/profile.json | 17 + .../generic/WarHammerII/profile.json | 14 + icd/api/compiler_solution_llpc.cpp | 5 +- icd/api/devmode/devmode_mgr.cpp | 199 +---- icd/api/devmode/devmode_mgr.h | 16 - icd/api/graphics_pipeline_common.cpp | 575 ++++++++++--- icd/api/include/app_profile.h | 1 + icd/api/include/compiler_solution.h | 4 + icd/api/include/compiler_solution_llpc.h | 1 + icd/api/include/graphics_pipeline_common.h | 53 +- .../vk_video/vulkan_video_codec_h264std.h | 101 ++- .../vulkan_video_codec_h264std_decode.h | 17 +- .../vulkan_video_codec_h264std_encode.h | 49 +- .../vk_video/vulkan_video_codec_h265std.h | 235 +++--- .../vulkan_video_codec_h265std_decode.h | 16 +- .../vulkan_video_codec_h265std_encode.h | 65 +- .../khronos/sdk-1.3/vulkan/vulkan_beta.h | 163 ++-- .../khronos/sdk-1.3/vulkan/vulkan_core.h | 148 +++- icd/api/include/pipeline_compiler.h | 7 +- icd/api/include/vk_cmd_pool.h | 11 +- 
icd/api/include/vk_cmdbuffer.h | 32 +- icd/api/include/vk_conv.h | 11 +- icd/api/include/vk_descriptor_set.h | 15 +- icd/api/include/vk_descriptor_set_layout.h | 2 + .../include/vk_descriptor_update_template.h | 54 +- icd/api/include/vk_device.h | 7 +- icd/api/include/vk_extensions.h | 6 +- .../include/vk_graphics_pipeline_library.h | 102 +++ icd/api/include/vk_image_view.h | 1 + icd/api/include/vk_physical_device.h | 9 +- icd/api/include/vk_pipeline.h | 2 +- icd/api/include/vk_pipeline_layout.h | 67 +- icd/api/include/vk_utils.h | 5 +- icd/api/pipeline_binary_cache.cpp | 25 +- icd/api/pipeline_compiler.cpp | 318 +++++-- icd/api/strings/extensions.txt | 2 + icd/api/vk_cmd_pool.cpp | 60 +- icd/api/vk_cmdbuffer.cpp | 790 +++++++++++++----- icd/api/vk_conv.cpp | 14 +- icd/api/vk_descriptor_pool.cpp | 1 - icd/api/vk_descriptor_set.cpp | 121 ++- icd/api/vk_descriptor_set_layout.cpp | 11 + icd/api/vk_descriptor_update_template.cpp | 82 +- icd/api/vk_device.cpp | 26 +- icd/api/vk_event.cpp | 7 +- icd/api/vk_graphics_pipeline.cpp | 40 +- icd/api/vk_graphics_pipeline_library.cpp | 644 ++++++++++++++ icd/api/vk_image.cpp | 14 +- icd/api/vk_image_view.cpp | 11 +- icd/api/vk_instance.cpp | 5 +- icd/api/vk_memory.cpp | 4 +- icd/api/vk_physical_device.cpp | 77 +- icd/api/vk_pipeline.cpp | 10 +- icd/api/vk_pipeline_layout.cpp | 608 +++++++------- icd/api/vk_queue.cpp | 8 +- icd/api/vk_shader.cpp | 3 +- icd/api/vk_utils.cpp | 49 ++ icd/make/importdefs | 2 +- icd/res/ver.h | 4 +- icd/settings/settings.cpp | 91 +- icd/settings/settings.h | 3 +- icd/settings/settings_xgl.json | 85 +- icd/tools/generate/genShaderProfile.py | 5 +- icd/tools/generate/shaderProfileTemplate.py | 16 + 70 files changed, 3480 insertions(+), 1747 deletions(-) create mode 100644 icd/api/appopt/shader_profiles/llpc/gfxIp10_3/generic/DiRT4/profile.json create mode 100644 icd/api/appopt/shader_profiles/llpc/gfxIp10_3/generic/WarHammerII/profile.json create mode 100644 
icd/api/include/vk_graphics_pipeline_library.h create mode 100644 icd/api/vk_graphics_pipeline_library.cpp create mode 100644 icd/api/vk_utils.cpp diff --git a/cmake/XglCompileDefinitions.cmake b/cmake/XglCompileDefinitions.cmake index e531dcfb..95cc6ab7 100644 --- a/cmake/XglCompileDefinitions.cmake +++ b/cmake/XglCompileDefinitions.cmake @@ -89,12 +89,6 @@ macro(xgl_set_compile_definitions) target_compile_definitions(xgl PRIVATE ICD_ENABLE_GCOV) endif() -#if VKI_EXT_EXTENDED_DYNAMIC_STATE2 - if(VKI_EXT_EXTENDED_DYNAMIC_STATE2) - target_compile_definitions(xgl PRIVATE VKI_EXT_EXTENDED_DYNAMIC_STATE2) - endif() -#endif - #if VKI_GPU_DECOMPRESS if(VKI_GPU_DECOMPRESS) target_compile_definitions(xgl PRIVATE VKI_GPU_DECOMPRESS) diff --git a/cmake/XglOptions.cmake b/cmake/XglOptions.cmake index 4ab67069..88e10765 100644 --- a/cmake/XglOptions.cmake +++ b/cmake/XglOptions.cmake @@ -54,24 +54,11 @@ macro(xgl_options) option(XGL_BUILD_CACHE_CREATOR "Build cache-creator tools?" OFF) -#if VKI_EXT_EXTENDED_DYNAMIC_STATE2 - option(VKI_EXT_EXTENDED_DYNAMIC_STATE2 "Build vulkan with EXT_EXTENDED_DYNAMIC_STATE2" OFF) -#endif - -#if VKI_KHR_SHADER_SUBGROUP_EXTENDED_TYPES - option(VKI_KHR_SHADER_SUBGROUP_EXTENDED_TYPES "Build vulkan with KHR_SHADER_SUBGROUP_EXTENDED_TYPES" OFF) -#endif - #if VKI_GPU_DECOMPRESS option(VKI_GPU_DECOMPRESS "Build vulkan with GPU_DECOMPRESS" OFF) #endif -#if VKI_EXT_EXTENDED_DYNAMIC_STATE - option(VKI_EXT_EXTENDED_DYNAMIC_STATE "Build vulkan with EXTENDED_DYNAMIC_STATE extention" OFF) -#endif - - option(ICD_BUILD_LLPC "Build LLPC?" ON) - + option(ICD_BUILD_LLPC "Build LLPC?" ON) option(ICD_BUILD_LLPCONLY "Build LLPC Only?" OFF) option(XGL_LLVM_UPSTREAM "Build with upstreamed LLVM?" 
OFF) diff --git a/cmake/XglOverrides.cmake b/cmake/XglOverrides.cmake index 436dbaee..b267ebd8 100644 --- a/cmake/XglOverrides.cmake +++ b/cmake/XglOverrides.cmake @@ -190,28 +190,26 @@ macro(xgl_overrides_vkgc) ### For LLPC ########################################################################################################## set(LLPC_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_LLPC_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE) - if(ICD_BUILD_LLPC OR ICD_BUILD_SPVONLY) - set(LLPC_BUILD_TESTS ${XGL_BUILD_TESTS} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_TESTS ${XGL_BUILD_TESTS} CACHE BOOL "${PROJECT_NAME} override." FORCE) - set(LLPC_BUILD_LIT ${XGL_BUILD_LIT} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_LIT ${XGL_BUILD_LIT} CACHE BOOL "${PROJECT_NAME} override." FORCE) - if(XGL_BUILD_LIT) - message(DEPRECATION "XGL_BUILD_LIT is deprecated, use XGL_BUILD_TESTS instead") - endif() - set(LLPC_BUILD_NAVI12 ${XGL_BUILD_NAVI12} CACHE BOOL "${PROJECT_NAME} override." FORCE) + if(XGL_BUILD_LIT) + message(DEPRECATION "XGL_BUILD_LIT is deprecated, use XGL_BUILD_TESTS instead") + endif() + set(LLPC_BUILD_NAVI12 ${XGL_BUILD_NAVI12} CACHE BOOL "${PROJECT_NAME} override." FORCE) - set(LLPC_BUILD_NAVI22 ${XGL_BUILD_NAVI22} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_NAVI22 ${XGL_BUILD_NAVI22} CACHE BOOL "${PROJECT_NAME} override." FORCE) - set(LLPC_BUILD_NAVI23 ${XGL_BUILD_NAVI23} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_NAVI23 ${XGL_BUILD_NAVI23} CACHE BOOL "${PROJECT_NAME} override." FORCE) - set(LLPC_BUILD_NAVI24 ${XGL_BUILD_NAVI24} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_NAVI24 ${XGL_BUILD_NAVI24} CACHE BOOL "${PROJECT_NAME} override." FORCE) - set(LLPC_BUILD_RAVEN2 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_RAVEN2 ON CACHE BOOL "${PROJECT_NAME} override." 
FORCE) - set(LLPC_BUILD_VEGA20 ${XGL_BUILD_VEGA20} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_VEGA20 ${XGL_BUILD_VEGA20} CACHE BOOL "${PROJECT_NAME} override." FORCE) - set(LLPC_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) - endif() + set(LLPC_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE) endmacro() diff --git a/icd/CMakeLists.txt b/icd/CMakeLists.txt index 6b593fe2..028dfd88 100644 --- a/icd/CMakeLists.txt +++ b/icd/CMakeLists.txt @@ -118,6 +118,7 @@ target_sources(xgl PRIVATE api/vk_physical_device.cpp api/vk_physical_device_manager.cpp api/vk_graphics_pipeline.cpp + api/vk_graphics_pipeline_library.cpp api/vk_image.cpp api/vk_image_view.cpp api/vk_instance.cpp @@ -137,6 +138,7 @@ target_sources(xgl PRIVATE api/vk_surface.cpp api/vk_gpa_session.cpp api/vk_descriptor_update_template.cpp + api/vk_utils.cpp api/appopt/barrier_filter_layer.cpp api/appopt/strange_brigade_layer.cpp api/appopt/g_shader_profile.cpp @@ -163,21 +165,15 @@ if(ICD_BUILD_LLPC) ) endif() -# vk_physical_device.cpp uses the __DATE__ and __TIME__ macros to generate a pipelineCacheUUID. -# The following rule forces vk_physical_device.cpp to be re-compiled on every build, so that -# an up-to-date time/date is always used regardless of which files were touched since the last build. +# vk_utils.cpp uses the __DATE__ and __TIME__ macros to generate a pipelineCacheUUID. The following +# rule forces vk_utils.cpp to be re-compiled on every build, so that an up-to-date time/date +# is always used regardless of which files were touched since the last build. 
add_custom_command( - OUTPUT "vk_physical_device.g" - COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_SOURCE_DIR}/api/vk_physical_device.cpp + TARGET xgl PRE_BUILD + COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_SOURCE_DIR}/api/vk_utils.cpp + COMMENT "Touching vk_utils.cpp" ) -add_custom_target( - RebuildVkPhysicalDevice - DEPENDS "vk_physical_device.g" -) - -add_dependencies(xgl RebuildVkPhysicalDevice) - ### ICD Auto-generated Shader Profiles Files ################################## # ICD_GENDIR Path to the code generation tools set(ICD_GENDIR ${CMAKE_CURRENT_SOURCE_DIR}/tools/generate) @@ -318,11 +314,11 @@ if (UNIX) endif() endif() - target_link_libraries(xgl PRIVATE + target_link_options(xgl PRIVATE -Wl,-Bdynamic -Wl,-z,noexecstack -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/make/amdicd.so.def - -Wl,-Map=${ICD_TARGET}.map + -Wl,-Map=$/${ICD_TARGET}.map -Wl,-soname=${ICD_TARGET}.so.1 ) diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json index 17658423..5a86b23d 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ -2,13 +2,13 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.208" + "api_version": "1.3.212" }, "layer": { "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@", "type": "GLOBAL", "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.208", + "api_version": "1.3.212", "implementation_version": "1", "description": "AMD switchable graphics layer", "functions": { diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp index 919e0c20..3dfabf93 100644 --- a/icd/api/app_profile.cpp +++ b/icd/api/app_profile.cpp @@ -1049,12 +1049,6 @@ char* StringToLower(const char* pString, size_t strLength) AppProfile ScanApplicationProfile( const VkInstanceCreateInfo& instanceInfo) { - // You can uncomment these if you need to add new hashes for specific strings (which is - // 
hopefully never). DON'T LEAVE THIS UNCOMMENTED: - // - //Util::MetroHash::Hash hash = {}; - //Util::MetroHash128::Hash(reinterpret_cast(pTestPattern), strlen(pTestPattern), hash.bytes); - AppProfile profile = AppProfile::Default; // Generate hashes for all of the tested pattern entries diff --git a/icd/api/appopt/shader_profiles/llpc/gfxIp10_3/generic/DiRT4/profile.json b/icd/api/appopt/shader_profiles/llpc/gfxIp10_3/generic/DiRT4/profile.json new file mode 100644 index 00000000..d0c299a3 --- /dev/null +++ b/icd/api/appopt/shader_profiles/llpc/gfxIp10_3/generic/DiRT4/profile.json @@ -0,0 +1,17 @@ +{ + "entries": [ + { + "pattern": { + "cs": { + "stageActive": true, + "codeHash": "0xe4eb5ea3fb70eeb7 2989e44ba02788a8" + } + }, + "action": { + "cs": { + "waveSize": 32 + } + } + } + ] +} \ No newline at end of file diff --git a/icd/api/appopt/shader_profiles/llpc/gfxIp10_3/generic/WarHammerII/profile.json b/icd/api/appopt/shader_profiles/llpc/gfxIp10_3/generic/WarHammerII/profile.json new file mode 100644 index 00000000..58ac90d0 --- /dev/null +++ b/icd/api/appopt/shader_profiles/llpc/gfxIp10_3/generic/WarHammerII/profile.json @@ -0,0 +1,14 @@ +{ + "entries": [ + { + "pattern": { + "always": true + }, + "action": { + "ps": { + "waveSize": 32 + } + } + } + ] +} \ No newline at end of file diff --git a/icd/api/compiler_solution_llpc.cpp b/icd/api/compiler_solution_llpc.cpp index 5cb5ad0a..21c5863a 100644 --- a/icd/api/compiler_solution_llpc.cpp +++ b/icd/api/compiler_solution_llpc.cpp @@ -118,6 +118,7 @@ VkResult CompilerSolutionLlpc::BuildShaderModule( VkShaderModuleCreateFlags flags, size_t codeSize, const void* pCode, + const bool adaptForFaskLink, ShaderModuleHandle* pShaderModule, const Util::MetroHash::Hash& hash) { @@ -461,10 +462,6 @@ VkResult CompilerSolutionLlpc::CreateLlpcCompiler( // Identify for Icd and stanalone compiler llpcOptions[numOptions++] = Llpc::VkIcdName; - // Enable shadow descriptor table - Pal::DeviceProperties info; - 
m_pPhysicalDevice->PalDevice()->GetProperties(&info); - // LLPC log options llpcOptions[numOptions++] = (settings.enableLog & 1) ? "-enable-errs=1" : "-enable-errs=0"; llpcOptions[numOptions++] = (settings.enableLog & 2) ? "-enable-outs=1" : "-enable-outs=0"; diff --git a/icd/api/devmode/devmode_mgr.cpp b/icd/api/devmode/devmode_mgr.cpp index 67d333f8..a4d2a02e 100644 --- a/icd/api/devmode/devmode_mgr.cpp +++ b/icd/api/devmode/devmode_mgr.cpp @@ -60,10 +60,6 @@ #include "protocols/ddPipelineUriService.h" #include "protocols/ddEventServer.h" -#if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT -#include "protocols/etwClient.h" -#endif - namespace vk { @@ -316,9 +312,6 @@ DevModeMgr::DevModeMgr(Instance* pInstance) m_pDevDriverServer(pInstance->PalPlatform()->GetDevDriverServer()), m_pRGPServer(nullptr), m_pPipelineUriService(nullptr), -#if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT - m_pEtwClient(nullptr), -#endif m_finalized(false), m_triggerMode(TriggerMode::Present), m_numPrepFrames(0), @@ -610,93 +603,6 @@ Pal::Result DevModeMgr::TraceEndingToIdleStep(TraceState* pState) (pState->pBeginFence->GetStatus() != Pal::Result::NotReady) && // "Trace begin" cmdbuf has retired (pState->pEndFence->GetStatus() != Pal::Result::NotReady)) // "Trace end" cmdbuf has retired { -#if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT - // Process ETW events if we have a connected client. - if (m_pEtwClient != nullptr) - { - // Disable tracing on the ETW client. - size_t numGpuEvents = 0; - DevDriver::Result devDriverResult = m_pEtwClient->DisableTracing(&numGpuEvents); - - // Inject any external signal and wait events if we have any. - if ((devDriverResult == DevDriver::Result::Success) && (numGpuEvents > 0)) - { - // Allocate memory for the gpu events. 
- DevDriver::GpuEvent* pGpuEvents = reinterpret_cast(m_pInstance->AllocMem( - sizeof(DevDriver::GpuEvent) * numGpuEvents, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE)); - if (pGpuEvents == nullptr) - { - devDriverResult = DevDriver::Result::InsufficientMemory; - } - else if (devDriverResult == DevDriver::Result::Success) - { - devDriverResult = m_pEtwClient->GetTraceData(pGpuEvents, numGpuEvents); - } - - if (devDriverResult == DevDriver::Result::Success) - { - for (uint32_t eventIndex = 0; eventIndex < static_cast(numGpuEvents); ++eventIndex) - { - Pal::Result palResult = Pal::Result::Success; - - const DevDriver::GpuEvent* pGpuEvent = &pGpuEvents[eventIndex]; - if (pGpuEvent->type == DevDriver::GpuEventType::QueueSignal) - { - GpuUtil::TimedQueueSemaphoreInfo signalInfo = {}; - signalInfo.semaphoreID = pGpuEvent->queue.fenceObject; - - palResult = - pState->pGpaSession->ExternalTimedSignalQueueSemaphore( - pGpuEvent->queue.contextIdentifier, - pGpuEvent->submissionTime, - pGpuEvent->completionTime, - signalInfo); - } - else if (pGpuEvent->type == DevDriver::GpuEventType::QueueWait) - { - GpuUtil::TimedQueueSemaphoreInfo waitInfo = {}; - waitInfo.semaphoreID = pGpuEvent->queue.fenceObject; - - palResult = - pState->pGpaSession->ExternalTimedWaitQueueSemaphore( - pGpuEvent->queue.contextIdentifier, - pGpuEvent->submissionTime, - pGpuEvent->completionTime, - waitInfo); - } - - // Traces sometimes capture events that don't belong to an API level queue. - // In that case, PAL will return ErrorIncompatibleQueue which means we should ignore - // the event. If we get a result that's not incompatible queue or success, then treat it - // as an error and break out of the loop. - if ((palResult != Pal::Result::ErrorIncompatibleQueue) && - (palResult != Pal::Result::Success)) - { - devDriverResult = DevDriver::Result::Error; - break; - } - } - } - - // Free the memory for the gpu events. 
- if (pGpuEvents != nullptr) - { - m_pInstance->FreeMem(pGpuEvents); - pGpuEvents = nullptr; - } - } - - // Throw an assert and clean up the etw client if we fail to capture gpu events. - // It's not a critical error though so it shouldn't abort the trace. - if (devDriverResult != DevDriver::Result::Success) - { - VK_ASSERT(false); - CleanupEtwClient(); - } - } -#endif - bool success = false; // Fetch required trace data size from GPA session @@ -1037,7 +943,7 @@ Pal::Result DevModeMgr::TracePendingToPreparingStep( info.flags.enableQueueTiming = pState->queueTimingEnabled; info.flags.enableSampleUpdates = m_enableSampleUpdates; - info.flags.useInternalQueueSemaphoreTiming = settings.devModeSemaphoreQueueTimingEnable; + info.flags.useInternalQueueSemaphoreTiming = true; result = pState->pGpaSession->Begin(info); } @@ -1169,24 +1075,6 @@ Pal::Result DevModeMgr::TracePendingToPreparingStep( result = pBeginCmdBuf->End(); } -#if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT - if (result == Pal::Result::Success) - { - // Enable tracing on the ETW client if it's connected. - if (m_pEtwClient != nullptr) - { - const DevDriver::Result devDriverResult = m_pEtwClient->EnableTracing(Util::GetIdOfCurrentProcess()); - - // If an error occurs, cleanup the ETW client so it doesn't continue to cause issues for future traces. - if (devDriverResult != DevDriver::Result::Success) - { - VK_ASSERT(false); - CleanupEtwClient(); - } - } - } -#endif - // Reset the trace-begin fence if (result == Pal::Result::Success) { @@ -1661,15 +1549,6 @@ void DevModeMgr::FinishOrAbortTrace( pState->pGpaSession->Reset(); } -#if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT - // Disable tracing on the ETW client if it exists. 
- if (aborted && m_pEtwClient != nullptr) - { - DevDriver::Result devDriverResult = m_pEtwClient->DisableTracing(nullptr); - VK_ASSERT(devDriverResult == DevDriver::Result::Success); - } -#endif - // Reset tracing state to idle pState->preparedFrameCount = 0; pState->sqttFrameCount = 0; @@ -2306,18 +2185,6 @@ Pal::Result DevModeMgr::InitRGPTracing( result = Pal::Result::ErrorInitializationFailed; } -#if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT - // Attempt to initialize the ETW client for use with RGP traces. This might fail if there's no ETW server - // available on the message bus. Failure just means that we won't be able to get extra information about - // queue signal and wait events. Just trigger an assert in the case of failure. - if ((result == Pal::Result::Success) && - (settings.devModeSemaphoreQueueTimingEnable == false)) - { - Pal::Result etwInitResult = InitEtwClient(); - VK_ASSERT(etwInitResult == Pal::Result::Success); - } -#endif - if (result != Pal::Result::Success) { // If we've failed to initialize tracing, permanently disable traces @@ -2628,70 +2495,6 @@ void DevModeMgr::StopInstructionTrace( } } -#if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT -Pal::Result DevModeMgr::InitEtwClient() -{ - // We should never have a valid etw client pointer already. - VK_ASSERT(m_pEtwClient == nullptr); - - Pal::Result result = Pal::Result::Success; - - void* pStorage = m_pInstance->AllocMem(sizeof(DevDriver::ETWProtocol::ETWClient), - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - - // Attempt to create an ETW client for rgp traces. - if (pStorage != nullptr) - { - m_pEtwClient = VK_PLACEMENT_NEW(pStorage) - DevDriver::ETWProtocol::ETWClient(m_pDevDriverServer->GetMessageChannel()); - } - else - { - result = Pal::Result::ErrorOutOfMemory; - } - - // Attempt to locate an ETW server. 
- DevDriver::ClientId etwProviderId = DevDriver::kBroadcastClientId; - - if (result == Pal::Result::Success) - { - DevDriver::ClientMetadata filter = {}; - filter.clientType = DevDriver::Component::Server; - filter.protocols.etw = 1; - - result = - DevDriverToPalResult(m_pDevDriverServer->GetMessageChannel()->FindFirstClient(filter, &etwProviderId)); - } - - // Connect to the server - if (result == Pal::Result::Success) - { - result = DevDriverToPalResult(m_pEtwClient->Connect(etwProviderId)); - } - - if ((result != Pal::Result::Success) && (m_pEtwClient != nullptr)) - { - Util::Destructor(m_pEtwClient); - m_pInstance->FreeMem(m_pEtwClient); - m_pEtwClient = nullptr; - } - - return result; -} - -void DevModeMgr::CleanupEtwClient() -{ - if (m_pEtwClient != nullptr) - { - m_pEtwClient->Disconnect(); - - Util::Destructor(m_pEtwClient); - m_pInstance->FreeMem(m_pEtwClient); - m_pEtwClient = nullptr; - } -} -#endif - // ===================================================================================================================== // Registers a pipeline binary cache object with the pipeline URI service and initializes the pipeline URI service // the first time a pipeline binary cache object is registered diff --git a/icd/api/devmode/devmode_mgr.h b/icd/api/devmode/devmode_mgr.h index 1b8ff76a..10585df5 100644 --- a/icd/api/devmode/devmode_mgr.h +++ b/icd/api/devmode/devmode_mgr.h @@ -86,13 +86,6 @@ namespace RGPProtocol { class RGPServer; } - -#if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT -namespace ETWProtocol -{ -class ETWClient; -} -#endif } // Vulkan forward declarations @@ -312,19 +305,10 @@ class DevModeMgr TraceQueueState* FindTraceQueueState(TraceState* pState, const Queue* pQueue); bool QueueSupportsTiming(uint32_t deviceIdx, const Queue* pQueue); -#if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT - Pal::Result InitEtwClient(); - void CleanupEtwClient(); -#endif - Instance* m_pInstance; DevDriver::DevDriverServer* m_pDevDriverServer; DevDriver::RGPProtocol::RGPServer* 
m_pRGPServer; DevDriver::PipelineUriService* m_pPipelineUriService; -#if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT - DevDriver::ETWProtocol::ETWClient* m_pEtwClient; // ETW client pointer used to collect gpu - // events for RGP -#endif Util::Mutex m_traceMutex; TraceState m_trace; bool m_finalized; diff --git a/icd/api/graphics_pipeline_common.cpp b/icd/api/graphics_pipeline_common.cpp index 55f5029d..866fb294 100644 --- a/icd/api/graphics_pipeline_common.cpp +++ b/icd/api/graphics_pipeline_common.cpp @@ -27,9 +27,12 @@ #include "include/vk_cmdbuffer.h" #include "include/vk_device.h" #include "include/vk_graphics_pipeline.h" +#include "include/vk_graphics_pipeline_library.h" #include "include/vk_pipeline_layout.h" #include "include/vk_render_pass.h" +#include "palVectorImpl.h" + namespace vk { @@ -308,10 +311,37 @@ static uint32_t GetColorAttachmentCount( 0u; } +// ===================================================================================================================== +static VkShaderStageFlagBits GetLibraryActiveShaderStages( + const VkGraphicsPipelineLibraryFlagsEXT libFlags) +{ + constexpr VkShaderStageFlagBits PrsActiveStageMask = + static_cast( + VK_SHADER_STAGE_VERTEX_BIT | + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | + VK_SHADER_STAGE_GEOMETRY_BIT); + constexpr VkShaderStageFlagBits FgsActiveStageMask = + static_cast(VK_SHADER_STAGE_FRAGMENT_BIT); + + VkShaderStageFlagBits activeStageMask = static_cast(0); + + if (libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) + { + activeStageMask = static_cast(activeStageMask | PrsActiveStageMask); + } + if (libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) + { + activeStageMask = static_cast(activeStageMask | FgsActiveStageMask); + } + + return activeStageMask; +} + // ===================================================================================================================== VkShaderStageFlagBits 
GraphicsPipelineCommon::GetActiveShaderStages( - const VkGraphicsPipelineCreateInfo* pGraphicsPipelineCreateInfo - ) + const VkGraphicsPipelineCreateInfo* pGraphicsPipelineCreateInfo, + const GraphicsPipelineLibraryInfo* pLibInfo) { VK_ASSERT(pGraphicsPipelineCreateInfo != nullptr); @@ -322,16 +352,85 @@ VkShaderStageFlagBits GraphicsPipelineCommon::GetActiveShaderStages( activeStages = static_cast(activeStages | pGraphicsPipelineCreateInfo->pStages[i].stage); } + VkShaderStageFlagBits activeStageMask = GetLibraryActiveShaderStages(pLibInfo->libFlags); + + activeStages = static_cast(activeStages & activeStageMask); + + const GraphicsPipelineLibrary* libraries[] = { pLibInfo->pPreRasterizationShaderLib, pLibInfo->pFragmentShaderLib }; + + for (uint32_t i = 0; i < Util::ArrayLen(libraries); ++i) + { + if (libraries[i] != nullptr) + { + const VkShaderStageFlagBits libShaderStages = + libraries[i]->GetPipelineObjectCreateInfo().activeStages; + + const VkShaderStageFlagBits libActiveStageMask = + GetLibraryActiveShaderStages(libraries[i]->GetLibraryFlags()); + + activeStages = static_cast(activeStages | (libActiveStageMask & libShaderStages)); + } + } + + activeStageMask = static_cast(0); + + if (pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) + { + activeStageMask = static_cast(activeStageMask | + VK_SHADER_STAGE_VERTEX_BIT | + VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | + VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | + VK_SHADER_STAGE_GEOMETRY_BIT); + } + if (pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) + { + activeStageMask = static_cast(activeStageMask | VK_SHADER_STAGE_FRAGMENT_BIT); + } + + activeStages = static_cast(activeStages & activeStageMask); + return activeStages; } // ===================================================================================================================== uint32_t GraphicsPipelineCommon::GetDynamicStateFlags( - const VkPipelineDynamicStateCreateInfo* pDy - ) + 
const VkPipelineDynamicStateCreateInfo* pDy, + const GraphicsPipelineLibraryInfo* pLibInfo) { uint32_t dynamicState = 0; + if (pLibInfo->pVertexInputInterfaceLib != nullptr) + { + const uint32_t libDynamicStates = + ViiDynamicStatesMask & pLibInfo->pVertexInputInterfaceLib->GetDynamicStates(); + + dynamicState |= libDynamicStates; + } + + if (pLibInfo->pPreRasterizationShaderLib != nullptr) + { + const uint32_t libDynamicStates = + PrsDynamicStatesMask & pLibInfo->pPreRasterizationShaderLib->GetDynamicStates(); + + dynamicState |= libDynamicStates; + } + + if (pLibInfo->pFragmentShaderLib != nullptr) + { + const uint32_t libDynamicStates = + FgsDynamicStatesMask & pLibInfo->pFragmentShaderLib->GetDynamicStates(); + + dynamicState |= libDynamicStates; + } + + if (pLibInfo->pFragmentOutputInterfaceLib != nullptr) + { + const uint32_t libDynamicStates = + FoiDynamicStatesMask & pLibInfo->pFragmentOutputInterfaceLib->GetDynamicStates(); + + dynamicState |= libDynamicStates; + } + // The section of the following dynamic states are not defined, so we don't get them from libraries // - VK_DYNAMIC_STATE_WAVE_LIMIT_AMD // - VK_DYNAMIC_STATE_VIEWPORT_W_SCALING_NV (not available) @@ -341,10 +440,14 @@ uint32_t GraphicsPipelineCommon::GetDynamicStateFlags( // Get dynamic states from VkPipelineDynamicStateCreateInfo if (pDy != nullptr) { - const uint32_t viiMask = 0xFFFFFFFF; - const uint32_t prsMask = 0xFFFFFFFF; - const uint32_t fgsMask = 0xFFFFFFFF; - const uint32_t foiMask = 0xFFFFFFFF; + const uint32_t viiMask = + (pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) ? 0xFFFFFFFF : 0; + const uint32_t prsMask = + (pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) ? 0xFFFFFFFF : 0; + const uint32_t fgsMask = + (pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) ? 
0xFFFFFFFF : 0; + const uint32_t foiMask = + (pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) ? 0xFFFFFFFF : 0; for (uint32_t i = 0; i < pDy->dynamicStateCount; ++i) { @@ -409,6 +512,9 @@ uint32_t GraphicsPipelineCommon::GetDynamicStateFlags( case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR: dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::FragmentShadingRateStateKhr)); break; + case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT: + dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::DepthWriteEnableExt)); + break; case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT: dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::DepthTestEnableExt)); break; @@ -430,9 +536,6 @@ uint32_t GraphicsPipelineCommon::GetDynamicStateFlags( case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: dynamicState |= foiMask & (1 << static_cast(DynamicStatesInternal::SampleLocationsExt)); break; - case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT: - dynamicState |= foiMask & (1 << static_cast(DynamicStatesInternal::DepthWriteEnableExt)); - break; case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: dynamicState |= foiMask & (1 << static_cast(DynamicStatesInternal::ColorWriteEnableExt)); break; @@ -446,6 +549,119 @@ uint32_t GraphicsPipelineCommon::GetDynamicStateFlags( return dynamicState; } +// ===================================================================================================================== +void GraphicsPipelineCommon::ExtractLibraryInfo( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + GraphicsPipelineLibraryInfo* pLibInfo) +{ + constexpr VkGraphicsPipelineLibraryFlagsEXT GraphicsPipelineLibraryAll = 0 + | VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT + | VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT + | VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT + | VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT; + + EXTRACT_VK_STRUCTURES_1( + gfxPipeline, + 
GraphicsPipelineLibraryCreateInfoEXT, + PipelineLibraryCreateInfoKHR, + static_cast(pCreateInfo->pNext), + GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT, + PIPELINE_LIBRARY_CREATE_INFO_KHR) + + pLibInfo->flags.isLibrary = (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) ? 1 : 0; + + pLibInfo->flags.optimize = (pCreateInfo->flags & VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT) ? 1 : 0; + + pLibInfo->libFlags = + (pLibInfo->flags.isLibrary == false) ? GraphicsPipelineLibraryAll : + (pGraphicsPipelineLibraryCreateInfoEXT == nullptr) ? 0 : pGraphicsPipelineLibraryCreateInfoEXT->flags; + + pLibInfo->pVertexInputInterfaceLib = nullptr; + pLibInfo->pPreRasterizationShaderLib = nullptr; + pLibInfo->pFragmentShaderLib = nullptr; + pLibInfo->pFragmentOutputInterfaceLib = nullptr; + + if (pPipelineLibraryCreateInfoKHR != nullptr) + { + for (uint32_t i = 0; i < pPipelineLibraryCreateInfoKHR->libraryCount; ++i) + { + const GraphicsPipelineLibrary* pPipelineLib = + GraphicsPipelineLibrary::ObjectFromHandle(pPipelineLibraryCreateInfoKHR->pLibraries[i]); + + if (pPipelineLib != nullptr) + { + VkGraphicsPipelineLibraryFlagsEXT linkLibFlags = pPipelineLib->GetLibraryFlags(); + + if (linkLibFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) + { + VK_ASSERT(pLibInfo->pVertexInputInterfaceLib == nullptr); + pLibInfo->pVertexInputInterfaceLib = pPipelineLib; + pLibInfo->libFlags &= ~VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT; + } + + if (linkLibFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) + { + VK_ASSERT(pLibInfo->pPreRasterizationShaderLib == nullptr); + pLibInfo->pPreRasterizationShaderLib = pPipelineLib; + pLibInfo->libFlags &= ~VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT; + } + + if (linkLibFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) + { + VK_ASSERT(pLibInfo->pFragmentShaderLib == nullptr); + pLibInfo->pFragmentShaderLib = pPipelineLib; + pLibInfo->libFlags &= 
~VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT; + } + + if (linkLibFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) + { + VK_ASSERT(pLibInfo->pFragmentOutputInterfaceLib == nullptr); + pLibInfo->pFragmentOutputInterfaceLib = pPipelineLib; + pLibInfo->libFlags &= ~VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT; + } + } + } + } +} + +// ===================================================================================================================== +bool GraphicsPipelineCommon::NeedBuildPipelineBinary( + const GraphicsPipelineLibraryInfo* pLibInfo, + const bool enableRasterization) +{ + bool result = false; + + if (pLibInfo->flags.isLibrary == false) + { + result = true; + } + else if (pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) + { + result = true; + } + else if ((enableRasterization == true) && + (pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)) + { + result = true; + } + else if (pLibInfo->flags.optimize) + { + if ((pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) && + (pLibInfo->pPreRasterizationShaderLib != nullptr)) + { + result = true; + } + else if ((pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) && + (pLibInfo->pFragmentOutputInterfaceLib != nullptr) && + (enableRasterization == true)) + { + result = true; + } + } + + return result; +} + // ===================================================================================================================== VkResult GraphicsPipelineCommon::Create( Device* pDevice, @@ -456,6 +672,14 @@ VkResult GraphicsPipelineCommon::Create( { VkResult result; + const bool isLibrary = pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR; + + if (isLibrary) + { + result = GraphicsPipelineLibrary::Create( + pDevice, pPipelineCache, pCreateInfo, pAllocator, pPipeline); + } + else { result = GraphicsPipeline::Create( pDevice, pPipelineCache, 
pCreateInfo, pAllocator, pPipeline); @@ -464,6 +688,125 @@ VkResult GraphicsPipelineCommon::Create( return result; } +// ===================================================================================================================== +static void CopyVertexInputInterfaceState( + const GraphicsPipelineLibrary* pLibrary, + GraphicsPipelineObjectCreateInfo* pInfo) +{ + const GraphicsPipelineObjectCreateInfo& libInfo = pLibrary->GetPipelineObjectCreateInfo(); + + pInfo->pipeline.iaState = libInfo.pipeline.iaState; + + pInfo->immedInfo.inputAssemblyState = libInfo.immedInfo.inputAssemblyState; + + pInfo->staticStateMask |= (libInfo.staticStateMask & ViiDynamicStatesMask); +} + +// ===================================================================================================================== +static void CopyPreRasterizationShaderState( + const GraphicsPipelineLibrary* pLibrary, + GraphicsPipelineObjectCreateInfo* pInfo) +{ + const GraphicsPipelineObjectCreateInfo& libInfo = pLibrary->GetPipelineObjectCreateInfo(); + + pInfo->pipeline.rsState = libInfo.pipeline.rsState; + pInfo->pipeline.viewportInfo = libInfo.pipeline.viewportInfo; + + pInfo->msaa.conservativeRasterizationMode = libInfo.msaa.conservativeRasterizationMode; + pInfo->msaa.flags.enableConservativeRasterization = libInfo.msaa.flags.enableConservativeRasterization; + pInfo->msaa.flags.enableLineStipple = libInfo.msaa.flags.enableLineStipple; + + pInfo->immedInfo.triangleRasterState = libInfo.immedInfo.triangleRasterState; + pInfo->immedInfo.depthBiasParams = libInfo.immedInfo.depthBiasParams; + pInfo->immedInfo.pointLineRasterParams = libInfo.immedInfo.pointLineRasterParams; + pInfo->immedInfo.lineStippleParams = libInfo.immedInfo.lineStippleParams; + pInfo->immedInfo.graphicsShaderInfos.vs = libInfo.immedInfo.graphicsShaderInfos.vs; + pInfo->immedInfo.graphicsShaderInfos.hs = libInfo.immedInfo.graphicsShaderInfos.hs; + pInfo->immedInfo.graphicsShaderInfos.ds = 
libInfo.immedInfo.graphicsShaderInfos.ds; + pInfo->immedInfo.graphicsShaderInfos.gs = libInfo.immedInfo.graphicsShaderInfos.gs; + pInfo->immedInfo.graphicsShaderInfos.ts = libInfo.immedInfo.graphicsShaderInfos.ts; + pInfo->immedInfo.graphicsShaderInfos.ms = libInfo.immedInfo.graphicsShaderInfos.ms; + pInfo->immedInfo.graphicsShaderInfos.flags = libInfo.immedInfo.graphicsShaderInfos.flags; + pInfo->immedInfo.viewportParams = libInfo.immedInfo.viewportParams; + pInfo->immedInfo.scissorRectParams = libInfo.immedInfo.scissorRectParams; + pInfo->immedInfo.rasterizerDiscardEnable = libInfo.immedInfo.rasterizerDiscardEnable; + + pInfo->flags.bresenhamEnable = libInfo.flags.bresenhamEnable; + + pInfo->staticStateMask |= (libInfo.staticStateMask & PrsDynamicStatesMask); +} + +// ===================================================================================================================== +static void CopyFragmentShaderState( + const GraphicsPipelineLibrary* pLibrary, + GraphicsPipelineObjectCreateInfo* pInfo) +{ + const GraphicsPipelineObjectCreateInfo& libInfo = pLibrary->GetPipelineObjectCreateInfo(); + + pInfo->immedInfo.depthBoundParams = libInfo.immedInfo.depthBoundParams; + pInfo->immedInfo.stencilRefMasks = libInfo.immedInfo.stencilRefMasks; + pInfo->immedInfo.graphicsShaderInfos.ps = libInfo.immedInfo.graphicsShaderInfos.ps; + pInfo->immedInfo.depthStencilCreateInfo.front = libInfo.immedInfo.depthStencilCreateInfo.front; + pInfo->immedInfo.depthStencilCreateInfo.back = libInfo.immedInfo.depthStencilCreateInfo.back; + pInfo->immedInfo.depthStencilCreateInfo.depthFunc = libInfo.immedInfo.depthStencilCreateInfo.depthFunc; + pInfo->immedInfo.depthStencilCreateInfo.depthEnable = libInfo.immedInfo.depthStencilCreateInfo.depthEnable; + pInfo->immedInfo.depthStencilCreateInfo.depthWriteEnable = libInfo.immedInfo.depthStencilCreateInfo.depthWriteEnable; + pInfo->immedInfo.depthStencilCreateInfo.depthBoundsEnable = 
libInfo.immedInfo.depthStencilCreateInfo.depthBoundsEnable; + pInfo->immedInfo.depthStencilCreateInfo.stencilEnable = libInfo.immedInfo.depthStencilCreateInfo.stencilEnable; + pInfo->immedInfo.vrsRateParams = libInfo.immedInfo.vrsRateParams; + + pInfo->staticStateMask |= (libInfo.staticStateMask & FgsDynamicStatesMask); +} + +// ===================================================================================================================== +static void CopyFragmentOutputInterfaceState( + const GraphicsPipelineLibrary* pLibrary, + GraphicsPipelineObjectCreateInfo* pInfo) +{ + const GraphicsPipelineObjectCreateInfo& libInfo = pLibrary->GetPipelineObjectCreateInfo(); + + pInfo->pipeline.cbState.dualSourceBlendEnable = libInfo.pipeline.cbState.dualSourceBlendEnable; + pInfo->pipeline.cbState.logicOp = libInfo.pipeline.cbState.logicOp; + pInfo->pipeline.cbState.uavExportSingleDraw = libInfo.pipeline.cbState.uavExportSingleDraw; + pInfo->pipeline.cbState.target[0].forceAlphaToOne = libInfo.pipeline.cbState.target[0].forceAlphaToOne; + pInfo->pipeline.cbState.alphaToCoverageEnable = libInfo.pipeline.cbState.alphaToCoverageEnable; + for (uint32_t i = 0; i < MaxColorTargets; ++i) + { + pInfo->pipeline.cbState.target[i].swizzledFormat = libInfo.pipeline.cbState.target[i].swizzledFormat; + pInfo->pipeline.cbState.target[i].channelWriteMask = libInfo.pipeline.cbState.target[i].channelWriteMask; + } + pInfo->pipeline.viewInstancingDesc = libInfo.pipeline.viewInstancingDesc; + + for (uint32_t i = 0; i < Pal::MaxColorTargets; ++i) + { + pInfo->blend.targets[i] = libInfo.blend.targets[i]; + } + + pInfo->msaa.coverageSamples = libInfo.msaa.coverageSamples; + pInfo->msaa.exposedSamples = libInfo.msaa.exposedSamples; + pInfo->msaa.pixelShaderSamples = libInfo.msaa.pixelShaderSamples; + pInfo->msaa.depthStencilSamples = libInfo.msaa.depthStencilSamples; + pInfo->msaa.shaderExportMaskSamples = libInfo.msaa.shaderExportMaskSamples; + pInfo->msaa.sampleMask = 
libInfo.msaa.sampleMask; + pInfo->msaa.sampleClusters = libInfo.msaa.sampleClusters; + pInfo->msaa.alphaToCoverageSamples = libInfo.msaa.alphaToCoverageSamples; + pInfo->msaa.occlusionQuerySamples = libInfo.msaa.occlusionQuerySamples; + pInfo->msaa.flags.enable1xMsaaSampleLocations = libInfo.msaa.flags.enable1xMsaaSampleLocations; + + pInfo->immedInfo.blendConstParams = libInfo.immedInfo.blendConstParams; + pInfo->immedInfo.samplePattern = libInfo.immedInfo.samplePattern; + + pInfo->sampleCoverage = libInfo.sampleCoverage; + pInfo->flags.customMultiSampleState = libInfo.flags.customMultiSampleState; + pInfo->flags.customSampleLocations = libInfo.flags.customSampleLocations; + pInfo->flags.force1x1ShaderRate = libInfo.flags.force1x1ShaderRate; + pInfo->flags.sampleShadingEnable = libInfo.flags.sampleShadingEnable; + + pInfo->staticStateMask |= (libInfo.staticStateMask & FoiDynamicStatesMask); + + pInfo->dbFormat = libInfo.dbFormat; +} + // ===================================================================================================================== static void BuildRasterizationState( const Device* pDevice, @@ -492,10 +835,11 @@ static void BuildRasterizationState( pInfo->immedInfo.triangleRasterState.cullMode = VkToPalCullMode(pRs->cullMode); pInfo->immedInfo.triangleRasterState.frontFace = VkToPalFaceOrientation(pRs->frontFace); - pInfo->immedInfo.triangleRasterState.flags.depthBiasEnable = pRs->depthBiasEnable; - pInfo->immedInfo.depthBiasParams.depthBias = pRs->depthBiasConstantFactor; - pInfo->immedInfo.depthBiasParams.depthBiasClamp = pRs->depthBiasClamp; - pInfo->immedInfo.depthBiasParams.slopeScaledDepthBias = pRs->depthBiasSlopeFactor; + pInfo->immedInfo.triangleRasterState.flags.frontDepthBiasEnable = pRs->depthBiasEnable; + pInfo->immedInfo.triangleRasterState.flags.backDepthBiasEnable = pRs->depthBiasEnable; + pInfo->immedInfo.depthBiasParams.depthBias = pRs->depthBiasConstantFactor; + pInfo->immedInfo.depthBiasParams.depthBiasClamp = 
pRs->depthBiasClamp; + pInfo->immedInfo.depthBiasParams.slopeScaledDepthBias = pRs->depthBiasSlopeFactor; if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::RasterizerDiscardEnableExt) == true) { @@ -925,6 +1269,36 @@ static void BuildDepthStencilState( pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::StencilReference); } + if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::DepthWriteEnableExt) == false) + { + pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::DepthWriteEnableExt); + } + + if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::DepthTestEnableExt) == false) + { + pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::DepthTestEnableExt); + } + + if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::DepthCompareOpExt) == false) + { + pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::DepthCompareOpExt); + } + + if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::DepthBoundsTestEnableExt) == false) + { + pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::DepthBoundsTestEnableExt); + } + + if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::StencilTestEnableExt) == false) + { + pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::StencilTestEnableExt); + } + + if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::StencilOpExt) == false) + { + pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::StencilOpExt); + } + pInfo->immedInfo.depthStencilCreateInfo.front.stencilFailOp = VkToPalStencilOp(pDs->front.failOp); pInfo->immedInfo.depthStencilCreateInfo.front.stencilPassOp = VkToPalStencilOp(pDs->front.passOp); pInfo->immedInfo.depthStencilCreateInfo.front.stencilDepthFailOp = VkToPalStencilOp(pDs->front.depthFailOp); @@ -1183,26 +1557,6 @@ static void BuildFragmentShaderState( BuildVrsRateParams(pDevice, pPipelineFragmentShadingRateStateCreateInfoKHR, 
dynamicStateFlags, pInfo); - if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::DepthTestEnableExt) == false) - { - pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::DepthTestEnableExt); - } - if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::DepthCompareOpExt) == false) - { - pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::DepthCompareOpExt); - } - if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::DepthBoundsTestEnableExt) == false) - { - pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::DepthBoundsTestEnableExt); - } - if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::StencilTestEnableExt) == false) - { - pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::StencilTestEnableExt); - } - if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::StencilOpExt) == false) - { - pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::StencilOpExt); - } } // ===================================================================================================================== @@ -1231,13 +1585,13 @@ static void BuildFragmentOutputInterfaceState( { // Build states via VkPipelineColorBlendStateCreateInfo BuildColorBlendState( - pDevice, - pPipelineRenderingCreateInfoKHR, - pIn->pColorBlendState, - pRenderPass, - subpass, - dynamicStateFlags, - pInfo); + pDevice, + pPipelineRenderingCreateInfoKHR, + pIn->pColorBlendState, + pRenderPass, + subpass, + dynamicStateFlags, + pInfo); } BuildRenderingState(pDevice, @@ -1246,10 +1600,6 @@ static void BuildFragmentOutputInterfaceState( pRenderPass, pInfo); - if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::DepthWriteEnableExt) == false) - { - pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::DepthWriteEnableExt); - } if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::ColorWriteEnableExt) == false) { pInfo->staticStateMask |= 1 << 
static_cast(DynamicStatesInternal::ColorWriteEnableExt); @@ -1341,12 +1691,10 @@ static void BuildExecutablePipelineState( pInfo->sampleCoverage = 1; - pInfo->immedInfo.samplePattern = {}; + pInfo->immedInfo.samplePattern.sampleCount = 1; + pInfo->immedInfo.samplePattern.locations = *Device::GetDefaultQuadSamplePattern(1); pInfo->flags.sampleShadingEnable = false; - - pInfo->staticStateMask &= - ~(1 << static_cast(DynamicStatesInternal::SampleLocationsExt)); } #if PAL_BUILD_GFX103 @@ -1392,35 +1740,67 @@ void GraphicsPipelineCommon::BuildPipelineObjectCreateInfo( const PipelineLayout* pPipelineLayout, GraphicsPipelineObjectCreateInfo* pInfo) { + GraphicsPipelineLibraryInfo libInfo; + ExtractLibraryInfo(pIn, &libInfo); - pInfo->activeStages = GetActiveShaderStages(pIn - ); + pInfo->activeStages = GetActiveShaderStages(pIn, &libInfo); - uint32_t dynamicStateFlags = GetDynamicStateFlags( - pIn->pDynamicState - ); + uint32_t dynamicStateFlags = GetDynamicStateFlags(pIn->pDynamicState, &libInfo); - BuildVertexInputInterfaceState(pDevice, pIn, pVbInfo, dynamicStateFlags, false, pInfo); + pInfo->dynamicStates = dynamicStateFlags; - BuildPreRasterizationShaderState(pDevice, - pIn, - dynamicStateFlags, - pInfo); + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) + { + BuildVertexInputInterfaceState( + pDevice, pIn, pVbInfo, dynamicStateFlags, libInfo.flags.isLibrary, pInfo); + } + else if (libInfo.pVertexInputInterfaceLib != nullptr) + { + CopyVertexInputInterfaceState(libInfo.pVertexInputInterfaceLib, pInfo); + } + + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) + { + BuildPreRasterizationShaderState(pDevice, + pIn, + dynamicStateFlags, + pInfo); + } + else if (libInfo.pPreRasterizationShaderLib != nullptr) + { + CopyPreRasterizationShaderState(libInfo.pPreRasterizationShaderLib, pInfo); + } const bool enableRasterization = + (~libInfo.libFlags & 
VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) || (pInfo->immedInfo.rasterizerDiscardEnable == false) || IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::RasterizerDiscardEnableExt); if (enableRasterization) { - BuildFragmentShaderState(pDevice, - pIn, - dynamicStateFlags, - pInfo); + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) + { + BuildFragmentShaderState(pDevice, + pIn, + dynamicStateFlags, + pInfo); + } + else if (libInfo.pFragmentShaderLib != nullptr) + { + CopyFragmentShaderState(libInfo.pFragmentShaderLib, pInfo); + } - BuildFragmentOutputInterfaceState(pDevice, pIn, dynamicStateFlags, pInfo); + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) + { + BuildFragmentOutputInterfaceState(pDevice, pIn, dynamicStateFlags, pInfo); + } + else if (libInfo.pFragmentOutputInterfaceLib != nullptr) + { + CopyFragmentOutputInterfaceState(libInfo.pFragmentOutputInterfaceLib, pInfo); + } } + if (libInfo.flags.isLibrary == false) { BuildExecutablePipelineState(pBinInfo, dynamicStateFlags, pInfo); @@ -1435,27 +1815,6 @@ void GraphicsPipelineCommon::BuildPipelineObjectCreateInfo( } } -// ===================================================================================================================== -// Achieve pipeline layout from VkGraphicsPipelineCreateInfo. -// If the pipeline layout is merged, callee must destroy it manually. 
-VkResult GraphicsPipelineCommon::AchievePipelineLayout( - const Device* pDevice, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - PipelineLayout** ppPipelineLayout, - bool* pIsMerged) -{ - VkResult result = VK_SUCCESS; - - *pIsMerged = false; - - { - *ppPipelineLayout = PipelineLayout::ObjectFromHandle(pCreateInfo->layout); - } - - return result; -} - // ===================================================================================================================== // Build the input information for pipeline compiler to create the graphics pipeline binaries VkResult GraphicsPipelineCommon::BuildPipelineBinaryCreateInfo( @@ -2086,28 +2445,58 @@ uint64_t GraphicsPipelineCommon::BuildApiHash( Util::MetroHash128 baseHasher; Util::MetroHash128 apiHasher; - uint32_t dynamicStateFlags = GetDynamicStateFlags( - pCreateInfo->pDynamicState - ); + GraphicsPipelineLibraryInfo libInfo; + GraphicsPipelineCommon::ExtractLibraryInfo(pCreateInfo, &libInfo); + + uint32_t dynamicStateFlags = GetDynamicStateFlags(pCreateInfo->pDynamicState, &libInfo); baseHasher.Update(pCreateInfo->flags); baseHasher.Update(dynamicStateFlags); const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pCreateInfo->renderPass); - GenerateHashForVertexInputInterfaceState(pCreateInfo, &baseHasher, &apiHasher); + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) + { + GenerateHashForVertexInputInterfaceState(pCreateInfo, &baseHasher, &apiHasher); + } + else if (libInfo.pVertexInputInterfaceLib != nullptr) + { + baseHasher.Update(libInfo.pVertexInputInterfaceLib->GetApiHash()); + } - GenerateHashForPreRasterizationShadersState(pCreateInfo, pInfo, &baseHasher, &apiHasher); + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) + { + GenerateHashForPreRasterizationShadersState(pCreateInfo, pInfo, &baseHasher, &apiHasher); + } + else if (libInfo.pPreRasterizationShaderLib != nullptr) + { 
+ baseHasher.Update(libInfo.pPreRasterizationShaderLib->GetApiHash()); + } const bool enableRasterization = + (~libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) || (pInfo->immedInfo.rasterizerDiscardEnable == false) || IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::RasterizerDiscardEnableExt); if (enableRasterization) { - GenerateHashForFragmentShaderState(pCreateInfo, &baseHasher, &apiHasher); + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) + { + GenerateHashForFragmentShaderState(pCreateInfo, &baseHasher, &apiHasher); + } + else if (libInfo.pFragmentShaderLib != nullptr) + { + baseHasher.Update(libInfo.pFragmentShaderLib->GetApiHash()); + } - GenerateHashForFragmentOutputInterfaceState(pCreateInfo, &baseHasher, &apiHasher); + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) + { + GenerateHashForFragmentOutputInterfaceState(pCreateInfo, &baseHasher, &apiHasher); + } + else if (libInfo.pFragmentOutputInterfaceLib != nullptr) + { + baseHasher.Update(libInfo.pFragmentOutputInterfaceLib->GetApiHash()); + } } if ((pCreateInfo->flags & VK_PIPELINE_CREATE_DERIVATIVE_BIT) && (pCreateInfo->basePipelineHandle != VK_NULL_HANDLE)) diff --git a/icd/api/include/app_profile.h b/icd/api/include/app_profile.h index 8a6ddd1c..c71d7fe0 100644 --- a/icd/api/include/app_profile.h +++ b/icd/api/include/app_profile.h @@ -45,6 +45,7 @@ namespace Pal namespace vk { class Instance; +class PhysicalDevice; }; namespace vk diff --git a/icd/api/include/compiler_solution.h b/icd/api/include/compiler_solution.h index e68044c3..5eb52689 100644 --- a/icd/api/include/compiler_solution.h +++ b/icd/api/include/compiler_solution.h @@ -94,6 +94,9 @@ struct GraphicsPipelineBinaryCreateInfo PipelineCreationFeedback pipelineFeedback; PipelineCreationFeedback stageFeedback[ShaderStage::ShaderStageGfxCount]; uint32_t rasterizationStream; + VkGraphicsPipelineLibraryFlagsEXT libFlags; // 
These flags indicate the section(s) included in pipeline + // (library). Including the sections in the referenced + // libraries. PipelineMetadata pipelineMetadata; }; @@ -138,6 +141,7 @@ class CompilerSolution VkShaderModuleCreateFlags flags, size_t codeSize, const void* pCode, + const bool adaptForFaskLink, ShaderModuleHandle* pShaderModule, const Util::MetroHash::Hash& hash) = 0; diff --git a/icd/api/include/compiler_solution_llpc.h b/icd/api/include/compiler_solution_llpc.h index b5539614..06d19d1a 100644 --- a/icd/api/include/compiler_solution_llpc.h +++ b/icd/api/include/compiler_solution_llpc.h @@ -65,6 +65,7 @@ class CompilerSolutionLlpc final : public CompilerSolution VkShaderModuleCreateFlags flags, size_t codeSize, const void* pCode, + const bool adaptForFaskLink, ShaderModuleHandle* pShaderModule, const Util::MetroHash::Hash& hash) override; diff --git a/icd/api/include/graphics_pipeline_common.h b/icd/api/include/graphics_pipeline_common.h index 6018bb31..61928215 100644 --- a/icd/api/include/graphics_pipeline_common.h +++ b/icd/api/include/graphics_pipeline_common.h @@ -37,6 +37,7 @@ namespace vk { +class GraphicsPipelineLibrary; class PipelineCache; class RenderPass; struct PipelineOptimizerKey; @@ -134,6 +135,7 @@ struct GraphicsPipelineObjectCreateInfo uint32_t sampleCoverage; VkShaderStageFlagBits activeStages; VkFormat dbFormat; + uint32_t dynamicStates; union { @@ -162,6 +164,32 @@ struct GraphicsPipelineBinaryInfo const PipelineOptimizerKey* pOptimizerKey; }; +// ===================================================================================================================== +// Graphics pipeline library information extracted from VkGraphicsPipelineCreateInfo +struct GraphicsPipelineLibraryInfo +{ + union + { + struct + { + uint32_t isLibrary : 1; //> Whether the pipeline is a library or is executable + uint32_t optimize : 1; //> Can do link time optimization + uint32_t reserved : 30; + }; + uint32_t value; + } flags; + + 
VkGraphicsPipelineLibraryFlagsEXT libFlags; //> The sections whose state should be built via + // VkGraphicsPipelineCreateInfo rather than copy from pipeline + // library or be skipped. + + // The referred pipeline libraries for each section. + const GraphicsPipelineLibrary* pVertexInputInterfaceLib; + const GraphicsPipelineLibrary* pPreRasterizationShaderLib; + const GraphicsPipelineLibrary* pFragmentShaderLib; + const GraphicsPipelineLibrary* pFragmentOutputInterfaceLib; +}; + // ===================================================================================================================== // The common part used by both executable graphics pipelines and graphics pipeline libraries class GraphicsPipelineCommon : public Pipeline @@ -177,8 +205,8 @@ class GraphicsPipelineCommon : public Pipeline // Get the active shader stages through API info static VkShaderStageFlagBits GetActiveShaderStages( - const VkGraphicsPipelineCreateInfo* pGraphicsPipelineCreateInfo - ); + const VkGraphicsPipelineCreateInfo* pGraphicsPipelineCreateInfo, + const GraphicsPipelineLibraryInfo* pLibInfo); // Returns true if Dual Source Blending is to be enabled based on the given ColorBlendAttachmentState static bool GetDualSourceBlendEnableState( @@ -200,8 +228,18 @@ class GraphicsPipelineCommon : public Pipeline // Get the dynamics states specified by API info static uint32_t GetDynamicStateFlags( - const VkPipelineDynamicStateCreateInfo* pDy - ); + const VkPipelineDynamicStateCreateInfo* pDy, + const GraphicsPipelineLibraryInfo* pLibInfo); + + // Extract graphics pipeline library related info from VkGraphicsPipelineCreateInfo. 
+ static void ExtractLibraryInfo( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + GraphicsPipelineLibraryInfo* pLibInfo); + + // Check whether pipeline binary will be built + static bool NeedBuildPipelineBinary( + const GraphicsPipelineLibraryInfo* pLibInfo, + const bool enableRasterization); protected: // Convert API information into internal create info used to create internal pipeline binary @@ -225,13 +263,6 @@ class GraphicsPipelineCommon : public Pipeline const PipelineLayout* pPipelineLayout, GraphicsPipelineObjectCreateInfo* pObjInfo); - static VkResult AchievePipelineLayout( - const Device* pDevice, - const VkGraphicsPipelineCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - PipelineLayout** ppPipelineLayout, - bool* pIsTemporary); - // Generates the API PSO hash using the contents of the VkGraphicsPipelineCreateInfo struct static uint64_t BuildApiHash( const VkGraphicsPipelineCreateInfo* pCreateInfo, diff --git a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std.h b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std.h index 05956989..440b09ec 100644 --- a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std.h +++ b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std.h @@ -21,16 +21,13 @@ extern "C" { #define vulkan_video_codec_h264std 1 #include -// Vulkan 0.9 provisional Vulkan video H.264 encode and decode std specification version number -#define VK_STD_VULKAN_VIDEO_CODEC_H264_API_VERSION_0_9_5 VK_MAKE_VIDEO_STD_VERSION(0, 9, 5) // Patch version should always be set to 0 - #define STD_VIDEO_H264_CPB_CNT_LIST_SIZE 32 #define STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS 6 #define STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS 16 #define STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS 2 #define STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS 64 -#define VK_STD_VULKAN_VIDEO_CODEC_H264_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_H264_API_VERSION_0_9_5 -#define 
VK_STD_VULKAN_VIDEO_CODEC_H264_EXTENSION_NAME "VK_STD_vulkan_video_codec_h264" +#define STD_VIDEO_H264_MAX_NUM_LIST_REF 32 +#define STD_VIDEO_H264_MAX_CHROMA_PLANES 2 typedef enum StdVideoH264ChromaFormatIdc { STD_VIDEO_H264_CHROMA_FORMAT_IDC_MONOCHROME = 0, @@ -207,19 +204,19 @@ typedef struct StdVideoH264HrdParameters { } StdVideoH264HrdParameters; typedef struct StdVideoH264SequenceParameterSetVui { - StdVideoH264AspectRatioIdc aspect_ratio_idc; - uint16_t sar_width; - uint16_t sar_height; - uint8_t video_format; - uint8_t color_primaries; - uint8_t transfer_characteristics; - uint8_t matrix_coefficients; - uint32_t num_units_in_tick; - uint32_t time_scale; - StdVideoH264HrdParameters* pHrdParameters; - uint8_t max_num_reorder_frames; - uint8_t max_dec_frame_buffering; - StdVideoH264SpsVuiFlags flags; + StdVideoH264SpsVuiFlags flags; + StdVideoH264AspectRatioIdc aspect_ratio_idc; + uint16_t sar_width; + uint16_t sar_height; + uint8_t video_format; + uint8_t color_primaries; + uint8_t transfer_characteristics; + uint8_t matrix_coefficients; + uint32_t num_units_in_tick; + uint32_t time_scale; + const StdVideoH264HrdParameters* pHrdParameters; + uint8_t max_num_reorder_frames; + uint8_t max_dec_frame_buffering; } StdVideoH264SequenceParameterSetVui; typedef struct StdVideoH264SpsFlags { @@ -249,29 +246,29 @@ typedef struct StdVideoH264ScalingLists { } StdVideoH264ScalingLists; typedef struct StdVideoH264SequenceParameterSet { - StdVideoH264ProfileIdc profile_idc; - StdVideoH264Level level_idc; - uint8_t seq_parameter_set_id; - StdVideoH264ChromaFormatIdc chroma_format_idc; - uint8_t bit_depth_luma_minus8; - uint8_t bit_depth_chroma_minus8; - uint8_t log2_max_frame_num_minus4; - StdVideoH264PocType pic_order_cnt_type; - uint8_t log2_max_pic_order_cnt_lsb_minus4; - int32_t offset_for_non_ref_pic; - int32_t offset_for_top_to_bottom_field; - uint8_t num_ref_frames_in_pic_order_cnt_cycle; - uint8_t max_num_ref_frames; - uint32_t pic_width_in_mbs_minus1; - uint32_t 
pic_height_in_map_units_minus1; - uint32_t frame_crop_left_offset; - uint32_t frame_crop_right_offset; - uint32_t frame_crop_top_offset; - uint32_t frame_crop_bottom_offset; - StdVideoH264SpsFlags flags; - int32_t* pOffsetForRefFrame; - StdVideoH264ScalingLists* pScalingLists; - StdVideoH264SequenceParameterSetVui* pSequenceParameterSetVui; + StdVideoH264SpsFlags flags; + StdVideoH264ProfileIdc profile_idc; + StdVideoH264Level level_idc; + uint8_t seq_parameter_set_id; + StdVideoH264ChromaFormatIdc chroma_format_idc; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t log2_max_frame_num_minus4; + StdVideoH264PocType pic_order_cnt_type; + uint8_t log2_max_pic_order_cnt_lsb_minus4; + int32_t offset_for_non_ref_pic; + int32_t offset_for_top_to_bottom_field; + uint8_t num_ref_frames_in_pic_order_cnt_cycle; + uint8_t max_num_ref_frames; + uint32_t pic_width_in_mbs_minus1; + uint32_t pic_height_in_map_units_minus1; + uint32_t frame_crop_left_offset; + uint32_t frame_crop_right_offset; + uint32_t frame_crop_top_offset; + uint32_t frame_crop_bottom_offset; + const int32_t* pOffsetForRefFrame; + const StdVideoH264ScalingLists* pScalingLists; + const StdVideoH264SequenceParameterSetVui* pSequenceParameterSetVui; } StdVideoH264SequenceParameterSet; typedef struct StdVideoH264PpsFlags { @@ -287,17 +284,17 @@ typedef struct StdVideoH264PpsFlags { } StdVideoH264PpsFlags; typedef struct StdVideoH264PictureParameterSet { - uint8_t seq_parameter_set_id; - uint8_t pic_parameter_set_id; - uint8_t num_ref_idx_l0_default_active_minus1; - uint8_t num_ref_idx_l1_default_active_minus1; - StdVideoH264WeightedBipredIdc weighted_bipred_idc; - int8_t pic_init_qp_minus26; - int8_t pic_init_qs_minus26; - int8_t chroma_qp_index_offset; - int8_t second_chroma_qp_index_offset; - StdVideoH264PpsFlags flags; - StdVideoH264ScalingLists* pScalingLists; + StdVideoH264PpsFlags flags; + uint8_t seq_parameter_set_id; + uint8_t pic_parameter_set_id; + uint8_t 
num_ref_idx_l0_default_active_minus1; + uint8_t num_ref_idx_l1_default_active_minus1; + StdVideoH264WeightedBipredIdc weighted_bipred_idc; + int8_t pic_init_qp_minus26; + int8_t pic_init_qs_minus26; + int8_t chroma_qp_index_offset; + int8_t second_chroma_qp_index_offset; + const StdVideoH264ScalingLists* pScalingLists; } StdVideoH264PictureParameterSet; diff --git a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std_decode.h b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std_decode.h index 8e3b05c0..7a1688a0 100644 --- a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std_decode.h +++ b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std_decode.h @@ -20,8 +20,13 @@ extern "C" { #define vulkan_video_codec_h264std_decode 1 +// Vulkan 0.9 provisional Vulkan video H.264 decode std specification version number +#define VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_API_VERSION_0_9_6 VK_MAKE_VIDEO_STD_VERSION(0, 9, 6) // Patch version should always be set to 0 + #define STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_LIST_SIZE 2 #define STD_VIDEO_DECODE_H264_MVC_REF_LIST_SIZE 15 +#define VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_API_VERSION_0_9_6 +#define VK_STD_VULKAN_VIDEO_CODEC_H264_DECODE_EXTENSION_NAME "VK_STD_vulkan_video_codec_h264_decode" typedef enum StdVideoDecodeH264FieldOrderCount { STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_TOP = 0, @@ -39,27 +44,27 @@ typedef struct StdVideoDecodeH264PictureInfoFlags { } StdVideoDecodeH264PictureInfoFlags; typedef struct StdVideoDecodeH264PictureInfo { + StdVideoDecodeH264PictureInfoFlags flags; uint8_t seq_parameter_set_id; uint8_t pic_parameter_set_id; uint16_t reserved; uint16_t frame_num; uint16_t idr_pic_id; int32_t PicOrderCnt[STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_LIST_SIZE]; - StdVideoDecodeH264PictureInfoFlags flags; } StdVideoDecodeH264PictureInfo; typedef struct StdVideoDecodeH264ReferenceInfoFlags { uint32_t 
top_field_flag : 1; uint32_t bottom_field_flag : 1; - uint32_t is_long_term : 1; + uint32_t used_for_long_term_reference : 1; uint32_t is_non_existing : 1; } StdVideoDecodeH264ReferenceInfoFlags; typedef struct StdVideoDecodeH264ReferenceInfo { + StdVideoDecodeH264ReferenceInfoFlags flags; uint16_t FrameNum; uint16_t reserved; int32_t PicOrderCnt[2]; - StdVideoDecodeH264ReferenceInfoFlags flags; } StdVideoDecodeH264ReferenceInfo; typedef struct StdVideoDecodeH264MvcElementFlags { @@ -85,9 +90,9 @@ typedef struct StdVideoDecodeH264MvcElement { } StdVideoDecodeH264MvcElement; typedef struct StdVideoDecodeH264Mvc { - uint32_t viewId0; - uint32_t mvcElementCount; - StdVideoDecodeH264MvcElement* pMvcElements; + uint32_t viewId0; + uint32_t mvcElementCount; + const StdVideoDecodeH264MvcElement* pMvcElements; } StdVideoDecodeH264Mvc; diff --git a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std_encode.h b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std_encode.h index 8b69abf8..4cf37e36 100644 --- a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std_encode.h +++ b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h264std_encode.h @@ -20,6 +20,32 @@ extern "C" { #define vulkan_video_codec_h264std_encode 1 +// Vulkan 0.9 provisional Vulkan video H.264 encode std specification version number +#define VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_API_VERSION_0_9_6 VK_MAKE_VIDEO_STD_VERSION(0, 9, 6) // Patch version should always be set to 0 + +#define VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_API_VERSION_0_9_6 +#define VK_STD_VULKAN_VIDEO_CODEC_H264_ENCODE_EXTENSION_NAME "VK_STD_vulkan_video_codec_h264_encode" +typedef struct StdVideoEncodeH264WeightTableFlags { + uint32_t luma_weight_l0_flag; + uint32_t chroma_weight_l0_flag; + uint32_t luma_weight_l1_flag; + uint32_t chroma_weight_l1_flag; +} StdVideoEncodeH264WeightTableFlags; + +typedef struct 
StdVideoEncodeH264WeightTable { + StdVideoEncodeH264WeightTableFlags flags; + uint8_t luma_log2_weight_denom; + uint8_t chroma_log2_weight_denom; + int8_t luma_weight_l0[STD_VIDEO_H264_MAX_NUM_LIST_REF]; + int8_t luma_offset_l0[STD_VIDEO_H264_MAX_NUM_LIST_REF]; + int8_t chroma_weight_l0[STD_VIDEO_H264_MAX_NUM_LIST_REF][STD_VIDEO_H264_MAX_CHROMA_PLANES]; + int8_t chroma_offset_l0[STD_VIDEO_H264_MAX_NUM_LIST_REF][STD_VIDEO_H264_MAX_CHROMA_PLANES]; + int8_t luma_weight_l1[STD_VIDEO_H264_MAX_NUM_LIST_REF]; + int8_t luma_offset_l1[STD_VIDEO_H264_MAX_NUM_LIST_REF]; + int8_t chroma_weight_l1[STD_VIDEO_H264_MAX_NUM_LIST_REF][STD_VIDEO_H264_MAX_CHROMA_PLANES]; + int8_t chroma_offset_l1[STD_VIDEO_H264_MAX_NUM_LIST_REF][STD_VIDEO_H264_MAX_CHROMA_PLANES]; +} StdVideoEncodeH264WeightTable; + typedef struct StdVideoEncodeH264SliceHeaderFlags { uint32_t direct_spatial_mv_pred_flag : 1; uint32_t num_ref_idx_active_override_flag : 1; @@ -31,11 +57,11 @@ typedef struct StdVideoEncodeH264SliceHeaderFlags { typedef struct StdVideoEncodeH264PictureInfoFlags { uint32_t idr_flag : 1; uint32_t is_reference_flag : 1; - uint32_t long_term_reference_flag : 1; + uint32_t used_for_long_term_reference : 1; } StdVideoEncodeH264PictureInfoFlags; typedef struct StdVideoEncodeH264ReferenceInfoFlags { - uint32_t is_long_term : 1; + uint32_t used_for_long_term_reference : 1; } StdVideoEncodeH264ReferenceInfoFlags; typedef struct StdVideoEncodeH264RefMgmtFlags { @@ -58,17 +84,19 @@ typedef struct StdVideoEncodeH264RefPicMarkingEntry { } StdVideoEncodeH264RefPicMarkingEntry; typedef struct StdVideoEncodeH264RefMemMgmtCtrlOperations { - StdVideoEncodeH264RefMgmtFlags flags; - uint8_t refList0ModOpCount; - StdVideoEncodeH264RefListModEntry* pRefList0ModOperations; - uint8_t refList1ModOpCount; - StdVideoEncodeH264RefListModEntry* pRefList1ModOperations; - uint8_t refPicMarkingOpCount; - StdVideoEncodeH264RefPicMarkingEntry* pRefPicMarkingOperations; + StdVideoEncodeH264RefMgmtFlags flags; + uint8_t 
refList0ModOpCount; + const StdVideoEncodeH264RefListModEntry* pRefList0ModOperations; + uint8_t refList1ModOpCount; + const StdVideoEncodeH264RefListModEntry* pRefList1ModOperations; + uint8_t refPicMarkingOpCount; + const StdVideoEncodeH264RefPicMarkingEntry* pRefPicMarkingOperations; } StdVideoEncodeH264RefMemMgmtCtrlOperations; typedef struct StdVideoEncodeH264PictureInfo { StdVideoEncodeH264PictureInfoFlags flags; + uint8_t seq_parameter_set_id; + uint8_t pic_parameter_set_id; StdVideoH264PictureType pictureType; uint32_t frame_num; int32_t PicOrderCnt; @@ -86,8 +114,6 @@ typedef struct StdVideoEncodeH264SliceHeader { StdVideoEncodeH264SliceHeaderFlags flags; uint32_t first_mb_in_slice; StdVideoH264SliceType slice_type; - uint8_t seq_parameter_set_id; - uint8_t pic_parameter_set_id; uint16_t idr_pic_id; uint8_t num_ref_idx_l0_active_minus1; uint8_t num_ref_idx_l1_active_minus1; @@ -95,6 +121,7 @@ typedef struct StdVideoEncodeH264SliceHeader { StdVideoH264DisableDeblockingFilterIdc disable_deblocking_filter_idc; int8_t slice_alpha_c0_offset_div2; int8_t slice_beta_offset_div2; + const StdVideoEncodeH264WeightTable* pWeightTable; } StdVideoEncodeH264SliceHeader; diff --git a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std.h b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std.h index 009133ab..f0e5c787 100644 --- a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std.h +++ b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std.h @@ -20,9 +20,6 @@ extern "C" { #define vulkan_video_codec_h265std 1 -// Vulkan 0.5 version number WIP -#define VK_STD_VULKAN_VIDEO_CODEC_H265_API_VERSION_0_9_5 VK_MAKE_VIDEO_STD_VERSION(0, 9, 5) // Patch version should always be set to 0 - #define STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE 7 #define STD_VIDEO_H265_CPB_CNT_LIST_SIZE 32 #define STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS 6 @@ -38,8 +35,8 @@ extern "C" { #define 
STD_VIDEO_H265_CHROMA_QP_OFFSET_TILE_COLS_LIST_SIZE 19 #define STD_VIDEO_H265_CHROMA_QP_OFFSET_TILE_ROWS_LIST_SIZE 21 #define STD_VIDEO_H265_CHROMA_QP_OFFSET_LIST_SIZE 6 -#define VK_STD_VULKAN_VIDEO_CODEC_H265_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_H265_API_VERSION_0_9_5 -#define VK_STD_VULKAN_VIDEO_CODEC_H265_EXTENSION_NAME "VK_STD_vulkan_video_codec_h265" +#define STD_VIDEO_H265_MAX_NUM_LIST_REF 15 +#define STD_VIDEO_H265_MAX_CHROMA_PLANES 2 typedef enum StdVideoH265ChromaFormatIdc { STD_VIDEO_H265_CHROMA_FORMAT_IDC_MONOCHROME = 0, @@ -119,20 +116,20 @@ typedef struct StdVideoH265HrdFlags { } StdVideoH265HrdFlags; typedef struct StdVideoH265HrdParameters { - uint8_t tick_divisor_minus2; - uint8_t du_cpb_removal_delay_increment_length_minus1; - uint8_t dpb_output_delay_du_length_minus1; - uint8_t bit_rate_scale; - uint8_t cpb_size_scale; - uint8_t cpb_size_du_scale; - uint8_t initial_cpb_removal_delay_length_minus1; - uint8_t au_cpb_removal_delay_length_minus1; - uint8_t dpb_output_delay_length_minus1; - uint8_t cpb_cnt_minus1[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; - uint16_t elemental_duration_in_tc_minus1[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; - StdVideoH265SubLayerHrdParameters* pSubLayerHrdParametersNal[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; - StdVideoH265SubLayerHrdParameters* pSubLayerHrdParametersVcl[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; - StdVideoH265HrdFlags flags; + StdVideoH265HrdFlags flags; + uint8_t tick_divisor_minus2; + uint8_t du_cpb_removal_delay_increment_length_minus1; + uint8_t dpb_output_delay_du_length_minus1; + uint8_t bit_rate_scale; + uint8_t cpb_size_scale; + uint8_t cpb_size_du_scale; + uint8_t initial_cpb_removal_delay_length_minus1; + uint8_t au_cpb_removal_delay_length_minus1; + uint8_t dpb_output_delay_length_minus1; + uint8_t cpb_cnt_minus1[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; + uint16_t elemental_duration_in_tc_minus1[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; + const StdVideoH265SubLayerHrdParameters* 
pSubLayerHrdParametersNal[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; + const StdVideoH265SubLayerHrdParameters* pSubLayerHrdParametersVcl[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; } StdVideoH265HrdParameters; typedef struct StdVideoH265VpsFlags { @@ -143,14 +140,14 @@ typedef struct StdVideoH265VpsFlags { } StdVideoH265VpsFlags; typedef struct StdVideoH265VideoParameterSet { - uint8_t vps_video_parameter_set_id; - uint8_t vps_max_sub_layers_minus1; - uint32_t vps_num_units_in_tick; - uint32_t vps_time_scale; - uint32_t vps_num_ticks_poc_diff_one_minus1; - StdVideoH265DecPicBufMgr* pDecPicBufMgr; - StdVideoH265HrdParameters* pHrdParameters; - StdVideoH265VpsFlags flags; + StdVideoH265VpsFlags flags; + uint8_t vps_video_parameter_set_id; + uint8_t vps_max_sub_layers_minus1; + uint32_t vps_num_units_in_tick; + uint32_t vps_time_scale; + uint32_t vps_num_ticks_poc_diff_one_minus1; + const StdVideoH265DecPicBufMgr* pDecPicBufMgr; + const StdVideoH265HrdParameters* pHrdParameters; } StdVideoH265VideoParameterSet; typedef struct StdVideoH265ScalingLists { @@ -184,29 +181,29 @@ typedef struct StdVideoH265SpsVuiFlags { } StdVideoH265SpsVuiFlags; typedef struct StdVideoH265SequenceParameterSetVui { - uint8_t aspect_ratio_idc; - uint16_t sar_width; - uint16_t sar_height; - uint8_t video_format; - uint8_t colour_primaries; - uint8_t transfer_characteristics; - uint8_t matrix_coeffs; - uint8_t chroma_sample_loc_type_top_field; - uint8_t chroma_sample_loc_type_bottom_field; - uint16_t def_disp_win_left_offset; - uint16_t def_disp_win_right_offset; - uint16_t def_disp_win_top_offset; - uint16_t def_disp_win_bottom_offset; - uint32_t vui_num_units_in_tick; - uint32_t vui_time_scale; - uint32_t vui_num_ticks_poc_diff_one_minus1; - StdVideoH265HrdParameters* pHrdParameters; - uint16_t min_spatial_segmentation_idc; - uint8_t max_bytes_per_pic_denom; - uint8_t max_bits_per_min_cu_denom; - uint8_t log2_max_mv_length_horizontal; - uint8_t log2_max_mv_length_vertical; - 
StdVideoH265SpsVuiFlags flags; + StdVideoH265SpsVuiFlags flags; + uint8_t aspect_ratio_idc; + uint16_t sar_width; + uint16_t sar_height; + uint8_t video_format; + uint8_t colour_primaries; + uint8_t transfer_characteristics; + uint8_t matrix_coeffs; + uint8_t chroma_sample_loc_type_top_field; + uint8_t chroma_sample_loc_type_bottom_field; + uint16_t def_disp_win_left_offset; + uint16_t def_disp_win_right_offset; + uint16_t def_disp_win_top_offset; + uint16_t def_disp_win_bottom_offset; + uint32_t vui_num_units_in_tick; + uint32_t vui_time_scale; + uint32_t vui_num_ticks_poc_diff_one_minus1; + const StdVideoH265HrdParameters* pHrdParameters; + uint16_t min_spatial_segmentation_idc; + uint8_t max_bytes_per_pic_denom; + uint8_t max_bits_per_min_cu_denom; + uint8_t log2_max_mv_length_horizontal; + uint8_t log2_max_mv_length_vertical; } StdVideoH265SequenceParameterSetVui; typedef struct StdVideoH265PredictorPaletteEntries { @@ -237,6 +234,7 @@ typedef struct StdVideoH265SpsFlags { uint32_t high_precision_offsets_enabled_flag : 1; uint32_t persistent_rice_adaptation_enabled_flag : 1; uint32_t cabac_bypass_alignment_enabled_flag : 1; + uint32_t sps_scc_extension_flag : 1; uint32_t sps_curr_pic_ref_enabled_flag : 1; uint32_t palette_mode_enabled_flag : 1; uint32_t sps_palette_predictor_initializer_present_flag : 1; @@ -244,43 +242,42 @@ typedef struct StdVideoH265SpsFlags { } StdVideoH265SpsFlags; typedef struct StdVideoH265SequenceParameterSet { - StdVideoH265ProfileIdc profile_idc; - StdVideoH265Level level_idc; - uint32_t pic_width_in_luma_samples; - uint32_t pic_height_in_luma_samples; - uint8_t sps_video_parameter_set_id; - uint8_t sps_max_sub_layers_minus1; - uint8_t sps_seq_parameter_set_id; - uint8_t chroma_format_idc; - uint8_t bit_depth_luma_minus8; - uint8_t bit_depth_chroma_minus8; - uint8_t log2_max_pic_order_cnt_lsb_minus4; - uint8_t sps_max_dec_pic_buffering_minus1; - uint8_t log2_min_luma_coding_block_size_minus3; - uint8_t 
log2_diff_max_min_luma_coding_block_size; - uint8_t log2_min_luma_transform_block_size_minus2; - uint8_t log2_diff_max_min_luma_transform_block_size; - uint8_t max_transform_hierarchy_depth_inter; - uint8_t max_transform_hierarchy_depth_intra; - uint8_t num_short_term_ref_pic_sets; - uint8_t num_long_term_ref_pics_sps; - uint8_t pcm_sample_bit_depth_luma_minus1; - uint8_t pcm_sample_bit_depth_chroma_minus1; - uint8_t log2_min_pcm_luma_coding_block_size_minus3; - uint8_t log2_diff_max_min_pcm_luma_coding_block_size; - uint32_t conf_win_left_offset; - uint32_t conf_win_right_offset; - uint32_t conf_win_top_offset; - uint32_t conf_win_bottom_offset; - StdVideoH265DecPicBufMgr* pDecPicBufMgr; - StdVideoH265SpsFlags flags; - StdVideoH265ScalingLists* pScalingLists; - StdVideoH265SequenceParameterSetVui* pSequenceParameterSetVui; - uint8_t palette_max_size; - uint8_t delta_palette_max_predictor_size; - uint8_t motion_vector_resolution_control_idc; - uint8_t sps_num_palette_predictor_initializer_minus1; - StdVideoH265PredictorPaletteEntries* pPredictorPaletteEntries; + StdVideoH265SpsFlags flags; + StdVideoH265ProfileIdc profile_idc; + StdVideoH265Level level_idc; + uint32_t pic_width_in_luma_samples; + uint32_t pic_height_in_luma_samples; + uint8_t sps_video_parameter_set_id; + uint8_t sps_max_sub_layers_minus1; + uint8_t sps_seq_parameter_set_id; + uint8_t chroma_format_idc; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t log2_max_pic_order_cnt_lsb_minus4; + uint8_t log2_min_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_luma_coding_block_size; + uint8_t log2_min_luma_transform_block_size_minus2; + uint8_t log2_diff_max_min_luma_transform_block_size; + uint8_t max_transform_hierarchy_depth_inter; + uint8_t max_transform_hierarchy_depth_intra; + uint8_t num_short_term_ref_pic_sets; + uint8_t num_long_term_ref_pics_sps; + uint8_t pcm_sample_bit_depth_luma_minus1; + uint8_t pcm_sample_bit_depth_chroma_minus1; + uint8_t 
log2_min_pcm_luma_coding_block_size_minus3; + uint8_t log2_diff_max_min_pcm_luma_coding_block_size; + uint32_t conf_win_left_offset; + uint32_t conf_win_right_offset; + uint32_t conf_win_top_offset; + uint32_t conf_win_bottom_offset; + const StdVideoH265DecPicBufMgr* pDecPicBufMgr; + const StdVideoH265ScalingLists* pScalingLists; + const StdVideoH265SequenceParameterSetVui* pSequenceParameterSetVui; + uint8_t palette_max_size; + uint8_t delta_palette_max_predictor_size; + uint8_t motion_vector_resolution_control_idc; + uint8_t sps_num_palette_predictor_initializer_minus1; + const StdVideoH265PredictorPaletteEntries* pPredictorPaletteEntries; } StdVideoH265SequenceParameterSet; typedef struct StdVideoH265PpsFlags { @@ -318,38 +315,38 @@ typedef struct StdVideoH265PpsFlags { } StdVideoH265PpsFlags; typedef struct StdVideoH265PictureParameterSet { - uint8_t pps_pic_parameter_set_id; - uint8_t pps_seq_parameter_set_id; - uint8_t num_extra_slice_header_bits; - uint8_t num_ref_idx_l0_default_active_minus1; - uint8_t num_ref_idx_l1_default_active_minus1; - int8_t init_qp_minus26; - uint8_t diff_cu_qp_delta_depth; - int8_t pps_cb_qp_offset; - int8_t pps_cr_qp_offset; - uint8_t num_tile_columns_minus1; - uint8_t num_tile_rows_minus1; - uint16_t column_width_minus1[STD_VIDEO_H265_CHROMA_QP_OFFSET_TILE_COLS_LIST_SIZE]; - uint16_t row_height_minus1[STD_VIDEO_H265_CHROMA_QP_OFFSET_TILE_ROWS_LIST_SIZE]; - int8_t pps_beta_offset_div2; - int8_t pps_tc_offset_div2; - uint8_t log2_parallel_merge_level_minus2; - StdVideoH265PpsFlags flags; - StdVideoH265ScalingLists* pScalingLists; - uint8_t log2_max_transform_skip_block_size_minus2; - uint8_t diff_cu_chroma_qp_offset_depth; - uint8_t chroma_qp_offset_list_len_minus1; - int8_t cb_qp_offset_list[STD_VIDEO_H265_CHROMA_QP_OFFSET_LIST_SIZE]; - int8_t cr_qp_offset_list[STD_VIDEO_H265_CHROMA_QP_OFFSET_LIST_SIZE]; - uint8_t log2_sao_offset_scale_luma; - uint8_t log2_sao_offset_scale_chroma; - int8_t pps_act_y_qp_offset_plus5; - int8_t 
pps_act_cb_qp_offset_plus5; - int8_t pps_act_cr_qp_offset_plus5; - uint8_t pps_num_palette_predictor_initializer; - uint8_t luma_bit_depth_entry_minus8; - uint8_t chroma_bit_depth_entry_minus8; - StdVideoH265PredictorPaletteEntries* pPredictorPaletteEntries; + StdVideoH265PpsFlags flags; + uint8_t pps_pic_parameter_set_id; + uint8_t pps_seq_parameter_set_id; + uint8_t num_extra_slice_header_bits; + uint8_t num_ref_idx_l0_default_active_minus1; + uint8_t num_ref_idx_l1_default_active_minus1; + int8_t init_qp_minus26; + uint8_t diff_cu_qp_delta_depth; + int8_t pps_cb_qp_offset; + int8_t pps_cr_qp_offset; + uint8_t num_tile_columns_minus1; + uint8_t num_tile_rows_minus1; + uint16_t column_width_minus1[STD_VIDEO_H265_CHROMA_QP_OFFSET_TILE_COLS_LIST_SIZE]; + uint16_t row_height_minus1[STD_VIDEO_H265_CHROMA_QP_OFFSET_TILE_ROWS_LIST_SIZE]; + int8_t pps_beta_offset_div2; + int8_t pps_tc_offset_div2; + uint8_t log2_parallel_merge_level_minus2; + const StdVideoH265ScalingLists* pScalingLists; + uint8_t log2_max_transform_skip_block_size_minus2; + uint8_t diff_cu_chroma_qp_offset_depth; + uint8_t chroma_qp_offset_list_len_minus1; + int8_t cb_qp_offset_list[STD_VIDEO_H265_CHROMA_QP_OFFSET_LIST_SIZE]; + int8_t cr_qp_offset_list[STD_VIDEO_H265_CHROMA_QP_OFFSET_LIST_SIZE]; + uint8_t log2_sao_offset_scale_luma; + uint8_t log2_sao_offset_scale_chroma; + int8_t pps_act_y_qp_offset_plus5; + int8_t pps_act_cb_qp_offset_plus5; + int8_t pps_act_cr_qp_offset_plus5; + uint8_t pps_num_palette_predictor_initializer; + uint8_t luma_bit_depth_entry_minus8; + uint8_t chroma_bit_depth_entry_minus8; + const StdVideoH265PredictorPaletteEntries* pPredictorPaletteEntries; } StdVideoH265PictureParameterSet; diff --git a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std_decode.h b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std_decode.h index 0867952f..1171f339 100644 --- a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std_decode.h +++ 
b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std_decode.h @@ -20,7 +20,12 @@ extern "C" { #define vulkan_video_codec_h265std_decode 1 +// Vulkan 0.9 provisional Vulkan video H.265 decode std specification version number +#define VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_API_VERSION_0_9_7 VK_MAKE_VIDEO_STD_VERSION(0, 9, 7) // Patch version should always be set to 0 + #define STD_VIDEO_DECODE_H265_REF_PIC_SET_LIST_SIZE 8 +#define VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_API_VERSION_0_9_7 +#define VK_STD_VULKAN_VIDEO_CODEC_H265_DECODE_EXTENSION_NAME "VK_STD_vulkan_video_codec_h265_decode" typedef struct StdVideoDecodeH265PictureInfoFlags { uint32_t IrapPicFlag : 1; uint32_t IdrPicFlag : 1; @@ -29,8 +34,9 @@ typedef struct StdVideoDecodeH265PictureInfoFlags { } StdVideoDecodeH265PictureInfoFlags; typedef struct StdVideoDecodeH265PictureInfo { - uint8_t vps_video_parameter_set_id; - uint8_t sps_seq_parameter_set_id; + StdVideoDecodeH265PictureInfoFlags flags; + uint8_t sps_video_parameter_set_id; + uint8_t pps_seq_parameter_set_id; uint8_t pps_pic_parameter_set_id; uint8_t num_short_term_ref_pic_sets; int32_t PicOrderCntVal; @@ -39,17 +45,17 @@ typedef struct StdVideoDecodeH265PictureInfo { uint8_t RefPicSetStCurrBefore[STD_VIDEO_DECODE_H265_REF_PIC_SET_LIST_SIZE]; uint8_t RefPicSetStCurrAfter[STD_VIDEO_DECODE_H265_REF_PIC_SET_LIST_SIZE]; uint8_t RefPicSetLtCurr[STD_VIDEO_DECODE_H265_REF_PIC_SET_LIST_SIZE]; - StdVideoDecodeH265PictureInfoFlags flags; } StdVideoDecodeH265PictureInfo; typedef struct StdVideoDecodeH265ReferenceInfoFlags { - uint32_t is_long_term : 1; + uint32_t used_for_long_term_reference : 1; + uint32_t unused_for_reference : 1; uint32_t is_non_existing : 1; } StdVideoDecodeH265ReferenceInfoFlags; typedef struct StdVideoDecodeH265ReferenceInfo { - int32_t PicOrderCntVal; StdVideoDecodeH265ReferenceInfoFlags flags; + int32_t PicOrderCntVal; } StdVideoDecodeH265ReferenceInfo; diff --git 
a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std_encode.h b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std_encode.h index 5acdc5dc..dd3b7ffb 100644 --- a/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std_encode.h +++ b/icd/api/include/khronos/sdk-1.3/vk_video/vulkan_video_codec_h265std_encode.h @@ -20,13 +20,37 @@ extern "C" { #define vulkan_video_codec_h265std_encode 1 -#define STD_VIDEO_ENCODE_H265_LUMA_LIST_SIZE 15 -#define STD_VIDEO_ENCODE_H265_CHROMA_LIST_SIZE 15 -#define STD_VIDEO_ENCODE_H265_CHROMA_LISTS_NUM 2 +// Vulkan 0.9 provisional Vulkan video H.265 encode std specification version number +#define VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_API_VERSION_0_9_7 VK_MAKE_VIDEO_STD_VERSION(0, 9, 7) // Patch version should always be set to 0 + +#define VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_API_VERSION_0_9_7 +#define VK_STD_VULKAN_VIDEO_CODEC_H265_ENCODE_EXTENSION_NAME "VK_STD_vulkan_video_codec_h265_encode" +typedef struct StdVideoEncodeH265WeightTableFlags { + uint16_t luma_weight_l0_flag; + uint16_t chroma_weight_l0_flag; + uint16_t luma_weight_l1_flag; + uint16_t chroma_weight_l1_flag; +} StdVideoEncodeH265WeightTableFlags; + +typedef struct StdVideoEncodeH265WeightTable { + StdVideoEncodeH265WeightTableFlags flags; + uint8_t luma_log2_weight_denom; + int8_t delta_chroma_log2_weight_denom; + int8_t delta_luma_weight_l0[STD_VIDEO_H265_MAX_NUM_LIST_REF]; + int8_t luma_offset_l0[STD_VIDEO_H265_MAX_NUM_LIST_REF]; + int8_t delta_chroma_weight_l0[STD_VIDEO_H265_MAX_NUM_LIST_REF][STD_VIDEO_H265_MAX_CHROMA_PLANES]; + int8_t delta_chroma_offset_l0[STD_VIDEO_H265_MAX_NUM_LIST_REF][STD_VIDEO_H265_MAX_CHROMA_PLANES]; + int8_t delta_luma_weight_l1[STD_VIDEO_H265_MAX_NUM_LIST_REF]; + int8_t luma_offset_l1[STD_VIDEO_H265_MAX_NUM_LIST_REF]; + int8_t delta_chroma_weight_l1[STD_VIDEO_H265_MAX_NUM_LIST_REF][STD_VIDEO_H265_MAX_CHROMA_PLANES]; + int8_t 
delta_chroma_offset_l1[STD_VIDEO_H265_MAX_NUM_LIST_REF][STD_VIDEO_H265_MAX_CHROMA_PLANES]; +} StdVideoEncodeH265WeightTable; + typedef struct StdVideoEncodeH265SliceSegmentHeaderFlags { uint32_t first_slice_segment_in_pic_flag : 1; uint32_t no_output_of_prior_pics_flag : 1; uint32_t dependent_slice_segment_flag : 1; + uint32_t pic_output_flag : 1; uint32_t short_term_ref_pic_set_sps_flag : 1; uint32_t slice_temporal_mvp_enable_flag : 1; uint32_t slice_sao_luma_flag : 1; @@ -37,17 +61,11 @@ typedef struct StdVideoEncodeH265SliceSegmentHeaderFlags { uint32_t slice_deblocking_filter_disable_flag : 1; uint32_t collocated_from_l0_flag : 1; uint32_t slice_loop_filter_across_slices_enabled_flag : 1; - uint32_t bLastSliceInPic : 1; - uint32_t reservedBits : 18; - uint16_t luma_weight_l0_flag; - uint16_t chroma_weight_l0_flag; - uint16_t luma_weight_l1_flag; - uint16_t chroma_weight_l1_flag; } StdVideoEncodeH265SliceSegmentHeaderFlags; typedef struct StdVideoEncodeH265SliceSegmentHeader { + StdVideoEncodeH265SliceSegmentHeaderFlags flags; StdVideoH265SliceType slice_type; - uint8_t slice_pic_parameter_set_id; uint8_t num_short_term_ref_pic_sets; uint32_t slice_segment_address; uint8_t short_term_ref_pic_set_idx; @@ -56,16 +74,6 @@ typedef struct StdVideoEncodeH265SliceSegmentHeader { uint8_t collocated_ref_idx; uint8_t num_ref_idx_l0_active_minus1; uint8_t num_ref_idx_l1_active_minus1; - uint8_t luma_log2_weight_denom; - int8_t delta_chroma_log2_weight_denom; - int8_t delta_luma_weight_l0[STD_VIDEO_ENCODE_H265_LUMA_LIST_SIZE]; - int8_t luma_offset_l0[STD_VIDEO_ENCODE_H265_LUMA_LIST_SIZE]; - int8_t delta_chroma_weight_l0[STD_VIDEO_ENCODE_H265_CHROMA_LIST_SIZE][STD_VIDEO_ENCODE_H265_CHROMA_LISTS_NUM]; - int8_t delta_chroma_offset_l0[STD_VIDEO_ENCODE_H265_CHROMA_LIST_SIZE][STD_VIDEO_ENCODE_H265_CHROMA_LISTS_NUM]; - int8_t delta_luma_weight_l1[STD_VIDEO_ENCODE_H265_LUMA_LIST_SIZE]; - int8_t luma_offset_l1[STD_VIDEO_ENCODE_H265_LUMA_LIST_SIZE]; - int8_t 
delta_chroma_weight_l1[STD_VIDEO_ENCODE_H265_CHROMA_LIST_SIZE][STD_VIDEO_ENCODE_H265_CHROMA_LISTS_NUM]; - int8_t delta_chroma_offset_l1[STD_VIDEO_ENCODE_H265_CHROMA_LIST_SIZE][STD_VIDEO_ENCODE_H265_CHROMA_LISTS_NUM]; uint8_t MaxNumMergeCand; int8_t slice_cb_qp_offset; int8_t slice_cr_qp_offset; @@ -74,7 +82,7 @@ typedef struct StdVideoEncodeH265SliceSegmentHeader { int8_t slice_act_y_qp_offset; int8_t slice_act_cb_qp_offset; int8_t slice_act_cr_qp_offset; - StdVideoEncodeH265SliceSegmentHeaderFlags flags; + const StdVideoEncodeH265WeightTable* pWeightTable; } StdVideoEncodeH265SliceSegmentHeader; typedef struct StdVideoEncodeH265ReferenceModificationFlags { @@ -85,35 +93,38 @@ typedef struct StdVideoEncodeH265ReferenceModificationFlags { typedef struct StdVideoEncodeH265ReferenceModifications { StdVideoEncodeH265ReferenceModificationFlags flags; uint8_t referenceList0ModificationsCount; - uint8_t* pReferenceList0Modifications; + const uint8_t* pReferenceList0Modifications; uint8_t referenceList1ModificationsCount; - uint8_t* pReferenceList1Modifications; + const uint8_t* pReferenceList1Modifications; } StdVideoEncodeH265ReferenceModifications; typedef struct StdVideoEncodeH265PictureInfoFlags { uint32_t is_reference_flag : 1; uint32_t IrapPicFlag : 1; uint32_t long_term_flag : 1; + uint32_t discardable_flag : 1; + uint32_t cross_layer_bla_flag : 1; } StdVideoEncodeH265PictureInfoFlags; typedef struct StdVideoEncodeH265PictureInfo { + StdVideoEncodeH265PictureInfoFlags flags; StdVideoH265PictureType PictureType; uint8_t sps_video_parameter_set_id; uint8_t pps_seq_parameter_set_id; + uint8_t pps_pic_parameter_set_id; int32_t PicOrderCntVal; uint8_t TemporalId; - StdVideoEncodeH265PictureInfoFlags flags; } StdVideoEncodeH265PictureInfo; typedef struct StdVideoEncodeH265ReferenceInfoFlags { - uint32_t is_long_term : 1; - uint32_t isUsedFlag : 1; + uint32_t used_for_long_term_reference : 1; + uint32_t unused_for_reference : 1; } StdVideoEncodeH265ReferenceInfoFlags; 
typedef struct StdVideoEncodeH265ReferenceInfo { + StdVideoEncodeH265ReferenceInfoFlags flags; int32_t PicOrderCntVal; uint8_t TemporalId; - StdVideoEncodeH265ReferenceInfoFlags flags; } StdVideoEncodeH265ReferenceInfo; diff --git a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h index f5f9086c..37537633 100644 --- a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h +++ b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_beta.h @@ -22,7 +22,7 @@ extern "C" { #define VK_KHR_video_queue 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkVideoSessionKHR) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkVideoSessionParametersKHR) -#define VK_KHR_VIDEO_QUEUE_SPEC_VERSION 2 +#define VK_KHR_VIDEO_QUEUE_SPEC_VERSION 3 #define VK_KHR_VIDEO_QUEUE_EXTENSION_NAME "VK_KHR_video_queue" typedef enum VkQueryResultStatusKHR { @@ -138,6 +138,7 @@ typedef struct VkVideoCapabilitiesKHR { VkExtent2D maxExtent; uint32_t maxReferencePicturesSlotsCount; uint32_t maxReferencePicturesActiveCount; + VkExtensionProperties stdHeaderVersion; } VkVideoCapabilitiesKHR; typedef struct VkPhysicalDeviceVideoFormatInfoKHR { @@ -196,6 +197,7 @@ typedef struct VkVideoSessionCreateInfoKHR { VkFormat referencePicturesFormat; uint32_t maxReferencePicturesSlotsCount; uint32_t maxReferencePicturesActiveCount; + const VkExtensionProperties* pStdHeaderVersion; } VkVideoSessionCreateInfoKHR; typedef struct VkVideoSessionParametersCreateInfoKHR { @@ -313,7 +315,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdControlVideoCodingKHR( #define VK_KHR_video_decode_queue 1 -#define VK_KHR_VIDEO_DECODE_QUEUE_SPEC_VERSION 3 +#define VK_KHR_VIDEO_DECODE_QUEUE_SPEC_VERSION 4 #define VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME "VK_KHR_video_decode_queue" typedef enum VkVideoDecodeCapabilityFlagBitsKHR { @@ -340,8 +342,6 @@ typedef struct VkVideoDecodeInfoKHR { VkStructureType sType; const void* pNext; VkVideoDecodeFlagsKHR flags; - VkOffset2D codedOffset; - VkExtent2D codedExtent; VkBuffer srcBuffer; 
VkDeviceSize srcBufferOffset; VkDeviceSize srcBufferRange; @@ -392,7 +392,7 @@ typedef struct VkPhysicalDevicePortabilitySubsetPropertiesKHR { #define VK_KHR_video_encode_queue 1 -#define VK_KHR_VIDEO_ENCODE_QUEUE_SPEC_VERSION 4 +#define VK_KHR_VIDEO_ENCODE_QUEUE_SPEC_VERSION 5 #define VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME "VK_KHR_video_encode_queue" typedef enum VkVideoEncodeFlagBitsKHR { @@ -428,7 +428,6 @@ typedef struct VkVideoEncodeInfoKHR { const void* pNext; VkVideoEncodeFlagsKHR flags; uint32_t qualityLevel; - VkExtent2D codedExtent; VkBuffer dstBitstreamBuffer; VkDeviceSize dstBitstreamBufferOffset; VkDeviceSize dstBitstreamBufferMaxRange; @@ -441,7 +440,7 @@ typedef struct VkVideoEncodeInfoKHR { typedef struct VkVideoEncodeCapabilitiesKHR { VkStructureType sType; - const void* pNext; + void* pNext; VkVideoEncodeCapabilityFlagsKHR flags; VkVideoEncodeRateControlModeFlagsKHR rateControlModes; uint8_t rateControlLayerCount; @@ -481,33 +480,35 @@ VKAPI_ATTR void VKAPI_CALL vkCmdEncodeVideoKHR( #define VK_EXT_video_encode_h264 1 #include "vk_video/vulkan_video_codec_h264std.h" #include "vk_video/vulkan_video_codec_h264std_encode.h" -#define VK_EXT_VIDEO_ENCODE_H264_SPEC_VERSION 5 +#define VK_EXT_VIDEO_ENCODE_H264_SPEC_VERSION 7 #define VK_EXT_VIDEO_ENCODE_H264_EXTENSION_NAME "VK_EXT_video_encode_h264" typedef enum VkVideoEncodeH264CapabilityFlagBitsEXT { - VK_VIDEO_ENCODE_H264_CAPABILITY_DIRECT_8X8_INFERENCE_BIT_EXT = 0x00000001, - VK_VIDEO_ENCODE_H264_CAPABILITY_SEPARATE_COLOUR_PLANE_BIT_EXT = 0x00000002, - VK_VIDEO_ENCODE_H264_CAPABILITY_QPPRIME_Y_ZERO_TRANSFORM_BYPASS_BIT_EXT = 0x00000004, - VK_VIDEO_ENCODE_H264_CAPABILITY_SCALING_LISTS_BIT_EXT = 0x00000008, - VK_VIDEO_ENCODE_H264_CAPABILITY_HRD_COMPLIANCE_BIT_EXT = 0x00000010, - VK_VIDEO_ENCODE_H264_CAPABILITY_CHROMA_QP_OFFSET_BIT_EXT = 0x00000020, - VK_VIDEO_ENCODE_H264_CAPABILITY_SECOND_CHROMA_QP_OFFSET_BIT_EXT = 0x00000040, - VK_VIDEO_ENCODE_H264_CAPABILITY_PIC_INIT_QP_MINUS26_BIT_EXT = 0x00000080, - 
VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_PRED_BIT_EXT = 0x00000100, - VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_BIPRED_EXPLICIT_BIT_EXT = 0x00000200, - VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_BIPRED_IMPLICIT_BIT_EXT = 0x00000400, - VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_PRED_NO_TABLE_BIT_EXT = 0x00000800, - VK_VIDEO_ENCODE_H264_CAPABILITY_TRANSFORM_8X8_BIT_EXT = 0x00001000, - VK_VIDEO_ENCODE_H264_CAPABILITY_CABAC_BIT_EXT = 0x00002000, - VK_VIDEO_ENCODE_H264_CAPABILITY_CAVLC_BIT_EXT = 0x00004000, - VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_DISABLED_BIT_EXT = 0x00008000, - VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_ENABLED_BIT_EXT = 0x00010000, - VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_PARTIAL_BIT_EXT = 0x00020000, - VK_VIDEO_ENCODE_H264_CAPABILITY_DISABLE_DIRECT_SPATIAL_MV_PRED_BIT_EXT = 0x00040000, - VK_VIDEO_ENCODE_H264_CAPABILITY_MULTIPLE_SLICE_PER_FRAME_BIT_EXT = 0x00080000, - VK_VIDEO_ENCODE_H264_CAPABILITY_SLICE_MB_COUNT_BIT_EXT = 0x00100000, - VK_VIDEO_ENCODE_H264_CAPABILITY_ROW_UNALIGNED_SLICE_BIT_EXT = 0x00200000, - VK_VIDEO_ENCODE_H264_CAPABILITY_DIFFERENT_SLICE_TYPE_BIT_EXT = 0x00400000, + VK_VIDEO_ENCODE_H264_CAPABILITY_DIRECT_8X8_INFERENCE_ENABLED_BIT_EXT = 0x00000001, + VK_VIDEO_ENCODE_H264_CAPABILITY_DIRECT_8X8_INFERENCE_DISABLED_BIT_EXT = 0x00000002, + VK_VIDEO_ENCODE_H264_CAPABILITY_SEPARATE_COLOUR_PLANE_BIT_EXT = 0x00000004, + VK_VIDEO_ENCODE_H264_CAPABILITY_QPPRIME_Y_ZERO_TRANSFORM_BYPASS_BIT_EXT = 0x00000008, + VK_VIDEO_ENCODE_H264_CAPABILITY_SCALING_LISTS_BIT_EXT = 0x00000010, + VK_VIDEO_ENCODE_H264_CAPABILITY_HRD_COMPLIANCE_BIT_EXT = 0x00000020, + VK_VIDEO_ENCODE_H264_CAPABILITY_CHROMA_QP_OFFSET_BIT_EXT = 0x00000040, + VK_VIDEO_ENCODE_H264_CAPABILITY_SECOND_CHROMA_QP_OFFSET_BIT_EXT = 0x00000080, + VK_VIDEO_ENCODE_H264_CAPABILITY_PIC_INIT_QP_MINUS26_BIT_EXT = 0x00000100, + VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_PRED_BIT_EXT = 0x00000200, + VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_BIPRED_EXPLICIT_BIT_EXT = 0x00000400, 
+ VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_BIPRED_IMPLICIT_BIT_EXT = 0x00000800, + VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_PRED_NO_TABLE_BIT_EXT = 0x00001000, + VK_VIDEO_ENCODE_H264_CAPABILITY_TRANSFORM_8X8_BIT_EXT = 0x00002000, + VK_VIDEO_ENCODE_H264_CAPABILITY_CABAC_BIT_EXT = 0x00004000, + VK_VIDEO_ENCODE_H264_CAPABILITY_CAVLC_BIT_EXT = 0x00008000, + VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_DISABLED_BIT_EXT = 0x00010000, + VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_ENABLED_BIT_EXT = 0x00020000, + VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_PARTIAL_BIT_EXT = 0x00040000, + VK_VIDEO_ENCODE_H264_CAPABILITY_DISABLE_DIRECT_SPATIAL_MV_PRED_BIT_EXT = 0x00080000, + VK_VIDEO_ENCODE_H264_CAPABILITY_MULTIPLE_SLICE_PER_FRAME_BIT_EXT = 0x00100000, + VK_VIDEO_ENCODE_H264_CAPABILITY_SLICE_MB_COUNT_BIT_EXT = 0x00200000, + VK_VIDEO_ENCODE_H264_CAPABILITY_ROW_UNALIGNED_SLICE_BIT_EXT = 0x00400000, + VK_VIDEO_ENCODE_H264_CAPABILITY_DIFFERENT_SLICE_TYPE_BIT_EXT = 0x00800000, + VK_VIDEO_ENCODE_H264_CAPABILITY_B_FRAME_IN_L1_LIST_BIT_EXT = 0x01000000, VK_VIDEO_ENCODE_H264_CAPABILITY_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkVideoEncodeH264CapabilityFlagBitsEXT; typedef VkFlags VkVideoEncodeH264CapabilityFlagsEXT; @@ -528,13 +529,6 @@ typedef enum VkVideoEncodeH264OutputModeFlagBitsEXT { } VkVideoEncodeH264OutputModeFlagBitsEXT; typedef VkFlags VkVideoEncodeH264OutputModeFlagsEXT; -typedef enum VkVideoEncodeH264CreateFlagBitsEXT { - VK_VIDEO_ENCODE_H264_CREATE_DEFAULT_EXT = 0, - VK_VIDEO_ENCODE_H264_CREATE_RESERVED_0_BIT_EXT = 0x00000001, - VK_VIDEO_ENCODE_H264_CREATE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF -} VkVideoEncodeH264CreateFlagBitsEXT; -typedef VkFlags VkVideoEncodeH264CreateFlagsEXT; - typedef enum VkVideoEncodeH264RateControlStructureFlagBitsEXT { VK_VIDEO_ENCODE_H264_RATE_CONTROL_STRUCTURE_UNKNOWN_EXT = 0, VK_VIDEO_ENCODE_H264_RATE_CONTROL_STRUCTURE_FLAT_BIT_EXT = 0x00000001, @@ -544,7 +538,7 @@ typedef enum VkVideoEncodeH264RateControlStructureFlagBitsEXT { 
typedef VkFlags VkVideoEncodeH264RateControlStructureFlagsEXT; typedef struct VkVideoEncodeH264CapabilitiesEXT { VkStructureType sType; - const void* pNext; + void* pNext; VkVideoEncodeH264CapabilityFlagsEXT flags; VkVideoEncodeH264InputModeFlagsEXT inputModeFlags; VkVideoEncodeH264OutputModeFlagsEXT outputModeFlags; @@ -556,17 +550,8 @@ typedef struct VkVideoEncodeH264CapabilitiesEXT { uint32_t maxBitsPerMbDenom; uint32_t log2MaxMvLengthHorizontal; uint32_t log2MaxMvLengthVertical; - VkExtensionProperties stdExtensionVersion; } VkVideoEncodeH264CapabilitiesEXT; -typedef struct VkVideoEncodeH264SessionCreateInfoEXT { - VkStructureType sType; - const void* pNext; - VkVideoEncodeH264CreateFlagsEXT flags; - VkExtent2D maxPictureSizeInMbs; - const VkExtensionProperties* pStdExtensionVersion; -} VkVideoEncodeH264SessionCreateInfoEXT; - typedef struct VkVideoEncodeH264SessionParametersAddInfoEXT { VkStructureType sType; const void* pNext; @@ -674,7 +659,7 @@ typedef struct VkVideoEncodeH264RateControlLayerInfoEXT { #define VK_EXT_video_encode_h265 1 #include "vk_video/vulkan_video_codec_h265std.h" #include "vk_video/vulkan_video_codec_h265std_encode.h" -#define VK_EXT_VIDEO_ENCODE_H265_SPEC_VERSION 5 +#define VK_EXT_VIDEO_ENCODE_H265_SPEC_VERSION 7 #define VK_EXT_VIDEO_ENCODE_H265_EXTENSION_NAME "VK_EXT_video_encode_h265" typedef enum VkVideoEncodeH265CapabilityFlagBitsEXT { @@ -688,20 +673,22 @@ typedef enum VkVideoEncodeH265CapabilityFlagBitsEXT { VK_VIDEO_ENCODE_H265_CAPABILITY_LOG2_PARALLEL_MERGE_LEVEL_MINUS2_BIT_EXT = 0x00000080, VK_VIDEO_ENCODE_H265_CAPABILITY_SIGN_DATA_HIDING_ENABLED_BIT_EXT = 0x00000100, VK_VIDEO_ENCODE_H265_CAPABILITY_TRANSFORM_SKIP_ENABLED_BIT_EXT = 0x00000200, - VK_VIDEO_ENCODE_H265_CAPABILITY_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_BIT_EXT = 0x00000400, - VK_VIDEO_ENCODE_H265_CAPABILITY_WEIGHTED_PRED_BIT_EXT = 0x00000800, - VK_VIDEO_ENCODE_H265_CAPABILITY_WEIGHTED_BIPRED_BIT_EXT = 0x00001000, - 
VK_VIDEO_ENCODE_H265_CAPABILITY_WEIGHTED_PRED_NO_TABLE_BIT_EXT = 0x00002000, - VK_VIDEO_ENCODE_H265_CAPABILITY_TRANSQUANT_BYPASS_ENABLED_BIT_EXT = 0x00004000, - VK_VIDEO_ENCODE_H265_CAPABILITY_ENTROPY_CODING_SYNC_ENABLED_BIT_EXT = 0x00008000, - VK_VIDEO_ENCODE_H265_CAPABILITY_DEBLOCKING_FILTER_OVERRIDE_ENABLED_BIT_EXT = 0x00010000, - VK_VIDEO_ENCODE_H265_CAPABILITY_MULTIPLE_TILE_PER_FRAME_BIT_EXT = 0x00020000, - VK_VIDEO_ENCODE_H265_CAPABILITY_MULTIPLE_SLICE_PER_TILE_BIT_EXT = 0x00040000, - VK_VIDEO_ENCODE_H265_CAPABILITY_MULTIPLE_TILE_PER_SLICE_BIT_EXT = 0x00080000, - VK_VIDEO_ENCODE_H265_CAPABILITY_SLICE_SEGMENT_CTB_COUNT_BIT_EXT = 0x00100000, - VK_VIDEO_ENCODE_H265_CAPABILITY_ROW_UNALIGNED_SLICE_SEGMENT_BIT_EXT = 0x00200000, - VK_VIDEO_ENCODE_H265_CAPABILITY_DEPENDENT_SLICE_SEGMENT_BIT_EXT = 0x00400000, - VK_VIDEO_ENCODE_H265_CAPABILITY_DIFFERENT_SLICE_TYPE_BIT_EXT = 0x00800000, + VK_VIDEO_ENCODE_H265_CAPABILITY_TRANSFORM_SKIP_DISABLED_BIT_EXT = 0x00000400, + VK_VIDEO_ENCODE_H265_CAPABILITY_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT_BIT_EXT = 0x00000800, + VK_VIDEO_ENCODE_H265_CAPABILITY_WEIGHTED_PRED_BIT_EXT = 0x00001000, + VK_VIDEO_ENCODE_H265_CAPABILITY_WEIGHTED_BIPRED_BIT_EXT = 0x00002000, + VK_VIDEO_ENCODE_H265_CAPABILITY_WEIGHTED_PRED_NO_TABLE_BIT_EXT = 0x00004000, + VK_VIDEO_ENCODE_H265_CAPABILITY_TRANSQUANT_BYPASS_ENABLED_BIT_EXT = 0x00008000, + VK_VIDEO_ENCODE_H265_CAPABILITY_ENTROPY_CODING_SYNC_ENABLED_BIT_EXT = 0x00010000, + VK_VIDEO_ENCODE_H265_CAPABILITY_DEBLOCKING_FILTER_OVERRIDE_ENABLED_BIT_EXT = 0x00020000, + VK_VIDEO_ENCODE_H265_CAPABILITY_MULTIPLE_TILE_PER_FRAME_BIT_EXT = 0x00040000, + VK_VIDEO_ENCODE_H265_CAPABILITY_MULTIPLE_SLICE_PER_TILE_BIT_EXT = 0x00080000, + VK_VIDEO_ENCODE_H265_CAPABILITY_MULTIPLE_TILE_PER_SLICE_BIT_EXT = 0x00100000, + VK_VIDEO_ENCODE_H265_CAPABILITY_SLICE_SEGMENT_CTB_COUNT_BIT_EXT = 0x00200000, + VK_VIDEO_ENCODE_H265_CAPABILITY_ROW_UNALIGNED_SLICE_SEGMENT_BIT_EXT = 0x00400000, + 
VK_VIDEO_ENCODE_H265_CAPABILITY_DEPENDENT_SLICE_SEGMENT_BIT_EXT = 0x00800000, + VK_VIDEO_ENCODE_H265_CAPABILITY_DIFFERENT_SLICE_TYPE_BIT_EXT = 0x01000000, + VK_VIDEO_ENCODE_H265_CAPABILITY_B_FRAME_IN_L1_LIST_BIT_EXT = 0x02000000, VK_VIDEO_ENCODE_H265_CAPABILITY_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkVideoEncodeH265CapabilityFlagBitsEXT; typedef VkFlags VkVideoEncodeH265CapabilityFlagsEXT; @@ -721,7 +708,6 @@ typedef enum VkVideoEncodeH265OutputModeFlagBitsEXT { VK_VIDEO_ENCODE_H265_OUTPUT_MODE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkVideoEncodeH265OutputModeFlagBitsEXT; typedef VkFlags VkVideoEncodeH265OutputModeFlagsEXT; -typedef VkFlags VkVideoEncodeH265CreateFlagsEXT; typedef enum VkVideoEncodeH265CtbSizeFlagBitsEXT { VK_VIDEO_ENCODE_H265_CTB_SIZE_16_BIT_EXT = 0x00000001, @@ -749,7 +735,7 @@ typedef enum VkVideoEncodeH265RateControlStructureFlagBitsEXT { typedef VkFlags VkVideoEncodeH265RateControlStructureFlagsEXT; typedef struct VkVideoEncodeH265CapabilitiesEXT { VkStructureType sType; - const void* pNext; + void* pNext; VkVideoEncodeH265CapabilityFlagsEXT flags; VkVideoEncodeH265InputModeFlagsEXT inputModeFlags; VkVideoEncodeH265OutputModeFlagsEXT outputModeFlags; @@ -770,16 +756,8 @@ typedef struct VkVideoEncodeH265CapabilitiesEXT { uint8_t maxDiffCuQpDeltaDepth; uint8_t minMaxNumMergeCand; uint8_t maxMaxNumMergeCand; - VkExtensionProperties stdExtensionVersion; } VkVideoEncodeH265CapabilitiesEXT; -typedef struct VkVideoEncodeH265SessionCreateInfoEXT { - VkStructureType sType; - const void* pNext; - VkVideoEncodeH265CreateFlagsEXT flags; - const VkExtensionProperties* pStdExtensionVersion; -} VkVideoEncodeH265SessionCreateInfoEXT; - typedef struct VkVideoEncodeH265SessionParametersAddInfoEXT { VkStructureType sType; const void* pNext; @@ -891,7 +869,7 @@ typedef struct VkVideoEncodeH265RateControlLayerInfoEXT { #define VK_EXT_video_decode_h264 1 #include "vk_video/vulkan_video_codec_h264std_decode.h" -#define VK_EXT_VIDEO_DECODE_H264_SPEC_VERSION 3 +#define 
VK_EXT_VIDEO_DECODE_H264_SPEC_VERSION 5 #define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264" typedef enum VkVideoDecodeH264PictureLayoutFlagBitsEXT { @@ -901,7 +879,6 @@ typedef enum VkVideoDecodeH264PictureLayoutFlagBitsEXT { VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkVideoDecodeH264PictureLayoutFlagBitsEXT; typedef VkFlags VkVideoDecodeH264PictureLayoutFlagsEXT; -typedef VkFlags VkVideoDecodeH264CreateFlagsEXT; typedef struct VkVideoDecodeH264ProfileEXT { VkStructureType sType; const void* pNext; @@ -910,20 +887,12 @@ typedef struct VkVideoDecodeH264ProfileEXT { } VkVideoDecodeH264ProfileEXT; typedef struct VkVideoDecodeH264CapabilitiesEXT { - VkStructureType sType; - void* pNext; - uint32_t maxLevel; - VkOffset2D fieldOffsetGranularity; - VkExtensionProperties stdExtensionVersion; + VkStructureType sType; + void* pNext; + StdVideoH264Level maxLevel; + VkOffset2D fieldOffsetGranularity; } VkVideoDecodeH264CapabilitiesEXT; -typedef struct VkVideoDecodeH264SessionCreateInfoEXT { - VkStructureType sType; - const void* pNext; - VkVideoDecodeH264CreateFlagsEXT flags; - const VkExtensionProperties* pStdExtensionVersion; -} VkVideoDecodeH264SessionCreateInfoEXT; - typedef struct VkVideoDecodeH264SessionParametersAddInfoEXT { VkStructureType sType; const void* pNext; @@ -965,9 +934,8 @@ typedef struct VkVideoDecodeH264DpbSlotInfoEXT { #define VK_EXT_video_decode_h265 1 #include "vk_video/vulkan_video_codec_h265std_decode.h" -#define VK_EXT_VIDEO_DECODE_H265_SPEC_VERSION 1 +#define VK_EXT_VIDEO_DECODE_H265_SPEC_VERSION 3 #define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265" -typedef VkFlags VkVideoDecodeH265CreateFlagsEXT; typedef struct VkVideoDecodeH265ProfileEXT { VkStructureType sType; const void* pNext; @@ -975,22 +943,16 @@ typedef struct VkVideoDecodeH265ProfileEXT { } VkVideoDecodeH265ProfileEXT; typedef struct VkVideoDecodeH265CapabilitiesEXT { - VkStructureType sType; - void* pNext; - 
uint32_t maxLevel; - VkExtensionProperties stdExtensionVersion; + VkStructureType sType; + void* pNext; + StdVideoH265Level maxLevel; } VkVideoDecodeH265CapabilitiesEXT; -typedef struct VkVideoDecodeH265SessionCreateInfoEXT { - VkStructureType sType; - const void* pNext; - VkVideoDecodeH265CreateFlagsEXT flags; - const VkExtensionProperties* pStdExtensionVersion; -} VkVideoDecodeH265SessionCreateInfoEXT; - typedef struct VkVideoDecodeH265SessionParametersAddInfoEXT { VkStructureType sType; const void* pNext; + uint32_t vpsStdCount; + const StdVideoH265VideoParameterSet* pVpsStd; uint32_t spsStdCount; const StdVideoH265SequenceParameterSet* pSpsStd; uint32_t ppsStdCount; @@ -1000,6 +962,7 @@ typedef struct VkVideoDecodeH265SessionParametersAddInfoEXT { typedef struct VkVideoDecodeH265SessionParametersCreateInfoEXT { VkStructureType sType; const void* pNext; + uint32_t maxVpsStdCount; uint32_t maxSpsStdCount; uint32_t maxPpsStdCount; const VkVideoDecodeH265SessionParametersAddInfoEXT* pParametersAddInfo; diff --git a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h index 00edc6b1..80e3c4e2 100644 --- a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h +++ b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h @@ -72,7 +72,7 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 208 +#define VK_HEADER_VERSION 212 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 3, VK_HEADER_VERSION) @@ -496,97 +496,88 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_CAPABILITIES_EXT = 1000038000, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_CREATE_INFO_EXT = 1000038001, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_CREATE_INFO_EXT = 1000038001, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - 
VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_CREATE_INFO_EXT = 1000038002, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_ADD_INFO_EXT = 1000038002, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_SESSION_PARAMETERS_ADD_INFO_EXT = 1000038003, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_VCL_FRAME_INFO_EXT = 1000038003, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_VCL_FRAME_INFO_EXT = 1000038004, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_DPB_SLOT_INFO_EXT = 1000038004, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_DPB_SLOT_INFO_EXT = 1000038005, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_NALU_SLICE_EXT = 1000038005, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_NALU_SLICE_EXT = 1000038006, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_EMIT_PICTURE_PARAMETERS_EXT = 1000038006, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_EMIT_PICTURE_PARAMETERS_EXT = 1000038007, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_EXT = 1000038007, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_EXT = 1000038008, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_INFO_EXT = 1000038008, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_INFO_EXT = 1000038009, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_LAYER_INFO_EXT = 1000038009, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_LAYER_INFO_EXT = 1000038010, -#endif -#ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_REFERENCE_LISTS_EXT = 1000038011, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_REFERENCE_LISTS_EXT = 1000038010, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_CAPABILITIES_EXT = 1000039000, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_CREATE_INFO_EXT = 1000039001, 
-#endif -#ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_CREATE_INFO_EXT = 1000039002, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_CREATE_INFO_EXT = 1000039001, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_ADD_INFO_EXT = 1000039003, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_ADD_INFO_EXT = 1000039002, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_VCL_FRAME_INFO_EXT = 1000039004, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_VCL_FRAME_INFO_EXT = 1000039003, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_DPB_SLOT_INFO_EXT = 1000039005, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_DPB_SLOT_INFO_EXT = 1000039004, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_NALU_SLICE_SEGMENT_EXT = 1000039006, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_NALU_SLICE_SEGMENT_EXT = 1000039005, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_EMIT_PICTURE_PARAMETERS_EXT = 1000039007, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_EMIT_PICTURE_PARAMETERS_EXT = 1000039006, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_PROFILE_EXT = 1000039008, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_PROFILE_EXT = 1000039007, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_REFERENCE_LISTS_EXT = 1000039009, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_REFERENCE_LISTS_EXT = 1000039008, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_RATE_CONTROL_INFO_EXT = 1000039010, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_RATE_CONTROL_INFO_EXT = 1000039009, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_RATE_CONTROL_LAYER_INFO_EXT = 1000039011, + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_RATE_CONTROL_LAYER_INFO_EXT = 1000039010, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS 
VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_EXT = 1000040000, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_CREATE_INFO_EXT = 1000040001, + VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_EXT = 1000040001, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_EXT = 1000040002, + VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_MVC_EXT = 1000040002, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_MVC_EXT = 1000040003, + VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_EXT = 1000040003, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_EXT = 1000040004, + VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_EXT = 1000040004, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_EXT = 1000040005, + VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_EXT = 1000040005, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_ADD_INFO_EXT = 1000040006, -#endif -#ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_EXT = 1000040007, + VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_EXT = 1000040006, #endif VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD = 1000041000, VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR = 1000044006, @@ -751,22 +742,19 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_EXT = 1000187000, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_CREATE_INFO_EXT = 1000187001, -#endif -#ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_EXT = 1000187002, + VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_CREATE_INFO_EXT = 1000187001, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - 
VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_EXT = 1000187003, + VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_SESSION_PARAMETERS_ADD_INFO_EXT = 1000187002, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_EXT = 1000187004, + VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_EXT = 1000187003, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_EXT = 1000187005, + VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_EXT = 1000187004, #endif #ifdef VK_ENABLE_BETA_EXTENSIONS - VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_EXT = 1000187006, + VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_EXT = 1000187005, #endif VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR = 1000174000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_KHR = 1000388000, @@ -885,6 +873,9 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV = 1000300001, VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_2_NV = 1000314008, VK_STRUCTURE_TYPE_CHECKPOINT_DATA_2_NV = 1000314009, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT = 1000320000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT = 1000320001, + VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT = 1000320002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR = 1000323000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_ENUMS_PROPERTIES_NV = 1000326000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_ENUMS_FEATURES_NV = 1000326001, @@ -935,10 +926,12 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_SCREEN_SURFACE_CREATE_INFO_QNX = 1000378000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT = 1000381000, VK_STRUCTURE_TYPE_PIPELINE_COLOR_WRITE_CREATE_INFO_EXT = 1000381001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT = 1000382000, 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT = 1000391000, VK_STRUCTURE_TYPE_IMAGE_VIEW_MIN_LOD_CREATE_INFO_EXT = 1000391001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT = 1000392000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT = 1000392001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT = 1000393000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT = 1000411000, VK_STRUCTURE_TYPE_SAMPLER_BORDER_COLOR_COMPONENT_MAPPING_CREATE_INFO_EXT = 1000411001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT = 1000412000, @@ -1602,6 +1595,7 @@ typedef enum VkQueryType { #ifdef VK_ENABLE_BETA_EXTENSIONS VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR = 1000299000, #endif + VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT = 1000382000, VK_QUERY_TYPE_MAX_ENUM = 0x7FFFFFFF } VkQueryType; @@ -2080,6 +2074,7 @@ typedef enum VkImageCreateFlagBits { VK_IMAGE_CREATE_CORNER_SAMPLED_BIT_NV = 0x00002000, VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT = 0x00001000, VK_IMAGE_CREATE_SUBSAMPLED_BIT_EXT = 0x00004000, + VK_IMAGE_CREATE_2D_VIEW_COMPATIBLE_BIT_EXT = 0x00020000, VK_IMAGE_CREATE_FRAGMENT_DENSITY_MAP_OFFSET_BIT_QCOM = 0x00008000, VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT_KHR = VK_IMAGE_CREATE_SPLIT_INSTANCE_BIND_REGIONS_BIT, VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT_KHR = VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT, @@ -2378,6 +2373,8 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR = 0x00000080, VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV = 0x00040000, VK_PIPELINE_CREATE_LIBRARY_BIT_KHR = 0x00000800, + VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT = 0x00800000, + VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT = 0x00000400, VK_PIPELINE_CREATE_RAY_TRACING_ALLOW_MOTION_BIT_NV = 0x00100000, VK_PIPELINE_CREATE_DISPATCH_BASE = VK_PIPELINE_CREATE_DISPATCH_BASE_BIT, 
VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR, @@ -2454,6 +2451,11 @@ typedef enum VkPipelineColorBlendStateCreateFlagBits { } VkPipelineColorBlendStateCreateFlagBits; typedef VkFlags VkPipelineColorBlendStateCreateFlags; typedef VkFlags VkPipelineDynamicStateCreateFlags; + +typedef enum VkPipelineLayoutCreateFlagBits { + VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT = 0x00000002, + VK_PIPELINE_LAYOUT_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkPipelineLayoutCreateFlagBits; typedef VkFlags VkPipelineLayoutCreateFlags; typedef VkFlags VkShaderStageFlags; @@ -13222,6 +13224,39 @@ typedef struct VkDeviceDiagnosticsConfigCreateInfoNV { #define VK_QCOM_RENDER_PASS_STORE_OPS_EXTENSION_NAME "VK_QCOM_render_pass_store_ops" +#define VK_EXT_graphics_pipeline_library 1 +#define VK_EXT_GRAPHICS_PIPELINE_LIBRARY_SPEC_VERSION 1 +#define VK_EXT_GRAPHICS_PIPELINE_LIBRARY_EXTENSION_NAME "VK_EXT_graphics_pipeline_library" + +typedef enum VkGraphicsPipelineLibraryFlagBitsEXT { + VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT = 0x00000001, + VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT = 0x00000002, + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT = 0x00000004, + VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT = 0x00000008, + VK_GRAPHICS_PIPELINE_LIBRARY_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkGraphicsPipelineLibraryFlagBitsEXT; +typedef VkFlags VkGraphicsPipelineLibraryFlagsEXT; +typedef struct VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 graphicsPipelineLibrary; +} VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT; + +typedef struct VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT { + VkStructureType sType; + void* pNext; + VkBool32 graphicsPipelineLibraryFastLinking; + VkBool32 graphicsPipelineLibraryIndependentInterpolationDecoration; +} 
VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT; + +typedef struct VkGraphicsPipelineLibraryCreateInfoEXT { + VkStructureType sType; + void* pNext; + VkGraphicsPipelineLibraryFlagsEXT flags; +} VkGraphicsPipelineLibraryCreateInfoEXT; + + + #define VK_NV_fragment_shading_rate_enums 1 #define VK_NV_FRAGMENT_SHADING_RATE_ENUMS_SPEC_VERSION 1 #define VK_NV_FRAGMENT_SHADING_RATE_ENUMS_EXTENSION_NAME "VK_NV_fragment_shading_rate_enums" @@ -13727,6 +13762,19 @@ VKAPI_ATTR void VKAPI_CALL vkCmdSetColorWrite #endif +#define VK_EXT_primitives_generated_query 1 +#define VK_EXT_PRIMITIVES_GENERATED_QUERY_SPEC_VERSION 1 +#define VK_EXT_PRIMITIVES_GENERATED_QUERY_EXTENSION_NAME "VK_EXT_primitives_generated_query" +typedef struct VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 primitivesGeneratedQuery; + VkBool32 primitivesGeneratedQueryWithRasterizerDiscard; + VkBool32 primitivesGeneratedQueryWithNonZeroStreams; +} VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT; + + + #define VK_EXT_global_priority_query 1 #define VK_EXT_GLOBAL_PRIORITY_QUERY_SPEC_VERSION 1 #define VK_EXT_GLOBAL_PRIORITY_QUERY_EXTENSION_NAME "VK_EXT_global_priority_query" @@ -13803,6 +13851,18 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDrawMultiIndexedEXT( #endif +#define VK_EXT_image_2d_view_of_3d 1 +#define VK_EXT_IMAGE_2D_VIEW_OF_3D_SPEC_VERSION 1 +#define VK_EXT_IMAGE_2D_VIEW_OF_3D_EXTENSION_NAME "VK_EXT_image_2d_view_of_3d" +typedef struct VkPhysicalDeviceImage2DViewOf3DFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 image2DViewOf3D; + VkBool32 sampler2DViewOf3D; +} VkPhysicalDeviceImage2DViewOf3DFeaturesEXT; + + + #define VK_EXT_load_store_op_none 1 #define VK_EXT_LOAD_STORE_OP_NONE_SPEC_VERSION 1 #define VK_EXT_LOAD_STORE_OP_NONE_EXTENSION_NAME "VK_EXT_load_store_op_none" diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index bc74adb3..9170adea 100644 --- a/icd/api/include/pipeline_compiler.h 
+++ b/icd/api/include/pipeline_compiler.h @@ -127,6 +127,7 @@ class PipelineCompiler const VkShaderModuleCreateFlags flags, size_t codeSize, const void* pCode, + const bool adaptForFaskLink, PipelineBinaryCache* pBinaryCache, PipelineCreationFeedback* pFeedback, ShaderModuleHandle* pShaderModule); @@ -242,8 +243,10 @@ class PipelineCompiler void DestroyPipelineBinaryCache(); - void BuildPipelineInternalBufferData(GraphicsPipelineBinaryCreateInfo* pCreateInfo, - PipelineInternalBufferInfo* pInternalBufferInfo); + void BuildPipelineInternalBufferData( + const PipelineLayout* pPipelineLayout, + GraphicsPipelineBinaryCreateInfo* pCreateInfo, + PipelineInternalBufferInfo* pInternalBufferInfo); void GetComputePipelineCacheId( uint32_t deviceIdx, diff --git a/icd/api/include/vk_cmd_pool.h b/icd/api/include/vk_cmd_pool.h index 1dba9c75..19a38791 100644 --- a/icd/api/include/vk_cmd_pool.h +++ b/icd/api/include/vk_cmd_pool.h @@ -85,6 +85,8 @@ class CmdPool final : public NonDispatchable bool IsProtected() const { return m_flags.isProtected ? 
true : false; } + bool IsResetCmdBuffer() const { return m_flags.isResetCmdBuffer; } + Pal::Result MarkCmdBufBegun(CmdBuffer* pCmdBuffer); void UnmarkCmdBufBegun(CmdBuffer* pCmdBuffer); @@ -100,20 +102,21 @@ class CmdPool final : public NonDispatchable VkCommandPoolCreateFlags flags, bool sharedCmdAllocator); - VkResult ResetCmdAllocator(); + VkResult ResetCmdAllocator(bool releaseResources); Device* m_pDevice; Pal::ICmdAllocator* m_pPalCmdAllocators[MaxPalDevices]; const VkAllocationCallbacks* m_pAllocator; const uint32_t m_queueFamilyIndex; - const bool m_sharedCmdAllocator; union { struct { - uint32 isProtected : 1; - uint32 reserved : 31; + uint32 isProtected : 1; + uint32 sharedCmdAllocator : 1; + uint32 isResetCmdBuffer : 1; + uint32 reserved : 29; }; uint32 u32All; } m_flags; diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index 3a75dab0..7978cd2a 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -88,7 +88,12 @@ class TimestampQueryPool; class SqttCmdBufferState; class QueryPool; -constexpr uint8_t DefaultStencilOpValue = 1; +constexpr uint8_t DefaultStencilOpValue = 1; +constexpr uint8_t DefaultRefPicIndexValue = 0xFF; +constexpr uint8_t DefaultIndex7BitsValue = 0x7F; +constexpr uint8_t DefaultTidValue = 0xFF; +constexpr uint8_t AssociatedFlag = 31; +constexpr uint32_t DefaultAssociatedFlagValue = (1 << AssociatedFlag); // Internal API pipeline binding points enum PipelineBindPoint @@ -821,6 +826,10 @@ class CmdBuffer VK_FORCEINLINE VirtualStackAllocator* GetStackAllocator() { return m_pStackAllocator; } + void TranslateBarrierInfoToAcqRel( + const Pal::BarrierInfo& barrierInfo, + uint32_t deviceMask); + void PalCmdBarrier( const Pal::BarrierInfo& info, uint32_t deviceMask); @@ -831,6 +840,16 @@ class CmdBuffer const Image** const pTransitionImages, uint32_t deviceMask); + void PalCmdReleaseThenAcquire( + const Pal::AcquireReleaseInfo& info, + uint32_t deviceMask); + + void 
PalCmdReleaseThenAcquire( + Pal::AcquireReleaseInfo* pAcquireReleaseInfo, + Pal::ImgBarrier* const pImageBarriers, + const Image** const pTransitionImages, + uint32_t deviceMask); + Pal::Result PalCmdBufferBegin( const Pal::CmdBufferBuildInfo& cmdInfo); @@ -1169,6 +1188,7 @@ class CmdBuffer void RPEndSubpass(); void RPResolveAttachments(uint32_t count, const RPResolveInfo* pResolves); void RPSyncPoint(const RPSyncPointInfo& syncPoint, VirtualStackFrame* pVirtStack); + void RPSyncPointLegacy(const RPSyncPointInfo& syncPoint, VirtualStackFrame* pVirtStack); void RPLoadOpClearColor(uint32_t count, const RPLoadOpClearInfo* pClears); void RPLoadOpClearDepthStencil(uint32_t count, const RPLoadOpClearInfo* pClears); void RPBindTargets(const RPBindTargetsInfo& targets); @@ -1231,7 +1251,7 @@ class CmdBuffer void DbgCmdBarrier(bool preCmd); #endif - template + template void BindDescriptorSets( VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, @@ -1245,11 +1265,10 @@ void SetUserDataPipelineLayout( uint32_t firstSet, uint32_t setCount, const PipelineLayout* pLayout, - PipelineBindState* pBindState, const Pal::PipelineBindPoint palBindPoint, const PipelineBindPoint apiBindPoint); - template + template static VKAPI_ATTR void VKAPI_CALL CmdBindDescriptorSets( VkCommandBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, @@ -1317,12 +1336,13 @@ void SetUserDataPipelineLayout( uint32_t disableResetReleaseResources : 1; uint32_t subpassLoadOpClearsBoundAttachments : 1; uint32_t preBindDefaultState : 1; - uint32_t hasReleaseAcquire : 1; + uint32_t useReleaseAcquire : 1; uint32_t useSplitReleaseAcquire : 1; uint32_t reserved2 : 3; uint32_t isRenderingSuspended : 1; uint32_t reserved4 : 1; - uint32_t reserved : 15; + uint32_t reserved5 : 1; + uint32_t reserved : 14; }; }; diff --git a/icd/api/include/vk_conv.h b/icd/api/include/vk_conv.h index 665b2859..e325c23c 100644 --- a/icd/api/include/vk_conv.h +++ b/icd/api/include/vk_conv.h @@ -2519,7 +2519,8 @@ inline 
VkCompositeAlphaFlagsKHR PalToVkSupportedCompositeAlphaMode(uint32 compos // Converts Vulkan image creation flags to PAL image creation flags (unfortunately, PAL doesn't define a dedicated type // for the image creation flags so we have to return the constructed flag set as a uint32_t) inline uint32_t VkToPalImageCreateFlags(VkImageCreateFlags imageCreateFlags, - VkFormat format) + VkFormat format, + VkImageUsageFlags imageUsage) { Pal::ImageCreateFlags flags = {}; @@ -3603,13 +3604,13 @@ class PhysicalDevice; // ===================================================================================================================== VkResult InitializeUberFetchShaderFormatTable( - PhysicalDevice* pPhysicalDevice, + const PhysicalDevice* pPhysicalDevice, UberFetchShaderFormatInfoMap* pFormatInfoMap); UberFetchShaderFormatInfo GetUberFetchShaderFormatInfo( - UberFetchShaderFormatInfoMap* pFormatInfoMap, - VkFormat vkFormat, - bool isZeroStride); + const UberFetchShaderFormatInfoMap* pFormatInfoMap, + const VkFormat vkFormat, + const bool isZeroStride); } // namespace vk diff --git a/icd/api/include/vk_descriptor_set.h b/icd/api/include/vk_descriptor_set.h index 3beb7af5..56431e45 100644 --- a/icd/api/include/vk_descriptor_set.h +++ b/icd/api/include/vk_descriptor_set.h @@ -53,7 +53,6 @@ class BufferView; struct DescriptorAddr { Pal::gpusize staticGpuAddr; - Pal::gpusize fmaskGpuAddr; uint32_t* staticCpuAddr; uint32_t* fmaskCpuAddr; }; @@ -92,11 +91,6 @@ class DescriptorSet final : public NonDispatchable static PFN_vkUpdateDescriptorSets GetUpdateDescriptorSetsFunc(const Device* pDevice); - template - static PFN_vkUpdateDescriptorSets GetUpdateDescriptorSetsFunc(const Device* pDevice); - template + uint32_t numPalDevices> static VKAPI_ATTR void VKAPI_CALL UpdateDescriptorSets( VkDevice device, uint32_t descriptorWriteCount, @@ -332,7 +322,6 @@ class DescriptorUpdate size_t fmaskDescSize, size_t samplerDescSize, size_t bufferDescSize, - bool fmaskBasedMsaaReadEnabled, 
uint32_t numPalDevices> static void WriteDescriptorSets( const Device* pDevice, @@ -340,7 +329,7 @@ class DescriptorUpdate uint32_t descriptorWriteCount, const VkWriteDescriptorSet* pDescriptorWrites); - template + template static void CopyDescriptorSets( const Device* pDevice, uint32_t deviceIdx, diff --git a/icd/api/include/vk_descriptor_set_layout.h b/icd/api/include/vk_descriptor_set_layout.h index 890cbe58..1f36a11a 100644 --- a/icd/api/include/vk_descriptor_set_layout.h +++ b/icd/api/include/vk_descriptor_set_layout.h @@ -134,6 +134,8 @@ class DescriptorSetLayout final : public NonDispatchable static PfnUpdateEntry GetUpdateEntryFunc( - const Device* pDevice, VkDescriptorType descriptorType, const DescriptorSetLayout::BindingInfo& dstBinding); @@ -119,49 +118,48 @@ class DescriptorUpdateTemplate final : public NonDispatchable + template static void UpdateEntrySampledImage( - const Device* pDevice, - VkDescriptorSet descriptorSet, - const void* pDescriptorInfo, - const TemplateUpdateInfo& entry); + const Device* pDevice, + VkDescriptorSet descriptorSet, + const void* pDescriptorInfo, + const TemplateUpdateInfo& entry); template static void UpdateEntrySampler( - const Device* pDevice, - VkDescriptorSet descriptorSet, - const void* pDescriptorInfo, - const TemplateUpdateInfo& entry); + const Device* pDevice, + VkDescriptorSet descriptorSet, + const void* pDescriptorInfo, + const TemplateUpdateInfo& entry); template static void UpdateEntryBuffer( - const Device* pDevice, - VkDescriptorSet descriptorSet, - const void* pDescriptorInfo, - const TemplateUpdateInfo& entry); + const Device* pDevice, + VkDescriptorSet descriptorSet, + const void* pDescriptorInfo, + const TemplateUpdateInfo& entry); template static void UpdateEntryTexelBuffer( - const Device* pDevice, - VkDescriptorSet descriptorSet, - const void* pDescriptorInfo, - const TemplateUpdateInfo& entry); + const Device* pDevice, + VkDescriptorSet descriptorSet, + const void* pDescriptorInfo, + const 
TemplateUpdateInfo& entry); - template static void UpdateEntryCombinedImageSampler( - const Device* pDevice, - VkDescriptorSet descriptorSet, - const void* pDescriptorInfo, - const TemplateUpdateInfo& entry); + const Device* pDevice, + VkDescriptorSet descriptorSet, + const void* pDescriptorInfo, + const TemplateUpdateInfo& entry); template static void UpdateEntryInlineUniformBlock( - const Device* pDevice, - VkDescriptorSet descriptorSet, - const void* pDescriptorInfo, - const TemplateUpdateInfo& entry); + const Device* pDevice, + VkDescriptorSet descriptorSet, + const void* pDescriptorInfo, + const TemplateUpdateInfo& entry); VkPipelineBindPoint m_pipelineBindPoint; uint32_t m_numEntries; diff --git a/icd/api/include/vk_device.h b/icd/api/include/vk_device.h index f033d4d6..aa539d22 100644 --- a/icd/api/include/vk_device.h +++ b/icd/api/include/vk_device.h @@ -541,13 +541,10 @@ class Device uint32_t deviceMask, uint32_t heapIdx); - bool ShouldAddRemoteBackupHeap( - uint32_t deviceIdx, - uint32_t memoryTypeIdx, + bool OverallocationRequestedForPalHeap( uint32_t palHeapIdx) const { - return (m_perGpu[deviceIdx].pPhysicalDevice->ShouldAddRemoteBackupHeap(memoryTypeIdx) || - m_overallocationRequestedForPalHeap[palHeapIdx]); + return m_overallocationRequestedForPalHeap[palHeapIdx]; } const InternalPipeline& GetTimestampQueryCopyPipeline() const diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index de71382b..3c85a0b7 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -160,7 +160,7 @@ class Extensions const char* const* const extensionNames, uint32_t extensionNameCount, const Supported& supported, - Enabled& enabled) + Enabled* pEnabled) { bool invalidExtensionRequested = false; @@ -180,7 +180,7 @@ class Extensions if (strcmp(extensionNames[i], ext.extensionName) == 0) { - enabled.EnableExtension(id); + pEnabled->EnableExtension(id); break; } } @@ -283,6 +283,7 @@ class DeviceExtensions final : public 
Extensions KHR_MAINTENANCE4, KHR_MULTIVIEW, KHR_PIPELINE_EXECUTABLE_PROPERTIES, + KHR_PIPELINE_LIBRARY, KHR_RELAXED_BLOCK_LAYOUT, KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_SAMPLER_YCBCR_CONVERSION, @@ -329,6 +330,7 @@ class DeviceExtensions final : public Extensions EXT_EXTERNAL_MEMORY_HOST, EXT_GLOBAL_PRIORITY, EXT_GLOBAL_PRIORITY_QUERY, + EXT_GRAPHICS_PIPELINE_LIBRARY, EXT_HDR_METADATA, EXT_HOST_QUERY_RESET, EXT_IMAGE_ROBUSTNESS, diff --git a/icd/api/include/vk_graphics_pipeline_library.h b/icd/api/include/vk_graphics_pipeline_library.h new file mode 100644 index 00000000..8adb485a --- /dev/null +++ b/icd/api/include/vk_graphics_pipeline_library.h @@ -0,0 +1,102 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2021-2022 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************************************************************************/ + +#ifndef __VK_GRAPHICS_PIPELINE_LIBRARY_H__ +#define __VK_GRAPHICS_PIPELINE_LIBRARY_H__ + +#pragma once + +#include "include/graphics_pipeline_common.h" +#include "include/vk_pipeline_cache.h" + +namespace vk +{ + +class GraphicsPipelineLibrary final : public GraphicsPipelineCommon, public NonDispatchable +{ +public: + static VkResult Create( + Device* pDevice, + PipelineCache* pPipelineCache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline); + + VkResult Destroy( + Device* pDevice, + const VkAllocationCallbacks* pAllocator) override; + + const GraphicsPipelineObjectCreateInfo& GetPipelineObjectCreateInfo() const + { return m_objectCreateInfo; } + + const GraphicsPipelineBinaryCreateInfo& GetPipelineBinaryCreateInfo() const + { return *m_pBinaryCreateInfo; } + + VkGraphicsPipelineLibraryFlagsEXT GetLibraryFlags() const + { return m_pBinaryCreateInfo->libFlags; } + + uint32_t GetDynamicStates() const + { return m_objectCreateInfo.dynamicStates; } + + const ShaderModuleHandle* GetShaderModuleHandle(const ShaderStage stage) const; + +private: + PAL_DISALLOW_COPY_AND_ASSIGN(GraphicsPipelineLibrary); + + struct TempModuleState + { + ShaderStage stage; + bool needFreeBinaryOnly; + }; + + GraphicsPipelineLibrary( + Device* pDevice, + const GraphicsPipelineObjectCreateInfo& objectInfo, + const GraphicsPipelineBinaryCreateInfo* pBinaryInfo, + const GraphicsPipelineLibraryInfo& libInfo, + const uint64_t apiHash, + const ShaderModuleHandle* pTempModules, + const TempModuleState* pTempModuleStates, + PipelineLayout* pPipelineLayout); + + static void CreatePartialPipelineBinary( + const Device* pDevice, + const GraphicsPipelineLibraryInfo* pLibInfo, + const GraphicsPipelineShaderStageInfo* pShaderStageInfo, + const bool disableRasterization, + GraphicsPipelineBinaryCreateInfo* 
pBinaryCreateInfo, + ShaderModuleHandle* pTempModules, + TempModuleState* pTempModuleStages); + + const GraphicsPipelineObjectCreateInfo m_objectCreateInfo; + const GraphicsPipelineBinaryCreateInfo* m_pBinaryCreateInfo; + const GraphicsPipelineLibraryInfo m_libInfo; + ShaderModuleHandle m_tempModules[ShaderStage::ShaderStageGfxCount]; + TempModuleState m_tempModuleStates[ShaderStage::ShaderStageGfxCount]; +}; + +} + +#endif/*__VK_GRAPHICS_PIPELINE_LIBRARY_H__*/ diff --git a/icd/api/include/vk_image_view.h b/icd/api/include/vk_image_view.h index 3d2e4eed..72ca5827 100644 --- a/icd/api/include/vk_image_view.h +++ b/icd/api/include/vk_image_view.h @@ -127,6 +127,7 @@ class ImageView final : public NonDispatchable const RuntimeSettings& settings); static Pal::Result BuildDepthStencilView( + const Device* pDevice, const Pal::IDevice* pPalDevice, const Pal::IImage* pPalImage, VkImageViewType viewType, diff --git a/icd/api/include/vk_physical_device.h b/icd/api/include/vk_physical_device.h index 381320e9..29a9b253 100644 --- a/icd/api/include/vk_physical_device.h +++ b/icd/api/include/vk_physical_device.h @@ -92,6 +92,11 @@ struct PhysicalDeviceGpaProperties Pal::PerfExperimentProperties palProps; }; +// ===================================================================================================================== +// Shader stage bit that represents all graphics stages +static const uint32 ShaderStageAllGraphics = + VK_SHADER_STAGE_ALL_GRAPHICS; + // ===================================================================================================================== // Represents the Vulkan view of physical device. All Vulkan functions on the VkPhysicalDevice land in // this class. The class wraps a PAL IDevice and punts most functionality down to the next layer. 
@@ -760,9 +765,6 @@ class PhysicalDevice Pal::gpusize allocationSize, uint32_t heapIdx); - bool ShouldAddRemoteBackupHeap(uint32_t vkIndex) const - { return m_memoryVkIndexAddRemoteBackupHeap[vkIndex]; } - bool IsOverrideHeapChoiceToLocalWithinBudget(Pal::gpusize size) const; Util::IPlatformKey* GetPlatformKey() const { return m_pPlatformKey; } @@ -792,7 +794,6 @@ class PhysicalDevice uint32_t m_memoryTypeMask; uint32_t m_memoryTypeMaskForExternalSharing; - bool m_memoryVkIndexAddRemoteBackupHeap[VK_MAX_MEMORY_TYPES]; uint32_t m_memoryPalHeapToVkIndexBits[Pal::GpuHeapCount]; uint32_t m_memoryPalHeapToVkHeap[Pal::GpuHeapCount]; Pal::GpuHeap m_memoryVkIndexToPalHeap[VK_MAX_MEMORY_TYPES]; diff --git a/icd/api/include/vk_pipeline.h b/icd/api/include/vk_pipeline.h index 0d14625b..66cfcbcf 100644 --- a/icd/api/include/vk_pipeline.h +++ b/icd/api/include/vk_pipeline.h @@ -193,7 +193,7 @@ class Pipeline const Device* pDevice, const uint32_t stageCount, const VkPipelineShaderStageCreateInfo* pStages, - const bool duplicateExistingModules, + const bool isLibrary, uint32_t (*pfnGetOutputIdx)(const uint32_t inputIdx, const uint32_t stageIdx), ShaderStageInfo* pShaderStageInfo, diff --git a/icd/api/include/vk_pipeline_layout.h b/icd/api/include/vk_pipeline_layout.h index e3be6e3b..14233c62 100644 --- a/icd/api/include/vk_pipeline_layout.h +++ b/icd/api/include/vk_pipeline_layout.h @@ -45,10 +45,10 @@ class DescriptorSetLayout; // Determine mapping layout of the resouces used in shaders enum class PipelineLayoutScheme : uint32_t { - // Compact scheme make full use of all the user data registers and can achieve best performance in theory. + // Compact scheme makes full use of all the user data registers and can achieve best performance in theory. // See PipelineLayout::BuildCompactSchemeInfo() for more details Compact = 0, - // The searching path of resouce belongs to a specific binding is fixed in indirect scheme. 
+ // The searching path of a resource belonging to a specific binding is fixed in the indirect scheme. // See PipelineLayout::BuildIndirectSchemeInfo() for more details Indirect }; @@ -79,6 +79,14 @@ struct UserDataLayout // Number of user data registers used for transform feedback uint32_t transformFeedbackRegCount; + // Base user data register index to use for the constant buffer used in uber-fetch shader + // The number of user data register used is always 2 + uint32_t uberFetchConstBufRegBase; + + // Base user data register indices to use for buffers storing specialization constants + uint32_t specConstBufVertexRegBase; + uint32_t specConstBufFragmentRegBase; + } compact; struct { @@ -88,8 +96,8 @@ struct UserDataLayout uint32_t transformFeedbackRegBase; // Base user data register index to use for the pointers pointing to the buffers - // storing descriptor set bingding data. - // Each set occupy 2 entries: one for static and one for descriptor descriptors + // storing descriptor set binding data.
+ // Each set occupy 2 entries: one for static and one for dynamic descriptors // The total number of user data registers used is always MaxDescriptorSets * 2 * SetPtrRegCount uint32_t setBindingPtrRegBase; @@ -100,6 +108,10 @@ struct UserDataLayout // The size of buffer required to store push constants uint32_t pushConstSizeInDword; + // Base user data register index to use for the constant buffer used in uber-fetch shader + // The number of user data register used is always 2 + uint32_t uberFetchConstBufRegBase; + } indirect; }; }; @@ -122,6 +134,13 @@ class PipelineLayout final : public NonDispatchable - void FillDynamicSetNode( - const Vkgc::ResourceMappingNodeType type, + void BuildLlpcStaticMapping( + const DescriptorSetLayout* pLayout, const uint32_t visibility, const uint32_t setIndex, const DescriptorSetLayout::BindingInfo& binding, - const uint32_t userDataRegBase, - NodeType* pNode) const; + Vkgc::ResourceMappingNode* pNode, + Vkgc::StaticDescriptorValue* pDescriptorRangeValue, + uint32_t* pDescriptorRangeCount) const; - template - void BuildLlpcDynamicSetMapping( - const DescriptorSetLayout* pLayout, - const uint32_t visibility, - const uint32_t setIndex, - const uint32_t userDataRegBase, - NodeType* pNodes, - uint32_t* pNodeCount) const; + void BuildLlpcDynamicMapping( + const uint32_t setIndex, + const uint32_t userDataRegBase, + const DescriptorSetLayout::BindingInfo& binding, + Vkgc::ResourceMappingNode* pNode) const; void BuildLlpcVertexBufferTableMapping( const VbBindingInfo* pVbInfo, @@ -315,6 +323,13 @@ class PipelineLayout final : public NonDispatchableGetMemoryLayer(); size_t curCount, curDataSize; - result = PalToVkResult(Util::GetMemoryCacheLayerCurSize(pMemoryLayer, &curCount, &curDataSize)); - if ((result == VK_SUCCESS) && (curCount > 0)) + result = Util::GetMemoryCacheLayerCurSize(pMemoryLayer, &curCount, &curDataSize); + + if ((result == Pal::Result::Success) && (curCount > 0)) { Util::AutoBuffer cacheIds(curCount, &m_palAllocator); - 
result = PalToVkResult(Util::GetMemoryCacheLayerHashIds(pMemoryLayer, curCount, &cacheIds[0])); - if (result == VK_SUCCESS) + result = Util::GetMemoryCacheLayerHashIds(pMemoryLayer, curCount, &cacheIds[0]); + if (result == Pal::Result::Success) { for (uint32_t j = 0; j < curCount; j++) { size_t dataSize; const void* pBinaryCacheData; - result = PalToVkResult(ppSrcCaches[i]->LoadPipelineBinary(&cacheIds[j], &dataSize, &pBinaryCacheData)); - if (result == VK_SUCCESS) + result = ppSrcCaches[i]->LoadPipelineBinary(&cacheIds[j], &dataSize, &pBinaryCacheData); + if (result == Pal::Result::Success) { - result = PalToVkResult(StorePipelineBinary(&cacheIds[j], dataSize, pBinaryCacheData)); + result = StorePipelineBinary(&cacheIds[j], dataSize, pBinaryCacheData); FreeMem(const_cast(pBinaryCacheData)); - if (result != VK_SUCCESS) + + // Do not break for success cases or an already existing cache entry + if ((result != Pal::Result::Success) && (result != Pal::Result::AlreadyExists)) { break; } @@ -1382,7 +1385,9 @@ VkResult PipelineBinaryCache::Merge( } } - return result; + return (((result == Pal::Result::Success) || + (result == Pal::Result::AlreadyExists))? 
+ VK_SUCCESS : PalToVkResult(result)); } } // namespace vk diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index 37242e80..689604d8 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -36,6 +36,7 @@ #include "include/vk_pipeline_layout.h" #include "include/vk_render_pass.h" #include "include/vk_graphics_pipeline.h" +#include "include/vk_graphics_pipeline_library.h" #include "include/pipeline_compiler.h" #include @@ -268,22 +269,19 @@ VkResult PipelineCompiler::Initialize() result = m_compilerSolutionLlpc.Initialize(m_gfxIp, info.gfxLevel, pCacheAdapter); } - if (settings.enableUberFetchShader || settings.enableEarlyCompile) + if (result == VK_SUCCESS) { - if (result == VK_SUCCESS) - { - result = PalToVkResult(m_shaderModuleHandleMap.Init()); - } + result = PalToVkResult(m_shaderModuleHandleMap.Init()); + } - if (result == VK_SUCCESS) - { - result = PalToVkResult(m_uberFetchShaderInfoFormatMap.Init()); - } + if (result == VK_SUCCESS) + { + result = PalToVkResult(m_uberFetchShaderInfoFormatMap.Init()); + } - if (result == VK_SUCCESS) - { - result = InitializeUberFetchShaderFormatTable(m_pPhysicalDevice, &m_uberFetchShaderInfoFormatMap); - } + if (result == VK_SUCCESS) + { + result = InitializeUberFetchShaderFormatTable(m_pPhysicalDevice, &m_uberFetchShaderInfoFormatMap); } if (result == VK_SUCCESS) @@ -560,6 +558,7 @@ VkResult PipelineCompiler::BuildShaderModule( const VkShaderModuleCreateFlags flags, size_t codeSize, const void* pCode, + const bool adaptForFaskLink, PipelineBinaryCache* pBinaryCache, PipelineCreationFeedback* pFeedback, ShaderModuleHandle* pShaderModule) @@ -570,10 +569,14 @@ VkResult PipelineCompiler::BuildShaderModule( uint32_t compilerMask = GetCompilerCollectionMask(); Util::MetroHash::Hash stableHash = {}; Util::MetroHash::Hash uniqueHash = {}; - Util::MetroHash64::Hash(reinterpret_cast(pCode), codeSize, stableHash.bytes); + + Util::MetroHash64 hasher; + 
hasher.Update(reinterpret_cast(pCode), codeSize); + hasher.Update(adaptForFaskLink); + hasher.Finalize(stableHash.bytes); uniqueHash = stableHash; - bool findReplaceShader = false; + bool findReplaceShader = false; if ((pSettings->shaderReplaceMode == ShaderReplaceShaderHash) || (pSettings->shaderReplaceMode == ShaderReplaceShaderHashPipelineBinaryHash)) { @@ -591,12 +594,13 @@ VkResult PipelineCompiler::BuildShaderModule( result = LoadShaderModuleFromCache( pDevice, flags, compilerMask, uniqueHash, pBinaryCache, pFeedback, pShaderModule); + if (result != VK_SUCCESS) { if (compilerMask & (1 << PipelineCompilerTypeLlpc)) { result = m_compilerSolutionLlpc.BuildShaderModule( - pDevice, flags, codeSize, pCode, pShaderModule, stableHash); + pDevice, flags, codeSize, pCode, adaptForFaskLink, pShaderModule, stableHash); } StoreShaderModuleToCache(pDevice, flags, compilerMask, uniqueHash, pBinaryCache, pShaderModule); @@ -735,7 +739,7 @@ bool PipelineCompiler::ReplacePipelineShaderModule( if (LoadReplaceShaderBinary(hash64, &codeSize, &pCode)) { - VkResult result = BuildShaderModule(pDevice, 0, codeSize, pCode, nullptr, nullptr, pShaderModule); + VkResult result = BuildShaderModule(pDevice, 0, codeSize, pCode, false, nullptr, nullptr, pShaderModule); if (result == VK_SUCCESS) { pShaderInfo->pModuleData = ShaderModule::GetShaderData(compilerType, pShaderModule); @@ -1613,6 +1617,145 @@ void BuildLlpcVertexInputDescriptors( } } +// ===================================================================================================================== +template +static void CopyPipelineShadersInfo( + const GraphicsPipelineLibrary* pLibrary, + GraphicsPipelineBinaryCreateInfo* pCreateInfo) +{ + const GraphicsPipelineBinaryCreateInfo& libInfo = pLibrary->GetPipelineBinaryCreateInfo(); + + pCreateInfo->compilerType = libInfo.compilerType; + + Vkgc::PipelineShaderInfo* pShaderInfosDst[] = + { + &pCreateInfo->pipelineInfo.vs, + &pCreateInfo->pipelineInfo.tcs, + 
&pCreateInfo->pipelineInfo.tes, + &pCreateInfo->pipelineInfo.gs, + &pCreateInfo->pipelineInfo.fs, + }; + + const Vkgc::PipelineShaderInfo* pShaderInfosSrc[] = + { + &libInfo.pipelineInfo.vs, + &libInfo.pipelineInfo.tcs, + &libInfo.pipelineInfo.tes, + &libInfo.pipelineInfo.gs, + &libInfo.pipelineInfo.fs, + }; + + for (uint32_t stage = 0; stage < ShaderStage::ShaderStageGfxCount; ++stage) + { + if ((shaderMask & (1 << stage)) != 0) + { + *pShaderInfosDst[stage] = *pShaderInfosSrc[stage]; + pCreateInfo->pipelineProfileKey.shaders[stage] = libInfo.pipelineProfileKey.shaders[stage]; + } + } +} + +// ===================================================================================================================== +static void CopyVertexInputInterfaceState( + const Device* pDevice, + const GraphicsPipelineLibrary* pLibrary, + GraphicsPipelineBinaryCreateInfo* pCreateInfo, + VbBindingInfo* pVbInfo) +{ + const GraphicsPipelineBinaryCreateInfo& libInfo = pLibrary->GetPipelineBinaryCreateInfo(); + + pCreateInfo->pipelineInfo.pVertexInput = libInfo.pipelineInfo.pVertexInput; + pCreateInfo->pipelineInfo.iaState.topology = libInfo.pipelineInfo.iaState.topology; + pCreateInfo->pipelineInfo.iaState.disableVertexReuse = libInfo.pipelineInfo.iaState.disableVertexReuse; + pCreateInfo->pipelineInfo.dynamicVertexStride = libInfo.pipelineInfo.dynamicVertexStride; + + BuildLlpcVertexInputDescriptors(pDevice, pCreateInfo->pipelineInfo.pVertexInput, pVbInfo); +} + +// ===================================================================================================================== +static void MergePipelineOptions(const Vkgc::PipelineOptions& src, Vkgc::PipelineOptions& dst) +{ + dst.includeDisassembly |= src.includeDisassembly; + dst.scalarBlockLayout |= src.scalarBlockLayout; + dst.reconfigWorkgroupLayout |= src.reconfigWorkgroupLayout; + dst.includeIr |= src.includeIr; + dst.robustBufferAccess |= src.robustBufferAccess; + dst.enableRelocatableShaderElf |= 
src.enableRelocatableShaderElf; + dst.disableImageResourceCheck |= src.disableImageResourceCheck; + dst.enableScratchAccessBoundsChecks |= src.enableScratchAccessBoundsChecks; + dst.extendedRobustness.nullDescriptor |= src.extendedRobustness.nullDescriptor; + dst.extendedRobustness.robustBufferAccess |= src.extendedRobustness.robustBufferAccess; + dst.extendedRobustness.robustImageAccess |= src.extendedRobustness.robustImageAccess; + dst.enableInterpModePatch |= src.enableInterpModePatch; + dst.pageMigrationEnabled |= src.pageMigrationEnabled; + + dst.shadowDescriptorTableUsage = src.shadowDescriptorTableUsage; + dst.shadowDescriptorTablePtrHigh = src.shadowDescriptorTablePtrHigh; +} + +// ===================================================================================================================== +static void CopyPreRasterizationShaderState( + const GraphicsPipelineLibrary* pLibrary, + GraphicsPipelineBinaryCreateInfo* pCreateInfo) +{ + const GraphicsPipelineBinaryCreateInfo& libInfo = pLibrary->GetPipelineBinaryCreateInfo(); + + pCreateInfo->pipelineInfo.iaState.patchControlPoints = libInfo.pipelineInfo.iaState.patchControlPoints; + pCreateInfo->pipelineInfo.iaState.switchWinding = libInfo.pipelineInfo.iaState.switchWinding; + pCreateInfo->pipelineInfo.vpState.depthClipEnable = libInfo.pipelineInfo.vpState.depthClipEnable; + pCreateInfo->pipelineInfo.rsState.rasterizerDiscardEnable = libInfo.pipelineInfo.rsState.rasterizerDiscardEnable; + pCreateInfo->pipelineInfo.rsState.provokingVertexMode = libInfo.pipelineInfo.rsState.provokingVertexMode; + pCreateInfo->pipelineInfo.nggState = libInfo.pipelineInfo.nggState; + pCreateInfo->pipelineInfo.enableUberFetchShader = libInfo.pipelineInfo.enableUberFetchShader; + pCreateInfo->rasterizationStream = libInfo.rasterizationStream; + + MergePipelineOptions(libInfo.pipelineInfo.options, pCreateInfo->pipelineInfo.options); + + CopyPipelineShadersInfo(pLibrary, pCreateInfo); +} + +// 
===================================================================================================================== +static void CopyFragmentShaderState( + const GraphicsPipelineLibrary* pLibrary, + GraphicsPipelineBinaryCreateInfo* pCreateInfo) +{ + const GraphicsPipelineBinaryCreateInfo& libInfo = pLibrary->GetPipelineBinaryCreateInfo(); + + pCreateInfo->pipelineInfo.rsState.perSampleShading = libInfo.pipelineInfo.rsState.perSampleShading; + pCreateInfo->pipelineInfo.rsState.numSamples = libInfo.pipelineInfo.rsState.numSamples; + pCreateInfo->pipelineInfo.rsState.samplePatternIdx = libInfo.pipelineInfo.rsState.samplePatternIdx; + pCreateInfo->pipelineInfo.rsState.pixelShaderSamples = libInfo.pipelineInfo.rsState.pixelShaderSamples; + + pCreateInfo->pipelineInfo.dsState.depthTestEnable = libInfo.pipelineInfo.dsState.depthTestEnable; + pCreateInfo->pipelineInfo.dsState.depthWriteEnable = libInfo.pipelineInfo.dsState.depthWriteEnable; + pCreateInfo->pipelineInfo.dsState.depthCompareOp = libInfo.pipelineInfo.dsState.depthCompareOp; + pCreateInfo->pipelineInfo.dsState.stencilTestEnable = libInfo.pipelineInfo.dsState.stencilTestEnable; + pCreateInfo->pipelineInfo.dsState.front = libInfo.pipelineInfo.dsState.front; + pCreateInfo->pipelineInfo.dsState.back = libInfo.pipelineInfo.dsState.back; + + MergePipelineOptions(libInfo.pipelineInfo.options, pCreateInfo->pipelineInfo.options); + + CopyPipelineShadersInfo(pLibrary, pCreateInfo); +} + +// ===================================================================================================================== +static void CopyFragmentOutputInterfaceState( + const GraphicsPipelineLibrary* pLibrary, + GraphicsPipelineBinaryCreateInfo* pCreateInfo) +{ + const GraphicsPipelineBinaryCreateInfo& libInfo = pLibrary->GetPipelineBinaryCreateInfo(); + + for (uint32_t i = 0; i < Vkgc::MaxColorTargets; ++i) + { + pCreateInfo->pipelineInfo.cbState.target[i] = libInfo.pipelineInfo.cbState.target[i]; + } + + pCreateInfo->dbFormat = 
libInfo.dbFormat; + pCreateInfo->pipelineInfo.cbState.alphaToCoverageEnable = libInfo.pipelineInfo.cbState.alphaToCoverageEnable; + pCreateInfo->pipelineInfo.cbState.dualSourceBlendEnable = libInfo.pipelineInfo.cbState.dualSourceBlendEnable; + pCreateInfo->pipelineInfo.iaState.enableMultiView = libInfo.pipelineInfo.iaState.enableMultiView; +} + // ===================================================================================================================== static void BuildRasterizationState( const VkPipelineRasterizationStateCreateInfo* pRs, @@ -2006,6 +2149,28 @@ static void BuildPipelineShadersInfo( ); } } + + // Uber fetch shader is actually used in the following scenarios: + // * enableUberFetchShader or enableEarlyCompile is set as TRUE in panel. + // * When creating shader module, adaptForFaskLink parameter of PipelineCompiler::BuildShaderModule() is set as + // TRUE. This may happen when shader is created during pipeline creation, and that pipeline is a library, not + // executable. More details can be found in Pipeline::BuildShaderStageInfo(). + // * When creating pipeline, GraphicsPipelineBuildInfo::enableUberFetchShader controls the actual enablement. It is + // only set when Vertex Input Interface section (VII) is not available and Pre-Rasterization Shader section (PRS) is + // available, or inherits from its PRS parent (referenced library). However, enableUberFetchShader would also be + // set as FALSE even if its parent set it as TRUE if the current pipeline wants to re-compile pre-rasterization shaders + // and VII is available. This may happen when VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT is set. More + // details can be found in PipelineCompiler::ConvertGraphicsPipelineInfo(). + // PS: For standard gfx pipeline, GraphicsPipelineBuildInfo::enableUberFetchShader is never set as TRUE with default + // panel setting because VII and PRS are always available at the same time.
+ if (pDevice->GetRuntimeSettings().enableUberFetchShader || + pDevice->GetRuntimeSettings().enableEarlyCompile || + (((pCreateInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) == 0) && + ((pCreateInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) != 0)) + ) + { + pCreateInfo->pipelineInfo.enableUberFetchShader = true; + } } // ===================================================================================================================== @@ -2130,11 +2295,6 @@ static void BuildVertexInputInterfaceState( pCreateInfo->pipelineInfo.dynamicVertexStride = true; } - if (pDevice->GetRuntimeSettings().enableUberFetchShader || pDevice->GetRuntimeSettings().enableEarlyCompile) - { - pCreateInfo->pipelineInfo.enableUberFetchShader = true; - } - BuildLlpcVertexInputDescriptors(pDevice, pIn->pVertexInputState, pVbInfo); } } @@ -2243,22 +2403,12 @@ static void BuildFragmentOutputInterfaceState( (Util::CountSetBits(pPipelineRenderingCreateInfoKHR->viewMask) != 0)); } -// ===================================================================================================================== -static void BuildPipelineInternalBufferData( - const Device* pDevice, - GraphicsPipelineBinaryCreateInfo* pCreateInfo, - PipelineInternalBufferInfo* pInternalBufferInfo) -{ - PipelineCompiler* pDefaultCompiler = pDevice->GetCompiler(DefaultDeviceIndex); - VK_ASSERT(pCreateInfo->pipelineInfo.enableUberFetchShader); - pDefaultCompiler->BuildPipelineInternalBufferData(pCreateInfo, pInternalBufferInfo); -} - // ===================================================================================================================== static void BuildExecutablePipelineState( const Device* pDevice, const VkGraphicsPipelineCreateInfo* pIn, const GraphicsPipelineShaderStageInfo* pShaderInfo, + const PipelineLayout* pPipelineLayout, const uint32_t dynamicStateFlags, GraphicsPipelineBinaryCreateInfo* pCreateInfo, PipelineInternalBufferInfo* 
pInternalBufferInfo) @@ -2298,7 +2448,7 @@ static void BuildExecutablePipelineState( if (pCreateInfo->pipelineInfo.enableUberFetchShader) { - BuildPipelineInternalBufferData(pDevice, pCreateInfo, pInternalBufferInfo); + pDefaultCompiler->BuildPipelineInternalBufferData(pPipelineLayout, pCreateInfo, pInternalBufferInfo); } } @@ -2318,31 +2468,71 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( VkResult result = VK_SUCCESS; - VkShaderStageFlagBits activeStages = GraphicsPipelineCommon::GetActiveShaderStages( - pIn - ); + GraphicsPipelineLibraryInfo libInfo; + GraphicsPipelineCommon::ExtractLibraryInfo(pIn, &libInfo); + + pCreateInfo->libFlags = libInfo.libFlags; + + pCreateInfo->libFlags |= (libInfo.pVertexInputInterfaceLib == nullptr) ? + 0 : VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT; + pCreateInfo->libFlags |= (libInfo.pPreRasterizationShaderLib == nullptr) ? + 0 : VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT; + pCreateInfo->libFlags |= (libInfo.pFragmentShaderLib == nullptr) ? + 0 : VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT; + pCreateInfo->libFlags |= (libInfo.pFragmentOutputInterfaceLib == nullptr) ? 
+ 0 : VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT; + + VkShaderStageFlagBits activeStages = GraphicsPipelineCommon::GetActiveShaderStages(pIn, &libInfo); - uint32_t dynamicStateFlags = GraphicsPipelineCommon::GetDynamicStateFlags( - pIn->pDynamicState - ); + uint32_t dynamicStateFlags = GraphicsPipelineCommon::GetDynamicStateFlags(pIn->pDynamicState, &libInfo); pCreateInfo->flags = pIn->flags; - BuildVertexInputInterfaceState(pDevice, pIn, dynamicStateFlags, activeStages, pCreateInfo, pVbInfo); + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) + { + BuildVertexInputInterfaceState(pDevice, pIn, dynamicStateFlags, activeStages, pCreateInfo, pVbInfo); + } + else if (libInfo.pVertexInputInterfaceLib != nullptr) + { + CopyVertexInputInterfaceState(pDevice, libInfo.pVertexInputInterfaceLib, pCreateInfo, pVbInfo); + } - BuildPreRasterizationShaderState(pDevice, pIn, pShaderInfo, dynamicStateFlags, activeStages, pCreateInfo); + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) + { + BuildPreRasterizationShaderState(pDevice, pIn, pShaderInfo, dynamicStateFlags, activeStages, pCreateInfo); + } + else if (libInfo.pPreRasterizationShaderLib != nullptr) + { + CopyPreRasterizationShaderState(libInfo.pPreRasterizationShaderLib, pCreateInfo); + } const bool enableRasterization = + (~libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) || (pCreateInfo->pipelineInfo.rsState.rasterizerDiscardEnable == false) || IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::RasterizerDiscardEnableExt); if (enableRasterization) { - BuildFragmentShaderState(pDevice, pIn, pShaderInfo, pCreateInfo); + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) + { + BuildFragmentShaderState(pDevice, pIn, pShaderInfo, pCreateInfo); + } + else if (libInfo.pFragmentShaderLib != nullptr) + { + CopyFragmentShaderState(libInfo.pFragmentShaderLib, 
pCreateInfo); + } - BuildFragmentOutputInterfaceState(pDevice, pIn, pCreateInfo); + if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) + { + BuildFragmentOutputInterfaceState(pDevice, pIn, pCreateInfo); + } + else if (libInfo.pFragmentOutputInterfaceLib != nullptr) + { + CopyFragmentOutputInterfaceState(libInfo.pFragmentOutputInterfaceLib, pCreateInfo); + } } + if (GraphicsPipelineCommon::NeedBuildPipelineBinary(&libInfo, enableRasterization)) { const Vkgc::PipelineShaderInfo* shaderInfos[] = { @@ -2362,13 +2552,21 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( availableStageMask |= (1 << stage); } } + + if ((libInfo.flags.optimize != 0) && + ((libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) || + (libInfo.pVertexInputInterfaceLib != nullptr))) + { + pCreateInfo->pipelineInfo.enableUberFetchShader = false; + } + result = BuildPipelineResourceMapping(pDevice, pPipelineLayout, availableStageMask, pVbInfo, pCreateInfo); } - if ((result == VK_SUCCESS) - ) + if ((result == VK_SUCCESS) && (libInfo.flags.isLibrary == false)) { - BuildExecutablePipelineState(pDevice, pIn, pShaderInfo, dynamicStateFlags, pCreateInfo, pInternalBufferInfo); + BuildExecutablePipelineState( + pDevice, pIn, pShaderInfo, pPipelineLayout, dynamicStateFlags, pCreateInfo, pInternalBufferInfo); } return result; @@ -2818,9 +3016,31 @@ void PipelineCompiler::GetGraphicsPipelineCacheId( // ===================================================================================================================== void PipelineCompiler::BuildPipelineInternalBufferData( - GraphicsPipelineBinaryCreateInfo* pCreateInfo, - PipelineInternalBufferInfo* pInternalBufferInfo) + const PipelineLayout* pPipelineLayout, + GraphicsPipelineBinaryCreateInfo* pCreateInfo, + PipelineInternalBufferInfo* pInternalBufferInfo) { + uint32_t fetchShaderConstBufRegBase = PipelineLayout::InvalidReg; + uint32_t specConstBufVertexRegBase = 
PipelineLayout::InvalidReg; + uint32_t specConstBufFragmentRegBase = PipelineLayout::InvalidReg; + + const UserDataLayout& layout = pPipelineLayout->GetInfo().userDataLayout; + + switch (layout.scheme) + { + case PipelineLayoutScheme::Compact: + fetchShaderConstBufRegBase = layout.compact.uberFetchConstBufRegBase; + specConstBufVertexRegBase = layout.compact.specConstBufVertexRegBase; + specConstBufFragmentRegBase = layout.compact.specConstBufFragmentRegBase; + break; + case PipelineLayoutScheme::Indirect: + fetchShaderConstBufRegBase = layout.indirect.uberFetchConstBufRegBase; + break; + default: + VK_NEVER_CALLED(); + break; + } + if (pCreateInfo->compilerType == PipelineCompilerTypeLlpc) { VK_NOT_IMPLEMENTED; diff --git a/icd/api/strings/extensions.txt b/icd/api/strings/extensions.txt index e1f80eb1..9a7aa060 100644 --- a/icd/api/strings/extensions.txt +++ b/icd/api/strings/extensions.txt @@ -131,6 +131,7 @@ VK_EXT_tooling_info VK_EXT_shader_image_atomic_int64 VK_EXT_pipeline_creation_cache_control VK_KHR_sampler_ycbcr_conversion +VK_KHR_pipeline_library VK_KHR_shader_non_semantic_info VK_GOOGLE_user_type VK_KHR_incremental_present @@ -148,6 +149,7 @@ VK_EXT_primitive_topology_list_restart VK_KHR_dynamic_rendering VK_KHR_format_feature_flags2 VK_EXT_extended_dynamic_state2 +VK_EXT_graphics_pipeline_library VK_KHR_copy_commands2 VK_EXT_ycbcr_image_arrays VK_KHR_zero_initialize_workgroup_memory diff --git a/icd/api/vk_cmd_pool.cpp b/icd/api/vk_cmd_pool.cpp index deb3d8a6..2b346121 100644 --- a/icd/api/vk_cmd_pool.cpp +++ b/icd/api/vk_cmd_pool.cpp @@ -59,7 +59,6 @@ CmdPool::CmdPool( m_pDevice(pDevice), m_pAllocator(pAllocator), m_queueFamilyIndex(queueFamilyIndex), - m_sharedCmdAllocator(sharedCmdAllocator), m_cmdBufferRegistry(32, pDevice->VkInstance()->Allocator()), m_cmdBuffersAlreadyBegun(32, pDevice->VkInstance()->Allocator()) { @@ -69,6 +68,12 @@ CmdPool::CmdPool( { m_flags.isProtected = true; } + if (flags & VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT) + 
{ + m_flags.isResetCmdBuffer = true; + } + + m_flags.sharedCmdAllocator = sharedCmdAllocator; memcpy(m_pPalCmdAllocators, pPalCmdAllocators, sizeof(pPalCmdAllocators[0]) * pDevice->NumPalDevices()); } @@ -227,7 +232,7 @@ VkResult CmdPool::Destroy( } // If we don't use a shared CmdAllocator then we have to destroy our own one. - if (m_sharedCmdAllocator == false) + if (m_flags.sharedCmdAllocator == 0) { for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) { @@ -244,7 +249,8 @@ VkResult CmdPool::Destroy( // ===================================================================================================================== // Resets the PAL command allocators -VkResult CmdPool::ResetCmdAllocator() +VkResult CmdPool::ResetCmdAllocator( + bool releaseResources) { Pal::Result result = Pal::Result::Success; @@ -252,7 +258,7 @@ VkResult CmdPool::ResetCmdAllocator() (deviceIdx < m_pDevice->NumPalDevices()) && (result == Pal::Result::Success); deviceIdx++) { - result = m_pPalCmdAllocators[deviceIdx]->Reset(); + result = m_pPalCmdAllocators[deviceIdx]->Reset(releaseResources); } return PalToVkResult(result); @@ -260,34 +266,49 @@ VkResult CmdPool::ResetCmdAllocator() // ===================================================================================================================== // Reset a command buffer pool object -VkResult CmdPool::Reset(VkCommandPoolResetFlags flags) +VkResult CmdPool::Reset( + VkCommandPoolResetFlags flags) { VkResult result = VK_SUCCESS; m_cmdPoolResetInProgress = true; - // First reset all command buffers that were begun and not already reset (PAL doesn't do this automatically). - for (auto it = m_cmdBuffersAlreadyBegun.Begin(); (it.Get() != nullptr) && (result == VK_SUCCESS); it.Next()) + // Reset all command buffers in the pool when individual command buffer reset is selected for this pool. Otherwise, + // only reset the command buffers that were begun and not already reset (PAL doesn't do this automatically). 
+ if (IsResetCmdBuffer()) { - // Per-spec we always have to do a command buffer reset that also releases the used resources. - result = it.Get()->key->Reset(VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + for (auto it = m_cmdBufferRegistry.Begin(); (it.Get() != nullptr) && (result == VK_SUCCESS); it.Next()) + { + // Per-spec we always have to do a command buffer reset that also releases the used resources. + result = it.Get()->key->Reset(VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + } } - - if (result == VK_SUCCESS) + else { + for (auto it = m_cmdBuffersAlreadyBegun.Begin(); (it.Get() != nullptr) && (result == VK_SUCCESS); it.Next()) + { + // Per-spec we always have to do a command buffer reset that also releases the used resources. + result = it.Get()->key->Reset(VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + } + // Clear the set of command buffers to reset. Only done if all the buffers were reset successfully so it is // possible that after an error this set will contain already reset command buffers. This is fine because we // can reset command buffers twice. - if (m_cmdBuffersAlreadyBegun.GetNumEntries() > 0) + if ((result == VK_SUCCESS) && (m_cmdBuffersAlreadyBegun.GetNumEntries() > 0)) { m_cmdBuffersAlreadyBegun.Reset(); } + } + if (result == VK_SUCCESS) + { // After resetting the registered command buffers, reset the pool itself but only if we use per-pool // CmdAllocator objects, not a single shared one. 
- if (m_sharedCmdAllocator == false) + if (m_flags.sharedCmdAllocator == 0) { - result = ResetCmdAllocator(); + const bool releaseResources = ((flags & VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT) != 0); + + result = ResetCmdAllocator(releaseResources); } } @@ -325,7 +346,14 @@ void CmdPool::UnregisterCmdBuffer(CmdBuffer* pCmdBuffer) Pal::Result CmdPool::MarkCmdBufBegun( CmdBuffer* pCmdBuffer) { - return m_cmdBuffersAlreadyBegun.Insert(pCmdBuffer); + Pal::Result result = Pal::Result::Success; + + if (IsResetCmdBuffer() == false) + { + result = m_cmdBuffersAlreadyBegun.Insert(pCmdBuffer); + } + + return result; } // ===================================================================================================================== @@ -335,7 +363,7 @@ void CmdPool::UnmarkCmdBufBegun( { // Skip erasing individual command buffers during command pool reset as the command pool reset will instead reset // the entire HashSet all at once after all individual command buffer resets are completed. 
- if (m_cmdPoolResetInProgress == false) + if ((IsResetCmdBuffer() == false) && (m_cmdPoolResetInProgress == false)) { m_cmdBuffersAlreadyBegun.Erase(pCmdBuffer); } diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index 7620fc7b..592a1d63 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -65,6 +65,43 @@ namespace vk namespace { +// ===================================================================================================================== +// Convert Pipe points +static uint32_t ConvertPipePointToPipeStage( + Pal::HwPipePoint pipePoint) +{ + uint32_t stageMask = 0; + + switch (pipePoint) + { + case Pal::HwPipeTop: + stageMask = Pal::PipelineStageTopOfPipe; + break; + // Same as Pal::HwPipePreCs and Pal::HwPipePreBlt + case Pal::HwPipePostIndexFetch: + stageMask = Pal::PipelineStageFetchIndirectArgs | Pal::PipelineStageFetchIndices; + break; + case Pal::HwPipePreRasterization: + case Pal::HwPipePostPs: + stageMask = Pal::PipelineStagePs; + break; + case Pal::HwPipePreColorTarget: + stageMask = Pal::PipelineStageColorTarget; + break; + case Pal::HwPipePostCs: + stageMask = Pal::PipelineStageCs; + break; + case Pal::HwPipePostBlt: + stageMask = Pal::PipelineStageBlt; + break; + case Pal::HwPipeBottom: + stageMask = Pal::PipelineStageBottomOfPipe; + break; + } + + return stageMask; +} + // ===================================================================================================================== // Creates a compatible PAL "clear box" structure from attachment + render area for a renderpass clear. Pal::Box BuildClearBox( @@ -522,8 +558,9 @@ CmdBuffer::CmdBuffer( // designed for Acquire/Release-based driver. This flag is currently enabled for gfx9 and above. // If supportSplitReleaseAcquire is true, the ASIC provides split CmdRelease() and CmdAcquire() to express barrier, // and CmdReleaseThenAcquire() is still valid. This flag is currently enabled for gfx10 and above. 
- m_flags.hasReleaseAcquire = info.gfxipProperties.flags.supportReleaseAcquireInterface; - m_flags.useSplitReleaseAcquire = info.gfxipProperties.flags.supportReleaseAcquireInterface && + m_flags.useReleaseAcquire = info.gfxipProperties.flags.supportReleaseAcquireInterface && + settings.useReleaseAcquireInterface; + m_flags.useSplitReleaseAcquire = m_flags.useReleaseAcquire && info.gfxipProperties.flags.supportSplitReleaseAcquire; } @@ -1464,6 +1501,15 @@ VkResult CmdBuffer::Begin( } m_allGpuState.dirtyGraphics.vrs = 0; + + // Set default sample pattern + m_allGpuState.samplePattern.sampleCount = 1; + m_allGpuState.samplePattern.locations = + *Device::GetDefaultQuadSamplePattern(m_allGpuState.samplePattern.sampleCount); + + PalCmdSetMsaaQuadSamplePattern(m_allGpuState.samplePattern.sampleCount, m_allGpuState.samplePattern.locations); + + m_allGpuState.dirtyGraphics.samplePattern = 0; } DbgBarrierPostCmd(DbgBarrierCmdBufStart); @@ -1879,11 +1925,13 @@ void CmdBuffer::RebindUserData( { const uint32_t deviceIdx = deviceGroup.Index(); - PalCmdBuffer(deviceIdx)->CmdSetUserData(palBindPoint, + PalCmdBuffer(deviceIdx)->CmdSetUserData( + palBindPoint, userDataLayout.setBindingRegBase, count, PerGpuState(deviceIdx)->setBindingData[apiBindPoint]); - } while (deviceGroup.IterateNext()); + } + while (deviceGroup.IterateNext()); } } @@ -2030,9 +2078,6 @@ void CmdBuffer::BindDescriptorSets( ConvertPipelineBindPoint(pipelineBindPoint, &palBindPoint, &apiBindPoint); - // Get the current binding state in the command buffer - PipelineBindState* pBindState = &m_allGpuState.pipelineState[apiBindPoint]; - const PipelineLayout* pLayout = PipelineLayout::ObjectFromHandle(layout); // Get user data register information from the given pipeline layout @@ -2052,9 +2097,7 @@ void CmdBuffer::BindDescriptorSets( // If this descriptor set has any dynamic descriptor data then write them into the shadow. 
if (setLayoutInfo.dynDescCount > 0) { - // NOTE: We currently have to supply patched SRDs directly in used data registers. If we'll have proper - // support for dynamic descriptors in SC then we'll only need to write the dynamic offsets directly. - + // NOTE: We supply patched SRDs directly in used data registers. utils::IterateMask deviceGroup(m_curDeviceMask); do { @@ -2095,7 +2138,7 @@ void CmdBuffer::BindDescriptorSets( } } - SetUserDataPipelineLayout(firstSet, setCount, pLayout, pBindState, palBindPoint, apiBindPoint); + SetUserDataPipelineLayout(firstSet, setCount, pLayout, palBindPoint, apiBindPoint); } DbgBarrierPostCmd(DbgBarrierBindSetsPushConstants); @@ -2108,7 +2151,6 @@ void CmdBuffer::SetUserDataPipelineLayout( uint32_t firstSet, uint32_t setCount, const PipelineLayout* pLayout, - PipelineBindState* pBindState, const Pal::PipelineBindPoint palBindPoint, const PipelineBindPoint apiBindPoint) { @@ -2119,6 +2161,9 @@ void CmdBuffer::SetUserDataPipelineLayout( if (pLayout->GetScheme() == PipelineLayoutScheme::Compact) { + // Get the current binding state in the command buffer + PipelineBindState* pBindState = &m_allGpuState.pipelineState[apiBindPoint]; + // Figure out the total range of user data registers written by this sequence of descriptor set binds const PipelineLayout::SetUserDataLayout& firstSetLayout = pLayout->GetSetUserData(firstSet); const PipelineLayout::SetUserDataLayout& lastSetLayout = pLayout->GetSetUserData(firstSet + setCount - 1); @@ -5540,195 +5585,236 @@ void CmdBuffer::ExecuteReleaseThenAcquire( do { - const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); - uint32_t deviceIdx = deviceGroup.Index(); - uint32_t maxImageMemoryBarriers = imageMemoryBarrierCount * MaxPalAspectsPerMask; - - Pal::MsaaQuadSamplePattern* pLocations = (maxImageMemoryBarriers > 0) ? - virtStackFrame.AllocArray(maxImageMemoryBarriers) : nullptr; - - Pal::MemBarrier* pPalBufferMemoryBarriers = (bufferMemoryBarrierCount > 0) ? 
- virtStackFrame.AllocArray(bufferMemoryBarrierCount) : nullptr; - - Pal::ImgBarrier* pPalImageBarriers = (maxImageMemoryBarriers > 0) ? - virtStackFrame.AllocArray(maxImageMemoryBarriers) : nullptr; + const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); - Pal::AcquireReleaseInfo acquireReleaseInfo = {}; + constexpr uint32_t MaxTransitionCount = 512; + constexpr uint32_t MaxSampleLocationCount = 128; - acquireReleaseInfo.pMemoryBarriers = pPalBufferMemoryBarriers; - acquireReleaseInfo.pImageBarriers = pPalImageBarriers; - acquireReleaseInfo.reason = RgpBarrierExternalCmdPipelineBarrier; + // Keeps track of the number of barriers for which info has already been + // stored in Pal::AcquireReleaseInfo + uint32_t memoryBarrierIdx = 0; + uint32_t bufferMemoryBarrierIdx = 0; + uint32_t imageMemoryBarrierIdx = 0; - uint32_t locationIndex = 0; + uint32_t maxLocationCount = Util::Min(imageMemoryBarrierCount, MaxSampleLocationCount); + uint32_t maxBufferBarrierCount = Util::Min(bufferMemoryBarrierCount, MaxTransitionCount); + uint32_t maxImageBarrierCount = Util::Min((MaxPalAspectsPerMask * imageMemoryBarrierCount) + 1, + MaxTransitionCount); - for (uint32_t i = 0; i < memBarrierCount; i++) - { - Pal::BarrierTransition tempTransition = {}; + Pal::MsaaQuadSamplePattern* pLocations = (imageMemoryBarrierCount > 0) ? + virtStackFrame.AllocArray(maxLocationCount) : nullptr; - VkAccessFlags srcAccessMask = pMemoryBarriers[i].srcAccessMask; - VkAccessFlags dstAccessMask = pMemoryBarriers[i].dstAccessMask; - - acquireReleaseInfo.srcStageMask |= VkToPalPipelineStageFlags(srcStageMask); - acquireReleaseInfo.dstStageMask |= VkToPalPipelineStageFlags(dstStageMask); - - m_pDevice->GetBarrierPolicy().ApplyBarrierCacheFlags(srcAccessMask, dstAccessMask, VK_IMAGE_LAYOUT_GENERAL, - VK_IMAGE_LAYOUT_GENERAL, &tempTransition); + Pal::MemBarrier* pPalBufferMemoryBarriers = (bufferMemoryBarrierCount > 0) ? 
+ virtStackFrame.AllocArray(maxBufferBarrierCount) : nullptr; - acquireReleaseInfo.srcGlobalAccessMask |= tempTransition.srcCacheMask; - acquireReleaseInfo.dstGlobalAccessMask |= tempTransition.dstCacheMask; - } + Pal::ImgBarrier* pPalImageBarriers = (imageMemoryBarrierCount > 0) ? + virtStackFrame.AllocArray(maxImageBarrierCount) : nullptr; - for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) + while ((memoryBarrierIdx < memBarrierCount) || + (bufferMemoryBarrierIdx < bufferMemoryBarrierCount) || + (imageMemoryBarrierIdx < imageMemoryBarrierCount)) { - Pal::BarrierTransition tempTransition = {}; + Pal::AcquireReleaseInfo acquireReleaseInfo = {}; - acquireReleaseInfo.srcStageMask |= VkToPalPipelineStageFlags(srcStageMask); - acquireReleaseInfo.dstStageMask |= VkToPalPipelineStageFlags(dstStageMask); - - const Buffer* pBuffer = Buffer::ObjectFromHandle(pBufferMemoryBarriers[i].buffer); - - pBuffer->GetBarrierPolicy().ApplyBufferMemoryBarrier( - GetQueueFamilyIndex(), - pBufferMemoryBarriers[i], - &tempTransition); - - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].flags.u32All = - 0; - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.pGpuMemory = - pBuffer->PalMemory(deviceIdx); - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.offset = - pBufferMemoryBarriers[i].offset; - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.size = - pBufferMemoryBarriers[i].size; - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].srcAccessMask = - tempTransition.srcCacheMask; - pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].dstAccessMask = - tempTransition.dstCacheMask; - - acquireReleaseInfo.memoryBarrierCount++; - } - - for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) - { - Pal::BarrierTransition tempTransition = {}; + acquireReleaseInfo.pMemoryBarriers = pPalBufferMemoryBarriers; + acquireReleaseInfo.pImageBarriers = pPalImageBarriers; + 
acquireReleaseInfo.reason = RgpBarrierExternalCmdPipelineBarrier; acquireReleaseInfo.srcStageMask |= VkToPalPipelineStageFlags(srcStageMask); acquireReleaseInfo.dstStageMask |= VkToPalPipelineStageFlags(dstStageMask); - bool layoutChanging = false; - Pal::ImageLayout oldLayouts[MaxPalAspectsPerMask]; - Pal::ImageLayout newLayouts[MaxPalAspectsPerMask]; + uint32_t locationIndex = 0; - const Image* pImage = Image::ObjectFromHandle(pImageMemoryBarriers[i].image); + while (memoryBarrierIdx < memBarrierCount) + { + Pal::BarrierTransition tempTransition = {}; - pImage->GetBarrierPolicy().ApplyImageMemoryBarrier( - GetQueueFamilyIndex(), - pImageMemoryBarriers[i], - &tempTransition, - &layoutChanging, - oldLayouts, - newLayouts, - false); + VkAccessFlags srcAccessMask = pMemoryBarriers[memoryBarrierIdx].srcAccessMask; + VkAccessFlags dstAccessMask = pMemoryBarriers[memoryBarrierIdx].dstAccessMask; - VkFormat format = pImage->GetFormat(); + m_pDevice->GetBarrierPolicy().ApplyBarrierCacheFlags( + srcAccessMask, + dstAccessMask, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_GENERAL, + &tempTransition); - uint32_t layoutIdx = 0; - uint32_t palRangeIdx = 0; - uint32_t palRangeCount = 0; + acquireReleaseInfo.srcGlobalAccessMask |= tempTransition.srcCacheMask; + acquireReleaseInfo.dstGlobalAccessMask |= tempTransition.dstCacheMask; - Pal::SubresRange palRanges[MaxPalAspectsPerMask]; + memoryBarrierIdx++; + } - VkToPalSubresRange( - format, - pImageMemoryBarriers[i].subresourceRange, - pImage->GetMipLevels(), - pImage->GetArraySize(), - palRanges, - &palRangeCount, - settings); + while ((acquireReleaseInfo.memoryBarrierCount < maxBufferBarrierCount) && + (bufferMemoryBarrierIdx < bufferMemoryBarrierCount)) + { + Pal::BarrierTransition tempTransition = {}; + + const Buffer* pBuffer = Buffer::ObjectFromHandle( + pBufferMemoryBarriers[bufferMemoryBarrierIdx].buffer); + + pBuffer->GetBarrierPolicy().ApplyBufferMemoryBarrier( + GetQueueFamilyIndex(), + 
pBufferMemoryBarriers[bufferMemoryBarrierIdx], + &tempTransition); + + pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].flags.u32All = + 0; + pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.pGpuMemory = + pBuffer->PalMemory(deviceIdx); + pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.offset = + pBufferMemoryBarriers[bufferMemoryBarrierIdx].offset; + pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].memory.size = + pBufferMemoryBarriers[bufferMemoryBarrierIdx].size; + pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].srcAccessMask = + tempTransition.srcCacheMask; + pPalBufferMemoryBarriers[acquireReleaseInfo.memoryBarrierCount].dstAccessMask = + tempTransition.dstCacheMask; + + acquireReleaseInfo.memoryBarrierCount++; + + bufferMemoryBarrierIdx++; + } - if (layoutChanging && Formats::HasStencil(format)) + // Accounting for the max sub ranges, if we do not have enough space left for another image, + // break from this loop. The info for remaining barriers will be passed to PAL in subsequent calls. + while (((MaxPalAspectsPerMask + acquireReleaseInfo.imageBarrierCount) < maxImageBarrierCount) && + (locationIndex < maxLocationCount) && + (imageMemoryBarrierIdx < imageMemoryBarrierCount)) { - if (palRangeCount == MaxPalDepthAspectsPerMask) + Pal::BarrierTransition tempTransition = {}; + + bool layoutChanging = false; + Pal::ImageLayout oldLayouts[MaxPalAspectsPerMask]; + Pal::ImageLayout newLayouts[MaxPalAspectsPerMask]; + + const Image* pImage = Image::ObjectFromHandle(pImageMemoryBarriers[imageMemoryBarrierIdx].image); + + // When using CmdReleaseThenAcquire() to execute barriers, vulkan driver does not need to add an + // optimization for Image barrier with the same oldLayout & newLayout,like VK_IMAGE_LAYOUT_GENERAL + // to VK_IMAGE_LAYOUT_GENERAL. PAL should not be doing any transition logic and only flush or + // invalidate caches as apporiate. 
so we make use of the template flag skipMatchingLayouts to skip + // this if-checking for the same layout change by setting the flag skipMatchingLayouts to false. + pImage->GetBarrierPolicy().ApplyImageMemoryBarrier( + GetQueueFamilyIndex(), + pImageMemoryBarriers[imageMemoryBarrierIdx], + &tempTransition, + &layoutChanging, + oldLayouts, + newLayouts, + false); + + VkFormat format = pImage->GetFormat(); + + uint32_t layoutIdx = 0; + uint32_t palRangeIdx = 0; + uint32_t palRangeCount = 0; + + Pal::SubresRange palRanges[MaxPalAspectsPerMask]; + + VkToPalSubresRange( + format, + pImageMemoryBarriers[imageMemoryBarrierIdx].subresourceRange, + pImage->GetMipLevels(), + pImage->GetArraySize(), + palRanges, + &palRangeCount, + settings); + + if (layoutChanging && Formats::HasStencil(format)) { - // Find the subset of an images subres ranges that need to be transitioned based changes between - // the source and destination layouts. - if ((oldLayouts[0].usages == newLayouts[0].usages) && - (oldLayouts[0].engines == newLayouts[0].engines)) + if (palRangeCount == MaxPalDepthAspectsPerMask) { - // Skip the depth transition - palRangeCount--; + // Find the subset of an images subres ranges that need to be transitioned based changes + // between the source and destination layouts. 
+ if ((oldLayouts[0].usages == newLayouts[0].usages) && + (oldLayouts[0].engines == newLayouts[0].engines)) + { + // Skip the depth transition + palRangeCount--; - palRangeIdx++; - layoutIdx++; + palRangeIdx++; + layoutIdx++; + } + else if ((oldLayouts[1].usages == newLayouts[1].usages) && + (oldLayouts[1].engines == newLayouts[1].engines)) + { + // Skip the stencil transition + palRangeCount--; + } } - else if ((oldLayouts[1].usages == newLayouts[1].usages) && - (oldLayouts[1].engines == newLayouts[1].engines)) + else if (pImageMemoryBarriers[imageMemoryBarrierIdx].subresourceRange.aspectMask & + VK_IMAGE_ASPECT_STENCIL_BIT) { - // Skip the stencil transition - palRangeCount--; + VK_ASSERT((pImageMemoryBarriers[imageMemoryBarrierIdx].subresourceRange.aspectMask & + VK_IMAGE_ASPECT_DEPTH_BIT) == 0); + + // Always use the second layout for stencil transitions. It is the only valid one for + // combined depth stencil layouts, and LayoutUsageHelper replicates stencil-only layouts + // to all aspects. 
+ layoutIdx++; } } - else if (pImageMemoryBarriers[i].subresourceRange.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) + + EXTRACT_VK_STRUCTURES_1( + Barrier, + ImageMemoryBarrier, + SampleLocationsInfoEXT, + &pImageMemoryBarriers[imageMemoryBarrierIdx], + IMAGE_MEMORY_BARRIER, + SAMPLE_LOCATIONS_INFO_EXT) + + for (uint32_t transitionIdx = 0; transitionIdx < palRangeCount; transitionIdx++) { - VK_ASSERT((pImageMemoryBarriers[i].subresourceRange.aspectMask & - VK_IMAGE_ASPECT_DEPTH_BIT) == 0); + pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].srcAccessMask = + tempTransition.srcCacheMask; + pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].dstAccessMask = + tempTransition.dstCacheMask; + pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].pImage = + pImage->PalImage(deviceIdx); + pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].subresRange = + palRanges[palRangeIdx]; + pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].oldLayout = + oldLayouts[layoutIdx]; + pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].newLayout = + newLayouts[layoutIdx]; + pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].pQuadSamplePattern = + nullptr; - // Always use the second layout for stencil transitions. It is the only valid one for combined - // depth stencil layouts, and LayoutUsageHelper replicates stencil-only layouts to all aspects. 
- layoutIdx++; - } - } + if (pSampleLocationsInfoEXT == nullptr) + { + pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].pQuadSamplePattern = nullptr; + } + else if (pLocations != nullptr) // Could be null due to an OOM error + { + VK_ASSERT(static_cast(pSampleLocationsInfoEXT->sType) == + VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT); + VK_ASSERT(pImage->IsSampleLocationsCompatibleDepth()); - EXTRACT_VK_STRUCTURES_1( - Barrier, - ImageMemoryBarrier, - SampleLocationsInfoEXT, - &pImageMemoryBarriers[i], - IMAGE_MEMORY_BARRIER, - SAMPLE_LOCATIONS_INFO_EXT) + ConvertToPalMsaaQuadSamplePattern(pSampleLocationsInfoEXT, &pLocations[locationIndex]); - for (uint32_t transitionIdx = 0; transitionIdx < palRangeCount; transitionIdx++) - { - pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].srcAccessMask = tempTransition.srcCacheMask; - pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].dstAccessMask = tempTransition.dstCacheMask; - pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].pImage = pImage->PalImage(deviceIdx); - pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].subresRange = palRanges[palRangeIdx]; - pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].oldLayout = oldLayouts[layoutIdx]; - pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].newLayout = newLayouts[layoutIdx]; - pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].pQuadSamplePattern = nullptr; - - if (pSampleLocationsInfoEXT == nullptr) - { - pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].pQuadSamplePattern = nullptr; - } - else if (pLocations != nullptr) // Could be null due to an OOM error - { - VK_ASSERT(static_cast(pSampleLocationsInfoEXT->sType) == - VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT); - VK_ASSERT(pImage->IsSampleLocationsCompatibleDepth()); + pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].pQuadSamplePattern = + &pLocations[locationIndex]; + } - ConvertToPalMsaaQuadSamplePattern(pSampleLocationsInfoEXT, &pLocations[locationIndex]); + 
acquireReleaseInfo.imageBarrierCount++; - pPalImageBarriers[acquireReleaseInfo.imageBarrierCount].pQuadSamplePattern = - &pLocations[locationIndex]; + layoutIdx++; + palRangeIdx++; } - acquireReleaseInfo.imageBarrierCount++; + if (pSampleLocationsInfoEXT != nullptr) + { + ++locationIndex; + } - layoutIdx++; - palRangeIdx++; + imageMemoryBarrierIdx++; } - if (pSampleLocationsInfoEXT != nullptr) - { - ++locationIndex; - } + PalCmdBuffer(deviceIdx)->CmdReleaseThenAcquire(acquireReleaseInfo); } - PalCmdBuffer(deviceIdx)->CmdReleaseThenAcquire(acquireReleaseInfo); - virtStackFrame.FreeArray(pLocations); virtStackFrame.FreeArray(pPalImageBarriers); virtStackFrame.FreeArray(pPalBufferMemoryBarriers); @@ -5751,7 +5837,7 @@ void CmdBuffer::PipelineBarrier( { DbgBarrierPreCmd(DbgBarrierPipelineBarrierWaitEvents); - if (m_flags.hasReleaseAcquire && m_pDevice->GetRuntimeSettings().useRelThenAcqForVkCmdPipelineBarrier) + if (m_flags.useReleaseAcquire) { ExecuteReleaseThenAcquire(srcStageMask, destStageMask, @@ -5801,7 +5887,7 @@ void CmdBuffer::PipelineBarrier2( { DbgBarrierPreCmd(DbgBarrierPipelineBarrierWaitEvents); - if (m_flags.hasReleaseAcquire) + if (m_flags.useReleaseAcquire) { utils::IterateMask deviceGroup(m_curDeviceMask); do @@ -6199,29 +6285,39 @@ void CmdBuffer::PalCmdBarrier( const Pal::BarrierInfo& info, uint32_t deviceMask) { - // If you trip this assert, you've forgotten to populate a value for this field. You should use one of the - // RgpBarrierReason enum values from sqtt_rgp_annotations.h. Preferably you should add a new one as described - // in the header, but temporarily you may use the generic "unknown" reason so as not to block your main code change. 
- VK_ASSERT(info.reason != 0); - -#if PAL_ENABLE_PRINTS_ASSERTS - for (uint32_t i = 0; i < info.transitionCount; ++i) + if (m_flags.useReleaseAcquire) { - // Detect if PAL may execute a barrier blt using this image - VK_ASSERT(info.pTransitions[i].imageInfo.pImage == nullptr); - // You need to use the other PalCmdBarrier method (below) which uses vk::Image ptrs to obtain the - // corresponding Pal::IImage ptr for each image transition + // Translate the Pal::BarrierInfo to an equivalent Pal::AcquireReleaseInfo struct and then call + // Pal::CmdReleaseThenAcquire() instead of Pal::CmdBarrier() + TranslateBarrierInfoToAcqRel(info, deviceMask); } + else + { + // If you trip this assert, you've forgotten to populate a value for this field. You should use one of the + // RgpBarrierReason enum values from sqtt_rgp_annotations.h. Preferably you should add a new one as described + // in the header, but temporarily you may use the generic "unknown" reason so as not to block your main code + // change. 
+ VK_ASSERT(info.reason != 0); + +#if PAL_ENABLE_PRINTS_ASSERTS + for (uint32_t i = 0; i < info.transitionCount; ++i) + { + // Detect if PAL may execute a barrier blt using this image + VK_ASSERT(info.pTransitions[i].imageInfo.pImage == nullptr); + // You need to use the other PalCmdBarrier method (below) which uses vk::Image ptrs to obtain the + // corresponding Pal::IImage ptr for each image transition + } #endif - utils::IterateMask deviceGroup(deviceMask); - do - { - const uint32_t deviceIdx = deviceGroup.Index(); + utils::IterateMask deviceGroup(deviceMask); + do + { + const uint32_t deviceIdx = deviceGroup.Index(); - PalCmdBuffer(deviceIdx)->CmdBarrier(info); + PalCmdBuffer(deviceIdx)->CmdBarrier(info); + } + while (deviceGroup.IterateNext()); } - while (deviceGroup.IterateNext()); } // ===================================================================================================================== @@ -6270,6 +6366,135 @@ void CmdBuffer::PalCmdBarrier( while (deviceGroup.IterateNext()); } +// ===================================================================================================================== +// Translates the Pal::BarrierInfo into equivalent Pal::AcquireReleaseInfo struct. This function does a 1-to-1 mapping +// for struct members and hence should not be used in any special cases. +void CmdBuffer::TranslateBarrierInfoToAcqRel( + const Pal::BarrierInfo& barrierInfo, + uint32_t deviceMask) +{ + VirtualStackFrame virtStackFrame(m_pStackAllocator); + + Pal::AcquireReleaseInfo info = {}; + + Pal::ImgBarrier* pImageBarriers = (barrierInfo.transitionCount != 0) ? 
+ virtStackFrame.AllocArray(barrierInfo.transitionCount) : + nullptr; + + info.dstStageMask = ConvertPipePointToPipeStage(barrierInfo.waitPoint); + info.srcGlobalAccessMask = barrierInfo.globalSrcCacheMask; + info.dstGlobalAccessMask = barrierInfo.globalDstCacheMask; + info.reason = barrierInfo.reason; + + for (uint32_t i = 0; i < barrierInfo.pipePointWaitCount; i++) + { + info.srcStageMask |= ConvertPipePointToPipeStage(barrierInfo.pPipePoints[i]); + } + + for (uint32_t i = 0; i < barrierInfo.transitionCount; i++) + { + // Pal::AcquireReleaseInfo requires a section of an IGpuMemory object to be provided for memory barriers. Since + // we do not have any information about it, any memory barrier transitions in Pal::BarrierInfo will have to be + // specified via global cache masks in Pal::AcquireReleaseInfo. + if (barrierInfo.pTransitions[i].imageInfo.pImage == nullptr) + { + info.srcGlobalAccessMask |= barrierInfo.pTransitions[i].srcCacheMask; + info.dstGlobalAccessMask |= barrierInfo.pTransitions[i].dstCacheMask; + } + else + { + pImageBarriers[i].pImage = barrierInfo.pTransitions[i].imageInfo.pImage; + pImageBarriers[i].subresRange = barrierInfo.pTransitions[i].imageInfo.subresRange; + pImageBarriers[i].box = {}; + pImageBarriers[i].srcAccessMask = barrierInfo.pTransitions[i].srcCacheMask; + pImageBarriers[i].dstAccessMask = barrierInfo.pTransitions[i].dstCacheMask; + pImageBarriers[i].oldLayout = barrierInfo.pTransitions[i].imageInfo.oldLayout; + pImageBarriers[i].newLayout = barrierInfo.pTransitions[i].imageInfo.newLayout; + pImageBarriers[i].pQuadSamplePattern = barrierInfo.pTransitions[i].imageInfo.pQuadSamplePattern; + + info.imageBarrierCount++; + } + } + + if (info.imageBarrierCount > 0) + { + info.pImageBarriers = pImageBarriers; + } + + PalCmdReleaseThenAcquire(info, deviceMask); + + if (pImageBarriers != nullptr) + { + virtStackFrame.FreeArray(pImageBarriers); + } +} + +// 
===================================================================================================================== +// This is the main hook for any CmdReleaseThenAcquire going into PAL. Always call this function instead of CmdBarrier +// directly. +void CmdBuffer::PalCmdReleaseThenAcquire( + const Pal::AcquireReleaseInfo& info, + uint32_t deviceMask) +{ + // If you trip this assert, you've forgotten to populate a value for this field. You should use one of the + // RgpBarrierReason enum values from sqtt_rgp_annotations.h. Preferably you should add a new one as described + // in the header, but temporarily you may use the generic "unknown" reason so as not to block your main code change. + VK_ASSERT(info.reason != 0); + +#if PAL_ENABLE_PRINTS_ASSERTS + for (uint32_t i = 0; i < info.imageBarrierCount; ++i) + { + // Detect if PAL may execute a barrier blt using this image + VK_ASSERT(info.pImageBarriers[i].pImage == nullptr); + // You need to use the other PalCmdReleaseThenAcquire method (below) which uses vk::Image ptrs to obtain the + // corresponding Pal::IImage ptr for each image transition + } +#endif + + utils::IterateMask deviceGroup(deviceMask); + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + + PalCmdBuffer(deviceIdx)->CmdReleaseThenAcquire(info); + } + while (deviceGroup.IterateNext()); +} + +// ===================================================================================================================== +void CmdBuffer::PalCmdReleaseThenAcquire( + Pal::AcquireReleaseInfo* pAcquireReleaseInfo, + Pal::ImgBarrier* const pImageBarriers, + const Image** const pTransitionImages, + uint32_t deviceMask) +{ + // If you trip this assert, you've forgot to populate a value for this field. You should use one of the + // RgpBarrierReason enum values from sqtt_rgp_annotations.h. Preferably you should add a new one as described + // in the header, but temporarily you may use the generic "unknown" reason so as not to block you. 
+ VK_ASSERT(pAcquireReleaseInfo->reason != 0); + + utils::IterateMask deviceGroup(deviceMask); + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + + if (deviceIdx > 0) + { + for (uint32_t i = 0; i < pAcquireReleaseInfo->imageBarrierCount; i++) + { + if (pImageBarriers[i].pImage != nullptr) + { + pImageBarriers[i].pImage = pTransitionImages[i]->PalImage(deviceIdx); + } + } + pAcquireReleaseInfo->pImageBarriers = pImageBarriers; + } + + PalCmdBuffer(deviceIdx)->CmdReleaseThenAcquire(*pAcquireReleaseInfo); + } + while (deviceGroup.IterateNext()); +} + // ===================================================================================================================== void CmdBuffer::PalCmdBindMsaaStates( const Pal::IMsaaState* const * pStates) @@ -7024,10 +7249,10 @@ void CmdBuffer::RPBeginSubpass() } // ===================================================================================================================== -// Executes a "sync point" during a render pass instance. There are a number of these at different stages between -// subpasses where we handle execution/memory dependencies from subpass dependencies as well as trigger automatic -// layout transitions. -void CmdBuffer::RPSyncPoint( +// Executes a "sync point" during a render pass instance using the legacy barriers. There are a number of these at +// different stages between subpasses where we handle execution/memory dependencies from subpass dependencies as well as +// trigger automatic layout transitions. 
+void CmdBuffer::RPSyncPointLegacy( const RPSyncPointInfo& syncPoint, VirtualStackFrame* pVirtStack) { @@ -7089,8 +7314,8 @@ void CmdBuffer::RPSyncPoint( tr.attachment, plane); - if (oldLayout.usages != newLayout.usages || - oldLayout.engines != newLayout.engines) + if ((oldLayout.usages != newLayout.usages) || + (oldLayout.engines != newLayout.engines)) { VK_ASSERT(barrier.transitionCount < maxTransitionCount); @@ -7111,22 +7336,22 @@ void CmdBuffer::RPSyncPoint( if (sampleCount > 0) { - if (attachment.pImage->IsSampleLocationsCompatibleDepth() && - tr.flags.isInitialLayoutTransition) - { - VK_ASSERT(attachment.pImage->HasDepth()); - - // Use the provided sample locations for this attachment if this is its - // initial layout transition - pQuadSamplePattern = - &m_renderPassInstance.pAttachments[tr.attachment].initialSamplePattern.locations; - } - else - { - // Otherwise, use the subpass' sample locations - uint32_t subpass = m_renderPassInstance.subpass; - pQuadSamplePattern = &m_renderPassInstance.pSamplePatterns[subpass].locations; - } + if (attachment.pImage->IsSampleLocationsCompatibleDepth() && + tr.flags.isInitialLayoutTransition) + { + VK_ASSERT(attachment.pImage->HasDepth()); + + // Use the provided sample locations for this attachment if this is its + // initial layout transition + pQuadSamplePattern = + &m_renderPassInstance.pAttachments[tr.attachment].initialSamplePattern.locations; + } + else + { + // Otherwise, use the subpass' sample locations + uint32_t subpass = m_renderPassInstance.subpass; + pQuadSamplePattern = &m_renderPassInstance.pSamplePatterns[subpass].locations; + } } pLayoutTransition->imageInfo.pQuadSamplePattern = pQuadSamplePattern; @@ -7166,9 +7391,9 @@ void CmdBuffer::RPSyncPoint( // Execute the barrier if it actually did anything if ((barrier.waitPoint != Pal::HwPipeBottom) || - (barrier.transitionCount > 0) || - ((barrier.pipePointWaitCount > 1) || - (barrier.pipePointWaitCount == 1 && barrier.pPipePoints[0] != Pal::HwPipeTop))) + 
(barrier.transitionCount > 0) || + ((barrier.pipePointWaitCount > 1) || + ((barrier.pipePointWaitCount == 1) && (barrier.pPipePoints[0] != Pal::HwPipeTop)))) { PalCmdBarrier(&barrier, pPalTransitions, ppImages, GetRpDeviceMask()); } @@ -7184,6 +7409,155 @@ void CmdBuffer::RPSyncPoint( } } +// ===================================================================================================================== +// Executes a "sync point" during a render pass instance. There are a number of these at different stages between +// subpasses where we handle execution/memory dependencies from subpass dependencies as well as trigger automatic +// layout transitions. +void CmdBuffer::RPSyncPoint( + const RPSyncPointInfo& syncPoint, + VirtualStackFrame* pVirtStack) +{ + const auto& rpBarrier = syncPoint.barrier; + + const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); + + if (m_flags.useReleaseAcquire) + { + Pal::AcquireReleaseInfo acquireReleaseInfo = {}; + + acquireReleaseInfo.reason = RgpBarrierExternalRenderPassSync; + acquireReleaseInfo.srcStageMask = rpBarrier.srcStageMask; + acquireReleaseInfo.dstStageMask = rpBarrier.dstStageMask; + + const uint32_t maxTransitionCount = MaxPalAspectsPerMask * syncPoint.transitionCount; + + Pal::ImgBarrier* pPalTransitions = (maxTransitionCount != 0) ? + pVirtStack->AllocArray(maxTransitionCount) : + nullptr; + const Image** ppImages = (maxTransitionCount != 0) ? 
+ pVirtStack->AllocArray(maxTransitionCount) : + nullptr; + + // Construct global memory dependency to synchronize caches (subpass dependencies + implicit synchronization) + if (rpBarrier.flags.needsGlobalTransition) + { + Pal::BarrierTransition globalTransition = { }; + + m_pDevice->GetBarrierPolicy().ApplyBarrierCacheFlags( + rpBarrier.srcAccessMask, + rpBarrier.dstAccessMask, + VK_IMAGE_LAYOUT_GENERAL, + VK_IMAGE_LAYOUT_GENERAL, + &globalTransition); + + acquireReleaseInfo.srcGlobalAccessMask = globalTransition.srcCacheMask | rpBarrier.implicitSrcCacheMask; + acquireReleaseInfo.dstGlobalAccessMask = globalTransition.dstCacheMask | rpBarrier.implicitDstCacheMask; + } + + if ((pPalTransitions != nullptr) && (ppImages != nullptr)) + { + // Construct attachment-specific layout transitions + for (uint32_t t = 0; t < syncPoint.transitionCount; ++t) + { + const RPTransitionInfo& tr = syncPoint.pTransitions[t]; + + const Framebuffer::Attachment& attachment = m_allGpuState.pFramebuffer->GetAttachment(tr.attachment); + + for (uint32_t sr = 0; sr < attachment.subresRangeCount; ++sr) + { + const uint32_t plane = attachment.subresRange[sr].startSubres.plane; + + const RPImageLayout nextLayout = (plane == 1) ? 
tr.nextStencilLayout : tr.nextLayout; + + const Pal::ImageLayout newLayout = attachment.pImage->GetAttachmentLayout( + nextLayout, + plane, + this); + + const Pal::ImageLayout oldLayout = RPGetAttachmentLayout( + tr.attachment, + plane); + + if ((oldLayout.usages != newLayout.usages) || + (oldLayout.engines != newLayout.engines)) + { + VK_ASSERT(acquireReleaseInfo.imageBarrierCount < maxTransitionCount); + + ppImages[acquireReleaseInfo.imageBarrierCount] = attachment.pImage; + + pPalTransitions[acquireReleaseInfo.imageBarrierCount].srcAccessMask = 0; + pPalTransitions[acquireReleaseInfo.imageBarrierCount].dstAccessMask = 0; + pPalTransitions[acquireReleaseInfo.imageBarrierCount].pImage = attachment.pImage-> + PalImage(DefaultDeviceIndex); + pPalTransitions[acquireReleaseInfo.imageBarrierCount].oldLayout = oldLayout; + pPalTransitions[acquireReleaseInfo.imageBarrierCount].newLayout = newLayout; + pPalTransitions[acquireReleaseInfo.imageBarrierCount].subresRange = attachment.subresRange[sr]; + + const Pal::MsaaQuadSamplePattern* pQuadSamplePattern = nullptr; + + const uint32_t sampleCount = attachment.pImage->GetImageSamples(); + + if (sampleCount > 0) + { + if (attachment.pImage->IsSampleLocationsCompatibleDepth() && + tr.flags.isInitialLayoutTransition) + { + VK_ASSERT(attachment.pImage->HasDepth()); + + // Use the provided sample locations for this attachment if this is its + // initial layout transition + pQuadSamplePattern = + &m_renderPassInstance.pAttachments[tr.attachment].initialSamplePattern.locations; + } + else + { + // Otherwise, use the subpass' sample locations + uint32_t subpass = m_renderPassInstance.subpass; + pQuadSamplePattern = &m_renderPassInstance.pSamplePatterns[subpass].locations; + } + } + + pPalTransitions[acquireReleaseInfo.imageBarrierCount].pQuadSamplePattern = pQuadSamplePattern; + + RPSetAttachmentLayout(tr.attachment, plane, newLayout); + + acquireReleaseInfo.imageBarrierCount++; + } + } + } + + acquireReleaseInfo.pImageBarriers = 
pPalTransitions; + } + else if (maxTransitionCount != 0) + { + m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; + } + + // Execute the barrier if it actually did anything + if ((rpBarrier.waitPoint != Pal::HwPipeBottom) || + (acquireReleaseInfo.imageBarrierCount > 0) || + ((rpBarrier.pipePointCount > 1) || + ((rpBarrier.pipePointCount == 1) && (rpBarrier.pipePoints[0] != Pal::HwPipeTop)))) + { + PalCmdReleaseThenAcquire(&acquireReleaseInfo, pPalTransitions, ppImages, GetRpDeviceMask()); + } + + if (pPalTransitions != nullptr) + { + pVirtStack->FreeArray(pPalTransitions); + } + + if (ppImages != nullptr) + { + pVirtStack->FreeArray(ppImages); + } + } + else + { + RPSyncPointLegacy(syncPoint, pVirtStack); + } +} + // ===================================================================================================================== // Does one or more load-op color clears during a render pass instance. void CmdBuffer::RPLoadOpClearColor( @@ -8004,7 +8378,7 @@ void CmdBuffer::PushConstantsIssueWrites( } - if ((stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) != 0) + if ((stageFlags & ShaderStageAllGraphics) != 0) { WritePushConstants(PipelineBindGraphics, Pal::PipelineBindPoint::Graphics, @@ -9113,10 +9487,12 @@ void CmdBuffer::SetPrimitiveRestartEnableEXT( void CmdBuffer::SetDepthBiasEnableEXT( VkBool32 depthBiasEnable) { - if (m_allGpuState.triangleRasterState.flags.depthBiasEnable != depthBiasEnable) + if ((m_allGpuState.triangleRasterState.flags.frontDepthBiasEnable != depthBiasEnable) || + (m_allGpuState.triangleRasterState.flags.backDepthBiasEnable != depthBiasEnable)) { - m_allGpuState.triangleRasterState.flags.depthBiasEnable = depthBiasEnable; - m_allGpuState.dirtyGraphics.rasterState = 1; + m_allGpuState.triangleRasterState.flags.frontDepthBiasEnable = depthBiasEnable; + m_allGpuState.triangleRasterState.flags.backDepthBiasEnable = depthBiasEnable; + m_allGpuState.dirtyGraphics.rasterState = 1; } m_allGpuState.staticTokens.triangleRasterState = 
DynamicRenderStateToken; diff --git a/icd/api/vk_conv.cpp b/icd/api/vk_conv.cpp index f1e843d1..334e23dd 100644 --- a/icd/api/vk_conv.cpp +++ b/icd/api/vk_conv.cpp @@ -1041,9 +1041,9 @@ VkResult PalToVkError( } // ===================================================================================================================== -uint32_t GetBufferSrdFormatInfo( - PhysicalDevice* pPhysicalDevice, - Pal::SwizzledFormat swizzledFormat) +static uint32_t GetBufferSrdFormatInfo( + const PhysicalDevice* pPhysicalDevice, + const Pal::SwizzledFormat swizzledFormat) { if (swizzledFormat.format == Pal::ChNumFormat::Undefined) { @@ -1082,7 +1082,7 @@ uint32_t GetBufferSrdFormatInfo( // ===================================================================================================================== VkResult InitializeUberFetchShaderFormatTable( - PhysicalDevice* pPhysicalDevice, + const PhysicalDevice* pPhysicalDevice, UberFetchShaderFormatInfoMap* pFormatInfoMap) { INIT_UBER_FORMATINFO(A2B10G10R10_SINT_PACK32, @@ -1319,9 +1319,9 @@ VkResult InitializeUberFetchShaderFormatTable( // ===================================================================================================================== UberFetchShaderFormatInfo GetUberFetchShaderFormatInfo( - UberFetchShaderFormatInfoMap* pFormatInfoMap, - VkFormat vkFormat, - bool isZeroStride) + const UberFetchShaderFormatInfoMap* pFormatInfoMap, + const VkFormat vkFormat, + const bool isZeroStride) { UberFetchShaderFormatInfo formatInfo = {}; auto pFormatInfo = pFormatInfoMap->FindKey(vkFormat); diff --git a/icd/api/vk_descriptor_pool.cpp b/icd/api/vk_descriptor_pool.cpp index d13fb6fe..f02d29af 100644 --- a/icd/api/vk_descriptor_pool.cpp +++ b/icd/api/vk_descriptor_pool.cpp @@ -155,7 +155,6 @@ VkResult DescriptorPool::Init( if (m_pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead) { - m_addresses[deviceIdx].fmaskGpuAddr = m_staticInternalMem.GpuShadowVirtAddr(deviceIdx); m_addresses[deviceIdx].fmaskCpuAddr = 
static_cast(m_gpuMemHeap.CpuShadowAddr(deviceIdx)); } } diff --git a/icd/api/vk_descriptor_set.cpp b/icd/api/vk_descriptor_set.cpp index 3239b7a4..b85d7327 100644 --- a/icd/api/vk_descriptor_set.cpp +++ b/icd/api/vk_descriptor_set.cpp @@ -80,8 +80,6 @@ void DescriptorSet::Reassign( if (pBaseAddrs[deviceIdx].fmaskCpuAddr != nullptr) { - m_addresses[deviceIdx].fmaskGpuAddr = pBaseAddrs[deviceIdx].fmaskGpuAddr + gpuMemOffset; - m_addresses[deviceIdx].fmaskCpuAddr = static_cast(Util::VoidPtrInc(pBaseAddrs[deviceIdx].fmaskCpuAddr, static_cast(gpuMemOffset))); VK_ASSERT(Util::IsPow2Aligned(reinterpret_cast(m_addresses[deviceIdx].fmaskCpuAddr), sizeof(uint32_t))); } @@ -461,7 +459,6 @@ template void DescriptorUpdate::WriteDescriptorSets( const Device* pDevice, @@ -537,7 +534,7 @@ void DescriptorUpdate::WriteDescriptorSets( destBinding.sta.dwArrayStride); } - if (fmaskBasedMsaaReadEnabled && (destBinding.sta.dwSize > 0)) + if (fmaskDescSize != 0) { WriteFmaskDescriptors( params.pImageInfo, @@ -567,7 +564,7 @@ void DescriptorUpdate::WriteDescriptorSets( params.descriptorCount, destBinding.sta.dwArrayStride); - if (fmaskBasedMsaaReadEnabled && (destBinding.sta.dwSize > 0)) + if (fmaskDescSize != 0) { WriteFmaskDescriptors( params.pImageInfo, @@ -675,7 +672,7 @@ void DescriptorUpdate::WriteDescriptorSets( // ===================================================================================================================== // Copy from one descriptor set to another -template +template void DescriptorUpdate::CopyDescriptorSets( const Device* pDevice, uint32_t deviceIdx, @@ -769,7 +766,7 @@ void DescriptorUpdate::CopyDescriptorSets( memcpy(pDestAddr, pSrcAddr, srcBinding.sta.dwArrayStride * sizeof(uint32_t) * count); } - if (fmaskBasedMsaaReadEnabled && srcBinding.sta.dwSize > 0 && + if ((fmaskDescSize != 0) && ((srcBinding.info.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) || (srcBinding.info.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE) || 
(srcBinding.info.descriptorType == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT))) @@ -809,8 +806,7 @@ template + uint32_t numPalDevices> VKAPI_ATTR void VKAPI_CALL DescriptorUpdate::UpdateDescriptorSets( VkDevice device, uint32_t descriptorWriteCount, @@ -822,20 +818,17 @@ VKAPI_ATTR void VKAPI_CALL DescriptorUpdate::UpdateDescriptorSets( for (uint32_t deviceIdx = 0; deviceIdx < numPalDevices; deviceIdx++) { - WriteDescriptorSets< - imageDescSize, fmaskDescSize, samplerDescSize, bufferDescSize, - fmaskBasedMsaaReadEnabled, numPalDevices>( - pDevice, - deviceIdx, - descriptorWriteCount, - pDescriptorWrites); - - CopyDescriptorSets< - imageDescSize, fmaskDescSize, fmaskBasedMsaaReadEnabled, numPalDevices>( - pDevice, - deviceIdx, - descriptorCopyCount, - pDescriptorCopies); + WriteDescriptorSets( + pDevice, + deviceIdx, + descriptorWriteCount, + pDescriptorWrites); + + CopyDescriptorSets( + pDevice, + deviceIdx, + descriptorCopyCount, + pDescriptorCopies); } } @@ -874,25 +867,6 @@ PFN_vkUpdateDescriptorSets DescriptorUpdate::GetUpdateDescriptorSetsFunc( // ===================================================================================================================== template -PFN_vkUpdateDescriptorSets DescriptorUpdate::GetUpdateDescriptorSetsFunc( - const Device* pDevice) -{ - PFN_vkUpdateDescriptorSets pFunc = nullptr; - - if (pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead) - { - pFunc = GetUpdateDescriptorSetsFunc(pDevice); - } - else - { - pFunc = GetUpdateDescriptorSetsFunc(pDevice); - } - - return pFunc; -} - -// ===================================================================================================================== -template PFN_vkUpdateDescriptorSets DescriptorUpdate::GetUpdateDescriptorSetsFunc( const Device* pDevice) { @@ -906,15 +880,23 @@ PFN_vkUpdateDescriptorSets DescriptorUpdate::GetUpdateDescriptorSetsFunc( (samplerDescSize == 16) && (bufferDescSize == 16)) { - if (fmaskDescSize == 32) + if 
((pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead == false) || (fmaskDescSize == 0)) + { + pFunc = &UpdateDescriptorSets< + 32, + 0, + 16, + 16, + numPalDevices>; + } + else if (fmaskDescSize == 32) { pFunc = &UpdateDescriptorSets< 32, 32, 16, 16, - numPalDevices, - fmaskBasedMsaaReadEnabled>; + numPalDevices>; } else { @@ -962,6 +944,15 @@ void DescriptorUpdate::WriteFmaskDescriptors<32, 32>( uint32_t dwStride, size_t descriptorStrideInBytes); +template +void DescriptorUpdate::WriteFmaskDescriptors<32, 0>( + const VkDescriptorImageInfo* pDescriptors, + uint32_t deviceIdx, + uint32_t* pDestAddr, + uint32_t count, + uint32_t dwStride, + size_t descriptorStrideInBytes); + template void DescriptorUpdate::WriteSamplerDescriptors<16>( const VkDescriptorImageInfo* pDescriptors, @@ -1069,67 +1060,67 @@ DescriptorSet<1>::DescriptorSet(uint32_t heapIndex); template void DescriptorSet<1>::Reassign( - const DescriptorSetLayout* pLayout, - Pal::gpusize gpuMemOffset, - DescriptorAddr* pBaseAddrs, - void* pAllocHandle); + const DescriptorSetLayout* pLayout, + Pal::gpusize gpuMemOffset, + DescriptorAddr* pBaseAddrs, + void* pAllocHandle); template void DescriptorSet<1>::Reset(); template void DescriptorSet<1>::WriteImmutableSamplers( - uint32_t imageDescSizeInBytes); + uint32_t imageDescSizeInBytes); template DescriptorSet<2>::DescriptorSet(uint32_t heapIndex); template void DescriptorSet<2>::Reassign( - const DescriptorSetLayout* pLayout, - Pal::gpusize gpuMemOffset, - DescriptorAddr* pBaseAddrs, - void* pAllocHandle); + const DescriptorSetLayout* pLayout, + Pal::gpusize gpuMemOffset, + DescriptorAddr* pBaseAddrs, + void* pAllocHandle); template void DescriptorSet<2>::Reset(); template void DescriptorSet<2>::WriteImmutableSamplers( - uint32_t imageDescSizeInBytes); + uint32_t imageDescSizeInBytes); template DescriptorSet<3>::DescriptorSet(uint32_t heapIndex); template void DescriptorSet<3>::Reassign( - const DescriptorSetLayout* pLayout, - Pal::gpusize gpuMemOffset, - 
DescriptorAddr* pBaseAddrs, - void* pAllocHandle); + const DescriptorSetLayout* pLayout, + Pal::gpusize gpuMemOffset, + DescriptorAddr* pBaseAddrs, + void* pAllocHandle); template void DescriptorSet<3>::Reset(); template void DescriptorSet<3>::WriteImmutableSamplers( - uint32_t imageDescSizeInBytes); + uint32_t imageDescSizeInBytes); template DescriptorSet<4>::DescriptorSet(uint32_t heapIndex); template void DescriptorSet<4>::Reassign( - const DescriptorSetLayout* pLayout, - Pal::gpusize gpuMemOffset, - DescriptorAddr* pBaseAddrs, - void* pAllocHandle); + const DescriptorSetLayout* pLayout, + Pal::gpusize gpuMemOffset, + DescriptorAddr* pBaseAddrs, + void* pAllocHandle); template void DescriptorSet<4>::Reset(); template void DescriptorSet<4>::WriteImmutableSamplers( - uint32_t imageDescSizeInBytes); + uint32_t imageDescSizeInBytes); } // namespace vk diff --git a/icd/api/vk_descriptor_set_layout.cpp b/icd/api/vk_descriptor_set_layout.cpp index dd5eed62..a66c9d63 100644 --- a/icd/api/vk_descriptor_set_layout.cpp +++ b/icd/api/vk_descriptor_set_layout.cpp @@ -659,6 +659,8 @@ VkResult DescriptorSetLayout::Create( // Set the base pointer of the immutable sampler data to the appropriate location within the allocated memory info.imm.pImmutableSamplerData = reinterpret_cast(Util::VoidPtrInc(pSysMem, apiSize + bindingInfoAuxSize)); + info.flags = pCreateInfo->flags; + // Fill descriptor set layout information VkResult result = ConvertCreateInfo( pDevice, @@ -728,6 +730,15 @@ void DescriptorSetLayout::Merge( const CreateInfo& refInfo = pRef->Info(); const VkShaderStageFlags shaderMask = pShaderMasks[i]; + if (i == 0) + { + mergedInfo.flags = DescriptorSetLayout::ObjectFromHandle(pLayouts[i])->Info().flags; + } + else + { + VK_ASSERT(mergedInfo.flags == DescriptorSetLayout::ObjectFromHandle(pLayouts[i])->Info().flags); + } + for (uint32_t j = 0; j < refInfo.count; ++j) { const BindingInfo& refBinding = pRef->Binding(j); diff --git a/icd/api/vk_descriptor_update_template.cpp 
b/icd/api/vk_descriptor_update_template.cpp index 05de7524..032398d1 100644 --- a/icd/api/vk_descriptor_update_template.cpp +++ b/icd/api/vk_descriptor_update_template.cpp @@ -112,7 +112,6 @@ template DescriptorUpdateTemplate::PfnUpdateEntry DescriptorUpdateTemplate::GetUpdateEntryFunc( - const Device* pDevice, VkDescriptorType descriptorType, const DescriptorSetLayout::BindingInfo& dstBinding) { @@ -124,61 +123,31 @@ DescriptorUpdateTemplate::PfnUpdateEntry DescriptorUpdateTemplate::GetUpdateEntr pFunc = &UpdateEntrySampler; break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - if (pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead && (dstBinding.sta.dwSize > 0)) + if (dstBinding.imm.dwSize != 0) { - if (dstBinding.imm.dwSize != 0) + if (dstBinding.bindingFlags.ycbcrConversionUsage != 0) { - if (dstBinding.bindingFlags.ycbcrConversionUsage != 0) - { - pFunc = &UpdateEntryCombinedImageSampler; - } - else - { - pFunc = &UpdateEntryCombinedImageSampler; - } + pFunc = &UpdateEntryCombinedImageSampler; } else { pFunc = &UpdateEntryCombinedImageSampler; + true, false, numPalDevices>; } } else { - if (dstBinding.imm.dwSize != 0) - { - pFunc = &UpdateEntryCombinedImageSampler; - } - else - { - pFunc = &UpdateEntryCombinedImageSampler; - } + pFunc = &UpdateEntryCombinedImageSampler; } break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - if (pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead && (dstBinding.sta.dwSize > 0)) - { - pFunc = &UpdateEntrySampledImage; - } - else - { - pFunc = &UpdateEntrySampledImage; - } + pFunc = &UpdateEntrySampledImage; break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - if (pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead && (dstBinding.sta.dwSize > 0)) - { - pFunc = &UpdateEntrySampledImage; - } - else - { - pFunc = &UpdateEntrySampledImage; - } + pFunc = &UpdateEntrySampledImage; break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: pFunc = &UpdateEntryTexelBuffer; @@ -228,14 +197,23 @@ 
DescriptorUpdateTemplate::PfnUpdateEntry DescriptorUpdateTemplate::GetUpdateEntr (bufferDescSize == 16)) { - if (fmaskDescSize == 32) + if ((pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead == false) || (fmaskDescSize == 0)) + { + pFunc = GetUpdateEntryFunc< + 32, + 0, + 16, + 16, + numPalDevices>(descriptorType, dstBinding); + } + else if (fmaskDescSize == 32) { pFunc = GetUpdateEntryFunc< 32, 32, 16, 16, - numPalDevices>(pDevice, descriptorType, dstBinding); + numPalDevices>(descriptorType, dstBinding); } else { @@ -333,7 +311,7 @@ void DescriptorUpdateTemplate::Update( } // ===================================================================================================================== -template void DescriptorUpdateTemplate::UpdateEntryCombinedImageSampler( const Device* pDevice, @@ -387,7 +365,7 @@ void DescriptorUpdateTemplate::UpdateEntryCombinedImageSampler( entry.srcStride); } - if (updateFmask) + if (fmaskDescSize != 0) { uint32_t* pDestFmaskAddr = pDstSet->FmaskCpuAddress(deviceIdx) + entry.dstStaOffset; @@ -513,13 +491,12 @@ void DescriptorUpdateTemplate::UpdateEntrySampler( } // ===================================================================================================================== -template +template void DescriptorUpdateTemplate::UpdateEntrySampledImage( - const Device* pDevice, - VkDescriptorSet descriptorSet, - const void* pDescriptorInfo, - const TemplateUpdateInfo& entry) + const Device* pDevice, + VkDescriptorSet descriptorSet, + const void* pDescriptorInfo, + const TemplateUpdateInfo& entry) { DescriptorSet* pDstSet = DescriptorSet::ObjectFromHandle(descriptorSet); @@ -539,7 +516,7 @@ void DescriptorUpdateTemplate::UpdateEntrySampledImage( entry.dstBindStaDwArrayStride, entry.srcStride); - if (updateFmask) + if (fmaskDescSize != 0) { uint32_t* pDestFmaskAddr = pDstSet->FmaskCpuAddress(deviceIdx) + entry.dstStaOffset; @@ -583,7 +560,8 @@ void DescriptorUpdateTemplate::UpdateEntryInlineUniformBlock( ); deviceIdx++; - } 
while (deviceIdx < numPalDevices); + } + while (deviceIdx < numPalDevices); } namespace entry diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index f74b6f1a..b0cbf542 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -268,7 +268,7 @@ Device::Device( #if VKI_GPU_DECOMPRESS m_pGpuDecoderLayer(nullptr), #endif - m_allocationSizeTracking(m_settings.memoryDeviceOverallocationAllowed ? false : true), + m_allocationSizeTracking(true), m_useComputeAsTransferQueue(useComputeAsTransferQueue), m_useUniversalAsComputeQueue(pPhysicalDevices[DefaultDeviceIndex]->GetRuntimeSettings().useUniversalAsComputeQueue), m_useGlobalGpuVa(false), @@ -453,7 +453,7 @@ VkResult Device::Create( if (!DeviceExtensions::EnableExtensions(pCreateInfo->ppEnabledExtensionNames, pCreateInfo->enabledExtensionCount, pPhysicalDevice->GetAllowedExtensions(), - enabledDeviceExtensions)) + &enabledDeviceExtensions)) { return VK_ERROR_EXTENSION_NOT_PRESENT; } @@ -997,8 +997,7 @@ VkResult Device::Create( vkResult = VK_SUCCESS; // Finalize the physical device settings before they are cached in the device - pPhysicalDevices[DefaultDeviceIndex]->GetSettingsLoader()->FinalizeSettings( - ); + pPhysicalDevices[DefaultDeviceIndex]->GetSettingsLoader()->FinalizeSettings(enabledDeviceExtensions); // Construct API device object. VK_INIT_DISPATCHABLE(Device, pMemory, ( @@ -1063,7 +1062,6 @@ VkResult Device::Create( // Create a TMZ queue at the protected capability queue creation time // when this engine support per queue level tmz. - const Pal::DeviceProperties& deviceProps = pPhysicalDevices[deviceIdx]->PalProperties(); if ((queueFlags[queueFamilyIndex] & VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT) && (properties.engineProperties[queueCreateInfo.engineType].tmzSupportLevel == @@ -1437,8 +1435,8 @@ VkResult Device::Initialize( if (result == VK_SUCCESS) { - // For apps running on APU, disable allocation size tracking, allocate remote heap instead when local heap is used up. 
- if (palProps.gpuType == Pal::GpuType::Integrated) + // Allow overallocation by disabling tracking for apps running on APU or with the setting. + if ((palProps.gpuType == Pal::GpuType::Integrated) || m_settings.memoryDeviceOverallocationAllowed) { m_allocationSizeTracking = false; } @@ -1479,8 +1477,6 @@ VkResult Device::Initialize( { case VK_MEMORY_OVERALLOCATION_BEHAVIOR_ALLOWED_AMD: m_allocationSizeTracking = false; - m_overallocationRequestedForPalHeap[Pal::GpuHeap::GpuHeapInvisible] = true; - m_overallocationRequestedForPalHeap[Pal::GpuHeap::GpuHeapLocal] = true; break; case VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD: m_allocationSizeTracking = true; @@ -1492,9 +1488,7 @@ VkResult Device::Initialize( else if (enabled.IsExtensionEnabled(DeviceExtensions::ExtensionId::EXT_PAGEABLE_DEVICE_LOCAL_MEMORY) && pageableDeviceLocalMemory) { - // Add back-up heaps for device-local heaps - m_overallocationRequestedForPalHeap[Pal::GpuHeap::GpuHeapInvisible] = true; - m_overallocationRequestedForPalHeap[Pal::GpuHeap::GpuHeapLocal] = true; + m_allocationSizeTracking = false; } else if ((m_settings.overrideHeapChoiceToLocal != 0) && (palProps.gpuType == Pal::GpuType::Discrete)) { @@ -1504,6 +1498,13 @@ VkResult Device::Initialize( m_overallocationRequestedForPalHeap[Pal::GpuHeap::GpuHeapLocal] = true; } } + + if (m_allocationSizeTracking == false) + { + // Add back-up heaps for device-local heaps + m_overallocationRequestedForPalHeap[Pal::GpuHeap::GpuHeapInvisible] = true; + m_overallocationRequestedForPalHeap[Pal::GpuHeap::GpuHeapLocal] = true; + } } #if ICD_GPUOPEN_DEVMODE_BUILD @@ -1934,6 +1935,7 @@ VkResult Device::CreateInternalComputePipeline( flags, codeByteSize, pCode, + false, nullptr, nullptr, &shaderModule); diff --git a/icd/api/vk_event.cpp b/icd/api/vk_event.cpp index 70ad538a..f075aa51 100644 --- a/icd/api/vk_event.cpp +++ b/icd/api/vk_event.cpp @@ -80,14 +80,15 @@ VkResult Event::Create( Pal::DeviceProperties info; 
pDevice->PalDevice(DefaultDeviceIndex)->GetProperties(&info); + const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); + // If supportReleaseAcquireInterface is true, the ASIC provides new barrier interface CmdReleaseThenAcquire() // designed for Acquire/Release-based driver. This flag is currently enabled for gfx9 and above. // If supportSplitReleaseAcquire is true, the ASIC provides split CmdRelease() and CmdAcquire() to express barrier, // and CmdReleaseThenAcquire() is still valid. This flag is currently enabled for gfx10 and above. bool useSplitReleaseAcquire = info.gfxipProperties.flags.supportReleaseAcquireInterface && - info.gfxipProperties.flags.supportSplitReleaseAcquire; - - const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); + info.gfxipProperties.flags.supportSplitReleaseAcquire && + settings.useReleaseAcquireInterface; if (useSplitReleaseAcquire && settings.syncTokenEnabled && ((pCreateInfo->flags & VK_EVENT_CREATE_DEVICE_ONLY_BIT_KHR) != 0)) diff --git a/icd/api/vk_graphics_pipeline.cpp b/icd/api/vk_graphics_pipeline.cpp index 5d1f1db0..4df93767 100644 --- a/icd/api/vk_graphics_pipeline.cpp +++ b/icd/api/vk_graphics_pipeline.cpp @@ -27,6 +27,7 @@ #include "include/vk_conv.h" #include "include/vk_device.h" #include "include/vk_graphics_pipeline.h" +#include "include/vk_graphics_pipeline_library.h" #include "include/vk_instance.h" #include "include/vk_memory.h" #include "include/vk_pipeline_cache.h" @@ -430,9 +431,8 @@ VkResult GraphicsPipeline::Create( &pPipelineCreationFeedbackCreateInfo); // 1. Get pipeline layout - bool isMergedLayout = false; - PipelineLayout* pPipelineLayout = nullptr; - VkResult result = AchievePipelineLayout(pDevice, pCreateInfo, pAllocator, &pPipelineLayout, &isMergedLayout); + VK_ASSERT(pCreateInfo->layout != VK_NULL_HANDLE); + PipelineLayout* pPipelineLayout = PipelineLayout::ObjectFromHandle(pCreateInfo->layout); // 2. 
Build pipeline binary create info GraphicsPipelineBinaryCreateInfo binaryCreateInfo = {}; @@ -441,19 +441,16 @@ VkResult GraphicsPipeline::Create( PipelineInternalBufferInfo internalBufferInfo = {}; ShaderModuleHandle tempModules[ShaderStage::ShaderStageGfxCount] = {}; - if (result == VK_SUCCESS) - { - result = BuildPipelineBinaryCreateInfo( - pDevice, - pCreateInfo, - pPipelineLayout, - pPipelineCache, - &binaryCreateInfo, - &shaderStageInfo, - &vbInfo, - &internalBufferInfo, - tempModules); - } + VkResult result = BuildPipelineBinaryCreateInfo( + pDevice, + pCreateInfo, + pPipelineLayout, + pPipelineCache, + &binaryCreateInfo, + &shaderStageInfo, + &vbInfo, + &internalBufferInfo, + tempModules); // 3. Create pipeine binaries size_t pipelineBinarySizes[MaxPalDevices] = {}; @@ -512,12 +509,6 @@ VkResult GraphicsPipeline::Create( // Free the temporary newly-built shader modules FreeTempModules(pDevice, ShaderStage::ShaderStageGfxCount, tempModules); - // Free the temporary merged pipeline layout used only for current pipeline - if (isMergedLayout) - { - pPipelineLayout->Destroy(pDevice, pAllocator); - } - if (internalBufferInfo.pData != nullptr) { pDevice->VkInstance()->FreeMem(internalBufferInfo.pData); @@ -1284,7 +1275,10 @@ void GraphicsPipeline::BindToCmdBuffer( if (ContainsStaticState(DynamicStatesInternal::DepthBiasEnableExt)) { - pRenderState->triangleRasterState.flags.depthBiasEnable = m_info.triangleRasterState.flags.depthBiasEnable; + pRenderState->triangleRasterState.flags.frontDepthBiasEnable = + m_info.triangleRasterState.flags.frontDepthBiasEnable; + pRenderState->triangleRasterState.flags.backDepthBiasEnable = + m_info.triangleRasterState.flags.backDepthBiasEnable; } pRenderState->dirtyGraphics.rasterState = 1; diff --git a/icd/api/vk_graphics_pipeline_library.cpp b/icd/api/vk_graphics_pipeline_library.cpp new file mode 100644 index 00000000..35e1be36 --- /dev/null +++ b/icd/api/vk_graphics_pipeline_library.cpp @@ -0,0 +1,644 @@ +/* + 
*********************************************************************************************************************** + * + * Copyright (c) 2021-2022 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************************************************************************/ + +#include "include/vk_graphics_pipeline_library.h" +#include "include/vk_pipeline_layout.h" +#include "palVectorImpl.h" + +namespace vk +{ + +// ===================================================================================================================== +static const VkPipelineVertexInputDivisorStateCreateInfoEXT* DumpVkPipelineVertexInputDivisorStateCreateInfoEXT( + const VkPipelineVertexInputDivisorStateCreateInfoEXT* pSrc, + void* pDst, + size_t* pSize) +{ + VkPipelineVertexInputDivisorStateCreateInfoEXT* pDivisorState = nullptr; + + if (pSrc != nullptr) + { + const size_t bindingSize = pSrc->vertexBindingDivisorCount * sizeof(VkVertexInputBindingDivisorDescriptionEXT); + + if (pSize != nullptr) + { + *pSize = sizeof(VkPipelineVertexInputDivisorStateCreateInfoEXT) + bindingSize; + } + + if (pDst != nullptr) + { + pDivisorState = reinterpret_cast(pDst); + + VkVertexInputBindingDivisorDescriptionEXT* pVertexBindingDivisor = + reinterpret_cast( + Util::VoidPtrInc(pDst, sizeof(VkPipelineVertexInputDivisorStateCreateInfoEXT))); + + memcpy(pVertexBindingDivisor, pSrc->pVertexBindingDivisors, bindingSize); + + pDivisorState->sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT; + pDivisorState->pNext = nullptr; + pDivisorState->vertexBindingDivisorCount = pSrc->vertexBindingDivisorCount; + pDivisorState->pVertexBindingDivisors = pVertexBindingDivisor; + } + } + else if (pSize != nullptr) + { + *pSize = 0; + } + + return pDivisorState; +} + +// ===================================================================================================================== +static const VkPipelineVertexInputStateCreateInfo* DumpVkPipelineVertexInputStateCreateInfo( + const VkPipelineVertexInputStateCreateInfo* pSrc, + void* pDst, + size_t* pSize) +{ + VkPipelineVertexInputStateCreateInfo* pVertexInput = nullptr; + + if 
(pSrc != nullptr) + { + EXTRACT_VK_STRUCTURES_0( + divisorState, + PipelineVertexInputDivisorStateCreateInfoEXT, + static_cast(pSrc->pNext), + PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); + + const size_t bindingDescSize = + pSrc->vertexBindingDescriptionCount * sizeof(VkVertexInputBindingDescription); + const size_t AttribDescSize = + pSrc->vertexAttributeDescriptionCount * sizeof(VkVertexInputAttributeDescription); + + if (pSize != nullptr) + { + *pSize = sizeof(VkPipelineVertexInputStateCreateInfo) + bindingDescSize + AttribDescSize; + + size_t divisorState = 0; + DumpVkPipelineVertexInputDivisorStateCreateInfoEXT( + pPipelineVertexInputDivisorStateCreateInfoEXT, nullptr, &divisorState); + *pSize += divisorState; + } + + if (pDst != nullptr) + { + pVertexInput = reinterpret_cast(pDst); + VkVertexInputBindingDescription* pBindingDesc = + reinterpret_cast( + Util::VoidPtrInc(pDst, sizeof(VkPipelineVertexInputStateCreateInfo))); + VkVertexInputAttributeDescription* pAttribDesc = + reinterpret_cast( + Util::VoidPtrInc(pBindingDesc, bindingDescSize)); + + const VkPipelineVertexInputDivisorStateCreateInfoEXT* pDivisorState = + DumpVkPipelineVertexInputDivisorStateCreateInfoEXT( + pPipelineVertexInputDivisorStateCreateInfoEXT, + Util::VoidPtrInc(pAttribDesc, AttribDescSize), + nullptr); + + memcpy(pBindingDesc, pSrc->pVertexBindingDescriptions, bindingDescSize); + memcpy(pAttribDesc, pSrc->pVertexAttributeDescriptions, AttribDescSize); + + pVertexInput->sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + pVertexInput->pNext = pDivisorState; + pVertexInput->flags = pSrc->flags; + pVertexInput->vertexBindingDescriptionCount = pSrc->vertexBindingDescriptionCount; + pVertexInput->vertexAttributeDescriptionCount = pSrc->vertexAttributeDescriptionCount; + pVertexInput->pVertexBindingDescriptions = pBindingDesc; + pVertexInput->pVertexAttributeDescriptions = pAttribDesc; + } + } + else if (pSize != nullptr) + { + *pSize = 0; + } + + return 
pVertexInput; +} + +// ===================================================================================================================== +static const VkSpecializationInfo* DumpVkSpecializationInfo( + const VkSpecializationInfo* pSrc, + void* pDst, + size_t* pSize) +{ + VkSpecializationInfo* pInfo = nullptr; + + if (pSrc != nullptr) + { + const size_t mapEntrySize = pSrc->mapEntryCount * sizeof(VkSpecializationMapEntry); + + if (pSize != nullptr) + { + *pSize = mapEntrySize + pSrc->dataSize + sizeof(VkSpecializationInfo); + } + + if (pDst != nullptr) + { + pInfo = reinterpret_cast(pDst); + void* pMapEntries = Util::VoidPtrInc(pInfo, sizeof(VkSpecializationInfo)); + void* pData = Util::VoidPtrInc(pMapEntries, mapEntrySize); + + memcpy(pMapEntries, pSrc->pMapEntries, mapEntrySize); + memcpy(pData, pSrc->pData, pSrc->dataSize); + + pInfo->mapEntryCount = pSrc->mapEntryCount; + pInfo->pMapEntries = reinterpret_cast(pMapEntries); + pInfo->dataSize = pSrc->dataSize; + pInfo->pData = pData; + } + } + else if (pSize != nullptr) + { + *pSize = 0; + } + + return pInfo; +} + +// ===================================================================================================================== +// Copy the content of PipelineShaderInfo in GraphicsPipelineBinaryCreateInfo +// Note that module data Vkgc::PipelineShaderInfo::pModuleData is not copied here. +// Module data is maintained by graphics pipeline library directly. 
+static GraphicsPipelineBinaryCreateInfo* DumpGraphicsPipelineBinaryCreateInfo( + const GraphicsPipelineBinaryCreateInfo* pBinInfo, + void* pDst, + size_t* pSize) +{ + GraphicsPipelineBinaryCreateInfo* pCreateInfo = nullptr; + + if (pBinInfo != nullptr) + { + const Vkgc::PipelineShaderInfo* pInShaderInfos[] = + { + &pBinInfo->pipelineInfo.vs, + &pBinInfo->pipelineInfo.tcs, + &pBinInfo->pipelineInfo.tes, + &pBinInfo->pipelineInfo.gs, + &pBinInfo->pipelineInfo.fs, + }; + + size_t objSize = 0; + + // Calculate the size used by VkPipelineVertexInputStateCreateInfo in GraphicsPipelineBinaryCreateInfo + size_t vertexInputSize = 0; + DumpVkPipelineVertexInputStateCreateInfo(pBinInfo->pipelineInfo.pVertexInput, nullptr, &vertexInputSize); + objSize += vertexInputSize; + + size_t specializationInfoSizes[ShaderStage::ShaderStageGfxCount] = {}; + size_t entryTargetSizes[ShaderStage::ShaderStageGfxCount] = {}; + for (uint32_t stage = 0; stage < Util::ArrayLen(pInShaderInfos); ++stage) + { + DumpVkSpecializationInfo(pInShaderInfos[stage]->pSpecializationInfo, + nullptr, + &specializationInfoSizes[stage]); + + entryTargetSizes[stage] = pInShaderInfos[stage]->pEntryTarget == nullptr ? 
0 : + strlen(pInShaderInfos[stage]->pEntryTarget) + 1; + + objSize += (specializationInfoSizes[stage] + entryTargetSizes[stage]); + } + + if (pSize != nullptr) + { + *pSize = objSize + sizeof(GraphicsPipelineBinaryCreateInfo); + } + + if (pDst != nullptr) + { + void* pSystemMem = pDst; + + pCreateInfo = reinterpret_cast(pSystemMem); + *pCreateInfo = *pBinInfo; + + pSystemMem = Util::VoidPtrInc(pSystemMem, sizeof(GraphicsPipelineBinaryCreateInfo)); + + pCreateInfo->pipelineInfo.pVertexInput = + DumpVkPipelineVertexInputStateCreateInfo(pBinInfo->pipelineInfo.pVertexInput, pSystemMem, nullptr); + + pSystemMem = Util::VoidPtrInc(pSystemMem, vertexInputSize); + + Vkgc::PipelineShaderInfo* pOutShaderInfos[] = + { + &pCreateInfo->pipelineInfo.vs, + &pCreateInfo->pipelineInfo.tcs, + &pCreateInfo->pipelineInfo.tes, + &pCreateInfo->pipelineInfo.gs, + &pCreateInfo->pipelineInfo.fs, + }; + + for (uint32_t stage = 0; stage < Util::ArrayLen(pOutShaderInfos); ++stage) + { + if (specializationInfoSizes[stage] != 0) + { + pOutShaderInfos[stage]->pSpecializationInfo = + DumpVkSpecializationInfo(pInShaderInfos[stage]->pSpecializationInfo, pSystemMem, nullptr); + + pSystemMem = Util::VoidPtrInc(pSystemMem, specializationInfoSizes[stage]); + } + + if (entryTargetSizes[stage] != 0) + { + memcpy(pSystemMem, pInShaderInfos[stage]->pEntryTarget, entryTargetSizes[stage]); + + pOutShaderInfos[stage]->pEntryTarget = static_cast(pSystemMem); + + pSystemMem = Util::VoidPtrInc(pSystemMem, entryTargetSizes[stage]); + } + } + } + } + else if (pSize != nullptr) + { + *pSize = 0; + } + + return pCreateInfo; +} + +// ===================================================================================================================== +void GraphicsPipelineLibrary::CreatePartialPipelineBinary( + const Device* pDevice, + const GraphicsPipelineLibraryInfo* pLibInfo, + const GraphicsPipelineShaderStageInfo* pShaderStageInfo, + const bool disableRasterization, + GraphicsPipelineBinaryCreateInfo* 
pBinaryCreateInfo, + ShaderModuleHandle* pTempModules, + TempModuleState* pTempModuleStages) +{ + PipelineCompiler* pCompiler = pDevice->GetCompiler(DefaultDeviceIndex); + + uint32_t tempIdx = 0; + + for (uint32_t i = 0; i < ShaderStage::ShaderStageGfxCount; ++i) + { + if ((pShaderStageInfo->stages[i].pModuleHandle != nullptr) && + pCompiler->IsValidShaderModule(pShaderStageInfo->stages[i].pModuleHandle)) + { + VK_ASSERT(pShaderStageInfo->stages[i].pModuleHandle == &pTempModules[tempIdx]); + + bool canBuildShader = (((pShaderStageInfo->stages[i].stage == ShaderStage::ShaderStageFragment) && + disableRasterization) + == false); + + if (canBuildShader) + { + // We don't take care of return result. Early compile failure in some cases is expected + pCompiler->CreateGraphicsShaderBinary( + pDevice, pShaderStageInfo->stages[i].stage, pBinaryCreateInfo, &pTempModules[tempIdx]); + } + + pTempModuleStages[tempIdx].stage = pShaderStageInfo->stages[i].stage; + pTempModuleStages[tempIdx].needFreeBinaryOnly = false; + + ++tempIdx; + } + } + + if (pLibInfo->flags.optimize) + { + // We need to re-compile some stage if related new state is available + if ((pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) && + (pLibInfo->pPreRasterizationShaderLib != nullptr)) + { + const ShaderModuleHandle* pParentHandle = + pLibInfo->pPreRasterizationShaderLib->GetShaderModuleHandle(ShaderStage::ShaderStageVertex); + + VK_ASSERT(pParentHandle != nullptr); + + if (pParentHandle != nullptr) + { + pBinaryCreateInfo->pipelineInfo.enableUberFetchShader = false; + + pTempModules[tempIdx] = *pParentHandle; + + pCompiler->CreateGraphicsShaderBinary( + pDevice, ShaderStage::ShaderStageVertex, pBinaryCreateInfo, &pTempModules[tempIdx]); + + pTempModuleStages[tempIdx].stage = ShaderStage::ShaderStageVertex; + pTempModuleStages[tempIdx].needFreeBinaryOnly = true; + + ++tempIdx; + } + } + + if ((pLibInfo->libFlags & 
VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) && + (pLibInfo->pFragmentShaderLib != nullptr)) + { + const ShaderModuleHandle* pParentHandle = + pLibInfo->pPreRasterizationShaderLib->GetShaderModuleHandle(ShaderStage::ShaderStageFragment); + + VK_ASSERT(pParentHandle != nullptr); + + if (pParentHandle != nullptr) + { + pTempModules[tempIdx] = *pParentHandle; + + pCompiler->CreateGraphicsShaderBinary( + pDevice, ShaderStage::ShaderStageVertex, pBinaryCreateInfo, &pTempModules[tempIdx]); + + pTempModuleStages[tempIdx].stage = ShaderStage::ShaderStageFragment; + pTempModuleStages[tempIdx].needFreeBinaryOnly = true; + + ++tempIdx; + } + } + } + + for (uint32_t i = tempIdx; i < ShaderStage::ShaderStageGfxCount; ++i) + { + pTempModuleStages[i].stage = ShaderStage::ShaderStageInvalid; + } + + for (uint32_t i = 0; i < tempIdx; ++i) + { + PipelineCompiler::SetPartialGraphicsPipelineBinaryInfo( + &pTempModules[i], pTempModuleStages[i].stage, pBinaryCreateInfo); + } +} + +// ===================================================================================================================== +VkResult GraphicsPipelineLibrary::Create( + Device* pDevice, + PipelineCache* pPipelineCache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + uint64 startTimeTicks = Util::GetPerfCpuTime(); + + VkResult result = VK_SUCCESS; + uint64_t apiPsoHash = 0; + size_t apiSize = 0; + void* pSysMem = nullptr; + + GraphicsPipelineLibraryInfo libInfo; + ExtractLibraryInfo(pCreateInfo, &libInfo); + + // 1. Get pipeline layout + VK_ASSERT(pCreateInfo->layout != VK_NULL_HANDLE); + PipelineLayout* pPipelineLayout = PipelineLayout::ObjectFromHandle(pCreateInfo->layout); + + // 2. 
Fill GraphicsPipelineBinaryCreateInfo + GraphicsPipelineBinaryCreateInfo binaryCreateInfo = {}; + GraphicsPipelineShaderStageInfo shaderStageInfo = {}; + ShaderModuleHandle tempModules[ShaderStage::ShaderStageGfxCount] = {}; + TempModuleState tempModuleStates[ShaderStage::ShaderStageGfxCount] = {}; + VbBindingInfo vbInfo = {}; + PipelineInternalBufferInfo internalBufferInfo = {}; + if (result == VK_SUCCESS) + { + result = BuildPipelineBinaryCreateInfo( + pDevice, + pCreateInfo, + pPipelineLayout, + pPipelineCache, + &binaryCreateInfo, + &shaderStageInfo, + &vbInfo, + &internalBufferInfo, + tempModules); + } + + // 3. Fill GraphicsPipelineObjectCreateInfo + GraphicsPipelineObjectCreateInfo objectCreateInfo = {}; + if (result == VK_SUCCESS) + { + GraphicsPipelineBinaryInfo binaryInfo = {}; + binaryInfo.pOptimizerKey = &binaryCreateInfo.pipelineProfileKey; + + BuildPipelineObjectCreateInfo( + pDevice, + pCreateInfo, + &vbInfo, + &binaryInfo, + pPipelineLayout, + &objectCreateInfo); + } + + if (result == VK_SUCCESS) + { + // 4. Create partial pipeline binary for fast-link + CreatePartialPipelineBinary( + pDevice, + &libInfo, + &shaderStageInfo, + objectCreateInfo.immedInfo.rasterizerDiscardEnable, + &binaryCreateInfo, + tempModules, + tempModuleStates); + + // 5. 
Create pipeline object + apiPsoHash = BuildApiHash(pCreateInfo, &objectCreateInfo); + + // Calculate object size + apiSize = sizeof(GraphicsPipelineLibrary); + size_t auxiliarySize = 0; + DumpGraphicsPipelineBinaryCreateInfo(&binaryCreateInfo, nullptr, &auxiliarySize); + + const size_t objSize = apiSize + auxiliarySize; + + // Allocate memory + pSysMem = pDevice->AllocApiObject(pAllocator, objSize); + + if (pSysMem == nullptr) + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + + if (result == VK_SUCCESS) + { + GraphicsPipelineBinaryCreateInfo* pBinInfo = + DumpGraphicsPipelineBinaryCreateInfo(&binaryCreateInfo, Util::VoidPtrInc(pSysMem, apiSize), nullptr); + + VK_PLACEMENT_NEW(pSysMem) GraphicsPipelineLibrary( + pDevice, + objectCreateInfo, + pBinInfo, + libInfo, + apiPsoHash, + tempModules, + tempModuleStates, + pPipelineLayout); + + *pPipeline = GraphicsPipelineLibrary::HandleFromVoidPointer(pSysMem); + + // Generate feedback info + PipelineCompiler* pCompiler = pDevice->GetCompiler(DefaultDeviceIndex); + + const VkPipelineCreationFeedbackCreateInfoEXT* pPipelineCreationFeedbackCreateInfo = nullptr; + pCompiler->GetPipelineCreationFeedback(static_cast(pCreateInfo->pNext), + &pPipelineCreationFeedbackCreateInfo); + + uint64_t durationTicks = Util::GetPerfCpuTime() - startTimeTicks; + uint64_t duration = vk::utils::TicksToNano(durationTicks); + pBinInfo->pipelineFeedback.feedbackValid = true; + pBinInfo->pipelineFeedback.duration = duration; + + bool hitPipelineCache = true; + bool containValidStage = false; + for (uint32_t i = 0; i < ShaderStage::ShaderStageGfxCount; ++i) + { + const bool isValidStage = (shaderStageInfo.stages[i].pModuleHandle == nullptr) ? 
+ false : pCompiler->IsValidShaderModule(shaderStageInfo.stages[i].pModuleHandle); + containValidStage |= isValidStage; + hitPipelineCache &= ((isValidStage == false) || + (pBinInfo->stageFeedback[i].hitApplicationCache == true)); + } + pBinInfo->pipelineFeedback.hitApplicationCache = (hitPipelineCache && containValidStage); + + pCompiler->SetPipelineCreationFeedbackInfo( + pPipelineCreationFeedbackCreateInfo, + pCreateInfo->stageCount, + pCreateInfo->pStages, + &pBinInfo->pipelineFeedback, + pBinInfo->stageFeedback); + } + + return result; +} + +// ===================================================================================================================== +VkResult GraphicsPipelineLibrary::Destroy( + Device* pDevice, + const VkAllocationCallbacks* pAllocator) +{ + // Free the temporary newly-built shader modules + uint32_t newShaderCount = 0; + for (uint32_t i = 0; i < ShaderStage::ShaderStageGfxCount; ++i) + { + if ((m_tempModuleStates[i].stage != ShaderStage::ShaderStageInvalid) && + (m_tempModuleStates[i].needFreeBinaryOnly == false)) + { + newShaderCount++; + } + else + { + break; + } + } + FreeTempModules(pDevice, newShaderCount, m_tempModules); + + // Free the shader binary for the modules whose ownership is not fully belong to current library + for (uint32_t i = newShaderCount; i < ShaderStage::ShaderStageGfxCount; ++i) + { + if ((m_tempModuleStates[i].stage != ShaderStage::ShaderStageInvalid) && + m_tempModuleStates[i].needFreeBinaryOnly) + { + PipelineCompiler::FreeGraphicsShaderBinary(m_tempModules + i); + } + else + { + break; + } + } + + return Pipeline::Destroy(pDevice, pAllocator); +} + +// ===================================================================================================================== +GraphicsPipelineLibrary::GraphicsPipelineLibrary( + Device* pDevice, + const GraphicsPipelineObjectCreateInfo& objectInfo, + const GraphicsPipelineBinaryCreateInfo* pBinaryInfo, + const GraphicsPipelineLibraryInfo& libInfo, + const 
uint64_t apiHash, + const ShaderModuleHandle* pTempModules, + const TempModuleState* pTempModuleStates, + PipelineLayout* pPipelineLayout) + : GraphicsPipelineCommon(pDevice), + m_objectCreateInfo(objectInfo), + m_pBinaryCreateInfo(pBinaryInfo), + m_libInfo(libInfo) +{ + Pipeline::Init( + nullptr, + pPipelineLayout, + nullptr, + objectInfo.staticStateMask, + apiHash); + + memcpy(m_tempModules, pTempModules, ShaderStage::ShaderStageGfxCount * sizeof(ShaderModuleHandle)); + memcpy(m_tempModuleStates, pTempModuleStates, ShaderStage::ShaderStageGfxCount * sizeof(TempModuleState)); +} + +// ===================================================================================================================== +const ShaderModuleHandle* GraphicsPipelineLibrary::GetShaderModuleHandle( + const ShaderStage stage + ) const +{ + const ShaderModuleHandle* pHandle = nullptr; + VkGraphicsPipelineLibraryFlagsEXT libFlag = 0; + + switch (stage) + { + case ShaderStage::ShaderStageVertex: + case ShaderStage::ShaderStageTessControl: + case ShaderStage::ShaderStageTessEval: + case ShaderStage::ShaderStageGeometry: + libFlag = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT; + break; + case ShaderStage::ShaderStageFragment: + libFlag = VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT; + break; + default: + VK_NEVER_CALLED(); + break; + } + + // Find shader module handle from temp modules in current library + if (libFlag & m_libInfo.libFlags) + { + for (uint32_t i = 0; i < ShaderStage::ShaderStageGfxCount; ++i) + { + if (stage == m_tempModuleStates[i].stage) + { + pHandle = m_tempModules + i; + break; + } + else if (ShaderStageInvalid == m_tempModuleStates[i].stage) + { + break; + } + } + } + // Find the shader module handle from parent library + else if (libFlag & (m_pBinaryCreateInfo->libFlags & ~m_libInfo.libFlags)) + { + if (libFlag == VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) + { + pHandle = 
m_libInfo.pPreRasterizationShaderLib->GetShaderModuleHandle(stage); + } + else if (libFlag == VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) + { + pHandle = m_libInfo.pFragmentShaderLib->GetShaderModuleHandle(stage); + } + } + + return pHandle; +} + +} diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index ccb3c104..5f37092f 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -216,9 +216,10 @@ void Image::ConvertImageCreateInfo( const ResourceOptimizerKey& resourceKey, Pal::ImageCreateInfo* pPalCreateInfo) { - VkImageUsageFlags imageUsage = pCreateInfo->usage; - const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); - VkFormat createInfoFormat = GetCreateInfoFormat(pCreateInfo, extStructs); + VkImageUsageFlags imageUsage = pCreateInfo->usage; + const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); + const Pal::DeviceProperties& palProperties = pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties(); + VkFormat createInfoFormat = GetCreateInfoFormat(pCreateInfo, extStructs); // VK_IMAGE_CREATE_EXTENDED_USAGE_BIT indicates that the image can be created with usage flags that are not // supported for the format the image is created with but are supported for at least one format a VkImageView @@ -243,6 +244,7 @@ void Image::ConvertImageCreateInfo( pPalCreateInfo->extent.width = pCreateInfo->extent.width; pPalCreateInfo->extent.height = pCreateInfo->extent.height; + pPalCreateInfo->extent.depth = pCreateInfo->extent.depth; pPalCreateInfo->imageType = VkToPalImageType(pCreateInfo->imageType); pPalCreateInfo->swizzledFormat = VkToPalFormat(createInfoFormat, settings); @@ -255,7 +257,7 @@ void Image::ConvertImageCreateInfo( if ((pPalCreateInfo->tilingOptMode == Pal::TilingOptMode::OptForSpace) && Pal::Formats::IsBlockCompressed(pPalCreateInfo->swizzledFormat.format) && - (pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gfxLevel > Pal::GfxIpLevel::GfxIp9)) + (palProperties.gfxLevel > 
Pal::GfxIpLevel::GfxIp9)) { pPalCreateInfo->tilingOptMode = Pal::TilingOptMode::Balanced; } @@ -270,7 +272,7 @@ void Image::ConvertImageCreateInfo( pPalCreateInfo->tilingPreference = settings.imageTilingPreference; } - pPalCreateInfo->flags.u32All = VkToPalImageCreateFlags(pCreateInfo->flags, createInfoFormat); + pPalCreateInfo->flags.u32All = VkToPalImageCreateFlags(pCreateInfo->flags, createInfoFormat, imageUsage); pPalCreateInfo->usageFlags = VkToPalImageUsageFlags( imageUsage, pCreateInfo->samples, @@ -409,7 +411,7 @@ void Image::ConvertImageCreateInfo( // Don't force DCC to be enabled for performance reasons unless the image is larger than the minimum size set for // compression, another performance optimization. // Don't force DCC to be enabled for shader write image on pre-gfx10 ASICs as DCC is unsupported in shader write. - const Pal::GfxIpLevel gfxLevel = pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gfxLevel; + const Pal::GfxIpLevel gfxLevel = palProperties.gfxLevel; if (((pPalCreateInfo->extent.width * pPalCreateInfo->extent.height) > (settings.disableSmallSurfColorCompressionSize * settings.disableSmallSurfColorCompressionSize)) && (Formats::IsColorFormat(createInfoFormat)) && diff --git a/icd/api/vk_image_view.cpp b/icd/api/vk_image_view.cpp index 3ae204fa..97a78b3c 100644 --- a/icd/api/vk_image_view.cpp +++ b/icd/api/vk_image_view.cpp @@ -316,6 +316,7 @@ Pal::Result ImageView::BuildColorTargetView( // ===================================================================================================================== Pal::Result ImageView::BuildDepthStencilView( + const Device* pDevice, const Pal::IDevice* pPalDevice, const Pal::IImage* pPalImage, VkImageViewType viewType, @@ -365,6 +366,13 @@ Pal::Result ImageView::BuildDepthStencilView( depthInfo.arraySize = zRange.extent; } +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 730 + if (pDevice->GetAppProfile() == AppProfile::AngleEngine) + { + depthInfo.flags.lowZplanePolyOffsetBits = 1; + } 
+#endif + Pal::Result result = pPalDevice->CreateDepthStencilView( depthInfo, pPalViewMemory, @@ -647,7 +655,8 @@ VkResult ImageView::Create( void* pPalMem = Util::VoidPtrInc(pMemory, depthViewSegmentOffset + (depthViewSegmentSize * deviceIdx)); - result = BuildDepthStencilView(pDevice->PalDevice(deviceIdx), + result = BuildDepthStencilView(pDevice, + pDevice->PalDevice(deviceIdx), pImage->PalImage(deviceIdx), pCreateInfo->viewType, imageViewUsage, diff --git a/icd/api/vk_instance.cpp b/icd/api/vk_instance.cpp index 05729446..b268644e 100644 --- a/icd/api/vk_instance.cpp +++ b/icd/api/vk_instance.cpp @@ -179,7 +179,7 @@ VkResult Instance::Create( if (!InstanceExtensions::EnableExtensions(pCreateInfo->ppEnabledExtensionNames, pCreateInfo->enabledExtensionCount, Instance::GetSupportedExtensions(), - enabledInstanceExtensions)) + &enabledInstanceExtensions)) { return VK_ERROR_EXTENSION_NOT_PRESENT; } @@ -541,10 +541,7 @@ VkResult Instance::Init( m_logTagIdMask = pPhysicalDevice->GetRuntimeSettings().logTagIdMask; AmdvlkLog(m_logTagIdMask, GeneralPrint, "%s Begin ********\n", GetApplicationName()); - } - if (status == VK_SUCCESS) - { InitDispatchTable(); } diff --git a/icd/api/vk_memory.cpp b/icd/api/vk_memory.cpp index b249e407..9df5ceca 100644 --- a/icd/api/vk_memory.cpp +++ b/icd/api/vk_memory.cpp @@ -113,7 +113,7 @@ VkResult Memory::Create( createInfo.heapCount = 1; createInfo.heaps[0] = pDevice->GetPalHeapFromVkTypeIndex(pAllocInfo->memoryTypeIndex); - if (pDevice->ShouldAddRemoteBackupHeap(DefaultDeviceIndex, pAllocInfo->memoryTypeIndex, createInfo.heaps[0])) + if (pDevice->OverallocationRequestedForPalHeap(createInfo.heaps[0])) { createInfo.heaps[createInfo.heapCount++] = Pal::GpuHeapGartUswc; @@ -152,7 +152,7 @@ VkResult Memory::Create( pDevice->VkPhysicalDevice(DefaultDeviceIndex)->IsOverrideHeapChoiceToLocalWithinBudget(createInfo.size)) { // When this setting is active (not supported by MGPU), prefer local visible before the requested heap until - // the 
allowable budget for it is reached. ShouldAddRemoteBackupHeap's choice may be updated here. + // the allowable budget for it is reached. OverallocationRequestedForPalHeap's choice may be updated here. createInfo.heaps[1] = createInfo.heaps[0]; createInfo.heaps[0] = Pal::GpuHeapLocal; } diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index 1bdab1c1..4a5cc8ed 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -352,7 +352,7 @@ PhysicalDevice::PhysicalDevice( memset(&m_queueFamilies, 0, sizeof(m_queueFamilies)); memset(&m_memoryProperties, 0, sizeof(m_memoryProperties)); memset(&m_gpaProps, 0, sizeof(m_gpaProps)); - memset(&m_memoryVkIndexAddRemoteBackupHeap, 0, sizeof(m_memoryVkIndexAddRemoteBackupHeap)); + for (uint32_t i = 0; i < Pal::GpuHeapCount; i++) { m_memoryPalHeapToVkIndexBits[i] = 0; // invalid bits @@ -874,18 +874,6 @@ VkResult PhysicalDevice::Initialize() if (m_memoryProperties.memoryHeaps[heapIndex].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { pMemoryType->propertyFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - - if (m_memoryProperties.memoryHeapCount > 1) - { - // Add back-up heap for parts with VRAM smaller than 8GB. - // Note: The back-up heap can also be added if overallocation is allowed via - // VK_AMD_memory_overallocation_behavior. - // Use m_heapVkToPal instead of palGpuHeap here to handle cases where multiple memory types - // share the same heap. 
- const Pal::gpusize heapSize = heapProperties[m_heapVkToPal[pMemoryType->heapIndex]].heapSize; - m_memoryVkIndexAddRemoteBackupHeap[memoryTypeIndex] = - (heapSize < settings.memoryRemoteBackupHeapMinHeapSize); - } } if (m_properties.gfxipProperties.flags.supportGl2Uncached) @@ -975,9 +963,6 @@ VkResult PhysicalDevice::Initialize() m_memoryTypeMask |= 1 << m_memoryProperties.memoryTypeCount; - m_memoryVkIndexAddRemoteBackupHeap[m_memoryProperties.memoryTypeCount] = - m_memoryVkIndexAddRemoteBackupHeap[memoryTypeIndex]; - ++m_memoryProperties.memoryTypeCount; } } @@ -1477,9 +1462,7 @@ size_t PhysicalDevice::GetFeatures( pFeatures->shaderInt64 = (PalProperties().gfxipProperties.flags.support64BitInstructions ? VK_TRUE : VK_FALSE); - if ((PalProperties().gfxipProperties.flags.support16BitInstructions) && - ((settings.optOnlyEnableFP16ForGfx9Plus == false) || - (PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp9))) + if (PalProperties().gfxipProperties.flags.support16BitInstructions) { pFeatures->shaderInt16 = VK_TRUE; } @@ -3862,6 +3845,8 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_DYNAMIC_RENDERING)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_PIPELINE_LIBRARY)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SHADER_INTEGER_DOT_PRODUCT)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_COPY_COMMANDS2)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW)); @@ -3917,16 +3902,13 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(AMD_SHADER_IMAGE_LOAD_STORE_LOD)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(AMD_SHADER_INFO)); + if ((pPhysicalDevice == nullptr) || pPhysicalDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead) { - if 
((pPhysicalDevice == nullptr) || pPhysicalDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead) - { - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(AMD_SHADER_FRAGMENT_MASK)); - } + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(AMD_SHADER_FRAGMENT_MASK)); } -#if VK_IS_PAL_VERSION_AT_LEAST(664, 1) + if ((pPhysicalDevice == nullptr) || (pPhysicalDevice->PalProperties().gfxipProperties.flags.supportTextureGatherBiasLod)) -#endif { availableExtensions.AddExtension(VK_DEVICE_EXTENSION(AMD_TEXTURE_GATHER_BIAS_LOD)); } @@ -3950,18 +3932,14 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( } if ((pPhysicalDevice == nullptr) || - ((pPhysicalDevice->PalProperties().gfxipProperties.flags.support16BitInstructions) && - ((pPhysicalDevice->GetRuntimeSettings().optOnlyEnableFP16ForGfx9Plus == false) || - (pPhysicalDevice->PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp9)))) + (pPhysicalDevice->PalProperties().gfxipProperties.flags.support16BitInstructions)) { // Deprecation by shaderFloat16 from VK_KHR_shader_float16_int8 availableExtensions.AddExtension(VK_DEVICE_EXTENSION(AMD_GPU_SHADER_HALF_FLOAT)); } if ((pPhysicalDevice == nullptr) || - ((pPhysicalDevice->PalProperties().gfxipProperties.flags.support16BitInstructions) && - ((pPhysicalDevice->GetRuntimeSettings().optOnlyEnableFP16ForGfx9Plus == false) || - (pPhysicalDevice->PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp9)))) + (pPhysicalDevice->PalProperties().gfxipProperties.flags.support16BitInstructions)) { // Deprecation by shaderFloat16 from VK_KHR_shader_float16_int8 and shaderInt16 availableExtensions.AddExtension(VK_DEVICE_EXTENSION(AMD_GPU_SHADER_INT16)); @@ -4135,7 +4113,7 @@ void PhysicalDevice::PopulateQueueFamilies() case Pal::EngineTypeUniversal: palImageLayoutFlag = Pal::LayoutUniversalEngine; transferGranularityOverride = settings.transferGranularityUniversalOverride; - m_queueFamilies[m_queueFamilyCount].validShaderStages = VK_SHADER_STAGE_ALL_GRAPHICS | + 
m_queueFamilies[m_queueFamilyCount].validShaderStages = ShaderStageAllGraphics | VK_SHADER_STAGE_COMPUTE_BIT; break; case Pal::EngineTypeCompute: @@ -4554,9 +4532,7 @@ void PhysicalDevice::GetPhysicalDeviceDotProduct16Properties( VkBool32* pIntegerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated ) const { - const VkBool32 int16DotSupport = ((PalProperties().gfxipProperties.flags.support16BitInstructions) && - ((GetRuntimeSettings().optOnlyEnableFP16ForGfx9Plus == false) || - (PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp9)) + const VkBool32 int16DotSupport = ((PalProperties().gfxipProperties.flags.support16BitInstructions) ) ? VK_TRUE : VK_FALSE; *pIntegerDotProduct16BitUnsignedAccelerated = int16DotSupport; @@ -4848,9 +4824,7 @@ void PhysicalDevice::GetPhysicalDevice16BitStorageFeatures( // Currently we seem to only support 16-bit inputs/outputs on ASICs supporting // 16-bit ALU. It's unclear at this point whether we can do any better. - if (PalProperties().gfxipProperties.flags.support16BitInstructions && - ((GetRuntimeSettings().optOnlyEnableFP16ForGfx9Plus == false) || - (PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp9))) + if (PalProperties().gfxipProperties.flags.support16BitInstructions) { *pStorageInputOutput16 = VK_TRUE; } @@ -5986,6 +5960,22 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT: + { + if (IsExtensionSupported(DeviceExtensions::EXT_GRAPHICS_PIPELINE_LIBRARY)) + { + auto* pExtInfo = reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->graphicsPipelineLibrary = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + } + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: { auto* pExtInfo = reinterpret_cast(pHeader); @@ -6900,6 +6890,17 @@ void PhysicalDevice::GetDeviceProperties2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT: + { + if 
(IsExtensionSupported(DeviceExtensions::EXT_GRAPHICS_PIPELINE_LIBRARY)) + { + auto* pProps = static_cast(pNext); + pProps->graphicsPipelineLibraryFastLinking = VK_TRUE; + pProps->graphicsPipelineLibraryIndependentInterpolationDecoration = VK_TRUE; + } + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES_KHR: { auto* pProps = static_cast(pNext); diff --git a/icd/api/vk_pipeline.cpp b/icd/api/vk_pipeline.cpp index cac956ad..fe903271 100644 --- a/icd/api/vk_pipeline.cpp +++ b/icd/api/vk_pipeline.cpp @@ -162,7 +162,7 @@ VkResult Pipeline::BuildShaderStageInfo( const Device* pDevice, const uint32_t stageCount, const VkPipelineShaderStageCreateInfo* pStages, - const bool duplicateExistingModules, + const bool isLibrary, uint32_t (*pfnGetOutputIdx)(const uint32_t inputIdx, const uint32_t stageIdx), ShaderStageInfo* pShaderStageInfo, @@ -176,6 +176,9 @@ VkResult Pipeline::BuildShaderStageInfo( uint32_t numNewModules = 0; + const bool duplicateExistingModules = isLibrary; + const bool adaptForFaskLink = isLibrary; + for (uint32_t i = 0; i < stageCount; ++i) { const VkPipelineShaderStageCreateInfo& stageInfo = pStages[i]; @@ -206,7 +209,7 @@ VkResult Pipeline::BuildShaderStageInfo( if (stageInfo.module != VK_NULL_HANDLE) { - // TODO: It's better to copy the compiled shader modules rather than compile them again. + // Shader needs to be recompiled with additional options for compatibility with fast-link mode const ShaderModule* pModule = ShaderModule::ObjectFromHandle(stageInfo.module); codeSize = pModule->GetCodeSize(); pCode = pModule->GetCode(); @@ -230,7 +233,8 @@ VkResult Pipeline::BuildShaderStageInfo( PipelineBinaryCache* pBinaryCache = (pCache == nullptr) ? 
nullptr : pCache->GetPipelineCache(); result = pCompiler->BuildShaderModule( - pDevice, flags, codeSize, pCode, pBinaryCache, pShaderFeedback, &pTempModules[numNewModules]); + pDevice,flags, codeSize, pCode, adaptForFaskLink, + pBinaryCache, pShaderFeedback, &pTempModules[numNewModules]); if (result != VK_SUCCESS) { diff --git a/icd/api/vk_pipeline_layout.cpp b/icd/api/vk_pipeline_layout.cpp index 28b6a064..78a6397b 100644 --- a/icd/api/vk_pipeline_layout.cpp +++ b/icd/api/vk_pipeline_layout.cpp @@ -41,6 +41,26 @@ namespace vk { +// ===================================================================================================================== +// Returns TRUE if user data should be reserved for uber fetch shader constant buffer +// This function doesn't control whether uber fetch shader is used in practice. There would be no additional user data +// or SGPR allocated if user data is reserved while uber fetch shader is not used during pipeline creation. +template +static bool IsUberFetchShaderEnabled(const Device* pDevice) +{ + bool enabled = false; + + if (pDevice->GetRuntimeSettings().enableUberFetchShader || + (pDevice->GetRuntimeSettings().enableEarlyCompile && (scheme == PipelineLayoutScheme::Compact)) || + pDevice->IsExtensionEnabled(DeviceExtensions::EXT_GRAPHICS_PIPELINE_LIBRARY) + ) + { + enabled = true; + } + + return enabled; +} + // ===================================================================================================================== // Generates the API hash using the contents of the VkPipelineLayoutCreateInfo struct uint64_t PipelineLayout::BuildApiHash( @@ -204,28 +224,32 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( // Finally, the vertex buffer table pointer is in the last user data register when applicable. // This allocation allows the descriptor set bindings to easily persist across pipeline switches. 
- VkResult result = VK_SUCCESS; - auto* pUserDataLayout = &pInfo->userDataLayout.compact; + VkResult result = VK_SUCCESS; + auto* pUserDataLayout = &pInfo->userDataLayout.compact; + + const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); memset(pPipelineInfo, 0, sizeof(PipelineInfo)); memset(&(pInfo->userDataLayout), 0, sizeof(UserDataLayout)); pInfo->userDataLayout.scheme = PipelineLayoutScheme::Compact; - // Always allocates 1 extra user data node for the vertex buffer table pointer - pPipelineInfo->numUserDataNodes = 1; - - if (pDevice->GetRuntimeSettings().enableEarlyCompile) + if (settings.enableEarlyCompile) { // Early compile mode will enable uber-fetch shader and spec constant buffer on vertex shader and - // fragment shader implicitly. so we need three reserved node. + // fragment shader implicitly. So we need three reserved nodes. + // Each buffer consumes 2 user data registers now. pPipelineInfo->numUserDataNodes += 3; - pInfo->userDataRegCount += 6; // Each buffer consume 2 user data register now. + pInfo->userDataRegCount += 3 * InternalConstBufferRegCount; + + pUserDataLayout->uberFetchConstBufRegBase = FetchShaderInternalBufferOffset; + pUserDataLayout->specConstBufVertexRegBase = SpecConstBufferVertexOffset; + pUserDataLayout->specConstBufFragmentRegBase = SpecConstBufferFragmentOffset; } - else if (pDevice->GetRuntimeSettings().enableUberFetchShader) + else { - // Reserve one user data nodes for uber-fetch shader.
- pPipelineInfo->numUserDataNodes += 1; - pInfo->userDataRegCount += 2; + pUserDataLayout->uberFetchConstBufRegBase = InvalidReg; + pUserDataLayout->specConstBufVertexRegBase = InvalidReg; + pUserDataLayout->specConstBufFragmentRegBase = InvalidReg; } VK_ASSERT(pIn->setLayoutCount <= MaxDescriptorSets); @@ -233,6 +257,8 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( // Total number of dynamic descriptors across all descriptor sets uint32_t totalDynDescCount = 0; + const uint32_t pushConstRegCount = pushConstantsSizeInBytes / sizeof(uint32_t); + // Populate user data layouts for each descriptor set that is active pUserDataLayout->setBindingRegBase = pInfo->userDataRegCount; @@ -252,42 +278,45 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( // Test if this set is active in at least one stage if (setLayoutInfo.activeStageMask != 0) { - // Accumulate the space needed by all resource nodes for this set - pPipelineInfo->numRsrcMapNodes += setLayoutInfo.sta.numRsrcMapNodes; - - // Add count for FMASK nodes - if (pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead) { + // Accumulate the space needed by all resource nodes for this set pPipelineInfo->numRsrcMapNodes += setLayoutInfo.sta.numRsrcMapNodes; - } - // Add space for the user data node entries needed for dynamic descriptors - pPipelineInfo->numUserDataNodes += setLayoutInfo.dyn.numRsrcMapNodes; + // Add count for FMASK nodes + if (settings.enableFmaskBasedMsaaRead) + { + pPipelineInfo->numRsrcMapNodes += setLayoutInfo.sta.numRsrcMapNodes; + } - // Add space for immutable sampler descriptor storage needed by the set - pPipelineInfo->numDescRangeValueNodes += setLayoutInfo.imm.numDescriptorValueNodes; + // Add space for the user data node entries needed for dynamic descriptors + pPipelineInfo->numUserDataNodes += setLayoutInfo.dyn.numRsrcMapNodes; - // Reserve user data register space for dynamic descriptor data - pSetUserData->dynDescDataRegOffset = pSetUserData->firstRegOffset + 
pSetUserData->totalRegCount; + // Add space for immutable sampler descriptor storage needed by the set + pPipelineInfo->numDescRangeValueNodes += setLayoutInfo.imm.numDescriptorValueNodes; - pSetUserData->totalRegCount += pSetUserData->dynDescCount * DescriptorSetLayout::GetDynamicBufferDescDwSize(pDevice); + // Reserve user data register space for dynamic descriptor data + pSetUserData->dynDescDataRegOffset = pSetUserData->firstRegOffset + pSetUserData->totalRegCount; - totalDynDescCount += setLayoutInfo.numDynamicDescriptors; + pSetUserData->totalRegCount += pSetUserData->dynDescCount * + DescriptorSetLayout::GetDynamicBufferDescDwSize(pDevice); - if (setLayoutInfo.sta.numRsrcMapNodes > 0) - { - // If the set has a static portion reserve an extra user data node entry for the set pointer - pPipelineInfo->numUserDataNodes++; + totalDynDescCount += setLayoutInfo.numDynamicDescriptors; - // In this case we also reserve the user data for the set pointer - pSetUserData->setPtrRegOffset = pSetUserData->firstRegOffset + pSetUserData->totalRegCount; - pSetUserData->totalRegCount += SetPtrRegCount; + if (setLayoutInfo.sta.numRsrcMapNodes > 0) + { + // If the set has a static portion reserve an extra user data node entry for the set pointer + pPipelineInfo->numUserDataNodes++; + + // In this case we also reserve the user data for the set pointer + pSetUserData->setPtrRegOffset = pSetUserData->firstRegOffset + pSetUserData->totalRegCount; + pSetUserData->totalRegCount += SetPtrRegCount; + } } } // Add the number of user data regs used by this set to the total count for the whole layout pInfo->userDataRegCount += pSetUserData->totalRegCount; - if (pDevice->GetRuntimeSettings().pipelineLayoutMode == PipelineLayoutAngle) + if (settings.pipelineLayoutMode == PipelineLayoutAngle) { // Force next set firstRegOffset align to AngleDescPattern. 
if ((i + 1) < Util::ArrayLen(AngleDescPattern::DescriptorSetOffset)) @@ -308,8 +337,6 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( // Allocate user data for push constants pPipelineInfo->numUserDataNodes += pushConstantsUserDataNodeCount; - uint32_t pushConstRegCount = pushConstantsSizeInBytes / sizeof(uint32_t); - pUserDataLayout->pushConstRegBase = pInfo->userDataRegCount; pUserDataLayout->pushConstRegCount = pushConstRegCount; pInfo->userDataRegCount += pushConstRegCount; @@ -323,8 +350,23 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( pPipelineInfo->numUserDataNodes += 1; } + // Reserve user data nodes for vertex buffer table + // Info::userDataRegCount is not increased since this node is always appended at the bottom of user data table + // Same for constant buffer for uber-fetch shader + pPipelineInfo->numUserDataNodes += 1; // In case we need an internal vertex buffer table, add nodes required for its entries, and its set pointer. - pPipelineInfo->numRsrcMapNodes += Pal::MaxVertexBuffers; + pPipelineInfo->numRsrcMapNodes += Pal::MaxVertexBuffers; + + // If uber-fetch shader is not enabled for early compile, the user data entries for uber-fetch shader const + // buffer is appended at the bottom of user data table. Just following vertex buffer table. + if (IsUberFetchShaderEnabled(pDevice) && + (pDevice->GetRuntimeSettings().enableEarlyCompile == false)) + { + VK_ASSERT(pUserDataLayout->uberFetchConstBufRegBase == InvalidReg); + + pUserDataLayout->uberFetchConstBufRegBase = pInfo->userDataRegCount + VbTablePtrRegCount; + pPipelineInfo->numUserDataNodes += 1; + } // Calculate the buffer size necessary for all resource mapping pPipelineInfo->mappingBufferSize = @@ -357,14 +399,14 @@ VkResult PipelineLayout::BuildIndirectSchemeInfo( // // The user data registers for various resources is allocated in the following fashion: // 1. one user data entry for the vertex buffer table pointer - // 2. 
one user data entry for the push constant buffer pointer - // 3. one user data entry for transform feedback buffer (if extension is enabled) - // 5. MaxDescriptorSets sets of user data entries which store the information for each descriptor set. Each set + // 2. two user data entries for constant buffer required by uber-fetch shader (if extension or setting requires) + // 3. one user data entry for the push constant buffer pointer + // 4. one user data entry for transform feedback buffer (if extension is enabled) + // 5. MaxDescriptorSets sets of user data entries which store the information for each descriptor set. Each set // contains 2 user data entry: the 1st is for the dynamic descriptors and the 2nd is for static descriptors. // // TODO: The following features have not been supported by indirect scheme: - // 1. Uber-fetch shader - // 2. PipelineLayoutAngle mode + // 1. PipelineLayoutAngle mode VK_ASSERT(pIn->setLayoutCount <= MaxDescriptorSets); VK_ASSERT(pDevice->GetRuntimeSettings().pipelineLayoutMode != PipelineLayoutAngle); @@ -385,6 +427,14 @@ VkResult PipelineLayout::BuildIndirectSchemeInfo( pPipelineInfo->numRsrcMapNodes += Pal::MaxVertexBuffers; pInfo->userDataRegCount += 1; + // Allocate user data for constant buffer used by uber-fetch shader + if (IsUberFetchShaderEnabled(pDevice)) + { + pUserDataLayout->uberFetchConstBufRegBase = pInfo->userDataRegCount; + pPipelineInfo->numUserDataNodes += 1; + pInfo->userDataRegCount += InternalConstBufferRegCount; + } + // Allocate user data for push constant buffer pointer pUserDataLayout->pushConstPtrRegBase = pInfo->userDataRegCount; pUserDataLayout->pushConstSizeInDword = pushConstantsSizeInBytes / sizeof(uint32_t); @@ -496,6 +546,11 @@ PipelineLayoutScheme PipelineLayout::DeterminePipelineLayoutScheme( switch (settings.pipelineLayoutSchemeSelectionStrategy) { case AppControlled: + if (pIn->flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT) + { + scheme = PipelineLayoutScheme::Indirect; + } + else {
scheme = PipelineLayoutScheme::Compact; } @@ -582,6 +637,12 @@ VkResult PipelineLayout::Create( { const DescriptorSetLayout* pLayout = DescriptorSetLayout::ObjectFromHandle(pCreateInfo->pSetLayouts[i]); + // If VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT is not set, pLayout must be a valid handle + if ((pCreateInfo->flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT) == 0) + { + VK_ASSERT(pLayout != nullptr); + } + if (pLayout != nullptr) { ppSetLayouts[i] = reinterpret_cast(Util::VoidPtrInc(pSysMem, currentSetLayoutOffset)); @@ -663,155 +724,79 @@ Vkgc::ResourceMappingNodeType PipelineLayout::MapLlpcResourceNodeType( } // ===================================================================================================================== -// Builds the VKGC resource mapping nodes for the static descriptors in a descriptor set -void PipelineLayout::BuildLlpcStaticSetMapping( - const DescriptorSetLayout* pLayout, - const uint32_t visibility, - const uint32_t setIndex, - Vkgc::ResourceMappingNode* pNodes, - uint32_t* pNodeCount, - Vkgc::StaticDescriptorValue* pDescriptorRangeValue, - uint32_t* pDescriptorRangeCount +// Builds the VKGC resource mapping node for a static descriptor in a descriptor set +void PipelineLayout::BuildLlpcStaticMapping( + const DescriptorSetLayout* pLayout, + const uint32_t visibility, + const uint32_t setIndex, + const DescriptorSetLayout::BindingInfo& binding, + Vkgc::ResourceMappingNode* pNode, + Vkgc::StaticDescriptorValue* pDescriptorRangeValue, + uint32_t* pDescriptorRangeCount ) const { - *pNodeCount = 0; - *pDescriptorRangeCount = 0; - for (uint32_t bindingIndex = 0; bindingIndex < pLayout->Info().count; ++bindingIndex) + pNode->type = MapLlpcResourceNodeType(binding.info.descriptorType); + pNode->offsetInDwords = binding.sta.dwOffset; + pNode->sizeInDwords = binding.sta.dwSize; + pNode->srdRange.binding = binding.info.binding; + pNode->srdRange.set = setIndex; + + if (binding.imm.dwSize > 0) { - const 
DescriptorSetLayout::BindingInfo& binding = pLayout->Binding(bindingIndex); + const uint32_t arraySize = binding.imm.dwSize / binding.imm.dwArrayStride; + const uint32_t* pImmutableSamplerData = pLayout->Info().imm.pImmutableSamplerData + + binding.imm.dwOffset; - if (binding.sta.dwSize > 0) + if (binding.bindingFlags.ycbcrConversionUsage == 0) { - Vkgc::ResourceMappingNode* pNode = pNodes + *pNodeCount; - - pNode->type = MapLlpcResourceNodeType(binding.info.descriptorType); - pNode->offsetInDwords = binding.sta.dwOffset; - pNode->sizeInDwords = binding.sta.dwSize; - pNode->srdRange.binding = binding.info.binding; - pNode->srdRange.set = setIndex; - (*pNodeCount)++; - - if (binding.imm.dwSize > 0) - { - const uint32_t arraySize = binding.imm.dwSize / binding.imm.dwArrayStride; - const uint32_t* pImmutableSamplerData = pLayout->Info().imm.pImmutableSamplerData + - binding.imm.dwOffset; + pDescriptorRangeValue->type = Vkgc::ResourceMappingNodeType::DescriptorSampler; + } + else + { + pNode->type = Vkgc::ResourceMappingNodeType::DescriptorYCbCrSampler; + pDescriptorRangeValue->type = Vkgc::ResourceMappingNodeType::DescriptorYCbCrSampler; + } - if (binding.bindingFlags.ycbcrConversionUsage == 0) - { - pDescriptorRangeValue->type = Vkgc::ResourceMappingNodeType::DescriptorSampler; - } - else - { - pNode->type = Vkgc::ResourceMappingNodeType::DescriptorYCbCrSampler; - pDescriptorRangeValue->type = Vkgc::ResourceMappingNodeType::DescriptorYCbCrSampler; - } + pDescriptorRangeValue->set = setIndex; + pDescriptorRangeValue->binding = binding.info.binding; + pDescriptorRangeValue->pValue = pImmutableSamplerData; + pDescriptorRangeValue->arraySize = arraySize; + pDescriptorRangeValue->visibility = visibility; - pDescriptorRangeValue->set = setIndex; - pDescriptorRangeValue->binding = binding.info.binding; - pDescriptorRangeValue->pValue = pImmutableSamplerData; - pDescriptorRangeValue->arraySize = arraySize; - pDescriptorRangeValue->visibility = visibility; - 
++pDescriptorRangeValue; - ++(*pDescriptorRangeCount); - } - } + ++(*pDescriptorRangeCount); } } // ===================================================================================================================== -// Fill a root resource mapping node for a dynamic descriptor node -template <> -void PipelineLayout::FillDynamicSetNode( - const Vkgc::ResourceMappingNodeType type, - const uint32_t visibility, +// Builds the VKGC resource mapping node for a dynamic descriptor in a descriptor set +void PipelineLayout::BuildLlpcDynamicMapping( const uint32_t setIndex, - const DescriptorSetLayout::BindingInfo& binding, const uint32_t userDataRegBase, - Vkgc::ResourceMappingRootNode* pNode - ) const -{ - pNode->node.type = type; - pNode->node.offsetInDwords = userDataRegBase + binding.dyn.dwOffset; - pNode->node.sizeInDwords = binding.dyn.dwSize; - pNode->node.srdRange.binding = binding.info.binding; - pNode->node.srdRange.set = setIndex; - pNode->visibility = visibility; -} - -// ===================================================================================================================== -// Fill a normal resource mapping node for a dynamic descriptor node -template <> -void PipelineLayout::FillDynamicSetNode( - const Vkgc::ResourceMappingNodeType type, - const uint32_t visibility, - const uint32_t setIndex, const DescriptorSetLayout::BindingInfo& binding, - const uint32_t userDataRegBase, Vkgc::ResourceMappingNode* pNode ) const { - pNode->type = type; - pNode->offsetInDwords = binding.dyn.dwOffset; - pNode->sizeInDwords = binding.dyn.dwSize; - pNode->srdRange.binding = binding.info.binding; - pNode->srdRange.set = setIndex; -} - -// ===================================================================================================================== -// Builds the VKGC resource mapping nodes for the dynamic descriptors in a descriptor set -template -void PipelineLayout::BuildLlpcDynamicSetMapping( - const DescriptorSetLayout* pLayout, - const uint32_t 
visibility, - const uint32_t setIndex, - const uint32_t userDataRegBase, - NodeType* pNodes, - uint32_t* pNodeCount - ) const -{ - static_assert(std::is_same::value || - std::is_same::value, - "Unexpected resouce mapping node type!"); - - *pNodeCount = 0; + VK_ASSERT((binding.info.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) || + (binding.info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)); - for (uint32_t bindingIndex = 0; bindingIndex < pLayout->Info().count; ++bindingIndex) + if (binding.info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) { - const DescriptorSetLayout::BindingInfo& binding = pLayout->Binding(bindingIndex); - - if (binding.dyn.dwSize > 0) - { - VK_ASSERT((binding.info.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) || - (binding.info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)); - - Vkgc::ResourceMappingNodeType nodeType = Vkgc::ResourceMappingNodeType::Unknown; - if (binding.info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) - { - nodeType = (binding.dyn.dwArrayStride == 2) ? - Vkgc::ResourceMappingNodeType::DescriptorBufferCompact : - Vkgc::ResourceMappingNodeType::DescriptorBuffer; - - } - else - { - nodeType = (binding.dyn.dwArrayStride == 2) ? - Vkgc::ResourceMappingNodeType::DescriptorConstBufferCompact : - Vkgc::ResourceMappingNodeType::DescriptorConstBuffer; - } - - FillDynamicSetNode( - nodeType, - visibility, - setIndex, - binding, - userDataRegBase, - pNodes + *pNodeCount); - - (*pNodeCount)++; - } + pNode->type = (binding.dyn.dwArrayStride == 2) ? + Vkgc::ResourceMappingNodeType::DescriptorBufferCompact : + Vkgc::ResourceMappingNodeType::DescriptorBuffer; } + else + { + pNode->type = (binding.dyn.dwArrayStride == 2) ? 
+ Vkgc::ResourceMappingNodeType::DescriptorConstBufferCompact : + Vkgc::ResourceMappingNodeType::DescriptorConstBuffer; + } + + pNode->offsetInDwords = userDataRegBase + binding.dyn.dwOffset; + pNode->sizeInDwords = binding.dyn.dwSize; + pNode->srdRange.binding = binding.info.binding; + pNode->srdRange.set = setIndex; } // ===================================================================================================================== @@ -824,8 +809,6 @@ void PipelineLayout::BuildLlpcVertexBufferTableMapping( uint32_t* pNodeCount ) const { - *pNodeCount = 0; - if (pVbInfo != nullptr) { // Build the table description itself @@ -839,7 +822,7 @@ void PipelineLayout::BuildLlpcVertexBufferTableMapping( pNode->node.userDataPtr.sizeInDwords = vbTableSize; pNode->visibility = Vkgc::ShaderStageVertexBit; - *pNodeCount = 1; + ++(*pNodeCount); } } @@ -856,8 +839,6 @@ void PipelineLayout::BuildLlpcTransformFeedbackMapping( uint32_t xfbStages = (stageMask & (Vkgc::ShaderStageFragmentBit - 1)) >> 1; uint32_t lastXfbStageBit = Vkgc::ShaderStageVertexBit; - *pNodeCount = 0; - while (xfbStages > 0) { lastXfbStageBit <<= 1; @@ -871,12 +852,34 @@ void PipelineLayout::BuildLlpcTransformFeedbackMapping( pNode->node.sizeInDwords = sizeInDwords; pNode->visibility = lastXfbStageBit; - *pNodeCount = 1; + ++(*pNodeCount); } } // ===================================================================================================================== -// Populates the resouce mapping nodes in compact scheme +void PipelineLayout::BuildLlpcInternalConstantBufferMapping( + const uint32_t stageMask, + const uint32_t offsetInDwords, + const uint32_t binding, + Vkgc::ResourceMappingRootNode* pNode, + uint32_t* pNodeCount + ) const +{ + if (stageMask != 0) + { + pNode->node.type = Vkgc::ResourceMappingNodeType::DescriptorBufferCompact; + pNode->node.offsetInDwords = offsetInDwords; + pNode->node.sizeInDwords = InternalConstBufferRegCount; + pNode->node.srdRange.set = Vkgc::InternalDescriptorSetId; + 
pNode->node.srdRange.binding = binding; + pNode->visibility = stageMask; + + ++(*pNodeCount); + } +} + +// ===================================================================================================================== +// Populates the resource mapping nodes in compact scheme VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( const uint32_t stageMask, const VbBindingInfo* pVbInfo, @@ -887,8 +890,9 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( { VK_ASSERT(m_info.userDataLayout.scheme == PipelineLayoutScheme::Compact); - VkResult result = VK_SUCCESS; - const auto& userDataLayout = m_info.userDataLayout.compact; + VkResult result = VK_SUCCESS; + const auto& userDataLayout = m_info.userDataLayout.compact; + const bool enableEarlyCompile = m_pDevice->GetRuntimeSettings().enableEarlyCompile; Vkgc::ResourceMappingRootNode* pUserDataNodes = static_cast(pBuffer); Vkgc::ResourceMappingNode* pResourceNodes = @@ -900,48 +904,29 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( uint32_t mappingNodeCount = 0; // Number of consumed ResourceMappingNodes (only sub-nodes) uint32_t descriptorRangeCount = 0; // Number of consumed StaticResourceValues - constexpr uint32_t InternalCbRegCount = 2; - - if (appendFetchShaderCb && pVbInfo != nullptr) + if (enableEarlyCompile) { - // Append node for uber fetch shader constant buffer - auto pFetchShaderCbNode = &pUserDataNodes[userDataNodeCount]; - pFetchShaderCbNode->node.type = Vkgc::ResourceMappingNodeType::DescriptorBufferCompact; - pFetchShaderCbNode->node.offsetInDwords = FetchShaderInternalBufferOffset; - pFetchShaderCbNode->node.sizeInDwords = InternalCbRegCount; - pFetchShaderCbNode->node.srdRange.set = Vkgc::InternalDescriptorSetId; - pFetchShaderCbNode->node.srdRange.binding = Vkgc::FetchShaderInternalBufferBinding; - pFetchShaderCbNode->visibility = Vkgc::ShaderStageVertexBit; - - userDataNodeCount += 1; - } + VK_ASSERT(userDataLayout.specConstBufVertexRegBase == 
SpecConstBufferVertexOffset); + VK_ASSERT(userDataLayout.specConstBufFragmentRegBase == SpecConstBufferFragmentOffset); - if (m_pDevice->GetRuntimeSettings().enableEarlyCompile) - { if (stageMask & Vkgc::ShaderStageVertexBit) { - auto pSpecConstVertexCbNode = &pUserDataNodes[userDataNodeCount]; - pSpecConstVertexCbNode->node.type = Vkgc::ResourceMappingNodeType::DescriptorBufferCompact; - pSpecConstVertexCbNode->node.offsetInDwords = SpecConstBufferVertexOffset; - pSpecConstVertexCbNode->node.sizeInDwords = InternalCbRegCount; - pSpecConstVertexCbNode->node.srdRange.set = Vkgc::InternalDescriptorSetId; - pSpecConstVertexCbNode->node.srdRange.binding = SpecConstVertexInternalBufferBindingId; - pSpecConstVertexCbNode->visibility = Vkgc::ShaderStageVertexBit; - - userDataNodeCount += 1; + BuildLlpcInternalConstantBufferMapping( + Vkgc::ShaderStageVertexBit, + userDataLayout.specConstBufVertexRegBase, + SpecConstVertexInternalBufferBindingId, + &pUserDataNodes[userDataNodeCount], + &userDataNodeCount); } if (stageMask & Vkgc::ShaderStageFragmentBit) { - auto pSpecConstVertexCbNode = &pUserDataNodes[userDataNodeCount]; - pSpecConstVertexCbNode->node.type = Vkgc::ResourceMappingNodeType::DescriptorBufferCompact; - pSpecConstVertexCbNode->node.offsetInDwords = SpecConstBufferFragmentOffset; - pSpecConstVertexCbNode->node.sizeInDwords = InternalCbRegCount; - pSpecConstVertexCbNode->node.srdRange.set = Vkgc::InternalDescriptorSetId; - pSpecConstVertexCbNode->node.srdRange.binding = SpecConstFragmentInternalBufferBindingId; - pSpecConstVertexCbNode->visibility = Vkgc::ShaderStageVertexBit; - - userDataNodeCount += 1; + BuildLlpcInternalConstantBufferMapping( + Vkgc::ShaderStageFragmentBit, + userDataLayout.specConstBufFragmentRegBase, + SpecConstFragmentInternalBufferBindingId, + &pUserDataNodes[userDataNodeCount], + &userDataNodeCount); } } @@ -956,39 +941,47 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( if (visibility != 0) { // Build the resource 
mapping nodes for the contents of this set. - auto pDynNodes = &pUserDataNodes[userDataNodeCount]; - auto pStaNodes = &pResourceNodes[mappingNodeCount]; - auto pDescValues = &pDescriptorRangeValues[descriptorRangeCount]; - - uint32_t dynNodeCount = 0; - uint32_t staNodeCount = 0; - uint32_t descRangeCount = 0; - - BuildLlpcDynamicSetMapping( - pSetLayout, - visibility, - setIndex, - userDataLayout.setBindingRegBase + pSetUserData->dynDescDataRegOffset, - pDynNodes, - &dynNodeCount); - - BuildLlpcStaticSetMapping( - pSetLayout, - visibility, - setIndex, - pStaNodes, - &staNodeCount, - pDescValues, - &descRangeCount); + auto pStaNodes = &pResourceNodes[mappingNodeCount]; + uint32_t staNodeCount = 0; - // Increase the number of mapping nodes used by the number of static section nodes added. - mappingNodeCount += staNodeCount; + for (uint32_t bindingIndex = 0; bindingIndex < pSetLayout->Info().count; ++bindingIndex) + { + const DescriptorSetLayout::BindingInfo& binding = pSetLayout->Binding(bindingIndex); - // Increase the number of user data nodes used by the number of dynamic section nodes added. - userDataNodeCount += dynNodeCount; + if (binding.dyn.dwSize > 0) + { + auto* pDynNode = &pUserDataNodes[userDataNodeCount++]; + + pDynNode->visibility = visibility; + + BuildLlpcDynamicMapping( + setIndex, + userDataLayout.setBindingRegBase + pSetUserData->dynDescDataRegOffset, + binding, + &pDynNode->node); + } + + if (binding.sta.dwSize > 0) + { + Vkgc::ResourceMappingNode* pNode = nullptr; + + { + pNode = &pStaNodes[staNodeCount++]; + } + + BuildLlpcStaticMapping( + pSetLayout, + visibility, + setIndex, + binding, + pNode, + &pDescriptorRangeValues[descriptorRangeCount], + &descriptorRangeCount); + } + } - // Increase the number of descriptor range value nodes used by immutable samplers - descriptorRangeCount += descRangeCount; + // Increase the number of mapping nodes used by the number of static section nodes added. 
+ mappingNodeCount += staNodeCount; // Add a top-level user data node entry for this set's pointer if there are static nodes. if (pSetUserData->setPtrRegOffset != InvalidReg) @@ -1022,32 +1015,45 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( if (userDataLayout.transformFeedbackRegCount > 0) { - uint32_t nodeCount; - BuildLlpcTransformFeedbackMapping( stageMask, userDataLayout.transformFeedbackRegBase, userDataLayout.transformFeedbackRegCount, &pUserDataNodes[userDataNodeCount], - &nodeCount); - - userDataNodeCount += nodeCount; + &userDataNodeCount); } if (pVbInfo != nullptr) { - // Build the internal vertex buffer table mapping - constexpr uint32_t VbTablePtrRegCount = 1; // PAL requires all indirect user data tables to be 1DW + const uint32_t tailingVertexBufferRegCount = + (appendFetchShaderCb && (enableEarlyCompile == false)) ? + (VbTablePtrRegCount + InternalConstBufferRegCount) : VbTablePtrRegCount; - if ((m_info.userDataRegCount + VbTablePtrRegCount) <= + if ((m_info.userDataRegCount + tailingVertexBufferRegCount) <= m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gfxipProperties.maxUserDataEntries) { - uint32_t nodeCount; - BuildLlpcVertexBufferTableMapping( - pVbInfo, m_info.userDataRegCount, VbTablePtrRegCount, &pUserDataNodes[userDataNodeCount], &nodeCount); + pVbInfo, + m_info.userDataRegCount, + VbTablePtrRegCount, + &pUserDataNodes[userDataNodeCount], + &userDataNodeCount); - userDataNodeCount += nodeCount; + if (appendFetchShaderCb) + { + VK_ASSERT((enableEarlyCompile == false) || + (userDataLayout.uberFetchConstBufRegBase == FetchShaderInternalBufferOffset)); + VK_ASSERT((enableEarlyCompile == true) || + (userDataLayout.uberFetchConstBufRegBase == m_info.userDataRegCount + VbTablePtrRegCount)); + + // Append node for uber fetch shader constant buffer + BuildLlpcInternalConstantBufferMapping( + Vkgc::ShaderStageVertexBit, + userDataLayout.uberFetchConstBufRegBase, + Vkgc::FetchShaderInternalBufferBinding, + 
&pUserDataNodes[userDataNodeCount], + &userDataNodeCount); + } } else { @@ -1069,10 +1075,11 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( } // ===================================================================================================================== -// Populates the resouce mapping nodes in indirect scheme +// Populates the resource mapping nodes in indirect scheme void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( const uint32_t stageMask, const VbBindingInfo* pVbInfo, + const bool appendFetchShaderCb, void* pBuffer, Vkgc::ResourceMappingData* pResourceMapping ) const @@ -1084,11 +1091,15 @@ void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( constexpr uint32_t TransformFeedbackRegCount = 1; constexpr uint32_t DescSetsPtrRegCount = 2 * SetPtrRegCount * MaxDescriptorSets; + const bool uberFetchShaderEnabled = IsUberFetchShaderEnabled(m_pDevice); const bool transformFeedbackEnabled = m_pDevice->IsExtensionEnabled(DeviceExtensions::EXT_TRANSFORM_FEEDBACK); - const uint32_t vbTablePtrRegBase = 0; - const uint32_t pushConstPtrRegBase = vbTablePtrRegBase + VbTablePtrRegCount; + const uint32_t vbTablePtrRegBase = 0; + const uint32_t uberFetchCbRegBase = + uberFetchShaderEnabled ? (vbTablePtrRegBase + VbTablePtrRegCount) : InvalidReg; + const uint32_t pushConstPtrRegBase = + uberFetchShaderEnabled ? uberFetchCbRegBase + InternalConstBufferRegCount : vbTablePtrRegBase + VbTablePtrRegCount; const uint32_t transformFeedbackRegBase = (transformFeedbackEnabled == false) ? 
InvalidReg : (pushConstPtrRegBase + PushConstPtrRegCount); const uint32_t setBindingPtrRegBase = @@ -1108,20 +1119,29 @@ void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( uint32_t descriptorRangeCount = 0; // Number of consumed StaticResourceValues // Build the internal vertex buffer table mapping - if (pVbInfo != nullptr) + BuildLlpcVertexBufferTableMapping( + pVbInfo, + vbTablePtrRegBase, + VbTablePtrRegCount, + &pUserDataNodes[userDataNodeCount], + &userDataNodeCount); + + if ((pVbInfo != nullptr) && appendFetchShaderCb) { - uint32_t nodeCount; - - BuildLlpcVertexBufferTableMapping( - pVbInfo, vbTablePtrRegBase, VbTablePtrRegCount, &pUserDataNodes[userDataNodeCount], &nodeCount); + VK_ASSERT(uberFetchCbRegBase == userDataLayout.uberFetchConstBufRegBase); - userDataNodeCount += nodeCount; + BuildLlpcInternalConstantBufferMapping( + Vkgc::ShaderStageVertexBit, + uberFetchCbRegBase, + Vkgc::FetchShaderInternalBufferBinding, + &pUserDataNodes[userDataNodeCount], + &userDataNodeCount); } // Build push constants mapping if (userDataLayout.pushConstSizeInDword > 0) { - // Build mapping for push constant resouce + // Build mapping for push constant resource Vkgc::ResourceMappingNode* pPushConstNode = &pResourceNodes[mappingNodeCount]; pPushConstNode->type = Vkgc::ResourceMappingNodeType::PushConst; @@ -1147,16 +1167,12 @@ void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( // Build transform feedback buffer mapping if (transformFeedbackEnabled) { - uint32_t nodeCount; - BuildLlpcTransformFeedbackMapping( stageMask, transformFeedbackRegBase, TransformFeedbackRegCount, &pUserDataNodes[userDataNodeCount], - &nodeCount); - - userDataNodeCount += nodeCount; + &userDataNodeCount); } // Build mapping for each set of descriptors @@ -1171,25 +1187,51 @@ void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( if (visibility != 0) { - uint32_t dynNodeCount = 0; - uint32_t staNodeCount = 0; - uint32_t descRangeCount = 0; + uint32_t dynNodeCount = 0; + 
uint32_t staNodeCount = 0; Vkgc::ResourceMappingNode* pDynNodes = &pResourceNodes[mappingNodeCount]; - BuildLlpcDynamicSetMapping( - pSetLayout, visibility, setIndex, 0, pDynNodes, &dynNodeCount); - Vkgc::ResourceMappingNode* pStaNodes = &pResourceNodes[mappingNodeCount + dynNodeCount]; - Vkgc::StaticDescriptorValue* pDescValues = &pDescriptorRangeValues[descriptorRangeCount]; - BuildLlpcStaticSetMapping( - pSetLayout, visibility, setIndex, pStaNodes, &staNodeCount, pDescValues, &descRangeCount); + for (uint32_t bindingIndex = 0; bindingIndex < pSetLayout->Info().count; ++bindingIndex) + { + const DescriptorSetLayout::BindingInfo& binding = pSetLayout->Binding(bindingIndex); + + if (binding.dyn.dwSize > 0) + { + auto* pDynNode = &pDynNodes[dynNodeCount++]; + + BuildLlpcDynamicMapping( + setIndex, + 0, + binding, + pDynNode); + } + } + + Vkgc::ResourceMappingNode* pStaNodes = &pResourceNodes[mappingNodeCount + dynNodeCount]; + + for (uint32_t bindingIndex = 0; bindingIndex < pSetLayout->Info().count; ++bindingIndex) + { + const DescriptorSetLayout::BindingInfo& binding = pSetLayout->Binding(bindingIndex); + + if (binding.sta.dwSize > 0) + { + auto* pStaNode = &pStaNodes[staNodeCount++]; + + BuildLlpcStaticMapping( + pSetLayout, + visibility, + setIndex, + binding, + pStaNode, + &pDescriptorRangeValues[descriptorRangeCount], + &descriptorRangeCount); + } + } // Increase the number of mapping nodes used by the number of static section nodes added. mappingNodeCount += (dynNodeCount + staNodeCount); - // Increase the number of descriptor range value nodes used by immutable samplers - descriptorRangeCount += descRangeCount; - // Add a top-level user data node entry for dynamic nodes. 
if (pSetLayout->Info().dyn.numRsrcMapNodes > 0) { @@ -1253,7 +1295,7 @@ VkResult PipelineLayout::BuildLlpcPipelineMapping( else if (m_info.userDataLayout.scheme == PipelineLayoutScheme::Indirect) { BuildIndirectSchemeLlpcPipelineMapping( - stageMask, pVbInfo, pBuffer, pResourceMapping); + stageMask, pVbInfo, appendFetchShaderCb, pBuffer, pResourceMapping); } else { diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp index 911af358..dbf43489 100644 --- a/icd/api/vk_queue.cpp +++ b/icd/api/vk_queue.cpp @@ -750,12 +750,10 @@ VkResult Queue::PalSignalSemaphores( const uint32_t* pSemaphoreDeviceIndices) { #if ICD_GPUOPEN_DEVMODE_BUILD - const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); DevModeMgr* pDevModeMgr = m_pDevice->VkInstance()->GetDevModeMgr(); bool timedQueueEvents = ((pDevModeMgr != nullptr) && - pDevModeMgr->IsQueueTimingActive(m_pDevice) && - settings.devModeSemaphoreQueueTimingEnable); + pDevModeMgr->IsQueueTimingActive(m_pDevice)); #else bool timedQueueEvents = false; #endif @@ -829,12 +827,10 @@ VkResult Queue::PalWaitSemaphores( uint32_t deviceIdx = DefaultDeviceIndex; #if ICD_GPUOPEN_DEVMODE_BUILD - const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); DevModeMgr* pDevModeMgr = m_pDevice->VkInstance()->GetDevModeMgr(); bool timedQueueEvents = ((pDevModeMgr != nullptr) && - pDevModeMgr->IsQueueTimingActive(m_pDevice) && - settings.devModeSemaphoreQueueTimingEnable); + pDevModeMgr->IsQueueTimingActive(m_pDevice)); #else bool timedQueueEvents = false; #endif diff --git a/icd/api/vk_shader.cpp b/icd/api/vk_shader.cpp index 8c39f294..038da370 100644 --- a/icd/api/vk_shader.cpp +++ b/icd/api/vk_shader.cpp @@ -192,7 +192,8 @@ VkResult ShaderModule::Init(const Device* pDevice, VkShaderModuleCreateFlags fla { PipelineCompiler* pCompiler = pDevice->GetCompiler(DefaultDeviceIndex); - VkResult result = pCompiler->BuildShaderModule(pDevice, flags, m_codeSize, m_pCode, nullptr, nullptr, &m_handle); + VkResult result = 
pCompiler->BuildShaderModule( + pDevice, flags, m_codeSize, m_pCode, false, nullptr, nullptr, &m_handle); if (result == VK_SUCCESS) { diff --git a/icd/api/vk_utils.cpp b/icd/api/vk_utils.cpp new file mode 100644 index 00000000..47648e8a --- /dev/null +++ b/icd/api/vk_utils.cpp @@ -0,0 +1,49 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file vk_utils.cpp + * @brief Utility functions for Vulkan. This file is rebuilt every time. 
+ *********************************************************************************************************************** + */ + +#include "vk_utils.h" + +namespace vk +{ + +namespace utils +{ + +// ===================================================================================================================== +// Get driver build time hash +uint32_t GetBuildTimeHash() +{ + return Util::HashLiteralString(__DATE__ __TIME__); +} + +} // namespace utils + +} // namespace vk diff --git a/icd/make/importdefs b/icd/make/importdefs index 55fd244d..9d09390a 100644 --- a/icd/make/importdefs +++ b/icd/make/importdefs @@ -26,7 +26,7 @@ # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. It must # be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -ICD_PAL_CLIENT_MAJOR_VERSION = 720 +ICD_PAL_CLIENT_MAJOR_VERSION = 729 ICD_PAL_CLIENT_MINOR_VERSION = 0 # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. 
It describes diff --git a/icd/res/ver.h b/icd/res/ver.h index c3e00017..0ffcf353 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 221 +#define VULKAN_ICD_BUILD_VERSION 226 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION @@ -45,7 +45,7 @@ // These values specify the driver ID and driver info string #define VULKAN_DRIVER_ID VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR // "AMDOPEN" #define VULKAN_DRIVER_NAME_STR "AMD open-source driver" -#define VULKAN_DRIVER_INFO_STR "2022.Q2.1" +#define VULKAN_DRIVER_INFO_STR "2022.Q2.2" // These values tell which version of the conformance test the driver is compliant against #define CTS_VERSION_MAJOR 1 diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index 6efb1298..b9016343 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -138,6 +138,8 @@ void VulkanSettingsLoader::OverrideSettingsBySystemInfo() MakeAbsolutePath(m_settings.shaderReplaceDir, sizeof(m_settings.shaderReplaceDir), pRootPath, m_settings.shaderReplaceDir); + MakeAbsolutePath(m_settings.appProfileDumpDir, sizeof(m_settings.appProfileDumpDir), + pRootPath, m_settings.appProfileDumpDir); MakeAbsolutePath(m_settings.pipelineProfileDumpFile, sizeof(m_settings.pipelineProfileDumpFile), pRootPath, m_settings.pipelineProfileDumpFile); #if ICD_RUNTIME_APP_PROFILE @@ -192,6 +194,8 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( if (pInfo->gfxLevel <= Pal::GfxIpLevel::GfxIp9) { m_settings.forceResolveLayoutForDepthStencilTransferUsage = true; + + m_settings.useReleaseAcquireInterface = false; } // In general, DCC is very beneficial for color attachments, 2D, 3D shader storage resources that have BPP>=32. 
@@ -204,6 +208,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( ForceDccFor3DShaderStorage | ForceDccFor32BppShaderStorage | ForceDccFor64BppShaderStorage); + m_settings.optImgMaskToApplyShaderReadUsageForTransferSrc |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; } if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) @@ -215,8 +220,8 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( } - // Put command buffers in local for large/resizable BAR systems - const gpusize minLocalSize = 256 * 1024 * 1024; + // Put command buffers in local for large/resizable BAR systems with > 7 GBs of local heap + const gpusize minLocalSize = 7ull * 1024ull * 1024ull * 1024ull; if ((gpuMemoryHeapPropertiesResult == Pal::Result::Success) && (heapProperties[Pal::GpuHeapLocal].heapSize > minLocalSize)) @@ -521,7 +526,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( ForceDccFor64BppShaderStorage); m_settings.enableNgg = 0x0; - } if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) @@ -634,15 +638,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( if (pInfo->revision == Pal::AsicRevision::Navi23) { m_settings.overrideLocalHeapSizeInGBs = 8; - - if ((gpuMemoryHeapPropertiesResult == Pal::Result::Success) && - ((m_settings.overrideLocalHeapSizeInGBs * 1024 * 1024 *1024) > - (heapProperties[Pal::GpuHeapLocal].heapSize + - heapProperties[Pal::GpuHeapInvisible].heapSize))) - { - m_settings.memoryRemoteBackupHeapMinHeapSize = 0x220000000; - m_settings.memoryDeviceOverallocationAllowed = true; - } + m_settings.memoryDeviceOverallocationAllowed = true; } if (pInfo->revision == Pal::AsicRevision::Navi24) { @@ -652,7 +648,6 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( ForceDccFor32BppShaderStorage); m_settings.overrideLocalHeapSizeInGBs = 8; - m_settings.memoryRemoteBackupHeapMinHeapSize = 0x220000000; m_settings.memoryDeviceOverallocationAllowed = true; } } @@ -925,27 +920,24 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( void 
VulkanSettingsLoader::DumpAppProfileChanges( AppProfile appProfile) { - if (m_settings.appProfileDumpDir[0] == '\0') + if ((m_settings.appProfileDumpMask & AppProfileDumpFlags::AppProfileValue) != 0) { - // Don't do anything if dump directory has not been set - return; - } - - wchar_t executableName[PATH_MAX]; - wchar_t executablePath[PATH_MAX]; - utils::GetExecutableNameAndPath(executableName, executablePath); + wchar_t executableName[PATH_MAX]; + wchar_t executablePath[PATH_MAX]; + utils::GetExecutableNameAndPath(executableName, executablePath); - char fileName[512] = {}; - Util::Snprintf(&fileName[0], sizeof(fileName), "%s/vkAppProfile.txt", &m_settings.appProfileDumpDir[0]); + char fileName[512] = {}; + Util::Snprintf(&fileName[0], sizeof(fileName), "%s/vkAppProfile.txt", &m_settings.appProfileDumpDir[0]); - Util::File dumpFile; - if (dumpFile.Open(fileName, Util::FileAccessAppend) == Pal::Result::Success) - { - dumpFile.Printf("Executable: %S%S\nApp Profile Enumeration: %d\n\n", - &executablePath[0], - &executableName[0], - static_cast(appProfile)); - dumpFile.Close(); + Util::File dumpFile; + if (dumpFile.Open(fileName, Util::FileAccessAppend) == Pal::Result::Success) + { + dumpFile.Printf("Executable: %S%S\nApp Profile Enumeration: %d\n\n", + &executablePath[0], + &executableName[0], + static_cast(appProfile)); + dumpFile.Close(); + } } } @@ -1094,36 +1086,6 @@ void VulkanSettingsLoader::ValidateSettings() m_settings.enableFmaskBasedMsaaRead = false; } -#if !VKI_GPUOPEN_PROTOCOL_ETW_CLIENT - // Internal semaphore queue timing is always enabled when ETW is not available - m_settings.devModeSemaphoreQueueTimingEnable = true; -#endif - - // Undo any heap overrides to local if oversubscription is allowed by default because they will likely - // degrade performance instead of improve it. When not allowed, testing should catch these cases - // so that overrides to local aren't added in the first place. 
- Pal::GpuMemoryHeapProperties heapProperties[Pal::GpuHeapCount] = {}; - - if ((m_pDevice->GetGpuMemoryHeapProperties(heapProperties) == Pal::Result::Success) && - (heapProperties[Pal::GpuHeapLocal].heapSize < m_settings.memoryRemoteBackupHeapMinHeapSize) && - (heapProperties[Pal::GpuHeapInvisible].heapSize < m_settings.memoryRemoteBackupHeapMinHeapSize)) - { - if (heapProperties[Pal::GpuHeapGartUswc].heapSize > 0) - { - if (m_settings.cmdAllocatorDataHeap == Pal::GpuHeapLocal) - { - m_settings.cmdAllocatorDataHeap = Pal::GpuHeapGartUswc; - } - - if (m_settings.cmdAllocatorEmbeddedHeap == Pal::GpuHeapLocal) - { - m_settings.cmdAllocatorEmbeddedHeap = Pal::GpuHeapGartUswc; - } - } - - m_settings.overrideHeapChoiceToLocal = 0; - } - // Command buffer prefetching was found to be slower for command buffers in local memory. if (m_settings.cmdAllocatorDataHeap == Pal::GpuHeapLocal) { @@ -1179,8 +1141,13 @@ void VulkanSettingsLoader::GenerateSettingHash() // Completes the initialization of the settings by overriding values from the registry and validating the final settings // struct void VulkanSettingsLoader::FinalizeSettings( - ) + const DeviceExtensions::Enabled& enabledExtensions) { + if ( + false) + { + m_settings.enableFmaskBasedMsaaRead = false; + } m_state = Pal::SettingsLoaderState::Final; diff --git a/icd/settings/settings.h b/icd/settings/settings.h index aaec52de..f240ad52 100644 --- a/icd/settings/settings.h +++ b/icd/settings/settings.h @@ -41,6 +41,7 @@ // then we need to include this file from "settings" dir. 
#include "settings/g_settings.h" #include "include/app_profile.h" +#include "include/vk_extensions.h" namespace Pal { @@ -71,7 +72,7 @@ class VulkanSettingsLoader : public Pal::ISettingsLoader void UpdatePalSettings(); void FinalizeSettings( - ); + const DeviceExtensions::Enabled& enabledExtensions); const RuntimeSettings& GetSettings() const { return m_settings; }; RuntimeSettings* GetSettingsPtr() { return &m_settings; } diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index 06a815b0..d91a4478 100644 --- a/icd/settings/settings_xgl.json +++ b/icd/settings/settings_xgl.json @@ -3169,7 +3169,7 @@ { "Description": "Determines where the vkAppProfile.txt file will be written to. This file contains the app profile enumeration index.", "Tags": [ - "General" + "Optimization" ], "Flags": { "IsPath": true @@ -3182,6 +3182,38 @@ "Size": 512, "Scope": "Driver" }, + { + "Flags": { + "IsHex": true, + "IsBitmask": true + }, + "ValidValues": { + "IsEnum": true, + "Values": [ + { + "Name": "None", + "Value": 0, + "Description": "Default, do not dump app profile information" + }, + { + "Name": "AppProfileValue", + "Value": 1, + "Description": "Dump the application path and the applied app profile index (like ForceAppProfileValue)" + } + ], + "Name": "AppProfileDumpFlags" + }, + "Description": "Dumping options for working with application profiles", + "Tags": [ + "Optimization" + ], + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "Name": "AppProfileDumpMask", + "Scope": "Driver" + }, { "Name": "OptEnablePrt", "Description": "Enable PRT feature in general. 
The detailed feature set is decided according to caps reported by Pal.", @@ -3344,21 +3376,6 @@ "Type": "uint32", "Name": "MemoryBaseAddrAlignmentCpuVisibleWin32" }, - { - "Name": "MemoryRemoteBackupHeapMinHeapSize", - "Description": "If the size of a device-local heap is smaller than this value, the remote (GART USWC) heap is used as a secondary heap for VkMemory objects created using the default GPU-local memory type for that device-local heap. The remote back-up heap is added regardless when overallocation is allowed via the VK_AMD_memory_overallocation extension. ", - "Tags": [ - "Memory" - ], - "Defaults": { - "Default": 8053063680 - }, - "Scope": "Driver", - "Type": "gpusize", - "Flags": { - "IsHex": true - } - }, { "Description": "Default priority of all VkMemory objects as two hex digits. The first (most-significant) digit defines the priority level, and the second digit defines the priority offset. Valid priority level values (Pal::GpuMemPriority) are: 0: Unused 1: VeryLow 2: Low 3: Normal 4: High 5: VeryHigh. 
Valid priority offset values (Pal::GpuMemPriorityOffset) are: 0: Offset0 (same as base level) 1: Offset1 2: Offset2 3: Offset3 4: Offset4 5: Offset5 6: Offset6 7: Offset7 ", "Tags": [ @@ -4389,19 +4406,6 @@ "VariableName": "optImgMaskToApplyShaderWriteUsageForTransferDst", "Name": "TransferDstUsageAsShaderWriteMask" }, - { - "Description": "Enable extension AMD_GPU_SHADER_HALF_FLOAT and AMD_GPU_SHADER_INT16 only on ASIC >= gfx9.", - "Tags": [ - "Optimization" - ], - "Defaults": { - "Default": true - }, - "Type": "bool", - "VariableName": "optOnlyEnableFP16ForGfx9Plus", - "Name": "OnlyEnableFP16ForGfx9Plus", - "Scope": "Driver" - }, { "Description": "If set, an image with color target usage bit does not implicitly allow to be in resolve_src or resolve_dst layout.", "Tags": [ @@ -4554,7 +4558,7 @@ "Optimization" ], "Defaults": { - "Default": false + "Default": true }, "Type": "bool", "Name": "PrefetchShaders", @@ -4647,8 +4651,8 @@ "Type": "uint32" }, { - "Name": "UseRelThenAcqForVkCmdPipelineBarrier", - "Description": "Enables the use of PAL::CmdReleaseThenAcquire() barrier interface on supported ASICs (gfx9+) via vkCmdPipelineBarrier API call", + "Name": "UseReleaseAcquireInterface", + "Description": "Enables the use of Pal::CmdReleaseThenAcquire(), Pal::CmdRelease() and Pal::CmdAcquire() barrier interfaces on supported ASICs (gfx9+).", "Tags": [ "Optimization" ], @@ -4882,23 +4886,6 @@ "Type": "bool", "Name": "DevModeQueueTimingEnable" }, - { - "Description": "This controls if the legacy timed queue semaphores are to be used on Windows instead of the ETW client. This parameter does not affect Linux.", - "Tags": [ - "Developer Mode" - ], - "Defaults": { - "Default": true - }, - "DependsOn": { - "OS": [ - "Windows" - ] - }, - "Scope": "Driver", - "Type": "bool", - "Name": "DevModeSemaphoreQueueTimingEnable" - }, { "Description": "This controls what kind of SQTT instrumentation marker data is output from the driver. 
Note that this only fine-tunes instrumentation: the master toggle for SQTT is always whether developer mode is enabled. General SQTT data (event, wave, instruction) is also generated independent of this setting if SQTT is enabled. ", "Tags": [ diff --git a/icd/tools/generate/genShaderProfile.py b/icd/tools/generate/genShaderProfile.py index 791f5c2c..1a5f4832 100644 --- a/icd/tools/generate/genShaderProfile.py +++ b/icd/tools/generate/genShaderProfile.py @@ -141,7 +141,8 @@ def parseJsonFlags(key, flags): # Parses stage actions from the input json file and fetches code template from shaderProfileTemplate.py. # Includes parsing options for # [ -# 'optStrategyFlags', 'vgprLimit', 'sgprLimit', 'ldsSpillLimitDwords', 'maxArraySizeForFastDynamicIndexing', +# 'optStrategyFlags', 'optStrategyFlags2', 'vgprLimit', 'sgprLimit', 'ldsSpillLimitDwords', +# 'maxArraySizeForFastDynamicIndexing', # 'userDataSpillThreshold', 'maxThreadGroupsPerComputeUnit', 'scOptions', 'scOptionsMask', 'trapPresent', # 'debugMode', 'allowReZ', 'shaderReplaceEnabled', 'fpControlFlags', 'optimizationIntent', 'disableLoopUnrolls', # 'enableSelectiveInline', 'maxOccupancyOptions', 'lowLatencyOptions', 'waveSize', 'wgpMode', 'waveBreakSize', @@ -184,6 +185,8 @@ def parseJsonProfileActionShader(shaderActions): if shaderActionKey in BRANCHES: if shaderActionKey == 'optStrategyFlags': result["optStrategyFlags"] = True + elif shaderActionKey == 'optStrategyFlags2': + result["optStrategyFlags2"] = True elif shaderActionKey == 'fpControlFlags': result["fpControlFlags"] = True elif shaderActionKey == 'maxOccupancyOptions': diff --git a/icd/tools/generate/shaderProfileTemplate.py b/icd/tools/generate/shaderProfileTemplate.py index 953be698..f4c4e78d 100644 --- a/icd/tools/generate/shaderProfileTemplate.py +++ b/icd/tools/generate/shaderProfileTemplate.py @@ -593,6 +593,22 @@ def jsonEnumReaderTemplate(values, prefix=""): ], }, + "optStrategyFlags2": { + "type": [int, dict], + "jsonReadable": True, + 
"entityInfo": [ + { + "parent": "shaderCreate.anonStruct", + "entity": "bitField", + "varName": "optStrategyFlags2", + "dataType": "uint32_t", + "defaultValue": 1, + "jsonWritable": True, + "buildTypes": {}, + } + ], + }, + "vgprLimit": { "type": [int], "jsonReadable": True,