From 3252b6223947f9fc67399e0798b1062983925fce Mon Sep 17 00:00:00 2001
From: Jacob He
Date: Sun, 29 Sep 2019 14:26:49 +0800
Subject: [PATCH] Update xgl from commit: 2f3287e

* Implement VK_EXT_post_depth_coverage
* Re-work vkPipelineCache
* Add "enableLoadScalarizer" option to app_shader_optimizer
* Tune shader performance for F1 2017 and The Talos Principle
* EXT_vertex_attribute_divisor: Add missing features query and support verification
* Fix a case fallthrough bug with VK_AMD_memory_overallocation_behavior at device creation
* Move platformKey to physical device
* Make InitializePlatformKey() a void function
* Add ShaderDbg to LLPC
* Bump LLPC client interface version to 34
* Update PAL Interface in Vulkan to 534

Change-Id: I8f6833890aaf717ade2bc04235ed81863f2fbad3
---
 CMakeLists.txt                           |   1 -
 icd/CMakeLists.txt                       |  10 +-
 icd/api/app_shader_optimizer.cpp         | 199 ++++++
 icd/api/compiler_solution.cpp            |  47 +--
 icd/api/compiler_solution_llpc.cpp       |   6 +-
 icd/api/gpu_event_mgr.cpp                | 391 ----------------------
 icd/api/include/app_shader_optimizer.h   |   1 +
 icd/api/include/compiler_solution.h      |   2 +-
 icd/api/include/compiler_solution_llpc.h |   2 +-
 icd/api/include/gpu_event_mgr.h          | 136 --------
 icd/api/include/pipeline_binary_cache.h  |  38 ++-
 icd/api/include/pipeline_compiler.h      |   2 -
 icd/api/include/vk_cmd_pool.h            |   8 -
 icd/api/include/vk_cmdbuffer.h           |   4 -
 icd/api/include/vk_conv.h                |   8 +-
 icd/api/include/vk_extensions.h          |   1 +
 icd/api/include/vk_physical_device.h     |  29 ++
 icd/api/include/vk_pipeline_cache.h      |   2 +-
 icd/api/include/vk_render_pass.h         |   1 -
 icd/api/pipeline_binary_cache.cpp        | 321 +++++++++++++++----
 icd/api/pipeline_compiler.cpp            |   2 +-
 icd/api/renderpass/renderpass_logger.cpp |   4 +-
 icd/api/sqtt/sqtt_rgp_annotations.h      | 150 ++++-----
 icd/api/strings/base_extensions.txt      |   1 +
 icd/api/vk_cmd_pool.cpp                  |  64 ----
 icd/api/vk_cmdbuffer.cpp                 |  66 +---
 icd/api/vk_device.cpp                    |  54 +++-
 icd/api/vk_event.cpp                     |   1 -
 icd/api/vk_image.cpp                     |   2 +-
 icd/api/vk_physical_device.cpp           | 205 +++++++---
 icd/api/vk_pipeline_cache.cpp            | 143 ++++++---
 icd/api/vk_queue.cpp                     |  11 +-
 icd/make/importdefs                      |   6 +-
 icd/res/ver.h                            |   2 +-
 icd/settings/settings.cpp                |  19 +-
 icd/settings/settings_xgl.json           | 346 +++++++++---------
 36 files changed, 1134 insertions(+), 1151 deletions(-)
 delete mode 100644 icd/api/gpu_event_mgr.cpp
 delete mode 100644 icd/api/include/gpu_event_mgr.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 737df5e6..ce6d7d33 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -134,7 +134,6 @@ set(XGL_PAL_PATH ${PROJECT_SOURCE_DIR}/../pal CACHE PATH "Specify the path to th
 set(PAL_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_PAL_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE)
 set(PAL_CLIENT "VULKAN" CACHE STRING "${PROJECT_NAME} override." FORCE)
 
-set(PAL_DEVELOPER_BUILD ${VK_INTERNAL_DEVELOPER} CACHE BOOL "${PROJECT_NAME} override." FORCE)
 
 if(ICD_BUILD_SPVONLY)
     set(PAL_ENABLE_PRINTS_ASSERTS ON CACHE BOOL "${PROJECT_NAME} override."
FORCE) endif() diff --git a/icd/CMakeLists.txt b/icd/CMakeLists.txt index 60126c13..a60a9db8 100644 --- a/icd/CMakeLists.txt +++ b/icd/CMakeLists.txt @@ -116,11 +116,10 @@ if(ICD_MEMTRACK) endif() # Configure Vulkan SDK version definitions -if(USE_NEXT_SDK) - target_compile_definitions(xgl PRIVATE VKI_SDK_1_2=1) -else() - target_compile_definitions(xgl PRIVATE VKI_SDK_1_1=1) -endif() + +set(SDK VKI_SDK_1_1) + +target_compile_definitions(xgl PRIVATE ${SDK}=1) # Enable relevant GPUOpen preprocessor definitions if(ICD_GPUOPEN_DEVMODE_BUILD) @@ -183,7 +182,6 @@ target_sources(xgl PRIVATE api/barrier_policy.cpp api/color_space_helper.cpp api/compiler_solution.cpp - api/gpu_event_mgr.cpp api/internal_mem_mgr.cpp api/pipeline_compiler.cpp api/pipeline_binary_cache.cpp diff --git a/icd/api/app_shader_optimizer.cpp b/icd/api/app_shader_optimizer.cpp index 3a023193..c5aad96b 100644 --- a/icd/api/app_shader_optimizer.cpp +++ b/icd/api/app_shader_optimizer.cpp @@ -136,6 +136,12 @@ void ShaderOptimizer::ApplyProfileToShaderCreateInfo( options.pPipelineOptions->reconfigWorkgroupLayout = true; } #endif +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 33 + if (shaderCreate.tuningOptions.enableLoadScalarizer) + { + options.pOptions->enableLoadScalarizer = true; + } +#endif if (shaderCreate.apply.waveSize) { @@ -817,6 +823,7 @@ void ShaderOptimizer::BuildAppProfileLlpc() m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x8296579A6570BC13; m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x44FA946844F62696; m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; /////////////////////////////////////////////////////////////////////////////////////////////////////////// // 0xE4B55319684F59F228A2B57C92339574, PS @@ -844,6 +851,160 @@ void ShaderOptimizer::BuildAppProfileLlpc() m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x7E1F46BE56E427AA; m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xA3EB7292C77A0365; m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xF341093EF870C70A0AECE7808011C4B8, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x0AECE7808011C4B8; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xF341093EF870C70A; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xB60900B3E1256DDFC7A889DBAC76F591, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + 
m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xC7A889DBAC76F591; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xB60900B3E1256DDF; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x41DF226419CD26C217CE9268FE52D03B, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x17CE9268FE52D03B; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x41DF226419CD26C2; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x1D9EB7DDBA66FDF78AED19D93B57535B, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x8AED19D93B57535B; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x1D9EB7DDBA66FDF7; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x20E5DA2E5917E2416A43398F36D72603, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x6A43398F36D72603; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x20E5DA2E5917E241; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xF3AF74681BD7980350FBF528DC8AFBA5, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x50FBF528DC8AFBA5; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xF3AF74681BD79803; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x51D59E18E8BD64D9955B7EEAB9F6CDAA, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x955B7EEAB9F6CDAA; + 
m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x51D59E18E8BD64D9; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x154112D144C95DE5ECF087B422ED60CE, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xECF087B422ED60CE; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x154112D144C95DE5; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xE39F6C59BF345B466DE524A0717A4D67, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x6DE524A0717A4D67; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xE39F6C59BF345B46; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xB020780B537A01C426365F3E39BE59E6, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x26365F3E39BE59E6; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xB020780B537A01C4; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xFBAD8E5EE07D12D0F5E3F18201C348E6, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xF5E3F18201C348E6; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xFBAD8E5EE07D12D0; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xCD911627E2D20F9B7D5DFF0970FB823A, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x7D5DFF0970FB823A; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xCD911627E2D20F9B; + 
m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x2DAC71E14EB7945D50DD68ED10CBE1AF, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x50DD68ED10CBE1AF; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x2DAC71E14EB7945D; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x3C1101DC3E3B206E2D99D8DAAF0FE1BE, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x2D99D8DAAF0FE1BE; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x3C1101DC3E3B206E; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x474C4C2966E08232DE5274426C9F365C, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xDE5274426C9F365C; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x474C4C2966E08232; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xD85FA2403788076B3BA507665B126C33, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x3BA507665B126C33; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xD85FA2403788076B; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x6A07F5C0DAAB96E6D1C630198DDC7F21, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xD1C630198DDC7F21; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x6A07F5C0DAAB96E6; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; } else if (appProfile == 
AppProfile::SeriousSamFusion) { @@ -1011,6 +1172,35 @@ void ShaderOptimizer::BuildAppProfileLlpc() m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x549373FA25856E20; m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; } + else if (appProfile == AppProfile::F1_2017) + { + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x7C92A52E3084149659025B19EDAE3734, CS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].codeHash.lower = 0x59025B19EDAE3734; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].codeHash.upper = 0x7C92A52E30841496; + m_appProfile.entries[i].action.shaders[ShaderStageCompute].shaderCreate.tuningOptions.useSiScheduler = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x767991F055DE051DEC878C820BD1D81E, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xEC878C820BD1D81E; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x767991F055DE051D; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x8648E5203943C0B00EBEFF2CBF131944, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x0EBEFF2CBF131944; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x8648E5203943C0B0; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + } } #if PAL_ENABLE_PRINTS_ASSERTS @@ -1152,6 +1342,7 @@ static bool ParseJsonProfileActionShader( "enableSelectiveInline", "useSiScheduler", "reconfigWorkgroupLayout", + "enableLoadScalarizer", "waveSize", "wgpMode", "waveBreakSize", @@ -1384,6 +1575,14 @@ static bool ParseJsonProfileActionShader( } } + if ((pItem = utils::JsonGetValue(pJson, "enableLoadScalarizer")) != nullptr) + { + if (pItem->integerValue != 0) + { + pActions->shaderCreate.tuningOptions.enableLoadScalarizer = true; + } + } + return success; } diff --git a/icd/api/compiler_solution.cpp b/icd/api/compiler_solution.cpp index 2fce9c40..8b482715 100644 --- a/icd/api/compiler_solution.cpp +++ b/icd/api/compiler_solution.cpp @@ -49,49 +49,12 @@ CompilerSolution::~CompilerSolution() // ===================================================================================================================== // Initialize CompilerSolution class -VkResult CompilerSolution::Initialize() +VkResult CompilerSolution::Initialize( + Llpc::GfxIpVersion gfxIp, + Pal::GfxIpLevel gfxIpLevel) { - Pal::IDevice* pPalDevice = m_pPhysicalDevice->PalDevice(); - const RuntimeSettings& settings = 
m_pPhysicalDevice->GetRuntimeSettings(); - - // Initialize GfxIp informations per PAL device properties - Pal::DeviceProperties info; - pPalDevice->GetProperties(&info); - - switch (info.gfxLevel) - { - case Pal::GfxIpLevel::GfxIp6: - m_gfxIp.major = 6; - m_gfxIp.minor = 0; - break; - case Pal::GfxIpLevel::GfxIp7: - m_gfxIp.major = 7; - m_gfxIp.minor = 0; - break; - case Pal::GfxIpLevel::GfxIp8: - m_gfxIp.major = 8; - m_gfxIp.minor = 0; - break; - case Pal::GfxIpLevel::GfxIp8_1: - m_gfxIp.major = 8; - m_gfxIp.minor = 1; - break; - case Pal::GfxIpLevel::GfxIp9: - m_gfxIp.major = 9; - m_gfxIp.minor = 0; - break; - case Pal::GfxIpLevel::GfxIp10_1: - m_gfxIp.major = 10; - m_gfxIp.minor = 1; - break; - - default: - VK_NEVER_CALLED(); - break; - } - - m_gfxIp.stepping = info.gfxStepping; - m_gfxIpLevel = info.gfxLevel; + m_gfxIp = gfxIp; + m_gfxIpLevel = gfxIpLevel; return VK_SUCCESS; } diff --git a/icd/api/compiler_solution_llpc.cpp b/icd/api/compiler_solution_llpc.cpp index 5609a6b4..2f4dbd07 100644 --- a/icd/api/compiler_solution_llpc.cpp +++ b/icd/api/compiler_solution_llpc.cpp @@ -54,9 +54,11 @@ CompilerSolutionLlpc::~CompilerSolutionLlpc() // ===================================================================================================================== // Initialize CompilerSolutionLlpc class -VkResult CompilerSolutionLlpc::Initialize() +VkResult CompilerSolutionLlpc::Initialize( + Llpc::GfxIpVersion gfxIp, + Pal::GfxIpLevel gfxIpLevel) { - VkResult result = CompilerSolution::Initialize(); + VkResult result = CompilerSolution::Initialize(gfxIp, gfxIpLevel); if (result == VK_SUCCESS) { diff --git a/icd/api/gpu_event_mgr.cpp b/icd/api/gpu_event_mgr.cpp deleted file mode 100644 index fa79097c..00000000 --- a/icd/api/gpu_event_mgr.cpp +++ /dev/null @@ -1,391 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2014-2019 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - **********************************************************************************************************************/ - -#include "include/vk_cmdbuffer.h" -#include "include/vk_conv.h" -#include "include/vk_device.h" -#include "sqtt/sqtt_rgp_annotations.h" - -#include "palGpuEvent.h" -#include "palIntrusiveListImpl.h" - -namespace vk -{ - -// ===================================================================================================================== -void GpuEvents::Destroy() -{ - for (uint32_t i = 0; i < m_numDeviceEvents; i++) - { - m_pEvents[i]->Destroy(); - } -} - -// ===================================================================================================================== -GpuEventMgr::GpuEventMgr(Device* pDevice) - : - m_parentNode(this), - m_pFirstChunk(nullptr), - m_needWaitRecycleEvents(false), - m_pDevice(pDevice), - m_totalEventCount(0) -{ - -} - -// ===================================================================================================================== -GpuEventMgr::~GpuEventMgr() -{ - Destroy(); -} - -// ===================================================================================================================== -// Should be called during the parent's vkBeginCommandBuffer() -void GpuEventMgr::BeginCmdBuf( - CmdBuffer* pOwner, - const Pal::CmdBufferBuildInfo& info) -{ - // If this command buffer can be submitted multiple times, we need to make sure that we wait on its previous - // incarnation to complete before allowing any events to be accessed. This is because we need to make sure nothing - // signals these events while the GPU is still accessing this command buffer. - m_needWaitRecycleEvents = (info.flags.optimizeOneTimeSubmit == false); -} - -// ===================================================================================================================== -// Called when this event manager's event memory should be reset. This will mark all events as free for allocation but -// does not release any of their GPU memory. -// -// This is called either when a command buffer is being reset, or when a command buffer's resources are being -// are being released back to the command pool (e.g. when destroyed). -void GpuEventMgr::ResetEvents() -{ - // Mark all previously-created events as free for reuse. When resetting a command buffer, the application is - // responsible for ensuring that no previous access to the command buffer by the GPU is pending which means that - // we don't need to wait before resetting the GPU value of these events (this actual reset happens during - // RequestEvents()). - EventChunk* pChunk = m_pFirstChunk; - - while (pChunk != nullptr) - { - pChunk->eventNextFree = 0; - pChunk = pChunk->pNextChunk; - } -} - -// ===================================================================================================================== -// Called when the command buffer that owns this event manager is reset. -void GpuEventMgr::ResetCmdBuf( - CmdBuffer* pOwner) -{ - // Reset all events back to available. 
- ResetEvents(); -} - -// ===================================================================================================================== -// Destroys the event manager's internal memory -void GpuEventMgr::Destroy() -{ - Instance* pInstance = m_pDevice->VkInstance(); - - EventChunk* pChunk = m_pFirstChunk; - - while (pChunk != nullptr) - { - EventChunk* pNext = pChunk->pNextChunk; - - DestroyChunk(pChunk); - - pChunk = pNext; - } - - m_pFirstChunk = nullptr; - m_totalEventCount = 0; -} - -// ===================================================================================================================== -// Destroys the given batch of GPU events. Called when the command buffer is destroyed or as part of allocation -// failure clean-up. -void GpuEventMgr::DestroyChunk(EventChunk* pChunk) -{ - if (pChunk != nullptr) - { - for (uint32_t i = 0; i < pChunk->eventCount; ++i) - { - pChunk->ppGpuEvents[i]->Destroy(); - } - - m_pDevice->MemMgr()->FreeGpuMem(&pChunk->gpuMemory); - - m_pDevice->VkInstance()->FreeMem(pChunk); - } -} - -// ===================================================================================================================== -// Requests some number of events to be given to the command buffer. -// -// WARNING: THIS FUNCTIONALITY IS INCOMPATIBLE WITH COMMAND BUFFERS THAT CAN BE SUBMITTED IN PARALLEL ON MULTIPLE -// QUEUES. PARALLEL EXECUTION OF THE SAME COMMAND BUFFER WILL CAUSE IT TO TRIP OVER ITS OWN EVENTS. -// -// There is currently no use case for that with the exception of compute engine command buffers and such command -// buffers should not make use of this functionality. -VkResult GpuEventMgr::RequestEvents( - CmdBuffer* pCmdBuf, - uint32_t eventCount, - GpuEvents*** pppGpuEvents) -{ - if (eventCount == 0) - { - *pppGpuEvents = nullptr; - - return VK_SUCCESS; - } - -#if PAL_ENABLE_PRINTS_ASSERTS - const Pal::DeviceProperties& deviceProps = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties(); - const Pal::EngineType engineType = pCmdBuf->GetPalEngineType(); - - // See above comment - VK_ASSERT(deviceProps.engineProperties[engineType].engineCount == 1); -#endif - - if (m_needWaitRecycleEvents) - { - WaitToRecycleEvents(pCmdBuf); - } - - VkResult result = VK_SUCCESS; - - EventChunk* pChunk = FindFreeExistingChunk(eventCount); - - if (pChunk == nullptr) - { - result = CreateNewChunk(eventCount, &pChunk); - } - - if (pChunk != nullptr) - { - VK_ASSERT(result == VK_SUCCESS); - - AllocEventsFromChunk(pCmdBuf, eventCount, pChunk, pppGpuEvents); - } - - return result; -} - -// ===================================================================================================================== -// Tries to find enough space in an existing batch of GPU events. -GpuEventMgr::EventChunk* GpuEventMgr::FindFreeExistingChunk(uint32_t eventCount) -{ - EventChunk* pChunk = m_pFirstChunk; - - while (pChunk != nullptr) - { - if (pChunk->eventCount - pChunk->eventNextFree >= eventCount) - { - return pChunk; - } - - pChunk = pChunk->pNextChunk; - } - - return nullptr; -} - -// ===================================================================================================================== -// Allocates GPU events from the given chunk of events. 
-void GpuEventMgr::AllocEventsFromChunk( - CmdBuffer* pCmdBuf, - uint32_t eventCount, - EventChunk* pChunk, - GpuEvents*** pppGpuEvents) -{ - GpuEvents** ppEvents = pChunk->ppGpuEvents + pChunk->eventNextFree; - - pChunk->eventNextFree += eventCount; - - VK_ASSERT(pChunk->eventNextFree <= pChunk->eventCount); - - // Reset the event status - // Note that the top of pipe reset below is okay because any previous reads have already been taken care of by the - // insertion of the inter-submit barrier - VK_ASSERT(m_needWaitRecycleEvents == false); - - for (uint32_t i = 0; i < eventCount; ++i) - { - pCmdBuf->PalCmdResetEvent(ppEvents[i], Pal::HwPipeTop); - } - - *pppGpuEvents = ppEvents; -} - -// ===================================================================================================================== -// Creates a new chunk at least large enough to fit the requested number of events. -VkResult GpuEventMgr::CreateNewChunk( - uint32_t eventCount, - EventChunk** ppChunk) -{ - const auto& settings = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetRuntimeSettings(); - - if (eventCount < settings.cmdBufGpuEventMinAllocCount) - { - eventCount = settings.cmdBufGpuEventMinAllocCount; - } - - VkResult result = VK_SUCCESS; - - EventChunk* pChunk = CreateChunkState(eventCount); - - if (pChunk != nullptr) - { - pChunk->pNextChunk = m_pFirstChunk; - m_pFirstChunk = pChunk; - - m_totalEventCount += pChunk->eventCount; - - *ppChunk = pChunk; - } - else - { - DestroyChunk(pChunk); - - *ppChunk = nullptr; - } - - return result; -} - -// ===================================================================================================================== -GpuEventMgr::EventChunk::EventChunk() - : - ppGpuEvents(nullptr), - eventCount(0), - eventNextFree(0), - pNextChunk(nullptr) -{ - -} - -// ===================================================================================================================== -// Initializes the system memory and state of a new event chunk. 
-GpuEventMgr::EventChunk* GpuEventMgr::CreateChunkState(uint32_t eventCount) -{ - size_t totalSize = 0; - - size_t chunkHeaderSize = sizeof(EventChunk); - - totalSize += chunkHeaderSize; - - size_t eventPtrArraySize = eventCount * sizeof(GpuEvents); - - totalSize += eventPtrArraySize; - - size_t eventPalObjSize = 0; - Pal::GpuEventCreateInfo eventCreateInfo = {}; - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); ++deviceIdx) - { - eventPalObjSize += m_pDevice->PalDevice(deviceIdx)->GetGpuEventSize(eventCreateInfo, nullptr); - } - - size_t eventSysMemSize = eventCount * (sizeof(GpuEvents) + eventPalObjSize); - - totalSize += eventSysMemSize; - - void* pMem = m_pDevice->VkInstance()->AllocMem(totalSize, VK_DEFAULT_MEM_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - void* pOrigMem = pMem; - - if (pMem == nullptr) - { - return nullptr; - } - - EventChunk* pChunk = reinterpret_cast(pMem); - pMem = Util::VoidPtrInc(pMem, chunkHeaderSize); - - VK_PLACEMENT_NEW(pChunk) GpuEventMgr::EventChunk(); - - pChunk->ppGpuEvents = reinterpret_cast(pMem); - - pMem = Util::VoidPtrInc(pMem, eventPtrArraySize); - - Pal::Result result = Pal::Result::Success; - - const Pal::GpuEventCreateInfo createInfo = {}; - - for (pChunk->eventCount = 0; - (pChunk->eventCount < eventCount) && (result == Pal::Result::Success); - pChunk->eventCount++) - { - Pal::IGpuEvent* pPalEvents[MaxPalDevices] = {}; - - size_t memOffset = sizeof(GpuEvents); - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); ++deviceIdx) - { - result = m_pDevice->PalDevice(deviceIdx)->CreateGpuEvent(createInfo, - Util::VoidPtrInc(pMem, memOffset), &pPalEvents[deviceIdx] ); - - memOffset += m_pDevice->PalDevice(deviceIdx)->GetGpuEventSize(createInfo, nullptr); - } - VK_PLACEMENT_NEW(pMem) GpuEvents(m_pDevice->NumPalDevices(), pPalEvents); - - pChunk->ppGpuEvents[pChunk->eventCount] = reinterpret_cast(pMem); - - pMem = Util::VoidPtrInc(pMem, sizeof(GpuEvents) + eventPalObjSize); - } - - VK_ASSERT(Util::VoidPtrDiff(pMem, pOrigMem) == totalSize); - - if (result == Pal::Result::Success) - { - return pChunk; - } - else - { - return nullptr; - } -} - -// ===================================================================================================================== -// Waits for any previous access to all events to finish. 
-void GpuEventMgr::WaitToRecycleEvents(CmdBuffer* pCmdBuf) -{ - Pal::BarrierInfo barrier = {}; - Pal::HwPipePoint signalPoint = Pal::HwPipeTop; - - barrier.flags.u32All = 0; - barrier.waitPoint = Pal::HwPipeTop; - barrier.pipePointWaitCount = 1; - barrier.pPipePoints = &signalPoint; - barrier.pSplitBarrierGpuEvent = nullptr; - barrier.reason = RgpBarrierInternalGpuEventRecycleStall; - - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) - { - pCmdBuf->PalCmdBuffer(deviceIdx)->CmdBarrier(barrier); - } - - m_needWaitRecycleEvents = false; -} - -}; diff --git a/icd/api/include/app_shader_optimizer.h b/icd/api/include/app_shader_optimizer.h index ad7e82b0..643bbb7e 100644 --- a/icd/api/include/app_shader_optimizer.h +++ b/icd/api/include/app_shader_optimizer.h @@ -114,6 +114,7 @@ struct ShaderTuningOptions uint32_t waveBreakSize; uint32_t useSiScheduler; uint32_t reconfigWorkgroupLayout; + bool enableLoadScalarizer; }; struct ShaderProfileAction diff --git a/icd/api/include/compiler_solution.h b/icd/api/include/compiler_solution.h index f056efed..b93e5f45 100644 --- a/icd/api/include/compiler_solution.h +++ b/icd/api/include/compiler_solution.h @@ -106,7 +106,7 @@ class CompilerSolution CompilerSolution(PhysicalDevice* pPhysicalDevice); virtual ~CompilerSolution(); - virtual VkResult Initialize() = 0; + virtual VkResult Initialize(Llpc::GfxIpVersion gfxIp, Pal::GfxIpLevel gfxIpLevel) = 0; virtual void Destroy() = 0; diff --git a/icd/api/include/compiler_solution_llpc.h b/icd/api/include/compiler_solution_llpc.h index 76778172..d2c2ab9d 100644 --- a/icd/api/include/compiler_solution_llpc.h +++ b/icd/api/include/compiler_solution_llpc.h @@ -45,7 +45,7 @@ class CompilerSolutionLlpc : public CompilerSolution public: // Overidded functions - virtual VkResult Initialize(); + virtual VkResult Initialize(Llpc::GfxIpVersion gfxIp, Pal::GfxIpLevel gfxIpLevel); virtual void Destroy(); diff --git a/icd/api/include/gpu_event_mgr.h b/icd/api/include/gpu_event_mgr.h deleted file mode 100644 index c57759d9..00000000 --- a/icd/api/include/gpu_event_mgr.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2014-2019 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - **********************************************************************************************************************/ -/** - ************************************************************************************************** - * @file gpu_event_mgr.h - * @brief Manages internal GPU events while building command buffers - ************************************************************************************************** - */ - -#ifndef __GPU_EVENT_MGR_H__ -#define __GPU_EVENT_MGR_H__ - -#pragma once - -#include "include/khronos/vulkan.h" - -#include "include/internal_mem_mgr.h" - -#include "palIntrusiveList.h" - -// Forward declare PAL classes used in this file -namespace Pal -{ -struct CmdBufferBuildInfo; -class IGpuEvent; -}; - -// Forward declare Vulkan classes used in this file -namespace vk -{ -class CmdBuffer; -class Device; -}; - -namespace vk -{ - -// ===================================================================================================================== -// Class contains Pal::IGpuEvent* objects which are part of a device group -class GpuEvents -{ - -public: - GpuEvents(uint32_t numDeviceEvents, - Pal::IGpuEvent** pPalEvents) : - m_numDeviceEvents(numDeviceEvents) - { - memcpy(m_pEvents, pPalEvents, sizeof(m_pEvents[0]) * numDeviceEvents); - } - - void Destroy(); - - VK_INLINE Pal::IGpuEvent* PalEvent(uint32_t deviceIdx) const - { - VK_ASSERT(deviceIdx < m_numDeviceEvents); - return m_pEvents[deviceIdx]; - } - -private: - uint32_t m_numDeviceEvents; - Pal::IGpuEvent* m_pEvents[MaxPalDevices]; -}; - -// ===================================================================================================================== -// Manages GPU events used internally by command buffers. -class GpuEventMgr -{ -public: - typedef Util::IntrusiveList List; - - GpuEventMgr(Device* pDevice); - ~GpuEventMgr(); - - void BeginCmdBuf(CmdBuffer* pOwner, const Pal::CmdBufferBuildInfo& info); - VkResult RequestEvents(CmdBuffer* pCmdBuf, uint32_t eventCount, GpuEvents*** pppGpuEvents); - void ResetCmdBuf(CmdBuffer* pOwner); - void ResetEvents(); - void Destroy(); - - List::Node* ListNode() { return &m_parentNode; } - -protected: - struct EventChunk - { - EventChunk(); - - InternalMemory gpuMemory; - GpuEvents** ppGpuEvents; - uint32_t eventCount; - uint32_t eventNextFree; - EventChunk* pNextChunk; - }; - - void DestroyChunk(EventChunk* pChunk); - EventChunk* FindFreeExistingChunk(uint32_t eventCount); - VkResult CreateNewChunk(uint32_t eventCount, EventChunk** ppChunk); - EventChunk* CreateChunkState(uint32_t eventCount); - void AllocEventsFromChunk( - CmdBuffer* pCmdBuf, - uint32_t eventCount, - EventChunk* pChunk, - GpuEvents*** ppGpuEvents); - void WaitToRecycleEvents(CmdBuffer* pCmdBuf); - - List::Node m_parentNode; // Intrusive list parent node - EventChunk* m_pFirstChunk; // Linked list of event chunks - bool m_needWaitRecycleEvents; // True if we still need to wait for previous access to events to complete - Device* const m_pDevice; // Device pointer - uint32_t m_totalEventCount; // Total number of GPU event objects created so far -}; - -}; - -#endif /* __GPU_EVENT_MGR_H__ */ diff --git a/icd/api/include/pipeline_binary_cache.h b/icd/api/include/pipeline_binary_cache.h index 554e6ade..65355827 100644 --- a/icd/api/include/pipeline_binary_cache.h +++ b/icd/api/include/pipeline_binary_cache.h @@ -44,6 +44,18 @@ class IPlatformKey; namespace vk { +struct BinaryCacheEntry +{ + Util::MetroHash::Hash hashId; + size_t dataSize; +}; + +constexpr size_t SHA_DIGEST_LENGTH = 20; +struct 
PipelineBinaryCachePrivateHeader +{ + uint8_t hashId[SHA_DIGEST_LENGTH]; +}; + // Unified pipeline cache interface class PipelineBinaryCache { @@ -58,10 +70,13 @@ class PipelineBinaryCache const Llpc::GfxIpVersion& gfxIp, const PhysicalDevice* pPhysicalDevice); - VkResult Initialize( + static bool IsValidBlob( const PhysicalDevice* pPhysicalDevice, - size_t initDataSize, - const void* pInitData); + size_t dataSize, + const void* pData); + + VkResult Initialize( + const PhysicalDevice* pPhysicalDevice); Util::Result QueryPipelineBinary( const CacheId* pCacheId, @@ -70,13 +85,21 @@ class PipelineBinaryCache Util::Result LoadPipelineBinary( const CacheId* pCacheId, size_t* pPipelineBinarySize, - const void** ppPipelineBinary); + const void** ppPipelineBinary) const; Util::Result StorePipelineBinary( const CacheId* pCacheId, size_t pipelineBinarySize, const void* pPipelineBinary); + VkResult Serialize( + void* pBlob, + size_t* pSize); + + VkResult Merge( + uint32_t srcCacheCount, + const PipelineBinaryCache** ppSrcCaches); + #if ICD_GPUOPEN_DEVMODE_BUILD Util::Result LoadReinjectionBinary( const CacheId* pInternalPipelineHash, @@ -132,8 +155,6 @@ class PipelineBinaryCache VkResult InitLayers( const PhysicalDevice* pPhysicalDevice, - size_t initDataSize, - const void* pInitData, bool internal, const RuntimeSettings& settings); @@ -152,6 +173,7 @@ class PipelineBinaryCache const PhysicalDevice* pPhysicalDevice, const RuntimeSettings& settings); + Util::ICacheLayer* GetMemoryLayer() const { return m_pMemoryLayer; } Util::IArchiveFile* OpenReadOnlyArchive(const char* path, const char* fileName, size_t bufferSize); Util::IArchiveFile* OpenWritableArchive(const char* path, const char* fileName, size_t bufferSize); Util::ICacheLayer* CreateFileLayer(Util::IArchiveFile* pFile); @@ -171,7 +193,9 @@ class PipelineBinaryCache Llpc::GfxIpVersion m_gfxIp; // Compared against e_flags of reinjected elf files Instance* const m_pInstance; // Allocator for use when interacting with the cache - Util::IPlatformKey* m_pPlatformKey; // Platform identifying key + + const Util::IPlatformKey* m_pPlatformKey; // Platform identifying key + Util::ICacheLayer* m_pTopLayer; // Top layer of the cache chain where queries are submitted #if ICD_GPUOPEN_DEVMODE_BUILD diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index af4b014c..7f49f1f4 100644 --- a/icd/api/include/pipeline_compiler.h +++ b/icd/api/include/pipeline_compiler.h @@ -39,8 +39,6 @@ #include "include/vk_shader_code.h" -#define ICD_BUILD_MULTI_COMPILER 0 - namespace vk { diff --git a/icd/api/include/vk_cmd_pool.h b/icd/api/include/vk_cmd_pool.h index 2c99f529..7010547e 100644 --- a/icd/api/include/vk_cmd_pool.h +++ b/icd/api/include/vk_cmd_pool.h @@ -39,8 +39,6 @@ #include "include/vk_dispatch.h" #include "include/vk_alloccb.h" -#include "include/gpu_event_mgr.h" - #include "palCmdAllocator.h" #include "palHashSet.h" @@ -79,9 +77,6 @@ class CmdPool : public NonDispatchable void UnregisterCmdBuffer(CmdBuffer* pCmdBuffer); - GpuEventMgr* AcquireGpuEventMgr(); - void ReleaseGpuEventMgr(GpuEventMgr* pGpuEventMgr); - VkResult PalCmdAllocatorReset(); VK_INLINE uint32_t GetQueueFamilyIndex() const { return m_queueFamilyIndex; } @@ -93,8 +88,6 @@ class CmdPool : public NonDispatchable uint32_t queueFamilyIndex, bool sharedCmdAllocator); - void DestroyGpuEventMgrs(); - Device* m_pDevice; Pal::ICmdAllocator* m_pPalCmdAllocators[MaxPalDevices]; const uint32_t m_queueFamilyIndex; @@ -102,7 +95,6 @@ class CmdPool : public NonDispatchable 
Util::HashSet m_cmdBufferRegistry; - Util::IntrusiveList m_freeEventMgrs; uint32_t m_totalEventMgrCount; }; diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index 05eb0bbe..90b2abc2 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -43,7 +43,6 @@ #include "include/vk_render_pass.h" #include "include/vk_utils.h" -#include "include/gpu_event_mgr.h" #include "include/internal_mem_mgr.h" #include "include/stencil_ops_combiner.h" #include "include/vert_buf_binding_mgr.h" @@ -643,8 +642,6 @@ class CmdBuffer VK_FORCEINLINE VirtualStackAllocator* GetStackAllocator() { return m_pStackAllocator; } - void RequestRenderPassEvents(uint32_t eventCount, GpuEvents*** pppGpuEvents); - void PalCmdBarrier( const Pal::BarrierInfo& info, uint32_t deviceMask); @@ -995,7 +992,6 @@ class CmdBuffer VkShaderStageFlags m_validShaderStageFlags; Pal::ICmdBuffer* m_pPalCmdBuffers[MaxPalDevices]; VirtualStackAllocator* m_pStackAllocator; - GpuEventMgr* m_pGpuEventMgr; CmdBufferRenderState m_state; // Render state tracked during command buffer building diff --git a/icd/api/include/vk_conv.h b/icd/api/include/vk_conv.h index 130c0499..fe82af40 100644 --- a/icd/api/include/vk_conv.h +++ b/icd/api/include/vk_conv.h @@ -2766,14 +2766,14 @@ VK_INLINE void VkToPalScissorRect( VK_INLINE Pal::QueuePriority VkToPalGlobalPriority( VkQueueGlobalPriorityEXT vkPriority) { - Pal::QueuePriority palPriority = Pal::QueuePriority::Low; + Pal::QueuePriority palPriority = Pal::QueuePriority::Normal; switch (static_cast(vkPriority)) { case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT: - palPriority = Pal::QueuePriority::VeryLow; + palPriority = Pal::QueuePriority::Idle; break; case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT: - palPriority = Pal::QueuePriority::Low; + palPriority = Pal::QueuePriority::Normal; break; case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT: palPriority = Pal::QueuePriority::Medium; @@ -2782,7 +2782,7 @@ VK_INLINE Pal::QueuePriority VkToPalGlobalPriority( palPriority = Pal::QueuePriority::High; break; default: - palPriority = Pal::QueuePriority::Low; + palPriority = Pal::QueuePriority::Normal; break; } diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index 49015c37..984c8573 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -295,6 +295,7 @@ class DeviceExtensions : public Extensions EXT_MEMORY_PRIORITY, AMD_DEVICE_COHERENT_MEMORY, EXT_MEMORY_BUDGET, + EXT_POST_DEPTH_COVERAGE, EXT_HOST_QUERY_RESET, EXT_BUFFER_DEVICE_ADDRESS, EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, diff --git a/icd/api/include/vk_physical_device.h b/icd/api/include/vk_physical_device.h index 161393df..58fe1010 100644 --- a/icd/api/include/vk_physical_device.h +++ b/icd/api/include/vk_physical_device.h @@ -58,6 +58,11 @@ class IDevice; } // namespace Pal +namespace Util +{ +class IPlatformKey; +} // namespace Util + namespace vk { @@ -192,6 +197,18 @@ class PhysicalDevice return m_queueFamilies[queueFamilyIndex].palEngineType; } + VK_INLINE uint32_t GetCompQueueEngineIndex( + const uint32_t queueIndex) const + { + return m_compQueueEnginesNdx[queueIndex]; + } + + VK_INLINE uint32_t GetUniversalQueueEngineIndex( + const uint32_t queueIndex) const + { + return m_universalQueueEnginesNdx[queueIndex]; + } + VK_INLINE uint32_t GetQueueFamilyPalImageLayoutFlag( uint32_t queueFamilyIndex) const { @@ -637,6 +654,8 @@ class PhysicalDevice VK_INLINE bool ShouldAddRemoteBackupHeap(uint32_t vkIndex) const { return m_memoryVkIndexAddRemoteBackupHeap[vkIndex]; } + 
Util::IPlatformKey* GetPlatformKey() const { return m_pPlatformKey; } + protected: PhysicalDevice(PhysicalDeviceManager* pPhysicalDeviceManager, Pal::IDevice* pPalDevice, @@ -649,6 +668,8 @@ class PhysicalDevice void PopulateExtensions(); void PopulateGpaProperties(); + void InitializePlatformKey(const RuntimeSettings& settings); + VK_FORCEINLINE bool IsPerChannelMinMaxFilteringSupported() const { return m_properties.gfxipProperties.flags.supportPerChannelMinMaxFilter; @@ -686,6 +707,12 @@ class PhysicalDevice VkQueueFamilyProperties properties; } m_queueFamilies[Queue::MaxQueueFamilies]; + // List of indices for compute engines that aren't exclusive. + uint32_t m_compQueueEnginesNdx[Queue::MaxQueuesPerFamily]; + + // List of indices for universal engines that aren't exclusive. + uint32_t m_universalQueueEnginesNdx[Queue::MaxQueuesPerFamily]; + const AppProfile m_appProfile; bool m_prtOnDmaSupported; @@ -705,6 +732,8 @@ class PhysicalDevice } m_memoryUsageTracker; uint8_t m_pipelineCacheUUID[VK_UUID_SIZE]; + + Util::IPlatformKey* m_pPlatformKey; // Platform identifying key }; VK_DEFINE_DISPATCHABLE(PhysicalDevice); diff --git a/icd/api/include/vk_pipeline_cache.h b/icd/api/include/vk_pipeline_cache.h index 50d6f210..74910630 100644 --- a/icd/api/include/vk_pipeline_cache.h +++ b/icd/api/include/vk_pipeline_cache.h @@ -77,7 +77,7 @@ class PipelineCache : public NonDispatchable VkResult Merge(uint32_t srcCacheCount, const PipelineCache** ppSrcCaches); - VK_INLINE PipelineBinaryCache* GetPipelineCache() { return m_pBinaryCache; } + VK_INLINE PipelineBinaryCache* GetPipelineCache() const { return m_pBinaryCache; } protected: PipelineCache(const Device* pDevice, ShaderCache* pShaderCaches, diff --git a/icd/api/include/vk_render_pass.h b/icd/api/include/vk_render_pass.h index 1a24cf24..f92238b4 100644 --- a/icd/api/include/vk_render_pass.h +++ b/icd/api/include/vk_render_pass.h @@ -48,7 +48,6 @@ namespace vk class Device; class CmdBuffer; class Framebuffer; -class GpuEvents; class RenderPassCmdList; struct RenderPassExtCreateInfo diff --git a/icd/api/pipeline_binary_cache.cpp b/icd/api/pipeline_binary_cache.cpp index af5740d9..e20e7be3 100644 --- a/icd/api/pipeline_binary_cache.cpp +++ b/icd/api/pipeline_binary_cache.cpp @@ -66,6 +66,76 @@ const uint32_t PipelineBinaryCache::ElfType = Util::HashString(ElfTypeString static Util::Hash128 ParseHash128(const char* str); #endif +static Util::Result CalculateHashId( + Instance* pInstance, + const Util::IPlatformKey* pPlatformKey, + const void* pData, + size_t dataSize, + uint8_t* pHashId) +{ + Util::Result result = Util::Result::Success; + Util::IHashContext* pContext = nullptr; + size_t contextSize = pPlatformKey->GetKeyContext()->GetDuplicateObjectSize(); + void* pContextMem = pInstance->AllocMem( + contextSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (pContextMem != nullptr) + { + result = pPlatformKey->GetKeyContext()->Duplicate(pContextMem, &pContext); + } + if (result == Util::Result::Success) + { + result = pContext->AddData(pData, dataSize); + } + if (result == Util::Result::Success) + { + result = pContext->Finish(pHashId); + } + if (pContext != nullptr) + { + pContext->Destroy(); + } + if (pContextMem != nullptr) + { + pInstance->FreeMem(pContextMem); + } + + return result; +} + +bool PipelineBinaryCache::IsValidBlob( + const PhysicalDevice* pPhysicalDevice, + size_t dataSize, + const void* pData) +{ + bool isValid = false; + size_t blobSize = dataSize; + auto pBinaryPrivateHeader = static_cast(pData); + uint8_t 
hashId[SHA_DIGEST_LENGTH]; + + pData = Util::VoidPtrInc(pData, sizeof(PipelineBinaryCachePrivateHeader)); + blobSize -= sizeof(PipelineBinaryCachePrivateHeader); + + if (pPhysicalDevice->GetPlatformKey() != nullptr) + { + Util::Result result = CalculateHashId( + pPhysicalDevice->Manager()->VkInstance(), + pPhysicalDevice->GetPlatformKey(), + pData, + blobSize, + hashId); + + if (result == Util::Result::Success) + { + isValid = (memcmp(hashId, pBinaryPrivateHeader->hashId, SHA_DIGEST_LENGTH) == 0); + } + } + + return isValid; +} + // ===================================================================================================================== // Allocate and initialize a PipelineBinaryCache object PipelineBinaryCache* PipelineBinaryCache::Create( @@ -82,12 +152,44 @@ PipelineBinaryCache* PipelineBinaryCache::Create( { pObj = VK_PLACEMENT_NEW(pMem) PipelineBinaryCache(pInstance, gfxIp, internal); - if (pObj->Initialize(pPhysicalDevice, initDataSize, pInitData) != VK_SUCCESS) + if (pObj->Initialize(pPhysicalDevice) != VK_SUCCESS) { pObj->Destroy(); pInstance->FreeMem(pMem); pObj = nullptr; } + else if ((pInitData != nullptr) && + (initDataSize > (sizeof(BinaryCacheEntry) + sizeof(PipelineBinaryCachePrivateHeader)))) + { + const void* pBlob = pInitData; + size_t blobSize = initDataSize; + constexpr size_t EntrySize = sizeof(BinaryCacheEntry); + + pBlob = Util::VoidPtrInc(pBlob, sizeof(PipelineBinaryCachePrivateHeader)); + blobSize -= sizeof(PipelineBinaryCachePrivateHeader); + while (blobSize > EntrySize) + { + const BinaryCacheEntry* pEntry = static_cast(pBlob); + const void* pData = Util::VoidPtrInc(pBlob, sizeof(BinaryCacheEntry)); + const size_t entryAndDataSize = pEntry->dataSize + sizeof(BinaryCacheEntry); + + if (blobSize >= entryAndDataSize) + { + //add to cache + Util::Result result = pObj->StorePipelineBinary(&pEntry->hashId, pEntry->dataSize, pData); + if (result != Util::Result::Success) + { + break; + } + pBlob = Util::VoidPtrInc(pBlob, entryAndDataSize); + blobSize -= entryAndDataSize; + } + else + { + break; + } + } + } } return pObj; } @@ -109,22 +211,17 @@ PipelineBinaryCache::PipelineBinaryCache( m_pArchiveLayer { nullptr }, m_openFiles { pInstance->Allocator() }, m_archiveLayers { pInstance->Allocator() }, - m_isInternalCache { internal } + m_isInternalCache { internal } { // Without copy constructor, a class type variable can't be initialized in initialization list with gcc 4.8.5. // Initialize m_gfxIp here instead to make gcc 4.8.5 work. 
m_gfxIp = gfxIp; + } // ===================================================================================================================== PipelineBinaryCache::~PipelineBinaryCache() { - if (m_pPlatformKey != nullptr) - { - m_pPlatformKey->Destroy(); - m_pInstance->FreeMem(m_pPlatformKey); - } - for (FileVector::Iter i = m_openFiles.Begin(); i.IsValid(); i.Next()) { i.Get()->Destroy(); @@ -171,7 +268,7 @@ Util::Result PipelineBinaryCache::QueryPipelineBinary( Util::Result PipelineBinaryCache::LoadPipelineBinary( const CacheId* pCacheId, size_t* pPipelineBinarySize, - const void** ppPipelineBinary) + const void** ppPipelineBinary) const { VK_ASSERT(m_pTopLayer != nullptr); @@ -347,9 +444,7 @@ void PipelineBinaryCache::FreePipelineBinary( // ===================================================================================================================== // Build the cache layer chain VkResult PipelineBinaryCache::Initialize( - const PhysicalDevice* pPhysicalDevice, - size_t initDataSize, - const void* pInitData) + const PhysicalDevice* pPhysicalDevice) { VkResult result = VK_SUCCESS; @@ -357,12 +452,17 @@ VkResult PipelineBinaryCache::Initialize( if (result == VK_SUCCESS) { - result = InitializePlatformKey(pPhysicalDevice, settings); + m_pPlatformKey = pPhysicalDevice->GetPlatformKey(); + } + + if (m_pPlatformKey == nullptr) + { + result = VK_ERROR_INITIALIZATION_FAILED; } if (result == VK_SUCCESS) { - result = InitLayers(pPhysicalDevice, initDataSize, pInitData, m_isInternalCache, settings); + result = InitLayers(pPhysicalDevice, m_isInternalCache, settings); } if (result == VK_SUCCESS) @@ -402,52 +502,6 @@ VkResult PipelineBinaryCache::Initialize( return result; } -// ===================================================================================================================== -// Generate our platform key -VkResult PipelineBinaryCache::InitializePlatformKey( - const PhysicalDevice* pPhysicalDevice, - const RuntimeSettings& settings) -{ - static constexpr Util::HashAlgorithm KeyAlgorithm = Util::HashAlgorithm::Sha1; - - struct - { - VkPhysicalDeviceProperties properties; - char* timestamp[sizeof(__TIMESTAMP__)]; - } initialData; - - memset(&initialData, 0, sizeof(initialData)); - - VkResult result = pPhysicalDevice->GetDeviceProperties(&initialData.properties); - - if (result == VK_SUCCESS) - { - size_t memSize = Util::GetPlatformKeySize(KeyAlgorithm); - void* pMem = m_pInstance->AllocMem(memSize, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if (pMem == nullptr) - { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - } - else - { - if (settings.markPipelineCacheWithBuildTimestamp) - { - memcpy(initialData.timestamp, __TIMESTAMP__, sizeof(__TIMESTAMP__)); - } - - if (Util::CreatePlatformKey(KeyAlgorithm, &initialData, sizeof(initialData), pMem, &m_pPlatformKey) != - Util::Result::Success) - { - m_pInstance->FreeMem(pMem); - result = VK_ERROR_INITIALIZATION_FAILED; - } - } - } - - return result; -} - #if ICD_GPUOPEN_DEVMODE_BUILD // ===================================================================================================================== // Initialize reinjection cache layer @@ -1033,8 +1087,6 @@ VkResult PipelineBinaryCache::InitArchiveLayers( // Initialize layers (a single layer that supports storage for binaries needs to succeed) VkResult PipelineBinaryCache::InitLayers( const PhysicalDevice* pPhysicalDevice, - size_t initDataSize, - const void* pInitData, bool internal, const RuntimeSettings& settings) { @@ -1122,4 +1174,149 @@ VkResult PipelineBinaryCache::OrderLayers( return 
result; } +// ===================================================================================================================== +// Copies the pipeline cache data to the memory blob provided by the calling function. +// +// NOTE: It is expected that the calling function has not used this pipeline cache since querying the size. +VkResult PipelineBinaryCache::Serialize( + void* pBlob, // [out] System memory pointer where the serialized data should be placed + size_t* pSize) // [in,out] Size of the memory pointed to by pBlob. If the value stored in pSize is zero then no + // data will be copied and instead the size required for serialization will be returned in pSize +{ + VkResult result = VK_ERROR_INITIALIZATION_FAILED; + +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 534 + if (m_pMemoryLayer != nullptr) + { + if (*pSize == 0) + { + size_t curCount, curDataSize; + + result = PalToVkResult(Util::GetMemoryCacheLayerCurSize(m_pMemoryLayer, &curCount, &curDataSize)); + if (result == VK_SUCCESS) + { + *pSize = curCount * sizeof(BinaryCacheEntry) + curDataSize + sizeof(PipelineBinaryCachePrivateHeader); + } + } + else + { + size_t curCount, curDataSize; + + result = PalToVkResult(Util::GetMemoryCacheLayerCurSize(m_pMemoryLayer, &curCount, &curDataSize)); + if (result == VK_SUCCESS) + { + if (*pSize > (sizeof(BinaryCacheEntry) + sizeof(PipelineBinaryCachePrivateHeader))) + { + Util::AutoBuffer cacheIds(curCount, m_pInstance->Allocator()); + size_t remainingSpace = *pSize - sizeof(PipelineBinaryCachePrivateHeader); + + result = PalToVkResult(Util::GetMemoryCacheLayerHashIds(m_pMemoryLayer, curCount, &cacheIds[0])); + if (result == VK_SUCCESS) + { + void* pDataDst = pBlob; + + // Reserve space for the private header + pDataDst = Util::VoidPtrInc(pDataDst, sizeof(PipelineBinaryCachePrivateHeader)); + + for (uint32_t i = 0; i < curCount && remainingSpace > sizeof(BinaryCacheEntry); i++) + { + size_t dataSize; + const void* pBinaryCacheData; + + result = PalToVkResult(LoadPipelineBinary(&cacheIds[i], &dataSize, &pBinaryCacheData)); + if (result == VK_SUCCESS) + { + if (remainingSpace >= (sizeof(BinaryCacheEntry) + dataSize)) + { + BinaryCacheEntry* pEntry = static_cast<BinaryCacheEntry*>(pDataDst); + + pEntry->hashId = cacheIds[i]; + pEntry->dataSize = dataSize; + + pDataDst = Util::VoidPtrInc(pDataDst, sizeof(BinaryCacheEntry)); + memcpy(pDataDst, pBinaryCacheData, dataSize); + pDataDst = Util::VoidPtrInc(pDataDst, dataSize); + remainingSpace -= (sizeof(BinaryCacheEntry) + dataSize); + } + m_pInstance->FreeMem(const_cast<void*>(pBinaryCacheData)); + } + } + } + if (*pSize < (sizeof(BinaryCacheEntry) * curCount + curDataSize + sizeof(PipelineBinaryCachePrivateHeader))) + { + result = VK_INCOMPLETE; + } + *pSize -= remainingSpace; + + auto pBinaryPrivateHeader = static_cast<PipelineBinaryCachePrivateHeader*>(pBlob); + void* pData = Util::VoidPtrInc(pBlob, sizeof(PipelineBinaryCachePrivateHeader)); + + result = PalToVkResult(CalculateHashId( + m_pInstance, + m_pPlatformKey, + pData, + *pSize - sizeof(PipelineBinaryCachePrivateHeader), + pBinaryPrivateHeader->hashId)); + } + else + { + result = VK_ERROR_INITIALIZATION_FAILED; + } + } + } + } +#endif + return result; +} + +// ===================================================================================================================== +// Merges the data from the given source pipeline caches into this cache +// +VkResult PipelineBinaryCache::Merge( + uint32_t srcCacheCount, + const PipelineBinaryCache** ppSrcCaches) +{ + VkResult result = VK_ERROR_INITIALIZATION_FAILED; + +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 534 + if (m_pMemoryLayer != nullptr)
+ { + for (uint32_t i = 0; i < srcCacheCount; i++) + { + Util::ICacheLayer* pMemoryLayer = ppSrcCaches[i]->GetMemoryLayer(); + size_t curCount, curDataSize; + + result = PalToVkResult(Util::GetMemoryCacheLayerCurSize(pMemoryLayer, &curCount, &curDataSize)); + if ((result == VK_SUCCESS) && (curCount > 0)) + { + Util::AutoBuffer cacheIds(curCount, m_pInstance->Allocator()); + + result = PalToVkResult(Util::GetMemoryCacheLayerHashIds(pMemoryLayer, curCount, &cacheIds[0])); + if (result == VK_SUCCESS) + { + for (uint32_t j = 0; j < curCount; j++) + { + size_t dataSize; + const void* pBinaryCacheData; + + result = PalToVkResult(ppSrcCaches[i]->LoadPipelineBinary(&cacheIds[j], &dataSize, &pBinaryCacheData)); + if (result == VK_SUCCESS) + { + result = PalToVkResult(StorePipelineBinary(&cacheIds[j], dataSize, pBinaryCacheData)); + m_pInstance->FreeMem(const_cast<void*>(pBinaryCacheData)); + if (result != VK_SUCCESS) + { + break; + } + } + } + } + } + } + } +#endif + + return result; +} + } // namespace vk diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index 40b0c206..62316c24 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -151,7 +151,7 @@ VkResult PipelineCompiler::Initialize() if (result == VK_SUCCESS) { - result = m_compilerSolutionLlpc.Initialize(); + result = m_compilerSolutionLlpc.Initialize(m_gfxIp, info.gfxLevel); } if ((result == VK_SUCCESS) && diff --git a/icd/api/renderpass/renderpass_logger.cpp b/icd/api/renderpass/renderpass_logger.cpp index e4c451ca..98683a42 100644 --- a/icd/api/renderpass/renderpass_logger.cpp +++ b/icd/api/renderpass/renderpass_logger.cpp @@ -220,7 +220,7 @@ void RenderPassLogger::LogAttachmentReference( const AttachmentReference& reference) { LogAttachment(reference.attachment); - Log(" in %s, %s", ImageLayoutString(reference.layout, false), ImageLayoutString(reference.stencilLayout, false)); + Log(" in %s", ImageLayoutString(reference.layout, false)); Log(" aspectMask "); LogImageAspectMask(reference.aspectMask, false); } @@ -232,8 +232,6 @@ void RenderPassLogger::LogAttachmentReference( LogAttachment(reference.attachment); Log(" in "); LogImageLayout(reference.layout); - Log(", "); - LogImageLayout(reference.stencilLayout); } // ===================================================================================================================== diff --git a/icd/api/sqtt/sqtt_rgp_annotations.h b/icd/api/sqtt/sqtt_rgp_annotations.h index 7392b557..e5b2e478 100644 --- a/icd/api/sqtt/sqtt_rgp_annotations.h +++ b/icd/api/sqtt/sqtt_rgp_annotations.h @@ -176,35 +176,35 @@ constexpr uint32_t RgpSqttMarkerCbEndWordCount = 3; // existing values can't be changed.
enum class RgpSqttMarkerEventType : uint32_t { - CmdDraw = 0, // vkCmdDraw - CmdDrawIndexed = 1, // vkCmdDrawIndexed - CmdDrawIndirect = 2, // vkCmdDrawIndirect - CmdDrawIndexedIndirect = 3, // vkCmdDrawIndexedIndirect - CmdDrawIndirectCountAMD = 4, // vkCmdDrawIndirectCountAMD - CmdDrawIndexedIndirectCountAMD = 5, // vkCmdDrawIndexedIndirectCountAMD - CmdDispatch = 6, // vkCmdDispatch - CmdDispatchIndirect = 7, // vkCmdDispatchIndirect - CmdCopyBuffer = 8, // vkCmdCopyBuffer - CmdCopyImage = 9, // vkCmdCopyImage - CmdBlitImage = 10, // vkCmdBlitImage - CmdCopyBufferToImage = 11, // vkCmdCopyBufferToImage - CmdCopyImageToBuffer = 12, // vkCmdCopyImageToBuffer - CmdUpdateBuffer = 13, // vkCmdUpdateBuffer - CmdFillBuffer = 14, // vkCmdFillBuffer - CmdClearColorImage = 15, // vkCmdClearColorImage - CmdClearDepthStencilImage = 16, // vkCmdClearDepthStencilImage - CmdClearAttachments = 17, // vkCmdClearAttachments - CmdResolveImage = 18, // vkCmdResolveImage - CmdWaitEvents = 19, // vkCmdWaitEvents - CmdPipelineBarrier = 20, // vkCmdPipelineBarrier - CmdResetQueryPool = 21, // vkCmdResetQueryPool - CmdCopyQueryPoolResults = 22, // vkCmdCopyQueryPoolResults - RenderPassColorClear = 23, // Render pass: Color clear triggered by attachment load op - RenderPassDepthStencilClear = 24, // Render pass: Depth-stencil clear triggered by attachment load op - RenderPassResolve = 25, // Render pass: Color multisample resolve triggered by resolve attachment - InternalUnknown = 26, // Draw or dispatch by PAL due to a reason we do not know - CmdDrawIndirectCountKHR = 27, // vkCmdDrawIndirectCountKHR - CmdDrawIndexedIndirectCountKHR = 28, // vkCmdDrawIndexedIndirectCountKHR + CmdDraw = 0, // vkCmdDraw + CmdDrawIndexed = 1, // vkCmdDrawIndexed + CmdDrawIndirect = 2, // vkCmdDrawIndirect + CmdDrawIndexedIndirect = 3, // vkCmdDrawIndexedIndirect + CmdDrawIndirectCountAMD = 4, // vkCmdDrawIndirectCountAMD + CmdDrawIndexedIndirectCountAMD = 5, // vkCmdDrawIndexedIndirectCountAMD + CmdDispatch = 6, // vkCmdDispatch + CmdDispatchIndirect = 7, // vkCmdDispatchIndirect + CmdCopyBuffer = 8, // vkCmdCopyBuffer + CmdCopyImage = 9, // vkCmdCopyImage + CmdBlitImage = 10, // vkCmdBlitImage + CmdCopyBufferToImage = 11, // vkCmdCopyBufferToImage + CmdCopyImageToBuffer = 12, // vkCmdCopyImageToBuffer + CmdUpdateBuffer = 13, // vkCmdUpdateBuffer + CmdFillBuffer = 14, // vkCmdFillBuffer + CmdClearColorImage = 15, // vkCmdClearColorImage + CmdClearDepthStencilImage = 16, // vkCmdClearDepthStencilImage + CmdClearAttachments = 17, // vkCmdClearAttachments + CmdResolveImage = 18, // vkCmdResolveImage + CmdWaitEvents = 19, // vkCmdWaitEvents + CmdPipelineBarrier = 20, // vkCmdPipelineBarrier + CmdResetQueryPool = 21, // vkCmdResetQueryPool + CmdCopyQueryPoolResults = 22, // vkCmdCopyQueryPoolResults + RenderPassColorClear = 23, // Render pass: Color clear triggered by attachment load op + RenderPassDepthStencilClear = 24, // Render pass: Depth-stencil clear triggered by attachment load op + RenderPassResolve = 25, // Render pass: Color multisample resolve triggered by resolve attachment + InternalUnknown = 26, // Draw or dispatch by PAL due to a reason we do not know + CmdDrawIndirectCountKHR = 27, // vkCmdDrawIndirectCountKHR + CmdDrawIndexedIndirectCountKHR = 28, // vkCmdDrawIndexedIndirectCountKHR Invalid = 0xffffffff }; @@ -431,52 +431,52 @@ struct RgpSqttMarkerUserEventWithString enum class RgpSqttMarkerGeneralApiType : uint32_t { // Interesting subset of core Vulkan 1.0: - CmdBindPipeline = 0, - CmdBindDescriptorSets = 1, - 
CmdBindIndexBuffer = 2, - CmdBindVertexBuffers = 3, - CmdDraw = 4, - CmdDrawIndexed = 5, - CmdDrawIndirect = 6, - CmdDrawIndexedIndirect = 7, - CmdDrawIndirectCountAMD = 8, - CmdDrawIndexedIndirectCountAMD = 9, - CmdDispatch = 10, - CmdDispatchIndirect = 11, - CmdCopyBuffer = 12, - CmdCopyImage = 13, - CmdBlitImage = 14, - CmdCopyBufferToImage = 15, - CmdCopyImageToBuffer = 16, - CmdUpdateBuffer = 17, - CmdFillBuffer = 18, - CmdClearColorImage = 19, - CmdClearDepthStencilImage = 20, - CmdClearAttachments = 21, - CmdResolveImage = 22, - CmdWaitEvents = 23, - CmdPipelineBarrier = 24, - CmdBeginQuery = 25, - CmdEndQuery = 26, - CmdResetQueryPool = 27, - CmdWriteTimestamp = 28, - CmdCopyQueryPoolResults = 29, - CmdPushConstants = 30, - CmdBeginRenderPass = 31, - CmdNextSubpass = 32, - CmdEndRenderPass = 33, - CmdExecuteCommands = 34, - CmdSetViewport = 35, - CmdSetScissor = 36, - CmdSetLineWidth = 37, - CmdSetDepthBias = 38, - CmdSetBlendConstants = 39, - CmdSetDepthBounds = 40, - CmdSetStencilCompareMask = 41, - CmdSetStencilWriteMask = 42, - CmdSetStencilReference = 43, - CmdDrawIndirectCountKHR = 44, - CmdDrawIndexedIndirectCountKHR = 45, + CmdBindPipeline = 0, + CmdBindDescriptorSets = 1, + CmdBindIndexBuffer = 2, + CmdBindVertexBuffers = 3, + CmdDraw = 4, + CmdDrawIndexed = 5, + CmdDrawIndirect = 6, + CmdDrawIndexedIndirect = 7, + CmdDrawIndirectCountAMD = 8, + CmdDrawIndexedIndirectCountAMD = 9, + CmdDispatch = 10, + CmdDispatchIndirect = 11, + CmdCopyBuffer = 12, + CmdCopyImage = 13, + CmdBlitImage = 14, + CmdCopyBufferToImage = 15, + CmdCopyImageToBuffer = 16, + CmdUpdateBuffer = 17, + CmdFillBuffer = 18, + CmdClearColorImage = 19, + CmdClearDepthStencilImage = 20, + CmdClearAttachments = 21, + CmdResolveImage = 22, + CmdWaitEvents = 23, + CmdPipelineBarrier = 24, + CmdBeginQuery = 25, + CmdEndQuery = 26, + CmdResetQueryPool = 27, + CmdWriteTimestamp = 28, + CmdCopyQueryPoolResults = 29, + CmdPushConstants = 30, + CmdBeginRenderPass = 31, + CmdNextSubpass = 32, + CmdEndRenderPass = 33, + CmdExecuteCommands = 34, + CmdSetViewport = 35, + CmdSetScissor = 36, + CmdSetLineWidth = 37, + CmdSetDepthBias = 38, + CmdSetBlendConstants = 39, + CmdSetDepthBounds = 40, + CmdSetStencilCompareMask = 41, + CmdSetStencilWriteMask = 42, + CmdSetStencilReference = 43, + CmdDrawIndirectCountKHR = 44, + CmdDrawIndexedIndirectCountKHR = 45, Invalid = 0xffffffff }; diff --git a/icd/api/strings/base_extensions.txt b/icd/api/strings/base_extensions.txt index 6e22799b..a84c7d47 100644 --- a/icd/api/strings/base_extensions.txt +++ b/icd/api/strings/base_extensions.txt @@ -109,3 +109,4 @@ VK_EXT_subgroup_size_control VK_EXT_calibrated_timestamps VK_KHR_pipeline_executable_properties VK_EXT_line_rasterization +VK_EXT_post_depth_coverage diff --git a/icd/api/vk_cmd_pool.cpp b/icd/api/vk_cmd_pool.cpp index c100efbc..b153ab2b 100644 --- a/icd/api/vk_cmd_pool.cpp +++ b/icd/api/vk_cmd_pool.cpp @@ -218,8 +218,6 @@ VkResult CmdPool::Destroy( pAllocator->pfnFree(pAllocator->pUserData, m_pPalCmdAllocators[DefaultDeviceIndex]); } - DestroyGpuEventMgrs(); - Util::Destructor(this); pAllocator->pfnFree(pAllocator->pUserData, this); @@ -227,26 +225,6 @@ VkResult CmdPool::Destroy( return VK_SUCCESS; } -// ===================================================================================================================== -void CmdPool::DestroyGpuEventMgrs() -{ - while (m_freeEventMgrs.IsEmpty() == false) - { - VK_ASSERT(m_totalEventMgrCount > 0); - - m_totalEventMgrCount--; - - GpuEventMgr::List::Iter it = 
m_freeEventMgrs.Begin(); - GpuEventMgr* pEventMgr = it.Get(); - m_freeEventMgrs.Erase(&it); - - pEventMgr->Destroy(); - m_pDevice->VkInstance()->FreeMem(pEventMgr); - } - - VK_ASSERT(m_totalEventMgrCount == 0); -} - // ===================================================================================================================== VkResult CmdPool::PalCmdAllocatorReset() { @@ -308,48 +286,6 @@ void CmdPool::UnregisterCmdBuffer(CmdBuffer* pCmdBuffer) m_cmdBufferRegistry.Erase(pCmdBuffer); } -// ===================================================================================================================== -GpuEventMgr* CmdPool::AcquireGpuEventMgr() -{ - GpuEventMgr* pEventMgr = nullptr; - - if (!m_freeEventMgrs.IsEmpty()) - { - GpuEventMgr::List::Iter it = m_freeEventMgrs.Begin(); - - pEventMgr = it.Get(); - - m_freeEventMgrs.Erase(&it); - } - - if (pEventMgr == nullptr) - { - void* pMemory = m_pDevice->VkInstance()->AllocMem( - sizeof(GpuEventMgr), - VK_DEFAULT_MEM_ALIGN, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - if (pMemory != nullptr) - { - pEventMgr = VK_PLACEMENT_NEW(pMemory) GpuEventMgr(m_pDevice); - - m_totalEventMgrCount++; - } - } - - return pEventMgr; -} - -// ===================================================================================================================== -void CmdPool::ReleaseGpuEventMgr(GpuEventMgr* pGpuEventMgr) -{ - VK_ASSERT(pGpuEventMgr->ListNode()->InList() == false); - - pGpuEventMgr->ResetEvents(); - - m_freeEventMgrs.PushBack(pGpuEventMgr->ListNode()); -} - /** *********************************************************************************************************************** * C-Callable entry points start here. These entries go in the dispatch table(s). diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index 40601688..b8427177 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -352,7 +352,6 @@ CmdBuffer::CmdBuffer( m_cbBeginDeviceMask(0), m_validShaderStageFlags(pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetValidShaderStages(queueFamilyIndex)), m_pStackAllocator(nullptr), - m_pGpuEventMgr(nullptr), m_vbMgr(pDevice), m_is2ndLvl(false), m_isRecording(false), @@ -1088,24 +1087,7 @@ VkResult CmdBuffer::Begin( } } - // Get a GPU event manager if we don't already have one - if (m_pGpuEventMgr == nullptr) - { - m_pGpuEventMgr = m_pCmdPool->AcquireGpuEventMgr(); - - if (m_pGpuEventMgr == nullptr) - { - result = Pal::Result::ErrorOutOfMemory; - } - } - - // Notify the GPU event manager we're starting a new command buffer - if (m_pGpuEventMgr != nullptr) - { - m_pGpuEventMgr->BeginCmdBuf(this, cmdInfo); - - m_isRecording = true; - } + m_isRecording = true; if (m_is2ndLvl && pRenderPass) // secondary VkCommandBuffer will be used inside VkRenderPass { @@ -1225,11 +1207,6 @@ void CmdBuffer::ResetState() m_curDeviceMask = InvalidPalDeviceMask; - if (m_pGpuEventMgr != nullptr) - { - m_pGpuEventMgr->ResetCmdBuf(this); - } - m_renderPassInstance.pExecuteInfo = nullptr; m_renderPassInstance.subpass = VK_SUBPASS_EXTERNAL; m_renderPassInstance.flags.u32All = 0; @@ -1569,13 +1546,6 @@ void CmdBuffer::ReleaseResources() m_renderPassInstance.maxSubpassCount = 0; } - // Release the GPU event manager back to the command pool - if (m_pGpuEventMgr != nullptr) - { - m_pCmdPool->ReleaseGpuEventMgr(m_pGpuEventMgr); - m_pGpuEventMgr = nullptr; - } - if (m_pStackAllocator != nullptr) { pInstance->StackMgr()->ReleaseAllocator(m_pStackAllocator); @@ -2704,12 +2674,6 @@ void CmdBuffer::PalCmdResetEvent( } } -// 
===================================================================================================================== -// Instantiate the template function -template void CmdBuffer::PalCmdResetEvent( - GpuEvents* pEvent, - Pal::HwPipePoint resetPoint); - // ===================================================================================================================== template void CmdBuffer::PalCmdSetEvent( @@ -2725,12 +2689,6 @@ void CmdBuffer::PalCmdSetEvent( } } -// ===================================================================================================================== -// Instantiate the template function -template void CmdBuffer::PalCmdSetEvent( - GpuEvents* pEvent, - Pal::HwPipePoint resetPoint); - // ===================================================================================================================== template void CmdBuffer::PalCmdResolveImage( @@ -5035,28 +4993,6 @@ void CmdBuffer::PushConstants( DbgBarrierPostCmd(DbgBarrierBindSetsPushConstants); } -// ===================================================================================================================== -void CmdBuffer::RequestRenderPassEvents( - uint32_t eventCount, - GpuEvents*** pppGpuEvents) -{ - VK_ASSERT(m_pGpuEventMgr != nullptr); - - // This function may fail if we've run out of system/video memory. There is no way to return "out of memory" - // during command buffer building -- the function is just expected to succeed. Under these extreme conditions, - // the render pass logic will fall back to using a hard pipeline barrier between every node. - VkResult result = m_pGpuEventMgr->RequestEvents(this, eventCount, pppGpuEvents); - - if (result != VK_SUCCESS) - { - // This situation should be so rare that it's worth asserting here. If we actually ever hit this condition, - // we are probably leaking GPU memory somewhere. 
- VK_ALERT("Failed to create GPU events for render passes."); - - *pppGpuEvents = nullptr; - } -} - // ===================================================================================================================== void CmdBuffer::SetViewport( uint32_t firstViewport, diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index b66c573e..bfadb4d5 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -294,22 +294,37 @@ static void ConstructQueueCreateInfo( { VK_ASSERT(queuePriority == VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT); - pQueueCreateInfo->engineType = Pal::EngineType::EngineTypeExclusiveCompute; + pQueueCreateInfo->engineType = Pal::EngineType::EngineTypeCompute; pQueueCreateInfo->engineIndex = rtCuHighComputeSubEngineIndex; pQueueCreateInfo->numReservedCu = dedicatedComputeUnits; } - else if ((palQueuePriority > Pal::QueuePriority::Low) && - (palQueueType == Pal::QueueType::QueueTypeCompute) && - (vrHighPriorityIndex != UINT32_MAX)) + else if (palQueueType == Pal::QueueType::QueueTypeCompute) { - pQueueCreateInfo->engineType = Pal::EngineType::EngineTypeExclusiveCompute; - pQueueCreateInfo->engineIndex = vrHighPriorityIndex; + pQueueCreateInfo->engineType = Pal::EngineType::EngineTypeCompute; + + if ((palQueuePriority > Pal::QueuePriority::Idle) && + (vrHighPriorityIndex != UINT32_MAX)) + { + pQueueCreateInfo->engineIndex = vrHighPriorityIndex; + } + else + { + pQueueCreateInfo->engineIndex = pPhysicalDevices[deviceIdx]->GetCompQueueEngineIndex(queueIndex); + } } else { pQueueCreateInfo->engineType = pPhysicalDevices[deviceIdx]->GetQueueFamilyPalEngineType(queueFamilyIndex); - pQueueCreateInfo->engineIndex = queueIndex; + + if (palQueueType == Pal::QueueType::QueueTypeUniversal) + { + pQueueCreateInfo->engineIndex = pPhysicalDevices[deviceIdx]->GetUniversalQueueEngineIndex(queueIndex); + } + else + { + pQueueCreateInfo->engineIndex = queueIndex; + } } pQueueCreateInfo->queueType = palQueueType; @@ -638,6 +653,17 @@ VkResult Device::Create( reinterpret_cast<const VkDeviceMemoryOverallocationCreateInfoAMD*>(pHeader); overallocationBehavior = pMemoryOverallocationCreateInfo->overallocationBehavior; + + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT*>(pHeader)); + + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: @@ -1146,7 +1172,7 @@ VkResult Device::Initialize( } case AppProfile::WolfensteinII: // This application optimization layer is currently GFX10-specific - if (deviceProps.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (deviceProps.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { void* pMemory = VkInstance()->AllocMem(sizeof(Wolfenstein2Layer), VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); @@ -1820,15 +1846,19 @@ void Device::DestroyInternalPipelines() // Wait for device idle. Punts to PAL device.
VkResult Device::WaitIdle(void) { - for (uint32_t i = 0; i < Queue::MaxQueueFamilies; ++i) + VkResult result = VK_SUCCESS; + + for (uint32_t i = 0; (i < Queue::MaxQueueFamilies) && (result == VK_SUCCESS); ++i) { - for (uint32_t j = 0; (j < Queue::MaxQueuesPerFamily) && (m_pQueues[i][j] != nullptr); ++j) + for (uint32_t j = 0; + (j < Queue::MaxQueuesPerFamily) && (m_pQueues[i][j] != nullptr) && (result == VK_SUCCESS); + ++j) { - (*m_pQueues[i][j])->WaitIdle(); + result = (*m_pQueues[i][j])->WaitIdle(); } } - return VK_SUCCESS; + return result; } // ===================================================================================================================== diff --git a/icd/api/vk_event.cpp b/icd/api/vk_event.cpp index e46f156e..93200e6d 100644 --- a/icd/api/vk_event.cpp +++ b/icd/api/vk_event.cpp @@ -29,7 +29,6 @@ *********************************************************************************************************************** */ -#include "include/gpu_event_mgr.h" #include "include/vk_conv.h" #include "include/vk_device.h" #include "include/vk_event.h" diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index 7d53246d..fed901a9 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -659,7 +659,7 @@ VkResult Image::Create( // b. If dev enables the extension: keep DCC enabled for UAVs with <= 4 mips // c. Can app-detect un-disable DCC for cases where we know devs don't store to multiple mips Pal::GfxIpLevel gfxLevel = pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gfxLevel; - if ((gfxLevel > Pal::GfxIpLevel::GfxIp9) && (gfxLevel <= Pal::GfxIpLevel::GfxIp10_1) && + if ((gfxLevel == Pal::GfxIpLevel::GfxIp10_1) && pDevice->IsExtensionEnabled(DeviceExtensions::AMD_SHADER_IMAGE_LOAD_STORE_LOD) && (pCreateInfo->mipLevels > 4) && (pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index c09acdca..a20a43c0 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -59,6 +59,7 @@ #include "palLib.h" #include "palMath.h" #include "palMsaaState.h" +#include "palPlatformKey.h" #include "palScreen.h" #include "palHashLiteralString.h" #include @@ -259,7 +260,8 @@ PhysicalDevice::PhysicalDevice( m_prtOnDmaSupported(true), m_supportedExtensions(), m_allowedExtensions(), - m_compiler(this) + m_compiler(this), + m_pPlatformKey(nullptr) { memset(&m_limits, 0, sizeof(m_limits)); memset(m_formatFeatureMsaaTarget, 0, sizeof(m_formatFeatureMsaaTarget)); @@ -444,6 +446,44 @@ void PhysicalDevice::DecreaseAllocatedMemorySize( m_memoryUsageTracker.allocatedMemorySize[heapIdx] -= allocationSize; } +// ===================================================================================================================== +// Generate our platform key +void PhysicalDevice::InitializePlatformKey( + const RuntimeSettings& settings) +{ + static constexpr Util::HashAlgorithm KeyAlgorithm = Util::HashAlgorithm::Sha1; + + struct + { + VkPhysicalDeviceProperties properties; + char timestamp[sizeof(__TIMESTAMP__)]; + } initialData; + + memset(&initialData, 0, sizeof(initialData)); + + VkResult result = GetDeviceProperties(&initialData.properties); + + if (result == VK_SUCCESS) + { + size_t memSize = Util::GetPlatformKeySize(KeyAlgorithm); + void* pMem = VkInstance()->AllocMem(memSize, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (pMem != nullptr) + { + if (settings.markPipelineCacheWithBuildTimestamp) + { + memcpy(initialData.timestamp, __TIMESTAMP__, sizeof(__TIMESTAMP__)); + } + + if
(Util::CreatePlatformKey(KeyAlgorithm, &initialData, sizeof(initialData), pMem, &m_pPlatformKey) != + Util::Result::Success) + { + VkInstance()->FreeMem(pMem); + } + } + } +} + // ===================================================================================================================== VkResult PhysicalDevice::Initialize() { @@ -466,14 +506,8 @@ VkResult PhysicalDevice::Initialize() { for (uint32_t idx = 0; idx < Pal::EngineTypeCount; ++idx) { - // We do not currently create a high priority universal queue, so we don't need that engine. - // In order to support global priority, we still need exclusive compute engine to be initialized - // but this engine can only be selected according to the global priority set by application - if (idx != static_cast(Pal::EngineTypeHighPriorityUniversal)) - { - const auto& engineProps = m_properties.engineProperties[idx]; - finalizeInfo.requestedEngineCounts[idx].engines = ((1 << engineProps.engineCount) - 1); - } + const auto& engineProps = m_properties.engineProperties[idx]; + finalizeInfo.requestedEngineCounts[idx].engines = ((1 << engineProps.engineCount) - 1); } } @@ -742,8 +776,10 @@ VkResult PhysicalDevice::Initialize() } VkResult vkResult = PalToVkResult(result); + if (vkResult == VK_SUCCESS) { + InitializePlatformKey(settings); vkResult = m_compiler.Initialize(); } @@ -895,6 +931,12 @@ void PhysicalDevice::LateInitialize() // ===================================================================================================================== VkResult PhysicalDevice::Destroy(void) { + if (m_pPlatformKey != nullptr) + { + m_pPlatformKey->Destroy(); + VkInstance()->FreeMem(m_pPlatformKey); + } + m_compiler.Destroy(); this->~PhysicalDevice(); @@ -1033,8 +1075,10 @@ VkResult PhysicalDevice::GetFeatures( pFeatures->shaderStorageImageArrayDynamicIndexing = VK_TRUE; pFeatures->shaderClipDistance = VK_TRUE; pFeatures->shaderCullDistance = VK_TRUE; - pFeatures->shaderFloat64 = VK_TRUE; - pFeatures->shaderInt64 = VK_TRUE; + pFeatures->shaderFloat64 = + (PalProperties().gfxipProperties.flags.support64BitInstructions ? VK_TRUE : VK_FALSE); + pFeatures->shaderInt64 = + (PalProperties().gfxipProperties.flags.support64BitInstructions ? 
VK_TRUE : VK_FALSE); if ((PalProperties().gfxipProperties.flags.support16BitInstructions) && ((GetRuntimeSettings().optOnlyEnableFP16ForGfx9Plus == false) || @@ -3202,16 +3246,11 @@ void PhysicalDevice::PopulateQueueFamilies() VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT, - // Pal::EngineTypeExclusiveCompute - 0, // Pal::EngineTypeDma VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT, // Pal::EngineTypeTimer 0, - // Pal::EngineTypeHighPriorityUniversal - 0, - }; // While it's possible for an engineType to support multiple queueTypes, @@ -3220,10 +3259,8 @@ void PhysicalDevice::PopulateQueueFamilies() { Pal::QueueTypeUniversal, Pal::QueueTypeCompute, - Pal::QueueTypeCompute, Pal::QueueTypeDma, Pal::QueueTypeTimer, - Pal::QueueTypeUniversal, }; @@ -3231,11 +3268,9 @@ void PhysicalDevice::PopulateQueueFamilies() (VK_ARRAY_SIZE(palQueueTypes) == Pal::EngineTypeCount) && (Pal::EngineTypeUniversal == 0) && (Pal::EngineTypeCompute == 1) && - (Pal::EngineTypeExclusiveCompute == 2) && - (Pal::EngineTypeDma == 3) && - (Pal::EngineTypeTimer == 4) && - (Pal::EngineTypeHighPriorityUniversal == 0x5), - "PAL engine types have changed, need to update the tables above"); + (Pal::EngineTypeDma == 2) && + (Pal::EngineTypeTimer == 3) + , "PAL engine types have changed, need to update the tables above"); // Always enable core queue flags. Final determination of support will be done on a per-engine basis. uint32_t enabledQueueFlags = @@ -3244,17 +3279,48 @@ void PhysicalDevice::PopulateQueueFamilies() VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT; - // find out the sub engine index of VrHighPriority. - const auto& exclusiveComputeProps = m_properties.engineProperties[Pal::EngineTypeExclusiveCompute]; - for (uint32_t subEngineIndex = 0; subEngineIndex < exclusiveComputeProps.engineCount; subEngineIndex++) + const uint32 queueSupportPriority = Pal::QueuePrioritySupport::SupportQueuePriorityNormal | + Pal::QueuePrioritySupport::SupportQueuePriorityIdle; + + // find out the sub engine index of VrHighPriority and indices for compute engines that aren't exclusive. { - if (exclusiveComputeProps.engineSubType[subEngineIndex] == Pal::EngineSubType::VrHighPriority) + const auto& computeProps = m_properties.engineProperties[Pal::EngineTypeCompute]; + uint32_t engineIndex = 0u; + for (uint32_t subEngineIndex = 0; subEngineIndex < computeProps.engineCount; subEngineIndex++) { - m_vrHighPrioritySubEngineIndex = subEngineIndex; + if (computeProps.capabilities[subEngineIndex].flags.exclusive == 1) + { + if (computeProps.capabilities[subEngineIndex].queuePrioritySupport & + Pal::QueuePrioritySupport::SupportQueuePriorityRealtime) + { + m_RtCuHighComputeSubEngineIndex = subEngineIndex; + } + else if (computeProps.capabilities[subEngineIndex].queuePrioritySupport & + Pal::QueuePrioritySupport::SupportQueuePriorityHigh) + { + m_vrHighPrioritySubEngineIndex = subEngineIndex; + } + } + else if ((computeProps.capabilities[subEngineIndex].queuePrioritySupport == queueSupportPriority) || + (computeProps.capabilities[subEngineIndex].queuePrioritySupport == 0u)) + { + m_compQueueEnginesNdx[engineIndex++] = subEngineIndex; + } } - else if (exclusiveComputeProps.engineSubType[subEngineIndex] == Pal::EngineSubType::RtCuHighCompute) + } + + // find out universal engines that aren't exclusive. 
+ { + const auto& universalProps = m_properties.engineProperties[Pal::EngineTypeUniversal]; + uint32_t engineIndex = 0u; + for (uint32_t subEngineIndex = 0; subEngineIndex < universalProps.engineCount; subEngineIndex++) { - m_RtCuHighComputeSubEngineIndex = subEngineIndex; + if ((universalProps.capabilities[subEngineIndex].flags.exclusive == 0) && + ((universalProps.capabilities[subEngineIndex].queuePrioritySupport == queueSupportPriority) || + (universalProps.capabilities[subEngineIndex].queuePrioritySupport == 0u))) + { + m_universalQueueEnginesNdx[engineIndex++] = subEngineIndex; + } } } @@ -3302,8 +3368,6 @@ void PhysicalDevice::PopulateQueueFamilies() break; case Pal::EngineTypeCompute: pComputeQueueFamilyProperties = &m_queueFamilies[m_queueFamilyCount].properties; - // fallthrough - case Pal::EngineTypeExclusiveCompute: palImageLayoutFlag = Pal::LayoutComputeEngine; transferGranularityOverride = settings.transferGranularityComputeOverride; m_queueFamilies[m_queueFamilyCount].validShaderStages |= VK_SHADER_STAGE_COMPUTE_BIT; @@ -3327,9 +3391,21 @@ void PhysicalDevice::PopulateQueueFamilies() VkQueueFamilyProperties* pQueueFamilyProps = &m_queueFamilies[m_queueFamilyCount].properties; pQueueFamilyProps->queueFlags = (vkQueueFlags[engineType] & supportedQueueFlags); - pQueueFamilyProps->queueCount = (engineType == Pal::EngineTypeCompute) - ? Util::Min(settings.asyncComputeQueueLimit, engineProps.engineCount) - : engineProps.engineCount; + pQueueFamilyProps->queueCount = 0u; + + for (uint32 engineNdx = 0u; engineNdx < engineProps.engineCount; ++engineNdx) + { + if ((engineProps.capabilities[engineNdx].flags.exclusive == 0) && + ((engineProps.capabilities[engineNdx].queuePrioritySupport == queueSupportPriority) || + (engineProps.capabilities[engineNdx].queuePrioritySupport == 0u))) + { + pQueueFamilyProps->queueCount++; + } + } + pQueueFamilyProps->queueCount = (engineType == Pal::EngineTypeCompute) + ? Util::Min(settings.asyncComputeQueueLimit, pQueueFamilyProps->queueCount) + : pQueueFamilyProps->queueCount; + pQueueFamilyProps->timestampValidBits = (engineProps.flags.supportsTimestamps != 0) ? 
64 : 0; pQueueFamilyProps->minImageTransferGranularity = PalToVkExtent3d(engineProps.minTiledImageCopyAlignment); @@ -3498,17 +3574,17 @@ void PhysicalDevice::GetPhysicalDeviceIDProperties( *pDeviceLUIDValid = VK_FALSE; #if defined(INTEROP_DRIVER_UUID) - const char* pDriverUuidString = INTEROP_DRIVER_UUID; + const char driverUuidString[] = INTEROP_DRIVER_UUID; #else - const char* pDriverUuidString = "AMD-LINUX-DRV"; + const char driverUuidString[] = "AMD-LINUX-DRV"; #endif - static_assert(VK_UUID_SIZE >= sizeof(pDriverUuidString), + static_assert(VK_UUID_SIZE >= sizeof(driverUuidString), "The driver UUID string has changed and now exceeds the maximum length permitted by Vulkan"); memcpy(pDriverUUID, - pDriverUuidString, - strlen(pDriverUuidString)); + driverUuidString, + strlen(driverUuidString)); } // ===================================================================================================================== @@ -3608,19 +3684,10 @@ void PhysicalDevice::GetPhysicalDeviceFloatControlsProperties( pFloatControlsProperties->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR; pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat32 = VK_TRUE; - pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat64 = VK_TRUE; - pFloatControlsProperties->shaderDenormPreserveFloat32 = VK_TRUE; - pFloatControlsProperties->shaderDenormPreserveFloat64 = VK_TRUE; - pFloatControlsProperties->shaderDenormFlushToZeroFloat32 = VK_TRUE; - pFloatControlsProperties->shaderDenormFlushToZeroFloat64 = VK_TRUE; - pFloatControlsProperties->shaderRoundingModeRTEFloat32 = VK_TRUE; - pFloatControlsProperties->shaderRoundingModeRTEFloat64 = VK_TRUE; - pFloatControlsProperties->shaderRoundingModeRTZFloat32 = VK_TRUE; - pFloatControlsProperties->shaderRoundingModeRTZFloat64 = VK_TRUE; if (PalProperties().gfxipProperties.flags.supportDoubleRate16BitInstructions) { @@ -3638,6 +3705,23 @@ void PhysicalDevice::GetPhysicalDeviceFloatControlsProperties( pFloatControlsProperties->shaderRoundingModeRTEFloat16 = VK_FALSE; pFloatControlsProperties->shaderRoundingModeRTZFloat16 = VK_FALSE; } + + if (PalProperties().gfxipProperties.flags.support64BitInstructions) + { + pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat64 = VK_TRUE; + pFloatControlsProperties->shaderDenormPreserveFloat64 = VK_TRUE; + pFloatControlsProperties->shaderDenormFlushToZeroFloat64 = VK_TRUE; + pFloatControlsProperties->shaderRoundingModeRTEFloat64 = VK_TRUE; + pFloatControlsProperties->shaderRoundingModeRTZFloat64 = VK_TRUE; + } + else + { + pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat64 = VK_FALSE; + pFloatControlsProperties->shaderDenormPreserveFloat64 = VK_FALSE; + pFloatControlsProperties->shaderDenormFlushToZeroFloat64 = VK_FALSE; + pFloatControlsProperties->shaderRoundingModeRTEFloat64 = VK_FALSE; + pFloatControlsProperties->shaderRoundingModeRTZFloat64 = VK_FALSE; + } } // ===================================================================================================================== @@ -3840,8 +3924,16 @@ void PhysicalDevice::GetPhysicalDeviceShaderAtomicInt64Features( VkBool32* pShaderSharedInt64Atomics ) const { - *pShaderBufferInt64Atomics = VK_TRUE; - *pShaderSharedInt64Atomics = VK_TRUE; + if (PalProperties().gfxipProperties.flags.support64BitInstructions) + { + *pShaderBufferInt64Atomics = VK_TRUE; + *pShaderSharedInt64Atomics = VK_TRUE; + } + else + { + *pShaderBufferInt64Atomics = VK_FALSE; + *pShaderSharedInt64Atomics = VK_FALSE; + } } // 
===================================================================================================================== @@ -4183,6 +4275,17 @@ void PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: + { + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT* pVertexAttributeDivisorFeatures = + reinterpret_cast<VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT*>(pHeader); + + pVertexAttributeDivisorFeatures->vertexAttributeInstanceRateDivisor = VK_TRUE; + pVertexAttributeDivisorFeatures->vertexAttributeInstanceRateZeroDivisor = VK_FALSE; + + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: { VkPhysicalDeviceCoherentMemoryFeaturesAMD * pDeviceCoherentMemory = diff --git a/icd/api/vk_pipeline_cache.cpp b/icd/api/vk_pipeline_cache.cpp index 32273a7e..1503dc0d 100644 --- a/icd/api/vk_pipeline_cache.cpp +++ b/icd/api/vk_pipeline_cache.cpp @@ -76,6 +76,8 @@ VkResult PipelineCache::Create( size_t shaderCacheSize = 0; size_t pipelineCacheSize[MaxPalDevices]; + bool usePipelineCacheInitialData = false; + PipelineCompilerType cacheType = pDevice->GetCompiler(DefaultDeviceIndex)->GetShaderCacheType(); for (uint32_t i = 0; i < numPalDevices; i++) @@ -99,12 +101,20 @@ VkResult PipelineCache::Create( pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetDeviceProperties(&physicalDeviceProps); if (memcmp(pHeader->UUID, physicalDeviceProps.pipelineCacheUUID, sizeof(pHeader->UUID)) == 0) { - auto pPrivateDataHeader = reinterpret_cast<const PipelineCachePrivateHeaderData*>( - Util::VoidPtrInc(pCreateInfo->pInitialData, sizeof(PipelineCacheHeaderData))); + const void* pData = Util::VoidPtrInc(pCreateInfo->pInitialData, sizeof(PipelineCacheHeaderData)); + size_t dataSize = pCreateInfo->initialDataSize - sizeof(PipelineCacheHeaderData); - if (pPrivateDataHeader->cacheType == cacheType) + if (PipelineBinaryCache::IsValidBlob(pDevice->VkPhysicalDevice(DefaultDeviceIndex), dataSize, pData)) + { + usePipelineCacheInitialData = true; + } + else { - useInitialData = true; + auto pPrivateDataHeader = reinterpret_cast<const PipelineCachePrivateHeaderData*>(pData); + if (pPrivateDataHeader->cacheType == cacheType) + { + useInitialData = true; + } } } } @@ -179,12 +189,19 @@ VkResult PipelineCache::Create( if (result == VK_SUCCESS) { PipelineBinaryCache* pBinaryCache = nullptr; - if (((settings.usePalPipelineCaching) || - (pDevice->VkPhysicalDevice(DefaultDeviceIndex)->VkInstance()->GetDevModeMgr() != nullptr)) && - (settings.allowExternalPipelineCacheObject)) + if (settings.allowExternalPipelineCacheObject) { + const void* pInitialData = nullptr; + size_t initialDataSize = 0; + + if (usePipelineCacheInitialData) + { + pInitialData = Util::VoidPtrInc(pCreateInfo->pInitialData, sizeof(PipelineCacheHeaderData)); + initialDataSize = pCreateInfo->initialDataSize - sizeof(PipelineCacheHeaderData); + } + pBinaryCache = PipelineBinaryCache::Create(pDevice->VkPhysicalDevice(DefaultDeviceIndex)->VkInstance(), - pCreateInfo->initialDataSize, pCreateInfo->pInitialData, false, + initialDataSize, pInitialData, false, pDevice->GetCompiler(DefaultDeviceIndex)->GetGfxIp(), pDevice->VkPhysicalDevice(DefaultDeviceIndex)); // This isn't a terminal failure, the device can continue without the pipeline cache if need be.
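For context, the Create() path above and the reworked GetData() below are what back the standard two-call vkGetPipelineCacheData idiom on the application side. The sketch below is illustrative only and not part of the patch; "device" and "cache" stand for any valid VkDevice and VkPipelineCache, and error handling is kept minimal.

#include <cstdint>
#include <vector>
#include <vulkan/vulkan.h>

// Save a pipeline cache blob that can later be fed back through
// VkPipelineCacheCreateInfo::pInitialData / initialDataSize on a future run.
std::vector<uint8_t> SaveCacheBlob(VkDevice device, VkPipelineCache cache)
{
    // First call: query the total blob size. On the PipelineBinaryCache path
    // this reaches Serialize() with *pSize == 0.
    size_t blobSize = 0;
    std::vector<uint8_t> blob;

    if (vkGetPipelineCacheData(device, cache, &blobSize, nullptr) == VK_SUCCESS)
    {
        // Second call: fill the blob. Everything after the standard Vulkan
        // pipeline cache header is the driver-private payload that
        // PipelineBinaryCache::IsValidBlob() recognizes in Create() above.
        blob.resize(blobSize);

        if (vkGetPipelineCacheData(device, cache, &blobSize, blob.data()) != VK_SUCCESS)
        {
            blob.clear();
        }
    }

    return blob;
}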
@@ -207,7 +224,7 @@ VkResult PipelineCache::Destroy( const Device* pDevice, const VkAllocationCallbacks* pAllocator) { - if (m_pBinaryCache) + if (m_pBinaryCache != nullptr) { m_pBinaryCache->Destroy(); pDevice->VkPhysicalDevice(DefaultDeviceIndex)->VkInstance()->FreeMem(m_pBinaryCache); @@ -229,41 +246,51 @@ VkResult PipelineCache::GetData( VK_ASSERT(pSize != nullptr); VkResult result = VK_SUCCESS; - uint32_t numPalDevices = m_pDevice->NumPalDevices(); - - size_t allBlobSize = sizeof(PipelineCachePrivateHeaderData); - PipelineCachePrivateHeaderData headerData = {}; - - headerData.cacheType = m_shaderCaches[0].GetCacheType(); - for (uint32_t i = 0; i < numPalDevices; i++) - { - size_t blobSize = 0; - result = m_shaderCaches[i].Serialize(nullptr, &blobSize); - VK_ASSERT(result == VK_SUCCESS); - headerData.blobSize[i] = blobSize; - allBlobSize += blobSize; - } - if (*pSize == 0) +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 534 + if (m_pBinaryCache != nullptr) { - *pSize = allBlobSize; + result = m_pBinaryCache->Serialize(pData, pSize); } else +#endif { - VK_ASSERT(*pSize >= allBlobSize); - memcpy(pData, &headerData, sizeof(headerData)); + uint32_t numPalDevices = m_pDevice->NumPalDevices(); - void* pBlob = Util::VoidPtrInc(pData, sizeof(headerData)); + size_t allBlobSize = sizeof(PipelineCachePrivateHeaderData); + PipelineCachePrivateHeaderData headerData = {}; + headerData.cacheType = m_shaderCaches[0].GetCacheType(); for (uint32_t i = 0; i < numPalDevices; i++) { - size_t blobSize = static_cast<size_t>(headerData.blobSize[i]); - result = m_shaderCaches[i].Serialize(pBlob, &blobSize); - if (result != VK_SUCCESS) + size_t blobSize = 0; + result = m_shaderCaches[i].Serialize(nullptr, &blobSize); + VK_ASSERT(result == VK_SUCCESS); + headerData.blobSize[i] = blobSize; + allBlobSize += blobSize; + } + + if (*pSize == 0) + { + *pSize = allBlobSize; + } + else + { + VK_ASSERT(*pSize >= allBlobSize); + memcpy(pData, &headerData, sizeof(headerData)); + + void* pBlob = Util::VoidPtrInc(pData, sizeof(headerData)); + + for (uint32_t i = 0; i < numPalDevices; i++) { - break; + size_t blobSize = static_cast<size_t>(headerData.blobSize[i]); + result = m_shaderCaches[i].Serialize(pBlob, &blobSize); + if (result != VK_SUCCESS) + { + break; + } + pBlob = Util::VoidPtrInc(pBlob, blobSize); } - pBlob = Util::VoidPtrInc(pBlob, blobSize); } } @@ -275,30 +302,49 @@ VkResult PipelineCache::Merge( uint32_t srcCacheCount, const PipelineCache** ppSrcCaches) { - Util::AutoBuffer shaderCaches( - srcCacheCount * m_pDevice->NumPalDevices(), - m_pDevice->VkInstance()->Allocator()); + VkResult result = VK_SUCCESS; - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 534 + if (m_pBinaryCache != nullptr) { + Util::AutoBuffer binaryCaches( + srcCacheCount, + m_pDevice->VkInstance()->Allocator()); + for (uint32_t cacheIdx = 0; cacheIdx < srcCacheCount; cacheIdx++) { - VK_ASSERT(ppSrcCaches[cacheIdx]->GetShaderCache(deviceIdx).GetCacheType() == - GetShaderCache(deviceIdx).GetCacheType()); - // Store all PAL caches like this d0c0,d0c1,d0c2...,d1c0,d1c2,d1c3...
- shaderCaches[deviceIdx * srcCacheCount + cacheIdx] = - ppSrcCaches[cacheIdx]->GetShaderCache(deviceIdx).GetCachePtr(); + binaryCaches[cacheIdx] = ppSrcCaches[cacheIdx]->GetPipelineCache(); } - VkResult result = VK_SUCCESS; - for (uint32_t i = 0; i < m_pDevice->NumPalDevices(); i++) + result = m_pBinaryCache->Merge(srcCacheCount, &binaryCaches[0]); + } + else +#endif { - result = m_shaderCaches[i].Merge(srcCacheCount, &shaderCaches[i * srcCacheCount]); + Util::AutoBuffer shaderCaches( + srcCacheCount * m_pDevice->NumPalDevices(), + m_pDevice->VkInstance()->Allocator()); - if (result != VK_SUCCESS) + for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) + { + for (uint32_t cacheIdx = 0; cacheIdx < srcCacheCount; cacheIdx++) + { + VK_ASSERT(ppSrcCaches[cacheIdx]->GetShaderCache(deviceIdx).GetCacheType() == + GetShaderCache(deviceIdx).GetCacheType()); + // Store all PAL caches like this: d0c0,d0c1,d0c2...,d1c0,d1c1,d1c2... + shaderCaches[deviceIdx * srcCacheCount + cacheIdx] = + ppSrcCaches[cacheIdx]->GetShaderCache(deviceIdx).GetCachePtr(); + } + } + + for (uint32_t i = 0; i < m_pDevice->NumPalDevices(); i++) { - break; + result = m_shaderCaches[i].Merge(srcCacheCount, &shaderCaches[i * srcCacheCount]); + + if (result != VK_SUCCESS) + { + break; + } } } @@ -389,6 +435,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineCacheData( { void* pPrivateData = Util::VoidPtrInc(pData, HeaderSize); result = pCache->GetData(pPrivateData, &privateDataSize); + *pDataSize = privateDataSize + HeaderSize; } } diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp index cb99ed11..cf58119a 100644 --- a/icd/api/vk_queue.cpp +++ b/icd/api/vk_queue.cpp @@ -409,13 +409,16 @@ VkResult Queue::WaitIdle(void) { VK_ASSERT(m_pPalQueues != nullptr); - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) + Pal::Result palResult = Pal::Result::Success; + + for (uint32_t deviceIdx = 0; + (deviceIdx < m_pDevice->NumPalDevices()) && (palResult == Pal::Result::Success); + deviceIdx++) { - PalQueue(deviceIdx)->WaitIdle(); + palResult = PalQueue(deviceIdx)->WaitIdle(); } - // Pal::IQueue::WaitIdle returns void. We have no errors to produce here. - return VK_SUCCESS; + return PalToVkResult(palResult); } // ===================================================================================================================== diff --git a/icd/make/importdefs b/icd/make/importdefs index 7c766840..7bf1ac25 100644 --- a/icd/make/importdefs +++ b/icd/make/importdefs @@ -1,7 +1,7 @@ # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. It must # be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -ICD_PAL_CLIENT_MAJOR_VERSION = 527 +ICD_PAL_CLIENT_MAJOR_VERSION = 534 ICD_PAL_CLIENT_MINOR_VERSION = 0 # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. It describes @@ -12,9 +12,9 @@ ICD_GPUOPEN_CLIENT_MINOR_VERSION = 0 #if ICD_BUILD_SCPC # This will become the value of SCPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_SCPC=1. It describes the # interface version of SCPC (currently part of PAL) that the ICD supports. -ICD_SCPC_CLIENT_MAJOR_VERSION = 49 +ICD_SCPC_CLIENT_MAJOR_VERSION = 56 #endif # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1.
It describes the # interface version of LLPC that the ICD supports. -ICD_LLPC_CLIENT_MAJOR_VERSION = 32 +ICD_LLPC_CLIENT_MAJOR_VERSION = 34 diff --git a/icd/res/ver.h b/icd/res/ver.h index 2b0550aa..e0b4ef73 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 109 +#define VULKAN_ICD_BUILD_VERSION 111 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index 3c17dc44..a68a2aaa 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -154,7 +154,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( // In general, DCC is very beneficial for color attachments. If this is completely offset, maybe by increased // shader read latency or partial writes of DCC blocks, it should be debugged on a case-by-case basis. - if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.forceDccForColorAttachments = true; } @@ -214,7 +214,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( ForceImageSharingModeExclusive; } - if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.asyncComputeQueueLimit = 1; } @@ -232,7 +232,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( if (((appProfile == AppProfile::WolfensteinII) || (appProfile == AppProfile::Doom)) && - (info.gfxLevel > Pal::GfxIpLevel::GfxIp9)) + (info.gfxLevel == Pal::GfxIpLevel::GfxIp10_1)) { m_settings.asyncComputeQueueMaxWavesPerCu = 40; m_settings.nggSubgroupSizing = NggSubgroupExplicit; @@ -256,10 +256,11 @@ void VulkanSettingsLoader::OverrideProfiledSettings( } // WWZ performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. - if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.forceDccForColorAttachments = false; } + } if (appProfile == AppProfile::IdTechEngine) @@ -329,6 +330,10 @@ void VulkanSettingsLoader::OverrideProfiledSettings( m_settings.preciseAnisoMode = DisablePreciseAnisoAll; } + if (appProfile == AppProfile::StrangeBrigade) + { + } + if (appProfile == AppProfile::MadMax) { m_settings.preciseAnisoMode = DisablePreciseAnisoAll; @@ -341,7 +346,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( m_settings.prefetchShaders = true; // F1 2017 performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. - if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.forceDccForColorAttachments = false; } @@ -355,7 +360,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( if (appProfile == AppProfile::DiRT4) { // DiRT 4 performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. - if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.forceDccForColorAttachments = false; } @@ -364,7 +369,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( if (appProfile == AppProfile::WarHammerII) { // WarHammer II performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now.
- if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.forceDccForColorAttachments = false; } diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index fe84c653..1ef65905 100644 --- a/icd/settings/settings_xgl.json +++ b/icd/settings/settings_xgl.json @@ -1455,7 +1455,7 @@ "Value": 0 }, { - "Description": " Enable for vertex shaders", + "Description": "Enable for vertex shaders", "Value": 1 }, { @@ -1602,6 +1602,204 @@ "VariableName": "skipUnsupportedOpCode", "Name": "SkipUnsupportedOpCode" }, + { + "Description": "Force vertex shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "vsIrMode", + "Name": "VsIrMode", + "Scope": "Driver" + }, + { + "Description": "Force tessellation control shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "tcsIrMode", + "Name": "TcsIrMode", + "Scope": "Driver" + }, + { + "Description": "Force tessellation evaluation shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "tesIrMode", + "Name": "TesIrMode", + "Scope": "Driver" + }, + { + "Description": "Force geometry shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "gsIrMode", + "Name": "GsIrMode", + "Scope": "Driver" + }, + { + "Description": "Force fragment shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 
2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "fsIrMode", + "Name": "FsIrMode", + "Scope": "Driver" + }, + { + "Description": "Force compute shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "csIrMode", + "Name": "CsIrMode", + "Scope": "Driver" + }, { "Description": "[LLPC ONLY] LLPC general options. Max length of the option is 256. For example: disable loop unroll: -pragma-unroll-threshold=1; enable si-scheduler: -enable-si-scheduler. Please see amdllpc -help or -help-hidden for details", "Tags": [ @@ -1687,7 +1885,7 @@ }, { "Name": "AllowExternalPipelineCacheObject", - "Description": "Controls whether a pipeline cache object is allowed to be created via vkCreatePipelineCache in addition to the cache residing within the pipeline compiler. (Default: FALSE)", + "Description": "Controls whether a pipeline cache object is allowed to be created via vkCreatePipelineCache in addition to the cache residing within the pipeline compiler. (Default: TRUE)", "Tags": [ "SPIRV Options" ], @@ -3926,150 +4124,6 @@ "VariableName": "devModeSqttPrepareFrameCount", "Name": "DevModeSqttPrepareFrameCount" }, - { - "Description": "Allow chill to run. Chill is a user interaction dependent FPS limiter, used for power saving. ", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": true - }, - "Type": "bool", - "VariableName": "allowChill", - "Name": "AllowChill", - "Scope": "Driver" - }, - { - "Description": "If per-app chill profile settings is enabled. 
", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": false - }, - "Scope": "Driver", - "Type": "bool", - "VariableName": "chillProfileEnable", - "Name": "Chill_ProfileEnable" - }, - { - "ValidValues": { - "IsEnum": true, - "Values": [ - { - "Name": "IcdChillLevelDisable", - "Value": 0, - "Description": "Disable" - }, - { - "Name": "IcdChillLevelSubtle", - "Value": 1, - "Description": "Subtle" - }, - { - "Name": "IcdChillLevelMedium", - "Value": 2, - "Description": "Medium" - }, - { - "Name": "IcdChillLevelFull", - "Value": 3, - "Description": "Full" - } - ], - "Name": "IcdChillLevelMode" - }, - "Description": "Chill level setting, default is medium.", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": "IcdChillLevelMedium" - }, - "Flags": { - "IsHex": true - }, - "Scope": "Driver", - "Type": "enum", - "VariableName": "chillLevel", - "Name": "Chill_ChillLevel" - }, - { - "ValidValues": { - "Values": [ - { - "LogicOp": "GreaterThanOrEqual", - "Value": 30 - }, - { - "LogicOp": "LessThanOrEqual", - "Value": 300 - } - ] - }, - "Description": "Min chill frame rate; valid range is 30-300fps.", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": 70 - }, - "Scope": "Driver", - "Type": "uint32", - "VariableName": "chillMinFrameRate", - "Name": "Chill_MinFramerate" - }, - { - "ValidValues": { - "Values": [ - { - "LogicOp": "GreaterThanOrEqual", - "Value": 30 - }, - { - "LogicOp": "LessThanOrEqual", - "Value": 300 - } - ] - }, - "Description": "Max chill frame rate; valid range is 30-300fps.", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": 144 - }, - "Scope": "Driver", - "Type": "uint32", - "VariableName": "chillMaxFrameRate", - "Name": "Chill_MaxFramerate" - }, - { - "Description": "The threshold number of draw calls per frame used to distinguish between loading screens and gameplay.", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": 150 - }, - "Scope": "Driver", - "Type": "uint32", - "VariableName": "chillLoadingScreenDrawsThresh", - "Name": "Chill_LoadingScreenDrawsThresh" - }, - { - "Description": "When true, we will not disable chill based on KMD workstation flag or Big Software version.", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": false - }, - "Scope": "Driver", - "Type": "bool", - "VariableName": "chillIgnoreBaseDriverRestrictions", - "Name": "Chill_IgnoreBaseDriverRestrictions" - }, { "Name": "OverrideShaderParams", "Description": "Indicate that shader parameter override is enabled - mainly used for automation",