diff --git a/cmake/XglCompilerOptions.cmake b/cmake/XglCompilerOptions.cmake index 4097db54..945c8c4d 100644 --- a/cmake/XglCompilerOptions.cmake +++ b/cmake/XglCompilerOptions.cmake @@ -308,6 +308,7 @@ function(xgl_compiler_options TARGET) # add global definition to enable LTO here since some components have no option # to enable it. add_definitions("-flto=thin") + add_link_options("-flto=thin") message(WARNING "LTO enabled for ${TARGET}") endif() endif() diff --git a/cmake/XglPackaging.cmake b/cmake/XglPackaging.cmake index d29a898b..2b4c8714 100644 --- a/cmake/XglPackaging.cmake +++ b/cmake/XglPackaging.cmake @@ -161,7 +161,8 @@ function(generatePackageTarget) endif() set(CPACK_DEBIAN_PACKAGE_PRIORITY "optional") set(CPACK_DEBIAN_PACKAGE_SECTION "libs") - set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>=2.17), libgcc1 (>= 1:3.4), libstdc++6 (>= 5.2), libssl1.1") + set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>=2.17), libgcc1 (>= 1:3.4), libstdc++6 (>= 5.2)") + set(CPACK_DEBIAN_PACKAGE_RECOMMENDS "libssl1.1") if(PACKAGE_RELEASE) set(CPACK_PACKAGE_VERSION "${PACKAGE_VERSION}-${PACKAGE_RELEASE}") else() @@ -185,7 +186,14 @@ function(generatePackageTarget) set(CPACK_RPM_PACKAGE_DESCRIPTION "${PACKAGE_DESCRIPTION}") set(CPACK_RPM_PACKAGE_GROUP "System Environment/Libraries") set(CPACK_RPM_PACKAGE_LICENSE "MIT") - set(CPACK_RPM_PACKAGE_REQUIRES "openssl-libs(x86-${TARGET_ARCHITECTURE_BITS})") + execute_process(COMMAND ${lsb_release_exec} -rs + OUTPUT_VARIABLE lsb_release_number + OUTPUT_STRIP_TRAILING_WHITESPACE) + if (rpm_distros MATCHES "RedHat.*" AND NOT lsb_release_number MATCHES "^7") + set(CPACK_RPM_PACKAGE_SUGGESTS "openssl-libs(x86-${TARGET_ARCHITECTURE_BITS})") + else() + message(WARNING "soft dependency of openssl-libs is not added") + endif() endif() include(CPack) diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json index 64ae3706..adb6eaf0 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ 
-2,13 +2,13 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.2.195" + "api_version": "1.2.197" }, "layer": { "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@", "type": "GLOBAL", "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.2.195", + "api_version": "1.2.197", "implementation_version": "1", "description": "AMD switchable graphics layer", "functions": { diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp index 7d01e513..79762939 100644 --- a/icd/api/app_profile.cpp +++ b/icd/api/app_profile.cpp @@ -433,6 +433,18 @@ constexpr AppProfilePatternEntry AppNameValheim = "valheim" }; +constexpr AppProfilePatternEntry AppExeKnockoutcity = +{ + PatternExeNameLower, + "knockoutcity" +}; + +constexpr AppProfilePatternEntry AppNameEvilGenius2 = +{ + PatternAppNameLower, + "evil genius 2" +}; + constexpr AppProfilePatternEntry PatternEnd = {}; // This is a table of patterns. The first matching pattern in this table will be returned. 
@@ -798,6 +810,20 @@ AppProfilePattern AppPatternTable[] = PatternEnd } }, + { + AppProfile::KnockoutCity, + { + AppExeKnockoutcity, + PatternEnd + } + }, + { + AppProfile::EvilGenius2, + { + AppNameEvilGenius2, + PatternEnd + } + }, { AppProfile::ScimitarEngine, diff --git a/icd/api/app_resource_optimizer.cpp b/icd/api/app_resource_optimizer.cpp index 1d53326f..2572f84b 100644 --- a/icd/api/app_resource_optimizer.cpp +++ b/icd/api/app_resource_optimizer.cpp @@ -447,6 +447,7 @@ void ResourceOptimizer::BuildAppProfile() #if ICD_RUNTIME_APP_PROFILE void ResourceOptimizer::BuildRuntimeProfile() { + memset(&m_runtimeProfile, 0, sizeof(m_runtimeProfile)); // TODO: JSON parsing should go here } #endif diff --git a/icd/api/app_shader_optimizer.cpp b/icd/api/app_shader_optimizer.cpp index 4242272c..185b9427 100644 --- a/icd/api/app_shader_optimizer.cpp +++ b/icd/api/app_shader_optimizer.cpp @@ -174,13 +174,6 @@ void ShaderOptimizer::ApplyProfileToShaderCreateInfo( options.pNggState->enableNgg = false; } -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 45 - if (shaderCreate.apply.nggFasterLaunchRate) - { - options.pNggState->enableFastLaunch = true; - } -#endif - if (shaderCreate.apply.nggVertexReuse) { options.pNggState->enableVertexReuse = true; diff --git a/icd/api/compiler_solution_llpc.cpp b/icd/api/compiler_solution_llpc.cpp index 38815814..ba6b99e1 100644 --- a/icd/api/compiler_solution_llpc.cpp +++ b/icd/api/compiler_solution_llpc.cpp @@ -140,9 +140,6 @@ VkResult CompilerSolutionLlpc::BuildShaderModule( auto pPipelineCompiler = m_pPhysicalDevice->GetCompiler(); pPipelineCompiler->ApplyPipelineOptions(pDevice, 0, &moduleInfo.options.pipelineOptions); -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 50 - moduleInfo.options.enableOpt = (flags & VK_SHADER_MODULE_ENABLE_OPT_BIT) ? 
true : false; -#endif Vkgc::Result llpcResult = m_pLlpc->BuildShaderModule(&moduleInfo, &buildOut); diff --git a/icd/api/graphics_pipeline_common.cpp b/icd/api/graphics_pipeline_common.cpp index 8f3b06e6..1afe0ddd 100644 --- a/icd/api/graphics_pipeline_common.cpp +++ b/icd/api/graphics_pipeline_common.cpp @@ -24,9 +24,9 @@ **********************************************************************************************************************/ #include "include/app_shader_optimizer.h" -#include "include/graphics_pipeline_common.h" #include "include/vk_cmdbuffer.h" #include "include/vk_device.h" +#include "include/vk_graphics_pipeline.h" #include "include/vk_pipeline_layout.h" #include "include/vk_render_pass.h" @@ -220,10 +220,64 @@ bool GraphicsPipelineCommon::IsSrcAlphaUsedInBlend(VkBlendFactor blend) } } +// ===================================================================================================================== +void GraphicsPipelineCommon::GetSubpassSampleCount( + const VkPipelineMultisampleStateCreateInfo* pMs, + const RenderPass* pRenderPass, + const uint32_t subpass, + uint32_t* pCoverageSampleCount, + uint32_t* pColorSampleCount, + uint32_t* pDepthSampleCount) +{ + const uint32_t rasterizationSampleCount = pMs->rasterizationSamples; + + uint32_t coverageSampleCount = (pRenderPass != VK_NULL_HANDLE) ? + pRenderPass->GetSubpassMaxSampleCount(subpass) : rasterizationSampleCount; + + // subpassCoverageSampleCount would be equal to zero if there are zero attachments. + coverageSampleCount = (coverageSampleCount == 0) ? rasterizationSampleCount : coverageSampleCount; + + VK_ASSERT(rasterizationSampleCount == coverageSampleCount); + + if (pCoverageSampleCount != nullptr) + { + *pCoverageSampleCount = coverageSampleCount; + } + + // In case we are rendering to color only, we make sure to set the DepthSampleCount to CoverageSampleCount. + // CoverageSampleCount is really the ColorSampleCount in this case. 
This makes sure we have a consistent + // sample count and that we get correct MSAA behavior. + // Similar thing for when we are rendering to depth only. The expectation in that case is that all + // sample counts should match. + // This shouldn't interfere with EQAA. For EQAA, if ColorSampleCount is not equal to DepthSampleCount + // and they are both greater than one, then we do not force them to match. + + if (pColorSampleCount != nullptr) + { + uint32_t colorSampleCount = (pRenderPass != VK_NULL_HANDLE) ? + pRenderPass->GetSubpassColorSampleCount(subpass) : rasterizationSampleCount; + + colorSampleCount = (colorSampleCount == 0) ? coverageSampleCount : colorSampleCount; + + *pColorSampleCount = colorSampleCount; + } + + if (pDepthSampleCount != nullptr) + { + uint32_t depthSampleCount = (pRenderPass != VK_NULL_HANDLE) ? + pRenderPass->GetSubpassDepthSampleCount(subpass) : rasterizationSampleCount; + + depthSampleCount = (depthSampleCount == 0) ? coverageSampleCount : depthSampleCount; + + *pDepthSampleCount = depthSampleCount; + } +} + // ===================================================================================================================== static VkFormat GetDepthFormat( const RenderPass* pRenderPass, - const uint32_t subpassIndex + const uint32_t subpassIndex, + const VkPipelineRenderingCreateInfoKHR* pPipelineRenderingCreateInfoKHR ) { VkFormat format = VK_FORMAT_UNDEFINED; @@ -232,6 +286,12 @@ static VkFormat GetDepthFormat( { format = pRenderPass->GetDepthStencilAttachmentFormat(subpassIndex); } + else if (pPipelineRenderingCreateInfoKHR != nullptr) + { + format = (pPipelineRenderingCreateInfoKHR->depthAttachmentFormat != VK_FORMAT_UNDEFINED) ? 
+ pPipelineRenderingCreateInfoKHR->depthAttachmentFormat : + pPipelineRenderingCreateInfoKHR->stencilAttachmentFormat; + } return format; } @@ -239,10 +299,12 @@ static VkFormat GetDepthFormat( // ===================================================================================================================== static uint32_t GetColorAttachmentCount( const RenderPass* pRenderPass, - const uint32_t subpassIndex + const uint32_t subpassIndex, + const VkPipelineRenderingCreateInfoKHR* pPipelineRenderingCreateInfoKHR ) { return (pRenderPass != nullptr) ? pRenderPass->GetSubpassColorReferenceCount(subpassIndex) : + (pPipelineRenderingCreateInfoKHR != nullptr) ? pPipelineRenderingCreateInfoKHR->colorAttachmentCount : 0u; } @@ -288,6 +350,15 @@ uint32_t GraphicsPipelineCommon::GetDynamicStateFlags( { switch (static_cast(pDy->pDynamicStates[i])) { + case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT: + dynamicState |= viiMask & (1 << static_cast(DynamicStatesInternal::PrimitiveTopologyExt)); + break; + case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT: + dynamicState |= viiMask & (1 << static_cast(DynamicStatesInternal::VertexInputBindingStrideExt)); + break; + case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT: + dynamicState |= viiMask & (1 << static_cast(DynamicStatesInternal::PrimitiveRestartEnableExt)); + break; case VK_DYNAMIC_STATE_VIEWPORT: dynamicState |= prsMask & (1 << static_cast(DynamicStatesInternal::Viewport)); break; @@ -300,33 +371,9 @@ uint32_t GraphicsPipelineCommon::GetDynamicStateFlags( case VK_DYNAMIC_STATE_DEPTH_BIAS: dynamicState |= prsMask & (1 << static_cast(DynamicStatesInternal::DepthBias)); break; - case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT: - dynamicState |= prsMask & (1 << static_cast(DynamicStatesInternal::DepthBiasEnableExt)); - break; - case VK_DYNAMIC_STATE_BLEND_CONSTANTS: - dynamicState |= foiMask & (1 << static_cast(DynamicStatesInternal::BlendConstants)); - break; - case VK_DYNAMIC_STATE_DEPTH_BOUNDS: - dynamicState |= fgsMask & (1 
<< static_cast(DynamicStatesInternal::DepthBounds)); - break; - case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: - dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::StencilCompareMask)); - break; - case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: - dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::StencilWriteMask)); - break; - case VK_DYNAMIC_STATE_STENCIL_REFERENCE: - dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::StencilReference)); - break; - case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: - dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::SampleLocationsExt)); - break; case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT: dynamicState |= prsMask & (1 << static_cast(DynamicStatesInternal::LineStippleExt)); break; - case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR: - dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::FragmentShadingRateStateKhr)); - break; case VK_DYNAMIC_STATE_CULL_MODE_EXT: dynamicState |= prsMask & (1 << static_cast(DynamicStatesInternal::CullModeExt)); break; @@ -341,14 +388,26 @@ uint32_t GraphicsPipelineCommon::GetDynamicStateFlags( dynamicState |= prsMask & (1 << static_cast(DynamicStatesInternal::ScissorCount)); dynamicState |= prsMask & (1 << static_cast(DynamicStatesInternal::Scissor)); break; - case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT: - dynamicState |= viiMask & (1 << static_cast(DynamicStatesInternal::PrimitiveTopologyExt)); + case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT: + dynamicState |= prsMask & (1 << static_cast(DynamicStatesInternal::DepthBiasEnableExt)); break; - case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT: - dynamicState |= viiMask & (1 << static_cast(DynamicStatesInternal::VertexInputBindingStrideExt)); + case VK_DYNAMIC_STATE_DEPTH_BOUNDS: + dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::DepthBounds)); break; - case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT: - dynamicState |= viiMask & (1 << 
static_cast(DynamicStatesInternal::PrimitiveRestartEnableExt)); + case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: + dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::StencilCompareMask)); + break; + case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: + dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::StencilWriteMask)); + break; + case VK_DYNAMIC_STATE_STENCIL_REFERENCE: + dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::StencilReference)); + break; + case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: + dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::SampleLocationsExt)); + break; + case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR: + dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::FragmentShadingRateStateKhr)); break; case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT: dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::DepthTestEnableExt)); @@ -368,14 +427,17 @@ uint32_t GraphicsPipelineCommon::GetDynamicStateFlags( case VK_DYNAMIC_STATE_STENCIL_OP_EXT: dynamicState |= fgsMask & (1 << static_cast(DynamicStatesInternal::StencilOpExt)); break; - case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: - dynamicState |= foiMask & (1 << static_cast(DynamicStatesInternal::ColorWriteEnableExt)); + case VK_DYNAMIC_STATE_BLEND_CONSTANTS: + dynamicState |= foiMask & (1 << static_cast(DynamicStatesInternal::BlendConstants)); break; case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT: dynamicState |= foiMask & (1 << static_cast(DynamicStatesInternal::RasterizerDiscardEnableExt)); break; + case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: + dynamicState |= foiMask & (1 << static_cast(DynamicStatesInternal::ColorWriteEnableExt)); + break; default: - // skip unknown dynamic state + VK_ASSERT(!"Unknown dynamic state"); break; } } @@ -574,6 +636,7 @@ static void BuildRasterizationState( pNext = pHeader->pNext; } +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 691 // For optimal performance, depth clamping should be enabled 
by default. Only disable it if dealing // with depth values outside of [0.0, 1.0] range. // Note that this is the opposite of the default Vulkan setting which is depthClampEnable = false. @@ -590,6 +653,30 @@ static void BuildRasterizationState( // Clipping is updated in pipeline compiler. pInfo->pipeline.rsState.depthClampDisable = false; } +#else + if (pRs->depthClampEnable == VK_FALSE) + { + // For optimal performance, depth clamping should be enabled by default, even if API says otherwise. + // Only disable it if dealing with depth values outside of [0.0, 1.0] range. + // Otherwise clamp to [0.0, 1.0] interval. + if(pDevice->IsExtensionEnabled(DeviceExtensions::EXT_DEPTH_RANGE_UNRESTRICTED) || + ((pInfo->pipeline.viewportInfo.depthClipNearEnable == false) && + (pInfo->pipeline.viewportInfo.depthClipFarEnable == false))) + { + pInfo->pipeline.rsState.DepthClampMode = Pal::DepthClampMode::None; + } + else + { + pInfo->pipeline.rsState.DepthClampMode = Pal::DepthClampMode::ZeroToOne; + } + } + else + { + // When depth clamping is enabled, depth clipping should be disabled, and vice versa. + // Clipping is updated in pipeline compiler. 
+ pInfo->pipeline.rsState.DepthClampMode = Pal::DepthClampMode::Viewport; + } +#endif pInfo->pipeline.rsState.pointCoordOrigin = Pal::PointOrigin::UpperLeft; pInfo->pipeline.rsState.shadeMode = Pal::ShadeMode::Flat; @@ -733,33 +820,12 @@ static void BuildMultisampleState( pInfo->flags.customSampleLocations = ((pPipelineSampleLocationsStateCreateInfoEXT != nullptr) && (pPipelineSampleLocationsStateCreateInfoEXT->sampleLocationsEnable)); - uint32_t rasterizationSampleCount = pMs->rasterizationSamples; - - uint32_t subpassCoverageSampleCount = rasterizationSampleCount; - uint32_t subpassColorSampleCount = rasterizationSampleCount; - uint32_t subpassDepthSampleCount = rasterizationSampleCount; - - if (pRenderPass != VK_NULL_HANDLE) - { - subpassCoverageSampleCount = pRenderPass->GetSubpassMaxSampleCount(subpass); - subpassColorSampleCount = pRenderPass->GetSubpassColorSampleCount(subpass); - subpassDepthSampleCount = pRenderPass->GetSubpassDepthSampleCount(subpass); - } - - // subpassCoverageSampleCount would be equal to zero if there are zero attachments. - subpassCoverageSampleCount = subpassCoverageSampleCount == 0 ? rasterizationSampleCount : subpassCoverageSampleCount; - - // In case we are rendering to color only, we make sure to set the DepthSampleCount to CoverageSampleCount. - // CoverageSampleCount is really the ColorSampleCount in this case. This makes sure we have a consistent - // sample count and that we get correct MSAA behavior. - // Similar thing for when we are rendering to depth only. The expectation in that case is that all - // sample counts should match. - // This shouldn't interfere with EQAA. For EQAA, if ColorSampleCount is not equal to DepthSampleCount - // and they are both greater than one, then we do not force them to match. - subpassColorSampleCount = subpassColorSampleCount == 0 ? subpassCoverageSampleCount : subpassColorSampleCount; - subpassDepthSampleCount = subpassDepthSampleCount == 0 ? 
subpassCoverageSampleCount : subpassDepthSampleCount; - - VK_ASSERT(rasterizationSampleCount == subpassCoverageSampleCount); + uint32_t subpassCoverageSampleCount; + uint32_t subpassColorSampleCount; + uint32_t subpassDepthSampleCount; + GraphicsPipelineCommon::GetSubpassSampleCount( + pMs, pRenderPass, subpass, + &subpassCoverageSampleCount, &subpassColorSampleCount, &subpassDepthSampleCount); pInfo->msaa.coverageSamples = subpassCoverageSampleCount; pInfo->msaa.exposedSamples = subpassCoverageSampleCount; @@ -803,7 +869,8 @@ static void BuildMultisampleState( &pPipelineSampleLocationsStateCreateInfoEXT->sampleLocationsInfo, &pInfo->immedInfo.samplePattern.locations); - VK_ASSERT(pInfo->immedInfo.samplePattern.sampleCount == rasterizationSampleCount); + VK_ASSERT(pInfo->immedInfo.samplePattern.sampleCount == + static_cast(pMs->rasterizationSamples)); pInfo->staticStateMask |= (1 << static_cast(DynamicStatesInternal::SampleLocationsExt)); @@ -812,9 +879,9 @@ static void BuildMultisampleState( else { // We store the standard sample locations if custom sample locations are not enabled. 
- pInfo->immedInfo.samplePattern.sampleCount = rasterizationSampleCount; + pInfo->immedInfo.samplePattern.sampleCount = pMs->rasterizationSamples; pInfo->immedInfo.samplePattern.locations = - *Device::GetDefaultQuadSamplePattern(rasterizationSampleCount); + *Device::GetDefaultQuadSamplePattern(pMs->rasterizationSamples); pInfo->staticStateMask |= 1 << static_cast(DynamicStatesInternal::SampleLocationsExt); @@ -888,6 +955,7 @@ static void BuildDepthStencilState( // ===================================================================================================================== static void BuildColorBlendState( const Device* pDevice, + const VkPipelineRenderingCreateInfoKHR* pRendering, const VkPipelineColorBlendStateCreateInfo* pCb, const RenderPass* pRenderPass, const uint32_t subpass, @@ -944,6 +1012,11 @@ static void BuildColorBlendState( const VkFormat cbFormat = pRenderPass->GetColorAttachmentFormat(subpass, i); pCbDst->swizzledFormat = VkToPalFormat(cbFormat, pDevice->GetRuntimeSettings()); } + else if (pRendering != nullptr) + { + const VkFormat cbFormat = pRendering->pColorAttachmentFormats[i]; + pCbDst->swizzledFormat = VkToPalFormat(cbFormat, pDevice->GetRuntimeSettings()); + } // If the sub pass attachment format is UNDEFINED, then it means that that subpass does not // want to write to any attachment for that output (VK_ATTACHMENT_UNUSED). Under such cases, // disable shader writes through that target. 
@@ -991,14 +1064,15 @@ static void BuildColorBlendState( // ===================================================================================================================== static void BuildRenderingState( const Device* pDevice, + const VkPipelineRenderingCreateInfoKHR* pRendering, + const VkPipelineColorBlendStateCreateInfo* pCb, const RenderPass* pRenderPass, GraphicsPipelineObjectCreateInfo* pInfo) { pInfo->pipeline.viewInstancingDesc = {}; - if (((pRenderPass != nullptr) && - pRenderPass->IsMultiviewEnabled()) - ) + if (((pRenderPass != nullptr) && pRenderPass->IsMultiviewEnabled()) || + ((pRendering != nullptr) && (Util::CountSetBits(pRendering->viewMask)!= 0))) { pInfo->pipeline.viewInstancingDesc.viewInstanceCount = Pal::MaxViewInstanceCount; pInfo->pipeline.viewInstancingDesc.enableMasking = true; @@ -1101,14 +1175,8 @@ static void BuildFragmentShaderState( const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pIn->renderPass); const uint32_t subpass = pIn->subpass; - // Build states via VkPipelineMultisampleStateCreateInfo - BuildMultisampleState(pIn->pMultisampleState, pRenderPass, subpass, dynamicStateFlags, pInfo); - - if (GetDepthFormat(pRenderPass, subpass) != VK_FORMAT_UNDEFINED) - { - // Build states via VkPipelineDepthStencilStateCreateInfo - BuildDepthStencilState(pIn->pDepthStencilState, dynamicStateFlags, pInfo); - } + // Build states via VkPipelineDepthStencilStateCreateInfo + BuildDepthStencilState(pIn->pDepthStencilState, dynamicStateFlags, pInfo); if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::DepthTestEnableExt) == false) { @@ -1142,13 +1210,24 @@ static void BuildFragmentOutputInterfaceState( const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pIn->renderPass); const uint32_t subpass = pIn->subpass; - pInfo->dbFormat = GetDepthFormat(pRenderPass, subpass); + // Build states via VkPipelineMultisampleStateCreateInfo + BuildMultisampleState(pIn->pMultisampleState, pRenderPass, subpass, 
dynamicStateFlags, pInfo); + + // Extract VkPipelineRenderingFormatCreateInfoKHR for VK_KHR_dynamic_rendering extension + EXTRACT_VK_STRUCTURES_0( + renderingCreateInfo, + PipelineRenderingCreateInfoKHR, + static_cast(pIn->pNext), + PIPELINE_RENDERING_CREATE_INFO_KHR); + + pInfo->dbFormat = GetDepthFormat(pRenderPass, subpass, pPipelineRenderingCreateInfoKHR); - if (GetColorAttachmentCount(pRenderPass, subpass) != 0) + if (GetColorAttachmentCount(pRenderPass, subpass, pPipelineRenderingCreateInfoKHR) != 0) { // Build states via VkPipelineColorBlendStateCreateInfo BuildColorBlendState( - pDevice, + pDevice, + pPipelineRenderingCreateInfoKHR, pIn->pColorBlendState, pRenderPass, subpass, @@ -1157,6 +1236,8 @@ static void BuildFragmentOutputInterfaceState( } BuildRenderingState(pDevice, + pPipelineRenderingCreateInfoKHR, + pIn->pColorBlendState, pRenderPass, pInfo); @@ -1887,8 +1968,14 @@ void GraphicsPipelineCommon::GenerateHashForFragmentShaderState( const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pCreateInfo->renderPass); + EXTRACT_VK_STRUCTURES_0( + renderingCreateInfo, + PipelineRenderingCreateInfoKHR, + static_cast(pCreateInfo->pNext), + PIPELINE_RENDERING_CREATE_INFO_KHR); + if ((pCreateInfo->pDepthStencilState != nullptr) && - (GetDepthFormat(pRenderPass, pCreateInfo->subpass) != VK_FORMAT_UNDEFINED)) + (GetDepthFormat(pRenderPass, pCreateInfo->subpass, pPipelineRenderingCreateInfoKHR) != VK_FORMAT_UNDEFINED)) { GenerateHashFromDepthStencilStateCreateInfo(*pCreateInfo->pDepthStencilState, pApiHasher); } @@ -1914,11 +2001,28 @@ void GraphicsPipelineCommon::GenerateHashForFragmentOutputInterfaceState( Util::MetroHash128* pBaseHasher, Util::MetroHash128* pApiHasher) { + EXTRACT_VK_STRUCTURES_0( + renderingCreateInfo, + PipelineRenderingCreateInfoKHR, + static_cast(pCreateInfo->pNext), + PIPELINE_RENDERING_CREATE_INFO_KHR); + + if (pPipelineRenderingCreateInfoKHR != nullptr) + { + pApiHasher->Update(pPipelineRenderingCreateInfoKHR->viewMask); + 
pApiHasher->Update(pPipelineRenderingCreateInfoKHR->colorAttachmentCount); + for (uint32_t i = 0; i < pPipelineRenderingCreateInfoKHR->colorAttachmentCount; ++i) + { + pApiHasher->Update(pPipelineRenderingCreateInfoKHR->pColorAttachmentFormats[i]); + } + pApiHasher->Update(pPipelineRenderingCreateInfoKHR->depthAttachmentFormat); + pApiHasher->Update(pPipelineRenderingCreateInfoKHR->stencilAttachmentFormat); + } const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pCreateInfo->renderPass); if ((pCreateInfo->pColorBlendState != nullptr) && - (GetColorAttachmentCount(pRenderPass, pCreateInfo->subpass) != 0)) + (GetColorAttachmentCount(pRenderPass, pCreateInfo->subpass, pPipelineRenderingCreateInfoKHR) != 0)) { GenerateHashFromColorBlendStateCreateInfo(*pCreateInfo->pColorBlendState, pBaseHasher, pApiHasher); diff --git a/icd/api/include/app_profile.h b/icd/api/include/app_profile.h index 907adc7c..8f77d835 100644 --- a/icd/api/include/app_profile.h +++ b/icd/api/include/app_profile.h @@ -87,6 +87,8 @@ enum class AppProfile : uint32_t Quake2RTX, // Quake2 RTX Valheim, // Valheim by Coffee Stain Studios WolfensteinCyberpilot, // Wolfenstein Cyberpilot by Machine Games + EvilGenius2, // Evil Genius 2 + KnockoutCity, // Knockout City IdTechEngine, // id Tech Engine (Default) Feral3DEngine, // Feral3D Engine (Default) diff --git a/icd/api/include/compiler_solution.h b/icd/api/include/compiler_solution.h index a2418a5b..bda77b5f 100644 --- a/icd/api/include/compiler_solution.h +++ b/icd/api/include/compiler_solution.h @@ -91,7 +91,6 @@ struct GraphicsPipelineBinaryCreateInfo size_t mappingBufferSize; VkPipelineCreateFlags flags; VkFormat dbFormat; - VkExtent2D sampleLocationGridSize; PipelineOptimizerKey pipelineProfileKey; PipelineCompilerType compilerType; FreeCompilerBinary freeCompilerBinary; @@ -143,6 +142,10 @@ class CompilerSolution ShaderModuleHandle* pShaderModule, const Util::MetroHash::Hash& hash) = 0; + virtual void TryEarlyCompileShaderModule( + const 
Device* pDevice, + ShaderModuleHandle* pShaderModule) = 0; + virtual void FreeShaderModule(ShaderModuleHandle* pShaderModule) = 0; virtual VkResult CreateGraphicsPipelineBinary( @@ -158,6 +161,12 @@ class CompilerSolution Util::MetroHash::Hash* pCacheId, int64_t* pCompileTime) = 0; + virtual VkResult CreateGraphicsShaderBinary( + const Device* pDevice, + const ShaderStage stage, + const GraphicsPipelineBinaryCreateInfo* pCreateInfo, + ShaderModuleHandle* pShaderModule) = 0; + virtual VkResult CreateComputePipelineBinary( Device* pDevice, uint32_t deviceIdx, diff --git a/icd/api/include/compiler_solution_llpc.h b/icd/api/include/compiler_solution_llpc.h index 17e399a9..90f054bf 100644 --- a/icd/api/include/compiler_solution_llpc.h +++ b/icd/api/include/compiler_solution_llpc.h @@ -67,6 +67,10 @@ class CompilerSolutionLlpc final : public CompilerSolution ShaderModuleHandle* pShaderModule, const Util::MetroHash::Hash& hash) override; + virtual void TryEarlyCompileShaderModule( + const Device* pDevice, + ShaderModuleHandle* pModule) override { } + virtual void FreeShaderModule(ShaderModuleHandle* pShaderModule) override; virtual VkResult CreateGraphicsPipelineBinary( @@ -82,6 +86,12 @@ class CompilerSolutionLlpc final : public CompilerSolution Util::MetroHash::Hash* pCacheId, int64_t* pCompileTime) override; + virtual VkResult CreateGraphicsShaderBinary( + const Device* pDevice, + const ShaderStage stage, + const GraphicsPipelineBinaryCreateInfo* pCreateInfo, + ShaderModuleHandle* pShaderModule) override { return VK_SUCCESS; } + virtual VkResult CreateComputePipelineBinary( Device* pDevice, uint32_t deviceIdx, diff --git a/icd/api/include/graphics_pipeline_common.h b/icd/api/include/graphics_pipeline_common.h index d0a94aeb..9f3fc11a 100644 --- a/icd/api/include/graphics_pipeline_common.h +++ b/icd/api/include/graphics_pipeline_common.h @@ -38,6 +38,7 @@ namespace vk { class PipelineCache; +class RenderPass; struct PipelineOptimizerKey; struct 
GraphicsPipelineBinaryCreateInfo; struct GraphicsPipelineShaderStageInfo; @@ -128,7 +129,6 @@ struct GraphicsPipelineObjectCreateInfo Pal::GraphicsPipelineCreateInfo pipeline; Pal::MsaaStateCreateInfo msaa; Pal::ColorBlendStateCreateInfo blend; - Pal::DepthStencilStateCreateInfo ds; GraphicsPipelineObjectImmedInfo immedInfo; uint32_t staticStateMask; uint32_t sampleCoverage; @@ -148,7 +148,8 @@ struct GraphicsPipelineObjectCreateInfo uint32_t customSampleLocations : 1; uint32_t force1x1ShaderRate : 1; uint32_t sampleShadingEnable : 1; - uint32_t reserved : 23; + uint32_t isPointSizeUsed : 1; + uint32_t reserved : 22; }; uint32_t value; } flags; @@ -188,6 +189,15 @@ class GraphicsPipelineCommon : public Pipeline // Returns true if src alpha is used in blending static bool IsSrcAlphaUsedInBlend(VkBlendFactor blend); + // Get sample count from multisample state or render pass + static void GetSubpassSampleCount( + const VkPipelineMultisampleStateCreateInfo* pMs, + const RenderPass* pRenderPass, + const uint32_t subpass, + uint32_t* pCoverageSampleCount, + uint32_t* pColorSampleCount, + uint32_t* pDepthSampleCount); + // Get the dynamics states specified by API info static uint32_t GetDynamicStateFlags( const VkPipelineDynamicStateCreateInfo* pDy diff --git a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std.h b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std.h index f8c0cef4..3338fe14 100644 --- a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std.h +++ b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std.h @@ -14,170 +14,182 @@ extern "C" { #include "vk_video/vulkan_video_codecs_common.h" // Vulkan 0.9 provisional Vulkan video H.264 encode and decode std specification version number -#define VK_STD_VULKAN_VIDEO_CODEC_H264_API_VERSION_0_9 VK_MAKE_VIDEO_STD_VERSION(0, 9, 0) // Patch version should always be set to 0 +#define VK_STD_VULKAN_VIDEO_CODEC_H264_API_VERSION_0_9_5 
VK_MAKE_VIDEO_STD_VERSION(0, 9, 5) // Patch version should always be set to 0 // Format must be in the form XX.XX where the first two digits are the major and the second two, the minor. -#define VK_STD_VULKAN_VIDEO_CODEC_H264_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_H264_API_VERSION_0_9 +#define VK_STD_VULKAN_VIDEO_CODEC_H264_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_H264_API_VERSION_0_9_5 #define VK_STD_VULKAN_VIDEO_CODEC_H264_EXTENSION_NAME "VK_STD_vulkan_video_codec_h264" // ************************************************* // Video H.264 common definitions: // ************************************************* +#define STD_VIDEO_H264_CPB_CNT_LIST_SIZE 32 +#define STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS 6 +#define STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS 16 +#define STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS 2 +#define STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS 64 + typedef enum StdVideoH264ChromaFormatIdc { - std_video_h264_chroma_format_idc_monochrome = 0, - std_video_h264_chroma_format_idc_420 = 1, - std_video_h264_chroma_format_idc_422 = 2, - std_video_h264_chroma_format_idc_444 = 3, + STD_VIDEO_H264_CHROMA_FORMAT_IDC_MONOCHROME = 0, + STD_VIDEO_H264_CHROMA_FORMAT_IDC_420 = 1, + STD_VIDEO_H264_CHROMA_FORMAT_IDC_422 = 2, + STD_VIDEO_H264_CHROMA_FORMAT_IDC_444 = 3, + STD_VIDEO_H264_CHROMA_FORMAT_IDC_INVALID = 0x7FFFFFFF } StdVideoH264ChromaFormatIdc; typedef enum StdVideoH264ProfileIdc { - std_video_h264_profile_idc_baseline = 66, /* Only constrained baseline is supported */ - std_video_h264_profile_idc_main = 77, - std_video_h264_profile_idc_high = 100, - std_video_h264_profile_idc_high_444_predictive = 244, - std_video_h264_profile_idc_invalid = 0x7FFFFFFF + STD_VIDEO_H264_PROFILE_IDC_BASELINE = 66, /* Only constrained baseline is supported */ + STD_VIDEO_H264_PROFILE_IDC_MAIN = 77, + STD_VIDEO_H264_PROFILE_IDC_HIGH = 100, + STD_VIDEO_H264_PROFILE_IDC_HIGH_444_PREDICTIVE = 244, + STD_VIDEO_H264_PROFILE_IDC_INVALID = 0x7FFFFFFF } StdVideoH264ProfileIdc; 
typedef enum StdVideoH264Level { - std_video_h264_level_1_0 = 0, - std_video_h264_level_1_1 = 1, - std_video_h264_level_1_2 = 2, - std_video_h264_level_1_3 = 3, - std_video_h264_level_2_0 = 4, - std_video_h264_level_2_1 = 5, - std_video_h264_level_2_2 = 6, - std_video_h264_level_3_0 = 7, - std_video_h264_level_3_1 = 8, - std_video_h264_level_3_2 = 9, - std_video_h264_level_4_0 = 10, - std_video_h264_level_4_1 = 11, - std_video_h264_level_4_2 = 12, - std_video_h264_level_5_0 = 13, - std_video_h264_level_5_1 = 14, - std_video_h264_level_5_2 = 15, - std_video_h264_level_6_0 = 16, - std_video_h264_level_6_1 = 17, - std_video_h264_level_6_2 = 18, - std_video_h264_level_invalid = 0x7FFFFFFF + STD_VIDEO_H264_LEVEL_1_0 = 0, + STD_VIDEO_H264_LEVEL_1_1 = 1, + STD_VIDEO_H264_LEVEL_1_2 = 2, + STD_VIDEO_H264_LEVEL_1_3 = 3, + STD_VIDEO_H264_LEVEL_2_0 = 4, + STD_VIDEO_H264_LEVEL_2_1 = 5, + STD_VIDEO_H264_LEVEL_2_2 = 6, + STD_VIDEO_H264_LEVEL_3_0 = 7, + STD_VIDEO_H264_LEVEL_3_1 = 8, + STD_VIDEO_H264_LEVEL_3_2 = 9, + STD_VIDEO_H264_LEVEL_4_0 = 10, + STD_VIDEO_H264_LEVEL_4_1 = 11, + STD_VIDEO_H264_LEVEL_4_2 = 12, + STD_VIDEO_H264_LEVEL_5_0 = 13, + STD_VIDEO_H264_LEVEL_5_1 = 14, + STD_VIDEO_H264_LEVEL_5_2 = 15, + STD_VIDEO_H264_LEVEL_6_0 = 16, + STD_VIDEO_H264_LEVEL_6_1 = 17, + STD_VIDEO_H264_LEVEL_6_2 = 18, + STD_VIDEO_H264_LEVEL_INVALID = 0x7FFFFFFF } StdVideoH264Level; typedef enum StdVideoH264PocType { - std_video_h264_poc_type_0 = 0, - std_video_h264_poc_type_1 = 1, - std_video_h264_poc_type_2 = 2, - std_video_h264_poc_type_invalid = 0x7FFFFFFF + STD_VIDEO_H264_POC_TYPE_0 = 0, + STD_VIDEO_H264_POC_TYPE_1 = 1, + STD_VIDEO_H264_POC_TYPE_2 = 2, + STD_VIDEO_H264_POC_TYPE_INVALID = 0x7FFFFFFF } StdVideoH264PocType; typedef enum StdVideoH264AspectRatioIdc { - std_video_h264_aspect_ratio_idc_unspecified = 0, - std_video_h264_aspect_ratio_idc_square = 1, - std_video_h264_aspect_ratio_idc_12_11 = 2, - std_video_h264_aspect_ratio_idc_10_11 = 3, - std_video_h264_aspect_ratio_idc_16_11 = 4, 
- std_video_h264_aspect_ratio_idc_40_33 = 5, - std_video_h264_aspect_ratio_idc_24_11 = 6, - std_video_h264_aspect_ratio_idc_20_11 = 7, - std_video_h264_aspect_ratio_idc_32_11 = 8, - std_video_h264_aspect_ratio_idc_80_33 = 9, - std_video_h264_aspect_ratio_idc_18_11 = 10, - std_video_h264_aspect_ratio_idc_15_11 = 11, - std_video_h264_aspect_ratio_idc_64_33 = 12, - std_video_h264_aspect_ratio_idc_160_99 = 13, - std_video_h264_aspect_ratio_idc_4_3 = 14, - std_video_h264_aspect_ratio_idc_3_2 = 15, - std_video_h264_aspect_ratio_idc_2_1 = 16, - std_video_h264_aspect_ratio_idc_extended_sar = 255, - std_video_h264_aspect_ratio_idc_invalid = 0x7FFFFFFF + STD_VIDEO_H264_ASPECT_RATIO_IDC_UNSPECIFIED = 0, + STD_VIDEO_H264_ASPECT_RATIO_IDC_SQUARE = 1, + STD_VIDEO_H264_ASPECT_RATIO_IDC_12_11 = 2, + STD_VIDEO_H264_ASPECT_RATIO_IDC_10_11 = 3, + STD_VIDEO_H264_ASPECT_RATIO_IDC_16_11 = 4, + STD_VIDEO_H264_ASPECT_RATIO_IDC_40_33 = 5, + STD_VIDEO_H264_ASPECT_RATIO_IDC_24_11 = 6, + STD_VIDEO_H264_ASPECT_RATIO_IDC_20_11 = 7, + STD_VIDEO_H264_ASPECT_RATIO_IDC_32_11 = 8, + STD_VIDEO_H264_ASPECT_RATIO_IDC_80_33 = 9, + STD_VIDEO_H264_ASPECT_RATIO_IDC_18_11 = 10, + STD_VIDEO_H264_ASPECT_RATIO_IDC_15_11 = 11, + STD_VIDEO_H264_ASPECT_RATIO_IDC_64_33 = 12, + STD_VIDEO_H264_ASPECT_RATIO_IDC_160_99 = 13, + STD_VIDEO_H264_ASPECT_RATIO_IDC_4_3 = 14, + STD_VIDEO_H264_ASPECT_RATIO_IDC_3_2 = 15, + STD_VIDEO_H264_ASPECT_RATIO_IDC_2_1 = 16, + STD_VIDEO_H264_ASPECT_RATIO_IDC_EXTENDED_SAR = 255, + STD_VIDEO_H264_ASPECT_RATIO_IDC_INVALID = 0x7FFFFFFF } StdVideoH264AspectRatioIdc; -typedef enum StdVideoH264WeightedBiPredIdc { - std_video_h264_default_weighted_b_slices_prediction_idc = 0, - std_video_h264_explicit_weighted_b_slices_prediction_idc = 1, - std_video_h264_implicit_weighted_b_slices_prediction_idc = 2, - std_video_h264_invalid_weighted_b_slices_prediction_idc = 0x7FFFFFFF -} StdVideoH264WeightedBiPredIdc; +typedef enum StdVideoH264WeightedBipredIdc { + STD_VIDEO_H264_WEIGHTED_BIPRED_IDC_DEFAULT = 
0, + STD_VIDEO_H264_WEIGHTED_BIPRED_IDC_EXPLICIT = 1, + STD_VIDEO_H264_WEIGHTED_BIPRED_IDC_IMPLICIT = 2, + STD_VIDEO_H264_WEIGHTED_BIPRED_IDC_INVALID = 0x7FFFFFFF +} StdVideoH264WeightedBipredIdc; typedef enum StdVideoH264ModificationOfPicNumsIdc { - std_video_h264_modification_of_pic_nums_idc_short_term_subtract = 0, - std_video_h264_modification_of_pic_nums_idc_short_term_add = 1, - std_video_h264_modification_of_pic_nums_idc_long_term = 2, - std_video_h264_modification_of_pic_nums_idc_end = 3, - std_video_h264_modification_of_pic_nums_idc_invalid = 0x7FFFFFFF + STD_VIDEO_H264_MODIFICATION_OF_PIC_NUMS_IDC_SHORT_TERM_SUBTRACT = 0, + STD_VIDEO_H264_MODIFICATION_OF_PIC_NUMS_IDC_SHORT_TERM_ADD = 1, + STD_VIDEO_H264_MODIFICATION_OF_PIC_NUMS_IDC_LONG_TERM = 2, + STD_VIDEO_H264_MODIFICATION_OF_PIC_NUMS_IDC_END = 3, + STD_VIDEO_H264_MODIFICATION_OF_PIC_NUMS_IDC_INVALID = 0x7FFFFFFF } StdVideoH264ModificationOfPicNumsIdc; typedef enum StdVideoH264MemMgmtControlOp { - std_video_h264_mem_mgmt_control_op_end = 0, - std_video_h264_mem_mgmt_control_op_unmark_short_term = 1, - std_video_h264_mem_mgmt_control_op_unmark_long_term = 2, - std_video_h264_mem_mgmt_control_op_mark_long_term = 3, - std_video_h264_mem_mgmt_control_op_set_max_long_term_index = 4, - std_video_h264_mem_mgmt_control_op_unmark_all = 5, - std_video_h264_mem_mgmt_control_op_mark_current_as_long_term = 6, - std_video_h264_mem_mgmt_control_op_invalid = 0x7FFFFFFF + STD_VIDEO_H264_MEM_MGMT_CONTROL_OP_END = 0, + STD_VIDEO_H264_MEM_MGMT_CONTROL_OP_UNMARK_SHORT_TERM = 1, + STD_VIDEO_H264_MEM_MGMT_CONTROL_OP_UNMARK_LONG_TERM = 2, + STD_VIDEO_H264_MEM_MGMT_CONTROL_OP_MARK_LONG_TERM = 3, + STD_VIDEO_H264_MEM_MGMT_CONTROL_OP_SET_MAX_LONG_TERM_INDEX = 4, + STD_VIDEO_H264_MEM_MGMT_CONTROL_OP_UNMARK_ALL = 5, + STD_VIDEO_H264_MEM_MGMT_CONTROL_OP_MARK_CURRENT_AS_LONG_TERM = 6, + STD_VIDEO_H264_MEM_MGMT_CONTROL_OP_INVALID = 0x7FFFFFFF } StdVideoH264MemMgmtControlOp; typedef enum StdVideoH264CabacInitIdc { - 
std_video_h264_cabac_init_idc_0 = 0, - std_video_h264_cabac_init_idc_1 = 1, - std_video_h264_cabac_init_idc_2 = 2, - std_video_h264_cabac_init_idc_invalid = 0x7FFFFFFF + STD_VIDEO_H264_CABAC_INIT_IDC_0 = 0, + STD_VIDEO_H264_CABAC_INIT_IDC_1 = 1, + STD_VIDEO_H264_CABAC_INIT_IDC_2 = 2, + STD_VIDEO_H264_CABAC_INIT_IDC_INVALID = 0x7FFFFFFF } StdVideoH264CabacInitIdc; typedef enum StdVideoH264DisableDeblockingFilterIdc { - std_video_h264_disable_deblocking_filter_idc_disabled = 0, - std_video_h264_disable_deblocking_filter_idc_enabled = 1, - std_video_h264_disable_deblocking_filter_idc_partial = 2, - std_video_h264_disable_deblocking_filter_idc_invalid = 0x7FFFFFFF + STD_VIDEO_H264_DISABLE_DEBLOCKING_FILTER_IDC_DISABLED = 0, + STD_VIDEO_H264_DISABLE_DEBLOCKING_FILTER_IDC_ENABLED = 1, + STD_VIDEO_H264_DISABLE_DEBLOCKING_FILTER_IDC_PARTIAL = 2, + STD_VIDEO_H264_DISABLE_DEBLOCKING_FILTER_IDC_INVALID = 0x7FFFFFFF } StdVideoH264DisableDeblockingFilterIdc; -typedef enum StdVideoH264PictureType { - std_video_h264_picture_type_i = 0, - std_video_h264_picture_type_p = 1, - std_video_h264_picture_type_b = 2, - std_video_h264_picture_type_invalid = 0x7FFFFFFF -} StdVideoH264PictureType; - typedef enum StdVideoH264SliceType { - std_video_h264_slice_type_i = 0, - std_video_h264_slice_type_p = 1, - std_video_h264_slice_type_b = 2, - std_video_h264_slice_type_invalid = 0x7FFFFFFF + STD_VIDEO_H264_SLICE_TYPE_P = 0, + STD_VIDEO_H264_SLICE_TYPE_B = 1, + STD_VIDEO_H264_SLICE_TYPE_I = 2, + // reserved STD_VIDEO_H264_SLICE_TYPE_SP = 3, + // reserved STD_VIDEO_H264_SLICE_TYPE_SI = 4, + STD_VIDEO_H264_SLICE_TYPE_INVALID = 0x7FFFFFFF } StdVideoH264SliceType; +typedef enum StdVideoH264PictureType { + STD_VIDEO_H264_PICTURE_TYPE_P = 0, + STD_VIDEO_H264_PICTURE_TYPE_B = 1, + STD_VIDEO_H264_PICTURE_TYPE_I = 2, + // reserved STD_VIDEO_H264_PICTURE_TYPE_SP = 3, + // reserved STD_VIDEO_H264_PICTURE_TYPE_SI = 4, + STD_VIDEO_H264_PICTURE_TYPE_IDR = 5, + STD_VIDEO_H264_PICTURE_TYPE_INVALID = 0x7FFFFFFF 
+} StdVideoH264PictureType; + typedef enum StdVideoH264NonVclNaluType { - std_video_h264_non_vcl_nalu_type_sps = 0, - std_video_h264_non_vcl_nalu_type_pps = 1, - std_video_h264_non_vcl_nalu_type_aud = 2, - std_video_h264_non_vcl_nalu_type_prefix = 3, - std_video_h264_non_vcl_nalu_type_end_of_sequence = 4, - std_video_h264_non_vcl_nalu_type_end_of_stream = 5, - std_video_h264_non_vcl_nalu_type_precoded = 6, - std_video_h264_non_vcl_nalu_type_invalid = 0x7FFFFFFF + STD_VIDEO_H264_NON_VCL_NALU_TYPE_SPS = 0, + STD_VIDEO_H264_NON_VCL_NALU_TYPE_PPS = 1, + STD_VIDEO_H264_NON_VCL_NALU_TYPE_AUD = 2, + STD_VIDEO_H264_NON_VCL_NALU_TYPE_PREFIX = 3, + STD_VIDEO_H264_NON_VCL_NALU_TYPE_END_OF_SEQUENCE = 4, + STD_VIDEO_H264_NON_VCL_NALU_TYPE_END_OF_STREAM = 5, + STD_VIDEO_H264_NON_VCL_NALU_TYPE_PRECODED = 6, + STD_VIDEO_H264_NON_VCL_NALU_TYPE_INVALID = 0x7FFFFFFF } StdVideoH264NonVclNaluType; typedef struct StdVideoH264SpsVuiFlags { - uint32_t aspect_ratio_info_present_flag:1; - uint32_t overscan_info_present_flag:1; - uint32_t overscan_appropriate_flag:1; - uint32_t video_signal_type_present_flag:1; - uint32_t video_full_range_flag:1; - uint32_t color_description_present_flag:1; - uint32_t chroma_loc_info_present_flag:1; - uint32_t timing_info_present_flag:1; - uint32_t fixed_frame_rate_flag:1; - uint32_t bitstream_restriction_flag:1; - uint32_t nal_hrd_parameters_present_flag:1; - uint32_t vcl_hrd_parameters_present_flag:1; + uint32_t aspect_ratio_info_present_flag : 1; + uint32_t overscan_info_present_flag : 1; + uint32_t overscan_appropriate_flag : 1; + uint32_t video_signal_type_present_flag : 1; + uint32_t video_full_range_flag : 1; + uint32_t color_description_present_flag : 1; + uint32_t chroma_loc_info_present_flag : 1; + uint32_t timing_info_present_flag : 1; + uint32_t fixed_frame_rate_flag : 1; + uint32_t bitstream_restriction_flag : 1; + uint32_t nal_hrd_parameters_present_flag : 1; + uint32_t vcl_hrd_parameters_present_flag : 1; } StdVideoH264SpsVuiFlags; -typedef 
struct StdVideoH264HrdParameters { +typedef struct StdVideoH264HrdParameters { // hrd_parameters uint8_t cpb_cnt_minus1; uint8_t bit_rate_scale; uint8_t cpb_size_scale; - uint32_t bit_rate_value_minus1[32]; - uint32_t cpb_size_value_minus1[32]; - uint8_t cbr_flag[32]; + uint32_t bit_rate_value_minus1[STD_VIDEO_H264_CPB_CNT_LIST_SIZE]; // cpb_cnt_minus1 number of valid elements + uint32_t cpb_size_value_minus1[STD_VIDEO_H264_CPB_CNT_LIST_SIZE]; // cpb_cnt_minus1 number of valid elements + uint8_t cbr_flag[STD_VIDEO_H264_CPB_CNT_LIST_SIZE]; // cpb_cnt_minus1 number of valid elements uint32_t initial_cpb_removal_delay_length_minus1; uint32_t cpb_removal_delay_length_minus1; uint32_t dpb_output_delay_length_minus1; @@ -194,30 +206,29 @@ typedef struct StdVideoH264SequenceParameterSetVui { uint8_t matrix_coefficients; uint32_t num_units_in_tick; uint32_t time_scale; - StdVideoH264HrdParameters hrd_parameters; - uint8_t num_reorder_frames; + StdVideoH264HrdParameters* pHrdParameters; // must be a valid ptr to hrd_parameters, if nal_hrd_parameters_present_flag or vcl_hrd_parameters_present_flag are set + uint8_t max_num_reorder_frames; uint8_t max_dec_frame_buffering; StdVideoH264SpsVuiFlags flags; } StdVideoH264SequenceParameterSetVui; typedef struct StdVideoH264SpsFlags { - uint32_t constraint_set0_flag:1; - uint32_t constraint_set1_flag:1; - uint32_t constraint_set2_flag:1; - uint32_t constraint_set3_flag:1; - uint32_t constraint_set4_flag:1; - uint32_t constraint_set5_flag:1; - uint32_t direct_8x8_inference_flag:1; - uint32_t mb_adaptive_frame_field_flag:1; - uint32_t frame_mbs_only_flag:1; - uint32_t delta_pic_order_always_zero_flag:1; - uint32_t residual_colour_transform_flag:1; - uint32_t gaps_in_frame_num_value_allowed_flag:1; - uint32_t first_picture_after_seek_flag:1; // where is this being documented? 
- uint32_t qpprime_y_zero_transform_bypass_flag:1; - uint32_t frame_cropping_flag:1; - uint32_t scaling_matrix_present_flag:1; - uint32_t vui_parameters_present_flag:1; + uint32_t constraint_set0_flag : 1; + uint32_t constraint_set1_flag : 1; + uint32_t constraint_set2_flag : 1; + uint32_t constraint_set3_flag : 1; + uint32_t constraint_set4_flag : 1; + uint32_t constraint_set5_flag : 1; + uint32_t direct_8x8_inference_flag : 1; + uint32_t mb_adaptive_frame_field_flag : 1; + uint32_t frame_mbs_only_flag : 1; + uint32_t delta_pic_order_always_zero_flag : 1; + uint32_t separate_colour_plane_flag : 1; + uint32_t gaps_in_frame_num_value_allowed_flag : 1; + uint32_t qpprime_y_zero_transform_bypass_flag : 1; + uint32_t frame_cropping_flag : 1; + uint32_t seq_scaling_matrix_present_flag : 1; + uint32_t vui_parameters_present_flag : 1; } StdVideoH264SpsFlags; typedef struct StdVideoH264ScalingLists @@ -234,8 +245,8 @@ typedef struct StdVideoH264ScalingLists // bit 0 - 5 are for each entry of ScalingList4x4 // bit 6 - 7 are for each entry plus 6 for ScalingList8x8 uint8_t use_default_scaling_matrix_mask; - uint8_t ScalingList4x4[6][16]; - uint8_t ScalingList8x8[2][64]; + uint8_t ScalingList4x4[STD_VIDEO_H264_SCALING_LIST_4X4_NUM_LISTS][STD_VIDEO_H264_SCALING_LIST_4X4_NUM_ELEMENTS]; + uint8_t ScalingList8x8[STD_VIDEO_H264_SCALING_LIST_8X8_NUM_LISTS][STD_VIDEO_H264_SCALING_LIST_8X8_NUM_ELEMENTS]; } StdVideoH264ScalingLists; typedef struct StdVideoH264SequenceParameterSet @@ -260,21 +271,23 @@ typedef struct StdVideoH264SequenceParameterSet uint32_t frame_crop_top_offset; uint32_t frame_crop_bottom_offset; StdVideoH264SpsFlags flags; - int32_t offset_for_ref_frame[255]; // The number of valid values are defined by the num_ref_frames_in_pic_order_cnt_cycle - StdVideoH264ScalingLists* pScalingLists; // Must be a valid pointer if scaling_matrix_present_flag is set + // pOffsetForRefFrame is a pointer representing the offset_for_ref_frame array with 
num_ref_frames_in_pic_order_cnt_cycle number of elements + // If pOffsetForRefFrame has nullptr value, then num_ref_frames_in_pic_order_cnt_cycle must also be "0". + int32_t* pOffsetForRefFrame; + StdVideoH264ScalingLists* pScalingLists; // Must be a valid pointer if seq_scaling_matrix_present_flag is set StdVideoH264SequenceParameterSetVui* pSequenceParameterSetVui; // Must be a valid pointer if StdVideoH264SpsFlags:vui_parameters_present_flag is set } StdVideoH264SequenceParameterSet; typedef struct StdVideoH264PpsFlags { - uint32_t transform_8x8_mode_flag:1; - uint32_t redundant_pic_cnt_present_flag:1; - uint32_t constrained_intra_pred_flag:1; - uint32_t deblocking_filter_control_present_flag:1; - uint32_t weighted_bipred_idc_flag:1; - uint32_t weighted_pred_flag:1; - uint32_t pic_order_present_flag:1; - uint32_t entropy_coding_mode_flag:1; - uint32_t scaling_matrix_present_flag:1; + uint32_t transform_8x8_mode_flag : 1; + uint32_t redundant_pic_cnt_present_flag : 1; + uint32_t constrained_intra_pred_flag : 1; + uint32_t deblocking_filter_control_present_flag : 1; + uint32_t weighted_bipred_idc_flag : 1; + uint32_t weighted_pred_flag : 1; + uint32_t pic_order_present_flag : 1; + uint32_t entropy_coding_mode_flag : 1; + uint32_t pic_scaling_matrix_present_flag : 1; } StdVideoH264PpsFlags; typedef struct StdVideoH264PictureParameterSet @@ -283,13 +296,13 @@ typedef struct StdVideoH264PictureParameterSet uint8_t pic_parameter_set_id; uint8_t num_ref_idx_l0_default_active_minus1; uint8_t num_ref_idx_l1_default_active_minus1; - StdVideoH264WeightedBiPredIdc weighted_bipred_idc; + StdVideoH264WeightedBipredIdc weighted_bipred_idc; int8_t pic_init_qp_minus26; int8_t pic_init_qs_minus26; int8_t chroma_qp_index_offset; int8_t second_chroma_qp_index_offset; StdVideoH264PpsFlags flags; - StdVideoH264ScalingLists* pScalingLists; // Must be a valid pointer if StdVideoH264PpsFlags::scaling_matrix_present_flag is set. 
+ StdVideoH264ScalingLists* pScalingLists; // Must be a valid pointer if StdVideoH264PpsFlags::pic_scaling_matrix_present_flag is set. } StdVideoH264PictureParameterSet; #ifdef __cplusplus diff --git a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_decode.h b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_decode.h index 2b4fc646..6f2d6d7e 100644 --- a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_decode.h +++ b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_decode.h @@ -17,12 +17,21 @@ extern "C" { // Video H.264 Decode related parameters: // ************************************************* +#define STD_VIDEO_DECODE_H264_MVC_REF_LIST_SIZE 15 + +typedef enum StdVideoDecodeH264FieldOrderCount { + STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_TOP = 0, + STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_BOTTOM = 1, + STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_LIST_SIZE = 2, + STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_INVALID = 0x7FFFFFFF +} StdVideoDecodeH264FieldOrderCnt; + typedef struct StdVideoDecodeH264PictureInfoFlags { - uint32_t field_pic_flag:1; // Is field picture - uint32_t is_intra:1; // Is intra picture - uint32_t bottom_field_flag:1; // bottom (true) or top (false) field if field_pic_flag is set. - uint32_t is_reference:1; // This only applies to picture info, and not to the DPB lists. - uint32_t complementary_field_pair:1; // complementary field pair, complementary non-reference field pair, complementary reference field pair + uint32_t field_pic_flag : 1; // Is field picture + uint32_t is_intra : 1; // Is intra picture + uint32_t bottom_field_flag : 1; // bottom (true) or top (false) field if field_pic_flag is set. + uint32_t is_reference : 1; // This only applies to picture info, and not to the DPB lists. 
+ uint32_t complementary_field_pair : 1; // complementary field pair, complementary non-reference field pair, complementary reference field pair } StdVideoDecodeH264PictureInfoFlags; typedef struct StdVideoDecodeH264PictureInfo { @@ -32,15 +41,15 @@ typedef struct StdVideoDecodeH264PictureInfo { uint16_t frame_num; // 7.4.3 Slice header semantics uint16_t idr_pic_id; // 7.4.3 Slice header semantics // PicOrderCnt is based on TopFieldOrderCnt and BottomFieldOrderCnt. See 8.2.1 Decoding process for picture order count type 0 - 2 - int32_t PicOrderCnt[2]; // TopFieldOrderCnt and BottomFieldOrderCnt fields. + int32_t PicOrderCnt[STD_VIDEO_DECODE_H264_FIELD_ORDER_COUNT_LIST_SIZE]; // TopFieldOrderCnt and BottomFieldOrderCnt fields. StdVideoDecodeH264PictureInfoFlags flags; } StdVideoDecodeH264PictureInfo; typedef struct StdVideoDecodeH264ReferenceInfoFlags { - uint32_t top_field_flag:1; // Reference is used for top field reference. - uint32_t bottom_field_flag:1; // Reference is used for bottom field reference. - uint32_t is_long_term:1; // this is a long term reference - uint32_t is_non_existing:1; // Must be handled in accordance with 8.2.5.2: Decoding process for gaps in frame_num + uint32_t top_field_flag : 1; // Reference is used for top field reference. + uint32_t bottom_field_flag : 1; // Reference is used for bottom field reference. 
+ uint32_t is_long_term : 1; // this is a long term reference + uint32_t is_non_existing : 1; // Must be handled in accordance with 8.2.5.2: Decoding process for gaps in frame_num } StdVideoDecodeH264ReferenceInfoFlags; typedef struct StdVideoDecodeH264ReferenceInfo { @@ -52,9 +61,9 @@ typedef struct StdVideoDecodeH264ReferenceInfo { } StdVideoDecodeH264ReferenceInfo; typedef struct StdVideoDecodeH264MvcElementFlags { - uint32_t non_idr:1; - uint32_t anchor_pic:1; - uint32_t inter_view:1; + uint32_t non_idr : 1; + uint32_t anchor_pic : 1; + uint32_t inter_view : 1; } StdVideoDecodeH264MvcElementFlags; typedef struct StdVideoDecodeH264MvcElement { @@ -64,13 +73,13 @@ typedef struct StdVideoDecodeH264MvcElement { uint16_t temporalId; // move out? uint16_t priorityId; // move out? uint16_t numOfAnchorRefsInL0; - uint16_t viewIdOfAnchorRefsInL0[15]; + uint16_t viewIdOfAnchorRefsInL0[STD_VIDEO_DECODE_H264_MVC_REF_LIST_SIZE]; uint16_t numOfAnchorRefsInL1; - uint16_t viewIdOfAnchorRefsInL1[15]; + uint16_t viewIdOfAnchorRefsInL1[STD_VIDEO_DECODE_H264_MVC_REF_LIST_SIZE]; uint16_t numOfNonAnchorRefsInL0; - uint16_t viewIdOfNonAnchorRefsInL0[15]; + uint16_t viewIdOfNonAnchorRefsInL0[STD_VIDEO_DECODE_H264_MVC_REF_LIST_SIZE]; uint16_t numOfNonAnchorRefsInL1; - uint16_t viewIdOfNonAnchorRefsInL1[15]; + uint16_t viewIdOfNonAnchorRefsInL1[STD_VIDEO_DECODE_H264_MVC_REF_LIST_SIZE]; } StdVideoDecodeH264MvcElement; typedef struct StdVideoDecodeH264Mvc { diff --git a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_encode.h b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_encode.h index 71845620..f3a0d3ad 100644 --- a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_encode.h +++ b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_encode.h @@ -18,24 +18,24 @@ extern "C" { // ************************************************* typedef struct StdVideoEncodeH264SliceHeaderFlags { - uint32_t idr_flag:1; - uint32_t 
is_reference_flag:1; - uint32_t num_ref_idx_active_override_flag:1; - uint32_t no_output_of_prior_pics_flag:1; - uint32_t long_term_reference_flag:1; - uint32_t adaptive_ref_pic_marking_mode_flag:1; - uint32_t no_prior_references_available_flag:1; + uint32_t idr_flag : 1; + uint32_t is_reference_flag : 1; + uint32_t num_ref_idx_active_override_flag : 1; + uint32_t no_output_of_prior_pics_flag : 1; + uint32_t long_term_reference_flag : 1; + uint32_t adaptive_ref_pic_marking_mode_flag : 1; + uint32_t no_prior_references_available_flag : 1; } StdVideoEncodeH264SliceHeaderFlags; typedef struct StdVideoEncodeH264PictureInfoFlags { - uint32_t idr_flag:1; - uint32_t is_reference_flag:1; - uint32_t long_term_reference_flag:1; + uint32_t idr_flag : 1; + uint32_t is_reference_flag : 1; + uint32_t long_term_reference_flag : 1; } StdVideoEncodeH264PictureInfoFlags; typedef struct StdVideoEncodeH264RefMgmtFlags { - uint32_t ref_pic_list_modification_l0_flag:1; - uint32_t ref_pic_list_modification_l1_flag:1; + uint32_t ref_pic_list_modification_l0_flag : 1; + uint32_t ref_pic_list_modification_l1_flag : 1; } StdVideoEncodeH264RefMgmtFlags; typedef struct StdVideoEncodeH264RefListModEntry { diff --git a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h265std.h b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h265std.h index 185b5504..179c6b70 100644 --- a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h265std.h +++ b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h265std.h @@ -14,58 +14,89 @@ extern "C" { #include "vk_video/vulkan_video_codecs_common.h" // Vulkan 0.5 version number WIP -#define VK_STD_VULKAN_VIDEO_CODEC_H265_API_VERSION_0_5 VK_MAKE_VIDEO_STD_VERSION(0, 5, 0) // Patch version should always be set to 0 +#define VK_STD_VULKAN_VIDEO_CODEC_H265_API_VERSION_0_9_5 VK_MAKE_VIDEO_STD_VERSION(0, 9, 5) // Patch version should always be set to 0 // Format must be in the form XX.XX where the first two digits are the major and 
the second two, the minor. -#define VK_STD_VULKAN_VIDEO_CODEC_H265_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_H265_API_VERSION_0_5 +#define VK_STD_VULKAN_VIDEO_CODEC_H265_SPEC_VERSION VK_STD_VULKAN_VIDEO_CODEC_H265_API_VERSION_0_9_5 #define VK_STD_VULKAN_VIDEO_CODEC_H265_EXTENSION_NAME "VK_STD_vulkan_video_codec_h265" +#define STD_VIDEO_H265_CPB_CNT_LIST_SIZE 32 +#define STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE 7 +#define STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS 6 +#define STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS 16 +#define STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS 6 +#define STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS 64 +#define STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS 6 +#define STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS 64 +#define STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS 2 +#define STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS 64 +#define STD_VIDEO_H265_CHROMA_QP_OFFSET_LIST_SIZE 6 +#define STD_VIDEO_H265_CHROMA_QP_OFFSET_TILE_COLS_LIST_SIZE 19 +#define STD_VIDEO_H265_CHROMA_QP_OFFSET_TILE_ROWS_LIST_SIZE 21 +#define STD_VIDEO_H265_PREDICTOR_PALETTE_COMPONENTS_LIST_SIZE 3 +#define STD_VIDEO_H265_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE 128 + typedef enum StdVideoH265ChromaFormatIdc { - std_video_h265_chroma_format_idc_monochrome = 0, - std_video_h265_chroma_format_idc_420 = 1, - std_video_h265_chroma_format_idc_422 = 2, - std_video_h265_chroma_format_idc_444 = 3, + STD_VIDEO_H265_CHROMA_FORMAT_IDC_MONOCHROME = 0, + STD_VIDEO_H265_CHROMA_FORMAT_IDC_420 = 1, + STD_VIDEO_H265_CHROMA_FORMAT_IDC_422 = 2, + STD_VIDEO_H265_CHROMA_FORMAT_IDC_444 = 3, + STD_VIDEO_H265_CHROMA_FORMAT_IDC_INVALID = 0x7FFFFFFF } StdVideoH265ChromaFormatIdc; typedef enum StdVideoH265ProfileIdc { - std_video_h265_profile_idc_main = 1, - std_video_h265_profile_idc_main_10 = 2, - std_video_h265_profile_idc_main_still_picture = 3, - std_video_h265_profile_idc_format_range_extensions = 4, - std_video_h265_profile_idc_scc_extensions = 9, - std_video_h265_profile_idc_invalid = 
0x7FFFFFFF + STD_VIDEO_H265_PROFILE_IDC_MAIN = 1, + STD_VIDEO_H265_PROFILE_IDC_MAIN_10 = 2, + STD_VIDEO_H265_PROFILE_IDC_MAIN_STILL_PICTURE = 3, + STD_VIDEO_H265_PROFILE_IDC_FORMAT_RANGE_EXTENSIONS = 4, + STD_VIDEO_H265_PROFILE_IDC_SCC_EXTENSIONS = 9, + STD_VIDEO_H265_PROFILE_IDC_INVALID = 0x7FFFFFFF } StdVideoH265ProfileIdc; typedef enum StdVideoH265Level { - std_video_h265_level_1_0 = 0, - std_video_h265_level_2_0 = 1, - std_video_h265_level_2_1 = 2, - std_video_h265_level_3_0 = 3, - std_video_h265_level_3_1 = 4, - std_video_h265_level_4_0 = 5, - std_video_h265_level_4_1 = 6, - std_video_h265_level_5_0 = 7, - std_video_h265_level_5_1 = 8, - std_video_h265_level_5_2 = 9, - std_video_h265_level_6_0 = 10, - std_video_h265_level_6_1 = 11, - std_video_h265_level_6_2 = 12, - std_video_h265_level_invalid = 0x7FFFFFFF + STD_VIDEO_H265_LEVEL_1_0 = 0, + STD_VIDEO_H265_LEVEL_2_0 = 1, + STD_VIDEO_H265_LEVEL_2_1 = 2, + STD_VIDEO_H265_LEVEL_3_0 = 3, + STD_VIDEO_H265_LEVEL_3_1 = 4, + STD_VIDEO_H265_LEVEL_4_0 = 5, + STD_VIDEO_H265_LEVEL_4_1 = 6, + STD_VIDEO_H265_LEVEL_5_0 = 7, + STD_VIDEO_H265_LEVEL_5_1 = 8, + STD_VIDEO_H265_LEVEL_5_2 = 9, + STD_VIDEO_H265_LEVEL_6_0 = 10, + STD_VIDEO_H265_LEVEL_6_1 = 11, + STD_VIDEO_H265_LEVEL_6_2 = 12, + STD_VIDEO_H265_LEVEL_INVALID = 0x7FFFFFFF } StdVideoH265Level; +typedef enum StdVideoH265SliceType { + STD_VIDEO_H265_SLICE_TYPE_B = 0, + STD_VIDEO_H265_SLICE_TYPE_P = 1, + STD_VIDEO_H265_SLICE_TYPE_I = 2, + STD_VIDEO_H265_SLICE_TYPE_INVALID = 0x7FFFFFFF +} StdVideoH265SliceType; + +typedef enum StdVideoH265PictureType { + STD_VIDEO_H265_PICTURE_TYPE_P = 0, + STD_VIDEO_H265_PICTURE_TYPE_B = 1, + STD_VIDEO_H265_PICTURE_TYPE_I = 2, + STD_VIDEO_H265_PICTURE_TYPE_IDR = 3, + STD_VIDEO_H265_PICTURE_TYPE_INVALID = 0x7FFFFFFF +} StdVideoH265PictureType; typedef struct StdVideoH265DecPicBufMgr { - uint32_t max_latency_increase_plus1[7]; - uint8_t max_dec_pic_buffering_minus1[7]; - uint8_t max_num_reorder_pics[7]; + uint32_t 
max_latency_increase_plus1[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; + uint8_t max_dec_pic_buffering_minus1[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; + uint8_t max_num_reorder_pics[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; } StdVideoH265DecPicBufMgr; -typedef struct StdVideoH265SubLayerHrdParameters { - uint32_t bit_rate_value_minus1[32]; - uint32_t cpb_size_value_minus1[32]; - uint32_t cpb_size_du_value_minus1[32]; - uint32_t bit_rate_du_value_minus1[32]; +typedef struct StdVideoH265SubLayerHrdParameters { // sub_layer_hrd_parameters + uint32_t bit_rate_value_minus1[STD_VIDEO_H265_CPB_CNT_LIST_SIZE]; + uint32_t cpb_size_value_minus1[STD_VIDEO_H265_CPB_CNT_LIST_SIZE]; + uint32_t cpb_size_du_value_minus1[STD_VIDEO_H265_CPB_CNT_LIST_SIZE]; + uint32_t bit_rate_du_value_minus1[STD_VIDEO_H265_CPB_CNT_LIST_SIZE]; uint32_t cbr_flag; // each bit represents a range of CpbCounts (bit 0 - cpb_cnt_minus1) per sub-layer } StdVideoH265SubLayerHrdParameters; @@ -74,9 +105,9 @@ typedef struct StdVideoH265HrdFlags { uint32_t vcl_hrd_parameters_present_flag : 1; uint32_t sub_pic_hrd_params_present_flag : 1; uint32_t sub_pic_cpb_params_in_pic_timing_sei_flag : 1; - uint8_t fixed_pic_rate_general_flag; // each bit represents a sublayer, bit 0 - vps_max_sub_layers_minus1 - uint8_t fixed_pic_rate_within_cvs_flag; // each bit represents a sublayer, bit 0 - vps_max_sub_layers_minus1 - uint8_t low_delay_hrd_flag; // each bit represents a sublayer, bit 0 - vps_max_sub_layers_minus1 + uint32_t fixed_pic_rate_general_flag : 8; // each bit represents a sublayer, bit 0 - vps_max_sub_layers_minus1 + uint32_t fixed_pic_rate_within_cvs_flag : 8; // each bit represents a sublayer, bit 0 - vps_max_sub_layers_minus1 + uint32_t low_delay_hrd_flag : 8; // each bit represents a sublayer, bit 0 - vps_max_sub_layers_minus1 } StdVideoH265HrdFlags; typedef struct StdVideoH265HrdParameters { @@ -89,10 +120,10 @@ typedef struct StdVideoH265HrdParameters { uint8_t initial_cpb_removal_delay_length_minus1; 
uint8_t au_cpb_removal_delay_length_minus1; uint8_t dpb_output_delay_length_minus1; - uint8_t cpb_cnt_minus1[7]; - uint16_t elemental_duration_in_tc_minus1[7]; - StdVideoH265SubLayerHrdParameters* SubLayerHrdParametersNal[7]; - StdVideoH265SubLayerHrdParameters* SubLayerHrdParametersVcl[7]; + uint8_t cpb_cnt_minus1[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; + uint16_t elemental_duration_in_tc_minus1[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; + StdVideoH265SubLayerHrdParameters* pSubLayerHrdParametersNal[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; // NAL per layer ptr to sub_layer_hrd_parameters + StdVideoH265SubLayerHrdParameters* pSubLayerHrdParametersVcl[STD_VIDEO_H265_SUBLAYERS_MINUS1_LIST_SIZE]; // VCL per layer ptr to sub_layer_hrd_parameters StdVideoH265HrdFlags flags; } StdVideoH265HrdParameters; @@ -111,18 +142,18 @@ typedef struct StdVideoH265VideoParameterSet uint32_t vps_time_scale; uint32_t vps_num_ticks_poc_diff_one_minus1; StdVideoH265DecPicBufMgr* pDecPicBufMgr; - StdVideoH265HrdParameters* hrd_parameters; + StdVideoH265HrdParameters* pHrdParameters; StdVideoH265VpsFlags flags; } StdVideoH265VideoParameterSet; typedef struct StdVideoH265ScalingLists { - uint8_t ScalingList4x4[6][16]; // ScalingList[ 0 ][ MatrixID ][ i ] (sizeID = 0) - uint8_t ScalingList8x8[6][64]; // ScalingList[ 1 ][ MatrixID ][ i ] (sizeID = 1) - uint8_t ScalingList16x16[6][64]; // ScalingList[ 2 ][ MatrixID ][ i ] (sizeID = 2) - uint8_t ScalingList32x32[2][64]; // ScalingList[ 3 ][ MatrixID ][ i ] (sizeID = 3) - uint8_t ScalingListDCCoef16x16[6]; // scaling_list_dc_coef_minus8[ sizeID - 2 ][ matrixID ] + 8, sizeID = 2 - uint8_t ScalingListDCCoef32x32[2]; // scaling_list_dc_coef_minus8[ sizeID - 2 ][ matrixID ] + 8. 
sizeID = 3 + uint8_t ScalingList4x4[STD_VIDEO_H265_SCALING_LIST_4X4_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_4X4_NUM_ELEMENTS]; // ScalingList[ 0 ][ MatrixID ][ i ] (sizeID = 0) + uint8_t ScalingList8x8[STD_VIDEO_H265_SCALING_LIST_8X8_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_8X8_NUM_ELEMENTS]; // ScalingList[ 1 ][ MatrixID ][ i ] (sizeID = 1) + uint8_t ScalingList16x16[STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_16X16_NUM_ELEMENTS]; // ScalingList[ 2 ][ MatrixID ][ i ] (sizeID = 2) + uint8_t ScalingList32x32[STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS][STD_VIDEO_H265_SCALING_LIST_32X32_NUM_ELEMENTS]; // ScalingList[ 3 ][ MatrixID ][ i ] (sizeID = 3) + uint8_t ScalingListDCCoef16x16[STD_VIDEO_H265_SCALING_LIST_16X16_NUM_LISTS]; // scaling_list_dc_coef_minus8[ sizeID - 2 ][ matrixID ] + 8, sizeID = 2 + uint8_t ScalingListDCCoef32x32[STD_VIDEO_H265_SCALING_LIST_32X32_NUM_LISTS]; // scaling_list_dc_coef_minus8[ sizeID - 2 ][ matrixID ] + 8. sizeID = 3 } StdVideoH265ScalingLists; typedef struct StdVideoH265SpsVuiFlags { @@ -163,7 +194,7 @@ typedef struct StdVideoH265SequenceParameterSetVui { uint32_t vui_num_units_in_tick; uint32_t vui_time_scale; uint32_t vui_num_ticks_poc_diff_one_minus1; - StdVideoH265HrdParameters* hrd_parameters; + StdVideoH265HrdParameters* pHrdParameters; uint16_t min_spatial_segmentation_idc; uint8_t max_bytes_per_pic_denom; uint8_t max_bits_per_min_cu_denom; @@ -174,10 +205,9 @@ typedef struct StdVideoH265SequenceParameterSetVui { typedef struct StdVideoH265PredictorPaletteEntries { - uint16_t PredictorPaletteEntries[3][128]; + uint16_t PredictorPaletteEntries[STD_VIDEO_H265_PREDICTOR_PALETTE_COMPONENTS_LIST_SIZE][STD_VIDEO_H265_PREDICTOR_PALETTE_COMP_ENTRIES_LIST_SIZE]; } StdVideoH265PredictorPaletteEntries; - typedef struct StdVideoH265SpsFlags { uint32_t sps_temporal_id_nesting_flag : 1; uint32_t separate_colour_plane_flag : 1; @@ -194,7 +224,7 @@ typedef struct StdVideoH265SpsFlags { uint32_t 
sps_extension_present_flag : 1; uint32_t sps_range_extension_flag : 1; - // extension SPS flags, valid when std_video_h265_profile_idc_format_range_extensions is set + // extension SPS flags, valid when STD_VIDEO_H265_PROFILE_IDC_FORMAT_RANGE_EXTENSIONS is set uint32_t transform_skip_rotation_enabled_flag : 1; uint32_t transform_skip_context_enabled_flag : 1; uint32_t implicit_rdpcm_enabled_flag : 1; @@ -205,7 +235,7 @@ typedef struct StdVideoH265SpsFlags { uint32_t persistent_rice_adaptation_enabled_flag : 1; uint32_t cabac_bypass_alignment_enabled_flag : 1; - // extension SPS flags, valid when std_video_h265_profile_idc_scc_extensions is set + // extension SPS flags, valid when STD_VIDEO_H265_PROFILE_IDC_SCC_EXTENSIONS is set uint32_t sps_curr_pic_ref_enabled_flag : 1; uint32_t palette_mode_enabled_flag : 1; uint32_t sps_palette_predictor_initializer_present_flag : 1; @@ -247,7 +277,7 @@ typedef struct StdVideoH265SequenceParameterSet StdVideoH265ScalingLists* pScalingLists; // Must be a valid pointer if sps_scaling_list_data_present_flag is set StdVideoH265SequenceParameterSetVui* pSequenceParameterSetVui; // Must be a valid pointer if StdVideoH265SpsFlags:vui_parameters_present_flag is set palette_max_size; - // extension SPS flags, valid when std_video_h265_profile_idc_scc_extensions is set + // extension SPS flags, valid when STD_VIDEO_H265_PROFILE_IDC_SCC_EXTENSIONS is set uint8_t palette_max_size; uint8_t delta_palette_max_predictor_size; uint8_t motion_vector_resolution_control_idc; @@ -281,11 +311,11 @@ typedef struct StdVideoH265PpsFlags { uint32_t slice_segment_header_extension_present_flag : 1; uint32_t pps_extension_present_flag : 1; - // extension PPS flags, valid when std_video_h265_profile_idc_format_range_extensions is set + // extension PPS flags, valid when STD_VIDEO_H265_PROFILE_IDC_FORMAT_RANGE_EXTENSIONS is set uint32_t cross_component_prediction_enabled_flag : 1; uint32_t chroma_qp_offset_list_enabled_flag : 1; - // extension PPS flags, 
valid when std_video_h265_profile_idc_scc_extensions is set + // extension PPS flags, valid when STD_VIDEO_H265_PROFILE_IDC_SCC_EXTENSIONS is set uint32_t pps_curr_pic_ref_enabled_flag : 1; uint32_t residual_adaptive_colour_transform_enabled_flag : 1; uint32_t pps_slice_act_qp_offsets_present_flag : 1; @@ -307,24 +337,24 @@ typedef struct StdVideoH265PictureParameterSet int8_t pps_cr_qp_offset; uint8_t num_tile_columns_minus1; uint8_t num_tile_rows_minus1; - uint16_t column_width_minus1[19]; - uint16_t row_height_minus1[21]; + uint16_t column_width_minus1[STD_VIDEO_H265_CHROMA_QP_OFFSET_TILE_COLS_LIST_SIZE]; + uint16_t row_height_minus1[STD_VIDEO_H265_CHROMA_QP_OFFSET_TILE_ROWS_LIST_SIZE]; int8_t pps_beta_offset_div2; int8_t pps_tc_offset_div2; uint8_t log2_parallel_merge_level_minus2; StdVideoH265PpsFlags flags; StdVideoH265ScalingLists* pScalingLists; // Must be a valid pointer if pps_scaling_list_data_present_flag is set - // extension PPS, valid when std_video_h265_profile_idc_format_range_extensions is set + // extension PPS, valid when STD_VIDEO_H265_PROFILE_IDC_FORMAT_RANGE_EXTENSIONS is set uint8_t log2_max_transform_skip_block_size_minus2; uint8_t diff_cu_chroma_qp_offset_depth; uint8_t chroma_qp_offset_list_len_minus1; - int8_t cb_qp_offset_list[6]; - int8_t cr_qp_offset_list[6]; + int8_t cb_qp_offset_list[STD_VIDEO_H265_CHROMA_QP_OFFSET_LIST_SIZE]; + int8_t cr_qp_offset_list[STD_VIDEO_H265_CHROMA_QP_OFFSET_LIST_SIZE]; uint8_t log2_sao_offset_scale_luma; uint8_t log2_sao_offset_scale_chroma; - // extension PPS, valid when std_video_h265_profile_idc_scc_extensions is set + // extension PPS, valid when STD_VIDEO_H265_PROFILE_IDC_SCC_EXTENSIONS is set int8_t pps_act_y_qp_offset_plus5; int8_t pps_act_cb_qp_offset_plus5; int8_t pps_act_cr_qp_offset_plus5; diff --git a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h265std_decode.h b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h265std_decode.h index 4be8b5f1..a1efa055 100644 --- 
a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h265std_decode.h +++ b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h265std_decode.h @@ -17,6 +17,8 @@ extern "C" { // Video h265 Decode related parameters: // ************************************************* +#define STD_VIDEO_DECODE_H265_REF_PIC_SET_LIST_SIZE 8 + typedef struct StdVideoDecodeH265PictureInfoFlags { uint32_t IrapPicFlag : 1; uint32_t IdrPicFlag : 1; @@ -33,11 +35,14 @@ typedef struct StdVideoDecodeH265PictureInfo { uint16_t NumBitsForSTRefPicSetInSlice; // number of bits used in st_ref_pic_set() //when short_term_ref_pic_set_sps_flag is 0; otherwise set to 0. uint8_t NumDeltaPocsOfRefRpsIdx; // NumDeltaPocs[ RefRpsIdx ] when short_term_ref_pic_set_sps_flag = 1, otherwise 0 - uint8_t RefPicSetStCurrBefore[8]; // slotIndex as used in VkVideoReferenceSlotKHR structures representing + uint8_t RefPicSetStCurrBefore[STD_VIDEO_DECODE_H265_REF_PIC_SET_LIST_SIZE]; // slotIndex as used in + // VkVideoReferenceSlotKHR structures representing //pReferenceSlots in VkVideoDecodeInfoKHR, 0xff for invalid slotIndex - uint8_t RefPicSetStCurrAfter[8]; // slotIndex as used in VkVideoReferenceSlotKHR structures representing + uint8_t RefPicSetStCurrAfter[STD_VIDEO_DECODE_H265_REF_PIC_SET_LIST_SIZE]; // slotIndex as used in + // VkVideoReferenceSlotKHR structures representing //pReferenceSlots in VkVideoDecodeInfoKHR, 0xff for invalid slotIndex - uint8_t RefPicSetLtCurr[8]; // slotIndex as used in VkVideoReferenceSlotKHR structures representing + uint8_t RefPicSetLtCurr[STD_VIDEO_DECODE_H265_REF_PIC_SET_LIST_SIZE]; // slotIndex as used in + // VkVideoReferenceSlotKHR structures representing //pReferenceSlots in VkVideoDecodeInfoKHR, 0xff for invalid slotIndex StdVideoDecodeH265PictureInfoFlags flags; } StdVideoDecodeH265PictureInfo; diff --git a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h265std_encode.h 
b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h265std_encode.h new file mode 100644 index 00000000..ffffef20 --- /dev/null +++ b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h265std_encode.h @@ -0,0 +1,122 @@ +/* +** Copyright (c) 2019-2021 The Khronos Group Inc. +** +** SPDX-License-Identifier: Apache-2.0 +*/ + +#ifndef VULKAN_VIDEO_CODEC_H265STD_ENCODE_H_ +#define VULKAN_VIDEO_CODEC_H265STD_ENCODE_H_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +#include "vk_video/vulkan_video_codec_h265std.h" + +// ************************************************* +// Video h265 Encode related parameters: +// ************************************************* + +#define STD_VIDEO_ENCODE_H265_LUMA_LIST_SIZE 15 +#define STD_VIDEO_ENCODE_H265_CHROMA_LIST_SIZE 15 +#define STD_VIDEO_ENCODE_H265_CHROMA_LISTS_NUM 2 + +typedef struct StdVideoEncodeH265SliceHeaderFlags { + uint32_t first_slice_segment_in_pic_flag : 1; + uint32_t no_output_of_prior_pics_flag : 1; + uint32_t dependent_slice_segment_flag : 1; + uint32_t short_term_ref_pic_set_sps_flag : 1; + uint32_t slice_temporal_mvp_enable_flag : 1; + uint32_t slice_sao_luma_flag : 1; + uint32_t slice_sao_chroma_flag : 1; + uint32_t num_ref_idx_active_override_flag : 1; + uint32_t mvd_l1_zero_flag : 1; + uint32_t cabac_init_flag : 1; + uint32_t slice_deblocking_filter_disable_flag : 1; + uint32_t collocated_from_l0_flag : 1; + uint32_t slice_loop_filter_across_slices_enabled_flag : 1; + uint32_t bLastSliceInPic : 1; + uint32_t reservedBits : 18; + uint16_t luma_weight_l0_flag; // bit 0 - num_ref_idx_l0_active_minus1 + uint16_t chroma_weight_l0_flag; // bit 0 - num_ref_idx_l0_active_minus1 + uint16_t luma_weight_l1_flag; // bit 0 - num_ref_idx_l1_active_minus1 + uint16_t chroma_weight_l1_flag; // bit 0 - num_ref_idx_l1_active_minus1 +} StdVideoEncodeH265SliceHeaderFlags; + +typedef struct StdVideoEncodeH265SliceHeader { + StdVideoH265SliceType slice_type; + uint8_t slice_pic_parameter_set_id; + uint8_t 
num_short_term_ref_pic_sets; + uint32_t slice_segment_address; + uint8_t short_term_ref_pic_set_idx; + uint8_t num_long_term_sps; + uint8_t num_long_term_pics; + uint8_t collocated_ref_idx; + uint8_t num_ref_idx_l0_active_minus1; // [0, 14] + uint8_t num_ref_idx_l1_active_minus1; // [0, 14] + uint8_t luma_log2_weight_denom; // [0, 7] + int8_t delta_chroma_log2_weight_denom; + int8_t delta_luma_weight_l0[STD_VIDEO_ENCODE_H265_LUMA_LIST_SIZE]; + int8_t luma_offset_l0[STD_VIDEO_ENCODE_H265_LUMA_LIST_SIZE]; + int8_t delta_chroma_weight_l0[STD_VIDEO_ENCODE_H265_CHROMA_LIST_SIZE][STD_VIDEO_ENCODE_H265_CHROMA_LISTS_NUM]; + int8_t delta_chroma_offset_l0[STD_VIDEO_ENCODE_H265_CHROMA_LIST_SIZE][STD_VIDEO_ENCODE_H265_CHROMA_LISTS_NUM]; + int8_t delta_luma_weight_l1[STD_VIDEO_ENCODE_H265_LUMA_LIST_SIZE]; + int8_t luma_offset_l1[STD_VIDEO_ENCODE_H265_LUMA_LIST_SIZE]; + int8_t delta_chroma_weight_l1[STD_VIDEO_ENCODE_H265_CHROMA_LIST_SIZE][STD_VIDEO_ENCODE_H265_CHROMA_LISTS_NUM]; + int8_t delta_chroma_offset_l1[STD_VIDEO_ENCODE_H265_CHROMA_LIST_SIZE][STD_VIDEO_ENCODE_H265_CHROMA_LISTS_NUM]; + uint8_t MaxNumMergeCand; + int8_t slice_qp_delta; + int8_t slice_cb_qp_offset; // [-12, 12] + int8_t slice_cr_qp_offset; // [-12, 12] + int8_t slice_beta_offset_div2; // [-6, 6] + int8_t slice_tc_offset_div2; // [-6, 6] + int8_t slice_act_y_qp_offset; + int8_t slice_act_cb_qp_offset; + int8_t slice_act_cr_qp_offset; + StdVideoEncodeH265SliceHeaderFlags flags; +} StdVideoEncodeH265SliceHeader; + +typedef struct StdVideoEncodeH265ReferenceModificationFlags { + uint32_t ref_pic_list_modification_flag_l0 : 1; + uint32_t ref_pic_list_modification_flag_l1 : 1; +} StdVideoEncodeH265ReferenceModificationFlags; + +typedef struct StdVideoEncodeH265ReferenceModifications { + StdVideoEncodeH265ReferenceModificationFlags flags; + uint8_t referenceList0ModificationsCount; // num_ref_idx_l0_active_minus1 + uint8_t* pReferenceList0Modifications; // list_entry_l0 + uint8_t referenceList1ModificationsCount; 
// num_ref_idx_l1_active_minus1 + uint8_t* pReferenceList1Modifications; // list_entry_l1 +} StdVideoEncodeH265ReferenceModifications; + +typedef struct StdVideoEncodeH265PictureInfoFlags { + uint32_t is_reference_flag : 1; + uint32_t IrapPicFlag : 1; + uint32_t long_term_flag : 1; +} StdVideoEncodeH265PictureInfoFlags; + +typedef struct StdVideoEncodeH265PictureInfo { + StdVideoH265PictureType PictureType; + uint8_t sps_video_parameter_set_id; + uint8_t pps_seq_parameter_set_id; + int32_t PicOrderCntVal; + uint8_t TemporalId; + StdVideoEncodeH265PictureInfoFlags flags; +} StdVideoEncodeH265PictureInfo; + +typedef struct StdVideoEncodeH265ReferenceInfoFlags { + uint32_t is_long_term : 1; + uint32_t isUsedFlag : 1; +} StdVideoEncodeH265ReferenceInfoFlags; + +typedef struct StdVideoEncodeH265ReferenceInfo { + int32_t PicOrderCntVal; + uint8_t TemporalId; + StdVideoEncodeH265ReferenceInfoFlags flags; +} StdVideoEncodeH265ReferenceInfo; + +#ifdef __cplusplus +} +#endif + +#endif // VULKAN_VIDEO_CODEC_H265STD_ENCODE_H_ diff --git a/icd/api/include/khronos/sdk-1.2/vk_icd.h b/icd/api/include/khronos/sdk-1.2/vulkan/vk_icd.h similarity index 99% rename from icd/api/include/khronos/sdk-1.2/vk_icd.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vk_icd.h index ae006d06..41989ee3 100644 --- a/icd/api/include/khronos/sdk-1.2/vk_icd.h +++ b/icd/api/include/khronos/sdk-1.2/vulkan/vk_icd.h @@ -33,7 +33,7 @@ // Version 2 - Add Loader/ICD Interface version negotiation // via vk_icdNegotiateLoaderICDInterfaceVersion. // Version 3 - Add ICD creation/destruction of KHR_surface objects. -// Version 4 - Add unknown physical device extension qyering via +// Version 4 - Add unknown physical device extension querying via // vk_icdGetPhysicalDeviceProcAddr. 
// Version 5 - Tells ICDs that the loader is now paying attention to the // application version of Vulkan passed into the ApplicationInfo diff --git a/icd/api/include/khronos/sdk-1.2/vk_layer.h b/icd/api/include/khronos/sdk-1.2/vulkan/vk_layer.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vk_layer.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vk_layer.h diff --git a/icd/api/include/khronos/sdk-1.2/vk_platform.h b/icd/api/include/khronos/sdk-1.2/vulkan/vk_platform.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vk_platform.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vk_platform.h diff --git a/icd/api/include/khronos/sdk-1.2/vk_sdk_platform.h b/icd/api/include/khronos/sdk-1.2/vulkan/vk_sdk_platform.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vk_sdk_platform.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vk_sdk_platform.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_android.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_android.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_android.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_android.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_beta.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_beta.h similarity index 83% rename from icd/api/include/khronos/sdk-1.2/vulkan_beta.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_beta.h index 2fedade6..d2f34d1c 100644 --- a/icd/api/include/khronos/sdk-1.2/vulkan_beta.h +++ b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_beta.h @@ -37,6 +37,9 @@ typedef enum VkVideoCodecOperationFlagBitsKHR { #ifdef VK_ENABLE_BETA_EXTENSIONS VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_EXT = 0x00010000, #endif +#ifdef 
VK_ENABLE_BETA_EXTENSIONS + VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_EXT = 0x00020000, +#endif #ifdef VK_ENABLE_BETA_EXTENSIONS VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_EXT = 0x00000001, #endif @@ -564,6 +567,134 @@ typedef struct VkVideoEncodeH264ProfileEXT { +#define VK_EXT_video_encode_h265 1 +#include "vk_video/vulkan_video_codec_h265std.h" +#include "vk_video/vulkan_video_codec_h265std_encode.h" +#define VK_EXT_VIDEO_ENCODE_H265_SPEC_VERSION 2 +#define VK_EXT_VIDEO_ENCODE_H265_EXTENSION_NAME "VK_EXT_video_encode_h265" +typedef VkFlags VkVideoEncodeH265CapabilityFlagsEXT; + +typedef enum VkVideoEncodeH265InputModeFlagBitsEXT { + VK_VIDEO_ENCODE_H265_INPUT_MODE_FRAME_BIT_EXT = 0x00000001, + VK_VIDEO_ENCODE_H265_INPUT_MODE_SLICE_BIT_EXT = 0x00000002, + VK_VIDEO_ENCODE_H265_INPUT_MODE_NON_VCL_BIT_EXT = 0x00000004, + VK_VIDEO_ENCODE_H265_INPUT_MODE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkVideoEncodeH265InputModeFlagBitsEXT; +typedef VkFlags VkVideoEncodeH265InputModeFlagsEXT; + +typedef enum VkVideoEncodeH265OutputModeFlagBitsEXT { + VK_VIDEO_ENCODE_H265_OUTPUT_MODE_FRAME_BIT_EXT = 0x00000001, + VK_VIDEO_ENCODE_H265_OUTPUT_MODE_SLICE_BIT_EXT = 0x00000002, + VK_VIDEO_ENCODE_H265_OUTPUT_MODE_NON_VCL_BIT_EXT = 0x00000004, + VK_VIDEO_ENCODE_H265_OUTPUT_MODE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkVideoEncodeH265OutputModeFlagBitsEXT; +typedef VkFlags VkVideoEncodeH265OutputModeFlagsEXT; +typedef VkFlags VkVideoEncodeH265CreateFlagsEXT; + +typedef enum VkVideoEncodeH265CtbSizeFlagBitsEXT { + VK_VIDEO_ENCODE_H265_CTB_SIZE_8_BIT_EXT = 0x00000001, + VK_VIDEO_ENCODE_H265_CTB_SIZE_16_BIT_EXT = 0x00000002, + VK_VIDEO_ENCODE_H265_CTB_SIZE_32_BIT_EXT = 0x00000004, + VK_VIDEO_ENCODE_H265_CTB_SIZE_64_BIT_EXT = 0x00000008, + VK_VIDEO_ENCODE_H265_CTB_SIZE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkVideoEncodeH265CtbSizeFlagBitsEXT; +typedef VkFlags VkVideoEncodeH265CtbSizeFlagsEXT; +typedef struct VkVideoEncodeH265CapabilitiesEXT { + VkStructureType sType; + const void* pNext; + 
VkVideoEncodeH265CapabilityFlagsEXT flags; + VkVideoEncodeH265InputModeFlagsEXT inputModeFlags; + VkVideoEncodeH265OutputModeFlagsEXT outputModeFlags; + VkVideoEncodeH265CtbSizeFlagsEXT ctbSizes; + VkExtent2D inputImageDataAlignment; + uint8_t maxNumL0ReferenceForP; + uint8_t maxNumL0ReferenceForB; + uint8_t maxNumL1Reference; + uint8_t maxNumSubLayers; + uint8_t qualityLevelCount; + VkExtensionProperties stdExtensionVersion; +} VkVideoEncodeH265CapabilitiesEXT; + +typedef struct VkVideoEncodeH265SessionCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkVideoEncodeH265CreateFlagsEXT flags; + const VkExtensionProperties* pStdExtensionVersion; +} VkVideoEncodeH265SessionCreateInfoEXT; + +typedef struct VkVideoEncodeH265SessionParametersAddInfoEXT { + VkStructureType sType; + const void* pNext; + uint32_t vpsStdCount; + const StdVideoH265VideoParameterSet* pVpsStd; + uint32_t spsStdCount; + const StdVideoH265SequenceParameterSet* pSpsStd; + uint32_t ppsStdCount; + const StdVideoH265PictureParameterSet* pPpsStd; +} VkVideoEncodeH265SessionParametersAddInfoEXT; + +typedef struct VkVideoEncodeH265SessionParametersCreateInfoEXT { + VkStructureType sType; + const void* pNext; + uint32_t maxVpsStdCount; + uint32_t maxSpsStdCount; + uint32_t maxPpsStdCount; + const VkVideoEncodeH265SessionParametersAddInfoEXT* pParametersAddInfo; +} VkVideoEncodeH265SessionParametersCreateInfoEXT; + +typedef struct VkVideoEncodeH265DpbSlotInfoEXT { + VkStructureType sType; + const void* pNext; + int8_t slotIndex; + const StdVideoEncodeH265ReferenceInfo* pStdReferenceInfo; +} VkVideoEncodeH265DpbSlotInfoEXT; + +typedef struct VkVideoEncodeH265ReferenceListsEXT { + VkStructureType sType; + const void* pNext; + uint8_t referenceList0EntryCount; + const VkVideoEncodeH265DpbSlotInfoEXT* pReferenceList0Entries; + uint8_t referenceList1EntryCount; + const VkVideoEncodeH265DpbSlotInfoEXT* pReferenceList1Entries; + const StdVideoEncodeH265ReferenceModifications* 
pReferenceModifications; +} VkVideoEncodeH265ReferenceListsEXT; + +typedef struct VkVideoEncodeH265NaluSliceEXT { + VkStructureType sType; + const void* pNext; + uint32_t ctbCount; + const VkVideoEncodeH265ReferenceListsEXT* pReferenceFinalLists; + const StdVideoEncodeH265SliceHeader* pSliceHeaderStd; +} VkVideoEncodeH265NaluSliceEXT; + +typedef struct VkVideoEncodeH265VclFrameInfoEXT { + VkStructureType sType; + const void* pNext; + const VkVideoEncodeH265ReferenceListsEXT* pReferenceFinalLists; + uint32_t naluSliceEntryCount; + const VkVideoEncodeH265NaluSliceEXT* pNaluSliceEntries; + const StdVideoEncodeH265PictureInfo* pCurrentPictureInfo; +} VkVideoEncodeH265VclFrameInfoEXT; + +typedef struct VkVideoEncodeH265EmitPictureParametersEXT { + VkStructureType sType; + const void* pNext; + uint8_t vpsId; + uint8_t spsId; + VkBool32 emitVpsEnable; + VkBool32 emitSpsEnable; + uint32_t ppsIdEntryCount; + const uint8_t* ppsIdEntries; +} VkVideoEncodeH265EmitPictureParametersEXT; + +typedef struct VkVideoEncodeH265ProfileEXT { + VkStructureType sType; + const void* pNext; + StdVideoH265ProfileIdc stdProfileIdc; +} VkVideoEncodeH265ProfileEXT; + + + #define VK_EXT_video_decode_h264 1 #include "vk_video/vulkan_video_codec_h264std_decode.h" #define VK_EXT_VIDEO_DECODE_H264_SPEC_VERSION 3 @@ -639,7 +770,6 @@ typedef struct VkVideoDecodeH264DpbSlotInfoEXT { #define VK_EXT_video_decode_h265 1 -#include "vk_video/vulkan_video_codec_h265std.h" #include "vk_video/vulkan_video_codec_h265std_decode.h" #define VK_EXT_VIDEO_DECODE_H265_SPEC_VERSION 1 #define VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME "VK_EXT_video_decode_h265" diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_core.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_core.h similarity index 98% rename from icd/api/include/khronos/sdk-1.2/vulkan_core.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_core.h index 83f2c3aa..a2f4e771 100644 --- a/icd/api/include/khronos/sdk-1.2/vulkan_core.h +++ 
b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_core.h @@ -72,7 +72,7 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 195 +#define VK_HEADER_VERSION 197 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 2, VK_HEADER_VERSION) @@ -459,6 +459,36 @@ typedef enum VkStructureType { #ifdef VK_ENABLE_BETA_EXTENSIONS VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_EXT = 1000038008, #endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_CAPABILITIES_EXT = 1000039000, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_CREATE_INFO_EXT = 1000039001, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_CREATE_INFO_EXT = 1000039002, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_SESSION_PARAMETERS_ADD_INFO_EXT = 1000039003, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_VCL_FRAME_INFO_EXT = 1000039004, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_DPB_SLOT_INFO_EXT = 1000039005, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_NALU_SLICE_EXT = 1000039006, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_EMIT_PICTURE_PARAMETERS_EXT = 1000039007, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_PROFILE_EXT = 1000039008, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_REFERENCE_LISTS_EXT = 1000039009, +#endif #ifdef VK_ENABLE_BETA_EXTENSIONS VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_EXT = 1000040000, #endif @@ -484,6 +514,15 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_EXT = 1000040007, #endif VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD = 
1000041000, + VK_STRUCTURE_TYPE_RENDERING_INFO_KHR = 1000044000, + VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR = 1000044001, + VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR = 1000044002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR = 1000044003, + VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO_KHR = 1000044004, + VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR = 1000044006, + VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_INFO_EXT = 1000044007, + VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD = 1000044008, + VK_STRUCTURE_TYPE_MULTIVIEW_PER_VIEW_ATTRIBUTES_INFO_NVX = 1000044009, VK_STRUCTURE_TYPE_STREAM_DESCRIPTOR_SURFACE_CREATE_INFO_GGP = 1000049000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CORNER_SAMPLED_IMAGE_FEATURES_NV = 1000050000, VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_NV = 1000056000, @@ -860,6 +899,8 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_EXT = 1000388001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT = 1000392000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT = 1000392001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT = 1000411000, + VK_STRUCTURE_TYPE_SAMPLER_BORDER_COLOR_COMPONENT_MAPPING_CREATE_INFO_EXT = 1000411001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT = 1000412000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES_KHR = 1000413000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES_KHR = 1000413001, @@ -868,6 +909,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES, VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, + 
VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_NV = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES, @@ -1736,8 +1778,9 @@ typedef enum VkAttachmentLoadOp { typedef enum VkAttachmentStoreOp { VK_ATTACHMENT_STORE_OP_STORE = 0, VK_ATTACHMENT_STORE_OP_DONT_CARE = 1, - VK_ATTACHMENT_STORE_OP_NONE_EXT = 1000301000, - VK_ATTACHMENT_STORE_OP_NONE_QCOM = VK_ATTACHMENT_STORE_OP_NONE_EXT, + VK_ATTACHMENT_STORE_OP_NONE_KHR = 1000301000, + VK_ATTACHMENT_STORE_OP_NONE_QCOM = VK_ATTACHMENT_STORE_OP_NONE_KHR, + VK_ATTACHMENT_STORE_OP_NONE_EXT = VK_ATTACHMENT_STORE_OP_NONE_KHR, VK_ATTACHMENT_STORE_OP_MAX_ENUM = 0x7FFFFFFF } VkAttachmentStoreOp; @@ -2171,6 +2214,8 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT = 0x00000008, VK_PIPELINE_CREATE_DISPATCH_BASE_BIT = 0x00000010, + VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = 0x00200000, + VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = 0x00400000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 0x00004000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 0x00008000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 0x00010000, @@ -6454,6 +6499,116 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateSharedSwapchainsKHR( #define VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME "VK_KHR_sampler_mirror_clamp_to_edge" +#define VK_KHR_dynamic_rendering 1 +#define VK_KHR_DYNAMIC_RENDERING_SPEC_VERSION 1 +#define VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME "VK_KHR_dynamic_rendering" + +typedef enum 
VkRenderingFlagBitsKHR { + VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT_KHR = 0x00000001, + VK_RENDERING_SUSPENDING_BIT_KHR = 0x00000002, + VK_RENDERING_RESUMING_BIT_KHR = 0x00000004, + VK_RENDERING_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkRenderingFlagBitsKHR; +typedef VkFlags VkRenderingFlagsKHR; +typedef struct VkRenderingAttachmentInfoKHR { + VkStructureType sType; + const void* pNext; + VkImageView imageView; + VkImageLayout imageLayout; + VkResolveModeFlagBits resolveMode; + VkImageView resolveImageView; + VkImageLayout resolveImageLayout; + VkAttachmentLoadOp loadOp; + VkAttachmentStoreOp storeOp; + VkClearValue clearValue; +} VkRenderingAttachmentInfoKHR; + +typedef struct VkRenderingInfoKHR { + VkStructureType sType; + const void* pNext; + VkRenderingFlagsKHR flags; + VkRect2D renderArea; + uint32_t layerCount; + uint32_t viewMask; + uint32_t colorAttachmentCount; + const VkRenderingAttachmentInfoKHR* pColorAttachments; + const VkRenderingAttachmentInfoKHR* pDepthAttachment; + const VkRenderingAttachmentInfoKHR* pStencilAttachment; +} VkRenderingInfoKHR; + +typedef struct VkPipelineRenderingCreateInfoKHR { + VkStructureType sType; + const void* pNext; + uint32_t viewMask; + uint32_t colorAttachmentCount; + const VkFormat* pColorAttachmentFormats; + VkFormat depthAttachmentFormat; + VkFormat stencilAttachmentFormat; +} VkPipelineRenderingCreateInfoKHR; + +typedef struct VkPhysicalDeviceDynamicRenderingFeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 dynamicRendering; +} VkPhysicalDeviceDynamicRenderingFeaturesKHR; + +typedef struct VkCommandBufferInheritanceRenderingInfoKHR { + VkStructureType sType; + const void* pNext; + VkRenderingFlagsKHR flags; + uint32_t viewMask; + uint32_t colorAttachmentCount; + const VkFormat* pColorAttachmentFormats; + VkFormat depthAttachmentFormat; + VkFormat stencilAttachmentFormat; + VkSampleCountFlagBits rasterizationSamples; +} VkCommandBufferInheritanceRenderingInfoKHR; + +typedef struct 
VkRenderingFragmentShadingRateAttachmentInfoKHR { + VkStructureType sType; + const void* pNext; + VkImageView imageView; + VkImageLayout imageLayout; + VkExtent2D shadingRateAttachmentTexelSize; +} VkRenderingFragmentShadingRateAttachmentInfoKHR; + +typedef struct VkRenderingFragmentDensityMapAttachmentInfoEXT { + VkStructureType sType; + const void* pNext; + VkImageView imageView; + VkImageLayout imageLayout; +} VkRenderingFragmentDensityMapAttachmentInfoEXT; + +typedef struct VkAttachmentSampleCountInfoAMD { + VkStructureType sType; + const void* pNext; + uint32_t colorAttachmentCount; + const VkSampleCountFlagBits* pColorAttachmentSamples; + VkSampleCountFlagBits depthStencilAttachmentSamples; +} VkAttachmentSampleCountInfoAMD; + +typedef VkAttachmentSampleCountInfoAMD VkAttachmentSampleCountInfoNV; + +typedef struct VkMultiviewPerViewAttributesInfoNVX { + VkStructureType sType; + const void* pNext; + VkBool32 perViewAttributes; + VkBool32 perViewAttributesPositionXOnly; +} VkMultiviewPerViewAttributesInfoNVX; + +typedef void (VKAPI_PTR *PFN_vkCmdBeginRenderingKHR)(VkCommandBuffer commandBuffer, const VkRenderingInfoKHR* pRenderingInfo); +typedef void (VKAPI_PTR *PFN_vkCmdEndRenderingKHR)(VkCommandBuffer commandBuffer); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfoKHR* pRenderingInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer); +#endif + + #define VK_KHR_multiview 1 #define VK_KHR_MULTIVIEW_SPEC_VERSION 1 #define VK_KHR_MULTIVIEW_EXTENSION_NAME "VK_KHR_multiview" @@ -12901,6 +13056,25 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDrawMultiIndexedEXT( #define VK_EXT_LOAD_STORE_OP_NONE_EXTENSION_NAME "VK_EXT_load_store_op_none" +#define VK_EXT_border_color_swizzle 1 +#define VK_EXT_BORDER_COLOR_SWIZZLE_SPEC_VERSION 1 +#define VK_EXT_BORDER_COLOR_SWIZZLE_EXTENSION_NAME "VK_EXT_border_color_swizzle" +typedef struct 
VkPhysicalDeviceBorderColorSwizzleFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 borderColorSwizzle; + VkBool32 borderColorSwizzleFromImage; +} VkPhysicalDeviceBorderColorSwizzleFeaturesEXT; + +typedef struct VkSamplerBorderColorComponentMappingCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkComponentMapping components; + VkBool32 srgb; +} VkSamplerBorderColorComponentMappingCreateInfoEXT; + + + #define VK_EXT_pageable_device_local_memory 1 #define VK_EXT_PAGEABLE_DEVICE_LOCAL_MEMORY_SPEC_VERSION 1 #define VK_EXT_PAGEABLE_DEVICE_LOCAL_MEMORY_EXTENSION_NAME "VK_EXT_pageable_device_local_memory" diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_directfb.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_directfb.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_directfb.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_directfb.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_fuchsia.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_fuchsia.h similarity index 98% rename from icd/api/include/khronos/sdk-1.2/vulkan_fuchsia.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_fuchsia.h index bc47273a..44b4ace3 100644 --- a/icd/api/include/khronos/sdk-1.2/vulkan_fuchsia.h +++ b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_fuchsia.h @@ -119,10 +119,6 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetSemaphoreZirconHandleFUCHSIA( VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkBufferCollectionFUCHSIA) #define VK_FUCHSIA_BUFFER_COLLECTION_SPEC_VERSION 2 #define VK_FUCHSIA_BUFFER_COLLECTION_EXTENSION_NAME "VK_FUCHSIA_buffer_collection" - -typedef enum VkImageFormatConstraintsFlagBitsFUCHSIA { - VK_IMAGE_FORMAT_CONSTRAINTS_FLAG_BITS_MAX_ENUM_FUCHSIA = 0x7FFFFFFF -} VkImageFormatConstraintsFlagBitsFUCHSIA; typedef VkFlags VkImageFormatConstraintsFlagsFUCHSIA; typedef enum VkImageConstraintsInfoFlagBitsFUCHSIA { diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_ggp.h 
b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_ggp.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_ggp.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_ggp.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_ios.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_ios.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_ios.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_ios.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_macos.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_macos.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_macos.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_macos.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_metal.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_metal.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_metal.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_metal.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_screen.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_screen.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_screen.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_screen.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_vi.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_vi.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_vi.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_vi.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_wayland.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_wayland.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_wayland.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_wayland.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_win32.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_win32.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_win32.h rename to 
icd/api/include/khronos/sdk-1.2/vulkan/vulkan_win32.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_xcb.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_xcb.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_xcb.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_xcb.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_xlib.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_xlib.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_xlib.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_xlib.h diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_xlib_xrandr.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_xlib_xrandr.h similarity index 100% rename from icd/api/include/khronos/sdk-1.2/vulkan_xlib_xrandr.h rename to icd/api/include/khronos/sdk-1.2/vulkan/vulkan_xlib_xrandr.h diff --git a/icd/api/include/khronos/vk_icd.h b/icd/api/include/khronos/vk_icd.h index 40881f2a..9b4519ea 100644 --- a/icd/api/include/khronos/vk_icd.h +++ b/icd/api/include/khronos/vk_icd.h @@ -35,7 +35,7 @@ #if EXTERNAL_VULKAN_HEADERS #include "vulkan/vk_icd.h" #else -#include "sdk-1.2/vk_icd.h" +#include "sdk-1.2/vulkan/vk_icd.h" #endif #endif /* __VK_ICD_H_PROXY__ */ diff --git a/icd/api/include/khronos/vk_layer.h b/icd/api/include/khronos/vk_layer.h index 48a08d5d..75f1b678 100644 --- a/icd/api/include/khronos/vk_layer.h +++ b/icd/api/include/khronos/vk_layer.h @@ -35,7 +35,7 @@ #if EXTERNAL_VULKAN_HEADERS #include "vulkan/vk_layer.h" #else -#include "sdk-1.2/vk_layer.h" +#include "sdk-1.2/vulkan/vk_layer.h" #endif #endif /* __VK_LAYER_H_PROXY__ */ diff --git a/icd/api/include/khronos/vk_platform.h b/icd/api/include/khronos/vk_platform.h index 8d4ae3a2..d704e5fa 100644 --- a/icd/api/include/khronos/vk_platform.h +++ b/icd/api/include/khronos/vk_platform.h @@ -35,7 +35,7 @@ #if EXTERNAL_VULKAN_HEADERS #include "vulkan/vk_platform.h" #else -#include "sdk-1.2/vk_platform.h" +#include "sdk-1.2/vulkan/vk_platform.h" #endif #ifdef 
VK_USE_PLATFORM_XLIB_KHR diff --git a/icd/api/include/khronos/vulkan.h b/icd/api/include/khronos/vulkan.h index 0d91a8aa..8d183afa 100644 --- a/icd/api/include/khronos/vulkan.h +++ b/icd/api/include/khronos/vulkan.h @@ -37,7 +37,7 @@ #if EXTERNAL_VULKAN_HEADERS #include "vulkan/vulkan.h" #else -#include "sdk-1.2/vulkan.h" +#include "sdk-1.2/vulkan/vulkan.h" #endif #ifdef VK_USE_PLATFORM_XLIB_KHR @@ -127,10 +127,6 @@ #define VK_PIPELINE_BIND_POINT_END_RANGE VK_PIPELINE_BIND_POINT_COMPUTE #define VK_PIPELINE_BIND_POINT_RANGE_SIZE (VK_PIPELINE_BIND_POINT_COMPUTE - VK_PIPELINE_BIND_POINT_GRAPHICS + 1) -#define VK_INDEX_TYPE_BEGIN_RANGE VK_INDEX_TYPE_UINT16 -#define VK_INDEX_TYPE_END_RANGE VK_INDEX_TYPE_UINT32 -#define VK_INDEX_TYPE_RANGE_SIZE (VK_INDEX_TYPE_UINT32 - VK_INDEX_TYPE_UINT16 + 1) - #define VK_OBJECT_TYPE_BEGIN_RANGE VK_OBJECT_TYPE_UNKNOWN #define VK_OBJECT_TYPE_END_RANGE VK_OBJECT_TYPE_COMMAND_POOL diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index a7c74dfd..9707743e 100644 --- a/icd/api/include/pipeline_compiler.h +++ b/icd/api/include/pipeline_compiler.h @@ -128,6 +128,10 @@ class PipelineCompiler const void* pCode, ShaderModuleHandle* pModule); + void TryEarlyCompileShaderModule( + const Device* pDevice, + ShaderModuleHandle* pModule); + bool IsValidShaderModule( const ShaderModuleHandle* pShaderModule) const; @@ -143,6 +147,15 @@ class PipelineCompiler const void** ppPipelineBinary, Util::MetroHash::Hash* pCacheId); + VkResult CreateGraphicsShaderBinary( + const Device* pDevice, + const ShaderStage stage, + const GraphicsPipelineBinaryCreateInfo* pCreateInfo, + ShaderModuleHandle* pModule); + + static void FreeGraphicsShaderBinary( + ShaderModuleHandle* pShaderModule); + VkResult CreateComputePipelineBinary( Device* pDevice, uint32_t deviceIndex, @@ -176,6 +189,11 @@ class PipelineCompiler VbBindingInfo* pVbInfo, PipelineInternalBufferInfo* pInternalBufferInfo); + static void 
SetPartialGraphicsPipelineBinaryInfo( + const ShaderModuleHandle* pShaderModuleHandle, + const ShaderStage stage, + GraphicsPipelineBinaryCreateInfo* pCreateInfo); + VkResult ConvertComputePipelineInfo( const Device* pDevice, const VkComputePipelineCreateInfo* pIn, @@ -221,8 +239,8 @@ class PipelineCompiler void DestroyPipelineBinaryCache(); - VkResult BuildPipelineInternalBufferData(GraphicsPipelineBinaryCreateInfo* pCreateInfo, - PipelineInternalBufferInfo* pInternalBufferInfo); + void BuildPipelineInternalBufferData(GraphicsPipelineBinaryCreateInfo* pCreateInfo, + PipelineInternalBufferInfo* pInternalBufferInfo); void GetComputePipelineCacheId( uint32_t deviceIdx, diff --git a/icd/api/include/render_state_cache.h b/icd/api/include/render_state_cache.h index 7c6d1917..a72f974a 100644 --- a/icd/api/include/render_state_cache.h +++ b/icd/api/include/render_state_cache.h @@ -37,7 +37,6 @@ #include "include/khronos/vulkan.h" #include "include/vk_alloccb.h" -#include "include/vk_graphics_pipeline.h" #include "palHashMap.h" #include "palColorBlendState.h" @@ -137,9 +136,6 @@ class RenderStateCache uint32_t CreateScissorRect(const Pal::ScissorRectParams& params); void DestroyScissorRect(const Pal::ScissorRectParams& params, uint32_t token); - uint32_t CreateSamplePattern(const SamplePattern& samplePattern); - void DestroySamplePattern(const SamplePattern& samplePattern, uint32_t token); - uint32_t CreateFragmentShadingRate(const Pal::VrsRateParams& params); void DestroyFragmentShadingRate(const Pal::VrsRateParams& params, uint32_t token); @@ -337,17 +333,6 @@ class RenderStateCache StaticMsaaState*, PalAllocator> m_msaaRefs; - static const size_t SamplePatternHashGroupSize = (sizeof(SamplePattern) + sizeof(StaticParamState)) * 8; - - Util::HashMap, - SamplePatternHashGroupSize> m_samplePattern; - uint32_t m_samplePatternNextId; - Util::HashMap const Device* pDevice, VkMemoryRequirements* pMemoryRequirements); + static void CalculateMemoryRequirements( + const Device* 
pDevice, + const VkDeviceBufferMemoryRequirementsKHR* pInfo, + VkMemoryRequirements2* pMemoryRequirements); + VkDeviceSize GetSize() const { return m_size; } @@ -116,8 +121,8 @@ class Buffer final : public NonDispatchable uint32_t createSparseBinding : 1; // VK_BUFFER_CREATE_SPARSE_BINDING_BIT uint32_t createSparseResidency : 1; // VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT uint32_t createProtected : 1; // VK_BUFFER_CREATE_PROTECTED_BIT - - uint32_t reserved : 23; + uint32_t reserved2 : 1; + uint32_t reserved : 22; }; uint32_t u32All; }; diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index bf5116cb..190c4da5 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -44,9 +44,10 @@ #include "include/vk_render_pass.h" #include "include/vk_utils.h" +#include "include/barrier_policy.h" +#include "include/graphics_pipeline_common.h" #include "include/internal_mem_mgr.h" #include "include/virtual_stack_mgr.h" -#include "include/barrier_policy.h" #include "renderpass/renderpass_builder.h" @@ -131,7 +132,8 @@ union DirtyGraphicsState uint32 vrs : 1; uint32 colorWriteEnable : 1; uint32 rasterizerDiscardEnable : 1; - uint32 reserved : 23; + uint32 samplePattern : 1; + uint32 reserved : 22; }; uint32 u32All; @@ -162,6 +164,28 @@ struct PerGpuRenderState uint32_t maxPipelineStackSize; }; +struct DynamicRenderingAttachments +{ + VkResolveModeFlagBits resolveMode; + const ImageView* pImageView; + Pal::ImageLayout imageLayout; + const ImageView* pResolveImageView; + Pal::ImageLayout resolveImageLayout; + VkFormat attachmentFormat; + VkSampleCountFlagBits rasterizationSamples; +}; + +struct DynamicRenderingInstance +{ + uint32_t viewMask; + uint32_t renderAreaCount; + Pal::Rect renderArea[MaxPalDevices]; + uint32_t colorAttachmentCount; + DynamicRenderingAttachments colorAttachments[Pal::MaxColorTargets]; + DynamicRenderingAttachments depthAttachment; + DynamicRenderingAttachments stencilAttachment; +}; + // Members of 
CmdBufferRenderState that are the same for each GPU struct AllGpuRenderState { @@ -188,7 +212,6 @@ struct AllGpuRenderState uint32_t depthBounds; uint32_t viewports; uint32_t scissorRect; - uint32_t samplePattern; uint32_t fragmentShadingRate; } staticTokens; @@ -209,6 +232,8 @@ struct AllGpuRenderState bool rasterizerDiscardEnable; + DynamicRenderingInstance dynamicRenderingInstance; + // ===================================================================================================================== // The first part of the structure will be cleared with a memset in CmdBuffer::ResetState(). // The second part of the structure contains the larger members that are selectively reset in CmdBuffer::ResetState(). @@ -229,6 +254,7 @@ struct AllGpuRenderState Pal::DepthStencilStateCreateInfo depthStencilCreateInfo; Pal::VrsRateParams vrsRate; Pal::ColorWriteMaskParams colorWriteMaskParams; + SamplePattern samplePattern; }; // State tracked during a render pass instance when building a command buffer. 
@@ -452,6 +478,18 @@ class CmdBuffer uint32_t rectCount, const VkClearRect* pRects); + void ClearDynamicRenderingImages( + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, + uint32_t rectCount, + const VkClearRect* pRects); + + void ClearDynamicRenderingBoundAttachments( + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, + uint32_t rectCount, + const VkClearRect* pRects); + template void ResolveImage( VkImage srcImage, @@ -625,6 +663,11 @@ class CmdBuffer void PipelineBarrierSync2ToSync1( const VkDependencyInfoKHR* pDependencyInfo); + void BeginRendering( + const VkRenderingInfoKHR* pRenderingInfo); + + void EndRendering(); + void BeginQueryIndexed( VkQueryPool queryPool, uint32_t query, @@ -1035,6 +1078,8 @@ class CmdBuffer void ValidateStates(); + void ValidateSamplePattern(uint32_t sampleCount, SamplePattern* pSamplePattern); + CmdBuffer( Device* pDevice, CmdPool* pCmdPool, @@ -1102,7 +1147,31 @@ class CmdBuffer void RPBindTargets(const RPBindTargetsInfo& targets); void RPSyncPostLoadOpColorClear(); - void RPInitSamplePattern(); + void BindTargets( + const VkRenderingInfoKHR* pRenderingInfo, + const VkRenderingFragmentShadingRateAttachmentInfoKHR* pRenderingFragmentShadingRateAttachmentInfoKHR); + + void ResolveImage( + const DynamicRenderingAttachments& dynamicRenderingAttachments); + + void LoadOpClearColor( + const Pal::Rect* pDeviceGroupRenderArea, + const VkRenderingInfoKHR* pRenderingInfo); + + void LoadOpClearDepthStencil( + const Pal::Rect* pDeviceGroupRenderArea, + const VkRenderingInfoKHR* pRenderingInfo); + + void GetImageLayout( + VkImageView imageView, + VkImageLayout imageLayout, + VkImageAspectFlags aspectMask, + Pal::SubresRange* palSubresRange, + Pal::ImageLayout* palImageLayout); + + void StoreAttachmentInfo( + const VkRenderingAttachmentInfoKHR& renderingAttachmentInfo, + DynamicRenderingAttachments* pDynamicRenderingAttachement); Pal::ImageLayout RPGetAttachmentLayout( uint32_t attachment, @@ -1209,7 
+1278,7 @@ class CmdBuffer uint32_t hasReleaseAcquire : 1; uint32_t useSplitReleaseAcquire : 1; uint32_t reserved2 : 3; - uint32_t reserved3 : 1; + uint32_t isRenderingSuspended : 1; uint32_t reserved4 : 1; uint32_t reserved : 15; }; @@ -1817,6 +1886,13 @@ VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarker2AMD( VkDeviceSize dstOffset, uint32_t marker); +VKAPI_ATTR void VKAPI_CALL vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfoKHR* pRenderingInfo); + +VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer); + VKAPI_ATTR void VKAPI_CALL vkCmdSetCullModeEXT( VkCommandBuffer commandBuffer, VkCullModeFlags cullMode); diff --git a/icd/api/include/vk_conv.h b/icd/api/include/vk_conv.h index c909045f..52f5b5dc 100644 --- a/icd/api/include/vk_conv.h +++ b/icd/api/include/vk_conv.h @@ -334,37 +334,29 @@ inline Pal::FillMode VkToPalFillMode(VkPolygonMode fillMode) return convert::FillMode(fillMode); } -// No range size and begin range in VkCullModeFlagBits, so no direct macro mapping here -namespace convert -{ - inline Pal::CullMode CullMode(VkCullModeFlags cullMode) - { - switch (cullMode) +// ===================================================================================================================== +// Converts Vulkan cull mode to PAL equivalent +inline Pal::CullMode VkToPalCullMode( + VkCullModeFlags cullMode) +{ + switch (cullMode) + { + case VK_CULL_MODE_NONE: + return Pal::CullMode::None; + case VK_CULL_MODE_FRONT_BIT: + return Pal::CullMode::Front; + case VK_CULL_MODE_BACK_BIT: + return Pal::CullMode::Back; + case VK_CULL_MODE_FRONT_AND_BACK: + return Pal::CullMode::FrontAndBack; + default: { - case VK_CULL_MODE_NONE: - return Pal::CullMode::None; - case VK_CULL_MODE_FRONT_BIT: - return Pal::CullMode::Front; - case VK_CULL_MODE_BACK_BIT: - return Pal::CullMode::Back; - case VK_CULL_MODE_FRONT_AND_BACK: - return Pal::CullMode::FrontAndBack; - default: - { - VK_ASSERT(!"Unknown Cull Mode!"); - return 
Pal::CullMode::None; - } + VK_ASSERT(!"Unknown Cull Mode!"); + return Pal::CullMode::None; } } } -// ===================================================================================================================== -// Converts Vulkan cull mode to PAL equivalent -inline Pal::CullMode VkToPalCullMode(VkCullModeFlags cullMode) -{ - return convert::CullMode(cullMode); -} - // ===================================================================================================================== VK_TO_PAL_TABLE_I( FRONT_FACE, FrontFace, FaceOrientation, // ===================================================================================================================== @@ -500,18 +492,23 @@ inline Pal::CompareFunc VkToPalCompareFunc(VkCompareOp compareOp) return convert::CompareFunc(compareOp); } -VK_TO_PAL_TABLE_X( INDEX_TYPE, IndexType, IndexType, -// ===================================================================================================================== -VK_TO_PAL_ENTRY_X( INDEX_TYPE_UINT16, IndexType::Idx16 ) -VK_TO_PAL_ENTRY_X( INDEX_TYPE_UINT32, IndexType::Idx32 ) -// ===================================================================================================================== -) - // ===================================================================================================================== // Converts Vulkan index type to PAL equivalent. 
-inline Pal::IndexType VkToPalIndexType(VkIndexType indexType) +inline Pal::IndexType VkToPalIndexType( + VkIndexType indexType) { - return convert::IndexType(indexType); + switch (indexType) + { + case VK_INDEX_TYPE_UINT8_EXT: + return Pal::IndexType::Idx8; + case VK_INDEX_TYPE_UINT16: + return Pal::IndexType::Idx16; + case VK_INDEX_TYPE_UINT32: + return Pal::IndexType::Idx32; + default: + VK_ASSERT(!"Unknown VkIndexType"); + return Pal::IndexType::Idx32; + } } // ===================================================================================================================== @@ -1796,6 +1793,65 @@ inline Pal::SwizzledFormat VkToPalFormat(VkFormat format, const RuntimeSettings& } } +// ===================================================================================================================== +// Pal to Vulkan swapchain format, for use with formats returned from IDevice::GetSwapChainProperties only. +inline VkFormat PalToVkSwapChainFormat(Pal::SwizzledFormat palFormat) +{ + VkFormat format = VK_FORMAT_UNDEFINED; + + switch (palFormat.format) + { + case Pal::ChNumFormat::X8Y8Z8W8_Unorm: + { + if ((palFormat.swizzle.r == Pal::ChannelSwizzle::X) && + (palFormat.swizzle.g == Pal::ChannelSwizzle::Y) && + (palFormat.swizzle.b == Pal::ChannelSwizzle::Z) && + (palFormat.swizzle.a == Pal::ChannelSwizzle::W)) + { + format = VK_FORMAT_R8G8B8A8_UNORM; + } + else if ((palFormat.swizzle.r == Pal::ChannelSwizzle::Z) && + (palFormat.swizzle.g == Pal::ChannelSwizzle::Y) && + (palFormat.swizzle.b == Pal::ChannelSwizzle::X) && + (palFormat.swizzle.a == Pal::ChannelSwizzle::W)) + { + format = VK_FORMAT_B8G8R8A8_UNORM; + } + break; + } + case Pal::ChNumFormat::X16Y16Z16W16_Float: + { + if ((palFormat.swizzle.r == Pal::ChannelSwizzle::X) && + (palFormat.swizzle.g == Pal::ChannelSwizzle::Y) && + (palFormat.swizzle.b == Pal::ChannelSwizzle::Z) && + (palFormat.swizzle.a == Pal::ChannelSwizzle::W)) + { + format = VK_FORMAT_R16G16B16A16_SFLOAT; + } + break; + } + case 
Pal::ChNumFormat::X10Y10Z10W2_Unorm: + { + if ((palFormat.swizzle.r == Pal::ChannelSwizzle::X) && + (palFormat.swizzle.g == Pal::ChannelSwizzle::Y) && + (palFormat.swizzle.b == Pal::ChannelSwizzle::Z) && + (palFormat.swizzle.a == Pal::ChannelSwizzle::W)) + { + format = VK_FORMAT_A2B10G10R10_UNORM_PACK32; + } + break; + } + default: + { + break; + } + } + + VK_ASSERT_MSG(format != VK_FORMAT_UNDEFINED, "Unknown swapchain format, consider adding it here"); + + return format; +} + // ===================================================================================================================== // TODO: VK_EXT_swapchain_colorspace combines the concept of a transfer function and a color space, which is // insufficient. For now, map the capabilities of Pal using either the transfer function OR color space @@ -2426,39 +2482,6 @@ inline Pal::SwapChainMode VkToPalSwapChainMode(VkPresentModeKHR presentMode) return convert::SwapChainMode(presentMode); } -#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 610 -namespace convert -{ - inline Pal::CompositeAlphaMode CompositeAlpha(VkCompositeAlphaFlagBitsKHR compositeAlpha) - { - switch (compositeAlpha) - { - case VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR: - return Pal::CompositeAlphaMode::Opaque; - - case VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR: - return Pal::CompositeAlphaMode::PreMultiplied; - - case VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR: - return Pal::CompositeAlphaMode::PostMultiplied; - - case VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR: - return Pal::CompositeAlphaMode::Inherit; - - default: - VK_ASSERT(!"Unknown CompositeAlphaFlag!"); - return Pal::CompositeAlphaMode::Opaque; - } - } -} - -// ===================================================================================================================== -// Converts Vulkan composite alpha flag to PAL equivalent. 
-inline Pal::CompositeAlphaMode VkToPalCompositeAlphaMode(VkCompositeAlphaFlagBitsKHR compositeAlpha) -{ - return convert::CompositeAlpha(compositeAlpha); -} -#else // ===================================================================================================================== // Converts Vulkan composite alpha flag to PAL equivalent. inline Pal::CompositeAlphaMode VkToPalCompositeAlphaMode(VkCompositeAlphaFlagBitsKHR compositeAlpha) @@ -2482,7 +2505,6 @@ inline VkCompositeAlphaFlagsKHR PalToVkSupportedCompositeAlphaMode(uint32 compos return static_cast(compositeAlpha); } -#endif // ===================================================================================================================== // Converts Vulkan image creation flags to PAL image creation flags (unfortunately, PAL doesn't define a dedicated type @@ -3168,23 +3190,50 @@ inline void VkToPalScissorRect( } // ===================================================================================================================== +template inline Pal::QueuePriority VkToPalGlobalPriority( - VkQueueGlobalPriorityEXT vkPriority) + VkQueueGlobalPriorityEXT vkPriority, + const T& engineCapabilities) { Pal::QueuePriority palPriority = Pal::QueuePriority::Normal; switch (static_cast(vkPriority)) { case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT: - palPriority = Pal::QueuePriority::Idle; + if ((engineCapabilities.queuePrioritySupport & Pal::QueuePrioritySupport::SupportQueuePriorityIdle) != 0) + { + palPriority = Pal::QueuePriority::Idle; + } + else + { + palPriority = Pal::QueuePriority::Normal; + } break; case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT: palPriority = Pal::QueuePriority::Normal; break; case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT: - palPriority = Pal::QueuePriority::High; + if ((engineCapabilities.queuePrioritySupport & Pal::QueuePrioritySupport::SupportQueuePriorityHigh) != 0) + { + palPriority = Pal::QueuePriority::High; + } + else + { + palPriority = Pal::QueuePriority::Normal; + } break; case 
VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT: - palPriority = Pal::QueuePriority::Realtime; + if ((engineCapabilities.queuePrioritySupport & Pal::QueuePrioritySupport::SupportQueuePriorityRealtime) != 0) + { + palPriority = Pal::QueuePriority::Realtime; + } + else if ((engineCapabilities.queuePrioritySupport & Pal::QueuePrioritySupport::SupportQueuePriorityHigh) != 0) + { + palPriority = Pal::QueuePriority::High; + } + else + { + palPriority = Pal::QueuePriority::Normal; + } break; default: palPriority = Pal::QueuePriority::Normal; diff --git a/icd/api/include/vk_device.h b/icd/api/include/vk_device.h index bedf47f9..04c94673 100644 --- a/icd/api/include/vk_device.h +++ b/icd/api/include/vk_device.h @@ -144,7 +144,8 @@ class Device // True if EXT_MEMORY_PRIORITY or EXT_PAGEABLE_DEVICE_LOCAL_MEMORY is enabled. uint32 appControlledMemPriority : 1; uint32 mustWriteImmutableSamplers : 1; - uint32 reserved : 22; + uint32 strictImageSizeRequirements : 1; + uint32 reserved : 21; }; uint32 u32All; @@ -367,7 +368,8 @@ class Device bool scalarBlockLayoutEnabled, const ExtendedRobustness& extendedRobustnessEnabled, bool bufferDeviceAddressMultiDeviceEnabled, - bool pageableDeviceLocalMemory); + bool pageableDeviceLocalMemory, + bool maintenance4Enabled); void InitDispatchTable(); @@ -482,7 +484,7 @@ class Device VkResult BindImageMemory( uint32_t bindInfoCount, - const VkBindImageMemoryInfo* pBindInfos) const; + const VkBindImageMemoryInfo* pBindInfos); const DeviceFeatures& GetEnabledFeatures() const { return m_enabledFeatures; } @@ -722,6 +724,8 @@ class Device bool useComputeAsTransferQueue, bool isTmzQueue); + Pal::TilingOptMode GetTilingOptMode() const; + protected: Device( uint32_t deviceCount, @@ -1148,6 +1152,22 @@ VKAPI_ATTR VkResult VKAPI_CALL vkSetDebugUtilsObjectTagEXT( VkDevice device, const VkDebugUtilsObjectTagInfoEXT* pTagInfo); +VKAPI_ATTR void VKAPI_CALL vkGetDeviceBufferMemoryRequirementsKHR( + VkDevice device, + const VkDeviceBufferMemoryRequirementsKHR* pInfo, + 
VkMemoryRequirements2* pMemoryRequirements); + +VKAPI_ATTR void VKAPI_CALL vkGetDeviceImageMemoryRequirementsKHR( + VkDevice device, + const VkDeviceImageMemoryRequirementsKHR* pInfo, + VkMemoryRequirements2* pMemoryRequirements); + +VKAPI_ATTR void VKAPI_CALL vkGetDeviceImageSparseMemoryRequirementsKHR( + VkDevice device, + const VkDeviceImageMemoryRequirementsKHR* pInfo, + uint32_t* pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2* pSparseMemoryRequirements); + VKAPI_ATTR void VKAPI_CALL vkCmdSetLineStippleEXT( VkCommandBuffer commandBuffer, uint32_t lineStippleFactor, diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index 7c94b1eb..21687a37 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -85,6 +85,7 @@ class Extensions { for (int32_t i = 0; i < T::Count; ++i) { + m_supported[i].pName = nullptr; m_supported[i].specVersion = 0; } } @@ -94,21 +95,24 @@ class Extensions return m_supported[id].specVersion != 0; } - void AddExtension(typename T::ExtensionId id, const char* name, uint32_t specVersion) + void AddExtension(typename T::ExtensionId id, const char* pName, uint32_t specVersion) { // Don't allow adding extensions redundantly. 
VK_ASSERT(!IsExtensionSupported(id)); - strncpy(m_supported[id].extensionName, name, VK_MAX_EXTENSION_NAME_SIZE); - m_supported[id].specVersion = specVersion; + m_supported[id].pName = pName; + m_supported[id].specVersion = specVersion; m_supportedCount++; } - const VkExtensionProperties& GetExtensionInfo(typename T::ExtensionId id) const + void GetExtensionInfo(typename T::ExtensionId id, VkExtensionProperties* pProperties) const { VK_ASSERT(IsExtensionSupported(id)); - return m_supported[id]; + + strncpy(pProperties->extensionName, m_supported[id].pName, VK_MAX_EXTENSION_NAME_SIZE); + + pProperties->specVersion = m_supported[id].specVersion; } uint32_t GetExtensionCount() const @@ -117,8 +121,14 @@ class Extensions } protected: - VkExtensionProperties m_supported[T::Count]; - uint32_t m_supportedCount; + /// Array of an internal VkExtensionProperties struct that uses a pointer for the name for a smaller size + struct ExtensionProperties + { + const char* pName; + uint32_t specVersion; + } m_supported[T::Count]; + + uint32_t m_supportedCount; }; class Enabled @@ -156,6 +166,8 @@ class Extensions for (uint32_t i = 0; i < extensionNameCount && !invalidExtensionRequested; ++i) { + VkExtensionProperties ext = {}; + int32_t j; for (j = 0; j < T::Count; ++j) @@ -164,7 +176,7 @@ class Extensions if (supported.IsExtensionSupported(id)) { - const VkExtensionProperties& ext = supported.GetExtensionInfo(id); + supported.GetExtensionInfo(id, &ext); if (strcmp(extensionNames[i], ext.extensionName) == 0) { @@ -248,6 +260,7 @@ class DeviceExtensions final : public Extensions KHR_DEVICE_GROUP, KHR_DRAW_INDIRECT_COUNT, KHR_DRIVER_PROPERTIES, + KHR_DYNAMIC_RENDERING, KHR_EXTERNAL_FENCE, KHR_EXTERNAL_FENCE_FD, KHR_EXTERNAL_FENCE_WIN32, @@ -257,6 +270,7 @@ class DeviceExtensions final : public Extensions KHR_EXTERNAL_SEMAPHORE, KHR_EXTERNAL_SEMAPHORE_FD, KHR_EXTERNAL_SEMAPHORE_WIN32, + KHR_FORMAT_FEATURE_FLAGS2, KHR_FRAGMENT_SHADING_RATE, KHR_GET_MEMORY_REQUIREMENTS2, 
KHR_IMAGELESS_FRAMEBUFFER, @@ -265,6 +279,7 @@ class DeviceExtensions final : public Extensions KHR_MAINTENANCE1, KHR_MAINTENANCE2, KHR_MAINTENANCE3, + KHR_MAINTENANCE4, KHR_MULTIVIEW, KHR_PIPELINE_EXECUTABLE_PROPERTIES, KHR_RELAXED_BLOCK_LAYOUT, @@ -295,6 +310,7 @@ class DeviceExtensions final : public Extensions // EXT Extensions EXT_4444_FORMATS, + EXT_BORDER_COLOR_SWIZZLE, EXT_CALIBRATED_TIMESTAMPS, EXT_COLOR_WRITE_ENABLE, EXT_CONDITIONAL_RENDERING, @@ -312,6 +328,7 @@ class DeviceExtensions final : public Extensions EXT_HDR_METADATA, EXT_HOST_QUERY_RESET, EXT_IMAGE_ROBUSTNESS, + EXT_INDEX_TYPE_UINT8, EXT_INLINE_UNIFORM_BLOCK, EXT_LINE_RASTERIZATION, EXT_LOAD_STORE_OP_NONE, @@ -340,6 +357,7 @@ class DeviceExtensions final : public Extensions EXT_SHADER_VIEWPORT_INDEX_LAYER, EXT_SUBGROUP_SIZE_CONTROL, EXT_TEXEL_BUFFER_ALIGNMENT, + EXT_TOOLING_INFO, EXT_TRANSFORM_FEEDBACK, EXT_VERTEX_ATTRIBUTE_DIVISOR, EXT_YCBCR_IMAGE_ARRAYS, diff --git a/icd/api/include/vk_framebuffer.h b/icd/api/include/vk_framebuffer.h index 6e667257..fec846bf 100644 --- a/icd/api/include/vk_framebuffer.h +++ b/icd/api/include/vk_framebuffer.h @@ -90,6 +90,8 @@ class Framebuffer final : public NonDispatchable void SetImageViews(const VkRenderPassAttachmentBeginInfo* pRenderPassAttachmentBeginInfo); + void SetImageViews(const VkRenderingInfoKHR* pRenderingInfo); + const Pal::GlobalScissorParams& GetGlobalScissorParams() const { return m_globalScissorParams; diff --git a/icd/api/include/vk_graphics_pipeline.h b/icd/api/include/vk_graphics_pipeline.h index df537f38..048e629c 100644 --- a/icd/api/include/vk_graphics_pipeline.h +++ b/icd/api/include/vk_graphics_pipeline.h @@ -30,7 +30,6 @@ #include -#include "include/vk_pipeline.h" #include "include/vk_device.h" #include "include/vk_shader_code.h" #include "include/graphics_pipeline_common.h" @@ -189,6 +188,9 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch bool Force1x1ShaderRateEnabled() const { return 
m_flags.force1x1ShaderRate; } + bool IsPointSizeUsed() const + { return m_flags.isPointSizeUsed; } + static void BindNullPipeline(CmdBuffer* pCmdBuffer); // Returns value of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT @@ -209,6 +211,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch bool bindInputAssemblyState, bool force1x1ShaderRate, bool customSampleLocations, + bool isPointSizeUsed, const VbBindingInfo& vbInfo, const PipelineInternalBufferInfo* pInternalBuffer, Pal::IMsaaState** pPalMsaa, @@ -321,7 +324,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch uint8 bindInputAssemblyState : 1; uint8 customSampleLocations : 1; uint8 force1x1ShaderRate : 1; - uint8 reserved : 1; + uint8 isPointSizeUsed : 1; }; } m_flags; }; diff --git a/icd/api/include/vk_image.h b/icd/api/include/vk_image.h index d8278745..89429359 100644 --- a/icd/api/include/vk_image.h +++ b/icd/api/include/vk_image.h @@ -101,12 +101,17 @@ class Image final : public NonDispatchable Device* pDevice, const VkAllocationCallbacks* pAllocator); - VkResult GetMemoryRequirements( - const Device* pDevice, - VkMemoryRequirements* pMemoryRequirements); + const VkMemoryRequirements& GetMemoryRequirements() + { return m_memoryRequirements; } + + void SetMemoryRequirements(const VkMemoryRequirements& memoryRequirements) + { m_memoryRequirements = memoryRequirements; } + + void SetMemoryRequirementsAtCreate( + const Device* pDevice); VkResult BindMemory( - const Device* pDevice, + Device* pDevice, VkDeviceMemory mem, VkDeviceSize memOffset, uint32_t deviceIndexCount, @@ -129,10 +134,26 @@ class Image final : public NonDispatchable VkSubresourceLayout* pLayout) const; void GetSparseMemoryRequirements( - const Device* pDevice, + Device* pDevice, uint32_t* pNumRequirements, utils::ArrayView sparseMemoryRequirements); + static void CalculateMemoryRequirements( + Device* pDevice, + const VkDeviceImageMemoryRequirementsKHR* pInfo, + 
VkMemoryRequirements2* pMemoryRequirements); + + static void CalculateAlignedMemoryRequirements( + Device* pDevice, + const VkImageCreateInfo* pCreateInfo, + Image* pImage); + + static void CalculateSparseMemoryRequirements( + Device* pDevice, + const VkDeviceImageMemoryRequirementsKHR* pInfo, + uint32_t* pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2* pSparseMemoryRequirements); + VK_FORCEINLINE Pal::IImage* PalImage(int32_t idx) const { return m_perGpu[idx].pPalImage; } @@ -356,6 +377,8 @@ class Image final : public NonDispatchable ResourceOptimizerKey m_ResourceKey; + VkMemoryRequirements m_memoryRequirements; // Image's memory requirements, including strict size if used + // This goes last. The memory for the rest of the array is calculated dynamically based on the number of GPUs in // use. PerGpuInfo m_perGpu[1]; diff --git a/icd/api/include/vk_physical_device.h b/icd/api/include/vk_physical_device.h index c46c5e07..be8c16c1 100644 --- a/icd/api/include/vk_physical_device.h +++ b/icd/api/include/vk_physical_device.h @@ -257,6 +257,10 @@ class PhysicalDevice return VK_SUCCESS; } + VkResult GetExtendedFormatProperties( + VkFormat format, + VkFormatProperties3KHR* pFormatProperties) const; + bool FormatSupportsMsaa(VkFormat format) const { uint32_t formatIndex = Formats::GetIndex(format); @@ -417,6 +421,10 @@ class PhysicalDevice uint32_t* pTimeDomainCount, VkTimeDomainEXT* pTimeDomains); + VkResult GetPhysicalDeviceToolPropertiesEXT( + uint32_t* pToolCount, + VkPhysicalDeviceToolPropertiesEXT* pToolProperties); + void GetExternalBufferProperties( const VkPhysicalDeviceExternalBufferInfo* pExternalBufferInfo, VkExternalBufferProperties* pExternalBufferProperties); @@ -1065,6 +1073,11 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( uint32_t* pTimeDomainCount, VkTimeDomainEXT* pTimeDomains); +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceToolPropertiesEXT( + VkPhysicalDevice physicalDevice, + uint32_t* 
pToolCount, + VkPhysicalDeviceToolPropertiesEXT* pToolProperties); + VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceFragmentShadingRatesKHR( VkPhysicalDevice physicalDevice, uint32* pFragmentShadingRateCount, diff --git a/icd/api/include/vk_pipeline.h b/icd/api/include/vk_pipeline.h index 84d7110e..19ee5d5c 100644 --- a/icd/api/include/vk_pipeline.h +++ b/icd/api/include/vk_pipeline.h @@ -32,6 +32,7 @@ #include "include/vk_utils.h" #include "include/vk_defines.h" #include "include/vk_dispatch.h" +#include "include/vk_pipeline_layout.h" #include "include/internal_mem_mgr.h" #include "palFile.h" @@ -63,27 +64,6 @@ struct RuntimeSettings; struct ShaderStageInfo; struct ShaderModuleHandle; -// The top-level user data layout is portioned into different sections based on the value type (push constant, -// descriptor set addresses, etc.). This structure describes the offsets and sizes of those regions. -struct UserDataLayout -{ - // Base user data register index to use for the descriptor set binding data (including registers for - // dynamic descriptor offsets) - uint32_t setBindingRegBase; - // Number of user data registers used for the set binding points - uint32_t setBindingRegCount; - - // Base user data register index to use for push constants - uint32_t pushConstRegBase; - // Number of user data registers used for push constants - uint32_t pushConstRegCount; - - // Base user data register index to use for transform feedback. - uint32_t transformFeedbackRegBase; - // Number of user data registers used for transform feedback - uint32_t transformFeedbackRegCount; -}; - // Structure containing information about a retrievable pipeline binary. These are only retained by Pipeline objects // when specific device extensions (VK_AMD_shader_info/VK_KHR_pipeline_executable_properties) that can query them are // enabled. 
diff --git a/icd/api/include/vk_pipeline_layout.h b/icd/api/include/vk_pipeline_layout.h index e12b72ad..f1f014f3 100644 --- a/icd/api/include/vk_pipeline_layout.h +++ b/icd/api/include/vk_pipeline_layout.h @@ -36,17 +36,73 @@ #include "include/khronos/vulkan.h" #include "include/vk_device.h" -#include "include/vk_graphics_pipeline.h" -#include "include/vk_shader_code.h" - -#include "palPipeline.h" namespace vk { class DescriptorSetLayout; -class ShaderModule; +// Determine mapping layout of the resources used in shaders +enum class PipelineLayoutScheme : uint32_t +{ + // Compact scheme makes full use of all the user data registers and can achieve best performance in theory. + // See PipelineLayout::BuildCompactSchemeInfo() for more details + Compact = 0, + // The searching path of a resource belonging to a specific binding is fixed in indirect scheme. + // See PipelineLayout::BuildIndirectSchemeInfo() for more details + Indirect +}; + +// The top-level user data layout is portioned into different sections based on the value type (push constant, +// descriptor set addresses, etc.). This structure describes the offsets and sizes of those regions. +struct UserDataLayout +{ + PipelineLayoutScheme scheme; + + union + { + struct + { + // Base user data register index to use for the descriptor set binding data + // (including registers for dynamic descriptor offsets) + uint32_t setBindingRegBase; + // Number of user data registers used for the set binding points + uint32_t setBindingRegCount; + + // Base user data register index to use for push constants + uint32_t pushConstRegBase; + // Number of user data registers used for push constants + uint32_t pushConstRegCount; + + // Base user data register index to use for transform feedback. + uint32_t transformFeedbackRegBase; + // Number of user data registers used for transform feedback + uint32_t transformFeedbackRegCount; + + } compact; + + struct + { + // Base user data register index to use for transform feedback.
+ // The number of user data registers used is always 1 + uint32_t transformFeedbackRegBase; + + // Base user data register index to use for the pointers pointing to the buffers + // storing descriptor set binding data. + // Each set occupies 2 entries: one for static descriptors and one for dynamic descriptors + // The total number of user data registers used is always MaxDescriptorSets * 2 * SetPtrRegCount + uint32_t setBindingPtrRegBase; + + // Base user data register index to use for buffer storing push constant data + // The number of user data registers used is always 1 + uint32_t pushConstPtrRegBase; + + // The size of buffer required to store push constants + uint32_t pushConstSizeInDword; + + } indirect; + }; +}; // ===================================================================================================================== // API implementation of Vulkan pipeline layout objects. @@ -117,9 +173,9 @@ class PipelineLayout final : public NonDispatchable(Util::VoidPtrInc(this, sizeof(*this)))[setIndex]; } // Original descriptor set layout pointers - const DescriptorSetLayout* GetSetLayouts(uint32_t setIndex) const + const DescriptorSetLayout* GetSetLayouts(const uint32_t setIndex) const { return static_cast( Util::VoidPtrInc(this, sizeof(*this) + SetUserDataLayoutSize()))[setIndex]; } - DescriptorSetLayout* GetSetLayouts(uint32_t setIndex) + DescriptorSetLayout* GetSetLayouts(const uint32_t setIndex) { return static_cast( Util::VoidPtrInc(this, sizeof(*this) + SetUserDataLayoutSize()))[setIndex]; @@ -176,6 +235,33 @@ class PipelineLayout final : public NonDispatchable + void FillDynamicSetNode( + const Vkgc::ResourceMappingNodeType type, + const uint32_t visibility, + const uint32_t setIndex, + const uint32_t bindingIndex, + const uint32_t offsetInDwords, + const uint32_t sizeInDwords, + const uint32_t userDataRegBase, + NodeType* pNode) const; + + template + void BuildLlpcDynamicSetMapping( + const DescriptorSetLayout* pLayout, + const uint32_t visibility, + 
const uint32_t setIndex, + const uint32_t userDataRegBase, + NodeType* pNodes, + uint32_t* pNodeCount) const; + + void BuildLlpcVertexBufferTableMapping( + const VbBindingInfo* pVbInfo, + const uint32_t offsetInDwords, + const uint32_t sizeInDwords, + Vkgc::ResourceMappingRootNode* pNode, + uint32_t* pNodeCount) const; + + void BuildLlpcTransformFeedbackMapping( + const uint32_t stageMask, + const uint32_t offsetInDwords, + const uint32_t sizeInDwords, + Vkgc::ResourceMappingRootNode* pNode, + uint32_t* pNodeCount) const; + VkResult BuildLlpcSetMapping( uint32_t visibility, uint32_t setIndex, diff --git a/icd/api/include/vk_shader.h b/icd/api/include/vk_shader.h index 505be418..b4daeda3 100644 --- a/icd/api/include/vk_shader.h +++ b/icd/api/include/vk_shader.h @@ -34,11 +34,6 @@ namespace Pal { enum class ResourceMappingNodeType : Pal::uint32; } -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 50 -// NOTE: Internal shader module create flag, please modify it if it conflict with vulkan header files. 
-#define VK_SHADER_MODULE_ENABLE_OPT_BIT 0x40000000u -#endif - namespace vk { diff --git a/icd/api/include/vk_swapchain.h b/icd/api/include/vk_swapchain.h index e42986a2..24ebbbc2 100644 --- a/icd/api/include/vk_swapchain.h +++ b/icd/api/include/vk_swapchain.h @@ -147,9 +147,10 @@ class SwapChain final : public NonDispatchable { return m_colorParams; } Pal::IGpuMemory* UpdatePresentInfo( - uint32_t deviceIdx, - uint32_t imageIndex, - Pal::PresentSwapChainInfo* pPresentInfo); + uint32_t deviceIdx, + uint32_t imageIndex, + Pal::PresentSwapChainInfo* pPresentInfo, + const Pal::FlipStatusFlags& flipFlags); Pal::IQueue* PrePresent( uint32_t deviceIdx, @@ -170,7 +171,13 @@ class SwapChain final : public NonDispatchable void SetHdrMetadata( const VkHdrMetadataEXT* pMetadata); - void MarkAsDeprecated(); + void MarkAsDeprecated( + const VkAllocationCallbacks* pAllocator); + + bool IsDxgiEnabled() const + { + return (m_properties.displayableInfo.palPlatform == Pal::WsiPlatform::Dxgi); + } bool IsSuboptimal(uint32_t deviceIdx); @@ -200,8 +207,6 @@ class SwapChain final : public NonDispatchable uint32_t m_queueFamilyIndex; // Queue family index of the last present - static bool s_forceTurboSyncEnable; // Force turbosync enable when synchronizing across swapchains - private: PAL_DISALLOW_COPY_AND_ASSIGN(SwapChain); }; @@ -259,8 +264,9 @@ class FullscreenMgr void Destroy(const VkAllocationCallbacks* pAllocator); void UpdatePresentInfo( - SwapChain* pSwapChain, - Pal::PresentSwapChainInfo* pPresentInfo); + SwapChain* pSwapChain, + Pal::PresentSwapChainInfo* pPresentInfo, + const Pal::FlipStatusFlags& flipFlags); ExclusiveModeFlags GetExclusiveModeFlags() const { return m_exclusiveModeFlags; } diff --git a/icd/api/include/vk_utils.h b/icd/api/include/vk_utils.h index 299e451f..761831cf 100644 --- a/icd/api/include/vk_utils.h +++ b/icd/api/include/vk_utils.h @@ -58,6 +58,7 @@ // Reuse some PAL macros here #define VK_ASSERT PAL_ASSERT +#define VK_ASSERT_MSG PAL_ASSERT_MSG #define 
VK_DEBUG_BUILD_ONLY_ASSERT PAL_DEBUG_BUILD_ONLY_ASSERT #define VK_ALERT PAL_ALERT #define VK_ALERT_ALWAYS_MSG PAL_ALERT_ALWAYS_MSG diff --git a/icd/api/internal_mem_mgr.cpp b/icd/api/internal_mem_mgr.cpp index 8100eb48..3e767adc 100644 --- a/icd/api/internal_mem_mgr.cpp +++ b/icd/api/internal_mem_mgr.cpp @@ -729,7 +729,9 @@ VkResult InternalMemMgr::AllocAndBindGpuMem( } } - createInfo.pal.heapCount = memReqs.heapCount; + createInfo.pal.flags.cpuInvisible = (memReqs.flags.cpuAccess ? 0 : 1); + createInfo.pal.heapCount = memReqs.heapCount; + for (uint32_t h = 0; h < memReqs.heapCount; ++h) { createInfo.pal.heaps[h] = memReqs.heaps[h]; diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index 8db86850..2498f062 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -555,6 +555,21 @@ VkResult PipelineCompiler::BuildShaderModule( return result; } +// ===================================================================================================================== +// Try to early compile shader if possible +void PipelineCompiler::TryEarlyCompileShaderModule( + const Device* pDevice, + ShaderModuleHandle* pModule) +{ + const uint32_t compilerMask = GetCompilerCollectionMask(); + + if (compilerMask & (1 << PipelineCompilerTypeLlpc)) + { + m_compilerSolutionLlpc.TryEarlyCompileShaderModule(pDevice, pModule); + } + +} + // ===================================================================================================================== // Check whether the shader module is valid bool PipelineCompiler::IsValidShaderModule( @@ -1127,6 +1142,32 @@ VkResult PipelineCompiler::CreateGraphicsPipelineBinary( return result; } +// ===================================================================================================================== +// Create ISA/relocable shader for a specific shader based on pipeline information +VkResult PipelineCompiler::CreateGraphicsShaderBinary( + const Device* pDevice, + const ShaderStage 
stage, + const GraphicsPipelineBinaryCreateInfo* pCreateInfo, + ShaderModuleHandle* pModule) +{ + VkResult result = VK_SUCCESS; + const uint32_t compilerMask = GetCompilerCollectionMask(); + + if (compilerMask & (1 << PipelineCompilerTypeLlpc)) + { + result = m_compilerSolutionLlpc.CreateGraphicsShaderBinary(pDevice, stage, pCreateInfo, pModule); + } + + return result; +} + +// ===================================================================================================================== +// Free and only free early compiled shader in ShaderModuleHandle +void PipelineCompiler::FreeGraphicsShaderBinary( + ShaderModuleHandle* pShaderModule) +{ +} + // ===================================================================================================================== // Creates compute pipeline binary. VkResult PipelineCompiler::CreateComputePipelineBinary( @@ -1471,16 +1512,6 @@ static void BuildRasterizationState( pCreateInfo->pipelineInfo.vpState.depthClipEnable = (pRs->depthClampEnable == VK_FALSE); pCreateInfo->pipelineInfo.rsState.rasterizerDiscardEnable = (pRs->rasterizerDiscardEnable != VK_FALSE); -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 48 - pCreateInfo->pipelineInfo.rsState.polygonMode = pRs->polygonMode; -#endif -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 47 - pCreateInfo->pipelineInfo.rsState.cullMode = pRs->cullMode; - pCreateInfo->pipelineInfo.rsState.frontFace = pRs->frontFace; -#endif -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 46 - pCreateInfo->pipelineInfo.rsState.depthBiasEnable = pRs->depthBiasEnable; -#endif if (pPipelineRasterizationDepthClipStateCreateInfoEXT != nullptr) { @@ -1513,42 +1544,21 @@ static void BuildRasterizationState( } // ===================================================================================================================== -static void BuildMultisampleState( +static void BuildMultisampleStateInFgs( const Device* pDevice, const VkPipelineMultisampleStateCreateInfo* pMs, const RenderPass* pRenderPass, const 
uint32_t subpass, - const uint32_t dynamicStateFlags, GraphicsPipelineBinaryCreateInfo* pCreateInfo) { if (pMs != nullptr) { - EXTRACT_VK_STRUCTURES_0( - SampleLocations, - PipelineSampleLocationsStateCreateInfoEXT, - static_cast(pMs->pNext), - PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT); - if (pMs->rasterizationSamples != 1) { - uint32_t rasterizationSampleCount = pMs->rasterizationSamples; - - uint32_t subpassCoverageSampleCount = (pRenderPass != nullptr) ? - pRenderPass->GetSubpassMaxSampleCount(subpass) : - rasterizationSampleCount; - - uint32_t subpassColorSampleCount = (pRenderPass != nullptr) ? - pRenderPass->GetSubpassColorSampleCount(subpass) : - rasterizationSampleCount; - - // subpassCoverageSampleCount would be equal to zero if there are zero attachments. - subpassCoverageSampleCount = (subpassCoverageSampleCount == 0) ? - rasterizationSampleCount : - subpassCoverageSampleCount; - - subpassColorSampleCount = (subpassColorSampleCount == 0) ? - subpassCoverageSampleCount : - subpassColorSampleCount; + uint32_t subpassCoverageSampleCount; + uint32_t subpassColorSampleCount; + GraphicsPipelineCommon::GetSubpassSampleCount( + pMs, pRenderPass, subpass, &subpassCoverageSampleCount, &subpassColorSampleCount, nullptr); if (pMs->sampleShadingEnable && (pMs->minSampleShading > 0.0f)) { @@ -1560,7 +1570,7 @@ static void BuildMultisampleState( pCreateInfo->pipelineInfo.rsState.perSampleShading = false; } - pCreateInfo->pipelineInfo.rsState.numSamples = rasterizationSampleCount; + pCreateInfo->pipelineInfo.rsState.numSamples = pMs->rasterizationSamples; // NOTE: The sample pattern index here is actually the offset of sample position pair. This is // different from the field of creation info of image view. 
For image view, the sample pattern @@ -1569,18 +1579,24 @@ static void BuildMultisampleState( Device::GetDefaultSamplePatternIndex(subpassCoverageSampleCount) * Pal::MaxMsaaRasterizerSamples; } - pCreateInfo->pipelineInfo.cbState.alphaToCoverageEnable = (pMs->alphaToCoverageEnable == VK_TRUE); - if (pPipelineSampleLocationsStateCreateInfoEXT != nullptr) - { - pCreateInfo->sampleLocationGridSize = - pPipelineSampleLocationsStateCreateInfoEXT->sampleLocationsInfo.sampleLocationGridSize; - } + pCreateInfo->pipelineInfo.options.enableInterpModePatch = false; if (pCreateInfo->pipelineInfo.rsState.perSampleShading) { - const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); - if (!(pCreateInfo->sampleLocationGridSize.width > 1 || pCreateInfo->sampleLocationGridSize.height > 1 - )) + EXTRACT_VK_STRUCTURES_0( + SampleLocations, + PipelineSampleLocationsStateCreateInfoEXT, + static_cast(pMs->pNext), + PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT); + + VkExtent2D gridSize = {}; + if (pPipelineSampleLocationsStateCreateInfoEXT != nullptr) + { + gridSize = pPipelineSampleLocationsStateCreateInfoEXT->sampleLocationsInfo.sampleLocationGridSize; + } + + if ((gridSize.width <= 1) && (gridSize.height <= 1) + ) { pCreateInfo->pipelineInfo.options.enableInterpModePatch = true; } @@ -1592,6 +1608,17 @@ static void BuildMultisampleState( } } +// ===================================================================================================================== +static void BuildMultisampleStateInFoi( + const VkPipelineMultisampleStateCreateInfo* pMs, + GraphicsPipelineBinaryCreateInfo* pCreateInfo) +{ + if (pMs != nullptr) + { + pCreateInfo->pipelineInfo.cbState.alphaToCoverageEnable = (pMs->alphaToCoverageEnable == VK_TRUE); + } +} + // ===================================================================================================================== static void BuildViewportState( const Device* pDevice, @@ -1628,20 +1655,10 @@ void PipelineCompiler::BuildNggState( 
pCreateInfo->pipelineInfo.nggState.enableGsUse = Util::TestAnyFlagSet( settings.enableNgg, GraphicsPipelineTypeGs | GraphicsPipelineTypeTessGs); -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 44 - pCreateInfo->pipelineInfo.nggState.forceNonPassthrough = settings.nggForceCullingMode; -#else pCreateInfo->pipelineInfo.nggState.forceCullingMode = settings.nggForceCullingMode; -#endif -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 47 - pCreateInfo->pipelineInfo.nggState.alwaysUsePrimShaderTable = settings.nggAlwaysUsePrimShaderTable; -#endif pCreateInfo->pipelineInfo.nggState.compactMode = static_cast(settings.nggCompactionMode); -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 45 - pCreateInfo->pipelineInfo.nggState.enableFastLaunch = false; -#endif pCreateInfo->pipelineInfo.nggState.enableVertexReuse = false; pCreateInfo->pipelineInfo.nggState.enableBackfaceCulling = (isConservativeOverestimation ? false : settings.nggEnableBackfaceCulling); @@ -1696,6 +1713,12 @@ static void BuildDepthStencilState( { if (pDs != nullptr) { + pCreateInfo->pipelineInfo.dsState.depthTestEnable = pDs->depthTestEnable; + pCreateInfo->pipelineInfo.dsState.depthWriteEnable = pDs->depthWriteEnable; + pCreateInfo->pipelineInfo.dsState.depthCompareOp = pDs->depthCompareOp; + pCreateInfo->pipelineInfo.dsState.front = pDs->front; + pCreateInfo->pipelineInfo.dsState.back = pDs->back; + pCreateInfo->pipelineInfo.dsState.stencilTestEnable = pDs->stencilTestEnable; } } @@ -1780,8 +1803,8 @@ static VkResult BuildPipelineResourceMapping( // LLPC can understand. 
result = pLayout->BuildLlpcPipelineMapping(stageMask, pVbInfo, - pCreateInfo->pTempBuffer, pCreateInfo->pipelineInfo.enableUberFetchShader, + pCreateInfo->pTempBuffer, &pCreateInfo->pipelineInfo.resourceMapping); } } @@ -1826,8 +1849,6 @@ static void BuildPipelineShadersInfo( GraphicsPipelineBinaryCreateInfo* pCreateInfo) { - pCreateInfo->flags = pIn->flags; - pDevice->GetCompiler(DefaultDeviceIndex)->ApplyPipelineOptions(pDevice, pIn->flags, &pCreateInfo->pipelineInfo.options); Vkgc::PipelineShaderInfo* ppShaderInfoOut[] = @@ -1858,6 +1879,7 @@ static void BuildPipelineShadersInfo( static void BuildColorBlendState( const Device* pDevice, const VkPipelineColorBlendStateCreateInfo* pCb, + const VkPipelineRenderingCreateInfoKHR* pRendering, const RenderPass* pRenderPass, const uint32_t subpass, GraphicsPipelineBinaryCreateInfo* pCreateInfo) @@ -1879,6 +1901,11 @@ static void BuildColorBlendState( { cbFormat = pRenderPass->GetColorAttachmentFormat(subpass, i); } + else if ((pRendering != nullptr) && + (i < pRendering->colorAttachmentCount)) + { + cbFormat = pRendering->pColorAttachmentFormats[i]; + } // If the sub pass attachment format is UNDEFINED, then it means that that subpass does not // want to write to any attachment for that output (VK_ATTACHMENT_UNUSED). Under such cases, @@ -1914,6 +1941,11 @@ static void BuildColorBlendState( { dbFormat = pRenderPass->GetDepthStencilAttachmentFormat(subpass); } + else if (pRendering != nullptr) + { + dbFormat = (pRendering->depthAttachmentFormat != VK_FORMAT_UNDEFINED) ? 
+ pRendering->depthAttachmentFormat : pRendering->stencilAttachmentFormat; + } pCreateInfo->dbFormat = dbFormat; } @@ -2004,12 +2036,11 @@ static void BuildFragmentShaderState( const Device* pDevice, const VkGraphicsPipelineCreateInfo* pIn, const GraphicsPipelineShaderStageInfo* pShaderInfo, - const uint32_t dynamicStateFlags, GraphicsPipelineBinaryCreateInfo* pCreateInfo) { const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pIn->renderPass); - BuildMultisampleState(pDevice, pIn->pMultisampleState, pRenderPass, pIn->subpass, dynamicStateFlags, pCreateInfo); + BuildMultisampleStateInFgs(pDevice, pIn->pMultisampleState, pRenderPass, pIn->subpass, pCreateInfo); BuildDepthStencilState(pIn->pDepthStencilState, pCreateInfo); @@ -2026,37 +2057,43 @@ static void BuildFragmentOutputInterfaceState( { const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pIn->renderPass); + EXTRACT_VK_STRUCTURES_0( + dynamicRendering, + PipelineRenderingCreateInfoKHR, + reinterpret_cast(pIn->pNext), + PIPELINE_RENDERING_CREATE_INFO_KHR) + + BuildMultisampleStateInFoi(pIn->pMultisampleState, pCreateInfo); + BuildColorBlendState(pDevice, pIn->pColorBlendState, + pPipelineRenderingCreateInfoKHR, pRenderPass, pIn->subpass, pCreateInfo); pCreateInfo->pipelineInfo.iaState.enableMultiView = (pRenderPass != nullptr) ? 
pRenderPass->IsMultiviewEnabled() : - false; + ((pPipelineRenderingCreateInfoKHR != nullptr) && + (Util::CountSetBits(pPipelineRenderingCreateInfoKHR->viewMask) != 0)); } // ===================================================================================================================== -static VkResult BuildPipelineInternalBufferData( +static void BuildPipelineInternalBufferData( const Device* pDevice, GraphicsPipelineBinaryCreateInfo* pCreateInfo, PipelineInternalBufferInfo* pInternalBufferInfo) { PipelineCompiler* pDefaultCompiler = pDevice->GetCompiler(DefaultDeviceIndex); VK_ASSERT(pCreateInfo->pipelineInfo.enableUberFetchShader); - auto result = pDefaultCompiler->BuildPipelineInternalBufferData(pCreateInfo, - pInternalBufferInfo); - return result; + pDefaultCompiler->BuildPipelineInternalBufferData(pCreateInfo, pInternalBufferInfo); } // ===================================================================================================================== -static VkResult BuildExecutablePipelineState( +static void BuildExecutablePipelineState( const Device* pDevice, const VkGraphicsPipelineCreateInfo* pIn, const GraphicsPipelineShaderStageInfo* pShaderInfo, - const PipelineLayout* pPipelineLayout, const uint32_t dynamicStateFlags, GraphicsPipelineBinaryCreateInfo* pCreateInfo, - VbBindingInfo* pVbInfo, PipelineInternalBufferInfo* pInternalBufferInfo) { const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); @@ -2075,53 +2112,28 @@ static VkResult BuildExecutablePipelineState( pCreateInfo->pipelineInfo.cbState.dualSourceBlendEnable = false; } - Vkgc::PipelineShaderInfo* shaderInfos[] = - { - &pCreateInfo->pipelineInfo.vs, - &pCreateInfo->pipelineInfo.tcs, - &pCreateInfo->pipelineInfo.tes, - &pCreateInfo->pipelineInfo.gs, - &pCreateInfo->pipelineInfo.fs, - }; - - uint32_t availableStageMask = 0; + // Compiler info is required to be re-built here since we may need to change the compiler when all the states + // of an executable graphics pipeline are 
available. The shader mask here refers to the shader stages which + // are valid in this pipeline. + const Vkgc::GraphicsPipelineBuildInfo& pipelineInfo = pCreateInfo->pipelineInfo; + uint32_t shaderMask = 0; + shaderMask |= (pipelineInfo.vs.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageVertex) : 0; + shaderMask |= (pipelineInfo.tcs.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageTessControl) : 0; + shaderMask |= (pipelineInfo.tes.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageTessEval) : 0; + shaderMask |= (pipelineInfo.gs.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageGeometry) : 0; + shaderMask |= (pipelineInfo.fs.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageFragment) : 0; + BuildCompilerInfo(pDevice, pShaderInfo, shaderMask, pCreateInfo); - for (uint32_t stage = 0; stage < ShaderStage::ShaderStageGfxCount; ++stage) + if (pCreateInfo->compilerType == PipelineCompilerTypeLlpc) { - if (shaderInfos[stage]->pModuleData != nullptr) - { - availableStageMask |= (1 << stage); - } + pCreateInfo->pipelineInfo.enableUberFetchShader = false; } - VkResult result = BuildPipelineResourceMapping(pDevice, pPipelineLayout, availableStageMask, pVbInfo, pCreateInfo); - - if (result == VK_SUCCESS) + if (pCreateInfo->pipelineInfo.enableUberFetchShader) { - // Compiler info is required to be re-built here since we may need to change the compiler when all the states - // of an executable graphics pipeline are available. The shader mask here refers to the shader stages which - // are valid in this pipeline. - const Vkgc::GraphicsPipelineBuildInfo& pipelineInfo = pCreateInfo->pipelineInfo; - uint32_t shaderMask = 0; - shaderMask |= (pipelineInfo.vs.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageVertex) : 0; - shaderMask |= (pipelineInfo.tcs.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageTessControl) : 0; - shaderMask |= (pipelineInfo.tes.pModuleData != nullptr) ? 
(1 << ShaderStage::ShaderStageTessEval) : 0; - shaderMask |= (pipelineInfo.gs.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageGeometry) : 0; - shaderMask |= (pipelineInfo.fs.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageFragment) : 0; - BuildCompilerInfo(pDevice, pShaderInfo, shaderMask, pCreateInfo); - - if (pCreateInfo->compilerType == PipelineCompilerTypeLlpc) - { - pCreateInfo->pipelineInfo.enableUberFetchShader = false; - } - - if (pCreateInfo->pipelineInfo.enableUberFetchShader) - { - result = BuildPipelineInternalBufferData(pDevice, pCreateInfo, pInternalBufferInfo); - } + BuildPipelineInternalBufferData(pDevice, pCreateInfo, pInternalBufferInfo); } - return result; } // ===================================================================================================================== @@ -2147,6 +2159,8 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( pIn->pDynamicState ); + pCreateInfo->flags = pIn->flags; + BuildVertexInputInterfaceState(pDevice, pIn, dynamicStateFlags, pCreateInfo, pVbInfo); BuildPreRasterizationShaderState(pDevice, pIn, pShaderInfo, dynamicStateFlags, activeStages, pCreateInfo); @@ -2157,19 +2171,51 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( if (enableRasterization) { - BuildFragmentShaderState(pDevice, pIn, pShaderInfo, dynamicStateFlags, pCreateInfo); + BuildFragmentShaderState(pDevice, pIn, pShaderInfo, pCreateInfo); BuildFragmentOutputInterfaceState(pDevice, pIn, pCreateInfo); } { - result = BuildExecutablePipelineState( - pDevice, pIn, pShaderInfo, pPipelineLayout, dynamicStateFlags, pCreateInfo, pVbInfo, pInternalBufferInfo); + const Vkgc::PipelineShaderInfo* shaderInfos[] = + { + &pCreateInfo->pipelineInfo.vs, + &pCreateInfo->pipelineInfo.tcs, + &pCreateInfo->pipelineInfo.tes, + &pCreateInfo->pipelineInfo.gs, + &pCreateInfo->pipelineInfo.fs, + }; + + uint32_t availableStageMask = 0; + + for (uint32_t stage = 0; stage < ShaderStage::ShaderStageGfxCount; ++stage) + { + if 
(shaderInfos[stage]->pModuleData != nullptr) + { + availableStageMask |= (1 << stage); + } + } + result = BuildPipelineResourceMapping(pDevice, pPipelineLayout, availableStageMask, pVbInfo, pCreateInfo); + } + + if ((result == VK_SUCCESS) + ) + { + BuildExecutablePipelineState(pDevice, pIn, pShaderInfo, dynamicStateFlags, pCreateInfo, pInternalBufferInfo); } return result; } +// ===================================================================================================================== +// Fill partial pipeline binary info in GraphicsPipelineBinaryCreateInfo +void PipelineCompiler::SetPartialGraphicsPipelineBinaryInfo( + const ShaderModuleHandle* pShaderModuleHandle, + const ShaderStage stage, + GraphicsPipelineBinaryCreateInfo* pCreateInfo) +{ +} + // ===================================================================================================================== // Checks which compiler is used template @@ -2323,8 +2369,8 @@ VkResult PipelineCompiler::ConvertComputePipelineInfo( // LLPC can understand. 
result = pLayout->BuildLlpcPipelineMapping(Vkgc::ShaderStageComputeBit, nullptr, - pCreateInfo->pTempBuffer, false, + pCreateInfo->pTempBuffer, &pCreateInfo->pipelineInfo.resourceMapping); } } @@ -2599,18 +2645,15 @@ void PipelineCompiler::GetGraphicsPipelineCacheId( } // ===================================================================================================================== -VkResult PipelineCompiler::BuildPipelineInternalBufferData( +void PipelineCompiler::BuildPipelineInternalBufferData( GraphicsPipelineBinaryCreateInfo* pCreateInfo, PipelineInternalBufferInfo* pInternalBufferInfo) { - - VkResult result = VK_SUCCESS; if (pCreateInfo->compilerType == PipelineCompilerTypeLlpc) { VK_NOT_IMPLEMENTED; } - return result; } // ===================================================================================================================== diff --git a/icd/api/render_state_cache.cpp b/icd/api/render_state_cache.cpp index f4f896bc..7b91fd5e 100644 --- a/icd/api/render_state_cache.cpp +++ b/icd/api/render_state_cache.cpp @@ -66,8 +66,6 @@ RenderStateCache::RenderStateCache( m_scissorRectNextId(FirstStaticRenderStateToken), m_msaaStates(NumStateBuckets, pDevice->VkInstance()->Allocator()), m_msaaRefs(NumStateBuckets, pDevice->VkInstance()->Allocator()), - m_samplePattern(NumStateBuckets, pDevice->VkInstance()->Allocator()), - m_samplePatternNextId(FirstStaticRenderStateToken), m_colorBlendStates(NumStateBuckets, pDevice->VkInstance()->Allocator()), m_colorBlendRefs(NumStateBuckets, pDevice->VkInstance()->Allocator()), m_depthStencilStates(NumStateBuckets, pDevice->VkInstance()->Allocator()), @@ -134,11 +132,6 @@ VkResult RenderStateCache::Init() result = m_msaaRefs.Init(); } - if (result == Pal::Result::Success) - { - result = m_samplePattern.Init(); - } - if (result == Pal::Result::Success) { result = m_colorBlendStates.Init(); @@ -1034,29 +1027,6 @@ void RenderStateCache::DestroyScissorRect( &m_scissorRect); } -// 
===================================================================================================================== -uint32_t RenderStateCache::CreateSamplePattern( - const SamplePattern& samplePattern) -{ - return CreateStaticParamsState( - OptRenderStateCacheStaticSamplePattern, - samplePattern, - &m_samplePattern, - &m_samplePatternNextId); -} - -// ===================================================================================================================== -void RenderStateCache::DestroySamplePattern( - const SamplePattern& samplePattern, - uint32_t token) -{ - return DestroyStaticParamsState( - OptRenderStateCacheStaticSamplePattern, - samplePattern, - token, - &m_samplePattern); -} - // ===================================================================================================================== uint32_t RenderStateCache::CreateLineStipple( const Pal::LineStippleStateParams& params) diff --git a/icd/api/strings/entry_points.txt b/icd/api/strings/entry_points.txt index a03242e5..bfdf41cb 100644 --- a/icd/api/strings/entry_points.txt +++ b/icd/api/strings/entry_points.txt @@ -376,6 +376,8 @@ vkSignalSemaphoreKHR @device @dext(KHR_timeli vkCmdBeginConditionalRenderingEXT @device @dext(EXT_conditional_rendering) vkCmdEndConditionalRenderingEXT @device @dext(EXT_conditional_rendering) +vkGetPhysicalDeviceToolPropertiesEXT @device @dext(EXT_tooling_info) + vkCmdSetEvent2KHR @device @dext(KHR_synchronization2) vkCmdResetEvent2KHR @device @dext(KHR_synchronization2) vkCmdWaitEvents2KHR @device @dext(KHR_synchronization2) @@ -384,6 +386,9 @@ vkCmdWriteTimestamp2KHR @device @dext(KHR_synchr vkQueueSubmit2KHR @device @dext(KHR_synchronization2) vkCmdWriteBufferMarker2AMD @device @dext(KHR_synchronization2) +vkCmdBeginRenderingKHR @device @dext(KHR_dynamic_rendering) +vkCmdEndRenderingKHR @device @dext(KHR_dynamic_rendering) + vkCmdSetCullModeEXT @device @dext(EXT_extended_dynamic_state) vkCmdSetFrontFaceEXT @device @dext(EXT_extended_dynamic_state) 
vkCmdSetPrimitiveTopologyEXT @device @dext(EXT_extended_dynamic_state) @@ -412,3 +417,7 @@ vkCmdCopyImage2KHR @device @dext(KHR_copy_c vkCmdCopyImageToBuffer2KHR @device @dext(KHR_copy_commands2) vkCmdResolveImage2KHR @device @dext(KHR_copy_commands2) +vkGetDeviceBufferMemoryRequirementsKHR @device @dext(KHR_maintenance4) +vkGetDeviceImageMemoryRequirementsKHR @device @dext(KHR_maintenance4) +vkGetDeviceImageSparseMemoryRequirementsKHR @device @dext(KHR_maintenance4) + diff --git a/icd/api/strings/extensions.txt b/icd/api/strings/extensions.txt index 4f2afb95..e1ab18fa 100644 --- a/icd/api/strings/extensions.txt +++ b/icd/api/strings/extensions.txt @@ -126,6 +126,7 @@ VK_KHR_timeline_semaphore VK_EXT_conditional_rendering VK_KHR_separate_depth_stencil_layouts VK_EXT_pipeline_creation_feedback +VK_EXT_tooling_info VK_EXT_shader_image_atomic_int64 VK_EXT_pipeline_creation_cache_control VK_KHR_sampler_ycbcr_conversion @@ -137,12 +138,17 @@ VK_EXT_extended_dynamic_state VK_KHR_shader_subgroup_uniform_control_flow VK_EXT_image_robustness VK_EXT_4444_formats +VK_EXT_border_color_swizzle VK_EXT_color_write_enable VK_KHR_shader_terminate_invocation VK_KHR_synchronization2 VK_EXT_primitive_topology_list_restart +VK_KHR_dynamic_rendering +VK_KHR_format_feature_flags2 VK_EXT_extended_dynamic_state2 VK_KHR_copy_commands2 VK_EXT_ycbcr_image_arrays VK_KHR_zero_initialize_workgroup_memory VK_EXT_load_store_op_none +VK_KHR_maintenance4 +VK_EXT_index_type_uint8 diff --git a/icd/api/vk_buffer.cpp b/icd/api/vk_buffer.cpp index a8bc8579..bfb7c358 100644 --- a/icd/api/vk_buffer.cpp +++ b/icd/api/vk_buffer.cpp @@ -370,6 +370,33 @@ void Buffer::GetMemoryRequirements( GetBufferMemoryRequirements(pDevice, &m_internalFlags, m_size, pMemoryRequirements); } +// ===================================================================================================================== +// Get the buffer's memory requirements from VkBufferCreateInfo +void Buffer::CalculateMemoryRequirements( + const 
Device* pDevice, + const VkDeviceBufferMemoryRequirementsKHR* pInfo, + VkMemoryRequirements2* pMemoryRequirements) +{ + BufferFlags bufferFlags; + + CalculateBufferFlags(pDevice, pInfo->pCreateInfo, &bufferFlags); + + VkMemoryDedicatedRequirements* pMemDedicatedRequirements = + static_cast(pMemoryRequirements->pNext); + + if ((pMemDedicatedRequirements != nullptr) && + (pMemDedicatedRequirements->sType == VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS)) + { + pMemDedicatedRequirements->prefersDedicatedAllocation = bufferFlags.dedicatedRequired; + pMemDedicatedRequirements->requiresDedicatedAllocation = bufferFlags.dedicatedRequired; + } + + GetBufferMemoryRequirements(pDevice, + &bufferFlags, + pInfo->pCreateInfo->size, + &pMemoryRequirements->memoryRequirements); +} + // ===================================================================================================================== // Get the buffer's memory requirements void Buffer::GetBufferMemoryRequirements( diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index 5f17a9f8..dc9d69aa 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -1117,6 +1117,7 @@ VkResult CmdBuffer::Begin( VK_ASSERT(pBeginInfo->sType == VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO); VK_ASSERT(!m_flags.isRecording); + m_flags.isRenderingSuspended = false; m_flags.wasBegun = true; // Beginning a command buffer implicitly resets its state @@ -1130,6 +1131,8 @@ VkResult CmdBuffer::Begin( RenderPass* pRenderPass = nullptr; Framebuffer* pFramebuffer = nullptr; + const VkCommandBufferInheritanceRenderingInfoKHR* pInheritanceRenderingInfoKHR = nullptr; + m_cbBeginDeviceMask = m_pDevice->GetPalDeviceMask(); cmdInfo.flags.u32All = 0; @@ -1191,6 +1194,24 @@ VkResult CmdBuffer::Begin( inheritedStateParams.stateFlags.predication = pExtInfo->conditionalRenderingEnable; m_flags.hasConditionalRendering = pExtInfo->conditionalRenderingEnable; } + else if (pHeader->sType == 
VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO_KHR) + { + VK_ASSERT(m_flags.is2ndLvl); + + pInheritanceRenderingInfoKHR = static_cast(pNext); + + inheritedStateParams.colorTargetCount = pInheritanceRenderingInfoKHR->colorAttachmentCount; + inheritedStateParams.stateFlags.targetViewState = 1; + + for (uint32_t i = 0; i < inheritedStateParams.colorTargetCount; i++) + { + inheritedStateParams.colorTargetSwizzledFormats[i] = + VkToPalFormat(pInheritanceRenderingInfoKHR->pColorAttachmentFormats[i], + m_pDevice->GetRuntimeSettings()); + + inheritedStateParams.sampleCount[i] = pInheritanceRenderingInfoKHR->rasterizationSamples; + } + } pNext = pHeader->pNext; } @@ -1270,6 +1291,31 @@ VkResult CmdBuffer::Begin( m_renderPassInstance.subpass = currentSubPass; } + if (pInheritanceRenderingInfoKHR != nullptr) + { + m_allGpuState.dynamicRenderingInstance.viewMask = + pInheritanceRenderingInfoKHR->viewMask; + + m_allGpuState.dynamicRenderingInstance.colorAttachmentCount = + pInheritanceRenderingInfoKHR->colorAttachmentCount; + + for (uint32_t i = 0; i < m_allGpuState.dynamicRenderingInstance.colorAttachmentCount; ++i) + { + DynamicRenderingAttachments& dynamicAttachment = + m_allGpuState.dynamicRenderingInstance.colorAttachments[i]; + + dynamicAttachment.attachmentFormat = pInheritanceRenderingInfoKHR->pColorAttachmentFormats[i]; + dynamicAttachment.rasterizationSamples = pInheritanceRenderingInfoKHR->rasterizationSamples; + } + m_allGpuState.dynamicRenderingInstance.depthAttachment.attachmentFormat = + (pInheritanceRenderingInfoKHR->depthAttachmentFormat != VK_FORMAT_UNDEFINED) ? 
+ pInheritanceRenderingInfoKHR->depthAttachmentFormat : + pInheritanceRenderingInfoKHR->stencilAttachmentFormat; + + m_allGpuState.dynamicRenderingInstance.depthAttachment.rasterizationSamples = + pInheritanceRenderingInfoKHR->rasterizationSamples; + } + // if input frame buffer object pointer is NULL, it means // either this is for a primary command buffer, or this is a secondary command buffer // and the command buffer will get the frame buffer object and execution time from @@ -1282,8 +1328,8 @@ VkResult CmdBuffer::Begin( m_flags.isRecording = true; - if (pRenderPass - ) // secondary VkCommandBuffer will be used inside VkRenderPass + if ((pRenderPass != nullptr) || (pInheritanceRenderingInfoKHR != nullptr)) + // secondary VkCommandBuffer will be used inside VkRenderPass { VK_ASSERT(m_flags.is2ndLvl); @@ -1420,6 +1466,8 @@ void CmdBuffer::ResetPipelineState() memset(&m_allGpuState.depthStencilCreateInfo, 0u, sizeof(m_allGpuState.depthStencilCreateInfo)); + memset(&m_allGpuState.samplePattern, 0u, sizeof(m_allGpuState.samplePattern)); + uint32_t bindIdx = 0; do @@ -1436,10 +1484,9 @@ void CmdBuffer::ResetPipelineState() } while (bindIdx < PipelineBindCount); - static_assert(VK_ARRAY_SIZE(m_allGpuState.palToApiPipeline) == 2, ""); - - m_allGpuState.palToApiPipeline[uint32_t(Pal::PipelineBindPoint::Compute)] = PipelineBindCompute; - m_allGpuState.palToApiPipeline[uint32_t(Pal::PipelineBindPoint::Graphics)] = PipelineBindGraphics; + m_allGpuState.palToApiPipeline[uint32_t(Pal::PipelineBindPoint::Compute)] = PipelineBindCompute; + m_allGpuState.palToApiPipeline[uint32_t(Pal::PipelineBindPoint::Graphics)] = PipelineBindGraphics; + static_assert(VK_ARRAY_SIZE(m_allGpuState.palToApiPipeline) == 2, "PAL PipelineBindPoint not handled"); const uint32_t numPalDevices = m_numPalDevices; uint32_t deviceIdx = 0; @@ -1564,9 +1611,7 @@ void CmdBuffer::RebindPipeline() Pal::PipelineBindPoint palBindPoint; - switch (bindPoint) - { - case PipelineBindCompute: + if (bindPoint == 
PipelineBindCompute) { const ComputePipeline* pPipeline = m_allGpuState.pComputePipeline; @@ -1597,10 +1642,8 @@ void CmdBuffer::RebindPipeline() } palBindPoint = Pal::PipelineBindPoint::Compute; - break; } - - case PipelineBindGraphics: + else if (bindPoint == PipelineBindGraphics) { const GraphicsPipeline* pPipeline = m_allGpuState.pGraphicsPipeline; @@ -1621,40 +1664,47 @@ void CmdBuffer::RebindPipeline() } palBindPoint = Pal::PipelineBindPoint::Graphics; - break; - }; + } - default: + else { VK_NEVER_CALLED(); } - break; - } RebindUserDataFlags rebindFlags = 0; - // Update the current owner of the compute PAL pipeline binding if we bound a pipeline - if ((fromBindPipeline == false) && - (palBindPoint == Pal::PipelineBindPoint::Compute)) - { - // If the ownership of the PAL binding is changing, the current user data belongs to the old binding and must - // be reloaded. - if (PalPipelineBindingOwnedBy(palBindPoint, bindPoint) == false) - { - rebindFlags |= RebindUserDataAll; - } + // In compact scheme, the top-level user data layout of two compatible pipeline layouts may be different. + // Thus, the pipeline layout needs to be checked and the user data rebound if needed. + // In indirect scheme, the top-level user data layout is always the same for all the pipeline layouts built + // in this scheme. So user data doesn't need to be rebound in this case. + // Pipeline layouts in different schemes can never be compatible. In this case, calling vkCmdBindDescriptorSets() + // to rebind descriptor sets is mandatory for the user.
+ if ((pNewUserDataLayout->scheme == m_allGpuState.pipelineState[bindPoint].userDataLayout.scheme) && + (pNewUserDataLayout->scheme == PipelineLayoutScheme::Compact)) + { + // Update the current owner of the compute PAL pipeline binding if we bound a pipeline + if ((fromBindPipeline == false) && + (palBindPoint == Pal::PipelineBindPoint::Compute)) + { + // If the ownership of the PAL binding is changing, the current user data belongs to the old binding and must + // be reloaded. + if (PalPipelineBindingOwnedBy(palBindPoint, bindPoint) == false) + { + rebindFlags |= RebindUserDataAll; + } - m_allGpuState.palToApiPipeline[size_t(Pal::PipelineBindPoint::Compute)] = bindPoint; - } + m_allGpuState.palToApiPipeline[size_t(Pal::PipelineBindPoint::Compute)] = bindPoint; + } - // Graphics pipeline owner should always remain fixed, so we don't have to worry about reloading - // user data (for that reason) or ownership updates. - VK_ASSERT(PalPipelineBindingOwnedBy(Pal::PipelineBindPoint::Graphics, PipelineBindGraphics)); + // Graphics pipeline owner should always remain fixed, so we don't have to worry about reloading + // user data (for that reason) or ownership updates. + VK_ASSERT(PalPipelineBindingOwnedBy(Pal::PipelineBindPoint::Graphics, PipelineBindGraphics)); - // A user data layout switch may also require some user data to be reloaded (for both gfx and compute). - if (pNewUserDataLayout != nullptr) - { - rebindFlags |= SwitchUserDataLayouts(bindPoint, pNewUserDataLayout); + // A user data layout switch may also require some user data to be reloaded (for both gfx and compute). 
+ if (pNewUserDataLayout != nullptr) + { + rebindFlags |= SwitchUserDataLayouts(bindPoint, pNewUserDataLayout); + } } // Reprogram the user data if necessary @@ -1727,13 +1777,15 @@ CmdBuffer::RebindUserDataFlags CmdBuffer::SwitchUserDataLayouts( const UserDataLayout* pNewUserDataLayout) { VK_ASSERT(pNewUserDataLayout != nullptr); + VK_ASSERT(pNewUserDataLayout->scheme == PipelineLayoutScheme::Compact); + VK_ASSERT(m_allGpuState.pipelineState[apiBindPoint].userDataLayout.scheme == PipelineLayoutScheme::Compact); PipelineBindState* pBindState = &m_allGpuState.pipelineState[apiBindPoint]; RebindUserDataFlags flags = 0; - const UserDataLayout& newUserDataLayout = *pNewUserDataLayout; - const UserDataLayout& curUserDataLayout = pBindState->userDataLayout; + const auto& newUserDataLayout = pNewUserDataLayout->compact; + const auto& curUserDataLayout = pBindState->userDataLayout.compact; // Rebind descriptor set bindings if necessary if ((newUserDataLayout.setBindingRegBase != curUserDataLayout.setBindingRegBase) | @@ -1750,7 +1802,7 @@ CmdBuffer::RebindUserDataFlags CmdBuffer::SwitchUserDataLayouts( } // Cache the new user data layout information - pBindState->userDataLayout = newUserDataLayout; + pBindState->userDataLayout = *pNewUserDataLayout; return flags; } @@ -1764,9 +1816,10 @@ void CmdBuffer::RebindUserData( RebindUserDataFlags flags) { VK_ASSERT(flags != 0); + VK_ASSERT(m_allGpuState.pipelineState[apiBindPoint].userDataLayout.scheme == PipelineLayoutScheme::Compact); - const PipelineBindState& bindState = m_allGpuState.pipelineState[apiBindPoint]; - const UserDataLayout& userDataLayout = bindState.userDataLayout; + const PipelineBindState& bindState = m_allGpuState.pipelineState[apiBindPoint]; + const auto& userDataLayout = bindState.userDataLayout.compact; if ((flags & RebindUserDataDescriptorSets) != 0) { @@ -1993,43 +2046,102 @@ void CmdBuffer::BindDescriptorSets( } } - // Figure out the total range of user data registers written by this sequence of 
descriptor set binds - const PipelineLayout::SetUserDataLayout& firstSetLayout = pLayout->GetSetUserData(firstSet); - const PipelineLayout::SetUserDataLayout& lastSetLayout = pLayout->GetSetUserData(firstSet + setCount - 1); + if (pLayout->GetScheme() == PipelineLayoutScheme::Compact) + { + // Figure out the total range of user data registers written by this sequence of descriptor set binds + const PipelineLayout::SetUserDataLayout& firstSetLayout = pLayout->GetSetUserData(firstSet); + const PipelineLayout::SetUserDataLayout& lastSetLayout = pLayout->GetSetUserData(firstSet + setCount - 1); + + const uint32_t rangeOffsetBegin = firstSetLayout.firstRegOffset; + const uint32_t rangeOffsetEnd = lastSetLayout.firstRegOffset + lastSetLayout.totalRegCount; - const uint32_t rangeOffsetBegin = firstSetLayout.firstRegOffset; - const uint32_t rangeOffsetEnd = lastSetLayout.firstRegOffset + lastSetLayout.totalRegCount; + // Update the high watermark of number of user data entries written for currently bound descriptor sets and + // their dynamic offsets in the current command buffer state. + pBindState->boundSetCount = Util::Max(pBindState->boundSetCount, rangeOffsetEnd); - // Update the high watermark of number of user data entries written for currently bound descriptor sets and - // their dynamic offsets in the current command buffer state. - pBindState->boundSetCount = Util::Max(pBindState->boundSetCount, rangeOffsetEnd); + // Descriptor set with zero resource binding is allowed in spec, so we need to check this and only proceed + // when there are at least 1 user data to update. + const uint32_t rangeRegCount = rangeOffsetEnd - rangeOffsetBegin; - // Descriptor set with zero resource binding is allowed in spec, so we need to check this and only proceed when - // there are at least 1 user data to update. 
- const uint32_t rangeRegCount = rangeOffsetEnd - rangeOffsetBegin; + if (rangeRegCount > 0) + { + // Program the user data register only if the current user data layout base matches that of the given + // layout. Otherwise, what's happening is that the application is binding descriptor sets for a future + // pipeline layout (e.g. at the top of the command buffer) and this register write will be redundant. + // A future vkCmdBindPipeline will reprogram the user data register. + if (PalPipelineBindingOwnedBy(palBindPoint, apiBindPoint) && + (pBindState->userDataLayout.compact.setBindingRegBase == + layoutInfo.userDataLayout.compact.setBindingRegBase)) + { + utils::IterateMask deviceGroup(m_curDeviceMask); + do + { + const uint32_t deviceIdx = deviceGroup.Index(); - if (rangeRegCount > 0) + PalCmdBuffer(deviceIdx)->CmdSetUserData( + palBindPoint, + pBindState->userDataLayout.compact.setBindingRegBase + rangeOffsetBegin, + rangeRegCount, + &(PerGpuState(deviceIdx)->setBindingData[apiBindPoint][rangeOffsetBegin])); + } + while (deviceGroup.IterateNext()); + } + } + } + else if (pLayout->GetScheme() == PipelineLayoutScheme::Indirect) { - // Program the user data register only if the current user data layout base matches that of the given - // layout. Otherwise, what's happening is that the application is binding descriptor sets for a future - // pipeline layout (e.g. at the top of the command buffer) and this register write will be redundant. A - // future vkCmdBindPipeline will reprogram the user data register. 
- if (PalPipelineBindingOwnedBy(palBindPoint, apiBindPoint) && - (pBindState->userDataLayout.setBindingRegBase == layoutInfo.userDataLayout.setBindingRegBase)) + const auto& userDataLayout = layoutInfo.userDataLayout.indirect; + + for (uint32_t setIdx = firstSet; setIdx < firstSet + setCount; ++setIdx) { + const PipelineLayout::SetUserDataLayout& setLayoutInfo = pLayout->GetSetUserData(setIdx); + utils::IterateMask deviceGroup(m_curDeviceMask); do { const uint32_t deviceIdx = deviceGroup.Index(); - PalCmdBuffer(deviceIdx)->CmdSetUserData( - palBindPoint, - pBindState->userDataLayout.setBindingRegBase + rangeOffsetBegin, - rangeRegCount, - &(PerGpuState(deviceIdx)->setBindingData[apiBindPoint][rangeOffsetBegin])); - } while (deviceGroup.IterateNext()); + if (setLayoutInfo.dynDescCount > 0) + { + const uint32_t dynBufferSizeDw = + setLayoutInfo.dynDescCount * DescriptorSetLayout::GetDynamicBufferDescDwSize(m_pDevice); + + Pal::gpusize gpuAddr; + + void* pCpuAddr = PalCmdBuffer(deviceIdx)->CmdAllocateEmbeddedData( + dynBufferSizeDw, + m_pDevice->GetProperties().descriptorSizes.alignment / sizeof(uint32_t), + &gpuAddr); + + const uint32_t gpuAddrLow = static_cast(gpuAddr); + + memcpy(pCpuAddr, + &(PerGpuState(deviceIdx)->setBindingData[apiBindPoint][setLayoutInfo.dynDescDataRegOffset]), + dynBufferSizeDw * sizeof(uint32_t)); + + PalCmdBuffer(deviceIdx)->CmdSetUserData( + palBindPoint, + userDataLayout.setBindingPtrRegBase + 2 * setIdx * PipelineLayout::SetPtrRegCount, + PipelineLayout::SetPtrRegCount, + &gpuAddrLow); + } + + if (setLayoutInfo.setPtrRegOffset != PipelineLayout::InvalidReg) + { + PalCmdBuffer(deviceIdx)->CmdSetUserData( + palBindPoint, + userDataLayout.setBindingPtrRegBase + (2 * setIdx + 1) * PipelineLayout::SetPtrRegCount, + PipelineLayout::SetPtrRegCount, + &(PerGpuState(deviceIdx)->setBindingData[apiBindPoint][setLayoutInfo.setPtrRegOffset])); + } + } + while (deviceGroup.IterateNext()); } } + else + { + VK_NEVER_CALLED(); + } } 
DbgBarrierPostCmd(DbgBarrierBindSetsPushConstants); @@ -2308,11 +2420,13 @@ void CmdBuffer::Draw( ValidateStates(); - PalCmdDraw(firstVertex, - vertexCount, - firstInstance, - instanceCount, - 0u); + { + PalCmdDraw(firstVertex, + vertexCount, + firstInstance, + instanceCount, + 0u); + } DbgBarrierPostCmd(DbgBarrierDrawNonIndexed); } @@ -2989,6 +3103,8 @@ void CmdBuffer::ClearDepthStencilImage( const Pal::ImageLayout layout = pImage->GetBarrierPolicy().GetTransferLayout( imageLayout, GetQueueFamilyIndex()); + ValidateSamplePattern(pImage->GetImageSamples(), nullptr); + for (uint32_t rangeIdx = 0; rangeIdx < rangeCount;) { uint32_t palRangeCount = 0; @@ -3041,6 +3157,19 @@ void CmdBuffer::ClearAttachments( uint32_t rectCount, const VkClearRect* pRects) { + // If pRenderPass is null, then dynamic rendering is being used + if (m_allGpuState.pRenderPass == nullptr) + { + if (m_flags.is2ndLvl == false) + { + ClearDynamicRenderingImages(attachmentCount, pAttachments, rectCount, pRects); + } + else + { + ClearDynamicRenderingBoundAttachments(attachmentCount, pAttachments, rectCount, pRects); + } + } + else { if ((m_flags.is2ndLvl == false) && (m_allGpuState.pFramebuffer != nullptr)) { @@ -3054,23 +3183,175 @@ void CmdBuffer::ClearAttachments( } // ===================================================================================================================== -// Clears a set of attachments in the current subpass using PAL's CmdClearBound*Targets commands.
+void CmdBuffer::ClearDynamicRenderingImages( uint32_t attachmentCount, const VkClearAttachment* pAttachments, uint32_t rectCount, const VkClearRect* pRects) { // Note: Bound target clears are pipelined by the HW, so we do not have to insert any barriers - VirtualStackFrame virtStackFrame(m_pStackAllocator); - // Get the current renderpass and subpass - const RenderPass* pRenderPass = m_allGpuState.pRenderPass; - const uint32_t subpass = m_renderPassInstance.subpass; + const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)); - Util::Vector clearRegions { &virtStackFrame }; - Util::Vector colorTargets { &virtStackFrame }; + for (uint32_t idx = 0; idx < attachmentCount; ++idx) + { + const VkClearAttachment& clearInfo = pAttachments[idx]; + + // Detect if color clear or depth clear + if ((clearInfo.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0) + { + DynamicRenderingAttachments& attachment = + m_allGpuState.dynamicRenderingInstance.colorAttachments[clearInfo.colorAttachment]; + + // Clear only if the referenced attachment index is active + if ((attachment.pImageView != nullptr) && (attachment.pImageView->GetImage() != nullptr)) + { + const Image* pImage = attachment.pImageView->GetImage(); + + Util::Vector clearBoxes{ &virtStackFrame }; + Util::Vector clearSubresRanges{ &virtStackFrame }; + + auto rectBatch = Util::Min(rectCount, maxRects); + const auto palResult1 = clearBoxes.Reserve(rectBatch); + const auto palResult2 = clearSubresRanges.Reserve(rectBatch); + + if ((palResult1 == Pal::Result::Success) && + (palResult2 == Pal::Result::Success)) + { + for (uint32_t rectIdx = 0; rectIdx < rectCount; rectIdx += rectBatch) + { + // Obtain the baseArrayLayer of the image view to apply it when clearing the image itself. 
+ const uint32_t zOffset = static_cast(attachment.pImageView->GetZRange().offset); + + rectBatch = Util::Min(rectCount - rectIdx, maxRects); + + CreateClearRegions( + rectCount, + (pRects + rectIdx), + m_allGpuState.dynamicRenderingInstance.viewMask, + zOffset, + &clearBoxes); + + CreateClearSubresRanges( + attachment.pImageView, + clearInfo, + rectCount, + pRects + rectIdx, + m_allGpuState.dynamicRenderingInstance.viewMask, + &clearSubresRanges); + + PalCmdClearColorImage( + *pImage, + attachment.imageLayout, + VkToPalClearColor(&clearInfo.clearValue.color, + VkToPalFormat(pImage->GetFormat(), m_pDevice->GetRuntimeSettings())), + clearSubresRanges.NumElements(), + clearSubresRanges.Data(), + clearBoxes.NumElements(), + clearBoxes.Data(), + Pal::ClearColorImageFlags::ColorClearAutoSync); + } + } + else + { + m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + } + else + { + DynamicRenderingAttachments& depthAttachment = m_allGpuState.dynamicRenderingInstance.depthAttachment; + DynamicRenderingAttachments& stencilAttachment = m_allGpuState.dynamicRenderingInstance.stencilAttachment; + + // Depth and Stencil Views are the same if both exist + Pal::ImageLayout imageLayout = {}; + const ImageView* pDepthStencilView = nullptr; + + if ((depthAttachment.pImageView != nullptr) && + ((clearInfo.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0)) + { + pDepthStencilView = depthAttachment.pImageView; + imageLayout = depthAttachment.imageLayout; + } + else if ((stencilAttachment.pImageView != nullptr) && + ((clearInfo.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0)) + { + pDepthStencilView = stencilAttachment.pImageView; + imageLayout = stencilAttachment.imageLayout; + } + + // Clear only if the referenced attachment index is active + if (pDepthStencilView != nullptr) + { + Util::Vector clearRects{ &virtStackFrame }; + Util::Vector clearSubresRanges{ &virtStackFrame }; + + auto rectBatch = Util::Min((rectCount * MaxPalDepthAspectsPerMask), maxRects); + const auto palResult1 
= clearRects.Reserve(rectBatch); + const auto palResult2 = clearSubresRanges.Reserve(rectBatch); + + if ((palResult1 == Pal::Result::Success) && + (palResult2 == Pal::Result::Success)) + { + ValidateSamplePattern(pDepthStencilView->GetImage()->GetImageSamples(), nullptr); + + for (uint32_t rectIdx = 0; rectIdx < rectCount; rectIdx += rectBatch) + { + // Obtain the baseArrayLayer of the image view to apply it when clearing the image itself. + const uint32_t zOffset = static_cast(pDepthStencilView->GetZRange().offset); + + rectBatch = Util::Min(rectCount - rectIdx, maxRects); + + CreateClearRects( + rectCount, + (pRects + rectIdx), + &clearRects); + + CreateClearSubresRanges( + pDepthStencilView, + clearInfo, + rectCount, + pRects + rectIdx, + m_allGpuState.dynamicRenderingInstance.viewMask, + &clearSubresRanges); + + PalCmdClearDepthStencil( + *pDepthStencilView->GetImage(), + imageLayout, + imageLayout, + VkToPalClearDepth(clearInfo.clearValue.depthStencil.depth), + clearInfo.clearValue.depthStencil.stencil, + clearSubresRanges.NumElements(), + clearSubresRanges.Data(), + clearRects.NumElements(), + clearRects.Data(), + Pal::ClearDepthStencilFlags::DsClearAutoSync); + } + } + else + { + m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + } + } +} + +// ===================================================================================================================== +// Clears a set of attachments in the current renderpass using PAL's CmdClearBound*Targets commands. 
+void CmdBuffer::ClearDynamicRenderingBoundAttachments( + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, + uint32_t rectCount, + const VkClearRect* pRects) +{ + // Note: Bound target clears are pipelined by the HW, so we do not have to insert any barriers + VirtualStackFrame virtStackFrame(m_pStackAllocator); + + Util::Vector clearRegions{ &virtStackFrame }; + Util::Vector colorTargets{ &virtStackFrame }; const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)); auto rectBatch = Util::Min(rectCount, maxRects); @@ -3089,69 +3370,71 @@ void CmdBuffer::ClearBoundAttachments( // Detect if color clear or depth clear if ((clearInfo.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0) { - // Get the corresponding color reference in the current subpass - const AttachmentReference& colorRef = pRenderPass->GetSubpassColorReference( - subpass, clearInfo.colorAttachment); + DynamicRenderingAttachments& attachment = + m_allGpuState.dynamicRenderingInstance.colorAttachments[clearInfo.colorAttachment]; + + // Fill in bound target information for this target, but don't clear yet + const uint32_t tgtIdx = clearInfo.colorAttachment; // Clear only if the attachment reference is active - if (colorRef.attachment != VK_ATTACHMENT_UNUSED) + if (tgtIdx != VK_ATTACHMENT_UNUSED) { - // Fill in bound target information for this target, but don't clear yet - const uint32_t tgtIdx = clearInfo.colorAttachment; - Pal::BoundColorTarget target = {}; - target.targetIndex = tgtIdx; - target.swizzledFormat = VkToPalFormat(pRenderPass->GetColorAttachmentFormat(subpass, tgtIdx), - m_pDevice->GetRuntimeSettings()); - target.samples = pRenderPass->GetColorAttachmentSamples(subpass, tgtIdx); - target.fragments = pRenderPass->GetColorAttachmentSamples(subpass, tgtIdx); - target.clearValue = VkToPalClearColor(&clearInfo.clearValue.color, target.swizzledFormat); + target.targetIndex = tgtIdx; + const ImageView* pImageView = + 
m_allGpuState.dynamicRenderingInstance.colorAttachments[tgtIdx].pImageView; + target.swizzledFormat = VkToPalFormat( + ((pImageView != nullptr) ? + pImageView->GetViewFormat() : + m_allGpuState.dynamicRenderingInstance.colorAttachments[tgtIdx].attachmentFormat), + m_pDevice->GetRuntimeSettings()); + target.samples = + m_allGpuState.dynamicRenderingInstance.colorAttachments[tgtIdx].rasterizationSamples; + + target.fragments = + m_allGpuState.dynamicRenderingInstance.colorAttachments[tgtIdx].rasterizationSamples; + + target.clearValue = + VkToPalClearColor(&clearInfo.clearValue.color, target.swizzledFormat); colorTargets.PushBack(target); } } else // Depth-stencil clear { - // Get the corresponding color reference in the current subpass - const AttachmentReference& depthStencilRef = pRenderPass->GetSubpassDepthStencilReference(subpass); - - // Clear only if the attachment reference is active - if (depthStencilRef.attachment != VK_ATTACHMENT_UNUSED) - { - Pal::DepthStencilSelectFlags selectFlags = {}; - - selectFlags.depth = ((clearInfo.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0); - selectFlags.stencil = ((clearInfo.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0); - - DbgBarrierPreCmd(DbgBarrierClearDepth); - - for (uint32_t rectIdx = 0; rectIdx < rectCount; rectIdx += rectBatch) - { - rectBatch = Util::Min(rectCount - rectIdx, maxRects); - - uint32_t viewMask = pRenderPass->GetViewMask(subpass); + Pal::DepthStencilSelectFlags selectFlags = {}; - CreateClearRegions( - rectBatch, - pRects + rectIdx, - viewMask, - 0u, - &clearRegions); + selectFlags.depth = ((clearInfo.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0); + selectFlags.stencil = ((clearInfo.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0); - // Clear the bound depth stencil target immediately - PalCmdBuffer(DefaultDeviceIndex)->CmdClearBoundDepthStencilTargets( - VkToPalClearDepth(clearInfo.clearValue.depthStencil.depth), - clearInfo.clearValue.depthStencil.stencil, - StencilWriteMaskFull, - 
pRenderPass->GetDepthStencilAttachmentSamples(subpass), - pRenderPass->GetDepthStencilAttachmentSamples(subpass), - selectFlags, - clearRegions.NumElements(), - clearRegions.Data()); - } + DbgBarrierPreCmd(DbgBarrierClearDepth); - DbgBarrierPostCmd(DbgBarrierClearDepth); + for (uint32_t rectIdx = 0; rectIdx < rectCount; rectIdx += rectBatch) + { + rectBatch = Util::Min(rectCount - rectIdx, maxRects); + + uint32_t viewMask = m_allGpuState.dynamicRenderingInstance.viewMask; + + CreateClearRegions( + rectBatch, + pRects + rectIdx, + viewMask, + 0u, + &clearRegions); + + // Clear the bound depth stencil target immediately + PalCmdBuffer(DefaultDeviceIndex)->CmdClearBoundDepthStencilTargets( + VkToPalClearDepth(clearInfo.clearValue.depthStencil.depth), + clearInfo.clearValue.depthStencil.stencil, + StencilWriteMaskFull, + m_allGpuState.dynamicRenderingInstance.depthAttachment.rasterizationSamples, + m_allGpuState.dynamicRenderingInstance.depthAttachment.rasterizationSamples, + selectFlags, + clearRegions.NumElements(), + clearRegions.Data()); } + + DbgBarrierPostCmd(DbgBarrierClearDepth); } } @@ -3163,7 +3446,7 @@ void CmdBuffer::ClearBoundAttachments( { rectBatch = Util::Min(rectCount - rectIdx, maxRects); - uint32_t viewMask = pRenderPass->GetViewMask(subpass); + uint32_t viewMask = m_allGpuState.dynamicRenderingInstance.viewMask; CreateClearRegions( rectBatch, @@ -3186,9 +3469,141 @@ void CmdBuffer::ClearBoundAttachments( } // ===================================================================================================================== -void CmdBuffer::PalCmdClearColorImage( - const Image& image, - Pal::ImageLayout imageLayout, +// Clears a set of attachments in the current subpass using PAL's CmdClearBound*Targets commands. 
+void CmdBuffer::ClearBoundAttachments( + uint32_t attachmentCount, + const VkClearAttachment* pAttachments, + uint32_t rectCount, + const VkClearRect* pRects) +{ + // Note: Bound target clears are pipelined by the HW, so we do not have to insert any barriers + + VirtualStackFrame virtStackFrame(m_pStackAllocator); + + // Get the current renderpass and subpass + const RenderPass* pRenderPass = m_allGpuState.pRenderPass; + const uint32_t subpass = m_renderPassInstance.subpass; + + Util::Vector clearRegions { &virtStackFrame }; + Util::Vector colorTargets { &virtStackFrame }; + + const auto maxRects = EstimateMaxObjectsOnVirtualStack(sizeof(*pRects)); + auto rectBatch = Util::Min(rectCount, maxRects); + const auto palResult1 = clearRegions.Reserve(rectBatch); + const auto palResult2 = colorTargets.Reserve(attachmentCount); + + m_recordingResult = ((palResult1 == Pal::Result::Success) && + (palResult2 == Pal::Result::Success)) ? VK_SUCCESS : VK_ERROR_OUT_OF_HOST_MEMORY; + + if (m_recordingResult == VK_SUCCESS) + { + for (uint32_t idx = 0; idx < attachmentCount; ++idx) + { + const VkClearAttachment& clearInfo = pAttachments[idx]; + + // Detect if color clear or depth clear + if ((clearInfo.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0) + { + // Get the corresponding color reference in the current subpass + const AttachmentReference& colorRef = pRenderPass->GetSubpassColorReference( + subpass, clearInfo.colorAttachment); + + // Clear only if the attachment reference is active + if (colorRef.attachment != VK_ATTACHMENT_UNUSED) + { + // Fill in bound target information for this target, but don't clear yet + const uint32_t tgtIdx = clearInfo.colorAttachment; + + Pal::BoundColorTarget target = {}; + target.targetIndex = tgtIdx; + target.swizzledFormat = VkToPalFormat(pRenderPass->GetColorAttachmentFormat(subpass, tgtIdx), + m_pDevice->GetRuntimeSettings()); + target.samples = pRenderPass->GetColorAttachmentSamples(subpass, tgtIdx); + target.fragments = 
pRenderPass->GetColorAttachmentSamples(subpass, tgtIdx); + target.clearValue = VkToPalClearColor(&clearInfo.clearValue.color, target.swizzledFormat); + + colorTargets.PushBack(target); + } + } + else // Depth-stencil clear + { + // Get the corresponding color reference in the current subpass + const AttachmentReference& depthStencilRef = pRenderPass->GetSubpassDepthStencilReference(subpass); + + // Clear only if the attachment reference is active + if (depthStencilRef.attachment != VK_ATTACHMENT_UNUSED) + { + Pal::DepthStencilSelectFlags selectFlags = {}; + + selectFlags.depth = ((clearInfo.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0); + selectFlags.stencil = ((clearInfo.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0); + + DbgBarrierPreCmd(DbgBarrierClearDepth); + + for (uint32_t rectIdx = 0; rectIdx < rectCount; rectIdx += rectBatch) + { + rectBatch = Util::Min(rectCount - rectIdx, maxRects); + + uint32_t viewMask = pRenderPass->GetViewMask(subpass); + + CreateClearRegions( + rectBatch, + pRects + rectIdx, + viewMask, + 0u, + &clearRegions); + + // Clear the bound depth stencil target immediately + PalCmdBuffer(DefaultDeviceIndex)->CmdClearBoundDepthStencilTargets( + VkToPalClearDepth(clearInfo.clearValue.depthStencil.depth), + clearInfo.clearValue.depthStencil.stencil, + StencilWriteMaskFull, + pRenderPass->GetDepthStencilAttachmentSamples(subpass), + pRenderPass->GetDepthStencilAttachmentSamples(subpass), + selectFlags, + clearRegions.NumElements(), + clearRegions.Data()); + } + + DbgBarrierPostCmd(DbgBarrierClearDepth); + } + } + } + + if (colorTargets.NumElements() > 0) + { + DbgBarrierPreCmd(DbgBarrierClearColor); + + for (uint32_t rectIdx = 0; rectIdx < rectCount; rectIdx += rectBatch) + { + rectBatch = Util::Min(rectCount - rectIdx, maxRects); + + uint32_t viewMask = pRenderPass->GetViewMask(subpass); + + CreateClearRegions( + rectBatch, + pRects + rectIdx, + viewMask, + 0u, + &clearRegions); + + // Clear the bound color targets + 
PalCmdBuffer(DefaultDeviceIndex)->CmdClearBoundColorTargets( + colorTargets.NumElements(), + colorTargets.Data(), + clearRegions.NumElements(), + clearRegions.Data()); + } + + DbgBarrierPostCmd(DbgBarrierClearColor); + } + } +} + +// ===================================================================================================================== +void CmdBuffer::PalCmdClearColorImage( + const Image& image, + Pal::ImageLayout imageLayout, const Pal::ClearColor& color, uint32_t rangeCount, const Pal::SubresRange* pRanges, @@ -3452,6 +3867,8 @@ void CmdBuffer::ClearImageAttachments( if ((palResult1 == Pal::Result::Success) && (palResult2 == Pal::Result::Success)) { + ValidateSamplePattern(attachment.pImage->GetImageSamples(), nullptr); + for (uint32_t rectIdx = 0; rectIdx < rectCount; rectIdx += rectBatch) { rectBatch = Util::Min(rectCount - rectIdx, maxRects); @@ -3531,101 +3948,577 @@ void CmdBuffer::ResolveImage( const Pal::ImageLayout palDestImageLayout = pDstImage->GetBarrierPolicy().GetTransferLayout( destImageLayout, GetQueueFamilyIndex()); - for (uint32_t rectIdx = 0; rectIdx < rectCount;) + if (pSrcImage->IsDepthStencilFormat()) + { + ValidateSamplePattern(pSrcImage->GetImageSamples(), nullptr); + } + + for (uint32_t rectIdx = 0; rectIdx < rectCount;) + { + uint32_t palRegionCount = 0; + + while ((rectIdx < rectCount) && + (palRegionCount <= (rectBatch - MaxPalAspectsPerMask))) + { + // We expect MSAA images to never have mipmaps + VK_ASSERT(pRects[rectIdx].srcSubresource.mipLevel == 0); + + VkToPalImageResolveRegion(pRects[rectIdx], srcFormat.format, dstFormat.format, pPalRegions, &palRegionCount); + + ++rectIdx; + } + + PalCmdResolveImage( + *pSrcImage, + palSrcImageLayout, + *pDstImage, + palDestImageLayout, + Pal::ResolveMode::Average, + palRegionCount, + pPalRegions, + m_curDeviceMask); + } + + virtStackFrame.FreeArray(pPalRegions); + } + else + { + m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; + } + + PalCmdSuspendPredication(false); +} + +// 
===================================================================================================================== +// Implementation of vkCmdSetEvent() +void CmdBuffer::SetEvent( + VkEvent event, + PipelineStageFlags stageMask) +{ + DbgBarrierPreCmd(DbgBarrierSetResetEvent); + + PalCmdSetEvent(Event::ObjectFromHandle(event), VkToPalSrcPipePoint(stageMask)); + + DbgBarrierPostCmd(DbgBarrierSetResetEvent); +} + +// ===================================================================================================================== +// Implementation of vkCmdSetEvent2KHR() +void CmdBuffer::SetEvent2( + VkEvent event, + const VkDependencyInfoKHR* pDependencyInfo) +{ + DbgBarrierPreCmd(DbgBarrierSetResetEvent); + + if (m_flags.useSplitReleaseAcquire) + { + utils::IterateMask deviceGroup(m_curDeviceMask); + do + { + ExecuteAcquireRelease(1, + &event, + deviceGroup.Index(), + 1, + pDependencyInfo, + Release, + RgpBarrierExternalCmdWaitEvents); + } + while (deviceGroup.IterateNext()); + } + else + { + PipelineStageFlags stageMask = 0; + + for(uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) + { + stageMask |= pDependencyInfo->pMemoryBarriers[i].srcStageMask; + } + + for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++) + { + stageMask |= pDependencyInfo->pBufferMemoryBarriers[i].srcStageMask; + } + + for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) + { + stageMask |= pDependencyInfo->pImageMemoryBarriers[i].srcStageMask; + } + + PalCmdSetEvent(Event::ObjectFromHandle(event), VkToPalSrcPipePoint(stageMask)); + } + + DbgBarrierPostCmd(DbgBarrierSetResetEvent); +} + +// ===================================================================================================================== +// Returns attachment's PAL subresource ranges defined by clearInfo for Dynamic Rendering LoadOp Clear. +// When multiview is enabled, layer ranges are modified according active views during a renderpass. 
+Util::Vector<Pal::SubresRange, MaxRangePerAttachment * Pal::MaxViewInstanceCount, Util::GenericAllocator> +LoadOpClearSubresRanges( + const uint32_t& viewMask, + const Pal::SubresRange& subresRange) +{ + // Note that no allocation will be performed, so Util::Vector allocator is nullptr. + Util::Vector<Pal::SubresRange, MaxRangePerAttachment * Pal::MaxViewInstanceCount, Util::GenericAllocator> clearSubresRanges{ nullptr }; + + if (viewMask > 0) + { + const auto layerRanges = RangesOfOnesInBitMask(viewMask); + + for (auto layerRangeIt = layerRanges.Begin(); layerRangeIt.IsValid(); layerRangeIt.Next()) + { + clearSubresRanges.PushBack(subresRange); + clearSubresRanges.Back().startSubres.arraySlice += layerRangeIt.Get().offset; + clearSubresRanges.Back().numSlices = layerRangeIt.Get().extent; + } + } + else + { + clearSubresRanges.PushBack(subresRange); + } + + return clearSubresRanges; +} + +// ===================================================================================================================== +// Clear Color for VK_KHR_dynamic_rendering +void CmdBuffer::LoadOpClearColor( + const Pal::Rect* pDeviceGroupRenderArea, + const VkRenderingInfoKHR* pRenderingInfo) +{ + for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; ++i) + { + const VkRenderingAttachmentInfoKHR& attachmentInfo = pRenderingInfo->pColorAttachments[i]; + + if (attachmentInfo.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + { + // Get the image view from the attachment info + const ImageView* const pImageView = ImageView::ObjectFromHandle(attachmentInfo.imageView); + + // Get the attachment image + const Image* pImage = pImageView->GetImage(); + + // Convert the clear color to the format of the attachment view + Pal::ClearColor clearColor = VkToPalClearColor( + &(attachmentInfo.clearValue.color), + VkToPalFormat(pImageView->GetViewFormat(), m_pDevice->GetRuntimeSettings())); + + // Get subres range from the image view + Pal::SubresRange subresRange = {}; + pImageView->GetFrameBufferAttachmentSubresRange(&subresRange); + + const auto clearSubresRanges = LoadOpClearSubresRanges( + pRenderingInfo->viewMask, + subresRange); + + // Clear Layout + const Pal::ImageLayout 
clearLayout = pImage->GetBarrierPolicy().GetAspectLayout( + attachmentInfo.imageLayout, + subresRange.startSubres.plane, + GetQueueFamilyIndex(), + pImage->GetFormat()); + + utils::IterateMask deviceGroup(GetDeviceMask()); + + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + + // Clear Box + Pal::Box clearBox = BuildClearBox( + pDeviceGroupRenderArea[deviceIdx], + *pImageView); + + PalCmdBuffer(deviceIdx)->CmdClearColorImage( + *pImage->PalImage(deviceIdx), + clearLayout, + clearColor, + clearSubresRanges.NumElements(), + clearSubresRanges.Data(), + 1, + &clearBox, + Pal::ColorClearAutoSync); + } + while (deviceGroup.IterateNext()); + } + } +} + +// ===================================================================================================================== +// Clear Depth Stencil for VK_KHR_dynamic_rendering +void CmdBuffer::LoadOpClearDepthStencil( + const Pal::Rect* pDeviceGroupRenderArea, + const VkRenderingInfoKHR* pRenderingInfo) +{ + // Note that no allocation will be performed, so Util::Vector allocator is nullptr. 
+ Util::Vector clearSubresRanges{ nullptr }; + + const Image* pDepthStencilImage = nullptr; + + Pal::SubresRange subresRange = {}; + Pal::ImageLayout depthLayout = {}; + Pal::ImageLayout stencilLayout = {}; + + float clearDepth = 0.0f; + uint8 clearStencil = 0; + + const VkRenderingAttachmentInfoKHR* pDepthAttachmentInfo = pRenderingInfo->pDepthAttachment; + const VkRenderingAttachmentInfoKHR* pStencilAttachmentInfo = pRenderingInfo->pStencilAttachment; + + if ((pDepthAttachmentInfo != nullptr) && + (pDepthAttachmentInfo->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)) + { + const ImageView* const pDepthImageView = ImageView::ObjectFromHandle(pDepthAttachmentInfo->imageView); + + pDepthStencilImage = pDepthImageView->GetImage(); + + GetImageLayout( + pDepthAttachmentInfo->imageView, + pDepthAttachmentInfo->imageLayout, + VK_IMAGE_ASPECT_DEPTH_BIT, + &subresRange, + &depthLayout); + + clearSubresRanges.PushBack(subresRange); + + clearDepth = pDepthAttachmentInfo->clearValue.depthStencil.depth; + } + + if ((pStencilAttachmentInfo != nullptr) && + (pStencilAttachmentInfo->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)) + { + const ImageView* const pStencilImageView = ImageView::ObjectFromHandle(pStencilAttachmentInfo->imageView); + + pDepthStencilImage = pStencilImageView->GetImage(); + + GetImageLayout( + pStencilAttachmentInfo->imageView, + pStencilAttachmentInfo->imageLayout, + VK_IMAGE_ASPECT_STENCIL_BIT, + &subresRange, + &stencilLayout); + + clearSubresRanges.PushBack(subresRange); + + clearStencil = pStencilAttachmentInfo->clearValue.depthStencil.stencil; + } + + if (pDepthStencilImage != nullptr) + { + ValidateSamplePattern(pDepthStencilImage->GetImageSamples(), nullptr); + + utils::IterateMask deviceGroup(GetDeviceMask()); + + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + + PalCmdBuffer(deviceIdx)->CmdClearDepthStencil( + *pDepthStencilImage->PalImage(deviceIdx), + depthLayout, + stencilLayout, + clearDepth, + clearStencil, + StencilWriteMaskFull, + 
clearSubresRanges.NumElements(), + clearSubresRanges.Data(), + 1, + &(pDeviceGroupRenderArea[deviceIdx]), + Pal::DsClearAutoSync); + } + while (deviceGroup.IterateNext()); + } +} + +// ===================================================================================================================== +// StoreAttachment for VK_KHR_dynamic_rendering +void CmdBuffer::StoreAttachmentInfo( + const VkRenderingAttachmentInfoKHR& renderingAttachmentInfo, + DynamicRenderingAttachments* pDynamicRenderingAttachement) +{ + const ImageView* const pImageView = ImageView::ObjectFromHandle(renderingAttachmentInfo.imageView); + + if(pImageView != nullptr) + { + const Image* pColorImage = pImageView->GetImage(); + + Pal::ImageLayout colorImageLayout = pColorImage->GetAttachmentLayout( + { renderingAttachmentInfo.imageLayout, 0 }, + 0, + this); + + pDynamicRenderingAttachement->resolveMode = renderingAttachmentInfo.resolveMode; + pDynamicRenderingAttachement->pImageView = pImageView; + pDynamicRenderingAttachement->imageLayout = colorImageLayout; + pDynamicRenderingAttachement->pResolveImageView = ImageView::ObjectFromHandle( + renderingAttachmentInfo.resolveImageView); + + if (pDynamicRenderingAttachement->pResolveImageView != nullptr) + { + const Image* pResolveImage = pDynamicRenderingAttachement->pResolveImageView->GetImage(); + + if (pResolveImage != nullptr) + { + pDynamicRenderingAttachement->resolveImageLayout = + pResolveImage->GetAttachmentLayout( + { renderingAttachmentInfo.resolveImageLayout, Pal::LayoutResolveDst }, 0, this); + } + } + } +} + +// ===================================================================================================================== +// vkCmdBeginRendering for VK_KHR_dynamic_rendering +void CmdBuffer::BeginRendering( + const VkRenderingInfoKHR* pRenderingInfo) +{ + VK_ASSERT(pRenderingInfo != nullptr); + + DbgBarrierPreCmd(DbgBarrierBeginRendering); + + bool isResuming = (pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT_KHR); + bool 
isSuspended = (pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT_KHR); + + bool skipEverything = isResuming && m_flags.isRenderingSuspended; + bool skipClears = isResuming && (m_flags.isRenderingSuspended == false); + + if (!skipEverything) + { + EXTRACT_VK_STRUCTURES_2( + RENDERING_INFO_KHR, + RenderingInfoKHR, + DeviceGroupRenderPassBeginInfo, + RenderingFragmentShadingRateAttachmentInfoKHR, + pRenderingInfo, + RENDER_PASS_BEGIN_INFO, + DEVICE_GROUP_RENDER_PASS_BEGIN_INFO, + RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR) + + bool replicateRenderArea = true; + + if (pDeviceGroupRenderPassBeginInfo != nullptr) + { + SetDeviceMask(pDeviceGroupRenderPassBeginInfo->deviceMask); + + m_allGpuState.dynamicRenderingInstance.renderAreaCount = + pDeviceGroupRenderPassBeginInfo->deviceRenderAreaCount; + + VK_ASSERT(m_allGpuState.dynamicRenderingInstance.renderAreaCount <= MaxPalDevices); + + VK_ASSERT(m_renderPassInstance.renderAreaCount <= MaxPalDevices); + + if (pDeviceGroupRenderPassBeginInfo->deviceRenderAreaCount > 0) + { + utils::IterateMask deviceGroup(pDeviceGroupRenderPassBeginInfo->deviceMask); + + VK_ASSERT(m_numPalDevices == pDeviceGroupRenderPassBeginInfo->deviceRenderAreaCount); + + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + + const VkRect2D& srcRect = pDeviceGroupRenderPassBeginInfo->pDeviceRenderAreas[deviceIdx]; + auto* pDstRect = &m_allGpuState.dynamicRenderingInstance.renderArea[deviceIdx]; + + *pDstRect = VkToPalRect(srcRect); + } + while (deviceGroup.IterateNext()); + + replicateRenderArea = false; + } + } + + if (replicateRenderArea) + { + m_allGpuState.dynamicRenderingInstance.renderAreaCount = m_numPalDevices; + + const auto& srcRect = pRenderingInfo->renderArea; + + for (uint32_t deviceIdx = 0; deviceIdx < m_numPalDevices; deviceIdx++) + { + auto* pDstRect = &m_allGpuState.dynamicRenderingInstance.renderArea[deviceIdx]; + + *pDstRect = VkToPalRect(srcRect); + } + } + + Pal::GlobalScissorParams scissorParams = {}; + 
scissorParams.scissorRegion = VkToPalRect(pRenderingInfo->renderArea); + + utils::IterateMask deviceGroup(GetDeviceMask()); + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + PalCmdBuffer(deviceIdx)->CmdSetGlobalScissor(scissorParams); + } + while (deviceGroup.IterateNext()); + + if (!skipClears) { - uint32_t palRegionCount = 0; + LoadOpClearColor( + m_allGpuState.dynamicRenderingInstance.renderArea, + pRenderingInfo); - while ((rectIdx < rectCount) && - (palRegionCount <= (rectBatch - MaxPalAspectsPerMask))) - { - // We expect MSAA images to never have mipmaps - VK_ASSERT(pRects[rectIdx].srcSubresource.mipLevel == 0); + LoadOpClearDepthStencil( + m_allGpuState.dynamicRenderingInstance.renderArea, + pRenderingInfo); + } - VkToPalImageResolveRegion(pRects[rectIdx], srcFormat.format, dstFormat.format, pPalRegions, &palRegionCount); + BindTargets( + pRenderingInfo, + pRenderingFragmentShadingRateAttachmentInfoKHR); - ++rectIdx; - } + uint32_t numMultiViews = Util::CountSetBits(pRenderingInfo->viewMask); + uint32_t viewInstanceMask = (numMultiViews > 0) ? 
pRenderingInfo->viewMask : GetDeviceMask(); + PalCmdBuffer(DefaultDeviceIndex)->CmdSetViewInstanceMask(viewInstanceMask); + } - PalCmdResolveImage( - *pSrcImage, - palSrcImageLayout, - *pDstImage, - palDestImageLayout, - Pal::ResolveMode::Average, - palRegionCount, - pPalRegions, - m_curDeviceMask); - } + m_allGpuState.dynamicRenderingInstance.viewMask = pRenderingInfo->viewMask; + m_allGpuState.dynamicRenderingInstance.colorAttachmentCount = pRenderingInfo->colorAttachmentCount; - virtStackFrame.FreeArray(pPalRegions); + for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; ++i) + { + const VkRenderingAttachmentInfoKHR& colorAttachmentInfo = pRenderingInfo->pColorAttachments[i]; + + StoreAttachmentInfo( + colorAttachmentInfo, + &m_allGpuState.dynamicRenderingInstance.colorAttachments[i]); } - else + + if (pRenderingInfo->pDepthAttachment != nullptr) { - m_recordingResult = VK_ERROR_OUT_OF_HOST_MEMORY; + const VkRenderingAttachmentInfoKHR& depthAttachmentInfo = *pRenderingInfo->pDepthAttachment; + + StoreAttachmentInfo( + depthAttachmentInfo, + &m_allGpuState.dynamicRenderingInstance.depthAttachment); } - PalCmdSuspendPredication(false); -} + if (pRenderingInfo->pStencilAttachment != nullptr) + { + const VkRenderingAttachmentInfoKHR& stencilAttachmentInfo = *pRenderingInfo->pStencilAttachment; -// ===================================================================================================================== -// Implementation of vkCmdSetEvent() -void CmdBuffer::SetEvent( - VkEvent event, - PipelineStageFlags stageMask) -{ - DbgBarrierPreCmd(DbgBarrierSetResetEvent); + StoreAttachmentInfo( + stencilAttachmentInfo, + &m_allGpuState.dynamicRenderingInstance.stencilAttachment); + } - PalCmdSetEvent(Event::ObjectFromHandle(event), VkToPalSrcPipePoint(stageMask)); + m_flags.isRenderingSuspended = isSuspended; - DbgBarrierPostCmd(DbgBarrierSetResetEvent); + DbgBarrierPostCmd(DbgBarrierBeginRendering); } // 
===================================================================================================================== -// Implementation of vkCmdSetEvent2KHR() -void CmdBuffer::SetEvent2( - VkEvent event, - const VkDependencyInfoKHR* pDependencyInfo) +// Call resolve image for VK_KHR_dynamic_rendering +void CmdBuffer::ResolveImage( + const DynamicRenderingAttachments& dynamicRenderingAttachments) { - DbgBarrierPreCmd(DbgBarrierSetResetEvent); + // Save PAL Resolve Region + VkImageAspectFlags aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - if (m_flags.useSplitReleaseAcquire) + Pal::ImageResolveRegion regions[MaxPalDevices] = {}; + + for (uint32_t idx = 0; idx < m_allGpuState.dynamicRenderingInstance.renderAreaCount; idx++) { - utils::IterateMask deviceGroup(m_curDeviceMask); - do + const Pal::Rect& renderArea = m_allGpuState.dynamicRenderingInstance.renderArea[idx]; + Pal::SubresRange subresRangeSrc = {}; + Pal::SubresRange subresRangeDst = {}; + + dynamicRenderingAttachments.pResolveImageView->GetFrameBufferAttachmentSubresRange(&subresRangeDst); + dynamicRenderingAttachments.pImageView->GetFrameBufferAttachmentSubresRange(&subresRangeSrc); + + const uint32_t sliceCount = Util::Min(subresRangeSrc.numSlices, + subresRangeDst.numSlices); + + regions[idx].swizzledFormat = Pal::UndefinedSwizzledFormat; + regions[idx].extent.width = renderArea.extent.width; + regions[idx].extent.height = renderArea.extent.height; + regions[idx].extent.depth = 1; + regions[idx].numSlices = 1; + regions[idx].srcOffset.x = renderArea.offset.x; + regions[idx].srcOffset.y = renderArea.offset.y; + regions[idx].srcOffset.z = 0; + regions[idx].dstOffset.x = renderArea.offset.x; + regions[idx].dstOffset.y = renderArea.offset.y; + regions[idx].dstOffset.z = 0; + regions[idx].dstMipLevel = subresRangeDst.startSubres.mipLevel; + regions[idx].dstSlice = subresRangeDst.startSubres.arraySlice; + regions[idx].numSlices = sliceCount; + } + + PalCmdResolveImage( + 
*dynamicRenderingAttachments.pImageView->GetImage(), + dynamicRenderingAttachments.imageLayout, + *dynamicRenderingAttachments.pResolveImageView->GetImage(), + dynamicRenderingAttachments.resolveImageLayout, + VkToPalResolveMode(dynamicRenderingAttachments.resolveMode), + m_allGpuState.dynamicRenderingInstance.renderAreaCount, + regions, + m_curDeviceMask); + + if (dynamicRenderingAttachments.pResolveImageView->GetImage()->IsDepthStencilFormat() && + dynamicRenderingAttachments.pImageView->GetImage()->IsDepthStencilFormat()) + { + for (uint32_t idx = 0; idx < m_allGpuState.dynamicRenderingInstance.renderAreaCount; idx++) { - ExecuteAcquireRelease(1, - &event, - deviceGroup.Index(), - 1, - pDependencyInfo, - Release, - RgpBarrierExternalCmdWaitEvents); + regions[idx].srcPlane = 1; + regions[idx].dstPlane = 1; } - while (deviceGroup.IterateNext()); + + ValidateSamplePattern(dynamicRenderingAttachments.pImageView->GetImage()->GetImageSamples(), nullptr); + + PalCmdResolveImage( + *dynamicRenderingAttachments.pImageView->GetImage(), + dynamicRenderingAttachments.imageLayout, + *dynamicRenderingAttachments.pResolveImageView->GetImage(), + dynamicRenderingAttachments.resolveImageLayout, + VkToPalResolveMode(dynamicRenderingAttachments.resolveMode), + m_allGpuState.dynamicRenderingInstance.renderAreaCount, + regions, + m_curDeviceMask); } - else - { - PipelineStageFlags stageMask = 0; +} - for(uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) +// ===================================================================================================================== +// vkCmdEndRendering for VK_KHR_dynamic_rendering +void CmdBuffer::EndRendering() +{ + DbgBarrierPreCmd(DbgBarrierEndRenderPass); + + // Only do resolves if renderpass isn't suspended + if (m_flags.isRenderingSuspended == false) + { + // Resolve Color Images + for (uint32_t i = 0; i < m_allGpuState.dynamicRenderingInstance.colorAttachmentCount; ++i) { - stageMask |= 
pDependencyInfo->pMemoryBarriers[i].srcStageMask; + DynamicRenderingAttachments& renderingAttachmentInfo = + m_allGpuState.dynamicRenderingInstance.colorAttachments[i]; + + if (renderingAttachmentInfo.pResolveImageView != nullptr) + { + ResolveImage(renderingAttachmentInfo); + } } - for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++) + // Resolve Depth Image + if (m_allGpuState.dynamicRenderingInstance.depthAttachment.pResolveImageView != nullptr) { - stageMask |= pDependencyInfo->pBufferMemoryBarriers[i].srcStageMask; + ResolveImage(m_allGpuState.dynamicRenderingInstance.depthAttachment); } - for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) + // Resolve Stencil Image + if (m_allGpuState.dynamicRenderingInstance.stencilAttachment.pResolveImageView != nullptr) { - stageMask |= pDependencyInfo->pImageMemoryBarriers[i].srcStageMask; + ResolveImage(m_allGpuState.dynamicRenderingInstance.stencilAttachment); } - - PalCmdSetEvent(Event::ObjectFromHandle(event), VkToPalSrcPipePoint(stageMask)); } - DbgBarrierPostCmd(DbgBarrierSetResetEvent); + // Reset attachment counts at End of Rendering + m_allGpuState.dynamicRenderingInstance.colorAttachmentCount = 0; + m_allGpuState.dynamicRenderingInstance.depthAttachment = {}; + m_allGpuState.dynamicRenderingInstance.stencilAttachment = {}; + + DbgBarrierPostCmd(DbgBarrierEndRenderPass); } // ===================================================================================================================== @@ -4778,11 +5671,11 @@ void CmdBuffer::BeginQueryIndexed( // // Implementations may write the total result to the first query and // write zero to the other queries. - if (((pRenderPass != nullptr) && pRenderPass->IsMultiviewEnabled()) - ) + if (((pRenderPass != nullptr) && pRenderPass->IsMultiviewEnabled()) || + (m_allGpuState.dynamicRenderingInstance.viewMask != 0)) { const auto viewMask = (pRenderPass != nullptr) ? 
pRenderPass->GetViewMask(m_renderPassInstance.subpass) : - 0; + m_allGpuState.dynamicRenderingInstance.viewMask; const auto viewCount = Util::CountSetBits(viewMask); @@ -5319,11 +6212,11 @@ void CmdBuffer::WriteTimestamp( // // The first query is a timestamp value and (if more than one bit is set in the view mask) // zero is written to the remaining queries. - if (((pRenderPass != nullptr) && pRenderPass->IsMultiviewEnabled()) - ) + if (((pRenderPass != nullptr) && pRenderPass->IsMultiviewEnabled()) || + (m_allGpuState.dynamicRenderingInstance.viewMask != 0)) { const auto viewMask = (pRenderPass != nullptr) ? pRenderPass->GetViewMask(m_renderPassInstance.subpass) : - 0; + m_allGpuState.dynamicRenderingInstance.viewMask; const auto viewCount = Util::CountSetBits(viewMask); VK_ASSERT(viewCount > 0); @@ -5363,28 +6256,11 @@ void CmdBuffer::SetSampleLocations( uint32_t sampleLocationsPerPixel = (uint32_t)pSampleLocationsInfo->sampleLocationsPerPixel; ConvertToPalMsaaQuadSamplePattern(pSampleLocationsInfo, &locations); - PalCmdSetMsaaQuadSamplePattern(sampleLocationsPerPixel, locations); - - m_allGpuState.staticTokens.samplePattern = DynamicRenderStateToken; -} -// ===================================================================================================================== -// Programs the current GPU sample pattern to the one belonging to the given subpass in a current render pass instance -void CmdBuffer::RPInitSamplePattern() -{ - const RenderPass* pRenderPass = m_allGpuState.pRenderPass; - - if (pRenderPass->GetAttachmentCount() > 0) - { - const SamplePattern* pSamplePattern = &m_renderPassInstance.pSamplePatterns[0]; + m_allGpuState.samplePattern.sampleCount = sampleLocationsPerPixel; + m_allGpuState.samplePattern.locations = locations; - if (pSamplePattern->sampleCount > 0) - { - PalCmdSetMsaaQuadSamplePattern( - pSamplePattern->sampleCount, - pSamplePattern->locations); - } - } + m_allGpuState.dirtyGraphics.samplePattern = 1; } // 
===================================================================================================================== @@ -5496,6 +6372,8 @@ void CmdBuffer::BeginRenderPass( if (m_renderPassInstance.pAttachments != nullptr) { m_renderPassInstance.maxAttachmentCount = maxAttachmentCount; + memset(m_renderPassInstance.pAttachments, 0, + maxAttachmentCount * sizeof(RenderPassInstanceState::AttachmentState)); } else { @@ -5599,26 +6477,6 @@ void CmdBuffer::BeginRenderPass( } } - for (uint32_t subpassIndex = 0; subpassIndex < subpassCount; subpassIndex++) - { - const uint32_t subpassMaxSampleCount = - m_allGpuState.pRenderPass->GetSubpassMaxSampleCount(m_renderPassInstance.subpass); - - if (subpassMaxSampleCount > 0) - { - // If sample patterns are set in a bound pipeline, use those as the defaults - const Pal::MsaaQuadSamplePattern* pipelineSampleLocations = - ((m_allGpuState.pGraphicsPipeline != nullptr) && - m_allGpuState.pGraphicsPipeline->CustomSampleLocationsEnabled()) ? - m_allGpuState.pGraphicsPipeline->GetSampleLocations() : nullptr; - - // Set render pass instance sample patterns - m_renderPassInstance.pSamplePatterns[subpassIndex].sampleCount = subpassMaxSampleCount; - m_renderPassInstance.pSamplePatterns[subpassIndex].locations = (pipelineSampleLocations != nullptr) ? 
- *pipelineSampleLocations : *Device::GetDefaultQuadSamplePattern(subpassMaxSampleCount); - } - } - if (pRenderPassSampleLocationsBeginInfoEXT != nullptr) { uint32_t attachmentInitialSampleLocationCount = @@ -5667,9 +6525,6 @@ void CmdBuffer::BeginRenderPass( } } - // Initialize sample pattern - RPInitSamplePattern(); - // Begin the first subpass m_renderPassInstance.pExecuteInfo = m_allGpuState.pRenderPass->GetExecuteInfo(); @@ -5826,6 +6681,7 @@ void CmdBuffer::RPBeginSubpass() // Bind targets RPBindTargets(subpass.begin.bindTargets); } + RPLoadOpClearDepthStencil(subpass.begin.loadOps.dsClearCount, subpass.begin.loadOps.pDsClears); } @@ -6163,6 +7019,10 @@ void CmdBuffer::RPLoadOpClearDepthStencil( Pal::SubresRange subresRange; attachment.pView->GetFrameBufferAttachmentSubresRange(&subresRange); + ValidateSamplePattern( + attachment.pImage->GetImageSamples(), + &m_renderPassInstance.pAttachments[clear.attachment].initialSamplePattern); + do { const uint32_t deviceIdx = deviceGroup.Index(); @@ -6319,6 +7179,13 @@ void CmdBuffer::RPResolveAttachments( } } + if (srcAttachment.pImage->IsDepthStencilFormat()) + { + ValidateSamplePattern( + srcAttachment.pImage->GetImageSamples(), + &m_renderPassInstance.pSamplePatterns[m_renderPassInstance.subpass]); + } + // Depth and stencil might have different resolve mode, so allowing resolve each aspect independently. 
for (uint32_t aspectRegionIndex = 0; aspectRegionIndex < aspectRegionCount; ++aspectRegionIndex) { @@ -6440,6 +7307,161 @@ void CmdBuffer::RPBindTargets( while (deviceGroup.IterateNext()); } +// ===================================================================================================================== +// Get Pal Image aspect layout from imageView +void CmdBuffer::GetImageLayout( + VkImageView imageView, + VkImageLayout imageLayout, + VkImageAspectFlags aspectMask, + Pal::SubresRange* palSubresRange, + Pal::ImageLayout* palImageLayout) +{ + // Get the image view from the attachment info + const ImageView* const pImageView = ImageView::ObjectFromHandle(imageView); + + // Get the attachment image + const Image* pImage = pImageView->GetImage(); + + // Get subres range from the image view + pImageView->GetFrameBufferAttachmentSubresRange(palSubresRange); + + palSubresRange->startSubres.plane = VkToPalImagePlaneSingle( + pImage->GetFormat(), + aspectMask, + m_pDevice->GetRuntimeSettings()); + + // Get the Depth Layout from the view image + *palImageLayout = pImage->GetBarrierPolicy().GetAspectLayout( + imageLayout, + palSubresRange->startSubres.plane, + GetQueueFamilyIndex(), + pImage->GetFormat()); +} + +// ===================================================================================================================== +// Binds color/depth targets for VK_KHR_dynamic_rendering +void CmdBuffer::BindTargets( + const VkRenderingInfoKHR* pRenderingInfo, + const VkRenderingFragmentShadingRateAttachmentInfoKHR* pRenderingFragmentShadingRateAttachmentInfoKHR) +{ + Pal::BindTargetParams params = {}; + + params.colorTargetCount = pRenderingInfo->colorAttachmentCount; + + static constexpr Pal::ImageLayout NullLayout = {}; + + utils::IterateMask deviceGroup(GetDeviceMask()); + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + + for (uint32_t i = 0; i < params.colorTargetCount; ++i) + { + const VkRenderingAttachmentInfoKHR& renderingAttachmentInfo = 
pRenderingInfo->pColorAttachments[i]; + + if (renderingAttachmentInfo.imageView != VK_NULL_HANDLE) + { + // Get the image view from the attachment info + const ImageView* const pImageView = ImageView::ObjectFromHandle(renderingAttachmentInfo.imageView); + + // Get the attachment image + const Image* pImage = pImageView->GetImage(); + + params.colorTargets[i].pColorTargetView = pImageView->PalColorTargetView(deviceIdx); + + RPImageLayout imageLayout = + { + renderingAttachmentInfo.imageLayout, + 0 + }; + + params.colorTargets[i].imageLayout = + pImage->GetAttachmentLayout( + imageLayout, + 0, + this); + + } + else + { + params.colorTargets[i].pColorTargetView = nullptr; + params.colorTargets[i].imageLayout = NullLayout; + + } + } + + const VkRenderingAttachmentInfoKHR* pStencilAttachmentInfo = pRenderingInfo->pStencilAttachment; + + if ((pStencilAttachmentInfo != nullptr) && + (pStencilAttachmentInfo->imageView != VK_NULL_HANDLE)) + { + const ImageView* const pStencilImageView = + ImageView::ObjectFromHandle(pStencilAttachmentInfo->imageView); + + Pal::SubresRange subresRange = {}; + Pal::ImageLayout stencilLayout = {}; + + GetImageLayout( + pStencilAttachmentInfo->imageView, + pStencilAttachmentInfo->imageLayout, + VK_IMAGE_ASPECT_STENCIL_BIT, + &subresRange, + &stencilLayout); + + params.depthTarget.pDepthStencilView = pStencilImageView->PalDepthStencilView(deviceIdx); + params.depthTarget.stencilLayout = stencilLayout; + } + else + { + params.depthTarget.pDepthStencilView = nullptr; + params.depthTarget.stencilLayout = NullLayout; + } + + const VkRenderingAttachmentInfoKHR* pDepthAttachmentInfo = pRenderingInfo->pDepthAttachment; + + if ((pDepthAttachmentInfo != nullptr) && + (pDepthAttachmentInfo->imageView != VK_NULL_HANDLE)) + { + const ImageView* const pDepthImageView = + ImageView::ObjectFromHandle(pDepthAttachmentInfo->imageView); + + Pal::SubresRange subresRange = {}; + Pal::ImageLayout depthLayout = {}; + + GetImageLayout( + 
pDepthAttachmentInfo->imageView, + pDepthAttachmentInfo->imageLayout, + VK_IMAGE_ASPECT_DEPTH_BIT, + &subresRange, + &depthLayout); + + params.depthTarget.pDepthStencilView = pDepthImageView->PalDepthStencilView(deviceIdx); + params.depthTarget.depthLayout = depthLayout; + } + else + { + // Set the depthLayout for stencil only formats to avoid incorrect PAL asserts. + params.depthTarget.depthLayout = params.depthTarget.stencilLayout; + } + + PalCmdBuffer(deviceIdx)->CmdBindTargets(params); + + if (pRenderingFragmentShadingRateAttachmentInfoKHR != nullptr) + { + // Get the image view from the attachment info + const ImageView* const pImageView = + ImageView::ObjectFromHandle(pRenderingFragmentShadingRateAttachmentInfoKHR->imageView); + + // Get the attachment image + const Image* pImage = pImageView->GetImage(); + + PalCmdBuffer(deviceIdx)->CmdBindSampleRateImage(pImage->PalImage(deviceIdx)); + } + + } + while (deviceGroup.IterateNext()); +} + // ===================================================================================================================== // Sets view instance mask for a subpass during a render pass instance (on devices within passed in device mask). void CmdBuffer::SetViewInstanceMask( @@ -6451,6 +7473,10 @@ void CmdBuffer::SetViewInstanceMask( { subpassViewMask = m_allGpuState.pRenderPass->GetViewMask(m_renderPassInstance.subpass); } + else if (m_allGpuState.dynamicRenderingInstance.viewMask > 0) + { + subpassViewMask = m_allGpuState.dynamicRenderingInstance.viewMask; + } utils::IterateMask deviceGroup(deviceMask); @@ -6549,27 +7575,61 @@ void CmdBuffer::WritePushConstants( const UserDataLayout& userDataLayout = pLayout->GetInfo().userDataLayout; - // Program the user data register only if the current user data layout base matches that of the given - // layout. Otherwise, what's happening is that the application is pushing constants for a future - // pipeline layout (e.g. 
at the top of the command buffer) and this register write will be redundant because - // a future vkCmdBindPipeline will reprogram the user data registers during the rebase. - if (PalPipelineBindingOwnedBy(palBindPoint, apiBindPoint) && - (pBindState->userDataLayout.pushConstRegBase == userDataLayout.pushConstRegBase) && - (pBindState->userDataLayout.pushConstRegCount >= (startInDwords + lengthInDwords))) + if (userDataLayout.scheme == PipelineLayoutScheme::Compact) + { + // Program the user data register only if the current user data layout base matches that of the given + // layout. Otherwise, what's happening is that the application is pushing constants for a future + // pipeline layout (e.g. at the top of the command buffer) and this register write will be redundant because + // a future vkCmdBindPipeline will reprogram the user data registers during the rebase. + if (PalPipelineBindingOwnedBy(palBindPoint, apiBindPoint) && + (pBindState->userDataLayout.compact.pushConstRegBase == userDataLayout.compact.pushConstRegBase) && + (pBindState->userDataLayout.compact.pushConstRegCount >= (startInDwords + lengthInDwords))) + { + utils::IterateMask deviceGroup(m_curDeviceMask); + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + + PalCmdBuffer(deviceIdx)->CmdSetUserData( + palBindPoint, + pBindState->userDataLayout.compact.pushConstRegBase + startInDwords, + lengthInDwords, + pUserDataPtr); + } + while (deviceGroup.IterateNext()); + } + } + else if (userDataLayout.scheme == PipelineLayoutScheme::Indirect) { utils::IterateMask deviceGroup(m_curDeviceMask); + do { const uint32_t deviceIdx = deviceGroup.Index(); + Pal::gpusize gpuAddr; + + void* pCpuAddr = PalCmdBuffer(deviceIdx)->CmdAllocateEmbeddedData( + userDataLayout.indirect.pushConstSizeInDword, + m_pDevice->GetProperties().descriptorSizes.alignment / sizeof(uint32_t), + &gpuAddr); + + memcpy(pCpuAddr, pUserData, userDataLayout.indirect.pushConstSizeInDword * sizeof(uint32_t)); + + const uint32_t 
gpuAddrLow = static_cast<uint32_t>(gpuAddr); + PalCmdBuffer(deviceIdx)->CmdSetUserData( palBindPoint, - pBindState->userDataLayout.pushConstRegBase + startInDwords, - lengthInDwords, - pUserDataPtr); + userDataLayout.indirect.pushConstPtrRegBase, + PipelineLayout::SetPtrRegCount, + &gpuAddrLow); } while (deviceGroup.IterateNext()); } + else + { + VK_NEVER_CALLED(); + } } // ===================================================================================================================== @@ -7293,7 +8353,19 @@ void CmdBuffer::ValidateStates() { DbgBarrierPreCmd(DbgBarrierSetDynamicPipelineState); - PalCmdBuffer(deviceIdx)->CmdSetViewports(PerGpuState(deviceIdx)->viewport); + const GraphicsPipeline* pGraphicsPipeline = m_allGpuState.pGraphicsPipeline; + + const bool isPointSizeUsed = pGraphicsPipeline->IsPointSizeUsed(); + Pal::ViewportParams viewport = PerGpuState(deviceIdx)->viewport; + if (isPointSizeUsed) + { + // The default value is 1.0f which means the guardband is disabled. + // Values more than 1.0f enable guardband. 
+ viewport.horzDiscardRatio = 10.0f; + viewport.vertDiscardRatio = 10.0f; + } + + PalCmdBuffer(deviceIdx)->CmdSetViewports(viewport); DbgBarrierPostCmd(DbgBarrierSetDynamicPipelineState); } @@ -7433,6 +8505,13 @@ void CmdBuffer::ValidateStates() DbgBarrierPostCmd(DbgBarrierSetDynamicPipelineState); } + + if (m_allGpuState.dirtyGraphics.samplePattern && (m_allGpuState.samplePattern.sampleCount != 0)) + { + PalCmdBuffer(deviceGroup.Index())->CmdSetMsaaQuadSamplePattern( + m_allGpuState.samplePattern.sampleCount, + m_allGpuState.samplePattern.locations); + } } while (deviceGroup.IterateNext()); @@ -7441,6 +8520,57 @@ void CmdBuffer::ValidateStates() } } +// ===================================================================================================================== +void CmdBuffer::ValidateSamplePattern( + uint32_t sampleCount, + SamplePattern* pSamplePattern) +{ + if (m_palQueueType == Pal::QueueTypeUniversal) + { + // if the current sample count is different than the current state, + // use the sample pattern passed in or the default one + if (sampleCount != m_allGpuState.samplePattern.sampleCount) + { + const Pal::MsaaQuadSamplePattern* pLocations; + + if (pSamplePattern != nullptr && (pSamplePattern->sampleCount > 0)) + { + VK_ASSERT(sampleCount == pSamplePattern->sampleCount); + + PalCmdSetMsaaQuadSamplePattern(pSamplePattern->sampleCount, pSamplePattern->locations); + pLocations = &pSamplePattern->locations; + } + else + { + pLocations = Device::GetDefaultQuadSamplePattern(sampleCount); + PalCmdSetMsaaQuadSamplePattern(sampleCount, *pLocations); + } + + // If the current state doesn't have a valid sample count/pattern, update to this and clear the dirty bit. + // Otherwise, we have to assume that a draw may be issued next depending on the previous sample pattern. 
+ if (m_allGpuState.samplePattern.sampleCount == 0) + { + m_allGpuState.samplePattern.sampleCount = sampleCount; + m_allGpuState.samplePattern.locations = *pLocations; + m_allGpuState.dirtyGraphics.samplePattern = 0; + } + else + { + m_allGpuState.dirtyGraphics.samplePattern = 1; + } + } + // set current sample pattern in the hardware if it hasn't been set yet + else if (m_allGpuState.dirtyGraphics.samplePattern) + { + PalCmdSetMsaaQuadSamplePattern( + m_allGpuState.samplePattern.sampleCount, + m_allGpuState.samplePattern.locations); + + m_allGpuState.dirtyGraphics.samplePattern = 0; + } + } +} + // ===================================================================================================================== void CmdBuffer::SetCullModeEXT( VkCullModeFlags cullMode) @@ -8651,6 +9781,21 @@ VKAPI_ATTR void VKAPI_CALL vkCmdWriteBufferMarker2AMD( ApiCmdBuffer::ObjectFromHandle(commandBuffer)->WriteBufferMarker(stage, dstBuffer, dstOffset, marker); } +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkCmdBeginRenderingKHR( + VkCommandBuffer commandBuffer, + const VkRenderingInfoKHR* pRenderingInfo) +{ + ApiCmdBuffer::ObjectFromHandle(commandBuffer)->BeginRendering(pRenderingInfo); +} + +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkCmdEndRenderingKHR( + VkCommandBuffer commandBuffer) +{ + ApiCmdBuffer::ObjectFromHandle(commandBuffer)->EndRendering(); +} + // ===================================================================================================================== VKAPI_ATTR void VKAPI_CALL vkCmdSetCullModeEXT( VkCommandBuffer commandBuffer, diff --git a/icd/api/vk_conv.cpp b/icd/api/vk_conv.cpp index c111b877..66e28d11 100644 --- a/icd/api/vk_conv.cpp +++ b/icd/api/vk_conv.cpp @@ -385,7 +385,6 @@ VK_TO_PAL_DECL_LOOKUP_TABLE(PRIMITIVE_TOPOLOGY, 
PrimitiveTopology VK_TO_PAL_DECL_LOOKUP_TABLE(FORMAT, SwizzledFormat ) VK_TO_PAL_DECL_LOOKUP_TABLE(PRIMITIVE_TOPOLOGY, PrimitiveType ) VK_TO_PAL_DECL_LOOKUP_TABLE_COMPLEX(QUERY_TYPE, PalQueryTypePool, QueryTypePool ) -VK_TO_PAL_DECL_LOOKUP_TABLE(INDEX_TYPE, IndexType ) VK_TO_PAL_DECL_LOOKUP_TABLE(IMAGE_VIEW_TYPE, ImageViewType ) VK_TO_PAL_DECL_LOOKUP_TABLE(LOGIC_OP, LogicOp ) VK_TO_PAL_DECL_LOOKUP_TABLE(SAMPLER_ADDRESS_MODE, TexAddressMode ) diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index e5e2adee..9be35db1 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -348,8 +348,9 @@ static void ConstructQueueCreateInfo( bool useComputeAsTransferQueue, bool isTmzQueue) { + const auto& palProperties = (*pPhysicalDevices)->PalProperties(); const Pal::QueuePriority palQueuePriority = - VkToPalGlobalPriority(queuePriority); + VkToPalGlobalPriority(queuePriority, palProperties.engineProperties[queueFamilyIndex].capabilities[queueIndex]); // Some configs can use this feature with any priority, but it's not useful for // lower priorities. 
@@ -510,6 +511,7 @@ VkResult Device::Create( bool privateDataEnabled = false; size_t privateDataSize = 0; bool bufferDeviceAddressMultiDeviceEnabled = false; + bool maintenance4Enabled = false; const VkStructHeader* pHeader = nullptr; @@ -680,7 +682,7 @@ VkResult Device::Create( break; } - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT: + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT: { if (reinterpret_cast<const VkPhysicalDevicePageableDeviceLocalMemoryFeaturesEXT*>( pHeader)->pageableDeviceLocalMemory) @@ -691,6 +693,17 @@ VkResult Device::Create( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES_KHR: + { + if (reinterpret_cast<const VkPhysicalDeviceMaintenance4FeaturesKHR*>( + pHeader)->maintenance4) + { + maintenance4Enabled = true; + } + + break; + } + default: break; } @@ -715,8 +728,12 @@ VkResult Device::Create( pCreateInfo->pEnabledFeatures, true); } + } - // Release the stack allocator + if (palResult == Pal::Result::Success) + { + // Don't free pStackAllocator before virtStackFrame's destructor + // Otherwise memory corruption when multi-threads are using same linear allocator pInstance->StackMgr()->ReleaseAllocator(pStackAllocator); } else @@ -1117,7 +1134,8 @@ VkResult Device::Create( scalarBlockLayoutEnabled, extendedRobustnessEnabled, bufferDeviceAddressMultiDeviceEnabled, - pageableDeviceLocalMemory); + pageableDeviceLocalMemory, + maintenance4Enabled); // If we've failed to Initialize, make sure we destroy anything we might have allocated. 
if (vkResult != VK_SUCCESS) @@ -1152,7 +1170,8 @@ VkResult Device::Initialize( bool scalarBlockLayoutEnabled, const ExtendedRobustness& extendedRobustnessEnabled, bool bufferDeviceAddressMultiDeviceEnabled, - bool pageableDeviceLocalMemory) + bool pageableDeviceLocalMemory, + bool maintenance4Enabled) { // Initialize the internal memory manager VkResult result = m_internalMemMgr.Init(); @@ -1270,6 +1289,17 @@ VkResult Device::Initialize( m_enabledFeatures.appControlledMemPriority = true; } + if ((m_settings.strictImageSizeRequirements == StrictImageSizeOn) || + ((m_settings.strictImageSizeRequirements == StrictImageSizeAppControlled) && + maintenance4Enabled)) + { + m_enabledFeatures.strictImageSizeRequirements = true; + } + else + { + m_enabledFeatures.strictImageSizeRequirements = false; + } + // If VkPhysicalDeviceBufferDeviceAddressFeaturesEXT.bufferDeviceAddressMultiDevice is enabled // and if globalGpuVaSupport is supported and if multiple devices are used set the global GpuVa. m_useGlobalGpuVa = (bufferDeviceAddressMultiDeviceEnabled && @@ -2427,7 +2457,21 @@ VkResult Device::CreateImage( const VkAllocationCallbacks* pAllocator, VkImage* pImage) { - return Image::Create(this, pCreateInfo, pAllocator, pImage); + VkResult result = Image::Create(this, pCreateInfo, pAllocator, pImage); + + if (result == VK_SUCCESS) + { + Image* pCreatedImage = Image::ObjectFromHandle(*pImage); + + pCreatedImage->SetMemoryRequirementsAtCreate(this); + + if (m_enabledFeatures.strictImageSizeRequirements && Formats::IsDepthStencilFormat(pCreateInfo->format)) + { + Image::CalculateAlignedMemoryRequirements(this, pCreateInfo, pCreatedImage); + } + } + + return result; } // ===================================================================================================================== @@ -2512,12 +2556,14 @@ VkResult Device::CreateComputePipelines( { const VkComputePipelineCreateInfo* pCreateInfo = &pCreateInfos[i]; - VkResult result = ComputePipeline::Create( - this, - 
pPipelineCache, - pCreateInfo, - pAllocator, - &pPipelines[i]); + VkResult result = VK_SUCCESS; + + result = ComputePipeline::Create( + this, + pPipelineCache, + pCreateInfo, + pAllocator, + &pPipelines[i]); if (result != VK_SUCCESS) { @@ -2666,7 +2712,7 @@ VkResult Device::BindBufferMemory( // ===================================================================================================================== VkResult Device::BindImageMemory( uint32_t bindInfoCount, - const VkBindImageMemoryInfo* pBindInfos) const + const VkBindImageMemoryInfo* pBindInfos) { for (uint32 bindIdx = 0; bindIdx < bindInfoCount; bindIdx++) { @@ -3588,6 +3634,13 @@ Pal::Result Device::CreatePalQueue( return palResult; } +Pal::TilingOptMode Device::GetTilingOptMode() const +{ + return m_enabledFeatures.strictImageSizeRequirements ? + Pal::TilingOptMode::OptForSpace : + m_settings.imageTilingOptMode; +} + /** *********************************************************************************************************************** * C-Callable entry points start here. These entries go in the dispatch table(s). 
@@ -4249,6 +4302,44 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetMemoryHostPointerPropertiesEXT( return result; } +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkGetDeviceBufferMemoryRequirementsKHR( + VkDevice device, + const VkDeviceBufferMemoryRequirementsKHR* pInfo, + VkMemoryRequirements2* pMemoryRequirements) +{ + const Device* pDevice = ApiDevice::ObjectFromHandle(device); + Buffer::CalculateMemoryRequirements(pDevice, + pInfo, + pMemoryRequirements); +} + +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkGetDeviceImageMemoryRequirementsKHR( + VkDevice device, + const VkDeviceImageMemoryRequirementsKHR* pInfo, + VkMemoryRequirements2* pMemoryRequirements) +{ + Device* pDevice = ApiDevice::ObjectFromHandle(device); + Image::CalculateMemoryRequirements(pDevice, + pInfo, + pMemoryRequirements); +} + +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkGetDeviceImageSparseMemoryRequirementsKHR( + VkDevice device, + const VkDeviceImageMemoryRequirementsKHR* pInfo, + uint32_t* pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2* pSparseMemoryRequirements) +{ + Device* pDevice = ApiDevice::ObjectFromHandle(device); + Image::CalculateSparseMemoryRequirements(pDevice, + pInfo, + pSparseMemoryRequirementCount, + pSparseMemoryRequirements); +} + // ===================================================================================================================== VKAPI_ATTR void VKAPI_CALL vkSetDeviceMemoryPriorityEXT( VkDevice device, diff --git a/icd/api/vk_dispatch.cpp b/icd/api/vk_dispatch.cpp index 56cbf30d..27440f3a 100644 --- a/icd/api/vk_dispatch.cpp +++ b/icd/api/vk_dispatch.cpp @@ -607,6 +607,11 @@ void DispatchTable::Init() 
INIT_DISPATCH_ENTRY(vkCmdWriteBufferMarker2AMD ); INIT_DISPATCH_ENTRY(vkQueueSubmit2KHR ); + INIT_DISPATCH_ENTRY(vkCmdBeginRenderingKHR ); + INIT_DISPATCH_ENTRY(vkCmdEndRenderingKHR ); + + INIT_DISPATCH_ENTRY(vkGetPhysicalDeviceToolPropertiesEXT ); + INIT_DISPATCH_ENTRY(vkCmdSetCullModeEXT ); INIT_DISPATCH_ENTRY(vkCmdSetFrontFaceEXT ); INIT_DISPATCH_ENTRY(vkCmdSetPrimitiveTopologyEXT ); @@ -641,6 +646,9 @@ void DispatchTable::Init() INIT_DISPATCH_ENTRY(vkCmdCopyImage2KHR ); INIT_DISPATCH_ENTRY(vkCmdCopyImageToBuffer2KHR ); INIT_DISPATCH_ENTRY(vkCmdResolveImage2KHR ); + INIT_DISPATCH_ENTRY(vkGetDeviceBufferMemoryRequirementsKHR ); + INIT_DISPATCH_ENTRY(vkGetDeviceImageMemoryRequirementsKHR ); + INIT_DISPATCH_ENTRY(vkGetDeviceImageSparseMemoryRequirementsKHR ); } // ===================================================================================================================== diff --git a/icd/api/vk_event.cpp b/icd/api/vk_event.cpp index 808a29ee..6580fbc5 100644 --- a/icd/api/vk_event.cpp +++ b/icd/api/vk_event.cpp @@ -186,11 +186,12 @@ VkResult Event::Initialize( Pal::GpuMemoryRequirements gpuMemReqs = {}; m_pPalEvents[0]->GetGpuMemoryRequirements(&gpuMemReqs); - InternalMemCreateInfo allocInfo = {}; - allocInfo.pal.size = gpuMemReqs.size; - allocInfo.pal.alignment = gpuMemReqs.alignment; - allocInfo.pal.priority = Pal::GpuMemPriority::Normal; - allocInfo.pal.flags.shareable = (numDeviceEvents > 1) ? 1 : 0; + InternalMemCreateInfo allocInfo = {}; + allocInfo.pal.size = gpuMemReqs.size; + allocInfo.pal.alignment = gpuMemReqs.alignment; + allocInfo.pal.priority = Pal::GpuMemPriority::Normal; + allocInfo.pal.flags.shareable = (numDeviceEvents > 1) ? 1 : 0; + allocInfo.pal.flags.cpuInvisible = (gpuMemReqs.flags.cpuAccess ? 
0 : 1); InternalSubAllocPool pool = InternalPoolCpuCacheableGpuUncached; diff --git a/icd/api/vk_fence.cpp b/icd/api/vk_fence.cpp index 88d19ccc..84974759 100644 --- a/icd/api/vk_fence.cpp +++ b/icd/api/vk_fence.cpp @@ -288,12 +288,13 @@ VkResult Fence::RestoreFence( if ((m_flags.isPermanence == 0) && m_flags.isOpened) { m_pPalTemporaryFences->Destroy(); - m_pPalTemporaryFences = nullptr; + m_flags.isPermanence = 1; m_flags.isOpened = 0; VkAllocationCallbacks* pAllocator = pDevice->VkInstance()->GetAllocCallbacks(); pAllocator->pfnFree(pAllocator->pUserData, m_pPalTemporaryFences); + m_pPalTemporaryFences = nullptr; } return ret; } diff --git a/icd/api/vk_framebuffer.cpp b/icd/api/vk_framebuffer.cpp index 692753c9..fa7f1ee6 100644 --- a/icd/api/vk_framebuffer.cpp +++ b/icd/api/vk_framebuffer.cpp @@ -208,6 +208,21 @@ void Framebuffer::SetImageViews( } } +// ===================================================================================================================== +// Set ImageViews for a Framebuffer attachment +void Framebuffer::SetImageViews( + const VkRenderingInfoKHR* pRenderingInfo) +{ + Attachment* pAttachments = static_cast<Attachment*>(Util::VoidPtrInc(this, GetAttachmentsOffset())); + + for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) + { + SetImageViews( + pRenderingInfo->pColorAttachments[i].imageView, + &(pAttachments[i])); + } +} + // ===================================================================================================================== // Update the subrange for framebuffer attachments void Framebuffer::SetSubresRanges( diff --git a/icd/api/vk_graphics_pipeline.cpp b/icd/api/vk_graphics_pipeline.cpp index 9384a90c..9cdb11dd 100644 --- a/icd/api/vk_graphics_pipeline.cpp +++ b/icd/api/vk_graphics_pipeline.cpp @@ -383,6 +383,7 @@ VkResult GraphicsPipeline::CreatePipelineObjects( pObjectCreateInfo->flags.bindInputAssemblyState, pObjectCreateInfo->flags.force1x1ShaderRate, pObjectCreateInfo->flags.customSampleLocations, + 
pObjectCreateInfo->flags.isPointSizeUsed, *pVbInfo, &internalBuffer, pPalMsaa, @@ -498,7 +499,6 @@ VkResult GraphicsPipeline::Create( objectCreateInfo.immedInfo.checkDeferCompilePipeline = pDevice->GetRuntimeSettings().deferCompileOptimizedPipeline && (binaryCreateInfo.pipelineInfo.enableEarlyCompile || binaryCreateInfo.pipelineInfo.enableUberFetchShader); - // 5. Create pipeline objects result = CreatePipelineObjects( pDevice, @@ -914,6 +914,7 @@ GraphicsPipeline::GraphicsPipeline( bool bindInputAssemblyState, bool force1x1ShaderRate, bool customSampleLocations, + bool isPointSizeUsed, const VbBindingInfo& vbInfo, const PipelineInternalBufferInfo* pInternalBuffer, Pal::IMsaaState** pPalMsaa, @@ -948,6 +949,7 @@ GraphicsPipeline::GraphicsPipeline( m_flags.bindInputAssemblyState = bindInputAssemblyState; m_flags.customSampleLocations = customSampleLocations; m_flags.force1x1ShaderRate = force1x1ShaderRate; + m_flags.isPointSizeUsed = isPointSizeUsed; CreateStaticState(); pPalPipelineHasher->Update(m_palPipelineHash); @@ -970,7 +972,6 @@ void GraphicsPipeline::CreateStaticState() pStaticTokens->depthBounds = DynamicRenderStateToken; pStaticTokens->viewport = DynamicRenderStateToken; pStaticTokens->scissorRect = DynamicRenderStateToken; - pStaticTokens->samplePattern = DynamicRenderStateToken; pStaticTokens->lineStippleState = DynamicRenderStateToken; pStaticTokens->fragmentShadingRate = DynamicRenderStateToken; @@ -1016,11 +1017,6 @@ void GraphicsPipeline::CreateStaticState() pStaticTokens->scissorRect = pCache->CreateScissorRect(m_info.scissorRectParams); } - if (ContainsStaticState(DynamicStatesInternal::SampleLocationsExt)) - { - pStaticTokens->samplePattern = pCache->CreateSamplePattern(m_info.samplePattern); - } - if (ContainsStaticState(DynamicStatesInternal::LineStippleExt)) { pStaticTokens->lineStippleState = pCache->CreateLineStipple(m_info.lineStippleParams); @@ -1078,9 +1074,6 @@ void GraphicsPipeline::DestroyStaticState( 
pCache->DestroyScissorRect(m_info.scissorRectParams, m_info.staticTokens.scissorRect); - pCache->DestroySamplePattern(m_info.samplePattern, - m_info.staticTokens.samplePattern); - pCache->DestroyLineStipple(m_info.lineStippleParams, m_info.staticTokens.lineStippleState); @@ -1493,11 +1486,12 @@ void GraphicsPipeline::BindToCmdBuffer( } if (ContainsStaticState(DynamicStatesInternal::SampleLocationsExt) && - CmdBuffer::IsStaticStateDifferent(oldTokens.samplePattern, newTokens.samplePattern)) + (memcmp(&pRenderState->samplePattern, &m_info.samplePattern, sizeof(SamplePattern)) != 0)) { pCmdBuffer->PalCmdSetMsaaQuadSamplePattern( m_info.samplePattern.sampleCount, m_info.samplePattern.locations); - pRenderState->staticTokens.samplePattern = newTokens.samplePattern; + pRenderState->samplePattern = m_info.samplePattern; + pRenderState->dirtyGraphics.samplePattern = 0; } if (ContainsStaticState(DynamicStatesInternal::ColorWriteEnableExt)) diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index 5d97d05b..33d3ddd8 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -198,7 +198,8 @@ Image::Image( barrierPolicyFormat, extraLayoutUsages), m_pSwapChain(nullptr), - m_ResourceKey(resourceKey) + m_ResourceKey(resourceKey), + m_memoryRequirements{} { m_internalFlags.u32All = internalFlags.u32All; @@ -289,7 +290,7 @@ static void ConvertImageCreateInfo( pPalCreateInfo->samples = pCreateInfo->samples; pPalCreateInfo->fragments = pCreateInfo->samples; pPalCreateInfo->tiling = VkToPalImageTiling(pCreateInfo->tiling); - pPalCreateInfo->tilingOptMode = settings.imageTilingOptMode; + pPalCreateInfo->tilingOptMode = pDevice->GetTilingOptMode(); if ((pCreateInfo->imageType == VK_IMAGE_TYPE_3D) && (pCreateInfo->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT))) @@ -349,6 +350,7 @@ static VkResult InitSparseVirtualMemory( pSparseMemCreateInfo->flags.globalGpuVa = pDevice->IsGlobalGpuVaEnabled(); pSparseMemCreateInfo->flags.virtualAlloc = 1; + 
pSparseMemCreateInfo->flags.cpuInvisible = (palReqs.flags.cpuAccess ? 0 : 1); pSparseMemCreateInfo->alignment = Util::RoundUpToMultiple(sparseAllocGranularity, palReqs.alignment); pSparseMemCreateInfo->size = Util::RoundUpToMultiple(palReqs.size, pSparseMemCreateInfo->alignment); pSparseMemCreateInfo->heapCount = 0; @@ -1219,7 +1221,7 @@ void GenerateBindIndices( // ===================================================================================================================== // Binds memory to this image. VkResult Image::BindMemory( - const Device* pDevice, + Device* pDevice, VkDeviceMemory mem, VkDeviceSize memOffset, uint32_t deviceIndexCount, @@ -1243,80 +1245,73 @@ VkResult Image::BindMemory( m_internalFlags.boundToExternalMemory = 1; } - VkMemoryRequirements reqs = {}; - - if (GetMemoryRequirements(pDevice, &reqs) == VK_SUCCESS) - { - Pal::Result result = Pal::Result::Success; - - const uint32_t numDevices = pDevice->NumPalDevices(); + VkMemoryRequirements reqs = GetMemoryRequirements(); - uint8_t bindIndices[MaxPalDevices]; - GenerateBindIndices(numDevices, - bindIndices, - deviceIndexCount, - pDeviceIndices, - rectCount, - pRects, - ((pMemory == nullptr) ? false : pMemory->IsMultiInstance())); + Pal::Result result = Pal::Result::Success; - for (uint32_t localDeviceIdx = 0; localDeviceIdx < numDevices; localDeviceIdx++) - { - const uint32_t sourceMemInst = bindIndices[localDeviceIdx]; + const uint32_t numDevices = pDevice->NumPalDevices(); - Pal::IImage* pPalImage = m_perGpu[localDeviceIdx].pPalImage; - Pal::IGpuMemory* pGpuMem = nullptr; - Pal::gpusize baseAddrOffset = 0; + uint8_t bindIndices[MaxPalDevices]; + GenerateBindIndices(numDevices, + bindIndices, + deviceIndexCount, + pDeviceIndices, + rectCount, + pRects, + ((pMemory == nullptr) ? 
false : pMemory->IsMultiInstance())); - if (pMemory != nullptr) - { - pGpuMem = pMemory->PalMemory(localDeviceIdx, sourceMemInst); + for (uint32_t localDeviceIdx = 0; localDeviceIdx < numDevices; localDeviceIdx++) + { + const uint32_t sourceMemInst = bindIndices[localDeviceIdx]; - // The bind offset within the memory should already be pre-aligned - VK_ASSERT(Util::IsPow2Aligned(memOffset, reqs.alignment)); + Pal::IImage* pPalImage = m_perGpu[localDeviceIdx].pPalImage; + Pal::IGpuMemory* pGpuMem = nullptr; + Pal::gpusize baseAddrOffset = 0; - VkDeviceSize baseGpuAddr = pGpuMem->Desc().gpuVirtAddr; + if (pMemory != nullptr) + { + pGpuMem = pMemory->PalMemory(localDeviceIdx, sourceMemInst); - // If the base address of the VkMemory is not already aligned - if ((Util::IsPow2Aligned(baseGpuAddr, reqs.alignment) == false) && - (m_internalFlags.externalD3DHandle == false)) - { - // This should only happen in situations where the image's alignment is extremely larger than - // the VkMemory object. - VK_ASSERT(pGpuMem->Desc().alignment < reqs.alignment); + // The bind offset within the memory should already be pre-aligned + VK_ASSERT(Util::IsPow2Aligned(memOffset, reqs.alignment)); - // Calculate the necessary offset to make the base address align to the image's requirements. - baseAddrOffset = Util::Pow2Align(baseGpuAddr, reqs.alignment) - baseGpuAddr; + VkDeviceSize baseGpuAddr = pGpuMem->Desc().gpuVirtAddr; - // Verify that we allocated sufficient padding to account for this offset - VK_ASSERT(baseAddrOffset <= CalcBaseAddrSizePadding(*pDevice, reqs)); - } + // If the base address of the VkMemory is not already aligned + if ((Util::IsPow2Aligned(baseGpuAddr, reqs.alignment) == false) && + (m_internalFlags.externalD3DHandle == false)) + { + // This should only happen in situations where the image's alignment is extremely larger than + // the VkMemory object. 
+ VK_ASSERT(pGpuMem->Desc().alignment < reqs.alignment); - // After applying any necessary base address offset, the full GPU address should be aligned - VK_ASSERT(Util::IsPow2Aligned(baseGpuAddr + baseAddrOffset + memOffset, reqs.alignment)); + // Calculate the necessary offset to make the base address align to the image's requirements. + baseAddrOffset = Util::Pow2Align(baseGpuAddr, reqs.alignment) - baseGpuAddr; - if (pDevice->GetEnabledFeatures().appControlledMemPriority == false) - { - pMemory->ElevatePriority(m_priority); - } + // Verify that we allocated sufficient padding to account for this offset + VK_ASSERT(baseAddrOffset <= CalcBaseAddrSizePadding(*pDevice, reqs)); } - result = pPalImage->BindGpuMemory(pGpuMem, baseAddrOffset + memOffset); + // After applying any necessary base address offset, the full GPU address should be aligned + VK_ASSERT(Util::IsPow2Aligned(baseGpuAddr + baseAddrOffset + memOffset, reqs.alignment)); - if (result == Pal::Result::Success) + if (pDevice->GetEnabledFeatures().appControlledMemPriority == false) { - // Record the private base address offset. This is necessary for things like subresource layout - // calculation for linear images. - m_perGpu[localDeviceIdx].baseAddrOffset = baseAddrOffset; + pMemory->ElevatePriority(m_priority); } } - return PalToVkResult(result); - } - else - { - return VK_ERROR_INITIALIZATION_FAILED; + result = pPalImage->BindGpuMemory(pGpuMem, baseAddrOffset + memOffset); + + if (result == Pal::Result::Success) + { + // Record the private base address offset. This is necessary for things like subresource layout + // calculation for linear images. 
+ m_perGpu[localDeviceIdx].baseAddrOffset = baseAddrOffset; + } } + + return PalToVkResult(result); } // ===================================================================================================================== @@ -1450,7 +1445,7 @@ VkResult Image::GetSubresourceLayout( // ===================================================================================================================== // Implementation of vkGetImageSparseMemoryRequirements void Image::GetSparseMemoryRequirements( - const Device* pDevice, + Device* pDevice, uint32_t* pNumRequirements, utils::ArrayView<VkSparseImageMemoryRequirements> sparseMemoryRequirements) { @@ -1501,10 +1496,7 @@ void Image::GetSparseMemoryRequirements( { const uint32_t aspectsToReportCount = Util::Min(*pNumRequirements, usedAspectsCount); uint32_t reportedAspectsCount = 0; - VkMemoryRequirements memReqs = {}; - - VkResult result = GetMemoryRequirements(pDevice, &memReqs); - VK_ASSERT(result == VK_SUCCESS); + VkMemoryRequirements memReqs = GetMemoryRequirements(); // Get the memory layout of the sparse image @@ -1620,9 +1612,8 @@ void Image::GetSparseMemoryRequirements( // ===================================================================================================================== // Get the image's memory requirements -VkResult Image::GetMemoryRequirements( - const Device* pDevice, - VkMemoryRequirements* pReqs) +void Image::SetMemoryRequirementsAtCreate( + const Device* pDevice) { const bool isSparse = IsSparse(); Pal::GpuMemoryRequirements palReqs = {}; @@ -1642,16 +1633,16 @@ VkResult Image::GetMemoryRequirements( if (isSparse) { - pReqs->alignment = Util::RoundUpToMultiple(virtualGranularity, palReqs.alignment); - pReqs->size = Util::RoundUpToMultiple(palReqs.size, virtualGranularity); + m_memoryRequirements.alignment = Util::RoundUpToMultiple(virtualGranularity, palReqs.alignment); + m_memoryRequirements.size = Util::RoundUpToMultiple(palReqs.size, virtualGranularity); } else { - pReqs->alignment = palReqs.alignment; - pReqs->size 
= palReqs.size; + m_memoryRequirements.alignment = palReqs.alignment; + m_memoryRequirements.size = palReqs.size; } - pReqs->memoryTypeBits = 0; + m_memoryRequirements.memoryTypeBits = 0; for (uint32_t i = 0; i < palReqs.heapCount; ++i) { @@ -1659,59 +1650,193 @@ VkResult Image::GetMemoryRequirements( if (pDevice->GetVkTypeIndexBitsFromPalHeap(palReqs.heaps[i], &typeIndexBits)) { - pReqs->memoryTypeBits |= typeIndexBits; + m_memoryRequirements.memoryTypeBits |= typeIndexBits; } } // Limit heaps to those compatible with pinned system memory if (m_internalFlags.externalPinnedHost) { - pReqs->memoryTypeBits &= pDevice->GetPinnedSystemMemoryTypes(); + m_memoryRequirements.memoryTypeBits &= pDevice->GetPinnedSystemMemoryTypes(); - VK_ASSERT(pReqs->memoryTypeBits != 0); + VK_ASSERT(m_memoryRequirements.memoryTypeBits != 0); } if (m_internalFlags.externallyShareable) { - pReqs->memoryTypeBits &= pDevice->GetMemoryTypeMaskForExternalSharing(); + m_memoryRequirements.memoryTypeBits &= pDevice->GetMemoryTypeMaskForExternalSharing(); } if (m_internalFlags.isProtected) { // If the image is protected only keep the protected type - pReqs->memoryTypeBits &= pDevice->GetMemoryTypeMaskMatching(VK_MEMORY_PROPERTY_PROTECTED_BIT); + m_memoryRequirements.memoryTypeBits &= pDevice->GetMemoryTypeMaskMatching(VK_MEMORY_PROPERTY_PROTECTED_BIT); } else { // If the image isn't protected remove the protected types - pReqs->memoryTypeBits &= ~pDevice->GetMemoryTypeMaskMatching(VK_MEMORY_PROPERTY_PROTECTED_BIT); + m_memoryRequirements.memoryTypeBits &= ~pDevice->GetMemoryTypeMaskMatching(VK_MEMORY_PROPERTY_PROTECTED_BIT); } if (pDevice->GetEnabledFeatures().deviceCoherentMemory == false) { - // If the state of the device coherent memory feature (defined by the extension VK_AMD_device_coherent_memory) is disabled, - // remove the device coherent memory type - pReqs->memoryTypeBits &= ~pDevice->GetMemoryTypeMaskMatching(VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD); + // If the state of the device 
coherent memory feature (defined by the extension VK_AMD_device_coherent_memory) + // is disabled, remove the device coherent memory type + m_memoryRequirements.memoryTypeBits &= + ~pDevice->GetMemoryTypeMaskMatching(VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD); } - // Add an extra memory padding. This can be enabled while capturing GFXR traces and disabled later. Capturing with this setting - // enabled helps in replaying GFXR traces. When this setting is not used while capture, GFXR might return a fatal error while replaying - // with different DCC threshold values. This is caused because gfxreconstruct (just like vktrace used to) records the memory sizes and - // offsets at the time of capture and always resends the same values during replay. + // Add an extra memory padding. This can be enabled while capturing GFXR traces and disabled later. Capturing with + // this setting enabled helps in replaying GFXR traces. When this setting is not used while capture, GFXR might + // return a fatal error while replaying with different DCC threshold values. This is caused because gfxreconstruct + // (just like vktrace used to) records the memory sizes and offsets at the time of capture and always resends the + // same values during replay. 
if (pDevice->GetRuntimeSettings().addMemoryPaddingToImageMemoryRequirements) { - pReqs->size += (uint64_t)((pDevice->GetRuntimeSettings().memoryPaddingFactorForImageMemoryRequirements) * (pReqs->size)); + m_memoryRequirements.size += + (uint64_t)((pDevice->GetRuntimeSettings().memoryPaddingFactorForImageMemoryRequirements) * + (m_memoryRequirements.size)); } // Adjust the size to account for internal padding required to align the base address - pReqs->size += CalcBaseAddrSizePadding(*pDevice, *pReqs); + m_memoryRequirements.size += CalcBaseAddrSizePadding(*pDevice, m_memoryRequirements); if (isSparse) { - pReqs->size = Util::RoundUpToMultiple(palReqs.size, pReqs->alignment); + m_memoryRequirements.size = Util::RoundUpToMultiple(palReqs.size, m_memoryRequirements.alignment); } +} - return VK_SUCCESS; +// ===================================================================================================================== +// Calculate image's memory requirements from VkImageCreateInfo +void Image::CalculateMemoryRequirements( + Device* pDevice, + const VkDeviceImageMemoryRequirementsKHR* pInfo, + VkMemoryRequirements2* pMemoryRequirements) +{ + VkImage image; + const VkAllocationCallbacks* pAllocCallbacks = pDevice->VkInstance()->GetAllocCallbacks(); + + VkResult result = Image::Create(pDevice, pInfo->pCreateInfo, pAllocCallbacks, &image); + + if (result == VK_SUCCESS) + { + Image* pImage = Image::ObjectFromHandle(image); + + pImage->SetMemoryRequirementsAtCreate(pDevice); + + VkMemoryDedicatedRequirements* pMemDedicatedRequirements = + static_cast(pMemoryRequirements->pNext); + + if ((pMemDedicatedRequirements != nullptr) && + (pMemDedicatedRequirements->sType == VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS)) + { + pMemDedicatedRequirements->prefersDedicatedAllocation = pImage->DedicatedMemoryRequired(); + pMemDedicatedRequirements->requiresDedicatedAllocation = pImage->DedicatedMemoryRequired(); + } + + if 
(pDevice->GetEnabledFeatures().strictImageSizeRequirements && + Formats::IsDepthStencilFormat(pInfo->pCreateInfo->format)) + { + CalculateAlignedMemoryRequirements( + pDevice, + pInfo->pCreateInfo, + pImage); + } + + pMemoryRequirements->memoryRequirements = pImage->GetMemoryRequirements(); + + pImage->Destroy(pDevice, pAllocCallbacks); + } +} + +// ===================================================================================================================== +// Calculate image's memory requirements from VkImageCreateInfo for depth/stencil formats +void Image::CalculateAlignedMemoryRequirements( + Device* pDevice, + const VkImageCreateInfo* pCreateInfo, + Image* pImage) +{ + VkImage image; + const VkAllocationCallbacks* pAllocCallbacks = pDevice->VkInstance()->GetAllocCallbacks(); + VkImageCreateInfo createInfo = *pCreateInfo; + VkMemoryRequirements memoryRequirements = pImage->GetMemoryRequirements(); + + if (!IsPowerOfTwo(pCreateInfo->extent.width)) + { + // Round width down to the nearest power of 2. + createInfo.extent.width = Pow2Pad(pCreateInfo->extent.width) >> 1; + + VkResult createResult = Image::Create(pDevice, &createInfo, pAllocCallbacks, &image); + + if (createResult == VK_SUCCESS) + { + Image* pPow2AlignedImage = Image::ObjectFromHandle(image); + + pPow2AlignedImage->SetMemoryRequirementsAtCreate(pDevice); + + VkMemoryRequirements pow2MemoryRequirements = pPow2AlignedImage->GetMemoryRequirements(); + + pPow2AlignedImage->Destroy(pDevice, pAllocCallbacks); + + if (pow2MemoryRequirements.size > memoryRequirements.size) + { + memoryRequirements.size = pow2MemoryRequirements.size; + } + } + + createInfo.extent.width = pCreateInfo->extent.width; + } + + if (!IsPowerOfTwo(pCreateInfo->extent.height)) + { + // Round height down to the nearest power of 2. 
+ createInfo.extent.height = Pow2Pad(pCreateInfo->extent.height) >> 1; + + VkResult createResult = Image::Create(pDevice, &createInfo, pAllocCallbacks, &image); + + if (createResult == VK_SUCCESS) + { + Image* pPow2AlignedImage = Image::ObjectFromHandle(image); + + pPow2AlignedImage->SetMemoryRequirementsAtCreate(pDevice); + + VkMemoryRequirements pow2MemoryRequirements = pPow2AlignedImage->GetMemoryRequirements(); + + pPow2AlignedImage->Destroy(pDevice, pAllocCallbacks); + + if (pow2MemoryRequirements.size > memoryRequirements.size) + { + memoryRequirements.size = pow2MemoryRequirements.size; + } + } + + createInfo.extent.height = pCreateInfo->extent.height; + } + + pImage->SetMemoryRequirements(memoryRequirements); +} + +// ===================================================================================================================== +// Calculate sparse image's memory requirements from VkImageCreateInfo +void Image::CalculateSparseMemoryRequirements( + Device* pDevice, + const VkDeviceImageMemoryRequirementsKHR* pInfo, + uint32_t* pSparseMemoryRequirementCount, + VkSparseImageMemoryRequirements2* pSparseMemoryRequirements) +{ + VkImage image; + + VkResult result = pDevice->CreateImage(pInfo->pCreateInfo, pDevice->VkInstance()->GetAllocCallbacks(), &image); + + if (result == VK_SUCCESS) + { + Image::ObjectFromHandle(image)->GetSparseMemoryRequirements( + pDevice, + pSparseMemoryRequirementCount, + utils::ArrayView(&pSparseMemoryRequirements->memoryRequirements)); + Image::ObjectFromHandle(image)->Destroy(pDevice, pDevice->VkInstance()->GetAllocCallbacks()); + } } // ===================================================================================================================== @@ -1786,7 +1911,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory( VkDeviceMemory memory, VkDeviceSize memoryOffset) { - const Device* pDevice = ApiDevice::ObjectFromHandle(device); + Device* pDevice = ApiDevice::ObjectFromHandle(device); return 
Image::ObjectFromHandle(image)->BindMemory(pDevice, memory, memoryOffset, 0, nullptr, 0, nullptr); } @@ -1797,9 +1922,9 @@ VKAPI_ATTR void VKAPI_CALL vkGetImageMemoryRequirements( VkImage image, VkMemoryRequirements* pMemoryRequirements) { - const Device* pDevice = ApiDevice::ObjectFromHandle(device); + Device* pDevice = ApiDevice::ObjectFromHandle(device); - Image::ObjectFromHandle(image)->GetMemoryRequirements(pDevice, pMemoryRequirements); + *pMemoryRequirements = Image::ObjectFromHandle(image)->GetMemoryRequirements(); } // ===================================================================================================================== @@ -1809,7 +1934,7 @@ VKAPI_ATTR void VKAPI_CALL vkGetImageSparseMemoryRequirements( uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements* pSparseMemoryRequirements) { - const Device* pDevice = ApiDevice::ObjectFromHandle(device); + Device* pDevice = ApiDevice::ObjectFromHandle(device); Image::ObjectFromHandle(image)->GetSparseMemoryRequirements( pDevice, @@ -1838,11 +1963,10 @@ VKAPI_ATTR void VKAPI_CALL vkGetImageMemoryRequirements2( const VkImageMemoryRequirementsInfo2* pInfo, VkMemoryRequirements2* pMemoryRequirements) { - const Device* pDevice = ApiDevice::ObjectFromHandle(device); + Device* pDevice = ApiDevice::ObjectFromHandle(device); - VkMemoryRequirements* pMemReq = &pMemoryRequirements->memoryRequirements; Image* pImage = Image::ObjectFromHandle(pInfo->image); - pImage->GetMemoryRequirements(pDevice, pMemReq); + pMemoryRequirements->memoryRequirements = pImage->GetMemoryRequirements(); VkMemoryDedicatedRequirements* pMemDedicatedRequirements = static_cast(pMemoryRequirements->pNext); @@ -1862,7 +1986,7 @@ VKAPI_ATTR void VKAPI_CALL vkGetImageSparseMemoryRequirements2( uint32_t* pSparseMemoryRequirementCount, VkSparseImageMemoryRequirements2* pSparseMemoryRequirements) { - const Device* pDevice = ApiDevice::ObjectFromHandle(device); + Device* pDevice = ApiDevice::ObjectFromHandle(device); Image* 
pImage = Image::ObjectFromHandle(pInfo->image); auto memReqsView = utils::ArrayView( diff --git a/icd/api/vk_instance.cpp b/icd/api/vk_instance.cpp index 54189536..e6a73eb9 100644 --- a/icd/api/vk_instance.cpp +++ b/icd/api/vk_instance.cpp @@ -832,7 +832,7 @@ VkResult Instance::EnumerateExtensionProperties( // If this extension is supported then report it if (supportedExtensions.IsExtensionSupported(id)) { - *pProperties = supportedExtensions.GetExtensionInfo(id); + supportedExtensions.GetExtensionInfo(id, pProperties); pProperties++; copyCount--; } diff --git a/icd/api/vk_memory.cpp b/icd/api/vk_memory.cpp index 67eb3937..f91bf10d 100644 --- a/icd/api/vk_memory.cpp +++ b/icd/api/vk_memory.cpp @@ -244,12 +244,9 @@ VkResult Memory::Create( pBoundImage = Image::ObjectFromHandle(pExtInfo->image); createInfo.pImage = pBoundImage->PalImage(DefaultDeviceIndex); - VkMemoryRequirements reqs = {}; - if (pBoundImage->GetMemoryRequirements(pDevice, &reqs) == VK_SUCCESS) - { - VK_ASSERT(pAllocInfo->allocationSize >= reqs.size); - createInfo.alignment = reqs.alignment; - } + VkMemoryRequirements reqs = pBoundImage->GetMemoryRequirements(); + VK_ASSERT(pAllocInfo->allocationSize >= reqs.size); + createInfo.alignment = reqs.alignment; } dedicatedImage = pExtInfo->image; dedicatedBuffer = pExtInfo->buffer; diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index 927b1c38..692580dc 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -71,6 +71,9 @@ #include #include +#include "devmode/devmode_mgr.h" +#include "protocols/rgpProtocol.h" + namespace vk { // DisplayModeObject should be returned as a VkDisplayModeKHR, since in some cases we need to retrieve Pal::IScreen from @@ -1466,6 +1469,51 @@ size_t PhysicalDevice::GetFeatures( return sizeof(VkPhysicalDeviceFeatures); } +// ===================================================================================================================== +VkResult 
PhysicalDevice::GetExtendedFormatProperties( + VkFormat format, + VkFormatProperties3KHR* pFormatProperties + ) const +{ + Pal::MergedFormatPropertiesTable fmtProperties = {}; + m_pPalDevice->GetFormatProperties(&fmtProperties); + + const Pal::SwizzledFormat palFormat = VkToPalFormat(format, GetRuntimeSettings()); + const Pal::FormatFeatureFlags* formatBits = fmtProperties.features[static_cast(palFormat.format)]; + + if (formatBits[Pal::IsLinear] & Pal::FormatFeatureImageShaderWrite) + { + pFormatProperties->linearTilingFeatures |= VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT_KHR; + } + + if (formatBits[Pal::IsLinear] & Pal::FormatFeatureImageShaderRead) + { + pFormatProperties->linearTilingFeatures |= VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT_KHR; + + if (Formats::IsDepthStencilFormat(format)) + { + pFormatProperties->linearTilingFeatures |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_DEPTH_COMPARISON_BIT_KHR; + } + } + + if (formatBits[Pal::IsNonLinear] & Pal::FormatFeatureImageShaderWrite) + { + pFormatProperties->optimalTilingFeatures |= VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT_KHR; + } + + if (formatBits[Pal::IsNonLinear] & Pal::FormatFeatureImageShaderRead) + { + pFormatProperties->optimalTilingFeatures |= VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT_KHR; + + if (Formats::IsDepthStencilFormat(format)) + { + pFormatProperties->optimalTilingFeatures |= VK_FORMAT_FEATURE_2_SAMPLED_IMAGE_DEPTH_COMPARISON_BIT_KHR; + } + } + + return VK_SUCCESS; +} + // ===================================================================================================================== // Retrieve format properites. 
Called in response to vkGetPhysicalDeviceImageFormatProperties VkResult PhysicalDevice::GetImageFormatProperties( @@ -1986,6 +2034,64 @@ VkResult PhysicalDevice::GetPhysicalDeviceCalibrateableTimeDomainsEXT( return result; } +// ===================================================================================================================== +VkResult PhysicalDevice::GetPhysicalDeviceToolPropertiesEXT( + uint32_t* pToolCount, + VkPhysicalDeviceToolPropertiesEXT* pToolProperties) +{ + bool isProfilingEnabled = false; + VkResult result = VK_SUCCESS; + + DevModeMgr* devModeMgr = VkInstance()->GetDevModeMgr(); + + if (devModeMgr != nullptr) + { + isProfilingEnabled = devModeMgr->IsTracingEnabled(); + } + + if (pToolProperties == nullptr) + { + if (isProfilingEnabled) + { + *pToolCount = 1; + } + else + { + *pToolCount = 0; + } + } + else + { + + if (isProfilingEnabled) + { + if (*pToolCount == 0) + { + result = VK_INCOMPLETE; + } + else + { + VkPhysicalDeviceToolPropertiesEXT& properties = pToolProperties[0]; + + const std::string versionString = std::to_string(RGP_PROTOCOL_VERSION); + + properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES_EXT; + properties.pNext = nullptr; + strncpy(properties.name, "Radeon GPU Profiler", VK_MAX_EXTENSION_NAME_SIZE); + strncpy(properties.version, versionString.c_str(), VK_MAX_EXTENSION_NAME_SIZE); + properties.purposes = VK_TOOL_PURPOSE_PROFILING_BIT_EXT | VK_TOOL_PURPOSE_TRACING_BIT_EXT; + strncpy(properties.description, "Radeon GPU Profiler, a low-level optimization tool \ + that provides detailed timing and occupancy information on Radeon GPUs.", VK_MAX_DESCRIPTION_SIZE); + strncpy(properties.layer, "", VK_MAX_EXTENSION_NAME_SIZE); + + *pToolCount = 1; + } + } + } + + return result; +} + // ===================================================================================================================== // Returns the API version supported by this device. 
uint32_t PhysicalDevice::GetSupportedAPIVersion() const @@ -3129,6 +3235,7 @@ VkResult PhysicalDevice::GetSurfaceFormats( uint32_t numPresentFormats = 0; const uint32_t maxBufferCount = (pSurfaceFormats != nullptr) ? *pSurfaceFormatCount : 0; + const RuntimeSettings& settings = GetRuntimeSettings(); DisplayableSurfaceInfo displayableInfo = {}; if (pSurface != nullptr) @@ -3155,8 +3262,6 @@ VkResult PhysicalDevice::GetSurfaceFormats( bool needsWorkaround = pScreen == nullptr ? isWindowed : (palColorCaps.supportedColorSpaces == Pal::ScreenColorSpace::TfUndefined); - // This workaround is needed on Windows in cases where we get a valid screen object but it has - // no valid display properties. This scenario can happen when running apps without a display connected. if (needsWorkaround) { // The w/a here will be removed once more presentable format is supported on base driver side. @@ -3242,7 +3347,7 @@ VkResult PhysicalDevice::GetSurfaceFormats( for (uint32_t vkFmtIdx = VK_FORMAT_BEGIN_RANGE; vkFmtIdx <= VK_FORMAT_END_RANGE; vkFmtIdx++) { bool isFullscreenFormat = false; - const Pal::SwizzledFormat cmpFormat = VkToPalFormat(static_cast(vkFmtIdx), GetRuntimeSettings()); + const Pal::SwizzledFormat cmpFormat = VkToPalFormat(static_cast(vkFmtIdx), settings); for (uint32_t fmtIndx = 0; fmtIndx < numImgFormats; fmtIndx++) { @@ -3271,7 +3376,6 @@ VkResult PhysicalDevice::GetSurfaceFormats( ColorSpaceHelper::GetSupportedFormats(palColorCaps.supportedColorSpaces, &colorSpaceCount, pColorSpaces); - const RuntimeSettings& settings = GetSettingsLoader()->GetSettings(); // Report HDR in windowed mode only if OS is in HDR mode. 
Always report on fullscreen bool reportHdrSupport = (isWindowed == false) || palColorCaps.isHdrEnabled || settings.alwaysReportHdrFormats; @@ -3603,6 +3707,7 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_SCALAR_BLOCK_LAYOUT)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_MEMORY_BUDGET)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_MEMORY_PRIORITY)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_PAGEABLE_DEVICE_LOCAL_MEMORY)); if ((pPhysicalDevice == nullptr) || pPhysicalDevice->PalProperties().gfxipProperties.flags.supportPostDepthCoverage) { @@ -3641,6 +3746,8 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_PRIVATE_DATA)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_TOOLING_INFO)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_EXTENDED_DYNAMIC_STATE)); #if defined(__unix__) @@ -3665,6 +3772,10 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_ROBUSTNESS2)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SHADER_TERMINATE_INVOCATION)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_EXTENDED_DYNAMIC_STATE2)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_FORMAT_FEATURE_FLAGS2)); + + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_DYNAMIC_RENDERING)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SHADER_INTEGER_DOT_PRODUCT)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_COPY_COMMANDS2)); @@ -3688,6 +3799,22 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_ZERO_INITIALIZE_WORKGROUP_MEMORY)); 
availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_LOAD_STORE_OP_NONE)); +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 52 + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_YCBCR_IMAGE_ARRAYS)); +#else +#endif + +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 1500 + if ((pPhysicalDevice == nullptr) || + ((pPhysicalDevice->PalProperties().gfxLevel != Pal::GfxIpLevel::GfxIp9) && + (pPhysicalDevice->PalProperties().gfxipProperties.flags.supportBorderColorSwizzle))) + { + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_BORDER_COLOR_SWIZZLE)); + } +#endif + + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_INDEX_TYPE_UINT8)); + bool disableAMDVendorExtensions = false; if (pPhysicalDevice != nullptr) { @@ -3770,8 +3897,7 @@ template static bool IsNormalQueue(const T& engineCapabilities) { return ((engineCapabilities.flags.exclusive == 0) && - (((engineCapabilities.queuePrioritySupport & Pal::QueuePrioritySupport::SupportQueuePriorityNormal) != 0) || - (engineCapabilities.queuePrioritySupport == 0))); + ((engineCapabilities.queuePrioritySupport & Pal::QueuePrioritySupport::SupportQueuePriorityNormal) != 0)); } // ===================================================================================================================== @@ -4056,7 +4182,7 @@ VkResult PhysicalDevice::EnumerateExtensionProperties( // If this extension is supported then report it if (supportedExtensions.IsExtensionSupported(id)) { - *pProperties = supportedExtensions.GetExtensionInfo(id); + supportedExtensions.GetExtensionInfo(id, pProperties); pProperties++; copyCount--; } @@ -5523,6 +5649,33 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT: + { + auto* pExtInfo = reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->borderColorSwizzle = VK_TRUE; + pExtInfo->borderColorSwizzleFromImage = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + break; + } + + case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES_KHR: + { + auto* pExtInfo = reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->dynamicRendering = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: { auto* pExtInfo = reinterpret_cast(pHeader); @@ -5557,7 +5710,7 @@ size_t PhysicalDevice::GetFeatures2( if (updateFeatures) { - pExtInfo->ycbcrImageArrays = VK_FALSE; + pExtInfo->ycbcrImageArrays = VK_TRUE; } structSize = sizeof(*pExtInfo); @@ -5652,6 +5805,19 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES_KHR: + { + auto* pExtInfo = reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->maintenance4 = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT: { auto* pExtInfo = reinterpret_cast(pHeader); @@ -5665,6 +5831,19 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: + { + auto* pExtInfo = reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->indexTypeUint8 = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + break; + } + default: { // skip any unsupported extension structures @@ -6288,6 +6467,13 @@ void PhysicalDevice::GetDeviceProperties2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES_KHR: + { + auto* pProps = static_cast(pNext); + pProps->maxBufferSize = 2u * 1024u * 1024u * 1024u; // TODO: replace with actual size + break; + } + default: break; } @@ -6304,6 +6490,36 @@ void PhysicalDevice::GetFormatProperties2( VK_ASSERT(pFormatProperties->sType == VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2); GetFormatProperties(format, &pFormatProperties->formatProperties); + void* pNext = pFormatProperties->pNext; + + while (pNext != nullptr) + { + auto* pHeader = static_cast(pNext); + + switch 
(static_cast(pHeader->sType)) + { + case VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3_KHR: + { + auto* pFormatPropertiesExtended = static_cast(pNext); + + // Replicate flags from pFormatProperties + pFormatPropertiesExtended->linearTilingFeatures = + static_cast(pFormatProperties->formatProperties.linearTilingFeatures); + pFormatPropertiesExtended->optimalTilingFeatures = + static_cast(pFormatProperties->formatProperties.optimalTilingFeatures); + pFormatPropertiesExtended->bufferFeatures = + static_cast(pFormatProperties->formatProperties.bufferFeatures); + + // Query for extended format properties + GetExtendedFormatProperties(format, pFormatPropertiesExtended); + break; + } + default: + break; + } + + pNext = pHeader->pNext; + } } // ===================================================================================================================== @@ -8037,6 +8253,16 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( pTimeDomains); } +// ===================================================================================================================== +VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceToolPropertiesEXT( + VkPhysicalDevice physicalDevice, + uint32_t* pToolCount, + VkPhysicalDeviceToolPropertiesEXT* pToolProperties) +{ + return ApiPhysicalDevice::ObjectFromHandle(physicalDevice)->GetPhysicalDeviceToolPropertiesEXT(pToolCount, + pToolProperties); +} + VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceFragmentShadingRatesKHR( VkPhysicalDevice physicalDevice, uint32* pFragmentShadingRateCount, diff --git a/icd/api/vk_pipeline.cpp b/icd/api/vk_pipeline.cpp index 79ffcaee..270967e5 100644 --- a/icd/api/vk_pipeline.cpp +++ b/icd/api/vk_pipeline.cpp @@ -899,9 +899,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineExecutablePropertiesKHR( constexpr char shaderPreName[] = "ShaderProperties"; // Return the name / description for the pExecutableCount number of executables. 
- for (uint32 i = 0; - Util::BitMaskScanForward(&i, hwStageMask); - (hwStageMask &= ~(1 << i)) && (outputCount < *pExecutableCount)) + uint32 i = 0; + while (Util::BitMaskScanForward(&i, hwStageMask) && (outputCount < *pExecutableCount)) { // Get an api shader type for the corresponding HW Shader Pal::ShaderType shaderType = GetApiShaderFromHwShader(static_cast(i), apiToHwShader); @@ -936,6 +935,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineExecutablePropertiesKHR( vkShaderStats.computeWorkGroupSize[2]; } + hwStageMask &= ~(1 << i); outputCount++; } diff --git a/icd/api/vk_pipeline_layout.cpp b/icd/api/vk_pipeline_layout.cpp index 28197fdc..e8e7bb8e 100644 --- a/icd/api/vk_pipeline_layout.cpp +++ b/icd/api/vk_pipeline_layout.cpp @@ -28,8 +28,10 @@ * @brief Contains implementation of Vulkan pipeline layout objects. *********************************************************************************************************************** */ -#include "include/vk_pipeline_layout.h" + +#include "include/graphics_pipeline_common.h" #include "include/vk_descriptor_set_layout.h" +#include "include/vk_pipeline_layout.h" #include "include/vk_shader.h" #include "include/vk_sampler.h" #include "include/vk_utils.h" @@ -111,6 +113,81 @@ VkResult PipelineLayout::ConvertCreateInfo( Info* pInfo, PipelineInfo* pPipelineInfo, SetUserDataLayout* pSetUserDataLayouts) +{ + VkResult result = VK_SUCCESS; + uint32_t pushConstantsSizeInBytes = 0; + uint32_t pushConstantsUserDataNodeCount = 0; + const PipelineLayoutScheme scheme = DeterminePipelineLayoutScheme(pDevice, pIn); + + pInfo->setCount = pIn->setLayoutCount; + + ProcessPushConstantsInfo( + pIn, + &pushConstantsSizeInBytes, + &pushConstantsUserDataNodeCount); + + if (scheme == PipelineLayoutScheme::Indirect) + { + result = BuildIndirectSchemeInfo( + pDevice, + pIn, + pushConstantsSizeInBytes, + pInfo, + pPipelineInfo, + pSetUserDataLayouts); + } + else if (scheme == PipelineLayoutScheme::Compact) + { + result = 
BuildCompactSchemeInfo( + pDevice, + pIn, + pushConstantsSizeInBytes, + pushConstantsUserDataNodeCount, + pInfo, + pPipelineInfo, + pSetUserDataLayouts); + } + else + { + VK_NEVER_CALLED(); + } + + return result; +} + +// ===================================================================================================================== +void PipelineLayout::ProcessPushConstantsInfo( + const VkPipelineLayoutCreateInfo* pIn, + uint32_t* pPushConstantsSizeInBytes, + uint32_t* pPushConstantsUserDataNodeCount) +{ + uint32_t pushConstantsSizeInBytes = 0; + uint32_t pushConstantsUserDataNodeCount = 1; + + for (uint32_t i = 0; i < pIn->pushConstantRangeCount; ++i) + { + const VkPushConstantRange* pRange = &pIn->pPushConstantRanges[i]; + + // Test if this push constant range is active in at least one stage + if (pRange->stageFlags != 0) + { + pushConstantsSizeInBytes = Util::Max(pushConstantsSizeInBytes, pRange->offset + pRange->size); + } + } + + *pPushConstantsSizeInBytes = pushConstantsSizeInBytes; + *pPushConstantsUserDataNodeCount = pushConstantsUserDataNodeCount; +} + +// ===================================================================================================================== +VkResult PipelineLayout::BuildCompactSchemeInfo( + const Device* pDevice, + const VkPipelineLayoutCreateInfo* pIn, + const uint32_t pushConstantsSizeInBytes, + const uint32_t pushConstantsUserDataNodeCount, + Info* pInfo, + PipelineInfo* pPipelineInfo, + SetUserDataLayout* pSetUserDataLayouts) { // We currently allocate user data registers for various resources in the following fashion: // First user data registers will hold the descriptor set bindings in increasing order by set index. @@ -120,39 +197,28 @@ VkResult PipelineLayout::ConvertCreateInfo( // Finally, the vertex buffer table pointer is in the last user data register when applicable. // This allocation allows the descriptor set bindings to easily persist across pipeline switches. 
- VkResult result = VK_SUCCESS; - - pPipelineInfo->numRsrcMapNodes = 0; - pPipelineInfo->numDescRangeValueNodes = 0; - - // Always allocates: - // 1 extra user data node for the vertex buffer table pointer - // 1 extra user data node for push constant - pPipelineInfo->numUserDataNodes = 2; + VkResult result = VK_SUCCESS; + auto* pUserDataLayout = &pInfo->userDataLayout.compact; - pInfo->setCount = pIn->setLayoutCount; - - pInfo->userDataRegCount = 0; + memset(pPipelineInfo, 0, sizeof(PipelineInfo)); + memset(&(pInfo->userDataLayout), 0, sizeof(UserDataLayout)); + pInfo->userDataLayout.scheme = PipelineLayoutScheme::Compact; - pInfo->userDataLayout.transformFeedbackRegBase = 0; - pInfo->userDataLayout.transformFeedbackRegCount = 0; - pInfo->userDataLayout.pushConstRegBase = 0; - pInfo->userDataLayout.pushConstRegCount = 0; - pInfo->userDataLayout.setBindingRegCount = 0; - pInfo->userDataLayout.setBindingRegBase = 0; + // Always allocates 1 extra user data node for the vertex buffer table pointer + pPipelineInfo->numUserDataNodes = 1; if (pDevice->GetRuntimeSettings().enableEarlyCompile) { // Early compile mode will enable uber-fetch shader and spec constant buffer on vertex shader and // fragment shader implicitly. so we need three reserved node. pPipelineInfo->numUserDataNodes += 3; - pInfo->userDataRegCount += 6; // Each buffer consume 2 user data register now. + pInfo->userDataRegCount += 6; // Each buffer consume 2 user data register now. } else if (pDevice->GetRuntimeSettings().enableUberFetchShader) { // Reserve one user data nodes for uber-fetch shader. 
pPipelineInfo->numUserDataNodes += 1; - pInfo->userDataRegCount += 2; + pInfo->userDataRegCount += 2; } VK_ASSERT(pIn->setLayoutCount <= MaxDescriptorSets); @@ -161,9 +227,9 @@ VkResult PipelineLayout::ConvertCreateInfo( uint32_t totalDynDescCount = 0; // Populate user data layouts for each descriptor set that is active - pInfo->userDataLayout.setBindingRegBase = pInfo->userDataRegCount; + pUserDataLayout->setBindingRegBase = pInfo->userDataRegCount; - for (uint32_t i = 0; i < pInfo->setCount; ++i) + for (uint32_t i = 0; i < pIn->setLayoutCount; ++i) { SetUserDataLayout* pSetUserData = &pSetUserDataLayouts[i]; @@ -173,7 +239,7 @@ VkResult PipelineLayout::ConvertCreateInfo( pSetUserData->setPtrRegOffset = InvalidReg; pSetUserData->dynDescDataRegOffset = 0; pSetUserData->dynDescCount = setLayoutInfo.numDynamicDescriptors; - pSetUserData->firstRegOffset = pInfo->userDataRegCount - pInfo->userDataLayout.setBindingRegBase; + pSetUserData->firstRegOffset = pInfo->userDataRegCount - pUserDataLayout->setBindingRegBase; pSetUserData->totalRegCount = 0; // Test if this set is active in at least one stage @@ -208,7 +274,7 @@ VkResult PipelineLayout::ConvertCreateInfo( // In this case we also reserve the user data for the set pointer pSetUserData->setPtrRegOffset = pSetUserData->firstRegOffset + pSetUserData->totalRegCount; - pSetUserData->totalRegCount += SetPtrRegCount; + pSetUserData->totalRegCount += SetPtrRegCount; } } @@ -228,37 +294,26 @@ VkResult PipelineLayout::ConvertCreateInfo( } // Calculate total number of user data regs used for active descriptor set data - pInfo->userDataLayout.setBindingRegCount = pInfo->userDataRegCount - pInfo->userDataLayout.setBindingRegBase; + pUserDataLayout->setBindingRegCount = pInfo->userDataRegCount - pUserDataLayout->setBindingRegBase; VK_ASSERT(totalDynDescCount <= MaxDynamicDescriptors); - // Calculate the number of bytes needed for push constants - uint32_t pushConstantsSizeInBytes = 0; - - for (uint32_t i = 0; i < 
pIn->pushConstantRangeCount; ++i) - { - const VkPushConstantRange* pRange = &pIn->pPushConstantRanges[i]; - - // Test if this push constant range is active in at least one stage - if (pRange->stageFlags != 0) - { - pushConstantsSizeInBytes = Util::Max(pushConstantsSizeInBytes, pRange->offset + pRange->size); - } - } + // Allocate user data for push constants + pPipelineInfo->numUserDataNodes += pushConstantsUserDataNodeCount; uint32_t pushConstRegCount = pushConstantsSizeInBytes / sizeof(uint32_t); - pInfo->userDataLayout.pushConstRegBase = pInfo->userDataRegCount; - pInfo->userDataLayout.pushConstRegCount = pushConstRegCount; - pInfo->userDataRegCount += pushConstRegCount; + pUserDataLayout->pushConstRegBase = pInfo->userDataRegCount; + pUserDataLayout->pushConstRegCount = pushConstRegCount; + pInfo->userDataRegCount += pushConstRegCount; // Reserve an user-data to store the VA of buffer for transform feedback. if (pDevice->IsExtensionEnabled(DeviceExtensions::EXT_TRANSFORM_FEEDBACK)) { - pInfo->userDataLayout.transformFeedbackRegBase = pInfo->userDataRegCount; - pInfo->userDataLayout.transformFeedbackRegCount = 1; - pInfo->userDataRegCount += pInfo->userDataLayout.transformFeedbackRegCount; - pPipelineInfo->numUserDataNodes += 1; + pUserDataLayout->transformFeedbackRegBase = pInfo->userDataRegCount; + pUserDataLayout->transformFeedbackRegCount = 1; + pInfo->userDataRegCount += pUserDataLayout->transformFeedbackRegCount; + pPipelineInfo->numUserDataNodes += 1; } // In case we need an internal vertex buffer table, add nodes required for its entries, and its set pointer. 
@@ -280,6 +335,173 @@ VkResult PipelineLayout::ConvertCreateInfo( return result; } +// ===================================================================================================================== +VkResult PipelineLayout::BuildIndirectSchemeInfo( + const Device* pDevice, + const VkPipelineLayoutCreateInfo* pIn, + const uint32_t pushConstantsSizeInBytes, + Info* pInfo, + PipelineInfo* pPipelineInfo, + SetUserDataLayout* pSetUserDataLayouts) +{ + // Indirect mode is designed for the case that the pipeline layout only contains part of layout information of the + // final executable pipeline. So that user data is used in a conservative way to make sure that this pipeline layout + // is always compatible with other layouts which may contain the descriptor which is not known currently. + // + // The user data registers for various resources is allocated in the following fashion: + // 1. one user data entry for the vertex buffer table pointer + // 2. one user data entry for the push constant buffer pointer + // 3. one user data entry for transform feedback buffer (if extension is enabled) + // 5. MaxDescriptorSets sets of user data entries which store the information for each descriptor set. Each set + // contains 2 user data entry: the 1st is for the dynamic descriptors and the 2nd is for static descriptors. + // + // TODO: The following features have not been supported by indirect scheme: + // 1. Uber-fetch shader + // 2. 
PipelineLayoutAngle mode + + VK_ASSERT(pIn->setLayoutCount <= MaxDescriptorSets); + VK_ASSERT(pDevice->GetRuntimeSettings().pipelineLayoutMode != PipelineLayoutAngle); + VK_ASSERT(pDevice->GetRuntimeSettings().enableEarlyCompile == false); + + VkResult result = VK_SUCCESS; + auto* pUserDataLayout = &pInfo->userDataLayout.indirect; + uint32_t totalDynDescCount = 0; + + memset(pPipelineInfo, 0, sizeof(PipelineInfo)); + memset(&(pInfo->userDataLayout), 0, sizeof(UserDataLayout)); + pInfo->userDataLayout.scheme = PipelineLayoutScheme::Indirect; + + VK_ASSERT(totalDynDescCount <= MaxDynamicDescriptors); + + // Allocate user data for vertex buffer table + pPipelineInfo->numUserDataNodes += 1; + pPipelineInfo->numRsrcMapNodes += Pal::MaxVertexBuffers; + pInfo->userDataRegCount += 1; + + // Allocate user data for push constant buffer pointer + pUserDataLayout->pushConstPtrRegBase = pInfo->userDataRegCount; + pUserDataLayout->pushConstSizeInDword = pushConstantsSizeInBytes / sizeof(uint32_t); + pPipelineInfo->numUserDataNodes += 1; + pPipelineInfo->numRsrcMapNodes += 1; + pInfo->userDataRegCount += 1; + + // Allocate user data for transform feedback buffer + if (pDevice->IsExtensionEnabled(DeviceExtensions::EXT_TRANSFORM_FEEDBACK)) + { + pUserDataLayout->transformFeedbackRegBase = pInfo->userDataRegCount; + pPipelineInfo->numUserDataNodes += 1; + pInfo->userDataRegCount += 1; + } + + // Allocate user data for descriptor sets + pUserDataLayout->setBindingPtrRegBase = pInfo->userDataRegCount; + pInfo->userDataRegCount += 2 * SetPtrRegCount * MaxDescriptorSets; + + // This simulates the descriptor set user data register layout of compact scheme + // so as to fill pSetUserDataLayouts[] + // Indirect scheme also needs to fill pSetUserDataLayouts[] because we need the data + // in this array to locate the descriptor set data managed by vk::CmdBuffer + uint32_t setBindingCompactRegBase = pUserDataLayout->setBindingPtrRegBase; + + for (uint32_t i = 0; i < pIn->setLayoutCount; ++i) + 
{ + SetUserDataLayout* pSetUserData = &pSetUserDataLayouts[i]; + + const DescriptorSetLayout::CreateInfo& setLayoutInfo = + DescriptorSetLayout::ObjectFromHandle(pIn->pSetLayouts[i])->Info(); + + pSetUserData->setPtrRegOffset = InvalidReg; + pSetUserData->dynDescDataRegOffset = 0; + pSetUserData->dynDescCount = setLayoutInfo.numDynamicDescriptors; + pSetUserData->firstRegOffset = setBindingCompactRegBase - pUserDataLayout->setBindingPtrRegBase; + pSetUserData->totalRegCount = 0; + + if (setLayoutInfo.activeStageMask != 0) + { + // Add space for static descriptors + if (setLayoutInfo.sta.numRsrcMapNodes > 0) + { + pPipelineInfo->numUserDataNodes += 1; + pPipelineInfo->numRsrcMapNodes += setLayoutInfo.sta.numRsrcMapNodes; + + // Add count for FMASK nodes + if (pDevice->GetRuntimeSettings().enableFmaskBasedMsaaRead) + { + pPipelineInfo->numRsrcMapNodes += setLayoutInfo.sta.numRsrcMapNodes; + } + } + + // Add space for immutable sampler descriptor storage needed by the set + pPipelineInfo->numDescRangeValueNodes += setLayoutInfo.imm.numDescriptorValueNodes; + + // Add space for dynamic descriptors + if (setLayoutInfo.dyn.numRsrcMapNodes > 0) + { + pPipelineInfo->numUserDataNodes += 1; + pPipelineInfo->numRsrcMapNodes += setLayoutInfo.dyn.numRsrcMapNodes; + totalDynDescCount += setLayoutInfo.numDynamicDescriptors; + } + + // Fill set user data layout + pSetUserData->dynDescDataRegOffset = pSetUserData->firstRegOffset + pSetUserData->totalRegCount; + pSetUserData->totalRegCount += + pSetUserData->dynDescCount * DescriptorSetLayout::GetDynamicBufferDescDwSize(pDevice); + if (setLayoutInfo.sta.numRsrcMapNodes > 0) + { + pSetUserData->setPtrRegOffset = pSetUserData->firstRegOffset + pSetUserData->totalRegCount; + pSetUserData->totalRegCount += SetPtrRegCount; + } + } + + setBindingCompactRegBase += pSetUserData->totalRegCount; + } + + // Calculate the buffer size necessary for all resource mapping + pPipelineInfo->mappingBufferSize = + (pPipelineInfo->numUserDataNodes * 
GetMaxResMappingRootNodeSize()) + + (pPipelineInfo->numRsrcMapNodes * GetMaxResMappingNodeSize() ) + + (pPipelineInfo->numDescRangeValueNodes * GetMaxStaticDescValueSize() ); + + // If we go past our user data limit, we can't support this pipeline + if (pInfo->userDataRegCount >= + pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gfxipProperties.maxUserDataEntries) + { + result = VK_ERROR_INITIALIZATION_FAILED; + } + + return result; +} + +// ===================================================================================================================== +PipelineLayoutScheme PipelineLayout::DeterminePipelineLayoutScheme( + const Device* pDevice, + const VkPipelineLayoutCreateInfo* pIn) +{ + PipelineLayoutScheme scheme = PipelineLayoutScheme::Compact; + + const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); + + switch (settings.pipelineLayoutSchemeSelectionStrategy) + { + case AppControlled: + { + scheme = PipelineLayoutScheme::Compact; + } + break; + case ForceCompact: + scheme = PipelineLayoutScheme::Compact; + break; + case ForceIndirect: + scheme = PipelineLayoutScheme::Indirect; + break; + default: + VK_NEVER_CALLED(); + break; + } + + return scheme; +} + // ===================================================================================================================== // Creates a pipeline layout object. 
VkResult PipelineLayout::Create( @@ -447,7 +669,7 @@ VkResult PipelineLayout::Create( pushConstantRange.size = Util::Max(pushConstantRange.size, - layoutInfo.userDataLayout.pushConstRegCount * sizeof(uint32_t)); + layoutInfo.userDataLayout.compact.pushConstRegCount * sizeof(uint32_t)); } } @@ -585,7 +807,6 @@ Vkgc::ResourceMappingNodeType PipelineLayout::MapLlpcResourceNodeType( nodeType = Vkgc::ResourceMappingNodeType::DescriptorResource; break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 49 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: nodeType = Vkgc::ResourceMappingNodeType::DescriptorImage; break; @@ -598,21 +819,6 @@ Vkgc::ResourceMappingNodeType PipelineLayout::MapLlpcResourceNodeType( case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: nodeType = Vkgc::ResourceMappingNodeType::DescriptorConstBufferCompact; break; -#else - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - nodeType = Vkgc::ResourceMappingNodeType::DescriptorResource; - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - nodeType = Vkgc::ResourceMappingNodeType::DescriptorTexelBuffer; - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - nodeType = Vkgc::ResourceMappingNodeType::DescriptorBuffer; - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: - nodeType = Vkgc::ResourceMappingNodeType::DescriptorBufferCompact; - break; -#endif - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: nodeType = Vkgc::ResourceMappingNodeType::DescriptorTexelBuffer; break; @@ -709,15 +915,9 @@ VkResult PipelineLayout::BuildLlpcSetMapping( } else { -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 49 pNode->node.type = (binding.dyn.dwArrayStride == 2) ? Vkgc::ResourceMappingNodeType::DescriptorConstBufferCompact: Vkgc::ResourceMappingNodeType::DescriptorConstBuffer; -#else - pNode->node.type = (binding.dyn.dwArrayStride == 2) ? 
- Vkgc::ResourceMappingNodeType::DescriptorBufferCompact : - Vkgc::ResourceMappingNodeType::DescriptorBuffer; -#endif } pNode->node.offsetInDwords = userDataRegBase + binding.dyn.dwOffset; pNode->node.sizeInDwords = binding.dyn.dwSize; @@ -732,16 +932,240 @@ VkResult PipelineLayout::BuildLlpcSetMapping( } // ===================================================================================================================== -// This function populates the resource mapping node details to the shader-stage specific pipeline info structure. -VkResult PipelineLayout::BuildLlpcPipelineMapping( +// Builds the VKGC resource mapping nodes for the static descriptors in a descriptor set +void PipelineLayout::BuildLlpcStaticSetMapping( + const DescriptorSetLayout* pLayout, + const uint32_t visibility, + const uint32_t setIndex, + Vkgc::ResourceMappingNode* pNodes, + uint32_t* pNodeCount, + Vkgc::StaticDescriptorValue* pDescriptorRangeValue, + uint32_t* pDescriptorRangeCount + ) const +{ + *pNodeCount = 0; + *pDescriptorRangeCount = 0; + + for (uint32_t bindingIndex = 0; bindingIndex < pLayout->Info().count; ++bindingIndex) + { + const DescriptorSetLayout::BindingInfo& binding = pLayout->Binding(bindingIndex); + + if (binding.sta.dwSize > 0) + { + Vkgc::ResourceMappingNode* pNode = pNodes + *pNodeCount; + + pNode->type = MapLlpcResourceNodeType(binding.info.descriptorType); + pNode->offsetInDwords = binding.sta.dwOffset; + pNode->sizeInDwords = binding.sta.dwSize; + pNode->srdRange.binding = binding.info.binding; + pNode->srdRange.set = + setIndex; + (*pNodeCount)++; + + if (binding.imm.dwSize > 0) + { + const uint32_t arraySize = binding.imm.dwSize / binding.imm.dwArrayStride; + const uint32_t* pImmutableSamplerData = pLayout->Info().imm.pImmutableSamplerData + + binding.imm.dwOffset; + + if (binding.bindingFlags.ycbcrConversionUsage == 0) + { + pDescriptorRangeValue->type = Vkgc::ResourceMappingNodeType::DescriptorSampler; + } + else + { + pNode->type = 
Vkgc::ResourceMappingNodeType::DescriptorYCbCrSampler; + pDescriptorRangeValue->type = Vkgc::ResourceMappingNodeType::DescriptorYCbCrSampler; + } + + pDescriptorRangeValue->set = setIndex; + pDescriptorRangeValue->binding = binding.info.binding; + pDescriptorRangeValue->pValue = pImmutableSamplerData; + pDescriptorRangeValue->arraySize = arraySize; + pDescriptorRangeValue->visibility = visibility; + ++pDescriptorRangeValue; + ++(*pDescriptorRangeCount); + } + } + } +} + +// ===================================================================================================================== +// Fill a root resource mapping node for a dynamic descriptor node +template <> +void PipelineLayout::FillDynamicSetNode( + const Vkgc::ResourceMappingNodeType type, + const uint32_t visibility, + const uint32_t setIndex, + const uint32_t bindingIndex, + const uint32_t offsetInDwords, + const uint32_t sizeInDwords, + const uint32_t userDataRegBase, + Vkgc::ResourceMappingRootNode* pNode + ) const +{ + pNode->node.type = type; + pNode->node.offsetInDwords = userDataRegBase + offsetInDwords; + pNode->node.sizeInDwords = sizeInDwords; + pNode->node.srdRange.binding = bindingIndex; + pNode->node.srdRange.set = setIndex; + pNode->visibility = visibility; +} + +// ===================================================================================================================== +// Fill a normal resource mapping node for a dynamic descriptor node +template <> +void PipelineLayout::FillDynamicSetNode( + const Vkgc::ResourceMappingNodeType type, + const uint32_t visibility, + const uint32_t setIndex, + const uint32_t bindingIndex, + const uint32_t offsetInDwords, + const uint32_t sizeInDwords, + const uint32_t userDataRegBase, + Vkgc::ResourceMappingNode* pNode + ) const +{ + pNode->type = type; + pNode->offsetInDwords = offsetInDwords; + pNode->sizeInDwords = sizeInDwords; + pNode->srdRange.binding = bindingIndex; + pNode->srdRange.set = setIndex; +} + +// 
===================================================================================================================== +// Builds the VKGC resource mapping nodes for the dynamic descriptors in a descriptor set +template +void PipelineLayout::BuildLlpcDynamicSetMapping( + const DescriptorSetLayout* pLayout, + const uint32_t visibility, + const uint32_t setIndex, + const uint32_t userDataRegBase, + NodeType* pNodes, + uint32_t* pNodeCount + ) const +{ + static_assert(std::is_same::value || + std::is_same::value, + "Unexpected resouce mapping node type!"); + + *pNodeCount = 0; + + for (uint32_t bindingIndex = 0; bindingIndex < pLayout->Info().count; ++bindingIndex) + { + const DescriptorSetLayout::BindingInfo& binding = pLayout->Binding(bindingIndex); + + if (binding.dyn.dwSize > 0) + { + VK_ASSERT((binding.info.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) || + (binding.info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)); + + Vkgc::ResourceMappingNodeType nodeType = Vkgc::ResourceMappingNodeType::Unknown; + if (binding.info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + { + nodeType = (binding.dyn.dwArrayStride == 2) ? + Vkgc::ResourceMappingNodeType::DescriptorBufferCompact : + Vkgc::ResourceMappingNodeType::DescriptorBuffer; + + } + else + { + nodeType = (binding.dyn.dwArrayStride == 2) ? 
+ Vkgc::ResourceMappingNodeType::DescriptorConstBufferCompact : + Vkgc::ResourceMappingNodeType::DescriptorConstBuffer; + } + + FillDynamicSetNode( + nodeType, + visibility, + setIndex, + binding.info.binding, + binding.dyn.dwOffset, + binding.dyn.dwSize, + userDataRegBase, + pNodes + *pNodeCount); + + (*pNodeCount)++; + } + } +} + +// ===================================================================================================================== +// Builds the VKGC resource mapping nodes for vertex buffer table +void PipelineLayout::BuildLlpcVertexBufferTableMapping( + const VbBindingInfo* pVbInfo, + const uint32_t offsetInDwords, + const uint32_t sizeInDwords, + Vkgc::ResourceMappingRootNode* pNode, + uint32_t* pNodeCount + ) const +{ + *pNodeCount = 0; + + if (pVbInfo != nullptr) + { + // Build the table description itself + const uint32_t srdDwSize = m_pDevice->GetProperties().descriptorSizes.bufferView / sizeof(uint32_t); + const uint32_t vbTableSize = pVbInfo->bindingTableSize * srdDwSize; + + // Add the set pointer node pointing to this table + pNode->node.type = Vkgc::ResourceMappingNodeType::IndirectUserDataVaPtr; + pNode->node.offsetInDwords = offsetInDwords; + pNode->node.sizeInDwords = sizeInDwords; + pNode->node.userDataPtr.sizeInDwords = vbTableSize; + pNode->visibility = Vkgc::ShaderStageVertexBit; + + *pNodeCount = 1; + } +} + +// ===================================================================================================================== +// Builds the VKGC resource mapping nodes for transform feedback buffer +void PipelineLayout::BuildLlpcTransformFeedbackMapping( + const uint32_t stageMask, + const uint32_t offsetInDwords, + const uint32_t sizeInDwords, + Vkgc::ResourceMappingRootNode* pNode, + uint32_t* pNodeCount + ) const +{ + uint32_t xfbStages = (stageMask & (Vkgc::ShaderStageFragmentBit - 1)) >> 1; + uint32_t lastXfbStageBit = Vkgc::ShaderStageVertexBit; + + *pNodeCount = 0; + + while (xfbStages > 0) + { + lastXfbStageBit <<= 
1; + xfbStages >>= 1; + } + + if (lastXfbStageBit != 0) + { + pNode->node.type = Vkgc::ResourceMappingNodeType::StreamOutTableVaPtr; + pNode->node.offsetInDwords = offsetInDwords; + pNode->node.sizeInDwords = sizeInDwords; + pNode->visibility = lastXfbStageBit; + + *pNodeCount = 1; + } +} + +// ===================================================================================================================== +// Populates the resouce mapping nodes in compact scheme +VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( const uint32_t stageMask, - VbBindingInfo* pVbInfo, + const VbBindingInfo* pVbInfo, + const bool appendFetchShaderCb, void* pBuffer, - bool appendFetchShaderCb, Vkgc::ResourceMappingData* pResourceMapping ) const { - VkResult result = VK_SUCCESS; + VK_ASSERT(m_info.userDataLayout.scheme == PipelineLayoutScheme::Compact); + + VkResult result = VK_SUCCESS; + const auto& userDataLayout = m_info.userDataLayout.compact; + Vkgc::ResourceMappingRootNode* pUserDataNodes = static_cast(pBuffer); Vkgc::ResourceMappingNode* pResourceNodes = reinterpret_cast(pUserDataNodes + m_pipelineInfo.numUserDataNodes); @@ -798,7 +1222,7 @@ VkResult PipelineLayout::BuildLlpcPipelineMapping( } // Build descriptor for each set - for (uint32_t setIndex = 0; (setIndex < m_info.setCount) && (result == VK_SUCCESS); ++setIndex) + for (uint32_t setIndex = 0; setIndex < m_info.setCount; ++setIndex) { const auto pSetUserData = &GetSetUserData(setIndex); const auto pSetLayout = GetSetLayouts(setIndex); @@ -816,17 +1240,22 @@ VkResult PipelineLayout::BuildLlpcPipelineMapping( uint32_t staNodeCount = 0; uint32_t descRangeCount = 0; - result = BuildLlpcSetMapping( + BuildLlpcDynamicSetMapping( + pSetLayout, visibility, setIndex, - pSetLayout, + userDataLayout.setBindingRegBase + pSetUserData->dynDescDataRegOffset, pDynNodes, - &dynNodeCount, + &dynNodeCount); + + BuildLlpcStaticSetMapping( + pSetLayout, + visibility, + setIndex, pStaNodes, &staNodeCount, pDescValues, - 
&descRangeCount, - m_info.userDataLayout.setBindingRegBase + pSetUserData->dynDescDataRegOffset); + &descRangeCount); // Increase the number of mapping nodes used by the number of static section nodes added. mappingNodeCount += staNodeCount; @@ -843,7 +1272,7 @@ VkResult PipelineLayout::BuildLlpcPipelineMapping( auto pSetPtrNode = &pUserDataNodes[userDataNodeCount]; pSetPtrNode->node.type = Vkgc::ResourceMappingNodeType::DescriptorTableVaPtr; - pSetPtrNode->node.offsetInDwords = m_info.userDataLayout.setBindingRegBase + + pSetPtrNode->node.offsetInDwords = userDataLayout.setBindingRegBase + pSetUserData->setPtrRegOffset; pSetPtrNode->node.sizeInDwords = SetPtrRegCount; pSetPtrNode->node.tablePtr.nodeCount = staNodeCount; @@ -855,41 +1284,33 @@ VkResult PipelineLayout::BuildLlpcPipelineMapping( } // TODO: Build the internal push constant resource mapping - if (m_info.userDataLayout.pushConstRegCount > 0) + if (userDataLayout.pushConstRegCount > 0) { auto pPushConstNode = &pUserDataNodes[userDataNodeCount]; pPushConstNode->node.type = Vkgc::ResourceMappingNodeType::PushConst; - pPushConstNode->node.offsetInDwords = m_info.userDataLayout.pushConstRegBase; - pPushConstNode->node.sizeInDwords = m_info.userDataLayout.pushConstRegCount; + pPushConstNode->node.offsetInDwords = userDataLayout.pushConstRegBase; + pPushConstNode->node.sizeInDwords = userDataLayout.pushConstRegCount; pPushConstNode->node.srdRange.set = Vkgc::InternalDescriptorSetId; pPushConstNode->visibility = stageMask; userDataNodeCount += 1; } - if (m_info.userDataLayout.transformFeedbackRegCount > 0) + if (userDataLayout.transformFeedbackRegCount > 0) { - uint32_t xfbStages = (stageMask & (Vkgc::ShaderStageFragmentBit - 1)) >> 1; - uint32_t lastXfbStageBit = Vkgc::ShaderStageVertexBit; - - while (xfbStages > 0) - { - lastXfbStageBit <<= 1; - xfbStages >>= 1; - } + uint32_t nodeCount; - if (lastXfbStageBit != 0) - { - auto pTransformFeedbackNode = &pUserDataNodes[userDataNodeCount]; - 
pTransformFeedbackNode->node.type = Vkgc::ResourceMappingNodeType::StreamOutTableVaPtr; - pTransformFeedbackNode->node.offsetInDwords = m_info.userDataLayout.transformFeedbackRegBase; - pTransformFeedbackNode->node.sizeInDwords = m_info.userDataLayout.transformFeedbackRegCount; - pTransformFeedbackNode->visibility = lastXfbStageBit; + BuildLlpcTransformFeedbackMapping( + stageMask, + userDataLayout.transformFeedbackRegBase, + userDataLayout.transformFeedbackRegCount, + &pUserDataNodes[userDataNodeCount], + &nodeCount); - userDataNodeCount += 1; - } + userDataNodeCount += nodeCount; } - if ((result == VK_SUCCESS) && (pVbInfo != nullptr)) + + if (pVbInfo != nullptr) { // Build the internal vertex buffer table mapping constexpr uint32_t VbTablePtrRegCount = 1; // PAL requires all indirect user data tables to be 1DW @@ -897,22 +1318,12 @@ VkResult PipelineLayout::BuildLlpcPipelineMapping( if ((m_info.userDataRegCount + VbTablePtrRegCount) <= m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gfxipProperties.maxUserDataEntries) { - VK_ASSERT(pVbInfo != nullptr); - - // Build the table description itself - const uint32_t srdDwSize = m_pDevice->GetProperties().descriptorSizes.bufferView / sizeof(uint32_t); - uint32_t vbTableSize = pVbInfo->bindingTableSize * srdDwSize; + uint32_t nodeCount; - // Add the set pointer node pointing to this table - auto pVbTblPtrNode = &pUserDataNodes[userDataNodeCount]; + BuildLlpcVertexBufferTableMapping( + pVbInfo, m_info.userDataRegCount, VbTablePtrRegCount, &pUserDataNodes[userDataNodeCount], &nodeCount); - pVbTblPtrNode->node.type = Vkgc::ResourceMappingNodeType::IndirectUserDataVaPtr; - pVbTblPtrNode->node.offsetInDwords = m_info.userDataRegCount; - pVbTblPtrNode->node.sizeInDwords = VbTablePtrRegCount; - pVbTblPtrNode->node.userDataPtr.sizeInDwords = vbTableSize; - pVbTblPtrNode->visibility = Vkgc::ShaderStageVertexBit; - - userDataNodeCount += 1; + userDataNodeCount += nodeCount; } else { @@ -921,8 +1332,8 @@ VkResult 
PipelineLayout::BuildLlpcPipelineMapping( } // If you hit these assert, we precomputed an insufficient amount of scratch space during layout creation. - VK_ASSERT(userDataNodeCount <= m_pipelineInfo.numUserDataNodes); - VK_ASSERT(mappingNodeCount <= m_pipelineInfo.numRsrcMapNodes); + VK_ASSERT(userDataNodeCount <= m_pipelineInfo.numUserDataNodes); + VK_ASSERT(mappingNodeCount <= m_pipelineInfo.numRsrcMapNodes); VK_ASSERT(descriptorRangeCount <= m_pipelineInfo.numDescRangeValueNodes); pResourceMapping->pUserDataNodes = pUserDataNodes; @@ -930,7 +1341,201 @@ VkResult PipelineLayout::BuildLlpcPipelineMapping( pResourceMapping->pStaticDescriptorValues = pDescriptorRangeValues; pResourceMapping->staticDescriptorValueCount = descriptorRangeCount; - return VK_SUCCESS; + return result; +} + +// ===================================================================================================================== +// Populates the resouce mapping nodes in indirect scheme +void PipelineLayout::BuildIndirectSchemeLlpcPipelineMapping( + const uint32_t stageMask, + const VbBindingInfo* pVbInfo, + void* pBuffer, + Vkgc::ResourceMappingData* pResourceMapping + ) const +{ + VK_ASSERT(m_info.userDataLayout.scheme == PipelineLayoutScheme::Indirect); + + constexpr uint32_t VbTablePtrRegCount = 1; // PAL requires all indirect user data tables to be 1DW + constexpr uint32_t PushConstPtrRegCount = 1; + constexpr uint32_t TransformFeedbackRegCount = 1; + constexpr uint32_t DescSetsPtrRegCount = 2 * SetPtrRegCount * MaxDescriptorSets; + + const bool transformFeedbackEnabled = + m_pDevice->IsExtensionEnabled(DeviceExtensions::EXT_TRANSFORM_FEEDBACK); + + const uint32_t vbTablePtrRegBase = 0; + const uint32_t pushConstPtrRegBase = vbTablePtrRegBase + VbTablePtrRegCount; + const uint32_t transformFeedbackRegBase = + (transformFeedbackEnabled == false) ? InvalidReg : (pushConstPtrRegBase + PushConstPtrRegCount); + const uint32_t setBindingPtrRegBase = + transformFeedbackEnabled ? 
(transformFeedbackRegBase + TransformFeedbackRegCount) : + (pushConstPtrRegBase + PushConstPtrRegCount); + + const auto& userDataLayout = m_info.userDataLayout.indirect; + + Vkgc::ResourceMappingRootNode* pUserDataNodes = static_cast(pBuffer); + Vkgc::ResourceMappingNode* pResourceNodes = + reinterpret_cast(pUserDataNodes + m_pipelineInfo.numUserDataNodes); + Vkgc::StaticDescriptorValue* pDescriptorRangeValues = + reinterpret_cast(pResourceNodes + m_pipelineInfo.numRsrcMapNodes); + + uint32_t userDataNodeCount = 0; // Number of consumed ResourceMappingRootNodes + uint32_t mappingNodeCount = 0; // Number of consumed ResourceMappingNodes (only sub-nodes) + uint32_t descriptorRangeCount = 0; // Number of consumed StaticResourceValues + + // Build the internal vertex buffer table mapping + if (pVbInfo != nullptr) + { + uint32_t nodeCount; + + BuildLlpcVertexBufferTableMapping( + pVbInfo, vbTablePtrRegBase, VbTablePtrRegCount, &pUserDataNodes[userDataNodeCount], &nodeCount); + + userDataNodeCount += nodeCount; + } + + // Build push constants mapping + if (userDataLayout.pushConstSizeInDword > 0) + { + // Build mapping for push constant resouce + Vkgc::ResourceMappingNode* pPushConstNode = &pResourceNodes[mappingNodeCount]; + + pPushConstNode->type = Vkgc::ResourceMappingNodeType::PushConst; + pPushConstNode->offsetInDwords = 0; + pPushConstNode->sizeInDwords = userDataLayout.pushConstSizeInDword; + pPushConstNode->srdRange.set = Vkgc::InternalDescriptorSetId; + + ++mappingNodeCount; + + // Build mapping for the pointer pointing to push constants buffer + Vkgc::ResourceMappingRootNode* pPushConstPtrNode = &pUserDataNodes[userDataNodeCount]; + + pPushConstPtrNode->node.type = Vkgc::ResourceMappingNodeType::DescriptorTableVaPtr; + pPushConstPtrNode->node.offsetInDwords = pushConstPtrRegBase; + pPushConstPtrNode->node.sizeInDwords = PushConstPtrRegCount; + pPushConstPtrNode->node.tablePtr.nodeCount = 1; + pPushConstPtrNode->node.tablePtr.pNext = pPushConstNode; + 
pPushConstPtrNode->visibility = stageMask; + + userDataNodeCount += 1; + } + + // Build transform feedback buffer mapping + if (transformFeedbackEnabled) + { + uint32_t nodeCount; + + BuildLlpcTransformFeedbackMapping( + stageMask, + transformFeedbackRegBase, + TransformFeedbackRegCount, + &pUserDataNodes[userDataNodeCount], + &nodeCount); + + userDataNodeCount += nodeCount; + } + + // Build mapping for each set of descriptors + VK_ASSERT(setBindingPtrRegBase == userDataLayout.setBindingPtrRegBase); + + for (uint32_t setIndex = 0; setIndex < m_info.setCount; ++setIndex) + { + const DescriptorSetLayout* pSetLayout = GetSetLayouts(setIndex); + + const uint32_t visibility = stageMask & VkToVkgcShaderStageMask(pSetLayout->Info().activeStageMask); + + if (visibility != 0) + { + uint32_t dynNodeCount = 0; + uint32_t staNodeCount = 0; + uint32_t descRangeCount = 0; + + Vkgc::ResourceMappingNode* pDynNodes = &pResourceNodes[mappingNodeCount]; + BuildLlpcDynamicSetMapping( + pSetLayout, visibility, setIndex, 0, pDynNodes, &dynNodeCount); + + Vkgc::ResourceMappingNode* pStaNodes = &pResourceNodes[mappingNodeCount + dynNodeCount]; + Vkgc::StaticDescriptorValue* pDescValues = &pDescriptorRangeValues[descriptorRangeCount]; + BuildLlpcStaticSetMapping( + pSetLayout, visibility, setIndex, pStaNodes, &staNodeCount, pDescValues, &descRangeCount); + + // Increase the number of mapping nodes used by the number of static section nodes added. + mappingNodeCount += (dynNodeCount + staNodeCount); + + // Increase the number of descriptor range value nodes used by immutable samplers + descriptorRangeCount += descRangeCount; + + // Add a top-level user data node entry for dynamic nodes. 
+ if (pSetLayout->Info().dyn.numRsrcMapNodes > 0) + { + Vkgc::ResourceMappingRootNode* pSetPtrNode = &pUserDataNodes[userDataNodeCount]; + + pSetPtrNode->node.type = Vkgc::ResourceMappingNodeType::DescriptorTableVaPtr; + pSetPtrNode->node.offsetInDwords = 2 * setIndex * SetPtrRegCount + setBindingPtrRegBase; + pSetPtrNode->node.sizeInDwords = SetPtrRegCount; + pSetPtrNode->node.tablePtr.nodeCount = dynNodeCount; + pSetPtrNode->node.tablePtr.pNext = pDynNodes; + pSetPtrNode->visibility = visibility; + + ++userDataNodeCount; + } + + // Add a top-level user data node entry for static nodes. + if (pSetLayout->Info().sta.numRsrcMapNodes > 0) + { + Vkgc::ResourceMappingRootNode* pSetPtrNode = &pUserDataNodes[userDataNodeCount]; + + pSetPtrNode->node.type = Vkgc::ResourceMappingNodeType::DescriptorTableVaPtr; + pSetPtrNode->node.offsetInDwords = (2 * setIndex + 1) * SetPtrRegCount + setBindingPtrRegBase; + pSetPtrNode->node.sizeInDwords = SetPtrRegCount; + pSetPtrNode->node.tablePtr.nodeCount = staNodeCount; + pSetPtrNode->node.tablePtr.pNext = pStaNodes; + pSetPtrNode->visibility = visibility; + + ++userDataNodeCount; + } + } + } + + // If you hit these assert, we precomputed an insufficient amount of scratch space during layout creation. + VK_ASSERT(userDataNodeCount <= m_pipelineInfo.numUserDataNodes); + VK_ASSERT(mappingNodeCount <= m_pipelineInfo.numRsrcMapNodes); + VK_ASSERT(descriptorRangeCount <= m_pipelineInfo.numDescRangeValueNodes); + + pResourceMapping->pUserDataNodes = pUserDataNodes; + pResourceMapping->userDataNodeCount = userDataNodeCount; + pResourceMapping->pStaticDescriptorValues = pDescriptorRangeValues; + pResourceMapping->staticDescriptorValueCount = descriptorRangeCount; +} + +// ===================================================================================================================== +// This function populates the resource mapping node details to the shader-stage specific pipeline info structure. 
+VkResult PipelineLayout::BuildLlpcPipelineMapping( + const uint32_t stageMask, + const VbBindingInfo* pVbInfo, + const bool appendFetchShaderCb, + void* pBuffer, + Vkgc::ResourceMappingData* pResourceMapping + ) const +{ + VkResult result = VK_SUCCESS; + + if (m_info.userDataLayout.scheme == PipelineLayoutScheme::Compact) + { + result = BuildCompactSchemeLlpcPipelineMapping( + stageMask, pVbInfo, appendFetchShaderCb, pBuffer, pResourceMapping); + } + else if (m_info.userDataLayout.scheme == PipelineLayoutScheme::Indirect) + { + BuildIndirectSchemeLlpcPipelineMapping( + stageMask, pVbInfo, pBuffer, pResourceMapping); + } + else + { + VK_NEVER_CALLED(); + } + + return result; } // ===================================================================================================================== diff --git a/icd/api/vk_query.cpp b/icd/api/vk_query.cpp index aaeeadbb..faff6804 100644 --- a/icd/api/vk_query.cpp +++ b/icd/api/vk_query.cpp @@ -37,6 +37,7 @@ #include "palAutoBuffer.h" #include "palQueryPool.h" +#include "palSysUtil.h" namespace vk { diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp index 962d0cdc..e7585561 100644 --- a/icd/api/vk_queue.cpp +++ b/icd/api/vk_queue.cpp @@ -1056,7 +1056,10 @@ VkResult Queue::Present( } // Fill in present information and obtain the PAL memory of the presentable image. 
- Pal::IGpuMemory* pGpuMemory = pSwapChain->UpdatePresentInfo(presentationDeviceIdx, imageIndex, &presentInfo); + Pal::IGpuMemory* pGpuMemory = pSwapChain->UpdatePresentInfo(presentationDeviceIdx, + imageIndex, + &presentInfo, + m_flipStatus.flipFlags); CmdBufState* pCmdBufState = AcquireInternalCmdBuf(presentationDeviceIdx); diff --git a/icd/api/vk_sampler.cpp b/icd/api/vk_sampler.cpp index d271742b..2ddc8952 100644 --- a/icd/api/vk_sampler.cpp +++ b/icd/api/vk_sampler.cpp @@ -63,10 +63,11 @@ uint64_t Sampler::BuildApiHash( { union { - const VkStructHeader* pInfo; - const VkSamplerYcbcrConversionInfo* pYCbCrConversionInfo; - const VkSamplerReductionModeCreateInfo* pReductionModeCreateInfo; - const VkSamplerCustomBorderColorCreateInfoEXT* pVkSamplerCustomBorderColorCreateInfoEXT; + const VkStructHeader* pInfo; + const VkSamplerYcbcrConversionInfo* pYCbCrConversionInfo; + const VkSamplerReductionModeCreateInfo* pReductionModeCreateInfo; + const VkSamplerCustomBorderColorCreateInfoEXT* pVkSamplerCustomBorderColorCreateInfoEXT; + const VkSamplerBorderColorComponentMappingCreateInfoEXT* pVkSamplerBorderColorComponentMappingCreateInfoEXT; }; pInfo = static_cast(pCreateInfo->pNext); @@ -97,6 +98,11 @@ uint64_t Sampler::BuildApiHash( hasher.Update(pVkSamplerCustomBorderColorCreateInfoEXT->customBorderColor); hasher.Update(pVkSamplerCustomBorderColorCreateInfoEXT->format); break; + case VK_STRUCTURE_TYPE_SAMPLER_BORDER_COLOR_COMPONENT_MAPPING_CREATE_INFO_EXT: + hasher.Update(pVkSamplerBorderColorComponentMappingCreateInfoEXT->sType); + hasher.Update(pVkSamplerBorderColorComponentMappingCreateInfoEXT->components); + hasher.Update(pVkSamplerBorderColorComponentMappingCreateInfoEXT->srgb); + break; default: break; } diff --git a/icd/api/vk_shader.cpp b/icd/api/vk_shader.cpp index e6b9a7be..4a226a8f 100644 --- a/icd/api/vk_shader.cpp +++ b/icd/api/vk_shader.cpp @@ -191,11 +191,15 @@ VkResult ShaderModule::Create( VkResult ShaderModule::Init(const Device* pDevice, 
VkShaderModuleCreateFlags flags) { PipelineCompiler* pCompiler = pDevice->GetCompiler(DefaultDeviceIndex); - return pCompiler->BuildShaderModule(pDevice, - flags, - m_codeSize, - m_pCode, - &m_handle); + + VkResult result = pCompiler->BuildShaderModule(pDevice, flags, m_codeSize, m_pCode, &m_handle); + + if (result == VK_SUCCESS) + { + pCompiler->TryEarlyCompileShaderModule(pDevice, &m_handle); + } + + return result; } // ===================================================================================================================== diff --git a/icd/api/vk_swapchain.cpp b/icd/api/vk_swapchain.cpp index 8e1c91d6..f3963721 100644 --- a/icd/api/vk_swapchain.cpp +++ b/icd/api/vk_swapchain.cpp @@ -53,8 +53,6 @@ namespace vk { - // Default to true - bool SwapChain::s_forceTurboSyncEnable = true; static bool EnableFullScreen( const Device* pDevice, @@ -103,8 +101,15 @@ VkResult SwapChain::Create( VkResult result = VK_SUCCESS; + const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); Properties properties = {}; + // the old swapchain should be flaged as deprecated no matter whether the new swapchain is created successfully. + if (pCreateInfo->oldSwapchain != VK_NULL_HANDLE) + { + SwapChain::ObjectFromHandle(pCreateInfo->oldSwapchain)->MarkAsDeprecated(pAllocator); + } + uint32 viewFormatCount = 0; const VkFormat* pViewFormats = nullptr; @@ -118,8 +123,7 @@ VkResult SwapChain::Create( // The swap chain is stereo if imageArraySize is 2 properties.flags.stereo = (pCreateInfo->imageArrayLayers == 2) ? 1 : 0; - properties.imageCreateInfo.swizzledFormat = VkToPalFormat(pCreateInfo->imageFormat, - pDevice->GetRuntimeSettings()); + properties.imageCreateInfo.swizzledFormat = VkToPalFormat(pCreateInfo->imageFormat, settings); properties.imageCreateInfo.flags.stereo = properties.flags.stereo; properties.imageCreateInfo.flags.peerWritable = (pDevice->NumPalDevices() > 1) ? 
1 : 0; @@ -141,8 +145,8 @@ VkResult SwapChain::Create( // The swapchain image can be used as a blit source for driver post processing on present. properties.imageCreateInfo.usage.shaderRead = 1; - if ((pDevice->GetRuntimeSettings().disableDisplayDcc == DisplayableDcc::DisplayableDccDisabled) || - ((pDevice->GetRuntimeSettings().disableDisplayDcc == DisplayableDcc::DisplayableDccDisabledForMgpu) && + if ((settings.disableDisplayDcc == DisplayableDcc::DisplayableDccDisabled) || + ((settings.disableDisplayDcc == DisplayableDcc::DisplayableDccDisabledForMgpu) && (pDevice->IsMultiGpu()))) { properties.imageCreateInfo.usage.disableOptimizedDisplay = 1; @@ -202,8 +206,7 @@ VkResult SwapChain::Create( // expects that to be excluded from the list. if (pViewFormats[i] != pCreateInfo->imageFormat) { - palFormatList[properties.imageCreateInfo.viewFormatCount++] = VkToPalFormat(pViewFormats[i], - pDevice->GetRuntimeSettings()); + palFormatList[properties.imageCreateInfo.viewFormatCount++] = VkToPalFormat(pViewFormats[i], settings); } } } @@ -244,8 +247,12 @@ VkResult SwapChain::Create( swapChainCreateInfo.compositeAlpha = VkToPalCompositeAlphaMode(pCreateInfo->compositeAlpha); swapChainCreateInfo.imageArraySize = 1; swapChainCreateInfo.swapChainMode = VkToPalSwapChainMode(pCreateInfo->presentMode); + swapChainCreateInfo.colorSpace = VkToPalScreenSpace(VkSurfaceFormatKHR{ pCreateInfo->imageFormat, + pCreateInfo->imageColorSpace }); + + swapChainCreateInfo.flags.canAcquireBeforeSignaling = settings.enableAcquireBeforeSignal; - swapChainCreateInfo.flags.canAcquireBeforeSignaling = pDevice->GetRuntimeSettings().enableAcquireBeforeSignal; + Pal::IDevice* pPalDevice = pDevice->PalDevice(properties.presentationDeviceIdx); if (properties.displayableInfo.icdPlatform == VK_ICD_WSI_PLATFORM_DISPLAY) { @@ -266,7 +273,6 @@ VkResult SwapChain::Create( const FullscreenMgr::Mode mode = FullscreenMgr::Implicit; // Find the monitor is associated with the given window handle - Pal::IDevice* 
pPalDevice = pDevice->PalDevice(properties.presentationDeviceIdx); Pal::IScreen* pScreen = pDevice->VkInstance()->FindScreen(pPalDevice, swapChainCreateInfo.hWindow, properties.imageCreateInfo.hDisplay); @@ -536,12 +542,6 @@ VkResult SwapChain::Create( pDevice->FreeApiObject(pAllocator, pMemory); } - // the old swapchain should be flaged as deprecated no matter whether the new swapchain is created successfully. - if (pCreateInfo->oldSwapchain != 0) - { - SwapChain::ObjectFromHandle(pCreateInfo->oldSwapchain)->MarkAsDeprecated(); - } - return result; } @@ -599,11 +599,14 @@ VkResult SwapChain::Destroy(const VkAllocationCallbacks* pAllocator) m_pSwCompositor->Destroy(m_pDevice, pAllocator); } - for (uint32_t i = 0; i < m_properties.imageCount; ++i) + if (m_pPalSwapChain != nullptr) { - // Remove memory references to presentable image memory and destroy the images and image memory. - Memory::ObjectFromHandle(m_properties.imageMemory[i])->Free(m_pDevice, pAllocator); - Image::ObjectFromHandle(m_properties.images[i])->Destroy(m_pDevice, pAllocator); + for (uint32_t i = 0; i < m_properties.imageCount; ++i) + { + // Remove memory references to presentable image memory and destroy the images and image memory. + Memory::ObjectFromHandle(m_properties.imageMemory[i])->Free(m_pDevice, pAllocator); + Image::ObjectFromHandle(m_properties.images[i])->Destroy(m_pDevice, pAllocator); + } } if (m_pPalSwapChain != nullptr) @@ -781,9 +784,10 @@ VkResult SwapChain::GetSwapchainImagesKHR( // ===================================================================================================================== // Fills in the PAL swap chain present info with the appropriate image to present and returns its GPU memory. 
Pal::IGpuMemory* SwapChain::UpdatePresentInfo( - uint32_t deviceIdx, - uint32_t imageIndex, - Pal::PresentSwapChainInfo* pPresentInfo) + uint32_t deviceIdx, + uint32_t imageIndex, + Pal::PresentSwapChainInfo* pPresentInfo, + const Pal::FlipStatusFlags& flipFlags) { Pal::IGpuMemory* pSrcImageGpuMemory = nullptr; @@ -803,7 +807,7 @@ Pal::IGpuMemory* SwapChain::UpdatePresentInfo( if (m_pFullscreenMgr != nullptr) { - m_pFullscreenMgr->UpdatePresentInfo(this, pPresentInfo); + m_pFullscreenMgr->UpdatePresentInfo(this, pPresentInfo, flipFlags); } @@ -905,9 +909,30 @@ bool SwapChain::IsSuboptimal(uint32_t deviceIdx) } // ===================================================================================================================== -void SwapChain::MarkAsDeprecated() +void SwapChain::MarkAsDeprecated( + const VkAllocationCallbacks* pAllocator) { m_deprecated = true; + + // DXGI and the Vulkan spec enforce that only one swapchain may be tied to a HWND. We never cared about it before + // but we should now. Call into PAL to free the swapchain in DXGI mode. + if (IsDxgiEnabled() && (m_pPalSwapChain != nullptr)) + { + m_pPalSwapChain->WaitIdle(); + + for (uint32_t i = 0; i < m_properties.imageCount; ++i) + { + // Remove memory references to presentable image memory and destroy the images and image memory. + Memory::ObjectFromHandle(m_properties.imageMemory[i])->Free(m_pDevice, pAllocator); + Image::ObjectFromHandle(m_properties.images[i])->Destroy(m_pDevice, pAllocator); + } + + m_pPalSwapChain->Destroy(); + + // Set to null to avoid double deleting when the actual object gets destroyed. + m_pPalSwapChain = nullptr; + } + } // ===================================================================================================================== @@ -1059,7 +1084,7 @@ bool FullscreenMgr::TryExitExclusive( } // if we acquired full screen ownership before with this fullscreenmanager. 
- if ((m_pScreen != nullptr)) + if (m_pScreen != nullptr) { Pal::Result palResult = m_pScreen->ReleaseFullscreenOwnership(); @@ -1275,6 +1300,12 @@ void FullscreenMgr::PostPresent( VK_ASSERT(presentInfo.presentMode != Pal::PresentMode::Fullscreen); } + // If DXGI reports this error, try to force swapchain recreation to fix it. + if ((*pPresentResult == Pal::Result::ErrorInvalidValue) && pSwapChain->IsDxgiEnabled()) + { + *pPresentResult = Pal::Result::ErrorIncompatibleDisplayMode; + } + // There are cases under extreme alt-tabbing when DWM may return a null shared window handle (the windowed // blit destination surface). This will then subsequently cause PAL to fail that windowed present. // @@ -1309,15 +1340,37 @@ void FullscreenMgr::PostPresent( // This can only happen if the screen is currently compatible with fullscreen presents and we have successfully // acquired exclusive access to the screen. void FullscreenMgr::UpdatePresentInfo( - SwapChain* pSwapChain, - Pal::PresentSwapChainInfo* pPresentInfo) + SwapChain* pSwapChain, + Pal::PresentSwapChainInfo* pPresentInfo, + const Pal::FlipStatusFlags& flipFlags) { + // Present mode does not matter in DXGI as it is completely OS handled. This is for our internal tracking only + if (pSwapChain->IsDxgiEnabled()) + { + const auto& imageInfo = m_pImage->PalImage(DefaultDeviceIndex)->GetImageCreateInfo(); + + Pal::Extent2d imageExtent; + + imageExtent.width = imageInfo.extent.width; + imageExtent.height = imageInfo.extent.height; + + Pal::Result isFsePossible = m_pScreen->IsImplicitFullscreenOwnershipSafe(m_hDisplay, m_hWindow, imageExtent); + + // If KMD reported we're in Indpendent Flip and our window is fullscreen compatible, it is safe to assume + // that DXGI acquired FSE. + bool isFullscreen = (isFsePossible == Pal::Result::Success) && flipFlags.iFlip; + + pPresentInfo->presentMode = isFullscreen ? 
Pal::PresentMode::Fullscreen : Pal::PresentMode::Windowed; + } // Try to enter (or remain in) exclusive access mode on this swap chain's screen for this present - TryEnterExclusive(pSwapChain); + else + { + TryEnterExclusive(pSwapChain); - // Always fallback to windowed if FSE is not acquired to avoid missing presents. - pPresentInfo->presentMode = + // Always fallback to windowed if FSE is not acquired to avoid missing presents. + pPresentInfo->presentMode = m_exclusiveModeFlags.acquired ? Pal::PresentMode::Fullscreen : Pal::PresentMode::Windowed; + } } // ===================================================================================================================== diff --git a/icd/make/importdefs b/icd/make/importdefs index 86d88e0b..d175fdf4 100644 --- a/icd/make/importdefs +++ b/icd/make/importdefs @@ -26,7 +26,7 @@ # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. It must # be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -ICD_PAL_CLIENT_MAJOR_VERSION = 685 +ICD_PAL_CLIENT_MAJOR_VERSION = 687 ICD_PAL_CLIENT_MINOR_VERSION = 0 # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. It describes @@ -36,7 +36,7 @@ ICD_GPUOPEN_CLIENT_MINOR_VERSION = 0 # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1. It describes the version of the # interface version of LLPC that the ICD supports. 
-ICD_LLPC_CLIENT_MAJOR_VERSION = 50 +ICD_LLPC_CLIENT_MAJOR_VERSION = 52 # When ICD_LLPC_CLIENT_MAJOR_VERSION >= 39, Set ENABLE_VKGC to 1 to use Vkgc namespace instead of Llpc namespace in ICD ENABLE_VKGC=1 diff --git a/icd/res/ver.h b/icd/res/ver.h index e488af32..c3b154c1 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 203 +#define VULKAN_ICD_BUILD_VERSION 207 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index eb46c671..ecb19c8f 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -399,6 +399,15 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; } + if (pInfo->revision == Pal::AsicRevision::Navi22) + { + m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccFor64BppShaderStorage); + + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; + } if (pInfo->revision == Pal::AsicRevision::Navi23) { m_settings.forceEnableDcc = (ForceDccFor32BppShaderStorage | @@ -605,6 +614,16 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( ForceDccForColorAttachments | ForceDccForNonColorAttachmentShaderStorage); } + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.nggEnableBackfaceCulling = false; + m_settings.nggEnableSmallPrimFilter = false; + + } + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp9) + { + m_settings.imageTilingOptMode = Pal::TilingOptMode::OptForSpeed; + } } diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index a3bdd103..761ce0ea 100644 --- a/icd/settings/settings_xgl.json +++ 
b/icd/settings/settings_xgl.json @@ -101,34 +101,29 @@ "Value": 1024, "Description": "Scissor rect state (only when marked static)" }, - { - "Name": "OptRenderStateCacheStaticSamplePattern", - "Value": 2048, - "Description": "Sample patern state (only when marked static)" - }, { "Name": "OptRenderStateCacheStaticGraphicsWaveLimits", - "Value": 4096, + "Value": 2048, "Description": "Graphics wave limits state (only when marked static)" }, { "Name": "OptRenderStateCacheStaticComputeWaveLimits", - "Value": 8192, + "Value": 4096, "Description": "Compute wave limits state (only when marked static)" }, { "Name": "OptRenderStateCacheStaticAnisoLodCompensation", - "Value": 16384, + "Value": 8192, "Description": "Aniso LOD compensation state (only when marked static)" }, { "Name": "OptRenderStateCacheStaticLineStipple", - "Value": 32768, + "Value": 16384, "Description": "Line stipple state (only when marked static)" }, { "Name": "OptRenderStateFragmentShadingRate", - "Value": 65536, + "Value": 32768, "Description": "Variable Rate Shading" } ] @@ -226,10 +221,6 @@ } ] }, - { - "Name": "BvhBuildMode", - "Values": [] - }, { "Name": "ThreadGroupSwizzleMode", "IsEnum": true, @@ -679,6 +670,39 @@ "Scope": "Driver", "Type": "enum" }, + { + "Name": "PipelineLayoutSchemeSelectionStrategy", + "Description": "Decide how to choose the scheme of pipeline layout", + "Tags": [ + "Pipeline Options" + ], + "Defaults": { + "Default": "AppControlled" + }, + "ValidValues": { + "IsEnum": true, + "Values": [ + { + "Name": "AppControlled", + "Value": 0, + "Description": "The scheme is decided by user" + }, + { + "Name": "ForceCompact", + "Value": 1, + "Description": "Always use compact scheme" + }, + { + "Name": "ForceIndirect", + "Value": 2, + "Description": "Always use indirect scheme" + } + ], + "Name": "PipelineLayoutSchemeSelectionStrategy" + }, + "Scope": "Driver", + "Type": "enum" + }, { "Name": "PipelineBinningMode", "Description": "Specifies whether to override binning setting for 
pipeline.", @@ -3995,6 +4019,40 @@ "Type": "enum", "Name": "ImageTilingOptMode" }, + { + "ValidValues": { + "IsEnum": true, + "Values": [ + { + "Name": "StrictImageSizeOff", + "Value": 0, + "Description": "Disable strictImageSizeRequirements always" + }, + { + "Name": "StrictImageSizeOn", + "Value": 1, + "Description": "Enable strictImageSizeRequirements always" + }, + { + "Name": "StrictImageSizeAppControlled", + "Value": 2, + "Description": "App enables strictImageSizeRequirements when KHR_maintenance4 is enabled" + } + ], + "Name": "StrictImageSizeRequirements" + }, + "Description": "Controls driver adherence to the KHR_maintenance4 requirement that a VkImage's size memory requirement is never greater than that of another VkImage created with a greater or equal value in each of extent.width, extent.height, and extent.depth; all other creation parameters being identical.", + "Tags": [ + "Optimization" + ], + "Defaults": { + "Default": 2 + }, + "Scope": "Driver", + "Type": "uint32", + "VariableName": "strictImageSizeRequirements", + "Name": "StrictImageSizeRequirements" + }, { "ValidValues": { "Values": [