From 286bf306dd4482d754f83b263377f9756e47bbe5 Mon Sep 17 00:00:00 2001 From: jaxl Date: Fri, 20 Aug 2021 15:08:51 +0800 Subject: [PATCH 1/4] Update xgl from commit: 2ad040f86f * Add Navi23 support --- .github/workflows/rebase.yml | 1 + cmake/XglCompileDefinitions.cmake | 4 ++++ cmake/XglOptions.cmake | 2 ++ cmake/XglOverrides.cmake | 4 ++++ 4 files changed, 11 insertions(+) diff --git a/.github/workflows/rebase.yml b/.github/workflows/rebase.yml index 270e9e2b..a0b7de74 100644 --- a/.github/workflows/rebase.yml +++ b/.github/workflows/rebase.yml @@ -7,6 +7,7 @@ jobs: name: Rebase if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/rebase') && (github.event.comment.author_association == 'OWNER' || + github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'COLLABORATOR') runs-on: ubuntu-latest steps: diff --git a/cmake/XglCompileDefinitions.cmake b/cmake/XglCompileDefinitions.cmake index 9a04ed2f..c9e1d299 100644 --- a/cmake/XglCompileDefinitions.cmake +++ b/cmake/XglCompileDefinitions.cmake @@ -66,6 +66,10 @@ macro(xgl_set_compile_definitions) target_compile_definitions(xgl PRIVATE VKI_BUILD_NAVI22=1) endif() + if(XGL_BUILD_NAVI23) + target_compile_definitions(xgl PRIVATE VKI_BUILD_NAVI23=1) + endif() + #if VKI_KHR_DISPLAY if(VKI_KHR_DISPLAY) target_compile_definitions(xgl PRIVATE VKI_KHR_DISPLAY) diff --git a/cmake/XglOptions.cmake b/cmake/XglOptions.cmake index 83fc9bdb..d67546db 100644 --- a/cmake/XglOptions.cmake +++ b/cmake/XglOptions.cmake @@ -40,6 +40,8 @@ macro(xgl_options) option(XGL_BUILD_NAVI22 "Build open source vulkan for Navi22" ON) + option(XGL_BUILD_NAVI23 "Build open source vulkan for Navi23" ON) + option(XGL_BUILD_LIT "Build with Lit test?" OFF) option(XGL_BUILD_CACHE_CREATOR "Build cache-creator tools?" 
OFF) diff --git a/cmake/XglOverrides.cmake b/cmake/XglOverrides.cmake index a59cdeb4..8e41acd9 100644 --- a/cmake/XglOverrides.cmake +++ b/cmake/XglOverrides.cmake @@ -162,6 +162,8 @@ macro(xgl_overrides_pal) set(PAL_BUILD_NAVI22 ${XGL_BUILD_NAVI22} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(PAL_BUILD_NAVI23 ${XGL_BUILD_NAVI23} CACHE BOOL "${PROJECT_NAME} override." FORCE) + # Wayland set(PAL_BUILD_WAYLAND ${BUILD_WAYLAND_SUPPORT} CACHE BOOL "Build PAL with Wayland support" FORCE) @@ -190,6 +192,8 @@ macro(xgl_overrides_vkgc) set(LLPC_BUILD_NAVI22 ${XGL_BUILD_NAVI22} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_NAVI23 ${XGL_BUILD_NAVI23} CACHE BOOL "${PROJECT_NAME} override." FORCE) + set(LLPC_BUILD_RAVEN2 ON CACHE BOOL "${PROJECT_NAME} override." FORCE) set(LLPC_BUILD_VEGA20 ${XGL_BUILD_VEGA20} CACHE BOOL "${PROJECT_NAME} override." FORCE) From 133ef76a809deb8078ebdbacea4d9b8bfe5dad93 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Mon, 2 Aug 2021 15:30:04 +0000 Subject: [PATCH 2/4] strings: Add output directory argument to generate_strings.py --- icd/CMakeLists.txt | 2 +- icd/api/strings/generate_strings.py | 35 ++++++++++++++++++++--------- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/icd/CMakeLists.txt b/icd/CMakeLists.txt index 79f38145..b3a19396 100644 --- a/icd/CMakeLists.txt +++ b/icd/CMakeLists.txt @@ -211,7 +211,7 @@ set(ICD_STRING_DIR ${CMAKE_CURRENT_SOURCE_DIR}/api/strings) # ICD settings code generation main script set(ICD_GEN_STRINGS ${ICD_STRING_DIR}/generate_strings.py) -set(ICD_GEN_STRINGS_OPT -w ${ICD_STRING_DIR}) +set(ICD_GEN_STRINGS_OPT -w ${ICD_STRING_DIR} -d ${ICD_STRING_DIR}) set(ICD_GEN_STRINGS_FILES ${ICD_GEN_STRINGS} ${ICD_STRING_DIR}/func_table_template.py) diff --git a/icd/api/strings/generate_strings.py b/icd/api/strings/generate_strings.py index 6ab732c1..f85e9832 100644 --- a/icd/api/strings/generate_strings.py +++ b/icd/api/strings/generate_strings.py @@ -54,11 +54,13 @@ 
**********************************************************************************************************************/ ''' -workDir = "./"; +workDir = os.getcwd() +outputDir = os.getcwd() openSource = True; def GetOpt(): global workDir; + global outputDir global openSource; parser = OptionParser() @@ -67,6 +69,10 @@ def GetOpt(): type="string", dest="workdir", help="the work directory") + parser.add_option("-d", "--output_dir", action="store", + type="string", + dest="output_dir", + help="the output directory") (options, args) = parser.parse_args() @@ -76,13 +82,20 @@ def GetOpt(): else: print("The work directroy is not specified, using default: " + workDir); - if (workDir[-1] != '/'): - workDir = workDir + '/'; + workDir = os.path.abspath(os.path.realpath(workDir)) + print("The work directory is %s" % (workDir)) - if (os.path.exists(workDir) == False) or (os.path.exists(workDir + "extensions.txt") == False): + if (os.path.exists(workDir) == False) or (os.path.exists(os.path.join(workDir, "extensions.txt")) == False): print("Work directory is not correct: " + workDir); exit(); + if options.output_dir: + outputDir = options.output_dir + else: + print("The output directory is not specified; using current directory") + outputDir = os.path.abspath(os.path.realpath(outputDir)) + print("The output directory is {}".format(outputDir)) + def generate_string(f, name, suffix, value, gentype): global openSource; @@ -143,12 +156,12 @@ def make_version(version): return "VK_MAKE_VERSION(%s, %s, 0)" % (tokens[0], tokens[1]) def generate_string_file_pass(string_file_prefix, header_file_prefix, gentype): - global PREFIX; + global outputDir string_file = "%s.txt" % (string_file_prefix); - header_file = "%sg_%s_%s.h" % (PREFIX, header_file_prefix, gentype); + header_file = os.path.join(outputDir, "g_{}_{}.h".format(header_file_prefix, gentype)) - print("Generating %s from %s ..." % (header_file, string_file)) + print("Generating %s from %s ..." 
% (os.path.basename(header_file), string_file)) f = open(string_file) lines = f.readlines() @@ -201,7 +214,7 @@ def generate_func_table(entry_file, header_file): global open_copyright global openSource; - global PREFIX + global outputDir print("Generating %s from %s ..." % (header_file, entry_file)) @@ -209,7 +222,8 @@ def generate_func_table(entry_file, header_file): lines = f.readlines() f.close() - header = open(PREFIX + header_file, 'w') + header_path = os.path.join(outputDir, header_file) + header = open(header_path, 'w') entry_point_members = '' prev_ext = '' @@ -269,8 +283,7 @@ def generate_func_table(entry_file, header_file): GetOpt() os.chdir(workDir) - -PREFIX = "./" +os.makedirs(outputDir, exist_ok=True) generate_string_file("extensions") generate_string_file("entry_points") From b78601f5e66f80505bce5222fbd92f4e27e704c2 Mon Sep 17 00:00:00 2001 From: jaxl Date: Tue, 31 Aug 2021 15:37:49 +0800 Subject: [PATCH 3/4] Revert a change causing hang of dEQP-VK.memory.allocation.* --- icd/api/vk_physical_device.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index e9283bc9..edfe60a0 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -957,9 +957,6 @@ VkResult PhysicalDevice::Initialize() m_memoryTypeMask |= 1 << m_memoryProperties.memoryTypeCount; - m_memoryVkIndexAddRemoteBackupHeap[m_memoryProperties.memoryTypeCount] = - m_memoryVkIndexAddRemoteBackupHeap[memoryTypeIndex]; - ++m_memoryProperties.memoryTypeCount; } } From 9db36fe159f43868223bd966ad30caf30e85a40d Mon Sep 17 00:00:00 2001 From: jaxl Date: Thu, 9 Sep 2021 13:27:20 +0800 Subject: [PATCH 4/4] Update xgl from commit: 540eb8c343 * Update PAL Interface in Vulkan to 675 * Cleanup use of VK_INLINE * Fix wrong use of pDepthStencilResolveAttachment in SubpassDescription::Init * Don't put cmdbufs in local when memory oversubscription is allowed * [XGL issue#126] Switchable graphics layer filters out cards 
AMDVLK dropped support for * Cleanup unused argument pCmdAllocator to CmdBuffer::PalCmdBufferReset() * VK_EXT_load_store_op_none - Expose the extension * VK_KHR_zero_initialize_workgroup_memory - Expose the extension * Add exclusion for implicit external synchronization if there is no layout transition. * Heavy flickering observed while performing ALT-TAB in Ghost Recon: Breakpoint Vulkan * Disable enableBackfaceCulling for conservative rasterization overestimation * Buffer creation enhancements * Update Khronos Headers to 1.2.188 * Overrides of mallNoAllocCtPolicy and mallNoAllocCtSsrPolicy should use enums * PalCmdResolveImage for device groups * Valheim shader optimization --- icd/Loader/LunarG/Lnx/amd-icd.json | 4 +- icd/api/appopt/async_layer.h | 2 +- icd/api/appopt/async_shader_module.h | 2 +- icd/api/appopt/async_task_thread.h | 4 +- icd/api/appopt/gpu_decode_layer.cpp | 157 +++- icd/api/appopt/gpu_decode_layer.h | 4 +- icd/api/appopt/opt_layer.h | 2 +- icd/api/barrier_policy.cpp | 6 +- icd/api/compiler_solution_llpc.cpp | 4 + icd/api/devmode/devmode_mgr.h | 12 +- icd/api/graphics_pipeline_common.cpp | 324 +++++---- icd/api/include/app_resource_optimizer.h | 4 +- icd/api/include/compiler_solution.h | 4 +- icd/api/include/compiler_solution_llpc.h | 2 + icd/api/include/graphics_pipeline_common.h | 44 +- icd/api/include/internal_mem_mgr.h | 2 +- icd/api/include/khronos/sdk-1.2/vulkan_beta.h | 94 +-- icd/api/include/khronos/sdk-1.2/vulkan_core.h | 17 +- icd/api/include/pipeline_binary_cache.h | 4 +- icd/api/include/pipeline_compiler.h | 54 +- icd/api/include/render_state_cache.h | 2 +- icd/api/include/vk_buffer.h | 30 +- icd/api/include/vk_cmd_pool.h | 2 +- icd/api/include/vk_cmdbuffer.h | 117 ++- icd/api/include/vk_compute_pipeline.h | 2 +- icd/api/include/vk_conv.h | 225 +++--- icd/api/include/vk_descriptor_pool.h | 6 +- icd/api/include/vk_descriptor_set.h | 8 +- icd/api/include/vk_descriptor_set_layout.h | 64 +- icd/api/include/vk_device.h | 72 +- 
icd/api/include/vk_extensions.h | 18 +- icd/api/include/vk_fence.h | 6 +- icd/api/include/vk_formats.h | 46 +- icd/api/include/vk_framebuffer.h | 6 +- icd/api/include/vk_gpa_session.h | 4 +- icd/api/include/vk_graphics_pipeline.h | 11 +- icd/api/include/vk_image_view.h | 26 +- icd/api/include/vk_instance.h | 30 +- icd/api/include/vk_memory.h | 8 +- icd/api/include/vk_physical_device.h | 126 ++-- icd/api/include/vk_physical_device_manager.h | 2 +- icd/api/include/vk_pipeline.h | 13 +- icd/api/include/vk_pipeline_cache.h | 2 +- icd/api/include/vk_pipeline_layout.h | 22 +- icd/api/include/vk_query.h | 24 +- icd/api/include/vk_queue.h | 2 +- icd/api/include/vk_render_pass.h | 24 +- icd/api/include/vk_sampler.h | 6 +- icd/api/include/vk_shader_code.h | 2 +- icd/api/include/vk_swapchain.h | 20 +- icd/api/include/vk_utils.h | 33 +- icd/api/internal_mem_mgr.cpp | 2 +- icd/api/pipeline_compiler.cpp | 322 ++++++--- icd/api/render_state_cache.cpp | 2 +- icd/api/renderpass/renderpass_builder.cpp | 8 +- icd/api/renderpass/renderpass_types.h | 4 +- icd/api/sqtt/sqtt_layer.h | 8 +- icd/api/sqtt/sqtt_mgr.cpp | 2 +- icd/api/sqtt/sqtt_mgr.h | 2 +- icd/api/sqtt/sqtt_object_mgr.h | 6 +- icd/api/strings/entry_points.txt | 1 + icd/api/strings/extensions.txt | 3 + icd/api/utils/json_reader.cpp | 6 +- icd/api/utils/temp_mem_arena.cpp | 2 +- icd/api/utils/temp_mem_arena.h | 2 +- icd/api/vk_buffer.cpp | 177 ++--- icd/api/vk_cmdbuffer.cpp | 36 +- icd/api/vk_compute_pipeline.cpp | 1 + icd/api/vk_descriptor_set_layout.cpp | 358 ++++++++- icd/api/vk_device.cpp | 14 +- icd/api/vk_gpa_session.cpp | 2 +- icd/api/vk_graphics_pipeline.cpp | 57 +- icd/api/vk_physical_device.cpp | 126 +++- icd/api/vk_pipeline.cpp | 78 +- icd/api/vk_pipeline_layout.cpp | 351 +++++++-- icd/api/vk_render_pass.cpp | 2 +- icd/api/vk_shader.cpp | 2 +- icd/imported/gputexdecoder/gpuTexDecoder.cpp | 148 +++- icd/imported/gputexdecoder/gpuTexDecoder.h | 5 +- icd/imported/gputexdecoder/shaders.h | 1 + 
.../gputexdecoder/shaders/Etc2Decode.h | 684 ++++++++++++++++++ .../gputexdecoder/shaders/Etc2Decoder.comp | 289 ++++++++ icd/layers/vk_layer_switchable_graphics.cpp | 52 +- icd/make/importdefs | 4 +- icd/res/ver.h | 2 +- icd/settings/settings.cpp | 71 +- icd/settings/settings_xgl.json | 36 + 87 files changed, 3441 insertions(+), 1130 deletions(-) create mode 100644 icd/imported/gputexdecoder/shaders/Etc2Decode.h create mode 100755 icd/imported/gputexdecoder/shaders/Etc2Decoder.comp diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json index 64ae431d..025cf513 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ -2,13 +2,13 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.2.185" + "api_version": "1.2.188" }, "layer": { "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@", "type": "GLOBAL", "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.2.185", + "api_version": "1.2.188", "implementation_version": "1", "description": "AMD switchable graphics layer", "functions": { diff --git a/icd/api/appopt/async_layer.h b/icd/api/appopt/async_layer.h index 8b7f0ded..c7b3bf6e 100644 --- a/icd/api/appopt/async_layer.h +++ b/icd/api/appopt/async_layer.h @@ -78,7 +78,7 @@ class AsyncLayer final : public OptLayer virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) override; - VK_INLINE Device* GetDevice() { return m_pDevice; } + Device* GetDevice() { return m_pDevice; } void* GetTaskThread(TaskType type) { diff --git a/icd/api/appopt/async_shader_module.h b/icd/api/appopt/async_shader_module.h index 1b312951..cca54ec6 100644 --- a/icd/api/appopt/async_shader_module.h +++ b/icd/api/appopt/async_shader_module.h @@ -57,7 +57,7 @@ class ShaderModule final : public vk::NonDispatchable(initInfo.pClientUserData); + + VK_ASSERT(constInfo.numConstants <= 4); VkSpecializationMapEntry mapEntries[4] = { - // 
local_thread_x + // local_thread_x - ASTC + // ALPHA_BITS - ETC2 { 0, 0, sizeof(uint32_t) }, - // local_thread_y + // local_thread_y - ASTC + // WIDTH = ETC2 { 1, 1 * sizeof(uint32_t), sizeof(uint32_t) }, - // isSrgb Format + // isSrgb Format - ASTC + // HEIGHT - ETC2 { 2, 2 * sizeof(uint32_t), sizeof(uint32_t) }, - // isBufferTexture + // isBufferTexture - ASTC { 3, 3 * sizeof(uint32_t), @@ -93,9 +98,9 @@ namespace GpuTexDecoder }; Vkgc::ResourceMappingRootNode rootNode = {}; - Vkgc::ResourceMappingNode nodes[GpuTexDecoder::AstcInternalPipelineNodes] = {}; if (buildInfo.shaderType == GpuTexDecoder::InternalTexConvertCsType::ConvertASTCToRGBA8) { + Vkgc::ResourceMappingNode nodes[GpuTexDecoder::AstcInternalPipelineNodes] = {}; GpuTexDecoder::GpuDecodeMappingNode* pDecodeNode = buildInfo.pUserDataNodes; for (size_t index = 0; index < GpuTexDecoder::AstcInternalPipelineNodes; index++) { @@ -129,6 +134,42 @@ namespace GpuTexDecoder rootNode.visibility = Vkgc::ShaderStageComputeBit; } + if (buildInfo.shaderType == GpuTexDecoder::InternalTexConvertCsType::ConvertETC2ToRGBA8) + { + Vkgc::ResourceMappingNode nodes[GpuTexDecoder::Etc2InternalPipelineNodes] = {}; + GpuTexDecoder::GpuDecodeMappingNode* pDecodeNode = buildInfo.pUserDataNodes; + for (size_t index = 0; index < GpuTexDecoder::Etc2InternalPipelineNodes; index++) + { + if (pDecodeNode[index].nodeType == GpuTexDecoder::NodeType::Image) + { + nodes[index].type = Vkgc::ResourceMappingNodeType::DescriptorResource; + nodes[index].sizeInDwords = pDecodeNode[index].sizeInDwords; + nodes[index].offsetInDwords = pDecodeNode[index].offsetInDwords; + nodes[index].srdRange.binding = pDecodeNode[index].binding; + nodes[index].srdRange.set = pDecodeNode[index].set; + } + else + { + Vkgc::ResourceMappingNodeType vkgcType = + (pDecodeNode[index].nodeType == GpuTexDecoder::NodeType::Buffer) ? 
+ Vkgc::ResourceMappingNodeType::DescriptorBuffer : + Vkgc::ResourceMappingNodeType::DescriptorTexelBuffer; + nodes[index].type = vkgcType; + nodes[index].sizeInDwords = pDecodeNode[index].sizeInDwords; + nodes[index].offsetInDwords = pDecodeNode[index].offsetInDwords; + nodes[index].srdRange.binding = pDecodeNode[index].binding; + nodes[index].srdRange.set = pDecodeNode[index].set; + } + } + + rootNode.node.type = Vkgc::ResourceMappingNodeType::DescriptorTableVaPtr; + rootNode.node.offsetInDwords = 0; + rootNode.node.sizeInDwords = 1; + rootNode.node.tablePtr.nodeCount = GpuTexDecoder::Etc2InternalPipelineNodes; + rootNode.node.tablePtr.pNext = &nodes[0]; + rootNode.visibility = Vkgc::ShaderStageComputeBit; + } + Vkgc::BinaryData spvBin = { buildInfo.code.spvSize, buildInfo.code.pSpvCode }; result = pDevice->CreateInternalComputePipeline(buildInfo.code.spvSize, @@ -217,12 +258,11 @@ VKAPI_ATTR void VKAPI_CALL vkCmdCopyImage( const Image* const pSrcImage = Image::ObjectFromHandle(srcImage); const Image* const pDstImage = Image::ObjectFromHandle(dstImage); - const Pal::SwizzledFormat srcFormat = VkToPalFormat(pSrcImage->GetFormat(), pDevice->GetRuntimeSettings()); - const Pal::SwizzledFormat dstFormat = VkToPalFormat(pDstImage->GetFormat(), pDevice->GetRuntimeSettings()); - if (Formats::IsASTCFormat(pDstImage->GetFormat())) { - VK_ASSERT(Formats::IsASTCFormat(pDstImage->GetFormat())); + const Pal::SwizzledFormat srcFormat = VkToPalFormat(pSrcImage->GetFormat(), pDevice->GetRuntimeSettings()); + const Pal::SwizzledFormat dstFormat = VkToPalFormat(pDstImage->GetFormat(), pDevice->GetRuntimeSettings()); + uint32_t maxObj = pCmdBuffer->EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ImageCopyRegion)); const auto maxRegions = Util::Max(maxObj, MaxPalAspectsPerMask); @@ -274,6 +314,81 @@ VKAPI_ATTR void VKAPI_CALL vkCmdCopyImage( virtStackFrame.FreeArray(pPalRegions); } + else if (Formats::IsEtc2Format(pDstImage->GetFormat())) + { + VkFormat payloadFormat = 
pSrcImage->GetFormat(); + + const Pal::SwizzledFormat srcFormat = VkToPalFormat(payloadFormat, pDevice->GetRuntimeSettings()); + const Pal::SwizzledFormat dstFormat = VkToPalFormat(pDstImage->GetFormat(), pDevice->GetRuntimeSettings()); + + uint32_t maxObj = pCmdBuffer->EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ImageCopyRegion)); + + const auto maxRegions = Util::Max(maxObj, MaxPalAspectsPerMask); + auto regionBatch = Util::Min(regionCount * MaxPalAspectsPerMask, maxRegions); + + VirtualStackFrame virtStackFrame(pCmdBuffer->GetStackAllocator()); + Pal::ImageCopyRegion* pPalRegions = + virtStackFrame.AllocArray(regionBatch); + + VkFormat format = pDstImage->GetFormat(); + + uint32_t alphaBits = 0; + + switch (format) + { + case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: + case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: + alphaBits = 0; + break; + case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: + case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK: + alphaBits = 1; + break; + case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: + case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK: + alphaBits = 8; + break; + default: + break; + } + + const Pal::ImageCreateInfo& imageInfo = pDstImage->PalImage(DefaultDeviceIndex)->GetImageCreateInfo(); + + uint32_t const_data[3] = + { + alphaBits, + imageInfo.extent.width, + imageInfo.extent.height + }; + + GpuTexDecoder::CompileTimeConstants constInfo = {}; + constInfo.numConstants = 3; + constInfo.pConstants = const_data; + + for (uint32_t regionIdx = 0; regionIdx < regionCount;) + { + uint32_t palRegionCount = 0; + + while ((regionIdx < regionCount) && + (palRegionCount <= (regionBatch - MaxPalAspectsPerMask))) + { + VkToPalImageCopyRegion(pRegions[regionIdx], srcFormat.format, dstFormat.format, + pPalRegions, &palRegionCount); + + ++regionIdx; + } + + pDevice->GetGpuDecoderLayer()->GetTexDecoder()->GpuDecodeImage( + GpuTexDecoder::InternalTexConvertCsType::ConvertETC2ToRGBA8, + pCmdBuffer->PalCmdBuffer(DefaultDeviceIndex), + pSrcImage->PalImage(DefaultDeviceIndex), + 
pDstImage->PalImage(DefaultDeviceIndex), + regionCount, pPalRegions, constInfo); + } + + virtStackFrame.FreeArray(pPalRegions); + + } else { DECODER_WAPPER_CALL_NEXT_LAYER(vkCmdCopyImage(cmdBuffer, @@ -320,6 +435,30 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateImage( pImage); } + else if(Formats::IsEtc2Format(format) && + (pCreateInfo->usage == VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) + { + VkImageCreateInfo etc2SrcInfo = *pCreateInfo; + switch (pCreateInfo->format) + { + case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK: + case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: + etc2SrcInfo.format = VK_FORMAT_R32G32B32A32_UINT; + break; + case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK: + case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: + case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: + case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: + etc2SrcInfo.format = VK_FORMAT_R32G32_UINT; + break; + default: + break; + } + vkResult = DECODER_WAPPER_CALL_NEXT_LAYER(vkCreateImage)(device, + &etc2SrcInfo, + pAllocator, + pImage); + } else { vkResult = DECODER_WAPPER_CALL_NEXT_LAYER(vkCreateImage)(device, diff --git a/icd/api/appopt/gpu_decode_layer.h b/icd/api/appopt/gpu_decode_layer.h index a1c7ffd1..71041ab4 100644 --- a/icd/api/appopt/gpu_decode_layer.h +++ b/icd/api/appopt/gpu_decode_layer.h @@ -54,12 +54,12 @@ class GpuDecoderLayer final : public OptLayer VkResult Init(Device* pDevice); virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) override; - VK_INLINE GpuTexDecoder::Device* GetTexDecoder() + GpuTexDecoder::Device* GetTexDecoder() { return m_pGpuTexDecoder; } - VK_INLINE bool isAstcSrgbaFormat(VkFormat format) + bool isAstcSrgbaFormat(VkFormat format) { return Formats::IsASTCFormat(format) && (static_cast(format) % 2 == 0); diff --git a/icd/api/appopt/opt_layer.h b/icd/api/appopt/opt_layer.h index b4ddc927..db91a44c 100644 --- a/icd/api/appopt/opt_layer.h +++ b/icd/api/appopt/opt_layer.h @@ -49,7 +49,7 @@ class OptLayer virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) = 0; - VK_INLINE const 
DispatchTable* GetNextLayer() const + const DispatchTable* GetNextLayer() const { return &m_nextLayer; } protected: diff --git a/icd/api/barrier_policy.cpp b/icd/api/barrier_policy.cpp index 8f47507d..499d2d54 100644 --- a/icd/api/barrier_policy.cpp +++ b/icd/api/barrier_policy.cpp @@ -242,7 +242,7 @@ static const LayoutUsageHelper g_LayoutUsageHelper; // ===================================================================================================================== // Converts ImageLayout to Cache masks, for use with VK_ACCESS_MEMORY_WRITE and VK_ACCESS_MEMORY_READ only. -static VK_INLINE uint32_t ImageLayoutToCacheMask(VkImageLayout imageLayout) +static uint32_t ImageLayoutToCacheMask(VkImageLayout imageLayout) { uint32_t cacheMask = 0; @@ -291,7 +291,7 @@ static VK_INLINE uint32_t ImageLayoutToCacheMask(VkImageLayout imageLayout) // ===================================================================================================================== // Converts source access flags to source cache coherency flags. -static VK_INLINE uint32_t SrcAccessToCacheMask(AccessFlags accessMask, VkImageLayout imageLayout) +static uint32_t SrcAccessToCacheMask(AccessFlags accessMask, VkImageLayout imageLayout) { uint32_t cacheMask = 0; @@ -355,7 +355,7 @@ static VK_INLINE uint32_t SrcAccessToCacheMask(AccessFlags accessMask, VkImageLa // ===================================================================================================================== // Converts destination access flags to destination cache coherency flags. 
-static VK_INLINE uint32_t DstAccessToCacheMask(AccessFlags accessMask, VkImageLayout imageLayout) +static uint32_t DstAccessToCacheMask(AccessFlags accessMask, VkImageLayout imageLayout) { uint32_t cacheMask = 0; diff --git a/icd/api/compiler_solution_llpc.cpp b/icd/api/compiler_solution_llpc.cpp index ba9c9e5c..e4a2052d 100644 --- a/icd/api/compiler_solution_llpc.cpp +++ b/icd/api/compiler_solution_llpc.cpp @@ -301,10 +301,12 @@ VkResult CompilerSolutionLlpc::CreateGraphicsPipelineBinary( Vkgc::PipelineShaderInfo** ppShadersInfo, void* pPipelineDumpHandle, uint64_t pipelineHash, + Util::MetroHash::Hash* pCacheId, int64_t* pCompileTime) { VK_IGNORE(pDevice); VK_IGNORE(pipelineHash); + VK_IGNORE(pCacheId); const RuntimeSettings& settings = m_pPhysicalDevice->GetRuntimeSettings(); auto pInstance = m_pPhysicalDevice->Manager()->VkInstance(); @@ -424,10 +426,12 @@ VkResult CompilerSolutionLlpc::CreateComputePipelineBinary( const void** ppPipelineBinary, void* pPipelineDumpHandle, uint64_t pipelineHash, + Util::MetroHash::Hash* pCacheId, int64_t* pCompileTime) { VK_IGNORE(pDevice); VK_IGNORE(pipelineHash); + VK_IGNORE(pCacheId); const RuntimeSettings& settings = m_pPhysicalDevice->GetRuntimeSettings(); auto pInstance = m_pPhysicalDevice->Manager()->VkInstance(); diff --git a/icd/api/devmode/devmode_mgr.h b/icd/api/devmode/devmode_mgr.h index 070825dd..2b2b19f8 100644 --- a/icd/api/devmode/devmode_mgr.h +++ b/icd/api/devmode/devmode_mgr.h @@ -179,9 +179,9 @@ class DevModeMgr uint64_t value, Pal::IQueueSemaphore* pQueueSemaphore); - VK_INLINE bool IsQueueTimingActive(const Device* pDevice) const; - VK_INLINE bool GetTraceFrameBeginTag(uint64_t* pTag) const; - VK_INLINE bool GetTraceFrameEndTag(uint64_t* pTag) const; + inline bool IsQueueTimingActive(const Device* pDevice) const; + inline bool GetTraceFrameBeginTag(uint64_t* pTag) const; + inline bool GetTraceFrameEndTag(uint64_t* pTag) const; Util::Result RegisterPipelineCache( PipelineBinaryCache* pPipelineCache, @@ 
-189,10 +189,10 @@ class DevModeMgr void DeregisterPipelineCache( PipelineBinaryCache* pPipelineCache); - VK_INLINE Util::ListIterator GetPipelineCacheListIterator() + Util::ListIterator GetPipelineCacheListIterator() { return m_pipelineCaches.Begin(); } - VK_INLINE Util::RWLock* GetPipelineReinjectionLock() + Util::RWLock* GetPipelineReinjectionLock() { return &m_pipelineReinjectionLock; } private: @@ -363,7 +363,7 @@ class DevModeMgr #if ICD_GPUOPEN_DEVMODE_BUILD // ===================================================================================================================== // Returns true if queue operations are currently being timed by RGP traces. -VK_INLINE bool DevModeMgr::IsQueueTimingActive( +inline bool DevModeMgr::IsQueueTimingActive( const Device* pDevice ) const { diff --git a/icd/api/graphics_pipeline_common.cpp b/icd/api/graphics_pipeline_common.cpp index 2fd5e4c2..65cb1dad 100644 --- a/icd/api/graphics_pipeline_common.cpp +++ b/icd/api/graphics_pipeline_common.cpp @@ -124,7 +124,7 @@ static bool IsDynamicStateEnabled(const uint32_t dynamicStateFlags, const Dynami // ===================================================================================================================== // Returns true if the given VkBlendFactor factor is a dual source blend factor -static VK_INLINE bool IsDualSourceBlend(VkBlendFactor blend) +static bool IsDualSourceBlend(VkBlendFactor blend) { switch (blend) { @@ -139,7 +139,7 @@ static VK_INLINE bool IsDualSourceBlend(VkBlendFactor blend) } // ===================================================================================================================== -static VK_INLINE void BuildPalColorBlendStateCreateInfo( +static void BuildPalColorBlendStateCreateInfo( const VkPipelineColorBlendStateCreateInfo* pColorBlendState, Pal::ColorBlendStateCreateInfo* pInfo) { @@ -221,7 +221,7 @@ bool GraphicsPipelineCommon::IsSrcAlphaUsedInBlend(VkBlendFactor blend) } // 
===================================================================================================================== -static VK_INLINE VkFormat GetDepthFormat( +static VkFormat GetDepthFormat( const RenderPass* pRenderPass, const uint32_t subpassIndex ) @@ -232,7 +232,7 @@ static VK_INLINE VkFormat GetDepthFormat( } // ===================================================================================================================== -static VK_INLINE uint32_t GetColorAttachmentCount( +static uint32_t GetColorAttachmentCount( const RenderPass* pRenderPass, const uint32_t subpassIndex ) @@ -243,25 +243,24 @@ static VK_INLINE uint32_t GetColorAttachmentCount( // ===================================================================================================================== VkShaderStageFlagBits GraphicsPipelineCommon::GetActiveShaderStages( - const VkGraphicsPipelineCreateInfo* pGraphicsPipelineCreateInfo + const VkGraphicsPipelineCreateInfo* pGraphicsPipelineCreateInfo ) { VK_ASSERT(pGraphicsPipelineCreateInfo != nullptr); - VkShaderStageFlagBits activeStage = static_cast(0); - VkShaderStageFlagBits activeStageMask = static_cast(0xFFFFFFFF); + VkShaderStageFlagBits activeStages = static_cast(0); for (uint32_t i = 0; i < pGraphicsPipelineCreateInfo->stageCount; ++i) { - activeStage = static_cast(activeStage | pGraphicsPipelineCreateInfo->pStages[i].stage); + activeStages = static_cast(activeStages | pGraphicsPipelineCreateInfo->pStages[i].stage); } - return static_cast(activeStage & activeStageMask); + return activeStages; } // ===================================================================================================================== uint32_t GraphicsPipelineCommon::GetDynamicStateFlags( - const VkPipelineDynamicStateCreateInfo* pDy + const VkPipelineDynamicStateCreateInfo* pDy ) { uint32_t dynamicState = 0; @@ -380,6 +379,24 @@ uint32_t GraphicsPipelineCommon::GetDynamicStateFlags( return dynamicState; } +// 
===================================================================================================================== +VkResult GraphicsPipelineCommon::Create( + Device* pDevice, + PipelineCache* pPipelineCache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline) +{ + VkResult result; + + { + result = GraphicsPipeline::Create( + pDevice, pPipelineCache, pCreateInfo, pAllocator, pPipeline); + } + + return result; +} + // ===================================================================================================================== static void BuildRasterizationState( const Device* pDevice, @@ -762,6 +779,9 @@ static void BuildMultisampleState( pInfo->msaa.occlusionQuerySamples = subpassDepthSampleCount; pInfo->sampleCoverage = subpassCoverageSampleCount; + pInfo->pipeline.cbState.target[0].forceAlphaToOne = (pMs->alphaToOneEnable == VK_TRUE); + pInfo->pipeline.cbState.alphaToCoverageEnable = (pMs->alphaToCoverageEnable == VK_TRUE); + if (pInfo->flags.customSampleLocations) { // Enable single-sampled custom sample locations if necessary @@ -807,6 +827,7 @@ static void BuildDepthStencilState( { pInfo->immedInfo.depthStencilCreateInfo.stencilEnable = (pDs->stencilTestEnable == VK_TRUE); pInfo->immedInfo.depthStencilCreateInfo.depthEnable = (pDs->depthTestEnable == VK_TRUE); + pInfo->immedInfo.depthStencilCreateInfo.depthWriteEnable = (pDs->depthWriteEnable == VK_TRUE); pInfo->immedInfo.depthStencilCreateInfo.depthFunc = VkToPalCompareFunc(pDs->depthCompareOp); pInfo->immedInfo.depthStencilCreateInfo.depthBoundsEnable = (pDs->depthBoundsTestEnable == VK_TRUE); @@ -1028,8 +1049,6 @@ static void BuildPreRasterizationShaderState( const uint32_t dynamicStateFlags, GraphicsPipelineObjectCreateInfo* pInfo) { - pInfo->pLayout = PipelineLayout::ObjectFromHandle(pIn->layout); - // Build states via VkPipelineRasterizationStateCreateInfo BuildRasterizationState(pDevice, pIn->pRasterizationState, 
dynamicStateFlags, pInfo); @@ -1078,8 +1097,6 @@ static void BuildFragmentShaderState( const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pIn->renderPass); const uint32_t subpass = pIn->subpass; - pInfo->pLayout = PipelineLayout::ObjectFromHandle(pIn->layout); - // Build states via VkPipelineMultisampleStateCreateInfo BuildMultisampleState(pIn->pMultisampleState, pRenderPass, subpass, dynamicStateFlags, pInfo); @@ -1135,24 +1152,6 @@ static void BuildFragmentOutputInterfaceState( pInfo); } - // According to the spec, VkPipelineMultisampleStateCreateInfo::alphaToCoverageEnable and alphaToOneEnable - // belongs to fragment output interface section - // The alpha component of the fragment's first color output is replaced with one if alphaToOneEnable is set. - if (pIn->pMultisampleState != nullptr) - { - pInfo->pipeline.cbState.target[0].forceAlphaToOne = (pIn->pMultisampleState->alphaToOneEnable == VK_TRUE); - pInfo->pipeline.cbState.alphaToCoverageEnable = (pIn->pMultisampleState->alphaToCoverageEnable == VK_TRUE); - } - - // According to the spec, VkPipelineDepthStencilStateCreateInfo::depthWriteEnable belongs to fragment output - // interface section - if ((pInfo->dbFormat != VK_FORMAT_UNDEFINED) && - (pIn->pDepthStencilState != nullptr)) - { - pInfo->immedInfo.depthStencilCreateInfo.depthWriteEnable = - (pIn->pDepthStencilState->depthWriteEnable == VK_TRUE); - } - BuildRenderingState(pDevice, pRenderPass, pInfo); @@ -1301,16 +1300,16 @@ void GraphicsPipelineCommon::BuildPipelineObjectCreateInfo( const VkGraphicsPipelineCreateInfo* pIn, const VbInfo* pVbInfo, const GraphicsPipelineBinaryInfo* pBinInfo, + const PipelineLayout* pPipelineLayout, GraphicsPipelineObjectCreateInfo* pInfo) { - const VkGraphicsPipelineCreateInfo* pGraphicsPipelineCreateInfo = pIn; - pInfo->activeStages = GetActiveShaderStages(pGraphicsPipelineCreateInfo + pInfo->activeStages = GetActiveShaderStages(pIn ); - uint32_t dynamicStateFlags = 
GraphicsPipelineCommon::GetDynamicStateFlags( - pGraphicsPipelineCreateInfo->pDynamicState - ); + uint32_t dynamicStateFlags = GetDynamicStateFlags( + pIn->pDynamicState + ); BuildVertexInputInterfaceState(pDevice, pIn, &pVbInfo->bindingInfo, dynamicStateFlags, false, pInfo); @@ -1352,16 +1351,16 @@ void GraphicsPipelineCommon::BuildPipelineObjectCreateInfo( VkResult GraphicsPipelineCommon::BuildPipelineBinaryCreateInfo( const Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, + const PipelineLayout* pPipelineLayout, GraphicsPipelineBinaryCreateInfo* pBinInfo, GraphicsPipelineShaderStageInfo* pShaderInfo, VbInfo* pVbInfo, ShaderModuleHandle* pTempModules) { - PipelineCompiler* pCompiler = pDevice->GetCompiler(DefaultDeviceIndex); - VkResult result = BuildShaderStageInfo(pDevice, pCreateInfo->stageCount, pCreateInfo->pStages, + pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR, [](const uint32_t inputIdx, const uint32_t stageIdx) { return stageIdx; @@ -1371,7 +1370,8 @@ VkResult GraphicsPipelineCommon::BuildPipelineBinaryCreateInfo( if (result == VK_SUCCESS) { - result = pCompiler->ConvertGraphicsPipelineInfo(pDevice, pCreateInfo, pShaderInfo, pBinInfo, pVbInfo); + result = pDevice->GetCompiler(DefaultDeviceIndex)->ConvertGraphicsPipelineInfo( + pDevice, pCreateInfo, pShaderInfo, pPipelineLayout, pBinInfo, pVbInfo); } return result; @@ -1768,146 +1768,218 @@ static void GenerateHashFromColorBlendStateCreateInfo( } // ===================================================================================================================== -// Generates the API PSO hash using the contents of the VkGraphicsPipelineCreateInfo struct -// Pipeline compilation affected by: -// - pCreateInfo->pStages -// - pCreateInfo->pVertexInputState -// - pCreateInfo->pInputAssemblyState -// - pCreateInfo->pTessellationState -// - pCreateInfo->pRasterizationState -// - pCreateInfo->pMultisampleState -// - pCreateInfo->pColorBlendState -// - pCreateInfo->layout -// - 
pCreateInfo->renderPass -// - pCreateInfo->subpass -uint64_t GraphicsPipelineCommon::BuildApiHash( - const VkGraphicsPipelineCreateInfo* pCreateInfo, - const GraphicsPipelineObjectCreateInfo* pInfo) +void GraphicsPipelineCommon::GenerateHashForVertexInputInterfaceState( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + Util::MetroHash128* pBaseHasher, + Util::MetroHash128* pApiHasher) { - Util::MetroHash::Hash baseHash; - - Util::MetroHash128 baseHasher; - Util::MetroHash128 apiHasher; - - baseHasher.Update(pCreateInfo->flags); - baseHasher.Update(pCreateInfo->stageCount); - - const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pCreateInfo->renderPass); - - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) + if (pCreateInfo->pVertexInputState != nullptr) { - GenerateHashFromShaderStageCreateInfo(pCreateInfo->pStages[i], &baseHasher); + GenerateHashFromVertexInputStateCreateInfo(*pCreateInfo->pVertexInputState, pBaseHasher); } - if (pCreateInfo->pVertexInputState != nullptr) + if (pCreateInfo->pInputAssemblyState != nullptr) { - GenerateHashFromVertexInputStateCreateInfo(*pCreateInfo->pVertexInputState, &baseHasher); + GenerateHashFromInputAssemblyStateCreateInfo(*pCreateInfo->pInputAssemblyState, pBaseHasher, pApiHasher); } +} - if (pCreateInfo->pInputAssemblyState != nullptr) +// ===================================================================================================================== +void GraphicsPipelineCommon::GenerateHashForPreRasterizationShadersState( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const GraphicsPipelineObjectCreateInfo* pInfo, + Util::MetroHash128* pBaseHasher, + Util::MetroHash128* pApiHasher) +{ + for (uint32_t i = 0; i < pCreateInfo->stageCount; ++i) { - GenerateHashFromInputAssemblyStateCreateInfo(*pCreateInfo->pInputAssemblyState, &baseHasher, &apiHasher); + if (pCreateInfo->pStages[i].stage & PrsShaderMask) + { + GenerateHashFromShaderStageCreateInfo(pCreateInfo->pStages[i], pBaseHasher); + } } - if 
((pInfo->activeStages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) - && (pCreateInfo->pTessellationState != nullptr)) + if (pCreateInfo->layout != VK_NULL_HANDLE) { - GenerateHashFromTessellationStateCreateInfo(*pCreateInfo->pTessellationState, &baseHasher); + pBaseHasher->Update(PipelineLayout::ObjectFromHandle(pCreateInfo->layout)->GetApiHash()); } - if ((pInfo->immedInfo.rasterizerDiscardEnable != VK_TRUE) && (pCreateInfo->pViewportState != nullptr)) + if ((pInfo->immedInfo.rasterizerDiscardEnable == false) && (pCreateInfo->pViewportState != nullptr)) { - GenerateHashFromViewportStateCreateInfo(*pCreateInfo->pViewportState, pInfo->staticStateMask, &apiHasher); + GenerateHashFromViewportStateCreateInfo(*pCreateInfo->pViewportState, pInfo->staticStateMask, pApiHasher); } if (pCreateInfo->pRasterizationState != nullptr) { bool rasterizerDiscardEnableDynamic = ((pInfo->staticStateMask & - (1UL << static_cast(DynamicStatesInternal::RasterizerDiscardEnableExt))) == 0); + (1UL << static_cast(DynamicStatesInternal::RasterizerDiscardEnableExt))) == 0); GenerateHashFromRasterizationStateCreateInfo(*pCreateInfo->pRasterizationState, rasterizerDiscardEnableDynamic, - &baseHasher, - &apiHasher); + pBaseHasher, + pApiHasher); } - if ((pInfo->immedInfo.rasterizerDiscardEnable != VK_TRUE) && (pCreateInfo->pMultisampleState != nullptr)) + if ((pInfo->activeStages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) + && (pCreateInfo->pTessellationState != nullptr)) { - GenerateHashFromMultisampleStateCreateInfo(*pCreateInfo->pMultisampleState, &baseHasher, &apiHasher); + GenerateHashFromTessellationStateCreateInfo(*pCreateInfo->pTessellationState, pBaseHasher); } - if ((pInfo->immedInfo.rasterizerDiscardEnable != VK_TRUE) && - (pCreateInfo->pDepthStencilState != nullptr) && - (GetDepthFormat(pRenderPass, pCreateInfo->subpass) != VK_FORMAT_UNDEFINED)) + if (pCreateInfo->renderPass != 
VK_NULL_HANDLE) { - GenerateHashFromDepthStencilStateCreateInfo(*pCreateInfo->pDepthStencilState, &apiHasher); + pBaseHasher->Update(RenderPass::ObjectFromHandle(pCreateInfo->renderPass)->GetHash()); } - if ((pInfo->immedInfo.rasterizerDiscardEnable != VK_TRUE) && - (pCreateInfo->pColorBlendState != nullptr) && - (GetColorAttachmentCount(pRenderPass, pCreateInfo->subpass) != 0)) + pBaseHasher->Update(pCreateInfo->subpass); + + EXTRACT_VK_STRUCTURES_0( + discardRectangle, + PipelineDiscardRectangleStateCreateInfoEXT, + static_cast(pCreateInfo->pNext), + PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT) + if (pPipelineDiscardRectangleStateCreateInfoEXT != nullptr) { - GenerateHashFromColorBlendStateCreateInfo(*pCreateInfo->pColorBlendState, &baseHasher, &apiHasher); + pApiHasher->Update(pPipelineDiscardRectangleStateCreateInfoEXT->sType); + pApiHasher->Update(pPipelineDiscardRectangleStateCreateInfoEXT->flags); + pApiHasher->Update(pPipelineDiscardRectangleStateCreateInfoEXT->discardRectangleMode); + pApiHasher->Update(pPipelineDiscardRectangleStateCreateInfoEXT->discardRectangleCount); + + if (pPipelineDiscardRectangleStateCreateInfoEXT->pDiscardRectangles != nullptr) + { + for (uint32 i = 0; i < pPipelineDiscardRectangleStateCreateInfoEXT->discardRectangleCount; i++) + { + pApiHasher->Update(pPipelineDiscardRectangleStateCreateInfoEXT->pDiscardRectangles[i]); + } + } } +} - if (pCreateInfo->pDynamicState != nullptr) +// ===================================================================================================================== +void GraphicsPipelineCommon::GenerateHashForFragmentShaderState( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + Util::MetroHash128* pBaseHasher, + Util::MetroHash128* pApiHasher) +{ + for (uint32_t i = 0; i < pCreateInfo->stageCount; ++i) { - GenerateHashFromDynamicStateCreateInfo(*pCreateInfo->pDynamicState, &apiHasher); + if (pCreateInfo->pStages[i].stage & FgsShaderMask) + { + 
GenerateHashFromShaderStageCreateInfo(pCreateInfo->pStages[i], pBaseHasher); + } } - baseHasher.Update(PipelineLayout::ObjectFromHandle(pCreateInfo->layout)->GetApiHash()); + if (pCreateInfo->layout != VK_NULL_HANDLE) + { + pBaseHasher->Update(PipelineLayout::ObjectFromHandle(pCreateInfo->layout)->GetApiHash()); + } - if (pCreateInfo->renderPass != VK_NULL_HANDLE) + if (pCreateInfo->pMultisampleState != nullptr) { - baseHasher.Update(RenderPass::ObjectFromHandle(pCreateInfo->renderPass)->GetHash()); + GenerateHashFromMultisampleStateCreateInfo(*pCreateInfo->pMultisampleState, pBaseHasher, pApiHasher); } - baseHasher.Update(pCreateInfo->subpass); + const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pCreateInfo->renderPass); - if ((pCreateInfo->flags & VK_PIPELINE_CREATE_DERIVATIVE_BIT) && (pCreateInfo->basePipelineHandle != VK_NULL_HANDLE)) + if ((pCreateInfo->pDepthStencilState != nullptr) && + (GetDepthFormat(pRenderPass, pCreateInfo->subpass) != VK_FORMAT_UNDEFINED)) { - apiHasher.Update(GraphicsPipeline::ObjectFromHandle(pCreateInfo->basePipelineHandle)->GetApiHash()); + GenerateHashFromDepthStencilStateCreateInfo(*pCreateInfo->pDepthStencilState, pApiHasher); } - apiHasher.Update(pCreateInfo->basePipelineIndex); + EXTRACT_VK_STRUCTURES_0( + variableRateShading, + PipelineFragmentShadingRateStateCreateInfoKHR, + static_cast(pCreateInfo->pNext), + PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR) - if (pCreateInfo->pNext != nullptr) + if (pPipelineFragmentShadingRateStateCreateInfoKHR != nullptr) { - const void* pNext = pCreateInfo->pNext; + pApiHasher->Update(pPipelineFragmentShadingRateStateCreateInfoKHR->fragmentSize.width); + pApiHasher->Update(pPipelineFragmentShadingRateStateCreateInfoKHR->fragmentSize.height); + pApiHasher->Update(pPipelineFragmentShadingRateStateCreateInfoKHR->combinerOps[0]); + pApiHasher->Update(pPipelineFragmentShadingRateStateCreateInfoKHR->combinerOps[1]); + } +} - while (pNext != nullptr) - { - const auto* pHeader = 
static_cast(pNext); +// ===================================================================================================================== +void GraphicsPipelineCommon::GenerateHashForFragmentOutputInterfaceState( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + Util::MetroHash128* pBaseHasher, + Util::MetroHash128* pApiHasher) +{ - switch (static_cast(pHeader->sType)) - { - case VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_ADVANCED_STATE_CREATE_INFO_EXT: - { - const auto* pExtInfo = static_cast(pNext); - apiHasher.Update(pExtInfo->sType); - apiHasher.Update(pExtInfo->flags); - apiHasher.Update(pExtInfo->discardRectangleMode); - apiHasher.Update(pExtInfo->discardRectangleCount); + const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pCreateInfo->renderPass); - if (pExtInfo->pDiscardRectangles != nullptr) - { - for (uint32 i = 0; i < pExtInfo->discardRectangleCount; i++) - { - apiHasher.Update(pExtInfo->pDiscardRectangles[i]); - } - } + if ((pCreateInfo->pColorBlendState != nullptr) && + (GetColorAttachmentCount(pRenderPass, pCreateInfo->subpass) != 0)) - break; - } - default: - break; - } + { + GenerateHashFromColorBlendStateCreateInfo(*pCreateInfo->pColorBlendState, pBaseHasher, pApiHasher); + } - pNext = pHeader->pNext; - } + if (pCreateInfo->renderPass != VK_NULL_HANDLE) + { + pBaseHasher->Update(RenderPass::ObjectFromHandle(pCreateInfo->renderPass)->GetHash()); } + pBaseHasher->Update(pCreateInfo->subpass); +} + +// ===================================================================================================================== +// Generates the API PSO hash using the contents of the VkGraphicsPipelineCreateInfo struct +// Pipeline compilation affected by: +// - pCreateInfo->pStages +// - pCreateInfo->pVertexInputState +// - pCreateInfo->pInputAssemblyState +// - pCreateInfo->pTessellationState +// - pCreateInfo->pRasterizationState +// - pCreateInfo->pMultisampleState +// - pCreateInfo->pColorBlendState +// - pCreateInfo->layout +// - 
pCreateInfo->renderPass +// - pCreateInfo->subpass +uint64_t GraphicsPipelineCommon::BuildApiHash( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const GraphicsPipelineObjectCreateInfo* pInfo) +{ + Util::MetroHash::Hash baseHash; + + Util::MetroHash128 baseHasher; + Util::MetroHash128 apiHasher; + + uint32_t dynamicStateFlags = GetDynamicStateFlags( + pCreateInfo->pDynamicState + ); + + baseHasher.Update(pCreateInfo->flags); + baseHasher.Update(dynamicStateFlags); + + const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pCreateInfo->renderPass); + + GenerateHashForVertexInputInterfaceState(pCreateInfo, &baseHasher, &apiHasher); + + GenerateHashForPreRasterizationShadersState(pCreateInfo, pInfo, &baseHasher, &apiHasher); + + const bool enableRasterization = + (pInfo->immedInfo.rasterizerDiscardEnable == false) || + IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::RasterizerDiscardEnableExt); + + if (enableRasterization) + { + GenerateHashForFragmentShaderState(pCreateInfo, &baseHasher, &apiHasher); + + GenerateHashForFragmentOutputInterfaceState(pCreateInfo, &baseHasher, &apiHasher); + } + + if ((pCreateInfo->flags & VK_PIPELINE_CREATE_DERIVATIVE_BIT) && (pCreateInfo->basePipelineHandle != VK_NULL_HANDLE)) + { + apiHasher.Update(GraphicsPipeline::ObjectFromHandle(pCreateInfo->basePipelineHandle)->GetApiHash()); + } + + apiHasher.Update(pCreateInfo->basePipelineIndex); + baseHasher.Finalize(reinterpret_cast(&baseHash)); uint64_t apiHash; diff --git a/icd/api/include/app_resource_optimizer.h b/icd/api/include/app_resource_optimizer.h index 58988bbe..1611f70e 100644 --- a/icd/api/include/app_resource_optimizer.h +++ b/icd/api/include/app_resource_optimizer.h @@ -75,12 +75,12 @@ struct ResourceOptimizerKey uint64_t apiHash; // Hash of the *CreateInfo struct }; -VK_INLINE static bool ResourceKeysEqual( +inline bool ResourceKeysEqual( const ResourceOptimizerKey& lhs, const ResourceOptimizerKey& rhs) { return (lhs.dimensions == rhs.dimensions) 
&& (lhs.apiHash == rhs.apiHash); } -VK_INLINE static bool ResourceKeysNotEqual( +inline bool ResourceKeysNotEqual( const ResourceOptimizerKey& lhs, const ResourceOptimizerKey& rhs) { return (lhs.dimensions != rhs.dimensions) || (lhs.apiHash != rhs.apiHash); } diff --git a/icd/api/include/compiler_solution.h b/icd/api/include/compiler_solution.h index 908b29df..c4d80dd2 100644 --- a/icd/api/include/compiler_solution.h +++ b/icd/api/include/compiler_solution.h @@ -51,7 +51,7 @@ class DeferredHostOperation; enum FreeCompilerBinary : uint32_t { - FreeWithCompiler, + FreeWithCompiler = 0, FreeWithInstanceAllocator, DoNotFree }; @@ -162,6 +162,7 @@ class CompilerSolution Vkgc::PipelineShaderInfo** ppShadersInfo, void* pPipelineDumpHandle, uint64_t pipelineHash, + Util::MetroHash::Hash* pCacheId, int64_t* pCompileTime) = 0; virtual VkResult CreateComputePipelineBinary( @@ -173,6 +174,7 @@ class CompilerSolution const void** ppPipelineBinary, void* pPipelineDumpHandle, uint64_t pipelineHash, + Util::MetroHash::Hash* pCacheId, int64_t* pCompileTime) = 0; virtual void FreeGraphicsPipelineBinary( diff --git a/icd/api/include/compiler_solution_llpc.h b/icd/api/include/compiler_solution_llpc.h index 7595481b..d7efd5e1 100644 --- a/icd/api/include/compiler_solution_llpc.h +++ b/icd/api/include/compiler_solution_llpc.h @@ -87,6 +87,7 @@ class CompilerSolutionLlpc final : public CompilerSolution Vkgc::PipelineShaderInfo** ppShadersInfo, void* pPipelineDumpHandle, uint64_t pipelineHash, + Util::MetroHash::Hash* pCacheId, int64_t* pCompileTime) override; virtual VkResult CreateComputePipelineBinary( @@ -98,6 +99,7 @@ class CompilerSolutionLlpc final : public CompilerSolution const void** ppPipelineBinary, void* pPipelineDumpHandle, uint64_t pipelineHash, + Util::MetroHash::Hash* pCacheId, int64_t* pCompileTime) override; virtual void FreeGraphicsPipelineBinary( diff --git a/icd/api/include/graphics_pipeline_common.h b/icd/api/include/graphics_pipeline_common.h index 
788e01ab..d0640a96 100644 --- a/icd/api/include/graphics_pipeline_common.h +++ b/icd/api/include/graphics_pipeline_common.h @@ -37,6 +37,7 @@ namespace vk { +class PipelineCache; struct PipelineOptimizerKey; struct GraphicsPipelineBinaryCreateInfo; struct GraphicsPipelineShaderStageInfo; @@ -66,6 +67,8 @@ struct VbBindingInfo struct UberFetchShaderBufferInfo { + bool requirePerIntanceFetch; + bool requirePerCompFetch; uint32_t userDataOffset; uint32_t bufferSize; uint32_t bufferData[Vkgc::MaxFetchShaderInternalBufferSize]; @@ -127,7 +130,6 @@ struct GraphicsPipelineObjectCreateInfo Pal::DepthStencilStateCreateInfo ds; GraphicsPipelineObjectImmedInfo immedInfo; uint32_t staticStateMask; - const PipelineLayout* pLayout; uint32_t sampleCoverage; VkShaderStageFlagBits activeStages; VkFormat dbFormat; @@ -163,9 +165,17 @@ struct GraphicsPipelineBinaryInfo class GraphicsPipelineCommon : public Pipeline { public: + // Create an executable graphics pipeline or graphics pipeline library + static VkResult Create( + Device* pDevice, + PipelineCache* pPipelineCache, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPipeline* pPipeline); + // Get the active shader stages through API info static VkShaderStageFlagBits GetActiveShaderStages( - const VkGraphicsPipelineCreateInfo* pGraphicsPipelineCreateInfo + const VkGraphicsPipelineCreateInfo* pGraphicsPipelineCreateInfo ); // Returns true if Dual Source Blending is to be enabled based on the given ColorBlendAttachmentState @@ -179,7 +189,7 @@ class GraphicsPipelineCommon : public Pipeline // Get the dynamics states specified by API info static uint32_t GetDynamicStateFlags( - const VkPipelineDynamicStateCreateInfo* pDy + const VkPipelineDynamicStateCreateInfo* pDy ); protected: @@ -187,6 +197,7 @@ class GraphicsPipelineCommon : public Pipeline static VkResult BuildPipelineBinaryCreateInfo( const Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, + const PipelineLayout*
pPipelineLayout, GraphicsPipelineBinaryCreateInfo* pBinInfo, GraphicsPipelineShaderStageInfo* pShaderInfo, VbInfo* pVbInfo, @@ -198,6 +209,7 @@ class GraphicsPipelineCommon : public Pipeline const VkGraphicsPipelineCreateInfo* pIn, const VbInfo* pVbInfo, const GraphicsPipelineBinaryInfo* pBinInfo, + const PipelineLayout* pPipelineLayout, GraphicsPipelineObjectCreateInfo* pObjInfo); // Generates the API PSO hash using the contents of the VkGraphicsPipelineCreateInfo struct @@ -205,6 +217,32 @@ class GraphicsPipelineCommon : public Pipeline const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineObjectCreateInfo* pInfo); + // Generate API PSO hash for state of vertex input interface section + static void GenerateHashForVertexInputInterfaceState( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + Util::MetroHash128* pBaseHasher, + Util::MetroHash128* pApiHasher); + + // Generate API PSO hash for state of pre-rasterization shaders section + static void GenerateHashForPreRasterizationShadersState( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const GraphicsPipelineObjectCreateInfo* pInfo, + Util::MetroHash128* pBaseHasher, + Util::MetroHash128* pApiHasher); + + // Generate API PSO hash for state of fragment shader section + static void GenerateHashForFragmentShaderState( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + Util::MetroHash128* pBaseHasher, + Util::MetroHash128* pApiHasher); + + // Generate API PSO hash for state of fragment output interface section + static void GenerateHashForFragmentOutputInterfaceState( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + Util::MetroHash128* pBaseHasher, + Util::MetroHash128* pApiHasher); + + // Constructor of GraphicsPipelineCommon GraphicsPipelineCommon( Device* const pDevice) : Pipeline( diff --git a/icd/api/include/internal_mem_mgr.h b/icd/api/include/internal_mem_mgr.h index f8c837fc..4965d04d 100644 --- a/icd/api/include/internal_mem_mgr.h +++ b/icd/api/include/internal_mem_mgr.h @@ 
-122,7 +122,7 @@ struct InternalMemoryPool class InternalMemory { public: - VK_INLINE InternalMemory(); + inline InternalMemory(); Pal::IGpuMemory* PalMemory(int32_t idx) { diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_beta.h b/icd/api/include/khronos/sdk-1.2/vulkan_beta.h index c615bb35..e2337adf 100644 --- a/icd/api/include/khronos/sdk-1.2/vulkan_beta.h +++ b/icd/api/include/khronos/sdk-1.2/vulkan_beta.h @@ -22,7 +22,7 @@ extern "C" { #define VK_KHR_video_queue 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkVideoSessionKHR) VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkVideoSessionParametersKHR) -#define VK_KHR_VIDEO_QUEUE_SPEC_VERSION 1 +#define VK_KHR_VIDEO_QUEUE_SPEC_VERSION 2 #define VK_KHR_VIDEO_QUEUE_EXTENSION_NAME "VK_KHR_video_queue" typedef enum VkQueryResultStatusKHR { @@ -66,12 +66,12 @@ typedef enum VkVideoComponentBitDepthFlagBitsKHR { } VkVideoComponentBitDepthFlagBitsKHR; typedef VkFlags VkVideoComponentBitDepthFlagsKHR; -typedef enum VkVideoCapabilitiesFlagBitsKHR { - VK_VIDEO_CAPABILITIES_PROTECTED_CONTENT_BIT_KHR = 0x00000001, - VK_VIDEO_CAPABILITIES_SEPARATE_REFERENCE_IMAGES_BIT_KHR = 0x00000002, - VK_VIDEO_CAPABILITIES_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF -} VkVideoCapabilitiesFlagBitsKHR; -typedef VkFlags VkVideoCapabilitiesFlagsKHR; +typedef enum VkVideoCapabilityFlagBitsKHR { + VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR = 0x00000001, + VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR = 0x00000002, + VK_VIDEO_CAPABILITY_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF +} VkVideoCapabilityFlagBitsKHR; +typedef VkFlags VkVideoCapabilityFlagsKHR; typedef enum VkVideoSessionCreateFlagBitsKHR { VK_VIDEO_SESSION_CREATE_DEFAULT_KHR = 0, @@ -120,16 +120,16 @@ typedef struct VkVideoProfilesKHR { } VkVideoProfilesKHR; typedef struct VkVideoCapabilitiesKHR { - VkStructureType sType; - void* pNext; - VkVideoCapabilitiesFlagsKHR capabilityFlags; - VkDeviceSize minBitstreamBufferOffsetAlignment; - VkDeviceSize minBitstreamBufferSizeAlignment; - VkExtent2D 
videoPictureExtentGranularity; - VkExtent2D minExtent; - VkExtent2D maxExtent; - uint32_t maxReferencePicturesSlotsCount; - uint32_t maxReferencePicturesActiveCount; + VkStructureType sType; + void* pNext; + VkVideoCapabilityFlagsKHR capabilityFlags; + VkDeviceSize minBitstreamBufferOffsetAlignment; + VkDeviceSize minBitstreamBufferSizeAlignment; + VkExtent2D videoPictureExtentGranularity; + VkExtent2D minExtent; + VkExtent2D maxExtent; + uint32_t maxReferencePicturesSlotsCount; + uint32_t maxReferencePicturesActiveCount; } VkVideoCapabilitiesKHR; typedef struct VkPhysicalDeviceVideoFormatInfoKHR { @@ -433,10 +433,10 @@ VKAPI_ATTR void VKAPI_CALL vkCmdEncodeVideoKHR( #define VK_EXT_video_encode_h264 1 #include "vk_video/vulkan_video_codec_h264std.h" #include "vk_video/vulkan_video_codec_h264std_encode.h" -#define VK_EXT_VIDEO_ENCODE_H264_SPEC_VERSION 1 +#define VK_EXT_VIDEO_ENCODE_H264_SPEC_VERSION 2 #define VK_EXT_VIDEO_ENCODE_H264_EXTENSION_NAME "VK_EXT_video_encode_h264" -typedef enum VkVideoEncodeH264CapabilitiesFlagBitsEXT { +typedef enum VkVideoEncodeH264CapabilityFlagBitsEXT { VK_VIDEO_ENCODE_H264_CAPABILITY_CABAC_BIT_EXT = 0x00000001, VK_VIDEO_ENCODE_H264_CAPABILITY_CAVLC_BIT_EXT = 0x00000002, VK_VIDEO_ENCODE_H264_CAPABILITY_WEIGHTED_BI_PRED_IMPLICIT_BIT_EXT = 0x00000004, @@ -448,9 +448,9 @@ typedef enum VkVideoEncodeH264CapabilitiesFlagBitsEXT { VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_PARTIAL_BIT_EXT = 0x00000100, VK_VIDEO_ENCODE_H264_CAPABILITY_MULTIPLE_SLICE_PER_FRAME_BIT_EXT = 0x00000200, VK_VIDEO_ENCODE_H264_CAPABILITY_EVENLY_DISTRIBUTED_SLICE_SIZE_BIT_EXT = 0x00000400, - VK_VIDEO_ENCODE_H264_CAPABILITIES_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF -} VkVideoEncodeH264CapabilitiesFlagBitsEXT; -typedef VkFlags VkVideoEncodeH264CapabilitiesFlagsEXT; + VK_VIDEO_ENCODE_H264_CAPABILITY_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkVideoEncodeH264CapabilityFlagBitsEXT; +typedef VkFlags VkVideoEncodeH264CapabilityFlagsEXT; typedef enum 
VkVideoEncodeH264InputModeFlagBitsEXT { VK_VIDEO_ENCODE_H264_INPUT_MODE_FRAME_BIT_EXT = 0x00000001, @@ -475,19 +475,19 @@ typedef enum VkVideoEncodeH264CreateFlagBitsEXT { } VkVideoEncodeH264CreateFlagBitsEXT; typedef VkFlags VkVideoEncodeH264CreateFlagsEXT; typedef struct VkVideoEncodeH264CapabilitiesEXT { - VkStructureType sType; - const void* pNext; - VkVideoEncodeH264CapabilitiesFlagsEXT flags; - VkVideoEncodeH264InputModeFlagsEXT inputModeFlags; - VkVideoEncodeH264OutputModeFlagsEXT outputModeFlags; - VkExtent2D minPictureSizeInMbs; - VkExtent2D maxPictureSizeInMbs; - VkExtent2D inputImageDataAlignment; - uint8_t maxNumL0ReferenceForP; - uint8_t maxNumL0ReferenceForB; - uint8_t maxNumL1Reference; - uint8_t qualityLevelCount; - VkExtensionProperties stdExtensionVersion; + VkStructureType sType; + const void* pNext; + VkVideoEncodeH264CapabilityFlagsEXT flags; + VkVideoEncodeH264InputModeFlagsEXT inputModeFlags; + VkVideoEncodeH264OutputModeFlagsEXT outputModeFlags; + VkExtent2D minPictureSizeInMbs; + VkExtent2D maxPictureSizeInMbs; + VkExtent2D inputImageDataAlignment; + uint8_t maxNumL0ReferenceForP; + uint8_t maxNumL0ReferenceForB; + uint8_t maxNumL1Reference; + uint8_t qualityLevelCount; + VkExtensionProperties stdExtensionVersion; } VkVideoEncodeH264CapabilitiesEXT; typedef struct VkVideoEncodeH264SessionCreateInfoEXT { @@ -567,22 +567,22 @@ typedef struct VkVideoEncodeH264ProfileEXT { #define VK_EXT_video_decode_h264 1 #include "vk_video/vulkan_video_codec_h264std_decode.h" -#define VK_EXT_VIDEO_DECODE_H264_SPEC_VERSION 1 +#define VK_EXT_VIDEO_DECODE_H264_SPEC_VERSION 3 #define VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME "VK_EXT_video_decode_h264" -typedef enum VkVideoDecodeH264FieldLayoutFlagBitsEXT { - VK_VIDEO_DECODE_H264_PROGRESSIVE_PICTURES_ONLY_EXT = 0, - VK_VIDEO_DECODE_H264_FIELD_LAYOUT_LINE_INTERLACED_PLANE_BIT_EXT = 0x00000001, - VK_VIDEO_DECODE_H264_FIELD_LAYOUT_SEPARATE_INTERLACED_PLANE_BIT_EXT = 0x00000002, - 
VK_VIDEO_DECODE_H264_FIELD_LAYOUT_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF -} VkVideoDecodeH264FieldLayoutFlagBitsEXT; -typedef VkFlags VkVideoDecodeH264FieldLayoutFlagsEXT; +typedef enum VkVideoDecodeH264PictureLayoutFlagBitsEXT { + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_EXT = 0, + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_EXT = 0x00000001, + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_SEPARATE_PLANES_BIT_EXT = 0x00000002, + VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkVideoDecodeH264PictureLayoutFlagBitsEXT; +typedef VkFlags VkVideoDecodeH264PictureLayoutFlagsEXT; typedef VkFlags VkVideoDecodeH264CreateFlagsEXT; typedef struct VkVideoDecodeH264ProfileEXT { - VkStructureType sType; - const void* pNext; - StdVideoH264ProfileIdc stdProfileIdc; - VkVideoDecodeH264FieldLayoutFlagsEXT fieldLayout; + VkStructureType sType; + const void* pNext; + StdVideoH264ProfileIdc stdProfileIdc; + VkVideoDecodeH264PictureLayoutFlagsEXT pictureLayout; } VkVideoDecodeH264ProfileEXT; typedef struct VkVideoDecodeH264CapabilitiesEXT { diff --git a/icd/api/include/khronos/sdk-1.2/vulkan_core.h b/icd/api/include/khronos/sdk-1.2/vulkan_core.h index 8166e173..0e081aaf 100644 --- a/icd/api/include/khronos/sdk-1.2/vulkan_core.h +++ b/icd/api/include/khronos/sdk-1.2/vulkan_core.h @@ -72,7 +72,7 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 185 +#define VK_HEADER_VERSION 188 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 2, VK_HEADER_VERSION) @@ -1706,13 +1706,15 @@ typedef enum VkAttachmentLoadOp { VK_ATTACHMENT_LOAD_OP_LOAD = 0, VK_ATTACHMENT_LOAD_OP_CLEAR = 1, VK_ATTACHMENT_LOAD_OP_DONT_CARE = 2, + VK_ATTACHMENT_LOAD_OP_NONE_EXT = 1000400000, VK_ATTACHMENT_LOAD_OP_MAX_ENUM = 0x7FFFFFFF } VkAttachmentLoadOp; typedef enum VkAttachmentStoreOp { 
VK_ATTACHMENT_STORE_OP_STORE = 0, VK_ATTACHMENT_STORE_OP_DONT_CARE = 1, - VK_ATTACHMENT_STORE_OP_NONE_QCOM = 1000301000, + VK_ATTACHMENT_STORE_OP_NONE_EXT = 1000301000, + VK_ATTACHMENT_STORE_OP_NONE_QCOM = VK_ATTACHMENT_STORE_OP_NONE_EXT, VK_ATTACHMENT_STORE_OP_MAX_ENUM = 0x7FFFFFFF } VkAttachmentStoreOp; @@ -5282,6 +5284,7 @@ typedef enum VkDriverId { VK_DRIVER_ID_MOLTENVK = 14, VK_DRIVER_ID_COREAVI_PROPRIETARY = 15, VK_DRIVER_ID_JUICE_PROPRIETARY = 16, + VK_DRIVER_ID_VERISILICON_PROPRIETARY = 17, VK_DRIVER_ID_AMD_PROPRIETARY_KHR = VK_DRIVER_ID_AMD_PROPRIETARY, VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR = VK_DRIVER_ID_AMD_OPEN_SOURCE, VK_DRIVER_ID_MESA_RADV_KHR = VK_DRIVER_ID_MESA_RADV, @@ -10133,9 +10136,10 @@ typedef VkGeometryFlagBitsKHR VkGeometryFlagBitsNV; typedef enum VkGeometryInstanceFlagBitsKHR { VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR = 0x00000001, - VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR = 0x00000002, + VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR = 0x00000002, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR = 0x00000004, VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR = 0x00000008, + VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR = VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR, VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_NV = VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_NV = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, @@ -12666,9 +12670,14 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDrawMultiIndexedEXT( #endif +#define VK_EXT_load_store_op_none 1 +#define VK_EXT_LOAD_STORE_OP_NONE_SPEC_VERSION 1 +#define VK_EXT_LOAD_STORE_OP_NONE_EXTENSION_NAME "VK_EXT_load_store_op_none" + + #define VK_KHR_acceleration_structure 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkAccelerationStructureKHR) -#define VK_KHR_ACCELERATION_STRUCTURE_SPEC_VERSION 11 +#define 
VK_KHR_ACCELERATION_STRUCTURE_SPEC_VERSION 12 #define VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME "VK_KHR_acceleration_structure" typedef enum VkBuildAccelerationStructureModeKHR { diff --git a/icd/api/include/pipeline_binary_cache.h b/icd/api/include/pipeline_binary_cache.h index b83ef8a7..30211ee6 100644 --- a/icd/api/include/pipeline_binary_cache.h +++ b/icd/api/include/pipeline_binary_cache.h @@ -147,10 +147,10 @@ class PipelineBinaryCache CacheId* GetCacheIdForPipeline( const Pal::PipelineHash* pInternalPipelineHash); - VK_INLINE HashMapping::Iterator GetHashMappingIterator() + HashMapping::Iterator GetHashMappingIterator() { return m_hashMapping.Begin(); } - VK_INLINE Util::RWLock* GetHashMappingLock() + Util::RWLock* GetHashMappingLock() { return &m_hashMappingLock; } #endif diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index 872efed2..466d3351 100644 --- a/icd/api/include/pipeline_compiler.h +++ b/icd/api/include/pipeline_compiler.h @@ -54,6 +54,31 @@ struct ShaderModuleHandle; class PipelineBinaryCache; +// ===================================================================================================================== +// Make sure that our internal values are the same as the VK values +static_assert((1 << ShaderStage::ShaderStageVertex) == VK_SHADER_STAGE_VERTEX_BIT, + "Internal vertex shader stage value is different from that defined in Vulkan."); +static_assert((1 << ShaderStage::ShaderStageTessControl) == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, + "Internal tessellation control shader stage value is different from that defined in Vulkan."); +static_assert((1 << ShaderStage::ShaderStageTessEval) == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, + "Internal tessellation evaluation stage value is different from that defined in Vulkan."); +static_assert((1 << ShaderStage::ShaderStageGeometry) == VK_SHADER_STAGE_GEOMETRY_BIT, + "Internal geometry shader stage value is different from that defined in Vulkan."); 
+static_assert((1 << ShaderStage::ShaderStageFragment) == VK_SHADER_STAGE_FRAGMENT_BIT, + "Internal fragment shader stage value is different from that defined in Vulkan."); + +// ===================================================================================================================== +// The shader stages of Pre-Rasterization Shaders section +constexpr uint32_t PrsShaderMask = 0 + | ((1 << ShaderStage::ShaderStageVertex) + | (1 << ShaderStage::ShaderStageTessControl) + | (1 << ShaderStage::ShaderStageTessEval) + | (1 << ShaderStage::ShaderStageGeometry)); + +// ===================================================================================================================== +// The shader stages of Fragment Shader (Post-Rasterization) section +constexpr uint32_t FgsShaderMask = (1 << ShaderStage::ShaderStageFragment); + // ===================================================================================================================== struct ShaderStageInfo { @@ -114,6 +139,12 @@ class PipelineCompiler const void* pCode, ShaderModuleHandle* pModule); + bool IsValidShaderModule( + const ShaderModuleHandle* pShaderModule) const; + + void FreeShaderModule( + ShaderModuleHandle* pShaderModule); + virtual VkResult CreatePartialPipelineBinary( uint32_t deviceIdx, void* pShaderModuleData, @@ -159,6 +190,7 @@ class PipelineCompiler const Device* pDevice, const VkGraphicsPipelineCreateInfo* pIn, const GraphicsPipelineShaderStageInfo* pShaderInfo, + const PipelineLayout* pPipelineLayout, GraphicsPipelineBinaryCreateInfo* pCreateInfo, VbInfo* pVbInfo); @@ -168,10 +200,6 @@ class PipelineCompiler const ComputePipelineShaderStageInfo* pShaderInfo, ComputePipelineBinaryCreateInfo* pInfo); - bool IsValidShaderModule(const ShaderModuleHandle* pShaderModule) const; - - void FreeShaderModule(ShaderModuleHandle* pShaderModule); - void FreeComputePipelineBinary( ComputePipelineBinaryCreateInfo* pCreateInfo, const void* pPipelineBinary, @@ -205,7 +233,7 @@ class 
PipelineCompiler Vkgc::PipelineShaderOptions* pShaderOptions ) const; - VK_INLINE Vkgc::GfxIpVersion& GetGfxIp() { return m_gfxIp; } + Vkgc::GfxIpVersion& GetGfxIp() { return m_gfxIp; } void GetElfCacheMetricString(char* pOutStr, size_t outStrSize); @@ -216,6 +244,20 @@ class PipelineCompiler bool isDynamicStride, UberFetchShaderBufferInfo* pFetchShaderBufferInfo); + void GetComputePipelineCacheId( + uint32_t deviceIdx, + ComputePipelineBinaryCreateInfo* pCreateInfo, + uint64_t pipelineHash, + const Util::MetroHash::Hash& settingsHash, + Util::MetroHash::Hash* pCacheId); + + void GetGraphicsPipelineCacheId( + uint32_t deviceIdx, + GraphicsPipelineBinaryCreateInfo* pCreateInfo, + uint64_t pipelineHash, + const Util::MetroHash::Hash& settingsHash, + Util::MetroHash::Hash* pCacheId); + private: PAL_DISALLOW_COPY_AND_ASSIGN(PipelineCompiler); @@ -282,8 +324,6 @@ class PipelineCompiler const VkStructHeader* pHeader, const VkPipelineCreationFeedbackCreateInfoEXT** ppPipelineCreationFeadbackCreateInfo); - static VkPipelineCreateFlags GetCacheIdControlFlags( - VkPipelineCreateFlags in); }; // class PipelineCompiler } // namespce vk diff --git a/icd/api/include/render_state_cache.h b/icd/api/include/render_state_cache.h index 405d4254..7c6d1917 100644 --- a/icd/api/include/render_state_cache.h +++ b/icd/api/include/render_state_cache.h @@ -215,7 +215,7 @@ class RenderStateCache uint32_t token, ParamHashMap* pMap); - VK_INLINE bool IsEnabled(uint32_t staticStateFlag) const; + bool IsEnabled(uint32_t staticStateFlag) const; Pal::Result AllocMem( size_t size, diff --git a/icd/api/include/vk_buffer.h b/icd/api/include/vk_buffer.h index cc5bee18..0d66f92a 100644 --- a/icd/api/include/vk_buffer.h +++ b/icd/api/include/vk_buffer.h @@ -78,10 +78,15 @@ class Buffer final : public NonDispatchable VkDeviceSize memOffset, const uint32_t* pDeviceIndices); - VkResult GetMemoryRequirements( + void GetMemoryRequirements( const Device* pDevice, VkMemoryRequirements* pMemoryRequirements); + 
static void CalculateMemoryRequirements( + const Device* pDevice, + const VkBufferCreateInfo* pCreateInfo, + VkMemoryRequirements* pMemoryRequirements); + VkDeviceSize GetSize() const { return m_size; } @@ -130,13 +135,8 @@ class Buffer final : public NonDispatchable Buffer(Device* pDevice, const VkAllocationCallbacks* pAllocator, - VkBufferCreateFlags flags, - VkBufferUsageFlags usage, + const VkBufferCreateInfo* pCreateInfo, Pal::IGpuMemory** pGpuMemory, - VkSharingMode sharingMode, - uint32_t queueFamilyIndexCount, - const uint32_t* pQueueFamilyIndices, - VkDeviceSize size, BufferFlags internalFlags); // Compute size required for the object. One copy of PerGpuInfo is included in the object and we need @@ -147,13 +147,23 @@ class Buffer final : public NonDispatchable } static void LogBufferCreate( - VkDeviceSize size, const VkBufferCreateInfo* pCreateInfo, VkBuffer buffer, const Device* pDevice); void LogGpuMemoryBind(const Device* pDevice, const Pal::IGpuMemory* pPalMemory, VkDeviceSize memOffset) const; + static void GetBufferMemoryRequirements( + const Device* pDevice, + const BufferFlags* pBufferFlags, + const VkDeviceSize size, + VkMemoryRequirements* pMemoryRequirements); + + static void CalculateBufferFlags( + const Device* pDevice, + const VkBufferCreateInfo* pCreateInfo, + BufferFlags* pBufferFlags); + const VkDeviceSize m_size; VkDeviceSize m_memOffset; BufferBarrierPolicy m_barrierPolicy; // Barrier policy to use for this buffer @@ -165,7 +175,7 @@ class Buffer final : public NonDispatchable }; // ===================================================================================================================== -VK_INLINE Pal::gpusize GetBufferAddress( +inline Pal::gpusize GetBufferAddress( uint32_t deviceIndex, VkBuffer buffer, VkDeviceSize offset) @@ -181,7 +191,7 @@ VK_INLINE Pal::gpusize GetBufferAddress( } // ===================================================================================================================== -VK_INLINE 
Pal::gpusize GetBufferSize( +inline Pal::gpusize GetBufferSize( VkBuffer buffer, VkDeviceSize offset) { diff --git a/icd/api/include/vk_cmd_pool.h b/icd/api/include/vk_cmd_pool.h index 6bcb8642..489df175 100644 --- a/icd/api/include/vk_cmd_pool.h +++ b/icd/api/include/vk_cmd_pool.h @@ -77,7 +77,7 @@ class CmdPool final : public NonDispatchable void UnregisterCmdBuffer(CmdBuffer* pCmdBuffer); - VK_INLINE uint32_t GetQueueFamilyIndex() const { return m_queueFamilyIndex; } + uint32_t GetQueueFamilyIndex() const { return m_queueFamilyIndex; } const VkAllocationCallbacks* GetCmdPoolAllocator() const { return m_pAllocator; } diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index 4f6e4be7..9cdaef88 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -718,7 +718,7 @@ class CmdBuffer void CmdBeginConditionalRendering(const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin); void CmdEndConditionalRendering(); - VK_INLINE void SetDeviceMask(uint32_t deviceMask) + void SetDeviceMask(uint32_t deviceMask) { // Ensure we are enabling valid devices within the group VK_ASSERT((m_pDevice->GetPalDeviceMask() & deviceMask) == deviceMask); @@ -733,12 +733,12 @@ class CmdBuffer m_curDeviceMask = deviceMask; } - VK_INLINE uint32_t GetDeviceMask() const + uint32_t GetDeviceMask() const { return m_curDeviceMask; } - VK_INLINE void SetRpDeviceMask(uint32_t deviceMask) + void SetRpDeviceMask(uint32_t deviceMask) { VK_ASSERT(deviceMask != 0); @@ -748,17 +748,17 @@ class CmdBuffer m_rpDeviceMask = deviceMask; } - VK_INLINE uint32_t GetRpDeviceMask() const + uint32_t GetRpDeviceMask() const { return m_rpDeviceMask; } - VK_INLINE uint32_t GetBeginDeviceMask() const + uint32_t GetBeginDeviceMask() const { return m_cbBeginDeviceMask; } - VK_INLINE bool IsProtected() const + bool IsProtected() const { return m_pCmdPool->IsProtected(); } @@ -771,7 +771,7 @@ class CmdBuffer VK_FORCEINLINE Instance* VkInstance(void) const { 
return m_pDevice->VkInstance(); } - VK_INLINE Pal::ICmdBuffer* PalCmdBuffer( + Pal::ICmdBuffer* PalCmdBuffer( int32_t idx) const { VK_ASSERT((idx >= 0) && (idx < static_cast(MaxPalDevices))); @@ -798,7 +798,7 @@ class CmdBuffer const Pal::CmdBufferBuildInfo& cmdInfo); Pal::Result PalCmdBufferEnd(); - Pal::Result PalCmdBufferReset(Pal::ICmdAllocator* pCmdAllocator, bool returnGpuMemory); + Pal::Result PalCmdBufferReset(bool returnGpuMemory); void PalCmdBufferDestroy(); @@ -918,7 +918,6 @@ class CmdBuffer EventContainer_T* pEvent, Pal::HwPipePoint resetPoint); - template< bool regionPerDevice > void PalCmdResolveImage( const Image& srcImage, Pal::ImageLayout srcImageLayout, @@ -935,17 +934,17 @@ class CmdBuffer void PalCmdBindMsaaStates(const Pal::IMsaaState* const * pStates); - VK_INLINE void PalCmdBindMsaaState( + inline void PalCmdBindMsaaState( Pal::ICmdBuffer* pPalCmdBuf, uint32_t deviceIdx, const Pal::IMsaaState* pState); - VK_INLINE void PalCmdBindColorBlendState( + inline void PalCmdBindColorBlendState( Pal::ICmdBuffer* pPalCmdBuf, uint32_t deviceIdx, const Pal::IColorBlendState* pState); - VK_INLINE void PalCmdBindDepthStencilState( + inline void PalCmdBindDepthStencilState( Pal::ICmdBuffer* pPalCmdBuf, uint32_t deviceIdx, const Pal::IDepthStencilState* pState); @@ -954,37 +953,37 @@ class CmdBuffer uint32_t numSamplesPerPixel, const Pal::MsaaQuadSamplePattern& quadSamplePattern); - VK_INLINE void PalCmdBufferSetUserData( + inline void PalCmdBufferSetUserData( Pal::PipelineBindPoint bindPoint, uint32_t firstEntry, uint32_t entryCount, uint32_t perDeviceStride, const uint32_t* pEntryValues); - VK_INLINE void PalCmdSuspendPredication( + void PalCmdSuspendPredication( bool suspend); template< typename EventContainer_T > - VK_INLINE void InsertDeviceEvents( + void InsertDeviceEvents( const Pal::IGpuEvent** pDestEvents, const EventContainer_T* pSrcEvents, uint32_t index, uint32_t stride) const; - VK_INLINE uint32_t NumDeviceEvents(uint32_t numEvents) const + 
uint32_t NumDeviceEvents(uint32_t numEvents) const { return m_numPalDevices * numEvents; } #if VK_ENABLE_DEBUG_BARRIERS - VK_INLINE void DbgBarrierPreCmd(uint32_t cmd) + void DbgBarrierPreCmd(uint32_t cmd) { if (m_dbgBarrierPreCmdMask & (cmd)) { DbgCmdBarrier(true); } } - VK_INLINE void DbgBarrierPostCmd(uint32_t cmd) + void DbgBarrierPostCmd(uint32_t cmd) { if (m_dbgBarrierPostCmdMask & (cmd)) { @@ -992,16 +991,19 @@ class CmdBuffer } } #else - VK_INLINE void DbgBarrierPreCmd(uint32_t cmd) {} - VK_INLINE void DbgBarrierPostCmd(uint32_t cmd) {} + void DbgBarrierPreCmd(uint32_t cmd) {} + void DbgBarrierPostCmd(uint32_t cmd) {} #endif SqttCmdBufferState* GetSqttState() { return m_pSqttState; } - VK_INLINE static bool IsStaticStateDifferent( - uint32_t oldToken, - uint32_t newToken); + static bool IsStaticStateDifferent( + uint32_t currentToken, + uint32_t newToken) + { + return ((currentToken != newToken) || (currentToken == DynamicRenderStateToken)); + } static PFN_vkCmdBindDescriptorSets GetCmdBindDescriptorSetsFunc(const Device* pDevice); @@ -1050,7 +1052,7 @@ class CmdBuffer void ResetState(); - VK_INLINE void CalcCounterBufferAddrs( + void CalcCounterBufferAddrs( uint32_t firstCounterBuffer, uint32_t counterBufferCount, const VkBuffer* pCounterBuffers, @@ -1095,8 +1097,8 @@ class CmdBuffer Pal::PipelineBindPoint palBindPoint, RebindUserDataFlags flags); - VK_INLINE void RPBeginSubpass(); - VK_INLINE void RPEndSubpass(); + void RPBeginSubpass(); + void RPEndSubpass(); void RPResolveAttachments(uint32_t count, const RPResolveInfo* pResolves); void RPSyncPoint(const RPSyncPointInfo& syncPoint, VirtualStackFrame* pVirtStack); void RPLoadOpClearColor(uint32_t count, const RPLoadOpClearInfo* pClears); @@ -1106,8 +1108,24 @@ class CmdBuffer void RPInitSamplePattern(); - VK_INLINE Pal::ImageLayout RPGetAttachmentLayout(uint32_t attachment, uint32_t plane); - VK_INLINE void RPSetAttachmentLayout(uint32_t attachment, uint32_t plane, Pal::ImageLayout layout); + 
Pal::ImageLayout RPGetAttachmentLayout( + uint32_t attachment, + uint32_t plane) + { + VK_ASSERT(attachment < m_allGpuState.pRenderPass->GetAttachmentCount()); + VK_ASSERT(attachment < m_renderPassInstance.maxAttachmentCount); + return m_renderPassInstance.pAttachments[attachment].planeLayout[plane]; + } + + void RPSetAttachmentLayout( + uint32_t attachment, + uint32_t plane, + Pal::ImageLayout layout) + { + VK_ASSERT(attachment < m_allGpuState.pRenderPass->GetAttachmentCount()); + VK_ASSERT(attachment < m_renderPassInstance.maxAttachmentCount); + m_renderPassInstance.pAttachments[attachment].planeLayout[plane] = layout; + } void FillTimestampQueryPool( const TimestampQueryPool& timestampQueryPool, @@ -1145,17 +1163,20 @@ class CmdBuffer template static PFN_vkCmdBindDescriptorSets GetCmdBindDescriptorSetsFunc(const Device* pDevice); - VK_INLINE bool PalPipelineBindingOwnedBy( + bool PalPipelineBindingOwnedBy( Pal::PipelineBindPoint palBind, PipelineBindPoint apiBind - ) const; + ) const + { + return m_allGpuState.palToApiPipeline[static_cast(palBind)] == apiBind; + } - VK_INLINE static void ConvertPipelineBindPoint( + static void ConvertPipelineBindPoint( VkPipelineBindPoint pipelineBindPoint, Pal::PipelineBindPoint* pPalBindPoint, PipelineBindPoint* pApiBind); - VK_INLINE void WritePushConstants( + void WritePushConstants( PipelineBindPoint apiBindPoint, Pal::PipelineBindPoint palBindPoint, const PipelineLayout* pLayout, @@ -1167,7 +1188,7 @@ class CmdBuffer void ResetVertexBuffer(); void UpdateVertexBufferStrides(const GraphicsPipeline* pPipeline); - VK_INLINE void UpdateLargestPipelineStackSize(const uint32_t deviceIndex, const uint32_t pipelineStackSize) + void UpdateLargestPipelineStackSize(const uint32_t deviceIndex, const uint32_t pipelineStackSize) { PerGpuState(deviceIndex)->maxPipelineStackSize = Util::Max(PerGpuState(deviceIndex)->maxPipelineStackSize, pipelineStackSize); @@ -1236,13 +1257,6 @@ class CmdBuffer }; // 
===================================================================================================================== -bool CmdBuffer::IsStaticStateDifferent( - uint32_t currentToken, - uint32_t newToken) -{ - return ((currentToken != newToken) || - (currentToken == DynamicRenderStateToken)); -} // ===================================================================================================================== void CmdBuffer::PalCmdBindMsaaState( @@ -1324,29 +1338,6 @@ void CmdBuffer::InsertDeviceEvents( } } -// ===================================================================================================================== -Pal::ImageLayout CmdBuffer::RPGetAttachmentLayout( - uint32_t attachment, - uint32_t plane) -{ - VK_ASSERT(attachment < m_allGpuState.pRenderPass->GetAttachmentCount()); - VK_ASSERT(attachment < m_renderPassInstance.maxAttachmentCount); - - return m_renderPassInstance.pAttachments[attachment].planeLayout[plane]; -} - -// ===================================================================================================================== -void CmdBuffer::RPSetAttachmentLayout( - uint32_t attachment, - uint32_t plane, - Pal::ImageLayout layout) -{ - VK_ASSERT(attachment < m_allGpuState.pRenderPass->GetAttachmentCount()); - VK_ASSERT(attachment < m_renderPassInstance.maxAttachmentCount); - - m_renderPassInstance.pAttachments[attachment].planeLayout[plane] = layout; -} - VK_DEFINE_DISPATCHABLE(CmdBuffer); namespace entry diff --git a/icd/api/include/vk_compute_pipeline.h b/icd/api/include/vk_compute_pipeline.h index e1d81f82..15427caa 100644 --- a/icd/api/include/vk_compute_pipeline.h +++ b/icd/api/include/vk_compute_pipeline.h @@ -74,7 +74,7 @@ class ComputePipeline final : public Pipeline, public NonDispatchable> 22; @@ -824,7 +824,7 @@ VK_INLINE uint32_t VkToPalVideoProfileLevel(uint32_t level) // ===================================================================================================================== // Converts PAL video 
profile level to Vulkan equivalent. -VK_INLINE uint32_t PalToVkVideoProfileLevel(uint32_t level) +inline uint32_t PalToVkVideoProfileLevel(uint32_t level) { // PAL level is represented as version multiplied by 10 uint32_t major = level / 10; @@ -865,21 +865,21 @@ VK_TO_PAL_STRUC_X( QUERY_TYPE_TIMESTAMP, // ===================================================================================================================== // Converts Vulkan query type to PAL equivalent -VK_INLINE Pal::QueryType VkToPalQueryType(VkQueryType queryType) +inline Pal::QueryType VkToPalQueryType(VkQueryType queryType) { return convert::QueryTypePool(queryType).m_type; } // ===================================================================================================================== // Converts Vulkan query type to PAL equivalent -VK_INLINE Pal::QueryPoolType VkToPalQueryPoolType(VkQueryType queryType) +inline Pal::QueryPoolType VkToPalQueryPoolType(VkQueryType queryType) { return convert::QueryTypePool(queryType).m_poolType; } // ===================================================================================================================== // Converts Vulkan query control flags to PAL equivalent -VK_INLINE Pal::QueryControlFlags VkToPalQueryControlFlags( +inline Pal::QueryControlFlags VkToPalQueryControlFlags( VkQueryType queryType, VkQueryControlFlags flags) { @@ -895,7 +895,7 @@ VK_INLINE Pal::QueryControlFlags VkToPalQueryControlFlags( // ===================================================================================================================== // Converts Vulkan query result flags to PAL equivalent -VK_INLINE Pal::QueryResultFlags VkToPalQueryResultFlags(VkQueryResultFlags flags) +inline Pal::QueryResultFlags VkToPalQueryResultFlags(VkQueryResultFlags flags) { uint32_t palFlags = Pal::QueryResultDefault; @@ -924,7 +924,7 @@ VK_INLINE Pal::QueryResultFlags VkToPalQueryResultFlags(VkQueryResultFlags flags // 
===================================================================================================================== // Converts Vulkan pipeline statistics query flags to PAL equivalent -VK_INLINE Pal::QueryPipelineStatsFlags VkToPalQueryPipelineStatsFlags(VkQueryPipelineStatisticFlags flags) +inline Pal::QueryPipelineStatsFlags VkToPalQueryPipelineStatsFlags(VkQueryPipelineStatisticFlags flags) { static_assert( (static_cast(VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT) == @@ -973,7 +973,7 @@ VK_TO_PAL_ENTRY_X( COMPONENT_SWIZZLE_A, ChannelSwizzle::W // ==================================================================================================================== // Reswizzles a format given a component mapping. The input image format should be a previously unswizzled format, // such as one returned by VkToPalFormat() function. -VK_INLINE Pal::SwizzledFormat RemapFormatComponents( +inline Pal::SwizzledFormat RemapFormatComponents( Pal::SwizzledFormat format, Pal::SubresRange subresRange, const VkComponentMapping& mapping, @@ -1173,7 +1173,7 @@ VK_INLINE Pal::SwizzledFormat RemapFormatComponents( // ===================================================================================================================== // Returns the Vulkan image aspect flag bits corresponding to the given PAL YUV format. -VK_INLINE VkImageAspectFlags PalYuvFormatToVkImageAspectPlane( +inline VkImageAspectFlags PalYuvFormatToVkImageAspectPlane( const Pal::ChNumFormat format) { switch (format) @@ -1207,7 +1207,7 @@ VK_INLINE VkImageAspectFlags PalYuvFormatToVkImageAspectPlane( // ===================================================================================================================== // Converts Vulkan image subresource range to PAL equivalent. // It may generate two PAL subresource range entries in case both depth and stencil aspect is selected in the mask. 
-VK_INLINE void VkToPalSubresRange( +inline void VkToPalSubresRange( VkFormat format, const VkImageSubresourceRange& range, uint32_t mipLevels, @@ -1246,7 +1246,7 @@ VK_INLINE void VkToPalSubresRange( // ===================================================================================================================== // Converts a Vulkan scissor params to a PAL scissor rect params -VK_INLINE Pal::ScissorRectParams VkToPalScissorParams(const VkPipelineViewportStateCreateInfo& scissors) +inline Pal::ScissorRectParams VkToPalScissorParams(const VkPipelineViewportStateCreateInfo& scissors) { Pal::ScissorRectParams palScissors; @@ -1264,7 +1264,7 @@ VK_INLINE Pal::ScissorRectParams VkToPalScissorParams(const VkPipelineViewportSt // ===================================================================================================================== // Converts a Vulkan offset 2D to a PAL offset 2D -VK_INLINE Pal::Offset2d VkToPalOffset2d(const VkOffset2D& offset) +inline Pal::Offset2d VkToPalOffset2d(const VkOffset2D& offset) { Pal::Offset2d result; result.x = offset.x; @@ -1274,7 +1274,7 @@ VK_INLINE Pal::Offset2d VkToPalOffset2d(const VkOffset2D& offset) // ===================================================================================================================== // Converts a Vulkan offset 3D to a PAL offset 3D -VK_INLINE Pal::Offset3d VkToPalOffset3d(const VkOffset3D& offset) +inline Pal::Offset3d VkToPalOffset3d(const VkOffset3D& offset) { Pal::Offset3d result; result.x = offset.x; @@ -1285,7 +1285,7 @@ VK_INLINE Pal::Offset3d VkToPalOffset3d(const VkOffset3D& offset) // ===================================================================================================================== // Converts a Vulkan extent 2D to a PAL extent 2D -VK_INLINE Pal::Extent2d VkToPalExtent2d(const VkExtent2D& extent) +inline Pal::Extent2d VkToPalExtent2d(const VkExtent2D& extent) { Pal::Extent2d result; result.width = extent.width; @@ -1295,7 +1295,7 @@ VK_INLINE 
Pal::Extent2d VkToPalExtent2d(const VkExtent2D& extent) // ===================================================================================================================== // Converts a PAL extent 2D to a Vulkan extent 2D -VK_INLINE VkExtent2D PalToVkExtent2d(const Pal::Extent2d& extent) +inline VkExtent2D PalToVkExtent2d(const Pal::Extent2d& extent) { VkExtent2D result; result.width = extent.width; @@ -1305,7 +1305,7 @@ VK_INLINE VkExtent2D PalToVkExtent2d(const Pal::Extent2d& extent) // ===================================================================================================================== // Converts PAL GpuType to Vulkan VkPhysicalDeviceType -VK_INLINE VkPhysicalDeviceType PalToVkGpuType(const Pal::GpuType gpuType) +inline VkPhysicalDeviceType PalToVkGpuType(const Pal::GpuType gpuType) { const VkPhysicalDeviceType gpuTypeTbl[] = { @@ -1322,7 +1322,7 @@ VK_INLINE VkPhysicalDeviceType PalToVkGpuType(const Pal::GpuType gpuType) // ===================================================================================================================== // Converts a Vulkan extent 3D to a PAL extent 3D -VK_INLINE Pal::Extent3d VkToPalExtent3d(const VkExtent3D& extent) +inline Pal::Extent3d VkToPalExtent3d(const VkExtent3D& extent) { Pal::Extent3d result; result.width = extent.width; @@ -1333,7 +1333,7 @@ VK_INLINE Pal::Extent3d VkToPalExtent3d(const VkExtent3D& extent) // ===================================================================================================================== // Converts a PAL extent 3D to a Vulkan extent 3D -VK_INLINE VkExtent3D PalToVkExtent3d(const Pal::Extent3d& extent) +inline VkExtent3D PalToVkExtent3d(const Pal::Extent3d& extent) { VkExtent3D result; result.width = extent.width; @@ -1344,7 +1344,7 @@ VK_INLINE VkExtent3D PalToVkExtent3d(const Pal::Extent3d& extent) // ===================================================================================================================== // Converts two Vulkan 3D offsets 
to a PAL signed extent 3D -VK_INLINE Pal::SignedExtent3d VkToPalSignedExtent3d(const VkOffset3D offsets[2]) +inline Pal::SignedExtent3d VkToPalSignedExtent3d(const VkOffset3D offsets[2]) { Pal::SignedExtent3d result; result.width = offsets[1].x - offsets[0].x; @@ -1355,14 +1355,14 @@ VK_INLINE Pal::SignedExtent3d VkToPalSignedExtent3d(const VkOffset3D offsets[2]) // ===================================================================================================================== // Converts value in texels to value in blocks, specifying block dimension for the given coordinate. -VK_INLINE uint32_t TexelsToBlocks(uint32_t texels, uint32_t blockSize) +inline uint32_t TexelsToBlocks(uint32_t texels, uint32_t blockSize) { return Util::RoundUpToMultiple(texels, blockSize) / blockSize; } // ===================================================================================================================== // Converts signed value in texels to signed value in blocks, specifying block dimension for the given coordinate. -VK_INLINE int32_t TexelsToBlocks(int32_t texels, uint32_t blockSize) +inline int32_t TexelsToBlocks(int32_t texels, uint32_t blockSize) { uint32_t value = Util::Math::Absu(texels); value = Util::RoundUpToMultiple(value, blockSize) / blockSize; @@ -1373,14 +1373,14 @@ VK_INLINE int32_t TexelsToBlocks(int32_t texels, uint32_t blockSize) // ===================================================================================================================== // Converts pitch value in texels to pitch value in blocks, specifying block dimension for the given coordinate. 
-VK_INLINE Pal::gpusize PitchTexelsToBlocks(Pal::gpusize texels, uint32_t blockSize) +inline Pal::gpusize PitchTexelsToBlocks(Pal::gpusize texels, uint32_t blockSize) { return Util::RoundUpToMultiple(texels, static_cast(blockSize)) / blockSize; } // ===================================================================================================================== // Converts extent in texels to extent in blocks, specifying block dimensions. -VK_INLINE Pal::Extent3d TexelsToBlocks(Pal::Extent3d texels, Pal::Extent3d blockSize) +inline Pal::Extent3d TexelsToBlocks(Pal::Extent3d texels, Pal::Extent3d blockSize) { Pal::Extent3d blocks; @@ -1393,7 +1393,7 @@ VK_INLINE Pal::Extent3d TexelsToBlocks(Pal::Extent3d texels, Pal::Extent3d block // ===================================================================================================================== // Converts signed extent in texels to signed extent in blocks, specifying block dimensions. -VK_INLINE Pal::SignedExtent3d TexelsToBlocks(Pal::SignedExtent3d texels, Pal::Extent3d blockSize) +inline Pal::SignedExtent3d TexelsToBlocks(Pal::SignedExtent3d texels, Pal::Extent3d blockSize) { Pal::SignedExtent3d blocks; @@ -1406,7 +1406,7 @@ VK_INLINE Pal::SignedExtent3d TexelsToBlocks(Pal::SignedExtent3d texels, Pal::Ex // ===================================================================================================================== // Converts offset in texels to offset in blocks, specifying block dimensions. -VK_INLINE Pal::Offset3d TexelsToBlocks(Pal::Offset3d texels, Pal::Extent3d blockSize) +inline Pal::Offset3d TexelsToBlocks(Pal::Offset3d texels, Pal::Extent3d blockSize) { Pal::Offset3d blocks; @@ -1419,7 +1419,7 @@ VK_INLINE Pal::Offset3d TexelsToBlocks(Pal::Offset3d texels, Pal::Extent3d block // ===================================================================================================================== // Queries the number of bytes in a pixel or element for the given format. 
-VK_INLINE Pal::uint32 BytesPerPixel(Pal::ChNumFormat format, uint32 plane) +inline Pal::uint32 BytesPerPixel(Pal::ChNumFormat format, uint32 plane) { if (Pal::Formats::IsYuvPlanar(format)) { @@ -1460,7 +1460,7 @@ VK_INLINE Pal::uint32 BytesPerPixel(Pal::ChNumFormat format, uint32 plane) // ===================================================================================================================== // Converts a Vulkan image-copy structure to one or more PAL image-copy-region structures. template -VK_INLINE void VkToPalImageCopyRegion( +void VkToPalImageCopyRegion( const ImageCopyType& imageCopy, Pal::ChNumFormat srcFormat, Pal::ChNumFormat dstFormat, @@ -1519,7 +1519,7 @@ VK_INLINE void VkToPalImageCopyRegion( // ===================================================================================================================== // Converts a Vulkan image-blit structure to one or more PAL image-scaled-copy-region structures. template -VK_INLINE void VkToPalImageScaledCopyRegion( +void VkToPalImageScaledCopyRegion( const ImageBlitType& imageBlit, Pal::ChNumFormat srcFormat, Pal::ChNumFormat dstFormat, @@ -1564,7 +1564,7 @@ VK_INLINE void VkToPalImageScaledCopyRegion( // ===================================================================================================================== // Converts a Vulkan image-blit structure to one or more PAL color-space-conversion-region structures. -VK_INLINE Pal::ColorSpaceConversionRegion VkToPalImageColorSpaceConversionRegion( +inline Pal::ColorSpaceConversionRegion VkToPalImageColorSpaceConversionRegion( const VkImageBlit& imageBlit, Pal::SwizzledFormat srcFormat, Pal::SwizzledFormat dstFormat) @@ -1628,7 +1628,7 @@ VK_INLINE Pal::ColorSpaceConversionRegion VkToPalImageColorSpaceConversionRegion // ===================================================================================================================== // Converts a Vulkan image-resolve structure to one or more PAL image-resolve-region structures. 
template -VK_INLINE void VkToPalImageResolveRegion( +void VkToPalImageResolveRegion( const ImageResolveType& imageResolve, Pal::ChNumFormat srcFormat, Pal::ChNumFormat dstFormat, @@ -1673,7 +1673,7 @@ VK_INLINE void VkToPalImageResolveRegion( // ===================================================================================================================== // Converts a Vulkan buffer-image-copy structure to a PAL memory-image-copy-region structure. template -VK_INLINE Pal::MemoryImageCopyRegion VkToPalMemoryImageCopyRegion( +Pal::MemoryImageCopyRegion VkToPalMemoryImageCopyRegion( const BufferImageCopyType& bufferImageCopy, Pal::ChNumFormat format, uint32 plane, @@ -1725,7 +1725,7 @@ extern Pal::SwizzledFormat VkToPalSwizzledFormatLookupTableStorage[VK_FORMAT_END }; // ===================================================================================================================== -constexpr VK_INLINE Pal::SwizzledFormat PalFmt( +constexpr Pal::SwizzledFormat PalFmt( Pal::ChNumFormat chNumFormat, Pal::ChannelSwizzle r, Pal::ChannelSwizzle g, @@ -1735,7 +1735,7 @@ constexpr VK_INLINE Pal::SwizzledFormat PalFmt( return{ chNumFormat,{ r, g, b, a } }; } -#if ( VKI_GPU_DECOMPRESS) +#if (VKI_GPU_DECOMPRESS) static VkFormat convertCompressedFormat(VkFormat format) { if (Formats::IsASTCFormat(format)) @@ -1746,10 +1746,7 @@ static VkFormat convertCompressedFormat(VkFormat format) } else if (Formats::IsEtc2Format(format)) { - format = ((format == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK) || - (format == VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK) || - (format == VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK)) ? - VK_FORMAT_R8G8B8A8_SRGB : VK_FORMAT_R8G8B8A8_UNORM; + format = VK_FORMAT_R8G8B8A8_UNORM; } return format; } @@ -1757,7 +1754,7 @@ static VkFormat convertCompressedFormat(VkFormat format) // ===================================================================================================================== // Converts Vulkan format to PAL equivalent. 
-VK_INLINE Pal::SwizzledFormat VkToPalFormat(VkFormat format, const RuntimeSettings& settings) +inline Pal::SwizzledFormat VkToPalFormat(VkFormat format, const RuntimeSettings& settings) { if (VK_ENUM_IN_RANGE(format, VK_FORMAT)) { @@ -1807,7 +1804,7 @@ VK_INLINE Pal::SwizzledFormat VkToPalFormat(VkFormat format, const RuntimeSettin // extension and propose revisions to VK_EXT_swapchain_colorspace. namespace convert { - VK_INLINE Pal::ScreenColorSpace ScreenColorSpace(VkSurfaceFormatKHR surfaceFormat) + inline Pal::ScreenColorSpace ScreenColorSpace(VkSurfaceFormatKHR surfaceFormat) { union { @@ -1895,7 +1892,7 @@ namespace convert } // ===================================================================================================================== -VK_INLINE Pal::ScreenColorSpace VkToPalScreenSpace(VkSurfaceFormatKHR colorFormat) +inline Pal::ScreenColorSpace VkToPalScreenSpace(VkSurfaceFormatKHR colorFormat) { return convert::ScreenColorSpace(colorFormat); } @@ -1903,7 +1900,7 @@ VK_INLINE Pal::ScreenColorSpace VkToPalScreenSpace(VkSurfaceFormatKHR colorForma // ===================================================================================================================== // Converts Vulkan source pipeline stage flags to PAL HW pipe point. // Selects a source pipe point that matches all stage flags to use for setting/resetting events. -VK_INLINE Pal::HwPipePoint VkToPalSrcPipePoint(PipelineStageFlags flags) +inline Pal::HwPipePoint VkToPalSrcPipePoint(PipelineStageFlags flags) { // Flags that only require signaling at top-of-pipe. static const PipelineStageFlags srcTopOfPipeFlags = @@ -1986,7 +1983,7 @@ VK_INLINE Pal::HwPipePoint VkToPalSrcPipePoint(PipelineStageFlags flags) // ===================================================================================================================== // Converts Vulkan source pipeline stage flags to PAL HW top or bottom pipe point. 
-VK_INLINE Pal::HwPipePoint VkToPalSrcPipePointForTimestampWrite(PipelineStageFlags flags) +inline Pal::HwPipePoint VkToPalSrcPipePointForTimestampWrite(PipelineStageFlags flags) { // Flags that require signaling at top-of-pipe. static const PipelineStageFlags srcTopOfPipeFlags = @@ -2008,7 +2005,7 @@ VK_INLINE Pal::HwPipePoint VkToPalSrcPipePointForTimestampWrite(PipelineStageFla // ===================================================================================================================== // Converts Vulkan source pipeline stage flags to PAL buffer marker writes (top/bottom only) -VK_INLINE Pal::HwPipePoint VkToPalSrcPipePointForMarkers( +inline Pal::HwPipePoint VkToPalSrcPipePointForMarkers( PipelineStageFlags flags, Pal::EngineType engineType) { @@ -2107,7 +2104,7 @@ static const size_t MaxHwPipePoints = sizeof(hwPipePointMappingTable) / sizeof(h // By having the flexibility to specify multiple pipe points for barriers we can avoid going with the least common // denominator like in case of event sets/resets. // The function returns the number of pipe points set in the return value. -VK_INLINE uint32_t VkToPalSrcPipePoints(PipelineStageFlags flags, Pal::HwPipePoint* pPalPipePoints) +inline uint32_t VkToPalSrcPipePoints(PipelineStageFlags flags, Pal::HwPipePoint* pPalPipePoints) { uint32_t pipePointCount = 0; @@ -2126,7 +2123,7 @@ VK_INLINE uint32_t VkToPalSrcPipePoints(PipelineStageFlags flags, Pal::HwPipePoi // ===================================================================================================================== // Converts Vulkan destination pipeline stage flags to PAL HW pipe point. 
// This way a target pipeline stage is selected where the wait for events happens -VK_INLINE Pal::HwPipePoint VkToPalWaitPipePoint(PipelineStageFlags flags) +inline Pal::HwPipePoint VkToPalWaitPipePoint(PipelineStageFlags flags) { static_assert((Pal::HwPipePostIndexFetch == Pal::HwPipePreCs) && (Pal::HwPipePostIndexFetch == Pal::HwPipePreBlt), "The code here assumes pre-CS and pre-blit match post-index-fetch."); @@ -2181,7 +2178,7 @@ VK_INLINE Pal::HwPipePoint VkToPalWaitPipePoint(PipelineStageFlags flags) // ===================================================================================================================== // Converts Vulkan source pipeline stage flags to PAL pipeline stage mask. -VK_INLINE uint32_t VkToPalPipelineStageFlags( +inline uint32_t VkToPalPipelineStageFlags( PipelineStageFlags stageMask) { uint32_t palPipelineStageMask = 0; @@ -2329,7 +2326,7 @@ PAL_TO_VK_ENTRY_X( ImageTiling::Optimal, IMAGE_TILING_OPTIMAL PAL_TO_VK_RETURN_X( IMAGE_TILING_LINEAR ) ) -VK_INLINE VkImageTiling PalToVkImageTiling(Pal::ImageTiling tiling) +inline VkImageTiling PalToVkImageTiling(Pal::ImageTiling tiling) { return convert::PalToVKImageTiling(tiling); } @@ -2353,7 +2350,7 @@ PAL_TO_VK_RETURN_X( SURFACE_TRANSFORM_ID // ===================================================================================================================== // Converts PAL surface transform to Vulkan. -VK_INLINE VkSurfaceTransformFlagBitsKHR PalToVkSurfaceTransform(Pal::SurfaceTransformFlags transformFlag) +inline VkSurfaceTransformFlagBitsKHR PalToVkSurfaceTransform(Pal::SurfaceTransformFlags transformFlag) { if (transformFlag) { @@ -2364,7 +2361,7 @@ VK_INLINE VkSurfaceTransformFlagBitsKHR PalToVkSurfaceTransform(Pal::SurfaceTran // ===================================================================================================================== // Converts Vulkan WSI Platform Type to PAL equivalent. 
-VK_INLINE Pal::WsiPlatform VkToPalWsiPlatform(VkIcdWsiPlatform Platform) +inline Pal::WsiPlatform VkToPalWsiPlatform(VkIcdWsiPlatform Platform) { Pal::WsiPlatform palPlatform = Pal::WsiPlatform::Win32; @@ -2405,7 +2402,7 @@ VK_TO_PAL_ENTRY_I( PRESENT_MODE_FIFO_RELAXED_KHR, SwapChainMode:: // ===================================================================================================================== // Converts Vulkan present mode to PAL equivalent. -VK_INLINE Pal::SwapChainMode VkToPalSwapChainMode(VkPresentModeKHR presentMode) +inline Pal::SwapChainMode VkToPalSwapChainMode(VkPresentModeKHR presentMode) { return convert::SwapChainMode(presentMode); } @@ -2413,7 +2410,7 @@ VK_INLINE Pal::SwapChainMode VkToPalSwapChainMode(VkPresentModeKHR presentMode) #if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 610 namespace convert { - VK_INLINE Pal::CompositeAlphaMode CompositeAlpha(VkCompositeAlphaFlagBitsKHR compositeAlpha) + inline Pal::CompositeAlphaMode CompositeAlpha(VkCompositeAlphaFlagBitsKHR compositeAlpha) { switch (compositeAlpha) { @@ -2438,21 +2435,21 @@ namespace convert // ===================================================================================================================== // Converts Vulkan composite alpha flag to PAL equivalent. -VK_INLINE Pal::CompositeAlphaMode VkToPalCompositeAlphaMode(VkCompositeAlphaFlagBitsKHR compositeAlpha) +inline Pal::CompositeAlphaMode VkToPalCompositeAlphaMode(VkCompositeAlphaFlagBitsKHR compositeAlpha) { return convert::CompositeAlpha(compositeAlpha); } #else // ===================================================================================================================== // Converts Vulkan composite alpha flag to PAL equivalent. 
-VK_INLINE Pal::CompositeAlphaMode VkToPalCompositeAlphaMode(VkCompositeAlphaFlagBitsKHR compositeAlpha) +inline Pal::CompositeAlphaMode VkToPalCompositeAlphaMode(VkCompositeAlphaFlagBitsKHR compositeAlpha) { return static_cast(compositeAlpha); } // ===================================================================================================================== // Converts Vulkan composite alpha flag to PAL equivalent. -VK_INLINE VkCompositeAlphaFlagsKHR PalToVkSupportedCompositeAlphaMode(uint32 compositeAlpha) +inline VkCompositeAlphaFlagsKHR PalToVkSupportedCompositeAlphaMode(uint32 compositeAlpha) { static_assert((static_cast(VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR) == static_cast(Pal::CompositeAlphaMode::Opaque)) && @@ -2471,7 +2468,7 @@ VK_INLINE VkCompositeAlphaFlagsKHR PalToVkSupportedCompositeAlphaMode(uint32 com // ===================================================================================================================== // Converts Vulkan image creation flags to PAL image creation flags (unfortunately, PAL doesn't define a dedicated type // for the image creation flags so we have to return the constructed flag set as a uint32_t) -VK_INLINE uint32_t VkToPalImageCreateFlags(VkImageCreateFlags imageCreateFlags, +inline uint32_t VkToPalImageCreateFlags(VkImageCreateFlags imageCreateFlags, VkFormat format) { Pal::ImageCreateInfo palImageCreateInfo; @@ -2494,7 +2491,7 @@ VK_INLINE uint32_t VkToPalImageCreateFlags(VkImageCreateFlags imageCreateFlags, // ===================================================================================================================== // Converts PAL image creation flags to Vulkan image creation flags. 
-VK_INLINE VkImageCreateFlags PalToVkImageCreateFlags(Pal::ImageCreateFlags imageCreateFlags) +inline VkImageCreateFlags PalToVkImageCreateFlags(Pal::ImageCreateFlags imageCreateFlags) { VkImageUsageFlags vkImageCreateFlags = 0; @@ -2525,7 +2522,7 @@ VK_INLINE VkImageCreateFlags PalToVkImageCreateFlags(Pal::ImageCreateFlags image // ===================================================================================================================== // Converts Vulkan image usage flags to PAL image usage flags -VK_INLINE Pal::ImageUsageFlags VkToPalImageUsageFlags(VkImageUsageFlags imageUsageFlags, +inline Pal::ImageUsageFlags VkToPalImageUsageFlags(VkImageUsageFlags imageUsageFlags, uint32_t samples, VkImageUsageFlags maskSetShaderReadForTransferSrc, VkImageUsageFlags maskSetShaderWriteForTransferDst) @@ -2556,7 +2553,7 @@ VK_INLINE Pal::ImageUsageFlags VkToPalImageUsageFlags(VkImageUsageFlags imageUsa // ===================================================================================================================== // Converts PAL image usage flag to Vulkan. -VK_INLINE VkImageUsageFlags PalToVkImageUsageFlags(Pal::ImageUsageFlags imageUsageFlags) +inline VkImageUsageFlags PalToVkImageUsageFlags(Pal::ImageUsageFlags imageUsageFlags) { VkImageUsageFlags vkImageUsageFlags = 0; @@ -2590,7 +2587,7 @@ extern VkResult PalToVkError(Pal::Result result); // ===================================================================================================================== // Converts a PAL result to an equivalent VK result. 
-VK_INLINE VkResult PalToVkResult( +inline VkResult PalToVkResult( Pal::Result result) { VkResult vkResult = VK_SUCCESS; @@ -2641,13 +2638,13 @@ VK_TO_PAL_ENTRY_X(PIPELINE_BIND_POINT_GRAPHICS, PipelineBindPoint::G // ===================================================================================================================== // Converts Vulkan pipeline bind point to PAL equivalent -VK_INLINE Pal::PipelineBindPoint VkToPalPipelineBindPoint(VkPipelineBindPoint pipelineBind) +inline Pal::PipelineBindPoint VkToPalPipelineBindPoint(VkPipelineBindPoint pipelineBind) { return convert::PipelineBindPoint(pipelineBind); } // ===================================================================================================================== -VK_INLINE Pal::ShaderType VkToPalShaderType( +inline Pal::ShaderType VkToPalShaderType( VkShaderStageFlagBits shaderStage) { switch (shaderStage) @@ -2672,7 +2669,7 @@ VK_INLINE Pal::ShaderType VkToPalShaderType( // ===================================================================================================================== // Converts Vulkan clear depth to PAL equivalent valid range -VK_INLINE float VkToPalClearDepth(float depth) +inline float VkToPalClearDepth(float depth) { if (Util::Math::IsNaN(depth)) { @@ -2684,7 +2681,7 @@ VK_INLINE float VkToPalClearDepth(float depth) // ===================================================================================================================== // Converts Vulkan clear color value to PAL equivalent -VK_INLINE Pal::ClearColor VkToPalClearColor( +inline Pal::ClearColor VkToPalClearColor( const VkClearColorValue* pClearColor, const Pal::SwizzledFormat& swizzledFormat) { @@ -2726,14 +2723,14 @@ VK_INLINE Pal::ClearColor VkToPalClearColor( // ===================================================================================================================== // Converts integer nanoseconds to single precision seconds -VK_INLINE float NanosecToSec(uint64_t nanosecs) +inline 
float NanosecToSec(uint64_t nanosecs) { return static_cast(static_cast(nanosecs) / 1000000000.0); } // ===================================================================================================================== // Converts maximum sample count to VkSampleCountFlags -VK_INLINE VkSampleCountFlags MaxSampleCountToSampleCountFlags(uint32_t maxSampleCount) +inline VkSampleCountFlags MaxSampleCountToSampleCountFlags(uint32_t maxSampleCount) { return (maxSampleCount << 1) - 1; } @@ -2744,7 +2741,7 @@ constexpr uint32_t VkMemoryHeapNum = Pal::GpuHeapCount - 1; // ===================================================================================================================== // Converts PAL GPU heap to Vulkan memory heap flags -VK_INLINE VkMemoryHeapFlags PalGpuHeapToVkMemoryHeapFlags(Pal::GpuHeap heap) +inline VkMemoryHeapFlags PalGpuHeapToVkMemoryHeapFlags(Pal::GpuHeap heap) { switch (heap) { @@ -2762,7 +2759,7 @@ VK_INLINE VkMemoryHeapFlags PalGpuHeapToVkMemoryHeapFlags(Pal::GpuHeap heap) // ===================================================================================================================== // Returns the Vulkan format feature flags corresponding to the given PAL format feature flags. 
-VK_INLINE VkFormatFeatureFlags PalToVkFormatFeatureFlags(Pal::FormatFeatureFlags flags) +inline VkFormatFeatureFlags PalToVkFormatFeatureFlags(Pal::FormatFeatureFlags flags) { VkFormatFeatureFlags retFlags = 0; @@ -2851,7 +2848,7 @@ VK_INLINE VkFormatFeatureFlags PalToVkFormatFeatureFlags(Pal::FormatFeatureFlags // ===================================================================================================================== // Converts Vulkan rasterization order to PAL equivalent (out-of-order primitive enable) -VK_INLINE bool VkToPalRasterizationOrder(VkRasterizationOrderAMD order) +inline bool VkToPalRasterizationOrder(VkRasterizationOrderAMD order) { VK_ASSERT(VK_ENUM_IN_RANGE_AMD(order, VK_RASTERIZATION_ORDER)); @@ -2915,7 +2912,7 @@ VK_TO_PAL_TABLE_I_AMD(GPA_PERF_BLOCK, GpaPerfBlockAMD, GpuBlock, ) // ===================================================================================================================== -VK_INLINE Pal::GpuBlock VkToPalGpuBlock( +inline Pal::GpuBlock VkToPalGpuBlock( VkGpaPerfBlockAMD perfBlock) { return convert::GpuBlock(perfBlock); @@ -2934,14 +2931,14 @@ VK_TO_PAL_TABLE_I_AMD(GPA_DEVICE_CLOCK_MODE, GpaDeviceClockModeAMD, DeviceClockM ) // ===================================================================================================================== -VK_INLINE Pal::DeviceClockMode VkToPalDeviceClockMode( +inline Pal::DeviceClockMode VkToPalDeviceClockMode( VkGpaDeviceClockModeAMD clockMode) { return convert::DeviceClockMode(clockMode); } // ===================================================================================================================== -VK_INLINE uint32_t VkToPalPerfExperimentShaderFlags( +inline uint32_t VkToPalPerfExperimentShaderFlags( VkGpaSqShaderStageFlags stageMask) { uint32_t perfFlags = 0; @@ -2991,7 +2988,7 @@ PalClearRegion VkToPalClearRegion(const VkClearRect& clearRect, const uint32_t z // 
===================================================================================================================== // Converts Vulkan clear rect to an equivalent PAL box template <> -VK_INLINE Pal::Box VkToPalClearRegion( +inline Pal::Box VkToPalClearRegion( const VkClearRect& clearRect, const uint32_t zOffset) { @@ -3010,7 +3007,7 @@ VK_INLINE Pal::Box VkToPalClearRegion( // ===================================================================================================================== // Converts Vulkan clear rect to an equivalent PAL clear bound target region template <> -VK_INLINE Pal::ClearBoundTargetRegion VkToPalClearRegion( +inline Pal::ClearBoundTargetRegion VkToPalClearRegion( const VkClearRect& clearRect, const uint32_t zOffset) { @@ -3028,7 +3025,7 @@ VK_INLINE Pal::ClearBoundTargetRegion VkToPalClearRegion void Reset(); - VK_INLINE size_t GetPrivateDataSize() const + size_t GetPrivateDataSize() const { return m_privateDataSize; } diff --git a/icd/api/include/vk_descriptor_set.h b/icd/api/include/vk_descriptor_set.h index 78eeccd2..91365156 100644 --- a/icd/api/include/vk_descriptor_set.h +++ b/icd/api/include/vk_descriptor_set.h @@ -101,11 +101,11 @@ class DescriptorSet final : public NonDispatchableGetQueueFamilyPalImageLayoutFlag(queueFamilyIndex); } - VK_INLINE uint32_t GetMemoryTypeMask() const + uint32_t GetMemoryTypeMask() const { return VkPhysicalDevice(DefaultDeviceIndex)->GetMemoryTypeMask(); } - VK_INLINE uint32_t GetMemoryTypeMaskMatching(VkMemoryPropertyFlags flags) const + uint32_t GetMemoryTypeMaskMatching(VkMemoryPropertyFlags flags) const { return VkPhysicalDevice(DefaultDeviceIndex)->GetMemoryTypeMaskMatching(flags); } - VK_INLINE uint32_t GetMemoryTypeMaskForExternalSharing() const + uint32_t GetMemoryTypeMaskForExternalSharing() const { return VkPhysicalDevice(DefaultDeviceIndex)->GetMemoryTypeMaskForExternalSharing(); } - VK_INLINE bool GetVkTypeIndexBitsFromPalHeap(Pal::GpuHeap heapIndex, uint32_t* pVkIndexBits) const + 
bool GetVkTypeIndexBitsFromPalHeap(Pal::GpuHeap heapIndex, uint32_t* pVkIndexBits) const { return VkPhysicalDevice(DefaultDeviceIndex)->GetVkTypeIndexBitsFromPalHeap(heapIndex, pVkIndexBits); } - VK_INLINE Pal::GpuHeap GetPalHeapFromVkTypeIndex(uint32_t vkIndex) const + Pal::GpuHeap GetPalHeapFromVkTypeIndex(uint32_t vkIndex) const { return VkPhysicalDevice(DefaultDeviceIndex)->GetPalHeapFromVkTypeIndex(vkIndex); } - VK_INLINE uint32_t GetUmdFpsCapFrameRate() const + uint32_t GetUmdFpsCapFrameRate() const { return VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().osProperties.umdFpsCapFrameRate; } - VK_INLINE uint64_t TimestampFrequency() const + uint64_t TimestampFrequency() const { return VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().timestampFrequency; } @@ -485,10 +485,10 @@ class Device uint32_t bindInfoCount, const VkBindImageMemoryInfo* pBindInfos) const; - VK_INLINE const DeviceFeatures& GetEnabledFeatures() const + const DeviceFeatures& GetEnabledFeatures() const { return m_enabledFeatures; } - VK_INLINE bool IsGlobalGpuVaEnabled() const + bool IsGlobalGpuVaEnabled() const { return m_useGlobalGpuVa; } Pal::PrtFeatureFlags GetPrtFeatures() const; @@ -502,14 +502,14 @@ class Device Pal::IDevice* pPalDevice, Pal::IGpuMemory* pPalMemory); - VK_INLINE const RuntimeSettings& GetRuntimeSettings() const + const RuntimeSettings& GetRuntimeSettings() const { return m_settings; } // return too many objects if the allocation count will exceed max limit. // There is a potential improvement by using atomic inc/dec. // That require us to limit the max allocation to some value less than UINT_MAX // to avoid the overflow. 
- VK_INLINE VkResult IncreaseAllocationCount() + VkResult IncreaseAllocationCount() { VkResult vkResult = VK_SUCCESS; Util::MutexAuto lock(&m_memoryMutex); @@ -525,7 +525,7 @@ class Device return vkResult; } - VK_INLINE void DecreaseAllocationCount() + void DecreaseAllocationCount() { Util::MutexAuto lock(&m_memoryMutex); m_allocatedCount --; @@ -546,7 +546,7 @@ class Device uint32_t deviceMask, uint32_t heapIdx); - VK_INLINE bool ShouldAddRemoteBackupHeap( + bool ShouldAddRemoteBackupHeap( uint32_t deviceIdx, uint32_t memoryTypeIdx, uint32_t palHeapIdx) const @@ -555,41 +555,41 @@ class Device m_overallocationRequestedForPalHeap[palHeapIdx]); } - VK_INLINE const InternalPipeline& GetTimestampQueryCopyPipeline() const + const InternalPipeline& GetTimestampQueryCopyPipeline() const { return m_timestampQueryCopyPipeline; } - VK_INLINE const Pal::IMsaaState* const * GetBltMsaaState(uint32_t imgSampleCount) const; + inline const Pal::IMsaaState* const * GetBltMsaaState(uint32_t imgSampleCount) const; - VK_INLINE bool IsExtensionEnabled(DeviceExtensions::ExtensionId id) const + bool IsExtensionEnabled(DeviceExtensions::ExtensionId id) const { return m_enabledExtensions.IsExtensionEnabled(id); } - VK_INLINE AppProfile GetAppProfile() const + AppProfile GetAppProfile() const { return VkPhysicalDevice(DefaultDeviceIndex)->GetAppProfile(); } - VK_INLINE SqttMgr* GetSqttMgr() + SqttMgr* GetSqttMgr() { return m_pSqttMgr; } - VK_INLINE OptLayer* GetAppOptLayer() + OptLayer* GetAppOptLayer() { return m_pAppOptLayer; } - VK_INLINE BarrierFilterLayer* GetBarrierFilterLayer() + BarrierFilterLayer* GetBarrierFilterLayer() { return m_pBarrierFilterLayer; } - VK_INLINE AsyncLayer* GetAsyncLayer() + AsyncLayer* GetAsyncLayer() { return m_pAsyncLayer; } #if VKI_GPU_DECOMPRESS - VK_INLINE GpuDecoderLayer* GetGpuDecoderLayer() + GpuDecoderLayer* GetGpuDecoderLayer() { return m_pGpuDecoderLayer; } - VK_INLINE InternalPipeline& GetInternalTexDecodePipeline() + InternalPipeline& 
GetInternalTexDecodePipeline() { return m_internalTexDecodePipeline; } #endif - VK_INLINE Util::Mutex* GetMemoryMutex() + Util::Mutex* GetMemoryMutex() { return &m_memoryMutex; } - VK_INLINE PipelineCompiler* GetCompiler(uint32_t idx) const + PipelineCompiler* GetCompiler(uint32_t idx) const { return m_perGpu[idx].pPhysicalDevice->GetCompiler(); } static const Pal::MsaaQuadSamplePattern* GetDefaultQuadSamplePattern(uint32_t sampleCount); @@ -597,7 +597,7 @@ class Device VkDeviceSize GetMemoryBaseAddrAlignment(uint32_t memoryTypes) const; - VK_INLINE RenderStateCache* GetRenderStateCache() + RenderStateCache* GetRenderStateCache() { return &m_renderStateCache; } uint32_t GetPinnedSystemMemoryTypes() const; @@ -623,16 +623,16 @@ class Device VK_FORCEINLINE const DeviceBarrierPolicy& GetBarrierPolicy() const { return m_barrierPolicy; } - VK_INLINE bool IsAllocationSizeTrackingEnabled() const + bool IsAllocationSizeTrackingEnabled() const { return m_allocationSizeTracking; } - VK_INLINE bool UseStridedCopyQueryResults() const + bool UseStridedCopyQueryResults() const { return (m_properties.timestampQueryPoolSlotSize == 32); } - VK_INLINE bool UseCompactDynamicDescriptors() const + bool UseCompactDynamicDescriptors() const { return !GetRuntimeSettings().enableRelocatableShaders && !GetEnabledFeatures().robustBufferAccess;} - VK_INLINE bool SupportDepthStencilResolve() const + bool SupportDepthStencilResolve() const { return (IsExtensionEnabled(DeviceExtensions::KHR_DEPTH_STENCIL_RESOLVE) || (VkPhysicalDevice(DefaultDeviceIndex)->GetEnabledAPIVersion() >= VK_MAKE_VERSION(1, 2, 0)) || @@ -659,7 +659,7 @@ class Device return m_maxVrsShadingRate; } - VK_INLINE size_t GetPrivateDataSize() const + size_t GetPrivateDataSize() const { return m_privateDataSize; } @@ -669,16 +669,16 @@ class Device void* AllocApiObject( const VkAllocationCallbacks* pAllocator, - const size_t totalObjectSize); + const size_t totalObjectSize) const; void FreeApiObject( const VkAllocationCallbacks* 
pAllocator, - void* pMemory); + void* pMemory) const; void FreeUnreservedPrivateData( void* pMemory) const; - VK_INLINE Util::RWLock* GetPrivateDataRWLock() + Util::RWLock* GetPrivateDataRWLock() { return &m_privateDataRWLock; } @@ -691,7 +691,7 @@ class Device void ReleaseBorderColorIndex( uint32_t pBorderColor); - VK_INLINE Pal::IBorderColorPalette* GetPalBorderColorPalette(uint32_t deviceIdx) const + Pal::IBorderColorPalette* GetPalBorderColorPalette(uint32_t deviceIdx) const { return m_perGpu[deviceIdx].pPalBorderColorPalette; } diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index 9433d8cd..a15af0e6 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -57,6 +57,7 @@ // EXT macros #define VK_EXT_EXTENDED_DYNAMIC_STATE2_SPEC_VERSION VK_EXT_EXTENDED_DYNAMIC_STATE_2_SPEC_VERSION #define VK_EXT_ROBUSTNESS2_SPEC_VERSION VK_EXT_ROBUSTNESS_2_SPEC_VERSION +#define VK_EXT_SHADER_ATOMIC_FLOAT2_SPEC_VERSION VK_EXT_SHADER_ATOMIC_FLOAT_2_SPEC_VERSION #define VK_EXT_SWAPCHAIN_COLORSPACE_EXTENSION_NAME VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME #define VK_EXT_SWAPCHAIN_COLORSPACE_SPEC_VERSION VK_EXT_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION @@ -84,12 +85,12 @@ class Extensions } } - VK_INLINE bool IsExtensionSupported(typename T::ExtensionId id) const + bool IsExtensionSupported(typename T::ExtensionId id) const { return m_supported[id].specVersion != 0; } - VK_INLINE void AddExtension(typename T::ExtensionId id, const char* name, uint32_t specVersion) + void AddExtension(typename T::ExtensionId id, const char* name, uint32_t specVersion) { // Don't allow adding extensions redundantly. 
VK_ASSERT(!IsExtensionSupported(id)); @@ -100,13 +101,13 @@ class Extensions m_supportedCount++; } - VK_INLINE const VkExtensionProperties& GetExtensionInfo(typename T::ExtensionId id) const + const VkExtensionProperties& GetExtensionInfo(typename T::ExtensionId id) const { VK_ASSERT(IsExtensionSupported(id)); return m_supported[id]; } - VK_INLINE uint32_t GetExtensionCount() const + uint32_t GetExtensionCount() const { return m_supportedCount; } @@ -127,12 +128,12 @@ class Extensions } } - VK_INLINE void EnableExtension(typename T::ExtensionId id) + void EnableExtension(typename T::ExtensionId id) { m_enabled[id] = true; } - VK_INLINE bool IsExtensionEnabled(typename T::ExtensionId id) const + bool IsExtensionEnabled(typename T::ExtensionId id) const { return m_enabled[id]; } @@ -141,7 +142,7 @@ class Extensions bool m_enabled[T::Count]; }; - VK_INLINE static bool EnableExtensions( + static bool EnableExtensions( const char* const* const extensionNames, uint32_t extensionNameCount, const Supported& supported, @@ -308,6 +309,7 @@ class DeviceExtensions final : public Extensions EXT_IMAGE_ROBUSTNESS, EXT_INLINE_UNIFORM_BLOCK, EXT_LINE_RASTERIZATION, + EXT_LOAD_STORE_OP_NONE, EXT_MEMORY_BUDGET, EXT_MEMORY_PRIORITY, EXT_PCI_BUS_INFO, @@ -321,6 +323,8 @@ class DeviceExtensions final : public Extensions EXT_SAMPLE_LOCATIONS, EXT_SCALAR_BLOCK_LAYOUT, EXT_SEPARATE_STENCIL_USAGE, + EXT_SHADER_ATOMIC_FLOAT, + EXT_SHADER_ATOMIC_FLOAT2, EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, EXT_SHADER_IMAGE_ATOMIC_INT64, EXT_SHADER_STENCIL_EXPORT, diff --git a/icd/api/include/vk_fence.h b/icd/api/include/vk_fence.h index 179f94b9..1ea40d3a 100644 --- a/icd/api/include/vk_fence.h +++ b/icd/api/include/vk_fence.h @@ -78,13 +78,13 @@ class Fence final : public NonDispatchable VkResult RestoreFence(const Device* pDevice); - VK_INLINE uint32_t GetActiveDeviceMask() const + uint32_t GetActiveDeviceMask() const { return m_activeDeviceMask; } - VK_INLINE void ClearActiveDeviceMask() + void 
ClearActiveDeviceMask() { m_activeDeviceMask = 0; } - VK_INLINE void SetActiveDevice(uint32_t deviceIdx) + void SetActiveDevice(uint32_t deviceIdx) { m_activeDeviceMask |= (1 << deviceIdx); } VK_FORCEINLINE Pal::IFence* PalFence(int32_t idx) const diff --git a/icd/api/include/vk_formats.h b/icd/api/include/vk_formats.h index 413cf3de..ee2c26d2 100644 --- a/icd/api/include/vk_formats.h +++ b/icd/api/include/vk_formats.h @@ -58,24 +58,25 @@ struct AstcMappedInfo // on the current HW) -- put such information in PhysicalDevice or Device. struct Formats { - VK_INLINE static bool IsColorFormat(VkFormat format); - VK_INLINE static bool IsDepthStencilFormat(VkFormat format); - VK_INLINE static bool IsBcCompressedFormat(VkFormat format); - VK_INLINE static bool IsRTVertexFormat(VkFormat format); - VK_INLINE static bool IsYuvFormat(VkFormat format); - VK_INLINE static bool IsYuvPlanar(VkFormat format); - VK_INLINE static bool IsYuvPacked(VkFormat format); - VK_INLINE static bool IsYuvXChromaSubsampled(VkFormat format); - VK_INLINE static bool IsYuvYChromaSubsampled(VkFormat format); - VK_INLINE static uint32_t GetYuvPlaneCounts(VkFormat format); - VK_INLINE static bool IsASTCFormat(VkFormat format); - VK_INLINE static bool IsEtc2Format(VkFormat format); - VK_INLINE static bool HasDepth(VkFormat format); - VK_INLINE static bool HasStencil(VkFormat format); - VK_INLINE static VkFormat GetAspectFormat(VkFormat format, VkImageAspectFlags aspectMask); - - VK_INLINE static uint32_t GetIndex(VkFormat format); - VK_INLINE static VkFormat FromIndex(uint32_t index); + inline static bool IsColorFormat(VkFormat format); + inline static bool IsDepthStencilFormat(VkFormat format); + inline static bool IsBcCompressedFormat(VkFormat format); + inline static bool IsRTVertexFormat(VkFormat format); + inline static bool IsYuvFormat(VkFormat format); + inline static bool IsYuvPlanar(VkFormat format); + inline static bool IsYuvPacked(VkFormat format); + inline static bool 
IsYuvXChromaSubsampled(VkFormat format); + inline static bool IsYuvYChromaSubsampled(VkFormat format); + inline static uint32_t GetYuvPlaneCounts(VkFormat format); + inline static bool IsASTCFormat(VkFormat format); + inline static bool IsEtc2Format(VkFormat format); + inline static bool IsEacFormat(VkFormat format); + inline static bool HasDepth(VkFormat format); + inline static bool HasStencil(VkFormat format); + inline static VkFormat GetAspectFormat(VkFormat format, VkImageAspectFlags aspectMask); + + inline static uint32_t GetIndex(VkFormat format); + inline static VkFormat FromIndex(uint32_t index); #if ( VKI_GPU_DECOMPRESS) static void GetAstcMappedInfo(VkFormat format, AstcMappedInfo* pMapInfo); #endif @@ -241,7 +242,14 @@ bool Formats::IsASTCFormat( bool Formats::IsEtc2Format( VkFormat format) { - return ((format >= VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK) && (format <= VK_FORMAT_EAC_R11G11_SNORM_BLOCK)); + return ((format >= VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK) && (format <= VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK)); +} + +// ===================================================================================================================== +bool Formats::IsEacFormat( + VkFormat format) +{ + return ((format >= VK_FORMAT_EAC_R11_UNORM_BLOCK) && (format <= VK_FORMAT_EAC_R11G11_SNORM_BLOCK)); } // ===================================================================================================================== diff --git a/icd/api/include/vk_framebuffer.h b/icd/api/include/vk_framebuffer.h index cce468ad..6e667257 100644 --- a/icd/api/include/vk_framebuffer.h +++ b/icd/api/include/vk_framebuffer.h @@ -101,16 +101,16 @@ class Framebuffer final : public NonDispatchable private: PAL_DISALLOW_COPY_AND_ASSIGN(Framebuffer); - VK_INLINE void SetImageViews( + void SetImageViews( const VkImageView& imageView, Attachment* pAttachments); - VK_INLINE void SetSubresRanges( + void SetSubresRanges( const Image* pImage, Attachment* pAttachment); // Get the start address of the first 
Attachment object relative to the start of a Framebuffer object. - VK_INLINE static size_t GetAttachmentsOffset() + static size_t GetAttachmentsOffset() { // The alignment requirement of Framebuffer is less than of Attachment. // Therefore, we need to round up (this only works if the Framebuffer object is sufficiently aligned). diff --git a/icd/api/include/vk_gpa_session.h b/icd/api/include/vk_gpa_session.h index 291abeeb..c66f7fba 100644 --- a/icd/api/include/vk_gpa_session.h +++ b/icd/api/include/vk_gpa_session.h @@ -73,10 +73,10 @@ class GpaSession final : public NonDispatchable void Destroy(const VkAllocationCallbacks* pAllocator); - VK_INLINE VkResult GetStatus() + VkResult GetStatus() { return m_session.IsReady() ? VK_SUCCESS : VK_NOT_READY; } - VK_INLINE GpuUtil::GpaSession* PalSession() + GpuUtil::GpaSession* PalSession() { return &m_session; } private: diff --git a/icd/api/include/vk_graphics_pipeline.h b/icd/api/include/vk_graphics_pipeline.h index b42269ae..d09a3c6f 100644 --- a/icd/api/include/vk_graphics_pipeline.h +++ b/icd/api/include/vk_graphics_pipeline.h @@ -164,7 +164,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch CmdBuffer* pCmdBuffer, const Pal::DynamicGraphicsShaderInfos& graphicsShaderInfos) const; - VK_INLINE const Pal::DynamicGraphicsShaderInfos& GetBindInfo() const { return m_info.graphicsShaderInfos; } + const Pal::DynamicGraphicsShaderInfos& GetBindInfo() const { return m_info.graphicsShaderInfos; } const Pal::IMsaaState* const* GetMsaaStates() const { return m_pPalMsaa; } @@ -210,10 +210,18 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch void CreateStaticState(); void DestroyStaticState(const VkAllocationCallbacks* pAllocator); + static VkResult AchievePipelineLayout( + const Device* pDevice, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + PipelineLayout** ppPipelineLayout, + bool* pIsTemporary); + static VkResult 
CreatePipelineBinaries( Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineShaderStageInfo* pShaderInfo, + const PipelineLayout* pPipelineLayout, GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, PipelineCache* pPipelineCache, const VkPipelineCreationFeedbackCreateInfoEXT* pCreationFeedbackInfo, @@ -225,6 +233,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, + const PipelineLayout* pPipelineLayout, const VbInfo* pVbInfo, const size_t* pPipelineBinarySizes, const void** pPipelineBinaries, diff --git a/icd/api/include/vk_image_view.h b/icd/api/include/vk_image_view.h index 2bdd560a..33591607 100644 --- a/icd/api/include/vk_image_view.h +++ b/icd/api/include/vk_image_view.h @@ -63,39 +63,39 @@ class ImageView final : public NonDispatchable Device* pDevice, const VkAllocationCallbacks* pAllocator); - VK_INLINE const void* Descriptor( + inline const void* Descriptor( uint32_t deviceIdx, bool isShaderStorageDesc, size_t srdSize) const; - VK_INLINE const Pal::IColorTargetView* PalColorTargetView(int32_t idx) const + const Pal::IColorTargetView* PalColorTargetView(int32_t idx) const { VK_ASSERT((idx >= 0) && (idx < static_cast(MaxPalDevices))); return m_pColorTargetViews[idx]; } - VK_INLINE const Pal::IDepthStencilView* PalDepthStencilView(int32_t idx) const + const Pal::IDepthStencilView* PalDepthStencilView(int32_t idx) const { VK_ASSERT((idx >= 0) && (idx < static_cast(MaxPalDevices))); return m_pDepthStencilViews[idx]; } - VK_INLINE const Image* GetImage() const + const Image* GetImage() const { return m_pImage; } - VK_INLINE VkFormat GetViewFormat() const + VkFormat GetViewFormat() const { return m_viewFormat; } - VK_INLINE const Pal::Range GetZRange() const + const Pal::Range GetZRange() const { return m_zRange; } - VK_INLINE void GetFrameBufferAttachmentSubresRange(Pal::SubresRange* 
pRange) const; + inline void GetFrameBufferAttachmentSubresRange(Pal::SubresRange* pRange) const; - VK_INLINE bool NeedsFmaskViewSrds() const + bool NeedsFmaskViewSrds() const { return m_needsFmaskViewSrds; } protected: - static VK_INLINE void BuildImageSrds( + static void BuildImageSrds( const Device* pDevice, size_t srdSize, const Image* pImage, @@ -106,7 +106,7 @@ class ImageView final : public NonDispatchable const VkImageViewCreateInfo* pCreateInfo, void* pSrdMemory); - static VK_INLINE void BuildFmaskViewSrds( + static void BuildFmaskViewSrds( const Device* pDevice, size_t fmaskDescSize, const Image* pImage, @@ -114,7 +114,7 @@ class ImageView final : public NonDispatchable const VkImageViewCreateInfo* pCreateInfo, void* pFmaskMemory); - static VK_INLINE Pal::Result BuildColorTargetView( + static Pal::Result BuildColorTargetView( const Pal::IDevice* pPalDevice, const Pal::IImage* pPalImage, VkImageViewType viewType, @@ -126,7 +126,7 @@ class ImageView final : public NonDispatchable Pal::IColorTargetView** pColorView, const RuntimeSettings& settings); - static VK_INLINE Pal::Result BuildDepthStencilView( + static Pal::Result BuildDepthStencilView( const Pal::IDevice* pPalDevice, const Pal::IImage* pPalImage, VkImageViewType viewType, @@ -138,7 +138,7 @@ class ImageView final : public NonDispatchable Pal::IDepthStencilView** pDepthStencilView, const RuntimeSettings& settings); - VK_INLINE ImageView( + ImageView( Pal::IColorTargetView** pColorTargetView, Pal::IDepthStencilView** pDepthStencilView, const Image* pImage, diff --git a/icd/api/include/vk_instance.h b/icd/api/include/vk_instance.h index a40b62ec..8503e326 100644 --- a/icd/api/include/vk_instance.h +++ b/icd/api/include/vk_instance.h @@ -120,27 +120,27 @@ class Instance void PhysicalDevicesChanged(); - VK_INLINE void* AllocMem( + inline void* AllocMem( size_t size, size_t alignment, VkSystemAllocationScope allocType); - VK_INLINE void* AllocMem( + inline void* AllocMem( size_t size, 
VkSystemAllocationScope allocType); - VK_INLINE void FreeMem(void* pMem); + inline void FreeMem(void* pMem); - VK_INLINE VirtualStackMgr* StackMgr() + VirtualStackMgr* StackMgr() { return m_pVirtualStackMgr; } - VK_INLINE PalAllocator* Allocator() + PalAllocator* Allocator() { return &m_palAllocator; } - VK_INLINE PalAllocator* GetPrivateAllocator() + PalAllocator* GetPrivateAllocator() { return &m_privateAllocator; } - VK_INLINE VkAllocationCallbacks* GetAllocCallbacks() + VkAllocationCallbacks* GetAllocCallbacks() { return &m_allocCallbacks; } VK_FORCEINLINE Pal::IPlatform* PalPlatform() const @@ -154,10 +154,10 @@ class Instance uint32_t* pPropertyCount, VkExtensionProperties* pProperties); - VK_INLINE uint32_t GetAPIVersion() const + uint32_t GetAPIVersion() const { return m_apiVersion; } - VK_INLINE uint32_t GetAppVersion() const + uint32_t GetAppVersion() const { return m_appVersion; } VK_FORCEINLINE const PhysicalDeviceManager* GetPhysicalDeviceManager() const @@ -169,10 +169,10 @@ class Instance bool IsDeviceExtensionAvailable(DeviceExtensions::ExtensionId id) const; - static VK_INLINE bool IsExtensionSupported(InstanceExtensions::ExtensionId id) + static bool IsExtensionSupported(InstanceExtensions::ExtensionId id) { return GetSupportedExtensions().IsExtensionSupported(id); } - VK_INLINE bool IsExtensionEnabled(InstanceExtensions::ExtensionId id) const + bool IsExtensionEnabled(InstanceExtensions::ExtensionId id) const { return m_enabledExtensions.IsExtensionEnabled(id); } VkResult FindScreens( @@ -199,18 +199,18 @@ class Instance uint32_t* pModeCount, Pal::ScreenMode** ppModeList); - VK_INLINE const DispatchTable& GetDispatchTable() const + const DispatchTable& GetDispatchTable() const { return m_dispatchTable; } void EnableTracingSupport(); - VK_INLINE bool IsTracingSupportEnabled() const + bool IsTracingSupportEnabled() const { return m_flags.sqttSupport; } - VK_INLINE bool IsNullGpuModeEnabled() const + bool IsNullGpuModeEnabled() const { return 
m_flags.nullGpuMode; } - VK_INLINE Pal::NullGpuId GetNullGpuId() const + Pal::NullGpuId GetNullGpuId() const { return m_nullGpuId; } DevModeMgr* GetDevModeMgr() diff --git a/icd/api/include/vk_memory.h b/icd/api/include/vk_memory.h index 67eb9947..98b1f4f9 100644 --- a/icd/api/include/vk_memory.h +++ b/icd/api/include/vk_memory.h @@ -124,7 +124,7 @@ class Memory final : public NonDispatchable void Unmap(void); - VK_INLINE bool IsMultiInstance() const + bool IsMultiInstance() const { return m_flags.multiInstance; } @@ -135,12 +135,12 @@ class Memory final : public NonDispatchable Pal::IGpuMemory* PalMemory(uint32_t resourceIndex, uint32_t memoryIndex); - VK_INLINE Pal::IGpuMemory* PalMemory(uint32_t resourceIndex) const + Pal::IGpuMemory* PalMemory(uint32_t resourceIndex) const { return m_pPalMemory[resourceIndex][resourceIndex]; } - VK_INLINE Pal::IImage* GetExternalPalImage() const + Pal::IImage* GetExternalPalImage() const { return m_pExternalPalImage; } @@ -161,7 +161,7 @@ class Memory final : public NonDispatchable // Marks that the logical device's allocation count is incremented and needs to be decremented during the // destruction of this memory object. 
- VK_INLINE void SetAllocationCounted(uint32_t sizeAccountedForDeviceMask) + void SetAllocationCounted(uint32_t sizeAccountedForDeviceMask) { m_flags.allocationCounted = 1; m_sizeAccountedForDeviceMask = sizeAccountedForDeviceMask; diff --git a/icd/api/include/vk_physical_device.h b/icd/api/include/vk_physical_device.h index 59ae25d5..5a98e7d4 100644 --- a/icd/api/include/vk_physical_device.h +++ b/icd/api/include/vk_physical_device.h @@ -134,19 +134,19 @@ class PhysicalDevice void PopulateQueueFamilies(); void PopulateFormatProperties(); - VK_INLINE uint32_t GetMemoryTypeMask() const + uint32_t GetMemoryTypeMask() const { return m_memoryTypeMask; } uint32_t GetMemoryTypeMaskMatching(VkMemoryPropertyFlags flags) const; - VK_INLINE uint32_t GetMemoryTypeMaskForExternalSharing() const + uint32_t GetMemoryTypeMaskForExternalSharing() const { return m_memoryTypeMaskForExternalSharing; } - VK_INLINE bool GetVkTypeIndexBitsFromPalHeap(Pal::GpuHeap heapIndex, uint32_t* pVkIndexBits) const + bool GetVkTypeIndexBitsFromPalHeap(Pal::GpuHeap heapIndex, uint32_t* pVkIndexBits) const { VK_ASSERT(heapIndex < Pal::GpuHeapCount); VK_ASSERT(pVkIndexBits != nullptr); @@ -162,19 +162,19 @@ class PhysicalDevice } } - VK_INLINE Pal::GpuHeap GetPalHeapFromVkTypeIndex(uint32_t vkIndex) const + Pal::GpuHeap GetPalHeapFromVkTypeIndex(uint32_t vkIndex) const { VK_ASSERT(vkIndex < m_memoryProperties.memoryTypeCount); return m_memoryVkIndexToPalHeap[vkIndex]; } - VK_INLINE Pal::GpuHeap GetPalHeapFromVkHeapIndex(uint32_t heapIndex) const + Pal::GpuHeap GetPalHeapFromVkHeapIndex(uint32_t heapIndex) const { VK_ASSERT(heapIndex < m_memoryProperties.memoryHeapCount); return m_heapVkToPal[heapIndex]; } - VK_INLINE bool GetVkHeapIndexFromPalHeap(Pal::GpuHeap heapIndex, uint32_t* pVkHeapIndex) const + bool GetVkHeapIndexFromPalHeap(Pal::GpuHeap heapIndex, uint32_t* pVkHeapIndex) const { VK_ASSERT(heapIndex < Pal::GpuHeapCount); @@ -183,54 +183,54 @@ class PhysicalDevice return *pVkHeapIndex != 
Pal::GpuHeapCount; } - VK_INLINE const VkPhysicalDeviceMemoryProperties& GetMemoryProperties() const + const VkPhysicalDeviceMemoryProperties& GetMemoryProperties() const { return m_memoryProperties; } - VK_INLINE Pal::QueueType GetQueueFamilyPalQueueType( + Pal::QueueType GetQueueFamilyPalQueueType( uint32_t queueFamilyIndex) const { return m_queueFamilies[queueFamilyIndex].palQueueType; } - VK_INLINE bool GetQueueGroupCompatible( + bool GetQueueGroupCompatible( uint32_t queueFamilyIndex) const { return m_queueFamilies[queueFamilyIndex].flags.queueGroupCompatible; } - VK_INLINE Pal::EngineType GetQueueFamilyPalEngineType( + Pal::EngineType GetQueueFamilyPalEngineType( uint32_t queueFamilyIndex) const { return m_queueFamilies[queueFamilyIndex].palEngineType; } - VK_INLINE uint32_t GetCompQueueEngineIndex( + uint32_t GetCompQueueEngineIndex( const uint32_t queueIndex) const { return m_compQueueEnginesNdx[queueIndex]; } - VK_INLINE uint32_t GetUniversalQueueEngineIndex( + uint32_t GetUniversalQueueEngineIndex( const uint32_t queueIndex) const { return m_universalQueueEnginesNdx[queueIndex]; } - VK_INLINE uint32_t GetQueueFamilyPalImageLayoutFlag( + uint32_t GetQueueFamilyPalImageLayoutFlag( uint32_t queueFamilyIndex) const { return m_queueFamilies[queueFamilyIndex].palImageLayoutFlag; } - VK_INLINE VkShaderStageFlags GetValidShaderStages( + VkShaderStageFlags GetValidShaderStages( uint32_t queueFamilyIndex) const { return m_queueFamilies[queueFamilyIndex].validShaderStages; } - VK_INLINE const VkQueueFamilyProperties& GetQueueFamilyProperties( + const VkQueueFamilyProperties& GetQueueFamilyProperties( uint32_t queueFamilyIndex) const { return m_queueFamilies[queueFamilyIndex].properties; @@ -246,7 +246,7 @@ class PhysicalDevice size_t GetFeatures(VkPhysicalDeviceFeatures* pFeatures) const; - VK_INLINE VkResult GetFormatProperties( + VkResult GetFormatProperties( VkFormat format, VkFormatProperties* pFormatProperties) const { @@ -257,65 +257,65 @@ class PhysicalDevice 
return VK_SUCCESS; } - VK_INLINE bool FormatSupportsMsaa(VkFormat format) const + bool FormatSupportsMsaa(VkFormat format) const { uint32_t formatIndex = Formats::GetIndex(format); return Util::WideBitfieldIsSet(m_formatFeatureMsaaTarget, formatIndex); } - VK_INLINE void GetPhysicalDeviceIDProperties( + void GetPhysicalDeviceIDProperties( uint8_t* pDeviceUUID, uint8_t* pDriverUUID, uint8_t* pDeviceLUID, uint32_t* pDeviceNodeMask, VkBool32* pDeviceLUIDValid) const; - VK_INLINE void GetPhysicalDeviceMaintenance3Properties( + void GetPhysicalDeviceMaintenance3Properties( uint32_t* pMaxPerSetDescriptors, VkDeviceSize* pMaxMemoryAllocationSize) const; - VK_INLINE void GetPhysicalDeviceMultiviewProperties( + void GetPhysicalDeviceMultiviewProperties( uint32_t* pMaxMultiviewViewCount, uint32_t* pMaxMultiviewInstanceIndex) const; - VK_INLINE void GetPhysicalDevicePointClippingProperties( + void GetPhysicalDevicePointClippingProperties( VkPointClippingBehavior* pPointClippingBehavior) const; - VK_INLINE void GetPhysicalDeviceProtectedMemoryProperties( + void GetPhysicalDeviceProtectedMemoryProperties( VkBool32* pProtectedNoFault) const; - VK_INLINE void GetPhysicalDeviceSubgroupProperties( + void GetPhysicalDeviceSubgroupProperties( uint32_t* pSubgroupSize, VkShaderStageFlags* pSupportedStages, VkSubgroupFeatureFlags* pSupportedOperations, VkBool32* pQuadOperationsInAllStages) const; - VK_INLINE void GetPhysicalDeviceDriverProperties( + void GetPhysicalDeviceDriverProperties( VkDriverId* pDriverID, char* pDriverName, char* pDriverInfo, VkConformanceVersion* pConformanceVersion) const; template - VK_INLINE void GetPhysicalDeviceFloatControlsProperties( + void GetPhysicalDeviceFloatControlsProperties( T pFloatControlsProperties) const; template - VK_INLINE void GetPhysicalDeviceDescriptorIndexingProperties( + void GetPhysicalDeviceDescriptorIndexingProperties( T pDescriptorIndexingProperties) const; - VK_INLINE void GetPhysicalDeviceDepthStencilResolveProperties( + void 
GetPhysicalDeviceDepthStencilResolveProperties( VkResolveModeFlags* pSupportedDepthResolveModes, VkResolveModeFlags* pSupportedStencilResolveModes, VkBool32* pIndependentResolveNone, VkBool32* pIndependentResolve) const; - VK_INLINE void GetPhysicalDeviceSamplerFilterMinmaxProperties( + void GetPhysicalDeviceSamplerFilterMinmaxProperties( VkBool32* pFilterMinmaxSingleComponentFormats, VkBool32* pFilterMinmaxImageComponentMapping) const; - VK_INLINE void GetPhysicalDeviceTimelineSemaphoreProperties( + void GetPhysicalDeviceTimelineSemaphoreProperties( uint64_t* pMaxTimelineSemaphoreValueDifference) const; VkResult GetExternalMemoryProperties( @@ -341,74 +341,74 @@ class PhysicalDevice uint32_t* pPropertyCount, utils::ArrayView properties) const; - VK_INLINE void GetPhysicalDevice16BitStorageFeatures( + void GetPhysicalDevice16BitStorageFeatures( VkBool32* pStorageBuffer16BitAccess, VkBool32* pUniformAndStorageBuffer16BitAccess, VkBool32* pStoragePushConstant16, VkBool32* pStorageInputOutput16) const; - VK_INLINE void GetPhysicalDeviceMultiviewFeatures( + void GetPhysicalDeviceMultiviewFeatures( VkBool32* pMultiview, VkBool32* pMultiviewGeometryShader, VkBool32* pMultiviewTessellationShader) const; - VK_INLINE void GetPhysicalDeviceVariablePointerFeatures( + void GetPhysicalDeviceVariablePointerFeatures( VkBool32* pVariablePointersStorageBuffer, VkBool32* pVariablePointers) const; - VK_INLINE void GetPhysicalDeviceProtectedMemoryFeatures( + void GetPhysicalDeviceProtectedMemoryFeatures( VkBool32* pProtectedMemory) const; - VK_INLINE void GetPhysicalDeviceSamplerYcbcrConversionFeatures( + void GetPhysicalDeviceSamplerYcbcrConversionFeatures( VkBool32* pSamplerYcbcrConversion) const; - VK_INLINE void GetPhysicalDeviceShaderDrawParameterFeatures( + void GetPhysicalDeviceShaderDrawParameterFeatures( VkBool32* pShaderDrawParameters) const; - VK_INLINE void GetPhysicalDevice8BitStorageFeatures( + void GetPhysicalDevice8BitStorageFeatures( VkBool32* 
pStorageBuffer8BitAccess, VkBool32* pUniformAndStorageBuffer8BitAccess, VkBool32* pStoragePushConstant8) const; - VK_INLINE void GetPhysicalDeviceShaderAtomicInt64Features( + void GetPhysicalDeviceShaderAtomicInt64Features( VkBool32* pShaderBufferInt64Atomics, VkBool32* pShaderSharedInt64Atomics) const; - VK_INLINE void GetPhysicalDeviceFloat16Int8Features( + void GetPhysicalDeviceFloat16Int8Features( VkBool32* pShaderFloat16, VkBool32* pShaderInt8) const; template - VK_INLINE void GetPhysicalDeviceDescriptorIndexingFeatures( + void GetPhysicalDeviceDescriptorIndexingFeatures( T pDescriptorIndexingFeatures) const; - VK_INLINE void GetPhysicalDeviceScalarBlockLayoutFeatures( + void GetPhysicalDeviceScalarBlockLayoutFeatures( VkBool32* pScalarBlockLayout) const; - VK_INLINE void GetPhysicalDeviceImagelessFramebufferFeatures( + void GetPhysicalDeviceImagelessFramebufferFeatures( VkBool32* pImagelessFramebuffer) const; - VK_INLINE void GetPhysicalDeviceUniformBufferStandardLayoutFeatures( + void GetPhysicalDeviceUniformBufferStandardLayoutFeatures( VkBool32* pUniformBufferStandardLayout) const; - VK_INLINE void GetPhysicalDeviceSubgroupExtendedTypesFeatures( + void GetPhysicalDeviceSubgroupExtendedTypesFeatures( VkBool32* pShaderSubgroupExtendedTypes) const; - VK_INLINE void GetPhysicalDeviceSeparateDepthStencilLayoutsFeatures( + void GetPhysicalDeviceSeparateDepthStencilLayoutsFeatures( VkBool32* pSeparateDepthStencilLayouts) const; - VK_INLINE void GetPhysicalDeviceHostQueryResetFeatures( + void GetPhysicalDeviceHostQueryResetFeatures( VkBool32* pHostQueryReset) const; - VK_INLINE void GetPhysicalDeviceTimelineSemaphoreFeatures( + void GetPhysicalDeviceTimelineSemaphoreFeatures( VkBool32* pTimelineSemaphore) const; - VK_INLINE void GetPhysicalDeviceBufferAddressFeatures( + void GetPhysicalDeviceBufferAddressFeatures( VkBool32* pBufferDeviceAddress, VkBool32* pBufferDeviceAddressCaptureReplay, VkBool32* pBufferDeviceAddressMultiDevice) const; - VK_INLINE void 
GetPhysicalDeviceVulkanMemoryModelFeatures( + void GetPhysicalDeviceVulkanMemoryModelFeatures( VkBool32* pVulkanMemoryModel, VkBool32* pVulkanMemoryModelDeviceScope, VkBool32* pVulkanMemoryModelAvailabilityVisibilityChains) const; @@ -524,32 +524,32 @@ class PhysicalDevice return m_properties.gpuMemoryProperties.flags.virtualRemappingSupport; } - VK_INLINE const RuntimeSettings& GetRuntimeSettings() const + const RuntimeSettings& GetRuntimeSettings() const { return m_pSettingsLoader->GetSettings(); } - VK_INLINE VulkanSettingsLoader* GetSettingsLoader() const + VulkanSettingsLoader* GetSettingsLoader() const { return m_pSettingsLoader; } - VK_INLINE const VkPhysicalDeviceLimits& GetLimits() const + const VkPhysicalDeviceLimits& GetLimits() const { return m_limits; } - VK_INLINE uint32_t GetVrHighPrioritySubEngineIndex() const + uint32_t GetVrHighPrioritySubEngineIndex() const { return m_vrHighPrioritySubEngineIndex; } - VK_INLINE uint32_t GetRtCuHighComputeSubEngineIndex() const + uint32_t GetRtCuHighComputeSubEngineIndex() const { return m_RtCuHighComputeSubEngineIndex; } - VK_INLINE uint32_t GetSubgroupSize() const + uint32_t GetSubgroupSize() const { uint32_t subgroupSize = m_properties.gfxipProperties.shaderCore.maxWavefrontSize; @@ -561,7 +561,7 @@ class PhysicalDevice return subgroupSize; } - VK_INLINE bool IsPrtSupportedOnDmaEngine() const + bool IsPrtSupportedOnDmaEngine() const { return m_prtOnDmaSupported; } @@ -633,29 +633,29 @@ class PhysicalDevice const Instance* pInstance, const PhysicalDevice* pPhysicalDevice); - VK_INLINE const DeviceExtensions::Supported& GetSupportedExtensions() const + const DeviceExtensions::Supported& GetSupportedExtensions() const { return m_supportedExtensions; } - VK_INLINE const DeviceExtensions::Supported& GetAllowedExtensions() const + const DeviceExtensions::Supported& GetAllowedExtensions() const { return m_allowedExtensions; } - VK_INLINE bool IsExtensionSupported(DeviceExtensions::ExtensionId id) const + bool 
IsExtensionSupported(DeviceExtensions::ExtensionId id) const { return m_supportedExtensions.IsExtensionSupported(id); } - VK_INLINE bool IsExtensionSupported(InstanceExtensions::ExtensionId id) const + bool IsExtensionSupported(InstanceExtensions::ExtensionId id) const { return VkInstance()->IsExtensionSupported(id); } uint32_t GetSupportedAPIVersion() const; - VK_INLINE uint32_t GetEnabledAPIVersion() const + uint32_t GetEnabledAPIVersion() const { return Util::Min(GetSupportedAPIVersion(), VkInstance()->GetAPIVersion()); } - VK_INLINE AppProfile GetAppProfile() const + AppProfile GetAppProfile() const { return m_appProfile; } - VK_INLINE const PhysicalDeviceGpaProperties& GetGpaProperties() const + const PhysicalDeviceGpaProperties& GetGpaProperties() const { return m_gpaProps; } void LateInitialize(); @@ -677,7 +677,7 @@ class PhysicalDevice Pal::gpusize allocationSize, uint32_t heapIdx); - VK_INLINE bool ShouldAddRemoteBackupHeap(uint32_t vkIndex) const + bool ShouldAddRemoteBackupHeap(uint32_t vkIndex) const { return m_memoryVkIndexAddRemoteBackupHeap[vkIndex]; } bool IsOverrideHeapChoiceToLocalWithinBudget(Pal::gpusize size) const; diff --git a/icd/api/include/vk_physical_device_manager.h b/icd/api/include/vk_physical_device_manager.h index f53bcce8..90193cd8 100644 --- a/icd/api/include/vk_physical_device_manager.h +++ b/icd/api/include/vk_physical_device_manager.h @@ -75,7 +75,7 @@ class PhysicalDeviceManager uint32_t maxDeviceGroupIndices, int32_t* pDeviceGroupIndices) const; - VK_INLINE uint32_t GetDeviceCount() const + uint32_t GetDeviceCount() const { return m_devices.NumElements(); } VK_FORCEINLINE Instance* VkInstance() const diff --git a/icd/api/include/vk_pipeline.h b/icd/api/include/vk_pipeline.h index 4e3640b2..84d7110e 100644 --- a/icd/api/include/vk_pipeline.h +++ b/icd/api/include/vk_pipeline.h @@ -156,21 +156,21 @@ class Pipeline return m_pPalPipeline[idx]; } - VK_INLINE uint64_t PalPipelineHash() const { return m_palPipelineHash; } + uint64_t 
PalPipelineHash() const { return m_palPipelineHash; } - VK_INLINE uint64_t GetApiHash() const { return m_apiHash; } + uint64_t GetApiHash() const { return m_apiHash; } - VK_INLINE const PipelineBinaryInfo* GetBinary() const { return m_pBinary; } + const PipelineBinaryInfo* GetBinary() const { return m_pBinary; } - VK_INLINE VkPipelineBindPoint GetType() const { return m_type; } + VkPipelineBindPoint GetType() const { return m_type; } // This function returns true if any of the bits in the given state mask (corresponding to shifted values of // VK_DYNAMIC_STATE_*) should be programmed by the pipeline when it is bound (instead of by the application via // vkCmdSet*). - VK_INLINE bool ContainsStaticState(DynamicStatesInternal dynamicState) const + bool ContainsStaticState(DynamicStatesInternal dynamicState) const { return ((m_staticStateMask & (1UL << static_cast(dynamicState))) != 0); } - VK_INLINE bool ContainsDynamicState(DynamicStatesInternal dynamicState) const + bool ContainsDynamicState(DynamicStatesInternal dynamicState) const { return ((m_staticStateMask & (1UL << static_cast(dynamicState))) == 0); } VkResult GetShaderDisassembly( @@ -211,6 +211,7 @@ class Pipeline const Device* pDevice, const uint32_t stageCount, const VkPipelineShaderStageCreateInfo* pStages, + const bool duplicateExistingModules, uint32_t (*pfnGetOutputIdx)(const uint32_t inputIdx, const uint32_t stageIdx), ShaderStageInfo* pShaderStageInfo, diff --git a/icd/api/include/vk_pipeline_cache.h b/icd/api/include/vk_pipeline_cache.h index 87249af7..c60cad05 100644 --- a/icd/api/include/vk_pipeline_cache.h +++ b/icd/api/include/vk_pipeline_cache.h @@ -68,7 +68,7 @@ class PipelineCache final : public NonDispatchableGetCacheAdapter() : nullptr); diff --git a/icd/api/include/vk_pipeline_layout.h b/icd/api/include/vk_pipeline_layout.h index 9d3a2055..253abb9b 100644 --- a/icd/api/include/vk_pipeline_layout.h +++ b/icd/api/include/vk_pipeline_layout.h @@ -123,38 +123,46 @@ class PipelineLayout final : 
public NonDispatchable(Util::VoidPtrInc(this, sizeof(*this)))[setIndex]; } // Original descriptor set layout pointers - VK_INLINE const DescriptorSetLayout* GetSetLayouts(uint32_t setIndex) const + const DescriptorSetLayout* GetSetLayouts(uint32_t setIndex) const { return static_cast( Util::VoidPtrInc(this, sizeof(*this) + SetUserDataLayoutSize()))[setIndex]; } - VK_INLINE DescriptorSetLayout* GetSetLayouts(uint32_t setIndex) + DescriptorSetLayout* GetSetLayouts(uint32_t setIndex) { return static_cast( Util::VoidPtrInc(this, sizeof(*this) + SetUserDataLayoutSize()))[setIndex]; diff --git a/icd/api/include/vk_query.h b/icd/api/include/vk_query.h index a70e64f4..e10c9ff6 100644 --- a/icd/api/include/vk_query.h +++ b/icd/api/include/vk_query.h @@ -81,14 +81,14 @@ class QueryPool : public NonDispatchable uint32_t startQuery, uint32_t queryCount) = 0; - VK_INLINE VkQueryType GetQueryType() const + VkQueryType GetQueryType() const { return m_queryType; } - VK_INLINE const PalQueryPool* AsPalQueryPool() const; - VK_INLINE const QueryPoolWithStorageView* AsQueryPoolWithStorageView() const; - VK_INLINE const TimestampQueryPool* AsTimestampQueryPool() const; + inline const PalQueryPool* AsPalQueryPool() const; + inline const QueryPoolWithStorageView* AsQueryPoolWithStorageView() const; + inline const TimestampQueryPool* AsTimestampQueryPool() const; protected: QueryPool( @@ -174,7 +174,7 @@ class PalQueryPool final : public QueryPool class QueryPoolWithStorageView : public QueryPool { public: - VK_INLINE const void* GetStorageView(uint32_t deviceIdx) const + const void* GetStorageView(uint32_t deviceIdx) const { return m_pStorageView[deviceIdx]; } protected: @@ -245,20 +245,20 @@ class TimestampQueryPool final : public QueryPoolWithStorageView uint32_t startQuery, uint32_t queryCount) override; - VK_INLINE const InternalMemory& GetMemory() const + const InternalMemory& GetMemory() const { return m_internalMem; } - VK_INLINE Pal::gpusize GetSlotOffset(uint32_t query) const + 
Pal::gpusize GetSlotOffset(uint32_t query) const { VK_ASSERT(query < m_entryCount); return m_internalMem.Offset() + query * m_slotSize; } - VK_INLINE uint32_t GetSlotSize() const + uint32_t GetSlotSize() const { return m_slotSize; } - VK_INLINE const Pal::IGpuMemory& PalMemory(uint32_t deviceIdx) const + const Pal::IGpuMemory& PalMemory(uint32_t deviceIdx) const { return *m_internalMem.PalMemory(deviceIdx); } private: @@ -275,7 +275,7 @@ class TimestampQueryPool final : public QueryPoolWithStorageView }; // ===================================================================================================================== -VK_INLINE const PalQueryPool* QueryPool::AsPalQueryPool() const +inline const PalQueryPool* QueryPool::AsPalQueryPool() const { VK_ASSERT(m_queryType != VK_QUERY_TYPE_TIMESTAMP); @@ -283,14 +283,14 @@ VK_INLINE const PalQueryPool* QueryPool::AsPalQueryPool() const } // ===================================================================================================================== -VK_INLINE const TimestampQueryPool* QueryPool::AsTimestampQueryPool() const +inline const TimestampQueryPool* QueryPool::AsTimestampQueryPool() const { VK_ASSERT(m_queryType == VK_QUERY_TYPE_TIMESTAMP); return static_cast(this); } -VK_INLINE const QueryPoolWithStorageView* QueryPool::AsQueryPoolWithStorageView() const +inline const QueryPoolWithStorageView* QueryPool::AsQueryPoolWithStorageView() const { if ((m_queryType != VK_QUERY_TYPE_TIMESTAMP) ) diff --git a/icd/api/include/vk_queue.h b/icd/api/include/vk_queue.h index 69ab6053..c88a0721 100644 --- a/icd/api/include/vk_queue.h +++ b/icd/api/include/vk_queue.h @@ -204,7 +204,7 @@ class Queue uint32_t u32All; }; - VK_INLINE VkResult BindSparseEntry( + VkResult BindSparseEntry( const VkBindSparseInfo& bindInfo, uint32_t resourceDeviceIndex, uint32_t memoryDeviceIndex, diff --git a/icd/api/include/vk_render_pass.h b/icd/api/include/vk_render_pass.h index e70560eb..12e28da5 100644 --- 
a/icd/api/include/vk_render_pass.h +++ b/icd/api/include/vk_render_pass.h @@ -232,43 +232,43 @@ class RenderPass final : public NonDispatchable uint32_t GetColorAttachmentSamples(uint32_t subPassIndex, uint32_t colorTarget) const; uint32_t GetDepthStencilAttachmentSamples(uint32_t subPassIndex) const; - VK_INLINE VkResolveModeFlagBits GetDepthResolveMode(uint32_t subpass) const + VkResolveModeFlagBits GetDepthResolveMode(uint32_t subpass) const { return m_createInfo.pSubpasses[subpass].depthResolveMode; } - VK_INLINE VkResolveModeFlagBits GetStencilResolveMode(uint32_t subpass) const + VkResolveModeFlagBits GetStencilResolveMode(uint32_t subpass) const { return m_createInfo.pSubpasses[subpass].stencilResolveMode; } uint32_t GetSubpassColorReferenceCount(uint32_t subPassIndex) const; - VK_INLINE uint32_t GetAttachmentCount() const { return m_createInfo.attachmentCount; } + uint32_t GetAttachmentCount() const { return m_createInfo.attachmentCount; } const AttachmentDescription& GetAttachmentDesc(uint32_t attachmentIndex) const; const AttachmentReference& GetSubpassColorReference(uint32_t subpass, uint32_t index) const; const AttachmentReference& GetSubpassDepthStencilReference(uint32_t subpass) const; - VK_INLINE uint32_t GetSubpassMaxSampleCount(uint32_t subpass) const + uint32_t GetSubpassMaxSampleCount(uint32_t subpass) const { return Util::Max(m_createInfo.pSubpasses[subpass].subpassSampleCount.colorCount, m_createInfo.pSubpasses[subpass].subpassSampleCount.depthCount); } - VK_INLINE uint32_t GetSubpassColorSampleCount(uint32_t subpass) const + uint32_t GetSubpassColorSampleCount(uint32_t subpass) const { return m_createInfo.pSubpasses[subpass].subpassSampleCount.colorCount; } - VK_INLINE uint32_t GetSubpassDepthSampleCount(uint32_t subpass) const + uint32_t GetSubpassDepthSampleCount(uint32_t subpass) const { return m_createInfo.pSubpasses[subpass].subpassSampleCount.depthCount; } - VK_INLINE const RenderPassExecuteInfo* GetExecuteInfo() const + const 
RenderPassExecuteInfo* GetExecuteInfo() const { return m_pExecuteInfo; } - VK_INLINE uint64_t GetHash() const + uint64_t GetHash() const { return m_createInfo.hash; } - VK_INLINE uint32_t GetSubpassCount() const + uint32_t GetSubpassCount() const { return m_createInfo.subpassCount; } - VK_INLINE uint32_t GetViewMask(uint32_t subpass) const + uint32_t GetViewMask(uint32_t subpass) const { return m_createInfo.pSubpasses[subpass].viewMask; } - VK_INLINE uint32_t GetActiveViewsBitMask() const + uint32_t GetActiveViewsBitMask() const { uint32_t activeViewsBitMask = 0; @@ -282,7 +282,7 @@ class RenderPass final : public NonDispatchable return activeViewsBitMask; } - VK_INLINE bool IsMultiviewEnabled() const + bool IsMultiviewEnabled() const { // When a subpass uses a non-zero view mask, // multiview functionality is considered to be enabled. diff --git a/icd/api/include/vk_sampler.h b/icd/api/include/vk_sampler.h index 1c135efd..6e9736e3 100644 --- a/icd/api/include/vk_sampler.h +++ b/icd/api/include/vk_sampler.h @@ -59,13 +59,13 @@ class Sampler final : public NonDispatchable Device* pDevice, const VkAllocationCallbacks* pAllocator); - VK_INLINE uint64_t GetApiHash() const + uint64_t GetApiHash() const { return m_apiHash; } - VK_INLINE bool IsYCbCrSampler() const + bool IsYCbCrSampler() const { return m_isYCbCrSampler; } - VK_INLINE uint32_t GetMultiPlaneCount() const + uint32_t GetMultiPlaneCount() const { return m_multiPlaneCount; } diff --git a/icd/api/include/vk_shader_code.h b/icd/api/include/vk_shader_code.h index 523b2858..c44128d5 100644 --- a/icd/api/include/vk_shader_code.h +++ b/icd/api/include/vk_shader_code.h @@ -37,7 +37,7 @@ typedef Vkgc::ShaderStage ShaderStage; constexpr uint32_t ShaderStageCount = ShaderStage::ShaderStageCount; /// Translate shader stage flag bits to corresponding shader stage. 
-VK_INLINE ShaderStage ShaderFlagBitToStage(const VkShaderStageFlagBits& shaderBits) +inline ShaderStage ShaderFlagBitToStage(const VkShaderStageFlagBits& shaderBits) { ShaderStage stage = ShaderStage::ShaderStageCount; // Initialize it with an invalid value if (shaderBits & VK_SHADER_STAGE_VERTEX_BIT) diff --git a/icd/api/include/vk_swapchain.h b/icd/api/include/vk_swapchain.h index fe98960c..e42986a2 100644 --- a/icd/api/include/vk_swapchain.h +++ b/icd/api/include/vk_swapchain.h @@ -125,25 +125,25 @@ class SwapChain final : public NonDispatchable VK_FORCEINLINE Pal::ISwapChain* PalSwapChain() const { return m_pPalSwapChain; } - VK_INLINE const FullscreenMgr* GetFullscreenMgr() const + const FullscreenMgr* GetFullscreenMgr() const { return m_pFullscreenMgr; } - VK_INLINE FullscreenMgr* GetFullscreenMgr() + FullscreenMgr* GetFullscreenMgr() { return m_pFullscreenMgr; } - VK_INLINE uint32_t GetPresentCount() const + uint32_t GetPresentCount() const { return m_presentCount; } - VK_INLINE VkPresentModeKHR GetPresentMode() const + VkPresentModeKHR GetPresentMode() const { return m_presentMode; } - VK_INLINE uint32_t GetAppOwnedImageCount() const + uint32_t GetAppOwnedImageCount() const { return m_appOwnedImageCount; } - VK_INLINE bool IsHwCompositingSupported() const + bool IsHwCompositingSupported() const { return (m_properties.flags.hwCompositing == 1); } - VK_INLINE const Pal::ScreenColorConfig& GetColorParams() const + const Pal::ScreenColorConfig& GetColorParams() const { return m_colorParams; } Pal::IGpuMemory* UpdatePresentInfo( @@ -262,13 +262,13 @@ class FullscreenMgr SwapChain* pSwapChain, Pal::PresentSwapChainInfo* pPresentInfo); - VK_INLINE ExclusiveModeFlags GetExclusiveModeFlags() const + ExclusiveModeFlags GetExclusiveModeFlags() const { return m_exclusiveModeFlags; } - VK_INLINE uint32_t GetVidPnSourceId() const + uint32_t GetVidPnSourceId() const { return m_vidPnSourceId; } - VK_INLINE Pal::IScreen* GetPalScreen() const + Pal::IScreen* GetPalScreen() 
const { return m_pScreen; } bool TryEnterExclusive(SwapChain* pSwapChain); diff --git a/icd/api/include/vk_utils.h b/icd/api/include/vk_utils.h index 5d65237e..299e451f 100644 --- a/icd/api/include/vk_utils.h +++ b/icd/api/include/vk_utils.h @@ -62,7 +62,6 @@ #define VK_ALERT PAL_ALERT #define VK_ALERT_ALWAYS_MSG PAL_ALERT_ALWAYS_MSG #define VK_SOFT_ASSERT(expr) VK_ALERT(!(expr)) -#define VK_INLINE PAL_INLINE #define VK_NEW PAL_NEW #define VK_PLACEMENT_NEW PAL_PLACEMENT_NEW #define VK_NOT_IMPLEMENTED do { PAL_NOT_IMPLEMENTED(); } while (0) @@ -132,14 +131,14 @@ typedef VkAccessFlags2KHR AccessFlags; namespace utils { -VK_INLINE uint64_t TicksToNano(uint64_t ticks) +inline uint64_t TicksToNano(uint64_t ticks) { return (ticks * NANOSECONDS_IN_A_SECOND) / static_cast(Util::GetPerfFrequency()); } // ===================================================================================================================== // Get driver build time hash -VK_INLINE uint32_t GetBuildTimeHash() +inline uint32_t GetBuildTimeHash() { return Util::HashLiteralString(__DATE__ __TIME__); } @@ -147,7 +146,7 @@ VK_INLINE uint32_t GetBuildTimeHash() // ===================================================================================================================== // This function can be used to get the right externsion structure of specific type in case there are more than one // extension is supported -VK_INLINE const VkStructHeader* GetExtensionStructure(const VkStructHeader* pHeader, VkStructureType sType) +inline const VkStructHeader* GetExtensionStructure(const VkStructHeader* pHeader, VkStructureType sType) { const VkStructHeader* pIter = pHeader; while(pIter != nullptr) @@ -166,7 +165,7 @@ VK_INLINE const VkStructHeader* GetExtensionStructure(const VkStructHeader* pHea // ===================================================================================================================== template -VK_INLINE const ExtStruct* GetExtensionStructure( +const ExtStruct* 
GetExtensionStructure( const SrcStruct* pHeader, VkStructureType sType) { @@ -176,7 +175,7 @@ VK_INLINE const ExtStruct* GetExtensionStructure( // ===================================================================================================================== // Returns the number of indices of a particular index type that fit into a buffer of the given byte-size. -VK_INLINE uint32_t BufferSizeToIndexCount(Pal::IndexType indexType, VkDeviceSize bufferSize) +inline uint32_t BufferSizeToIndexCount(Pal::IndexType indexType, VkDeviceSize bufferSize) { static_assert((static_cast(Pal::IndexType::Idx8) == 0) && (static_cast(Pal::IndexType::Idx16) == 1) && @@ -187,7 +186,7 @@ VK_INLINE uint32_t BufferSizeToIndexCount(Pal::IndexType indexType, VkDeviceSize } // ===================================================================================================================== -VK_INLINE void GetExecutableNameAndPath(wchar_t* pExecutableName, wchar_t* pExecutablePath) +inline void GetExecutableNameAndPath(wchar_t* pExecutableName, wchar_t* pExecutablePath) { // Get the wchar_t executable name and path wchar_t executableNameAndPathBuffer[PATH_MAX]; @@ -210,7 +209,7 @@ VK_INLINE void GetExecutableNameAndPath(wchar_t* pExecutableName, wchar_t* pExec } // ===================================================================================================================== -VK_INLINE void GetExecutableNameAndPath(char* pExecutableName, char* pExecutablePath) +inline void GetExecutableNameAndPath(char* pExecutableName, char* pExecutablePath) { // Get the executable name and path char executableNameAndPathBuffer[PATH_MAX]; @@ -233,7 +232,7 @@ VK_INLINE void GetExecutableNameAndPath(char* pExecutableName, char* pExecutable } // ===================================================================================================================== -VK_INLINE int StrCmpCaseInsensitive( +inline int StrCmpCaseInsensitive( const char* a, const char* b) { @@ -261,7 +260,7 @@ VK_INLINE 
int StrCmpCaseInsensitive( // ===================================================================================================================== // Return true if Big Software Release 6.0 is supported. -VK_INLINE bool BigSW60Supported(const Pal::BigSoftwareReleaseInfo& bigSwInfo) +inline bool BigSW60Supported(const Pal::BigSoftwareReleaseInfo& bigSwInfo) { return ((bigSwInfo.majorVersion > 2019) || ((bigSwInfo.majorVersion == 2019) && (bigSwInfo.minorVersion >= 1))); @@ -271,7 +270,7 @@ VK_INLINE bool BigSW60Supported(const Pal::BigSoftwareReleaseInfo& bigSwInfo) class IterateMask { public: - VK_INLINE IterateMask(uint32_t mask) : + IterateMask(uint32_t mask) : m_index(0), m_mask(mask) { @@ -283,7 +282,7 @@ class IterateMask #endif } - VK_INLINE bool IterateNext() + bool IterateNext() { #if (VKI_BUILD_MAX_NUM_GPUS > 1) if (Util::BitMaskScanForward(&m_index, m_mask) == true) @@ -295,7 +294,7 @@ class IterateMask return false; } - VK_INLINE uint32_t Index() const + uint32_t Index() const { return m_index; } @@ -322,7 +321,7 @@ class ArrayView public: // Create a view into an array of ElementT with stride determined by OuterT. template - VK_INLINE ArrayView(OuterT* pData, ElementT* pFirstElement) : + ArrayView(OuterT* pData, ElementT* pFirstElement) : m_pData(reinterpret_cast(pData)), m_stride(sizeof(OuterT)) { @@ -337,18 +336,18 @@ class ArrayView } // Use this form to achieve tight packing of elements, if needed. 
- VK_INLINE explicit ArrayView(ElementT* pData) : + explicit ArrayView(ElementT* pData) : m_pData(reinterpret_cast(pData)), m_stride(sizeof(ElementT)) { } - VK_INLINE bool IsNull() const + bool IsNull() const { return m_pData == nullptr; } - VK_INLINE ElementT& operator[](int32_t ndx) const + ElementT& operator[](int32_t ndx) const { return *reinterpret_cast(m_pData + ndx * m_stride); } diff --git a/icd/api/internal_mem_mgr.cpp b/icd/api/internal_mem_mgr.cpp index b8d3f7b9..8100eb48 100644 --- a/icd/api/internal_mem_mgr.cpp +++ b/icd/api/internal_mem_mgr.cpp @@ -51,7 +51,7 @@ static constexpr Pal::gpusize PoolMinSuballocationSize = 1ull << 4; // 16 by // ===================================================================================================================== // Filter invisible heap. For some objects as pipeline, invisible heap will be appended in memory requirement. // We filter this because we don't expect to support object memory migration. -static VK_INLINE void FilterHeap( +static void FilterHeap( Pal::GpuMemoryRequirements* pMemReq, Pal::GpuHeap typeToFilter) { diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index 41b2b4f9..26881713 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -54,18 +54,6 @@ namespace vk { -// ===================================================================================================================== -// The shader stages of Pre-Rasterization Shaders section -constexpr uint32_t PrsShaderMask = 0 - | ((1 << ShaderStage::ShaderStageVertex) - | (1 << ShaderStage::ShaderStageTessControl) - | (1 << ShaderStage::ShaderStageTessEval) - | (1 << ShaderStage::ShaderStageGeometry)); - -// ===================================================================================================================== -// The shader stages of Fragment Shader (Post-Rasterization) section -constexpr uint32_t FgsShaderMask = (1 << ShaderStage::ShaderStageFragment); - // 
===================================================================================================================== // Helper function used to check whether a specific dynamic state is set static bool IsDynamicStateEnabled(const uint32_t dynamicStateFlags, const DynamicStatesInternal internalState) @@ -785,24 +773,13 @@ VkResult PipelineCompiler::CreateGraphicsPipelineBinary( if (shouldCompile && ((pPipelineBinaryCache != nullptr) || (m_pBinaryCache != nullptr))) { int64_t startTime = Util::GetPerfCpuTime(); - Util::MetroHash128 hash = {}; - hash.Update(pipelineHash); - hash.Update(pCreateInfo->pipelineInfo.vs.options); - hash.Update(pCreateInfo->pipelineInfo.tes.options); - hash.Update(pCreateInfo->pipelineInfo.tcs.options); - hash.Update(pCreateInfo->pipelineInfo.gs.options); - hash.Update(pCreateInfo->pipelineInfo.fs.options); - hash.Update(pCreateInfo->pipelineInfo.options); - hash.Update(pCreateInfo->pipelineInfo.nggState); - hash.Update(GetCacheIdControlFlags(pCreateInfo->flags)); - hash.Update(pCreateInfo->dbFormat); - hash.Update(pCreateInfo->pipelineProfileKey); - hash.Update(deviceIdx); - hash.Update(pCreateInfo->compilerType); - hash.Update(pCreateInfo->pipelineInfo.dynamicVertexStride); - hash.Update(m_pPhysicalDevice->GetSettingsLoader()->GetSettingsHash()); - - hash.Finalize(pCacheId->bytes); + + GetGraphicsPipelineCacheId( + deviceIdx, + pCreateInfo, + pipelineHash, + m_pPhysicalDevice->GetSettingsLoader()->GetSettingsHash(), + pCacheId); cacheResult = GetCachedPipelineBinary(pCacheId, pPipelineBinaryCache, pPipelineBinarySize, ppPipelineBinary, &isUserCacheHit, &isInternalCacheHit, &pCreateInfo->freeCompilerBinary, &pCreateInfo->pipelineFeedback); @@ -834,13 +811,12 @@ VkResult PipelineCompiler::CreateGraphicsPipelineBinary( shaderInfos, pPipelineDumpHandle, pipelineHash, + pCacheId, &compileTime); } if (result == VK_SUCCESS) { - pCreateInfo->freeCompilerBinary = FreeWithCompiler; - } } } @@ -984,17 +960,13 @@ VkResult 
PipelineCompiler::CreateComputePipelineBinary( if (shouldCompile && ((pPipelineBinaryCache != nullptr) || (m_pBinaryCache != nullptr))) { int64_t startTime = Util::GetPerfCpuTime(); - Util::MetroHash128 hash = {}; - hash.Update(pipelineHash); - hash.Update(pCreateInfo->pipelineInfo.cs.options); - hash.Update(pCreateInfo->pipelineInfo.options); - hash.Update(GetCacheIdControlFlags(pCreateInfo->flags)); - hash.Update(pCreateInfo->pipelineProfileKey); - hash.Update(deviceIdx); - hash.Update(pCreateInfo->compilerType); - hash.Update(m_pPhysicalDevice->GetSettingsLoader()->GetSettingsHash()); - - hash.Finalize(pCacheId->bytes); + + GetComputePipelineCacheId( + deviceIdx, + pCreateInfo, + pipelineHash, + m_pPhysicalDevice->GetSettingsLoader()->GetSettingsHash(), + pCacheId); cacheResult = GetCachedPipelineBinary(pCacheId, pPipelineBinaryCache, pPipelineBinarySize, ppPipelineBinary, &isUserCacheHit, &isInternalCacheHit, &pCreateInfo->freeCompilerBinary, &pCreateInfo->pipelineFeedback); @@ -1025,13 +997,10 @@ VkResult PipelineCompiler::CreateComputePipelineBinary( ppPipelineBinary, pPipelineDumpHandle, pipelineHash, + pCacheId, &compileTime); } - if (result == VK_SUCCESS) - { - pCreateInfo->freeCompilerBinary = FreeWithCompiler; - } } } @@ -1253,17 +1222,20 @@ void BuildLlpcVertexInputDescriptors( static void BuildRasterizationState( const VkPipelineRasterizationStateCreateInfo* pRs, const uint32_t dynamicStateFlags, + bool* pIsConservativeOverestimation, GraphicsPipelineBinaryCreateInfo* pCreateInfo) { if (pRs != nullptr) { - EXTRACT_VK_STRUCTURES_1( + EXTRACT_VK_STRUCTURES_2( rasterizationDepthClipState, PipelineRasterizationDepthClipStateCreateInfoEXT, PipelineRasterizationStateStreamCreateInfoEXT, + PipelineRasterizationConservativeStateCreateInfoEXT, static_cast(pRs->pNext), PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT, - PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); + PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT, + 
PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT); pCreateInfo->pipelineInfo.vpState.depthClipEnable = (pRs->depthClampEnable == VK_FALSE); pCreateInfo->pipelineInfo.rsState.rasterizerDiscardEnable = (pRs->rasterizerDiscardEnable != VK_FALSE); @@ -1293,6 +1265,18 @@ static void BuildRasterizationState( { pCreateInfo->pipelineInfo.rsState.rasterizerDiscardEnable = false; } + + if ((pPipelineRasterizationConservativeStateCreateInfoEXT != nullptr) && + (pPipelineRasterizationConservativeStateCreateInfoEXT->conservativeRasterizationMode == + VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT)) + { + *pIsConservativeOverestimation = true; + } + else + { + *pIsConservativeOverestimation = false; + } + } } @@ -1392,6 +1376,7 @@ static void BuildViewportState( static void BuildNggState( const Device* pDevice, const VkShaderStageFlagBits activeStages, + const bool isConservativeOverestimation, GraphicsPipelineBinaryCreateInfo* pCreateInfo) { const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); @@ -1426,7 +1411,8 @@ static void BuildNggState( pCreateInfo->pipelineInfo.nggState.enableFastLaunch = false; #endif pCreateInfo->pipelineInfo.nggState.enableVertexReuse = false; - pCreateInfo->pipelineInfo.nggState.enableBackfaceCulling = settings.nggEnableBackfaceCulling; + pCreateInfo->pipelineInfo.nggState.enableBackfaceCulling = (isConservativeOverestimation ? 
+ false : settings.nggEnableBackfaceCulling); pCreateInfo->pipelineInfo.nggState.enableFrustumCulling = settings.nggEnableFrustumCulling; pCreateInfo->pipelineInfo.nggState.enableBoxFilterCulling = settings.nggEnableBoxFilterCulling; pCreateInfo->pipelineInfo.nggState.enableSphereCulling = settings.nggEnableSphereCulling; @@ -1471,6 +1457,16 @@ static void BuildNggState( } } +// ===================================================================================================================== +static void BuildDepthStencilState( + const VkPipelineDepthStencilStateCreateInfo* pDs, + GraphicsPipelineBinaryCreateInfo* pCreateInfo) +{ + if (pDs != nullptr) + { + } +} + // ===================================================================================================================== static void BuildPipelineShaderInfo( const Device* pDevice, @@ -1562,6 +1558,34 @@ static VkResult BuildPipelineResourceMapping( return result; } +// ===================================================================================================================== +static void BuildCompilerInfo( + const Device* pDevice, + const GraphicsPipelineShaderStageInfo* pShaderInfo, + const uint32_t shaderMask, + GraphicsPipelineBinaryCreateInfo* pCreateInfo) +{ + Vkgc::PipelineShaderInfo* ppShaderInfoOut[] = + { + &pCreateInfo->pipelineInfo.vs, + &pCreateInfo->pipelineInfo.tcs, + &pCreateInfo->pipelineInfo.tes, + &pCreateInfo->pipelineInfo.gs, + &pCreateInfo->pipelineInfo.fs, + }; + + pCreateInfo->compilerType = pDevice->GetCompiler(DefaultDeviceIndex)->CheckCompilerType(&pCreateInfo->pipelineInfo); + + for (uint32_t stage = 0; stage < ShaderStage::ShaderStageGfxCount; ++stage) + { + if (((shaderMask & (1 << stage)) != 0) && (pShaderInfo->stages[stage].pModuleHandle != nullptr)) + { + ppShaderInfoOut[stage]->pModuleData = + ShaderModule::GetShaderData(pCreateInfo->compilerType, pShaderInfo->stages[stage].pModuleHandle); + } + } +} + // 
===================================================================================================================== template static void BuildPipelineShadersInfo( @@ -1586,7 +1610,7 @@ static void BuildPipelineShadersInfo( for (uint32_t stage = 0; stage < ShaderStage::ShaderStageGfxCount; ++stage) { - if (pShaderInfo->stages[stage].pModuleHandle != nullptr) + if (((shaderMask & (1 << stage)) != 0) && (pShaderInfo->stages[stage].pModuleHandle != nullptr)) { BuildPipelineShaderInfo(pDevice, &pShaderInfo->stages[stage], @@ -1597,22 +1621,6 @@ static void BuildPipelineShadersInfo( ); } } - - pCreateInfo->compilerType = pDevice->GetCompiler(DefaultDeviceIndex)->CheckCompilerType(&pCreateInfo->pipelineInfo); - - if (pCreateInfo->compilerType == PipelineCompilerTypeLlpc) - { - pCreateInfo->pipelineInfo.enableUberFetchShader = false; - } - - for (uint32_t stage = 0; stage < ShaderStage::ShaderStageGfxCount; ++stage) - { - if (((shaderMask & (1 << stage)) != 0) && (pShaderInfo->stages[stage].pModuleHandle != nullptr)) - { - ppShaderInfoOut[stage]->pModuleData = - ShaderModule::GetShaderData(pCreateInfo->compilerType, pShaderInfo->stages[stage].pModuleHandle); - } - } } // ===================================================================================================================== @@ -1715,16 +1723,17 @@ static void BuildPreRasterizationShaderState( const VkShaderStageFlagBits activeStages, GraphicsPipelineBinaryCreateInfo* pCreateInfo) { - const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pIn->renderPass); + const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pIn->renderPass); + bool isConservativeOverestimation = false; - BuildRasterizationState(pIn->pRasterizationState, dynamicStateFlags, pCreateInfo); + BuildRasterizationState(pIn->pRasterizationState, dynamicStateFlags, &isConservativeOverestimation, pCreateInfo); if (pCreateInfo->pipelineInfo.rsState.rasterizerDiscardEnable == false) { BuildViewportState(pDevice, pIn->pViewportState, 
dynamicStateFlags, pCreateInfo); } - BuildNggState(pDevice, activeStages, pCreateInfo); + BuildNggState(pDevice, activeStages, isConservativeOverestimation, pCreateInfo); if (activeStages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) { @@ -1755,6 +1764,8 @@ static void BuildPreRasterizationShaderState( } BuildPipelineShadersInfo(pDevice, pIn, pShaderInfo, pCreateInfo); + + BuildCompilerInfo(pDevice, pShaderInfo, PrsShaderMask, pCreateInfo); } // ===================================================================================================================== @@ -1769,12 +1780,11 @@ static void BuildFragmentShaderState( BuildMultisampleState(pDevice, pIn->pMultisampleState, pRenderPass, pIn->subpass, dynamicStateFlags, pCreateInfo); + BuildDepthStencilState(pIn->pDepthStencilState, pCreateInfo); + BuildPipelineShadersInfo(pDevice, pIn, pShaderInfo, pCreateInfo); - // Handle VkPipelineDepthStencilStateCreateInfo - if (pIn->pDepthStencilState != nullptr) - { - } + BuildCompilerInfo(pDevice, pShaderInfo, FgsShaderMask, pCreateInfo); } // ===================================================================================================================== @@ -1792,11 +1802,6 @@ static void BuildFragmentOutputInterfaceState( pCreateInfo->pipelineInfo.iaState.enableMultiView = (pRenderPass != nullptr) ? 
pRenderPass->IsMultiviewEnabled() : false; - - // Handle VkPipelineDepthStencilStateCreateInfo - if (pIn->pDepthStencilState != nullptr) - { - } } // ===================================================================================================================== @@ -1808,24 +1813,46 @@ static VkResult BuildUberFetchShaderInternalData( PipelineCompiler* pDefaultCompiler = pDevice->GetCompiler(DefaultDeviceIndex); VK_ASSERT(pCreateInfo->pipelineInfo.enableUberFetchShader); - return pDefaultCompiler->BuildUberFetchShaderInternalData(pCreateInfo->compilerType, - pCreateInfo->pipelineInfo.pVertexInput, - pCreateInfo->pipelineInfo.dynamicVertexStride, - &pVbInfo->uberFetchShaderBuffer); + auto result = pDefaultCompiler->BuildUberFetchShaderInternalData(pCreateInfo->compilerType, + pCreateInfo->pipelineInfo.pVertexInput, + pCreateInfo->pipelineInfo.dynamicVertexStride, + &pVbInfo->uberFetchShaderBuffer); + + auto pSettings = &pDevice->GetRuntimeSettings(); + + if (pSettings->disablePerInstanceFetch) + { + if (pVbInfo->uberFetchShaderBuffer.requirePerIntanceFetch) + { + pCreateInfo->pipelineInfo.enableUberFetchShader = false; + pVbInfo->uberFetchShaderBuffer.bufferSize = 0; + } + } + + if (pSettings->disablePerCompFetch) + { + if (pVbInfo->uberFetchShaderBuffer.requirePerCompFetch) + { + pCreateInfo->pipelineInfo.enableUberFetchShader = false; + pVbInfo->uberFetchShaderBuffer.bufferSize = 0; + } + } + + return result; } // ===================================================================================================================== static VkResult BuildExecutablePipelineState( - const Device* pDevice, - const VkGraphicsPipelineCreateInfo* pIn, - const uint32_t dynamicStateFlags, - GraphicsPipelineBinaryCreateInfo* pCreateInfo, - VbInfo* pVbInfo) + const Device* pDevice, + const VkGraphicsPipelineCreateInfo* pIn, + const GraphicsPipelineShaderStageInfo* pShaderInfo, + const PipelineLayout* pPipelineLayout, + const uint32_t dynamicStateFlags, + 
GraphicsPipelineBinaryCreateInfo* pCreateInfo, + VbInfo* pVbInfo) { const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); PipelineCompiler* pDefaultCompiler = pDevice->GetCompiler(DefaultDeviceIndex); - const PipelineLayout* pLayout = PipelineLayout::ObjectFromHandle(pIn->layout); - if (pCreateInfo->pipelineInfo.rsState.rasterizerDiscardEnable == true) { pCreateInfo->pipelineInfo.rsState.numSamples = 1; @@ -1858,12 +1885,32 @@ static VkResult BuildExecutablePipelineState( } } - VkResult result = BuildPipelineResourceMapping(pDevice, pLayout, availableStageMask, pVbInfo, pCreateInfo); + VkResult result = BuildPipelineResourceMapping(pDevice, pPipelineLayout, availableStageMask, pVbInfo, pCreateInfo); - if ((result == VK_SUCCESS) && pCreateInfo->pipelineInfo.enableUberFetchShader) + if (result == VK_SUCCESS) { - VK_ASSERT(pVbInfo->uberFetchShaderBuffer.userDataOffset > 0); - result = BuildUberFetchShaderInternalData(pDevice, pCreateInfo, pVbInfo); + // Compiler info is required to be re-built here since we may need to change the compiler when all the states + // of an executable graphics pipeline are available. The shader mask here refers to the shader stages which + // are valid in this pipeline. + const Vkgc::GraphicsPipelineBuildInfo& pipelineInfo = pCreateInfo->pipelineInfo; + uint32_t shaderMask = 0; + shaderMask |= (pipelineInfo.vs.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageVertex) : 0; + shaderMask |= (pipelineInfo.tcs.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageTessControl) : 0; + shaderMask |= (pipelineInfo.tes.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageTessEval) : 0; + shaderMask |= (pipelineInfo.gs.pModuleData != nullptr) ? (1 << ShaderStage::ShaderStageGeometry) : 0; + shaderMask |= (pipelineInfo.fs.pModuleData != nullptr) ? 
(1 << ShaderStage::ShaderStageFragment) : 0; + BuildCompilerInfo(pDevice, pShaderInfo, shaderMask, pCreateInfo); + + if (pCreateInfo->compilerType == PipelineCompilerTypeLlpc) + { + pCreateInfo->pipelineInfo.enableUberFetchShader = false; + } + + if (pCreateInfo->pipelineInfo.enableUberFetchShader) + { + VK_ASSERT(pVbInfo->uberFetchShaderBuffer.userDataOffset > 0); + result = BuildUberFetchShaderInternalData(pDevice, pCreateInfo, pVbInfo); + } } return result; @@ -1875,6 +1922,7 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( const Device* pDevice, const VkGraphicsPipelineCreateInfo* pIn, const GraphicsPipelineShaderStageInfo* pShaderInfo, + const PipelineLayout* pPipelineLayout, GraphicsPipelineBinaryCreateInfo* pCreateInfo, VbInfo* pVbInfo) { @@ -1882,15 +1930,13 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( VkResult result = VK_SUCCESS; - const VkGraphicsPipelineCreateInfo* pGraphicsPipelineCreateInfo = pIn; - VkShaderStageFlagBits activeStages = GraphicsPipelineCommon::GetActiveShaderStages( pIn ); uint32_t dynamicStateFlags = GraphicsPipelineCommon::GetDynamicStateFlags( pIn->pDynamicState - ); + ); BuildVertexInputInterfaceState(pDevice, pIn, dynamicStateFlags, pCreateInfo, &pVbInfo->bindingInfo); @@ -1908,7 +1954,8 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( } { - result = BuildExecutablePipelineState(pDevice, pIn, dynamicStateFlags, pCreateInfo, pVbInfo); + result = BuildExecutablePipelineState( + pDevice, pIn, pShaderInfo, pPipelineLayout, dynamicStateFlags, pCreateInfo, pVbInfo); } return result; @@ -2252,7 +2299,7 @@ Util::Result PipelineCompiler::RegisterAndLoadReinjectionBinary( // ===================================================================================================================== // Filter VkPipelineCreateFlags to only values used for pipeline caching -VkPipelineCreateFlags PipelineCompiler::GetCacheIdControlFlags( +static VkPipelineCreateFlags GetCacheIdControlFlags( VkPipelineCreateFlags in) { 
// The following flags should NOT affect cache computation @@ -2268,6 +2315,83 @@ VkPipelineCreateFlags PipelineCompiler::GetCacheIdControlFlags( return in & (~CacheIdIgnoreFlags); } +// ===================================================================================================================== +// The pipeline cache ID contains additional inputs outside the shader creation information for pipeline executable +// properties as well as options to avoid user error when changing performance tuning, compiler, or any other settings. +static void GetCommonPipelineCacheId( + uint32_t deviceIdx, + VkPipelineCreateFlags flags, + PipelineOptimizerKey* pPipelineProfileKey, + PipelineCompilerType compilerType, + uint64_t pipelineHash, + const Util::MetroHash::Hash& settingsHash, + Util::MetroHash128* pHash) +{ + pHash->Update(pipelineHash); + pHash->Update(deviceIdx); + pHash->Update(GetCacheIdControlFlags(flags)); + pHash->Update(*pPipelineProfileKey); + pHash->Update(compilerType); + pHash->Update(settingsHash); +} + +// ===================================================================================================================== +void PipelineCompiler::GetComputePipelineCacheId( + uint32_t deviceIdx, + ComputePipelineBinaryCreateInfo* pCreateInfo, + uint64_t pipelineHash, + const Util::MetroHash::Hash& settingsHash, + Util::MetroHash::Hash* pCacheId) +{ + Util::MetroHash128 hash = {}; + + GetCommonPipelineCacheId( + deviceIdx, + pCreateInfo->flags, + &pCreateInfo->pipelineProfileKey, + pCreateInfo->compilerType, + pipelineHash, + settingsHash, + &hash); + + hash.Update(pCreateInfo->pipelineInfo.cs.options); + hash.Update(pCreateInfo->pipelineInfo.options); + + hash.Finalize(pCacheId->bytes); +} + +// ===================================================================================================================== +void PipelineCompiler::GetGraphicsPipelineCacheId( + uint32_t deviceIdx, + GraphicsPipelineBinaryCreateInfo* pCreateInfo, + uint64_t 
pipelineHash, + const Util::MetroHash::Hash& settingsHash, + Util::MetroHash::Hash* pCacheId) +{ + Util::MetroHash128 hash = {}; + + GetCommonPipelineCacheId( + deviceIdx, + pCreateInfo->flags, + &pCreateInfo->pipelineProfileKey, + pCreateInfo->compilerType, + pipelineHash, + settingsHash, + &hash); + + hash.Update(pCreateInfo->pipelineInfo.vs.options); + hash.Update(pCreateInfo->pipelineInfo.tes.options); + hash.Update(pCreateInfo->pipelineInfo.tcs.options); + hash.Update(pCreateInfo->pipelineInfo.gs.options); + hash.Update(pCreateInfo->pipelineInfo.fs.options); + hash.Update(pCreateInfo->pipelineInfo.options); + hash.Update(pCreateInfo->pipelineInfo.nggState); + hash.Update(pCreateInfo->dbFormat); + hash.Update(pCreateInfo->pipelineInfo.dynamicVertexStride); + + hash.Finalize(pCacheId->bytes); +} + // ===================================================================================================================== VkResult PipelineCompiler::BuildUberFetchShaderInternalData( PipelineCompilerType compilerType, diff --git a/icd/api/render_state_cache.cpp b/icd/api/render_state_cache.cpp index af7f1408..f4f896bc 100644 --- a/icd/api/render_state_cache.cpp +++ b/icd/api/render_state_cache.cpp @@ -761,7 +761,7 @@ void RenderStateCache::DestroyDepthStencilState( // ===================================================================================================================== // Returns true if the given -VK_INLINE bool RenderStateCache::IsEnabled( +bool RenderStateCache::IsEnabled( uint32_t staticStateFlag ) const { diff --git a/icd/api/renderpass/renderpass_builder.cpp b/icd/api/renderpass/renderpass_builder.cpp index 620aa799..c30d75d5 100644 --- a/icd/api/renderpass/renderpass_builder.cpp +++ b/icd/api/renderpass/renderpass_builder.cpp @@ -163,7 +163,11 @@ Pal::Result RenderPassBuilder::BuildInitialState() { if (m_pAttachments[attachment].finalUseSubpass != VK_SUBPASS_EXTERNAL) { - 
m_pSubpasses[m_pAttachments[attachment].finalUseSubpass].flags.hasFinalUseAttachments = true; + m_pSubpasses[m_pAttachments[attachment].finalUseSubpass].flags.hasFinalUseAttachments |= + ((m_pAttachments[attachment].prevReferenceLayout.layout != + m_pAttachments[attachment].pDesc->finalLayout) || + (m_pAttachments[attachment].prevReferenceStencilLayout.layout != + m_pAttachments[attachment].pDesc->stencilFinalLayout)); } } @@ -1322,7 +1326,7 @@ size_t RenderPassBuilder::SubpassState::GetExtraSize() const // ===================================================================================================================== template -VK_INLINE void* AssignArray(size_t n, void* pStorage, uint32_t* pArraySize, T** ppDest) +void* AssignArray(size_t n, void* pStorage, uint32_t* pArraySize, T** ppDest) { *pArraySize = static_cast(n); diff --git a/icd/api/renderpass/renderpass_types.h b/icd/api/renderpass/renderpass_types.h index d491bf81..66aae761 100644 --- a/icd/api/renderpass/renderpass_types.h +++ b/icd/api/renderpass/renderpass_types.h @@ -45,10 +45,10 @@ struct RPImageLayout VkImageLayout layout; // Base Vulkan image layout uint32_t extraUsage; // Extra PAL layout usages (used to e.g. 
make attachments resolve-compatible) - VK_INLINE bool operator==(const RPImageLayout& rhs) const + bool operator==(const RPImageLayout& rhs) const { return (layout == rhs.layout) && (extraUsage == rhs.extraUsage); } - VK_INLINE bool operator!=(const RPImageLayout& rhs) const + bool operator!=(const RPImageLayout& rhs) const { return (layout != rhs.layout) || (extraUsage != rhs.extraUsage); } }; diff --git a/icd/api/sqtt/sqtt_layer.h b/icd/api/sqtt/sqtt_layer.h index 35b5d7e5..e6bb02bd 100644 --- a/icd/api/sqtt/sqtt_layer.h +++ b/icd/api/sqtt/sqtt_layer.h @@ -81,7 +81,7 @@ class SqttQueueState void DebugLabelEnd(); void DebugLabelInsert(const VkDebugUtilsLabelEXT* pMarkerInfo); - VK_INLINE const DispatchTable* GetNextLayer() const + const DispatchTable* GetNextLayer() const { return m_pNextLayer; } private: @@ -138,13 +138,13 @@ class SqttCmdBufferState void PipelineBound(VkPipelineBindPoint bindPoint, VkPipeline pipeline); - VK_INLINE const DispatchTable* GetNextLayer() const + const DispatchTable* GetNextLayer() const { return m_pNextLayer; } - VK_INLINE CmdBuffer* GetParent() const + CmdBuffer* GetParent() const { return m_pCmdBuf; } - VK_INLINE RgpSqttMarkerCbID GetId() const + RgpSqttMarkerCbID GetId() const { return m_cbId; } void PalBarrierCallback( diff --git a/icd/api/sqtt/sqtt_mgr.cpp b/icd/api/sqtt/sqtt_mgr.cpp index 5777410f..7879c9ae 100644 --- a/icd/api/sqtt/sqtt_mgr.cpp +++ b/icd/api/sqtt/sqtt_mgr.cpp @@ -44,7 +44,7 @@ namespace vk // ===================================================================================================================== // This function atomically increments the given 32-bit unsigned int until a given max value, at which point it // wraps to 0. 
-VK_INLINE uint32_t AtomicWrappedIncrement( +static uint32_t AtomicWrappedIncrement( uint32_t maxValue, volatile uint32_t* pValue) { diff --git a/icd/api/sqtt/sqtt_mgr.h b/icd/api/sqtt/sqtt_mgr.h index 48701ce7..a8fdcf7c 100644 --- a/icd/api/sqtt/sqtt_mgr.h +++ b/icd/api/sqtt/sqtt_mgr.h @@ -64,7 +64,7 @@ class SqttMgr uint32_t queueFamilyIndex, const VkCommandBufferBeginInfo* pBeginInfo); - VK_INLINE const DispatchTable* GetNextLayer() const + const DispatchTable* GetNextLayer() const { return &m_nextLayer; } static void PalDeveloperCallback( diff --git a/icd/api/sqtt/sqtt_object_mgr.h b/icd/api/sqtt/sqtt_object_mgr.h index c8c516d2..70153649 100644 --- a/icd/api/sqtt/sqtt_object_mgr.h +++ b/icd/api/sqtt/sqtt_object_mgr.h @@ -83,16 +83,16 @@ class SqttObjectMgr void Init(Device* pDevice); template - VK_INLINE bool IsEnabled( + bool IsEnabled( ObjectType objectType) const; template VK_INLINE SqttMetaState* GetMetaState( + typename ObjectType> SqttMetaState* GetMetaState( ObjectType objectType, HandleType handle); template VK_INLINE const char* GetDebugName( + typename ObjectType> const char* GetDebugName( ObjectType objectType, HandleType handle); diff --git a/icd/api/strings/entry_points.txt b/icd/api/strings/entry_points.txt index 39de8703..28b0ffd7 100644 --- a/icd/api/strings/entry_points.txt +++ b/icd/api/strings/entry_points.txt @@ -409,3 +409,4 @@ vkCmdCopyBufferToImage2KHR @device @dext(KHR_copy_c vkCmdCopyImage2KHR @device @dext(KHR_copy_commands2) vkCmdCopyImageToBuffer2KHR @device @dext(KHR_copy_commands2) vkCmdResolveImage2KHR @device @dext(KHR_copy_commands2) + diff --git a/icd/api/strings/extensions.txt b/icd/api/strings/extensions.txt index 2f8256fb..d14325c3 100644 --- a/icd/api/strings/extensions.txt +++ b/icd/api/strings/extensions.txt @@ -112,6 +112,8 @@ VK_EXT_subgroup_size_control VK_EXT_calibrated_timestamps VK_KHR_pipeline_executable_properties VK_EXT_line_rasterization +VK_EXT_shader_atomic_float +VK_EXT_shader_atomic_float2 
VK_KHR_shader_clock VK_KHR_shader_subgroup_extended_types VK_KHR_spirv_1_4 @@ -140,3 +142,4 @@ VK_EXT_extended_dynamic_state2 VK_KHR_copy_commands2 VK_EXT_ycbcr_image_arrays VK_KHR_zero_initialize_workgroup_memory +VK_EXT_load_store_op_none diff --git a/icd/api/utils/json_reader.cpp b/icd/api/utils/json_reader.cpp index 13235bb5..52c9d1b5 100644 --- a/icd/api/utils/json_reader.cpp +++ b/icd/api/utils/json_reader.cpp @@ -67,7 +67,7 @@ static void JsonDefaultFree( // ===================================================================================================================== // Returns the next character after offset entries without advancing the buffer. -VK_INLINE char JsonPeek( +static char JsonPeek( JsonContext* pCtx, size_t offset = 0) { @@ -83,7 +83,7 @@ VK_INLINE char JsonPeek( // ===================================================================================================================== // Advances the buffer -VK_INLINE void JsonAdvance(JsonContext* pCtx) +static void JsonAdvance(JsonContext* pCtx) { if (pCtx->sz > 0) { @@ -94,7 +94,7 @@ VK_INLINE void JsonAdvance(JsonContext* pCtx) // ===================================================================================================================== // Returns the next character after eating white-space and ignoring comments. Advances the buffer. 
-VK_INLINE char JsonNextToken( +static char JsonNextToken( JsonContext* pCtx) { while (true) diff --git a/icd/api/utils/temp_mem_arena.cpp b/icd/api/utils/temp_mem_arena.cpp index 9ba10152..05d20679 100644 --- a/icd/api/utils/temp_mem_arena.cpp +++ b/icd/api/utils/temp_mem_arena.cpp @@ -142,7 +142,7 @@ void TempMemArena::FreeChunks( } // ===================================================================================================================== -VK_INLINE void* TempMemArena::AllocFromChunk( +void* TempMemArena::AllocFromChunk( MemChunk* pChunk, size_t size) { diff --git a/icd/api/utils/temp_mem_arena.h b/icd/api/utils/temp_mem_arena.h index f8a25d34..7d2469c3 100644 --- a/icd/api/utils/temp_mem_arena.h +++ b/icd/api/utils/temp_mem_arena.h @@ -86,7 +86,7 @@ struct TempMemArena }; void* AllocFromNewChunk(size_t size); - VK_INLINE void* AllocFromChunk(MemChunk* pChunk, size_t size); + void* AllocFromChunk(MemChunk* pChunk, size_t size); void ResetChunk(MemChunk* pChunk); void FreeChunks(MemChunk* pChunk); diff --git a/icd/api/vk_buffer.cpp b/icd/api/vk_buffer.cpp index c77b281c..b556784a 100644 --- a/icd/api/vk_buffer.cpp +++ b/icd/api/vk_buffer.cpp @@ -44,30 +44,20 @@ namespace vk Buffer::Buffer( Device* pDevice, const VkAllocationCallbacks* pAllocator, - VkBufferCreateFlags flags, - VkBufferUsageFlags usage, + const VkBufferCreateInfo* pCreateInfo, Pal::IGpuMemory** pGpuMemory, - VkSharingMode sharingMode, - uint32_t queueFamilyIndexCount, - const uint32_t* pQueueFamilyIndices, - VkDeviceSize size, BufferFlags internalFlags) : - m_size(size), + m_size(pCreateInfo->size), m_memOffset(0), m_barrierPolicy( pDevice, - usage, - sharingMode, - queueFamilyIndexCount, - pQueueFamilyIndices) + pCreateInfo->usage, + pCreateInfo->sharingMode, + pCreateInfo->queueFamilyIndexCount, + pCreateInfo->pQueueFamilyIndices) { m_internalFlags.u32All = internalFlags.u32All; - m_internalFlags.usageUniformBuffer = (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) ? 
1 : 0; - m_internalFlags.createSparseBinding = (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) ? 1 : 0; - m_internalFlags.createSparseResidency = (flags & VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT) ? 1 : 0; - m_internalFlags.createProtected = (flags & VK_BUFFER_CREATE_PROTECTED_BIT) ? 1 : 0; - // Note: The VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT is only used in vk_memory objects. for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) { @@ -93,21 +83,16 @@ VkResult Buffer::Create( const VkAllocationCallbacks* pAllocator, VkBuffer* pBuffer) { - VkDeviceSize size; void* pMemory = nullptr; Pal::IGpuMemory* pGpuMemory[MaxPalDevices] = {}; Pal::Result palResult = Pal::Result::Success; - // We ignore sharing information for buffers, it has no relevance for us currently. - VK_IGNORE(pCreateInfo->sharingMode); - size_t apiSize = ObjectSize(pDevice); - size = pCreateInfo->size; bool isSparse = (pCreateInfo->flags & SparseEnablingFlags) != 0; - if (isSparse && (size != 0)) + if (isSparse && (pCreateInfo->size != 0)) { // We need virtual remapping support for all sparse resources VK_ASSERT(pDevice->VkPhysicalDevice(DefaultDeviceIndex)->IsVirtualRemappingSupported()); @@ -123,7 +108,7 @@ VkResult Buffer::Create( info.alignment = pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties(). 
gpuMemoryProperties.virtualMemAllocGranularity; - info.size = Util::Pow2Align(size, info.alignment); + info.size = Util::Pow2Align(pCreateInfo->size, info.alignment); info.flags.u32All = 0; info.flags.virtualAlloc = 1; info.flags.globalGpuVa = pDevice->IsGlobalGpuVaEnabled(); @@ -176,65 +161,21 @@ VkResult Buffer::Create( } } - BufferFlags bufferFlags; - - bufferFlags.u32All = 0; - if (palResult == Pal::Result::Success) { - const VkExternalMemoryBufferCreateInfo* pExternalInfo = - static_cast(pCreateInfo->pNext); - if ((pExternalInfo != nullptr) && - (pExternalInfo->sType == VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO)) - { - VkExternalMemoryProperties externalMemoryProperties = {}; - - pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetExternalMemoryProperties( - isSparse, - false, - static_cast(pExternalInfo->handleTypes), - &externalMemoryProperties); - - if (externalMemoryProperties.externalMemoryFeatures & VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT) - { - bufferFlags.dedicatedRequired = true; - } - - if (externalMemoryProperties.externalMemoryFeatures & (VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | - VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT)) - { - bufferFlags.externallyShareable = true; - - if (pExternalInfo->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT) - { - bufferFlags.externalPinnedHost = true; - } - } - } - } - - if (palResult == Pal::Result::Success) - { - bufferFlags.internalMemBound = isSparse; + BufferFlags bufferFlags; + CalculateBufferFlags(pDevice, pCreateInfo, &bufferFlags); // Construct API buffer object. 
VK_PLACEMENT_NEW (pMemory) Buffer (pDevice, pAllocator, - pCreateInfo->flags, - pCreateInfo->usage, + pCreateInfo, pGpuMemory, - pCreateInfo->sharingMode, - pCreateInfo->queueFamilyIndexCount, - pCreateInfo->pQueueFamilyIndices, - size, bufferFlags); *pBuffer = Buffer::HandleFromVoidPointer(pMemory); - } - if (palResult == Pal::Result::Success) - { - LogBufferCreate(size, pCreateInfo, *pBuffer, pDevice); + LogBufferCreate(pCreateInfo, *pBuffer, pDevice); } return PalToVkResult(palResult); @@ -243,7 +184,6 @@ VkResult Buffer::Create( // ===================================================================================================================== // Logs the creation of a new buffer to PAL void Buffer::LogBufferCreate( - VkDeviceSize size, const VkBufferCreateInfo* pCreateInfo, VkBuffer buffer, const Device* pDevice) @@ -295,7 +235,7 @@ void Buffer::LogBufferCreate( static_cast(PalUsageFlag::ShaderDeviceAddress), "Usage Flag Mismatch"); Pal::ResourceDescriptionBuffer desc = {}; - desc.size = static_cast(size); + desc.size = pCreateInfo->size; desc.createFlags = pCreateInfo->flags; desc.usageFlags = pCreateInfo->usage; @@ -429,22 +369,47 @@ VkResult Buffer::BindMemory( return VK_SUCCESS; } +// ===================================================================================================================== +// Get the buffer's memory requirements from VkBuffer itself +void Buffer::GetMemoryRequirements( + const Device* pDevice, + VkMemoryRequirements* pMemoryRequirements) +{ + GetBufferMemoryRequirements(pDevice, &m_internalFlags, m_size, pMemoryRequirements); +} + +// ===================================================================================================================== +// Get the buffer's memory requirements from VkBufferCreateInfo +void Buffer::CalculateMemoryRequirements( + const Device* pDevice, + const VkBufferCreateInfo* pCreateInfo, + VkMemoryRequirements* pMemoryRequirements) +{ + BufferFlags bufferFlags; + + 
CalculateBufferFlags(pDevice, pCreateInfo, &bufferFlags); + + GetBufferMemoryRequirements(pDevice, &bufferFlags, pCreateInfo->size, pMemoryRequirements); +} + // ===================================================================================================================== // Get the buffer's memory requirements -VkResult Buffer::GetMemoryRequirements( +void Buffer::GetBufferMemoryRequirements( const Device* pDevice, + const BufferFlags* pBufferFlags, + const VkDeviceSize size, VkMemoryRequirements* pMemoryRequirements) { pMemoryRequirements->alignment = 4; // In case of sparse buffers the alignment and granularity is the page size - if (m_internalFlags.createSparseBinding) + if (pBufferFlags->createSparseBinding) { pMemoryRequirements->alignment = Util::Max(pMemoryRequirements->alignment, pDevice->GetProperties().virtualMemPageSize); } - if (m_internalFlags.usageUniformBuffer) + if (pBufferFlags->usageUniformBuffer) { constexpr VkDeviceSize UniformBufferAlignment = static_cast(sizeof(float) * 4); @@ -452,13 +417,13 @@ VkResult Buffer::GetMemoryRequirements( UniformBufferAlignment); } - pMemoryRequirements->size = Util::RoundUpToMultiple(m_size, pMemoryRequirements->alignment); + pMemoryRequirements->size = Util::RoundUpToMultiple(size, pMemoryRequirements->alignment); // MemoryRequirements cannot return smaller size than buffer size. // MAX_UINT64 can be used as buffer size. 
- if (m_size > pMemoryRequirements->size) + if (size > pMemoryRequirements->size) { - pMemoryRequirements->size = m_size; + pMemoryRequirements->size = size; } // Allow all available memory types for buffers @@ -478,19 +443,19 @@ VkResult Buffer::GetMemoryRequirements( } // Limit heaps to those compatible with pinned system memory - if (m_internalFlags.externalPinnedHost) + if (pBufferFlags->externalPinnedHost) { pMemoryRequirements->memoryTypeBits &= pDevice->GetPinnedSystemMemoryTypes(); VK_ASSERT(pMemoryRequirements->memoryTypeBits != 0); } - if (m_internalFlags.externallyShareable) + if (pBufferFlags->externallyShareable) { pMemoryRequirements->memoryTypeBits &= pDevice->GetMemoryTypeMaskForExternalSharing(); } - if (m_internalFlags.createProtected) + if (pBufferFlags->createProtected) { // If the buffer is protected only keep the protected type pMemoryRequirements->memoryTypeBits &= pDevice->GetMemoryTypeMaskMatching(VK_MEMORY_PROPERTY_PROTECTED_BIT); @@ -507,8 +472,54 @@ VkResult Buffer::GetMemoryRequirements( // remove the device coherent memory type pMemoryRequirements->memoryTypeBits &= ~pDevice->GetMemoryTypeMaskMatching(VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD); } +} - return VK_SUCCESS; +void Buffer::CalculateBufferFlags( + const Device* pDevice, + const VkBufferCreateInfo* pCreateInfo, + BufferFlags* pBufferFlags) +{ + pBufferFlags->u32All = 0; + + pBufferFlags->usageUniformBuffer = (pCreateInfo->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) ? 1 : 0; + pBufferFlags->createSparseBinding = (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) ? 1 : 0; + pBufferFlags->createSparseResidency = (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT) ? 1 : 0; + pBufferFlags->createProtected = (pCreateInfo->flags & VK_BUFFER_CREATE_PROTECTED_BIT) ? 1 : 0; + // Note: The VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT is only used in vk_memory objects. 
+ + bool isSparse = (pCreateInfo->flags & SparseEnablingFlags) != 0; + + const VkExternalMemoryBufferCreateInfo* pExternalInfo = + static_cast(pCreateInfo->pNext); + if ((pExternalInfo != nullptr) && + (pExternalInfo->sType == VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO)) + { + VkExternalMemoryProperties externalMemoryProperties = {}; + + pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetExternalMemoryProperties( + isSparse, + false, + static_cast(pExternalInfo->handleTypes), + &externalMemoryProperties); + + if (externalMemoryProperties.externalMemoryFeatures & VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT) + { + pBufferFlags->dedicatedRequired = true; + } + + if (externalMemoryProperties.externalMemoryFeatures & (VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | + VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT)) + { + pBufferFlags->externallyShareable = true; + + if (pExternalInfo->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT) + { + pBufferFlags->externalPinnedHost = true; + } + } + } + + pBufferFlags->internalMemBound = isSparse; } namespace entry diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index ed96fc7e..4cd01858 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -724,7 +724,7 @@ Pal::Result CmdBuffer::PalCmdBufferEnd() } // ===================================================================================================================== -Pal::Result CmdBuffer::PalCmdBufferReset(Pal::ICmdAllocator* pCmdAllocator, bool returnGpuMemory) +Pal::Result CmdBuffer::PalCmdBufferReset(bool returnGpuMemory) { Pal::Result result = Pal::Result::Success; @@ -1520,7 +1520,7 @@ VkResult CmdBuffer::Reset(VkCommandBufferResetFlags flags) ReleaseResources(); } - result = PalToVkResult(PalCmdBufferReset(nullptr, releaseResources)); + result = PalToVkResult(PalCmdBufferReset(releaseResources)); m_flags.wasBegun = false; } @@ -2034,15 +2034,6 @@ void CmdBuffer::BindDescriptorSets( 
DbgBarrierPostCmd(DbgBarrierBindSetsPushConstants); } -// ===================================================================================================================== -VK_INLINE bool CmdBuffer::PalPipelineBindingOwnedBy( - Pal::PipelineBindPoint palBind, - PipelineBindPoint apiBind - ) const -{ - return m_allGpuState.palToApiPipeline[static_cast(palBind)] == apiBind; -} - // ===================================================================================================================== template VKAPI_ATTR void VKAPI_CALL CmdBuffer::CmdBindDescriptorSets( @@ -3306,7 +3297,6 @@ void CmdBuffer::PalCmdSetEvent( } // ===================================================================================================================== -template void CmdBuffer::PalCmdResolveImage( const Image& srcImage, Pal::ImageLayout srcImageLayout, @@ -3333,7 +3323,7 @@ void CmdBuffer::PalCmdResolveImage( dstImageLayout, resolveMode, regionCount, - pRegions + (regionPerDevice ? (MaxRangePerAttachment * deviceIdx) : 0), + pRegions, 0); } while (deviceGroup.IterateNext()); @@ -3343,18 +3333,6 @@ void CmdBuffer::PalCmdResolveImage( DbgBarrierPostCmd(DbgBarrierResolve); } -// ===================================================================================================================== -// Instantiate the template function -template void CmdBuffer::PalCmdResolveImage( - const Image& srcImage, - Pal::ImageLayout srcImageLayout, - const Image& dstImage, - Pal::ImageLayout dstImageLayout, - Pal::ResolveMode resolveMode, - uint32_t regionCount, - const Pal::ImageResolveRegion* pRegions, - uint32_t deviceMask); - // ===================================================================================================================== // Clears a set of attachments in the current subpass using PAL's CmdClear*Image() commands. 
void CmdBuffer::ClearImageAttachments( @@ -3567,7 +3545,7 @@ void CmdBuffer::ResolveImage( ++rectIdx; } - PalCmdResolveImage( + PalCmdResolveImage( *pSrcImage, palSrcImageLayout, *pDstImage, @@ -6351,7 +6329,7 @@ void CmdBuffer::RPResolveAttachments( regions[idx].pQuadSamplePattern = pSampleLocations; } - PalCmdResolveImage( + PalCmdResolveImage( *srcAttachment.pImage, srcLayout, *dstAttachment.pImage, @@ -6527,7 +6505,7 @@ void CmdBuffer::EndRenderPass() } // ===================================================================================================================== -VK_INLINE void CmdBuffer::WritePushConstants( +void CmdBuffer::WritePushConstants( PipelineBindPoint apiBindPoint, Pal::PipelineBindPoint palBindPoint, const PipelineLayout* pLayout, @@ -7118,7 +7096,7 @@ void CmdBuffer::EndTransformFeedback( } // ===================================================================================================================== -VK_INLINE void CmdBuffer::CalcCounterBufferAddrs( +void CmdBuffer::CalcCounterBufferAddrs( uint32_t firstCounterBuffer, uint32_t counterBufferCount, const VkBuffer* pCounterBuffers, diff --git a/icd/api/vk_compute_pipeline.cpp b/icd/api/vk_compute_pipeline.cpp index 3118c0a7..a3e914a8 100644 --- a/icd/api/vk_compute_pipeline.cpp +++ b/icd/api/vk_compute_pipeline.cpp @@ -148,6 +148,7 @@ VkResult ComputePipeline::Create( VkResult result = BuildShaderStageInfo(pDevice, 1, &pCreateInfo->stage, + false, [](const uint32_t inputIdx, const uint32_t stageIdx) { return 0u; diff --git a/icd/api/vk_descriptor_set_layout.cpp b/icd/api/vk_descriptor_set_layout.cpp index 4f58475c..ddf32725 100644 --- a/icd/api/vk_descriptor_set_layout.cpp +++ b/icd/api/vk_descriptor_set_layout.cpp @@ -31,6 +31,7 @@ #include "include/vk_descriptor_set_layout.h" #include "include/vk_device.h" #include "include/vk_sampler.h" +#include "palVectorImpl.h" #include "palMetroHash.h" @@ -214,6 +215,18 @@ uint32_t DescriptorSetLayout::GetDescStaticSectionDwSize( return size 
/ sizeof(uint32_t); } +// ===================================================================================================================== +// Returns the dword size required in the static section of the given binding point of the given DescriptorSetLayout. +uint32_t DescriptorSetLayout::GetDescStaticSectionDwSize( + const DescriptorSetLayout* pSrcDescSetLayout, + const uint32_t binding) +{ + const BindingInfo& bindingInfo = pSrcDescSetLayout->Binding(binding); + + return (bindingInfo.info.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) ? + bindingInfo.sta.dwSize : bindingInfo.sta.dwArrayStride; +} + // ===================================================================================================================== // Returns the dword size of the dynamic descriptor uint32_t DescriptorSetLayout::GetDynamicBufferDescDwSize(const Device* pDevice) @@ -342,7 +355,8 @@ void DescriptorSetLayout::ConvertImmutableInfo( uint32_t descSizeInDw, ImmSectionInfo* pSectionInfo, BindingSectionInfo* pBindingSectionInfo, - const DescriptorBindingFlags bindingFlags) + const DescriptorBindingFlags bindingFlags, + const DescriptorSetLayout* pSrcDescSetLayout) { if ((pBindingInfo->pImmutableSamplers != nullptr) && ((pBindingInfo->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) || @@ -388,20 +402,33 @@ void DescriptorSetLayout::ConvertImmutableInfo( // Copy the immutable descriptor data. 
uint32_t* pDestAddr = &pSectionInfo->pImmutableSamplerData[pBindingSectionInfo->dwOffset]; - for (uint32_t i = 0; i < descCount; ++i, pDestAddr += pBindingSectionInfo->dwArrayStride) + if (pSrcDescSetLayout == nullptr) { - const void* pSamplerDesc = Sampler::ObjectFromHandle(pBindingInfo->pImmutableSamplers[i])->Descriptor(); + for (uint32_t i = 0; i < descCount; ++i, pDestAddr += pBindingSectionInfo->dwArrayStride) + { + const void* pSamplerDesc = Sampler::ObjectFromHandle(pBindingInfo->pImmutableSamplers[i])->Descriptor(); - memcpy(pDestAddr, pSamplerDesc, descSizeInDw * sizeof(uint32_t)); + memcpy(pDestAddr, pSamplerDesc, descSizeInDw * sizeof(uint32_t)); - if (Sampler::ObjectFromHandle(pBindingInfo->pImmutableSamplers[i])->IsYCbCrSampler()) - { - // Copy the YCbCrMetaData - const void* pYCbCrMetaData = Util::VoidPtrInc(pSamplerDesc, descSizeInDw * sizeof(uint32_t)); - void* pImmutableYCbCrMetaDataDestAddr = Util::VoidPtrInc(pDestAddr, descSizeInDw * sizeof(uint32_t)); - memcpy(pImmutableYCbCrMetaDataDestAddr, pYCbCrMetaData, yCbCrMetaDataSizeInDW * sizeof(uint32_t)); + if (Sampler::ObjectFromHandle(pBindingInfo->pImmutableSamplers[i])->IsYCbCrSampler()) + { + // Copy the YCbCrMetaData + const void* pYCbCrMetaData = Util::VoidPtrInc(pSamplerDesc, descSizeInDw * sizeof(uint32_t)); + void* pImmutableYCbCrMetaDataDestAddr = Util::VoidPtrInc(pDestAddr, descSizeInDw * sizeof(uint32_t)); + memcpy(pImmutableYCbCrMetaDataDestAddr, pYCbCrMetaData, yCbCrMetaDataSizeInDW * sizeof(uint32_t)); + } } } + else + { + const DescriptorSetLayout::BindingInfo& refBindingInfo = + pSrcDescSetLayout->Binding(pBindingInfo->binding); + + const void* pSamplerDesc = Util::VoidPtrInc(pSrcDescSetLayout->Info().imm.pImmutableSamplerData, + refBindingInfo.imm.dwOffset * sizeof(uint32_t)); + + memcpy(pDestAddr, pSamplerDesc, refBindingInfo.imm.dwSize * sizeof(uint32_t)); + } } } else @@ -422,9 +449,7 @@ VkResult DescriptorSetLayout::ConvertCreateInfo( { VK_ASSERT((pIn != nullptr) && 
(pIn->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)); - pOut->activeStageMask = VK_SHADER_STAGE_ALL; // TODO set this up properly enumerating the active stages. - // currently this flag is only tested for non zero, so - // setting all flags active makes no difference... + pOut->activeStageMask = 0; pOut->varDescStride = 0; @@ -459,6 +484,13 @@ VkResult DescriptorSetLayout::ConvertCreateInfo( } } + { + for (uint32 inIndex = 0; inIndex < pIn->bindingCount; ++inIndex) + { + pOut->activeStageMask |= pIn->pBindings[inIndex].stageFlags; + } + } + // Bindings numbers are allowed to come in out-of-order, as well as with gaps. // We compute offsets using the size we've seen so far as we iterate, so we need to handle // the bindings in binding-number order, rather than array order. @@ -469,7 +501,9 @@ VkResult DescriptorSetLayout::ConvertCreateInfo( for (uint32 inIndex = 0; inIndex < pIn->bindingCount; ++inIndex) { const VkDescriptorSetLayoutBinding & currentBinding = pIn->pBindings[inIndex]; + { pOutBindings[currentBinding.binding].info = currentBinding; + } if (currentBinding.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { @@ -542,7 +576,7 @@ VkResult DescriptorSetLayout::ConvertCreateInfo( // ===================================================================================================================== // Creates a descriptor set layout object. 
VkResult DescriptorSetLayout::Create( - Device* pDevice, + const Device* pDevice, const VkDescriptorSetLayoutCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDescriptorSetLayout* pLayout) @@ -574,8 +608,9 @@ VkResult DescriptorSetLayout::Create( } } + { bindingCount = Util::Max(bindingCount, desc.binding + 1); - + } } const size_t bindingInfoAuxSize = bindingCount * sizeof(BindingInfo); @@ -628,46 +663,301 @@ VkResult DescriptorSetLayout::Create( return result; } +// ===================================================================================================================== +// Get the size in bytes of a merged DescriptorSetLayout +size_t DescriptorSetLayout::GetObjectSize( + const VkDescriptorSetLayout* pLayouts, + const VkShaderStageFlags* pShaderMasks, + const uint32_t count) +{ + size_t size = sizeof(DescriptorSetLayout); + + for (uint32_t i = 0; i < count; ++i) + { + const DescriptorSetLayout* pSetLayout = DescriptorSetLayout::ObjectFromHandle(pLayouts[i]); + const VkShaderStageFlags shaderMask = pShaderMasks[i]; + + size += pSetLayout->GetBindingInfoArrayByteSize(shaderMask); + size += pSetLayout->GetImmSamplerArrayByteSize(shaderMask); + size += pSetLayout->GetImmYCbCrMetaDataArrayByteSize(shaderMask); + } + + return size; +} + +// ===================================================================================================================== +// Merge several descriptor set layouts into one layout. +// The output memory pointed to by pOutLayout is required to be initialized to 0 by the caller.
+void DescriptorSetLayout::Merge( + const Device* pDevice, + const VkDescriptorSetLayout* pLayouts, + const VkShaderStageFlags* pShaderMasks, + const uint32_t count, + DescriptorSetLayout* pOutLayout) +{ + constexpr size_t apiSize = sizeof(DescriptorSetLayout); + + CreateInfo mergedInfo = {}; + BindingInfo* pBindingInfo = static_cast(Util::VoidPtrInc(pOutLayout, apiSize)); + + // The i th element in this array is the source descriptor set layout from which binding i in the + // merged layout should copy. + Util::Vector pRefDescSetLayouts{ nullptr }; + pRefDescSetLayouts.Resize(8, nullptr); + + for (uint32_t i = 0; i < count; ++i) + { + const DescriptorSetLayout* pRef = DescriptorSetLayout::ObjectFromHandle(pLayouts[i]); + const CreateInfo& refInfo = pRef->Info(); + const VkShaderStageFlags shaderMask = pShaderMasks[i]; + + for (uint32_t j = 0; j < refInfo.count; ++j) + { + const BindingInfo& refBinding = pRef->Binding(j); + const VkShaderStageFlags activeStages = refBinding.info.stageFlags & shaderMask; + + if ((activeStages != 0) && (refBinding.info.descriptorCount > 0)) + { + uint32_t bindingIdx = refBinding.info.binding; + + BindingInfo& mergedBinding = pBindingInfo[bindingIdx]; + + if (mergedBinding.info.stageFlags == 0) + { + VK_ASSERT(mergedBinding.info.descriptorCount == 0); + + mergedBinding = refBinding; + + mergedInfo.count = Util::Max(mergedInfo.count, bindingIdx + 1); + + if (pRefDescSetLayouts.NumElements() <= bindingIdx) + { + pRefDescSetLayouts.Resize(bindingIdx * 2, nullptr); + } + pRefDescSetLayouts[bindingIdx] = pRef; + } + else + { + VK_ASSERT(mergedBinding.info.descriptorCount == refBinding.info.descriptorCount); + VK_ASSERT(mergedBinding.info.descriptorType == refBinding.info.descriptorType); + VK_ASSERT(mergedBinding.bindingFlags.u32all == refBinding.bindingFlags.u32all); + + mergedBinding.info.stageFlags |= activeStages; + } + } + } + } + + mergedInfo.imm.pImmutableSamplerData = reinterpret_cast( + Util::VoidPtrInc(pOutLayout, apiSize + 
mergedInfo.count * sizeof(BindingInfo))); + + for (uint32_t bindingIdx = 0; bindingIdx < mergedInfo.count; ++bindingIdx) + { + const uint32 descAlignmentInDw = pDevice->GetProperties().descriptorSizes.alignment / sizeof(uint32); + + BindingInfo& binding = pBindingInfo[bindingIdx]; + + mergedInfo.activeStageMask |= binding.info.stageFlags; + + if ((bindingIdx == mergedInfo.count - 1) && binding.bindingFlags.variableDescriptorCount) + { + mergedInfo.varDescStride = GetSingleDescStaticSize(pDevice, binding.info.descriptorType); + } + + const uint32_t staticDescSize = + (pRefDescSetLayouts[bindingIdx] == nullptr) ? + GetSingleDescStaticSize(pDevice, binding.info.descriptorType) : + GetDescStaticSectionDwSize(pRefDescSetLayouts[bindingIdx], bindingIdx); + + ConvertBindingInfo( + &binding.info, + staticDescSize, + descAlignmentInDw, + &mergedInfo.sta, + &binding.sta); + + ConvertBindingInfo( + &binding.info, + GetDescDynamicSectionDwSize(pDevice, binding.info.descriptorType), + descAlignmentInDw, + &mergedInfo.dyn, + &binding.dyn); + + ConvertImmutableInfo( + &binding.info, + GetDescImmutableSectionDwSize(pDevice, binding.info.descriptorType), + &mergedInfo.imm, + &binding.imm, + binding.bindingFlags, + pRefDescSetLayouts[bindingIdx]); + + if ((binding.info.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) || + (binding.info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) + { + mergedInfo.numDynamicDescriptors += binding.info.descriptorCount; + } + } + + VK_PLACEMENT_NEW(pOutLayout) DescriptorSetLayout(pDevice, mergedInfo, 0); +} + // ===================================================================================================================== // Copy descriptor set layout object void DescriptorSetLayout::Copy( - const Device* pDevice, - DescriptorSetLayout* pOutLayout) const + const Device* pDevice, + const uint32_t shaderMask, + DescriptorSetLayout* pOutLayout) const { - const size_t apiSize = sizeof(DescriptorSetLayout); + if 
(CoverAllActiveShaderStages(shaderMask)) + { + constexpr size_t apiSize = sizeof(DescriptorSetLayout); - CreateInfo info = Info(); + CreateInfo info = Info(); - // Copy the bindings array - void* pBindings = Util::VoidPtrInc(pOutLayout, apiSize); + // Copy the bindings array + void* pBindings = Util::VoidPtrInc(pOutLayout, apiSize); - memcpy(pBindings, Util::VoidPtrInc(this, apiSize), GetBindingInfoArrayByteSize()); + memcpy(pBindings, Util::VoidPtrInc(this, apiSize), GetBindingInfoArrayByteSize(shaderMask)); - // Copy the immutable sampler data - void* pImmutableSamplerData = Util::VoidPtrInc(pOutLayout, apiSize + GetBindingInfoArrayByteSize()); + // Copy the immutable sampler data + void* pImmutableSamplerData = Util::VoidPtrInc(pOutLayout, apiSize + GetBindingInfoArrayByteSize(shaderMask)); - memcpy(pImmutableSamplerData, - Util::VoidPtrInc(this, apiSize + GetBindingInfoArrayByteSize()), - GetImmSamplerArrayByteSize() + GetImmYCbCrMetaDataArrayByteSize()); + memcpy(pImmutableSamplerData, + Util::VoidPtrInc(this, apiSize + GetBindingInfoArrayByteSize(shaderMask)), + GetImmSamplerArrayByteSize(shaderMask) + GetImmYCbCrMetaDataArrayByteSize(shaderMask)); - // Set the base pointer of the immutable sampler data to the appropriate location within the allocated memory - info.imm.pImmutableSamplerData = reinterpret_cast(pImmutableSamplerData); + // Set the base pointer of the immutable sampler data to the appropriate location within the allocated memory + info.imm.pImmutableSamplerData = reinterpret_cast(pImmutableSamplerData); - VK_PLACEMENT_NEW(pOutLayout) DescriptorSetLayout(pDevice, info, GetApiHash()); + VK_PLACEMENT_NEW(pOutLayout) DescriptorSetLayout(pDevice, info, GetApiHash()); + } + else + { + VkDescriptorSetLayout handle = DescriptorSetLayout::HandleFromObject(this); + Merge(pDevice, &handle, &shaderMask, 1, pOutLayout); + } +} + +// ===================================================================================================================== +// Get 
the size in byte of the refBinding info of the specific shader stages +size_t DescriptorSetLayout::GetBindingInfoArrayByteSize(VkShaderStageFlags shaderMask) const +{ + uint32_t numActiveBindings = 0; + + if (CoverAllActiveShaderStages(shaderMask)) + { + numActiveBindings = m_info.count; + } + else + { + for (uint32_t i = 0; i < m_info.count; ++i) + { + const VkDescriptorSetLayoutBinding& binding = Binding(i).info; + + if ((binding.stageFlags & shaderMask) != 0) + { + numActiveBindings = Util::Max(numActiveBindings, binding.binding + 1); + } + } + } + + return numActiveBindings * sizeof(DescriptorSetLayout::BindingInfo); } // ===================================================================================================================== // Get the size in bytes of immutable samplers array -uint32_t DescriptorSetLayout::GetImmSamplerArrayByteSize() const +size_t DescriptorSetLayout::GetImmSamplerArrayByteSize(VkShaderStageFlags shaderMask) const { - return m_info.imm.numImmutableSamplers * m_pDevice->GetProperties().descriptorSizes.sampler; + uint32_t numActiveImmSamplers = 0; + + if (CoverAllActiveShaderStages(shaderMask)) + { + numActiveImmSamplers = m_info.imm.numImmutableSamplers; + } + else + { + for (uint32_t i = 0; i < m_info.count; ++i) + { + const VkDescriptorSetLayoutBinding& binding = Binding(i).info; + + if (((binding.stageFlags & shaderMask) != 0) && + (binding.pImmutableSamplers != nullptr) && + ((binding.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) || + (binding.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))) + { + numActiveImmSamplers += binding.descriptorCount; + } + } + } + + return numActiveImmSamplers * m_pDevice->GetProperties().descriptorSizes.sampler; } // ===================================================================================================================== // Get the size in bytes of immutable ycbcr meta data array -uint32_t DescriptorSetLayout::GetImmYCbCrMetaDataArrayByteSize() const +size_t 
DescriptorSetLayout::GetImmYCbCrMetaDataArrayByteSize(VkShaderStageFlags shaderMask) const +{ + uint32_t numActiveImmYcbcrMetaData = 0; + + if (CoverAllActiveShaderStages(shaderMask)) + { + numActiveImmYcbcrMetaData = m_info.imm.numImmutableYCbCrMetaData; + } + else + { + for (uint32_t i = 0; i < m_info.count; ++i) + { + const VkDescriptorSetLayoutBinding& binding = Binding(i).info; + const DescriptorBindingFlags& flags = Binding(i).bindingFlags; + + if (((binding.stageFlags & shaderMask) != 0) && + (binding.pImmutableSamplers != nullptr) && + (flags.ycbcrConversionUsage != 0) && + ((binding.descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) || + (binding.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))) + { + numActiveImmYcbcrMetaData += binding.descriptorCount; + } + } + } + + return numActiveImmYcbcrMetaData * sizeof(Vkgc::SamplerYCbCrConversionMetaData); +} + +// ===================================================================================================================== +// Get the size in byte of a DescriptorSetLayout +size_t DescriptorSetLayout::GetObjectSize(VkShaderStageFlags shaderMask) const { - return m_info.imm.numImmutableYCbCrMetaData * sizeof(Vkgc::SamplerYCbCrConversionMetaData); + const uint32_t apiSize = sizeof(DescriptorSetLayout); + + return apiSize + GetBindingInfoArrayByteSize(shaderMask) + + GetImmSamplerArrayByteSize(shaderMask) + + GetImmYCbCrMetaDataArrayByteSize(shaderMask); +} + +// ===================================================================================================================== +// Check whether there is refBinding of the specified shader(s) in this DescriptorSetLayout +bool DescriptorSetLayout::IsEmpty(VkShaderStageFlags shaderMask) const +{ + bool isEmpty = true; + + if (CoverAllActiveShaderStages(shaderMask)) + { + isEmpty = (m_info.count == 0); + } + else + { + for (uint32_t i = 0; i < m_info.count; ++i) + { + if ((Binding(i).info.stageFlags & shaderMask) != 0) + { + isEmpty = false; + break; + } 
+ } + } + + return isEmpty; } // ===================================================================================================================== diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index fc252172..3a26e103 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -52,7 +52,6 @@ #include "include/vk_compute_pipeline.h" #include "include/vk_cmdbuffer.h" #include "include/vk_event.h" -#include "include/vk_graphics_pipeline.h" #include "include/vk_memory.h" #include "include/vk_pipeline_cache.h" #include "include/vk_query.h" @@ -65,6 +64,7 @@ #include "include/vk_swapchain.h" #include "include/vk_utils.h" #include "include/vk_conv.h" +#include "include/graphics_pipeline_common.h" #include "include/internal_layer_hooks.h" #include "sqtt/sqtt_layer.h" @@ -271,6 +271,8 @@ Device::Device( memset(m_pQueues, 0, sizeof(m_pQueues)); + m_enabledFeatures.u32All = 0; + m_maxVrsShadingRate = {0, 0}; for (uint32_t deviceIdx = 0; deviceIdx < palDeviceCount; ++deviceIdx) @@ -300,10 +302,6 @@ Device::Device( m_enabledFeatures.sparseBinding = true; } } - else - { - memset(&m_enabledFeatures, 0, sizeof(DeviceFeatures)); - } if (m_settings.robustBufferAccess == FeatureForceEnable) { @@ -2403,7 +2401,7 @@ VkResult Device::CreateGraphicsPipelines( { const VkGraphicsPipelineCreateInfo* pCreateInfo = &pCreateInfos[i]; - VkResult result = GraphicsPipeline::Create( + VkResult result = GraphicsPipelineCommon::Create( this, pPipelineCache, pCreateInfo, @@ -3419,7 +3417,7 @@ bool Device::ReserveFastPrivateDataSlot( // for extension private_data void* Device::AllocApiObject( const VkAllocationCallbacks* pAllocator, - const size_t totalObjectSize) + const size_t totalObjectSize) const { VK_ASSERT(pAllocator != nullptr); @@ -3444,7 +3442,7 @@ void* Device::AllocApiObject( // for extension private_data void Device::FreeApiObject( const VkAllocationCallbacks* pAllocator, - void* pMemory) + void* pMemory) const { VK_ASSERT(pAllocator != nullptr); diff --git 
a/icd/api/vk_gpa_session.cpp b/icd/api/vk_gpa_session.cpp index ea0317f3..143005c3 100644 --- a/icd/api/vk_gpa_session.cpp +++ b/icd/api/vk_gpa_session.cpp @@ -190,7 +190,7 @@ VkResult GpaSession::CmdEnd(CmdBuffer* pCmdBuf) } // ===================================================================================================================== -VK_INLINE VkResult ConvertPerfCounterId( +static VkResult ConvertPerfCounterId( const VkGpaPerfCounterAMD& perfCounter, GpuUtil::PerfCounterId* pId) { diff --git a/icd/api/vk_graphics_pipeline.cpp b/icd/api/vk_graphics_pipeline.cpp index a72beb01..3051e680 100644 --- a/icd/api/vk_graphics_pipeline.cpp +++ b/icd/api/vk_graphics_pipeline.cpp @@ -50,12 +50,34 @@ using namespace Util; namespace vk { +// ===================================================================================================================== +// Retrieve the pipeline layout from VkGraphicsPipelineCreateInfo. +// If the pipeline layout is temporary, the caller must destroy it manually. 
+VkResult GraphicsPipeline::AchievePipelineLayout( + const Device* pDevice, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + PipelineLayout** ppPipelineLayout, + bool* pIsTemporary) +{ + VkResult result = VK_SUCCESS; + + *pIsTemporary = false; + + { + *ppPipelineLayout = PipelineLayout::ObjectFromHandle(pCreateInfo->layout); + } + + return result; +} + // ===================================================================================================================== // Create graphics pipeline binaries VkResult GraphicsPipeline::CreatePipelineBinaries( Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineShaderStageInfo* pShaderInfo, + const PipelineLayout* pPipelineLayout, GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, PipelineCache* pPipelineCache, const VkPipelineCreationFeedbackCreateInfoEXT* pCreationFeedbackInfo, @@ -86,7 +108,7 @@ VkResult GraphicsPipeline::CreatePipelineBinaries( GraphicsPipelineBinaryCreateInfo binaryCreateInfoMGPU = {}; VbInfo vbInfoMGPU = {}; pDefaultCompiler->ConvertGraphicsPipelineInfo( - pDevice, pCreateInfo, pShaderInfo, &binaryCreateInfoMGPU, &vbInfoMGPU); + pDevice, pCreateInfo, pShaderInfo, pPipelineLayout, &binaryCreateInfoMGPU, &vbInfoMGPU); result = pDevice->GetCompiler(i)->CreateGraphicsPipelineBinary( pDevice, @@ -120,6 +142,7 @@ VkResult GraphicsPipeline::CreatePipelineObjects( Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, + const PipelineLayout* pPipelineLayout, const VbInfo* pVbInfo, const size_t* pPipelineBinarySizes, const void** pPipelineBinaries, @@ -301,7 +324,7 @@ VkResult GraphicsPipeline::CreatePipelineObjects( VK_PLACEMENT_NEW(pSystemMem) GraphicsPipeline( pDevice, pPalPipeline, - pObjectCreateInfo->pLayout, + pPipelineLayout, pObjectCreateInfo->immedInfo, pObjectCreateInfo->staticStateMask, pObjectCreateInfo->flags.bindDepthStencilObject, @@ -371,16 +394,24 @@ 
VkResult GraphicsPipeline::Create( pDefaultCompiler->GetPipelineCreationFeedback(static_cast(pCreateInfo->pNext), &pPipelineCreationFeedbackCreateInfo); - // 1. Build pipeline binary create info + // 1. Get pipeline layout + bool isTempLayout = false; + PipelineLayout* pPipelineLayout = nullptr; + VkResult result = AchievePipelineLayout(pDevice, pCreateInfo, pAllocator, &pPipelineLayout, &isTempLayout); + + // 2. Build pipeline binary create info GraphicsPipelineBinaryCreateInfo binaryCreateInfo = {}; GraphicsPipelineShaderStageInfo shaderStageInfo = {}; VbInfo vbInfo = {}; ShaderModuleHandle tempModules[ShaderStage::ShaderStageGfxCount] = {}; - VkResult result = BuildPipelineBinaryCreateInfo( - pDevice, pCreateInfo, &binaryCreateInfo, &shaderStageInfo, &vbInfo, tempModules); + if (result == VK_SUCCESS) + { + result = BuildPipelineBinaryCreateInfo( + pDevice, pCreateInfo, pPipelineLayout, &binaryCreateInfo, &shaderStageInfo, &vbInfo, tempModules); + } - // 2. Create pipeine binaries + // 3. Create pipeline binaries size_t pipelineBinarySizes[MaxPalDevices] = {}; const void* pPipelineBinaries[MaxPalDevices] = {}; Util::MetroHash::Hash cacheId[MaxPalDevices] = {}; @@ -390,6 +421,7 @@ VkResult GraphicsPipeline::Create( result = CreatePipelineBinaries(pDevice, pCreateInfo, &shaderStageInfo, + pPipelineLayout, &binaryCreateInfo, pPipelineCache, pPipelineCreationFeedbackCreateInfo, @@ -404,19 +436,20 @@ VkResult GraphicsPipeline::Create( { pipelineHash = Vkgc::IPipelineDumper::GetPipelineHash(&binaryCreateInfo.pipelineInfo); - // 3. Build pipeline object create info + // 4. Build pipeline object create info GraphicsPipelineObjectCreateInfo objectCreateInfo = {}; GraphicsPipelineBinaryInfo binaryInfo = {}; binaryInfo.pOptimizerKey = &binaryCreateInfo.pipelineProfileKey; BuildPipelineObjectCreateInfo( - pDevice, pCreateInfo, &vbInfo, &binaryInfo, &objectCreateInfo); + pDevice, pCreateInfo, &vbInfo, &binaryInfo, pPipelineLayout, &objectCreateInfo); - // 4. 
Create pipeline objects + // 5. Create pipeline objects result = CreatePipelineObjects( pDevice, pCreateInfo, pAllocator, + pPipelineLayout, &vbInfo, pipelineBinarySizes, pPipelineBinaries, @@ -429,6 +462,12 @@ VkResult GraphicsPipeline::Create( // Free the temporary newly-built shader modules FreeTempModules(pDevice, ShaderStage::ShaderStageGfxCount, tempModules); + // Free the temporary merged pipeline layout used only for current pipeline + if (isTempLayout) + { + pPipelineLayout->Destroy(pDevice, pAllocator); + } + // Free the created pipeline binaries now that the PAL Pipelines/PipelineBinaryInfo have read them. for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) { diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index edfe60a0..cea821da 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -588,7 +588,7 @@ void PhysicalDevice::InitializePlatformKey( // - markPipelineCacheWithBuildTimestamp: decides whether to mix in __DATE__ __TIME__ from compiler to UUID // - useGlobalCacheId : decides if UUID should be portable between machines // -static VK_INLINE void GenerateCacheUuid( +static void GenerateCacheUuid( const RuntimeSettings& settings, const Pal::DeviceProperties& palProps, AppProfile appProfile, @@ -956,7 +956,6 @@ VkResult PhysicalDevice::Initialize() (1UL << memoryTypeIndex); m_memoryTypeMask |= 1 << m_memoryProperties.memoryTypeCount; - ++m_memoryProperties.memoryTypeCount; } } @@ -1174,6 +1173,15 @@ void PhysicalDevice::PopulateFormatProperties() } } + if (format == VK_FORMAT_R32_SFLOAT) + { + if (IsExtensionSupported(DeviceExtensions::EXT_SHADER_ATOMIC_FLOAT)) + { + optimalFlags |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT; + bufferFlags |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT; + } + } + linearFlags &= AllImgFeatures; optimalFlags &= AllImgFeatures; bufferFlags &= AllBufFeatures; @@ -3650,32 +3658,35 @@ DeviceExtensions::Supported 
PhysicalDevice::GetAvailableExtensions( #if defined(__unix__) #endif - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_SHADER_IMAGE_ATOMIC_INT64)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_SHADER_IMAGE_ATOMIC_INT64)); - if ((pPhysicalDevice == nullptr) || - IsConditionalRenderingSupported(pPhysicalDevice)) - { - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_CONDITIONAL_RENDERING)); - } + if ((pPhysicalDevice == nullptr) || + IsConditionalRenderingSupported(pPhysicalDevice)) + { + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_CONDITIONAL_RENDERING)); + } - if ((pPhysicalDevice == nullptr) || - (pPhysicalDevice->PalProperties().gfxipProperties.supportedVrsRates != 0)) - { - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_FRAGMENT_SHADING_RATE)); - } + if ((pPhysicalDevice == nullptr) || + (pPhysicalDevice->PalProperties().gfxipProperties.supportedVrsRates != 0)) + { + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_FRAGMENT_SHADING_RATE)); + } + + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SAMPLER_YCBCR_CONVERSION)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_BUFFER_DEVICE_ADDRESS)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_ROBUSTNESS2)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SHADER_TERMINATE_INVOCATION)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_EXTENDED_DYNAMIC_STATE2)); - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SAMPLER_YCBCR_CONVERSION)); - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_BUFFER_DEVICE_ADDRESS)); - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_ROBUSTNESS2)); - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SHADER_TERMINATE_INVOCATION)); - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_EXTENDED_DYNAMIC_STATE2)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_COPY_COMMANDS2)); + 
availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW)); - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_COPY_COMMANDS2)); - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW)); - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_4444_FORMATS)); - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SYNCHRONIZATION2)); - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_CUSTOM_BORDER_COLOR)); - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_COLOR_WRITE_ENABLE)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_4444_FORMATS)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_SYNCHRONIZATION2)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_CUSTOM_BORDER_COLOR)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_COLOR_WRITE_ENABLE)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_ZERO_INITIALIZE_WORKGROUP_MEMORY)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_LOAD_STORE_OP_NONE)); bool disableAMDVendorExtensions = false; if (pPhysicalDevice != nullptr) @@ -3756,7 +3767,7 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( // ===================================================================================================================== // Is the queue suitable for normal use (i.e. non-exclusive and no elevated priority). 
template -VK_INLINE static bool IsNormalQueue(const T& engineCapabilities) +static bool IsNormalQueue(const T& engineCapabilities) { return ((engineCapabilities.flags.exclusive == 0) && (((engineCapabilities.queuePrioritySupport & Pal::QueuePrioritySupport::SupportQueuePriorityNormal) != 0) || @@ -4658,13 +4669,11 @@ void PhysicalDevice::GetPhysicalDeviceBufferAddressFeatures( VkBool32* pBufferDeviceAddressMultiDevice ) const { - { - *pBufferDeviceAddress = VK_TRUE; - *pBufferDeviceAddressCaptureReplay = - PalProperties().gfxipProperties.flags.supportCaptureReplay ? VK_TRUE : VK_FALSE; - *pBufferDeviceAddressMultiDevice = - PalProperties().gpuMemoryProperties.flags.globalGpuVaSupport; - } + *pBufferDeviceAddress = VK_TRUE; + *pBufferDeviceAddressCaptureReplay = + PalProperties().gfxipProperties.flags.supportCaptureReplay ? VK_TRUE : VK_FALSE; + *pBufferDeviceAddressMultiDevice = + PalProperties().gpuMemoryProperties.flags.globalGpuVaSupport; } // ===================================================================================================================== @@ -5511,6 +5520,53 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: + { + auto pExtInfo = reinterpret_cast(pHeader); + if (updateFeatures) + { + pExtInfo->shaderBufferFloat32Atomics = VK_TRUE; + pExtInfo->shaderBufferFloat32AtomicAdd = VK_FALSE; + pExtInfo->shaderBufferFloat64Atomics = VK_TRUE; + pExtInfo->shaderBufferFloat64AtomicAdd = VK_FALSE; + pExtInfo->shaderSharedFloat32Atomics = VK_TRUE; + pExtInfo->shaderSharedFloat32AtomicAdd = VK_FALSE; + pExtInfo->shaderSharedFloat64Atomics = VK_TRUE; + pExtInfo->shaderSharedFloat64AtomicAdd = VK_FALSE; + pExtInfo->shaderImageFloat32Atomics = VK_TRUE; + pExtInfo->shaderImageFloat32AtomicAdd = VK_FALSE; + pExtInfo->sparseImageFloat32Atomics = VK_TRUE; + pExtInfo->sparseImageFloat32AtomicAdd = VK_FALSE; + } + + structSize = sizeof(*pExtInfo); + break; + } + + case 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: + { + auto* pExtInfo = reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->shaderBufferFloat16Atomics = VK_FALSE; + pExtInfo->shaderBufferFloat16AtomicAdd = VK_FALSE; + pExtInfo->shaderBufferFloat16AtomicMinMax = VK_FALSE; + pExtInfo->shaderBufferFloat32AtomicMinMax = VK_TRUE; + pExtInfo->shaderBufferFloat64AtomicMinMax = VK_TRUE; + pExtInfo->shaderSharedFloat16Atomics = VK_FALSE; + pExtInfo->shaderSharedFloat16AtomicAdd = VK_FALSE; + pExtInfo->shaderSharedFloat16AtomicMinMax = VK_FALSE; + pExtInfo->shaderSharedFloat32AtomicMinMax = VK_TRUE; + pExtInfo->shaderSharedFloat64AtomicMinMax = VK_TRUE; + pExtInfo->shaderImageFloat32AtomicMinMax = VK_TRUE; + pExtInfo->sparseImageFloat32AtomicMinMax = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + break; + } + default: { // skip any unsupported extension structures @@ -6784,10 +6840,8 @@ static void VerifyExtensions( && dev.IsExtensionSupported(DeviceExtensions::KHR_SHADER_SUBGROUP_EXTENDED_TYPES) && dev.IsExtensionSupported(DeviceExtensions::KHR_TIMELINE_SEMAPHORE) && dev.IsExtensionSupported(DeviceExtensions::KHR_UNIFORM_BUFFER_STANDARD_LAYOUT) - && dev.IsExtensionSupported(DeviceExtensions::KHR_VULKAN_MEMORY_MODEL)); - { - VK_ASSERT(dev.IsExtensionSupported(DeviceExtensions::KHR_BUFFER_DEVICE_ADDRESS)); - } + && dev.IsExtensionSupported(DeviceExtensions::KHR_VULKAN_MEMORY_MODEL) + && dev.IsExtensionSupported(DeviceExtensions::KHR_BUFFER_DEVICE_ADDRESS)); } } diff --git a/icd/api/vk_pipeline.cpp b/icd/api/vk_pipeline.cpp index 22f44042..4f282526 100644 --- a/icd/api/vk_pipeline.cpp +++ b/icd/api/vk_pipeline.cpp @@ -122,7 +122,10 @@ void Pipeline::GenerateHashFromShaderStageCreateInfo( { pHasher->Update(desc.flags); pHasher->Update(desc.stage); - pHasher->Update(ShaderModule::ObjectFromHandle(desc.module)->GetCodeHash(desc.pName)); + if (desc.module != VK_NULL_HANDLE) + { + 
pHasher->Update(ShaderModule::ObjectFromHandle(desc.module)->GetCodeHash(desc.pName)); + } if (desc.pSpecializationInfo != nullptr) { @@ -152,6 +155,7 @@ VkResult Pipeline::BuildShaderStageInfo( const Device* pDevice, const uint32_t stageCount, const VkPipelineShaderStageCreateInfo* pStages, + const bool duplicateExistingModules, uint32_t (*pfnGetOutputIdx)(const uint32_t inputIdx, const uint32_t stageIdx), ShaderStageInfo* pShaderStageInfo, @@ -169,7 +173,7 @@ VkResult Pipeline::BuildShaderStageInfo( const ShaderStage stage = ShaderFlagBitToStage(stageInfo.stage); const uint32_t outIdx = pfnGetOutputIdx(i, stage); - if (stageInfo.module != VK_NULL_HANDLE) + if ((stageInfo.module != VK_NULL_HANDLE) && (duplicateExistingModules == false)) { const ShaderModule* pModule = ShaderModule::ObjectFromHandle(stageInfo.module); @@ -186,22 +190,35 @@ VkResult Pipeline::BuildShaderStageInfo( // creation of pipeline. VK_ASSERT(pTempModules != nullptr); - EXTRACT_VK_STRUCTURES_0( - shaderModule, - ShaderModuleCreateInfo, - static_cast(stageInfo.pNext), - SHADER_MODULE_CREATE_INFO); + VkShaderModuleCreateFlags flags = 0; + size_t codeSize = 0; + const void* pCode = nullptr; + + if (stageInfo.module != VK_NULL_HANDLE) + { + // TODO: It's better to copy the compiled shader modules rather than compile them again. 
+ const ShaderModule* pModule = ShaderModule::ObjectFromHandle(stageInfo.module); + codeSize = pModule->GetCodeSize(); + pCode = pModule->GetCode(); + } + else + { + EXTRACT_VK_STRUCTURES_0( + shaderModule, + ShaderModuleCreateInfo, + static_cast(stageInfo.pNext), + SHADER_MODULE_CREATE_INFO); + + VK_ASSERT(pShaderModuleCreateInfo != nullptr); - VK_ASSERT(pShaderModuleCreateInfo != nullptr); + flags = pShaderModuleCreateInfo->flags; + codeSize = pShaderModuleCreateInfo->codeSize; + pCode = pShaderModuleCreateInfo->pCode; + } - const Pal::ShaderHash codeHash = - ShaderModule::BuildCodeHash(pShaderModuleCreateInfo->pCode, pShaderModuleCreateInfo->codeSize); + const Pal::ShaderHash codeHash = ShaderModule::BuildCodeHash(pCode, codeSize); - result = pCompiler->BuildShaderModule(pDevice, - pShaderModuleCreateInfo->flags, - pShaderModuleCreateInfo->codeSize, - pShaderModuleCreateInfo->pCode, - &pTempModules[numNewModules]); + result = pCompiler->BuildShaderModule(pDevice, flags, codeSize, pCode, &pTempModules[numNewModules]); if (result != VK_SUCCESS) { @@ -210,7 +227,7 @@ VkResult Pipeline::BuildShaderStageInfo( pShaderStageInfo[outIdx].pModuleHandle = &pTempModules[numNewModules++]; pShaderStageInfo[outIdx].codeHash = ShaderModule::GetCodeHash(codeHash, stageInfo.pName); - pShaderStageInfo[outIdx].codeSize = pShaderModuleCreateInfo->codeSize; + pShaderStageInfo[outIdx].codeSize = codeSize; } pShaderStageInfo[outIdx].stage = stage; @@ -278,15 +295,36 @@ void Pipeline::Init( uint32_t staticStateMask, uint64_t apiHash) { - m_userDataLayout = pLayout->GetInfo().userDataLayout; m_staticStateMask = staticStateMask; m_apiHash = apiHash; m_pBinary = pBinary; - m_palPipelineHash = pPalPipeline[DefaultDeviceIndex]->GetInfo().internalPipelineHash.unique; - for (uint32_t devIdx = 0; devIdx < m_pDevice->NumPalDevices(); devIdx++) + if (pLayout != nullptr) + { + m_userDataLayout = pLayout->GetInfo().userDataLayout; + } + else + { + memset(&m_userDataLayout, 0, 
sizeof(UserDataLayout)); + } + + if (pPalPipeline != nullptr) + { + m_palPipelineHash = pPalPipeline[DefaultDeviceIndex]->GetInfo().internalPipelineHash.unique; + + for (uint32_t devIdx = 0; devIdx < m_pDevice->NumPalDevices(); devIdx++) + { + m_pPalPipeline[devIdx] = pPalPipeline[devIdx]; + } + } + else { - m_pPalPipeline[devIdx] = pPalPipeline[devIdx]; + m_palPipelineHash = 0; + + for (uint32_t devIdx = 0; devIdx < m_pDevice->NumPalDevices(); devIdx++) + { + m_pPalPipeline[devIdx] = nullptr; + } } } diff --git a/icd/api/vk_pipeline_layout.cpp b/icd/api/vk_pipeline_layout.cpp index a6bd71d7..18acc405 100644 --- a/icd/api/vk_pipeline_layout.cpp +++ b/icd/api/vk_pipeline_layout.cpp @@ -34,6 +34,7 @@ #include "include/vk_sampler.h" #include "include/vk_utils.h" #include "palMetroHash.h" +#include "palVectorImpl.h" namespace vk { @@ -66,6 +67,7 @@ uint64_t PipelineLayout::BuildApiHash( return hash; } +// ===================================================================================================================== constexpr size_t PipelineLayout::GetMaxResMappingRootNodeSize() { return @@ -261,7 +263,7 @@ VkResult PipelineLayout::ConvertCreateInfo( // ===================================================================================================================== // Creates a pipeline layout object. 
VkResult PipelineLayout::Create( - Device* pDevice, + const Device* pDevice, const VkPipelineLayoutCreateInfo* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkPipelineLayout* pPipelineLayout) @@ -278,7 +280,7 @@ VkResult PipelineLayout::Create( for (uint32_t i = 0; i < pCreateInfo->setLayoutCount; ++i) { DescriptorSetLayout* pLayout = DescriptorSetLayout::ObjectFromHandle(pCreateInfo->pSetLayouts[i]); - setLayoutsArraySize += pLayout->GetObjectSize(); + setLayoutsArraySize += pLayout->GetObjectSize(VK_SHADER_STAGE_ALL); } // Need to add extra storage for DescriptorSetLayout*, SetUserDataLayout, the descriptor set layouts themselves, @@ -326,11 +328,209 @@ VkResult PipelineLayout::Create( const DescriptorSetLayout* pLayout = DescriptorSetLayout::ObjectFromHandle(pCreateInfo->pSetLayouts[i]); // Copy the original descriptor set layout object - pLayout->Copy(pDevice, ppSetLayouts[i]); + pLayout->Copy(pDevice, VK_SHADER_STAGE_ALL, ppSetLayouts[i]); + + currentSetLayoutOffset += pLayout->GetObjectSize(VK_SHADER_STAGE_ALL); + } + + VK_PLACEMENT_NEW(pSysMem) PipelineLayout(pDevice, info, pipelineInfo, apiHash); + + *pPipelineLayout = PipelineLayout::HandleFromVoidPointer(pSysMem); + } + + if (result != VK_SUCCESS) + { + if (pSysMem != nullptr) + { + pDevice->FreeApiObject(pAllocator, pSysMem); + } + } + + return result; +} + +// ===================================================================================================================== +// Create a pipeline layout object via existing pipeline layouts +VkResult PipelineLayout::Create( + const Device* pDevice, + const VkPipelineLayout* pReference, + const VkShaderStageFlags* pRefShaderMask, + const uint32_t refCount, + const VkAllocationCallbacks* pAllocator, + VkPipelineLayout* pPipelineLayout) +{ + VkResult result = VK_SUCCESS; + + VkPipelineLayoutCreateInfo createInfo = {}; + Info info = {}; + PipelineInfo pipelineInfo = {}; + uint64_t apiHash = 0; + + Util::Vector mergedDescriptorSetLayoutsSize{ nullptr 
}; + Util::Vector mergedDescriptorSetLayouts{ nullptr }; + Util::Vector mergedShaderMasks{ nullptr }; + + VkPushConstantRange pushConstantRange = {}; + pushConstantRange.offset = 0; + pushConstantRange.stageFlags = VK_SHADER_STAGE_ALL; + + size_t setLayoutsArraySize = 0; + + for (uint32_t set = 0; ; ++set) + { + Util::Vector setLayouts = { nullptr }; + Util::Vector setShaderMasks = { nullptr }; + + bool aboveLargestSet = true; + + for (uint32_t i = 0; i < refCount; ++i) + { + const PipelineLayout* pRef = PipelineLayout::ObjectFromHandle(pReference[i]); + + if (pRef != nullptr) + { + const PipelineLayout::Info& layoutInfo = pRef->GetInfo(); + + if (set < layoutInfo.setCount) + { + aboveLargestSet = false; + + const DescriptorSetLayout* pLayout = pRef->GetSetLayouts(set); + + if (pLayout->IsEmpty(pRefShaderMask[i]) == false) + { + setLayouts.PushBack(DescriptorSetLayout::HandleFromObject(pLayout)); + setShaderMasks.PushBack(pRefShaderMask[i]); + } + } + } + } + + if (aboveLargestSet == true) + { + break; + } + + const size_t objSize = + DescriptorSetLayout::GetObjectSize(setLayouts.Data(), setShaderMasks.Data(), setLayouts.size()); + setLayoutsArraySize += objSize; + mergedDescriptorSetLayoutsSize.PushBack(objSize); + } + + for (uint32_t i = 0; i < refCount; ++i) + { + const PipelineLayout* pRef = PipelineLayout::ObjectFromHandle(pReference[i]); + + if (pRef != nullptr) + { + const PipelineLayout::Info& layoutInfo = pRef->GetInfo(); + + pushConstantRange.size = + Util::Max(pushConstantRange.size, + layoutInfo.userDataLayout.pushConstRegCount * sizeof(uint32_t)); + + } + } + + uint32_t setLayoutCount = mergedDescriptorSetLayoutsSize.size(); + const size_t apiSize = sizeof(PipelineLayout); + const size_t setUserDataLayoutSize = + Util::Pow2Align((setLayoutCount * sizeof(SetUserDataLayout)), ExtraDataAlignment()); + const size_t descriptorSetLayoutSize = + Util::Pow2Align((setLayoutCount * sizeof(DescriptorSetLayout*)), ExtraDataAlignment()); + + size_t objSize = 
apiSize + setUserDataLayoutSize + descriptorSetLayoutSize + setLayoutsArraySize; + + void* pSysMem = pDevice->AllocApiObject(pAllocator, objSize); + + if (pSysMem == nullptr) + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + + if (result == VK_SUCCESS) + { + memset(pSysMem, 0, objSize); + + SetUserDataLayout* pSetUserData = nullptr; + DescriptorSetLayout** ppSetLayouts = nullptr; + + pSetUserData = static_cast(Util::VoidPtrInc(pSysMem, apiSize)); + ppSetLayouts = static_cast( + Util::VoidPtrInc(pSysMem, apiSize + setUserDataLayoutSize)); + + size_t currentSetLayoutOffset = apiSize + setUserDataLayoutSize + descriptorSetLayoutSize; + + for (uint32_t set = 0; ; ++set) + { + Util::Vector setLayouts = { nullptr }; + Util::Vector setShaderMasks = { nullptr }; + + bool aboveLargestSet = true; - currentSetLayoutOffset += pLayout->GetObjectSize(); + for (uint32_t i = 0; i < refCount; ++i) + { + const PipelineLayout* pRef = PipelineLayout::ObjectFromHandle(pReference[i]); + + if (pRef != nullptr) + { + const PipelineLayout::Info& layoutInfo = pRef->GetInfo(); + + if (set < layoutInfo.setCount) + { + aboveLargestSet = false; + + const DescriptorSetLayout* pLayout = pRef->GetSetLayouts(set); + + if (pLayout->IsEmpty(pRefShaderMask[i]) == false) + { + setLayouts.PushBack(DescriptorSetLayout::HandleFromObject(pLayout)); + setShaderMasks.PushBack(pRefShaderMask[i]); + } + } + } + } + + if (aboveLargestSet == true) + { + break; + } + + ppSetLayouts[set] = reinterpret_cast(Util::VoidPtrInc(pSysMem, currentSetLayoutOffset)); + + DescriptorSetLayout::Merge(pDevice, + setLayouts.Data(), + setShaderMasks.Data(), + setLayouts.size(), + ppSetLayouts[set]); + + mergedDescriptorSetLayouts.PushBack(DescriptorSetLayout::HandleFromObject(ppSetLayouts[set])); + + currentSetLayoutOffset += mergedDescriptorSetLayoutsSize[set]; } + createInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + createInfo.pNext = nullptr; + createInfo.setLayoutCount = mergedDescriptorSetLayouts.size(); + 
createInfo.pSetLayouts = mergedDescriptorSetLayouts.Data(); + if (pushConstantRange.size > 0) + { + createInfo.pushConstantRangeCount = 1; + createInfo.pPushConstantRanges = &pushConstantRange; + } + + apiHash = BuildApiHash(&createInfo); + + result = ConvertCreateInfo( + pDevice, + &createInfo, + &info, + &pipelineInfo, + pSetUserData); + } + + if (result == VK_SUCCESS) + { VK_PLACEMENT_NEW(pSysMem) PipelineLayout(pDevice, info, pipelineInfo, apiHash); *pPipelineLayout = PipelineLayout::HandleFromVoidPointer(pSysMem); @@ -353,7 +553,7 @@ Vkgc::ResourceMappingNodeType PipelineLayout::MapLlpcResourceNodeType( VkDescriptorType descriptorType) { auto nodeType = Vkgc::ResourceMappingNodeType::Unknown; - switch(static_cast(descriptorType)) + switch (static_cast(descriptorType)) { case VK_DESCRIPTOR_TYPE_SAMPLER: nodeType = Vkgc::ResourceMappingNodeType::DescriptorSampler; @@ -362,21 +562,45 @@ Vkgc::ResourceMappingNodeType PipelineLayout::MapLlpcResourceNodeType( nodeType = Vkgc::ResourceMappingNodeType::DescriptorCombinedTexture; break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + nodeType = Vkgc::ResourceMappingNodeType::DescriptorResource; + break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 49 + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + nodeType = Vkgc::ResourceMappingNodeType::DescriptorImage; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + nodeType = Vkgc::ResourceMappingNodeType::DescriptorConstTexelBuffer; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + nodeType = Vkgc::ResourceMappingNodeType::DescriptorConstBuffer; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + nodeType = Vkgc::ResourceMappingNodeType::DescriptorConstBufferCompact; + break; +#else + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: nodeType = Vkgc::ResourceMappingNodeType::DescriptorResource; break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: nodeType = 
Vkgc::ResourceMappingNodeType::DescriptorTexelBuffer; break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + nodeType = Vkgc::ResourceMappingNodeType::DescriptorBuffer; + break; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: + nodeType = Vkgc::ResourceMappingNodeType::DescriptorBufferCompact; + break; +#endif + + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + nodeType = Vkgc::ResourceMappingNodeType::DescriptorTexelBuffer; + break; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: nodeType = Vkgc::ResourceMappingNodeType::DescriptorBuffer; break; - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - nodeType = Vkgc::ResourceMappingNodeType::DescriptorResource; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + nodeType = Vkgc::ResourceMappingNodeType::DescriptorBufferCompact; break; case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT: nodeType = Vkgc::ResourceMappingNodeType::PushConst; @@ -456,9 +680,25 @@ VkResult PipelineLayout::BuildLlpcSetMapping( VK_ASSERT((binding.info.descriptorType == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) || (binding.info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)); auto pNode = &pDynNodes[*pDynNodeCount]; - pNode->node.type = (binding.dyn.dwArrayStride == 2) ? - Vkgc::ResourceMappingNodeType::DescriptorBufferCompact : - Vkgc::ResourceMappingNodeType::DescriptorBuffer; + if (binding.info.descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) + { + pNode->node.type = (binding.dyn.dwArrayStride == 2) ? + Vkgc::ResourceMappingNodeType::DescriptorBufferCompact : + Vkgc::ResourceMappingNodeType::DescriptorBuffer; + + } + else + { +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 49 + pNode->node.type = (binding.dyn.dwArrayStride == 2) ? + Vkgc::ResourceMappingNodeType::DescriptorConstBufferCompact: + Vkgc::ResourceMappingNodeType::DescriptorConstBuffer; +#else + pNode->node.type = (binding.dyn.dwArrayStride == 2) ? 
+ Vkgc::ResourceMappingNodeType::DescriptorBufferCompact : + Vkgc::ResourceMappingNodeType::DescriptorBuffer; +#endif + } pNode->node.offsetInDwords = userDataRegBase + binding.dyn.dwOffset; pNode->node.sizeInDwords = binding.dyn.dwSize; pNode->node.srdRange.binding = binding.info.binding; @@ -535,49 +775,52 @@ VkResult PipelineLayout::BuildLlpcPipelineMapping( uint32_t visibility = stageMask & VkToVkgcShaderStageMask(pSetLayout->Info().activeStageMask); - // Build the resource mapping nodes for the contents of this set. - auto pDynNodes = &pUserDataNodes[userDataNodeCount]; - auto pStaNodes = &pResourceNodes[mappingNodeCount]; - auto pDescValues = &pDescriptorRangeValues[descriptorRangeCount]; - - uint32_t dynNodeCount = 0; - uint32_t staNodeCount = 0; - uint32_t descRangeCount = 0; - - result = BuildLlpcSetMapping( - visibility, - setIndex, - pSetLayout, - pDynNodes, - &dynNodeCount, - pStaNodes, - &staNodeCount, - pDescValues, - &descRangeCount, - m_info.userDataLayout.setBindingRegBase + pSetUserData->dynDescDataRegOffset); - - // Increase the number of mapping nodes used by the number of static section nodes added. - mappingNodeCount += staNodeCount; - - // Increase the number of user data nodes used by the number of dynamic section nodes added. - userDataNodeCount += dynNodeCount; - - // Increase the number of descriptor range value nodes used by immutable samplers - descriptorRangeCount += descRangeCount; - - // Add a top-level user data node entry for this set's pointer if there are static nodes. 
- if (pSetUserData->setPtrRegOffset != InvalidReg) + if (visibility != 0) { - auto pSetPtrNode = &pUserDataNodes[userDataNodeCount]; - - pSetPtrNode->node.type = Vkgc::ResourceMappingNodeType::DescriptorTableVaPtr; - pSetPtrNode->node.offsetInDwords = m_info.userDataLayout.setBindingRegBase + - pSetUserData->setPtrRegOffset; - pSetPtrNode->node.sizeInDwords = SetPtrRegCount; - pSetPtrNode->node.tablePtr.nodeCount = staNodeCount; - pSetPtrNode->node.tablePtr.pNext = pStaNodes; - pSetPtrNode->visibility = visibility; - userDataNodeCount++; + // Build the resource mapping nodes for the contents of this set. + auto pDynNodes = &pUserDataNodes[userDataNodeCount]; + auto pStaNodes = &pResourceNodes[mappingNodeCount]; + auto pDescValues = &pDescriptorRangeValues[descriptorRangeCount]; + + uint32_t dynNodeCount = 0; + uint32_t staNodeCount = 0; + uint32_t descRangeCount = 0; + + result = BuildLlpcSetMapping( + visibility, + setIndex, + pSetLayout, + pDynNodes, + &dynNodeCount, + pStaNodes, + &staNodeCount, + pDescValues, + &descRangeCount, + m_info.userDataLayout.setBindingRegBase + pSetUserData->dynDescDataRegOffset); + + // Increase the number of mapping nodes used by the number of static section nodes added. + mappingNodeCount += staNodeCount; + + // Increase the number of user data nodes used by the number of dynamic section nodes added. + userDataNodeCount += dynNodeCount; + + // Increase the number of descriptor range value nodes used by immutable samplers + descriptorRangeCount += descRangeCount; + + // Add a top-level user data node entry for this set's pointer if there are static nodes. 
+ if (pSetUserData->setPtrRegOffset != InvalidReg) + { + auto pSetPtrNode = &pUserDataNodes[userDataNodeCount]; + + pSetPtrNode->node.type = Vkgc::ResourceMappingNodeType::DescriptorTableVaPtr; + pSetPtrNode->node.offsetInDwords = m_info.userDataLayout.setBindingRegBase + + pSetUserData->setPtrRegOffset; + pSetPtrNode->node.sizeInDwords = SetPtrRegCount; + pSetPtrNode->node.tablePtr.nodeCount = staNodeCount; + pSetPtrNode->node.tablePtr.pNext = pStaNodes; + pSetPtrNode->visibility = visibility; + userDataNodeCount++; + } } } diff --git a/icd/api/vk_render_pass.cpp b/icd/api/vk_render_pass.cpp index 457b2f0b..2012d4ef 100644 --- a/icd/api/vk_render_pass.cpp +++ b/icd/api/vk_render_pass.cpp @@ -576,7 +576,7 @@ void SubpassDescription::Init( stencilResolveMode = pExtInfo->stencilResolveMode; depthStencilResolveAttachment.Init( - pExtInfo->pDepthStencilResolveAttachment[subpassIndex]); + *(pExtInfo->pDepthStencilResolveAttachment)); break; } diff --git a/icd/api/vk_shader.cpp b/icd/api/vk_shader.cpp index 07108e59..e6b9a7be 100644 --- a/icd/api/vk_shader.cpp +++ b/icd/api/vk_shader.cpp @@ -62,7 +62,7 @@ void* VKAPI_CALL AllocateShaderOutput( // ===================================================================================================================== // Concatenates a MetroHash::Hash to two 64-bit uints. 
-VK_INLINE void MetroHashTo128Bit( +static void MetroHashTo128Bit( const Util::MetroHash::Hash& hash, uint64_t* pLower, uint64_t* pUpper) diff --git a/icd/imported/gputexdecoder/gpuTexDecoder.cpp b/icd/imported/gputexdecoder/gpuTexDecoder.cpp index 5ff7541a..eb51b7f2 100755 --- a/icd/imported/gputexdecoder/gpuTexDecoder.cpp +++ b/icd/imported/gputexdecoder/gpuTexDecoder.cpp @@ -321,7 +321,9 @@ static void GetSpvCode( } else { - // TODO: Etc code + PAL_ASSERT(type == InternalTexConvertCsType::ConvertETC2ToRGBA8); + *pCode = Etc2Decoder; + *pSize = sizeof(Etc2Decoder); } } @@ -355,7 +357,11 @@ void Device::Init( m_bufferViewSizeInDwords = m_info.pDeviceProperties->gfxipProperties.srdSizes.bufferView / sizeof(uint32); // 3 Table and 1 TexBuffer, and 2 Image resource. - m_srdDwords = (3 + 1) * m_bufferViewSizeInDwords + 2 * m_imageViewSizeInDwords; + m_srdDwords[static_cast(InternalTexConvertCsType::ConvertASTCToRGBA8)] + = (3 + 1) * m_bufferViewSizeInDwords + 2 * m_imageViewSizeInDwords; + // 1 Image resource for output and 1 TexBuffer for Input + m_srdDwords[static_cast(InternalTexConvertCsType::ConvertETC2ToRGBA8)] + = 2 * m_imageViewSizeInDwords; } // ===================================================================================================================== @@ -374,7 +380,9 @@ Pal::Result Device::GpuDecodeImage( if (type == InternalTexConvertCsType::ConvertASTCToRGBA8) { uint32* pUserData = nullptr; - CreateAstcUserData(InternalTexConvertCsType::ConvertASTCToRGBA8, &pUserData, m_srdDwords); + CreateUserData(InternalTexConvertCsType::ConvertASTCToRGBA8, + &pUserData, + m_srdDwords[static_cast(InternalTexConvertCsType::ConvertASTCToRGBA8)]); BindPipeline(type, constInfo); // Image To Image @@ -409,11 +417,47 @@ Pal::Result Device::GpuDecodeImage( } } } - else // for ETC Decode + else // for ETC2 Decode { - // TODO: ETC2 Decode + PAL_ASSERT(type == InternalTexConvertCsType::ConvertETC2ToRGBA8); + uint32* pUserData = nullptr; + 
CreateUserData(InternalTexConvertCsType::ConvertETC2ToRGBA8, + &pUserData, + m_srdDwords[static_cast(InternalTexConvertCsType::ConvertETC2ToRGBA8)]); + BindPipeline(type, constInfo); + + if (pSrcImage != nullptr) + { + for (uint32 idx = 0; idx < regionCount; ++idx) + { + Pal::ImageCopyRegion copyRegion = pPalImageRegions[idx]; + uint32 mips = copyRegion.srcSubres.mipLevel; + Pal::SubresId palSrcSubResId = copyRegion.srcSubres; + Pal::SubresId palDstSubResId = copyRegion.dstSubres; + + Pal::SwizzledFormat dstFormat = pDstImage->GetImageCreateInfo().swizzledFormat; + Pal::SwizzledFormat srcFormat = pSrcImage->GetImageCreateInfo().swizzledFormat; + + Pal::ImageViewInfo imageView[2] = {}; + + BuildImageViewInfo(&imageView[0], pDstImage, palDstSubResId, dstFormat, true); + BuildImageViewInfo(&imageView[1], pSrcImage, palSrcSubResId, srcFormat, false); + + m_info.pPalDevice->CreateImageViewSrds(2, imageView, pUserData); + + uint32 threadGroupsX = (pSrcImage->GetImageCreateInfo().extent.width + 7) / 8; + uint32 threadGroupsY = (pSrcImage->GetImageCreateInfo().extent.height + 7) / 8; + uint32 threadGroupsZ = 1; + + m_pPalCmdBuffer->CmdDispatch(threadGroupsX, threadGroupsY, threadGroupsZ); + } + + } + } + m_pPalCmdBuffer->CmdRestoreComputeState(Pal::ComputeStateAll); + return Pal::Result::Success; } @@ -433,7 +477,9 @@ Pal::Result Device::GpuDecodeBuffer( if (type == InternalTexConvertCsType::ConvertASTCToRGBA8) { uint32* pUserData = nullptr; - CreateAstcUserData(InternalTexConvertCsType::ConvertASTCToRGBA8, &pUserData, m_srdDwords); + CreateUserData(InternalTexConvertCsType::ConvertASTCToRGBA8, + &pUserData, + m_srdDwords[static_cast(InternalTexConvertCsType::ConvertASTCToRGBA8)]); BindPipeline(type, constInfo); // Buffer To Image @@ -454,7 +500,7 @@ Pal::Result Device::GpuDecodeBuffer( } // ===================================================================================================================== -void Device::CreateAstcUserData( +void Device::CreateUserData( 
InternalTexConvertCsType type, uint32** ppUserData, uint32 srdDwords) @@ -462,11 +508,14 @@ void Device::CreateAstcUserData( *ppUserData = CreateAndBindEmbeddedUserData(m_pPalCmdBuffer, srdDwords, 0, 1); memset(*ppUserData, 0, srdDwords * sizeof(uint32)); - if (m_pTableMemory == nullptr) + if (type == InternalTexConvertCsType::ConvertASTCToRGBA8) { - CreateTableMemory(); + if (m_pTableMemory == nullptr) + { + CreateTableMemory(); + } + SetupInternalTables(type, ppUserData); } - SetupInternalTables(type, ppUserData); } // ===================================================================================================================== @@ -562,57 +611,82 @@ Pal::IPipeline* Device::GetInternalPipeline( Pal::IPipeline* pPipeline = nullptr; void* pMemory = nullptr; PipelineBuildInfo buildInfo = {}; - GpuDecodeMappingNode astcResourceNodes[AstcInternalPipelineNodes]; if (type == InternalTexConvertCsType::ConvertASTCToRGBA8) { - uint32 offset = 0; + GpuDecodeMappingNode astcResourceNodes[AstcInternalPipelineNodes]; + uint32 offset = 0; buildInfo.nodeCount = 1; // 1.Color UnQuantization Buffer View - astcResourceNodes[0].nodeType = NodeType::Buffer; - astcResourceNodes[0].sizeInDwords = m_bufferViewSizeInDwords; + astcResourceNodes[0].nodeType = NodeType::Buffer; + astcResourceNodes[0].sizeInDwords = m_bufferViewSizeInDwords; astcResourceNodes[0].offsetInDwords = 0; - astcResourceNodes[0].binding = 0; - astcResourceNodes[0].set = 0; + astcResourceNodes[0].binding = 0; + astcResourceNodes[0].set = 0; // 2.Trits Quints Buffer View - astcResourceNodes[1].nodeType = NodeType::Buffer; - astcResourceNodes[1].sizeInDwords = m_bufferViewSizeInDwords; + astcResourceNodes[1].nodeType = NodeType::Buffer; + astcResourceNodes[1].sizeInDwords = m_bufferViewSizeInDwords; astcResourceNodes[1].offsetInDwords = 1 * m_bufferViewSizeInDwords; - astcResourceNodes[1].binding = 1; - astcResourceNodes[1].set = 0; + astcResourceNodes[1].binding = 1; + astcResourceNodes[1].set = 0; // 3.Quant 
and Transfer Buffer View - astcResourceNodes[2].nodeType = NodeType::Buffer; - astcResourceNodes[2].sizeInDwords = m_bufferViewSizeInDwords; + astcResourceNodes[2].nodeType = NodeType::Buffer; + astcResourceNodes[2].sizeInDwords = m_bufferViewSizeInDwords; astcResourceNodes[2].offsetInDwords = 2 * m_bufferViewSizeInDwords; - astcResourceNodes[2].binding = 2; - astcResourceNodes[2].set = 0; + astcResourceNodes[2].binding = 2; + astcResourceNodes[2].set = 0; // 4. TexBuffer View for Src Image Buffer - astcResourceNodes[3].nodeType = NodeType::TexBuffer; - astcResourceNodes[3].sizeInDwords = m_bufferViewSizeInDwords; + astcResourceNodes[3].nodeType = NodeType::TexBuffer; + astcResourceNodes[3].sizeInDwords = m_bufferViewSizeInDwords; astcResourceNodes[3].offsetInDwords = 3 * m_bufferViewSizeInDwords; - astcResourceNodes[3].binding = 3; - astcResourceNodes[3].set = 0; + astcResourceNodes[3].binding = 3; + astcResourceNodes[3].set = 0; // 5. Image View for Src Image - astcResourceNodes[4].nodeType = NodeType::Image; - astcResourceNodes[4].sizeInDwords = m_imageViewSizeInDwords; + astcResourceNodes[4].nodeType = NodeType::Image; + astcResourceNodes[4].sizeInDwords = m_imageViewSizeInDwords; astcResourceNodes[4].offsetInDwords = 4 * m_bufferViewSizeInDwords; - astcResourceNodes[4].binding = 4; - astcResourceNodes[4].set = 0; + astcResourceNodes[4].binding = 4; + astcResourceNodes[4].set = 0; // 6. 
Image View for Dst Image - astcResourceNodes[5].nodeType = NodeType::Image; - astcResourceNodes[5].sizeInDwords = m_imageViewSizeInDwords; + astcResourceNodes[5].nodeType = NodeType::Image; + astcResourceNodes[5].sizeInDwords = m_imageViewSizeInDwords; astcResourceNodes[5].offsetInDwords = 4 * m_bufferViewSizeInDwords + m_imageViewSizeInDwords; - astcResourceNodes[5].binding = 5; - astcResourceNodes[5].set = 0; + astcResourceNodes[5].binding = 5; + astcResourceNodes[5].set = 0; buildInfo.pUserDataNodes = astcResourceNodes; - buildInfo.shaderType = InternalTexConvertCsType::ConvertASTCToRGBA8; + buildInfo.shaderType = InternalTexConvertCsType::ConvertASTCToRGBA8; + GetSpvCode(buildInfo.shaderType, &(buildInfo.code.pSpvCode), &(buildInfo.code.spvSize)); + } + else + { + PAL_ASSERT(type == InternalTexConvertCsType::ConvertETC2ToRGBA8); + GpuDecodeMappingNode etc2ResourceNodes[Etc2InternalPipelineNodes]; + uint32 offset = 0; + buildInfo.nodeCount = 1; + + // 1. output + etc2ResourceNodes[0].nodeType = NodeType::Image; + etc2ResourceNodes[0].sizeInDwords = m_imageViewSizeInDwords; + etc2ResourceNodes[0].offsetInDwords = 0; + etc2ResourceNodes[0].binding = 0; + etc2ResourceNodes[0].set = 0; + + //2. 
input + etc2ResourceNodes[1].nodeType = NodeType::Image; + etc2ResourceNodes[1].sizeInDwords = m_imageViewSizeInDwords; + etc2ResourceNodes[1].offsetInDwords = 1 * m_imageViewSizeInDwords; + etc2ResourceNodes[1].binding = 1; + etc2ResourceNodes[1].set = 0; + + buildInfo.pUserDataNodes = etc2ResourceNodes; + buildInfo.shaderType = InternalTexConvertCsType::ConvertETC2ToRGBA8; GetSpvCode(buildInfo.shaderType, &(buildInfo.code.pSpvCode), &(buildInfo.code.spvSize)); } diff --git a/icd/imported/gputexdecoder/gpuTexDecoder.h b/icd/imported/gputexdecoder/gpuTexDecoder.h index 5e21913d..3d5bbde6 100755 --- a/icd/imported/gputexdecoder/gpuTexDecoder.h +++ b/icd/imported/gputexdecoder/gpuTexDecoder.h @@ -55,6 +55,7 @@ using Pal::uint32; using Pal::uint64; constexpr uint32 AstcInternalPipelineNodes = 6; +constexpr uint32 Etc2InternalPipelineNodes = 2; // Enum for internal texture format convert type enum class InternalTexConvertCsType : uint32 @@ -162,7 +163,7 @@ class Device const CompileTimeConstants& constInfo); private: - void CreateAstcUserData( + void CreateUserData( InternalTexConvertCsType type, uint32** ppUserData, uint32 srdDwords); @@ -217,6 +218,6 @@ class Device Pal::ICmdBuffer* m_pPalCmdBuffer; // The associated PAL cmdbuffer uint32 m_bufferViewSizeInDwords{0}; uint32 m_imageViewSizeInDwords{0}; - uint32 m_srdDwords; + uint32 m_srdDwords[static_cast(InternalTexConvertCsType::Count)]; }; } diff --git a/icd/imported/gputexdecoder/shaders.h b/icd/imported/gputexdecoder/shaders.h index e91f815a..b031da0b 100644 --- a/icd/imported/gputexdecoder/shaders.h +++ b/icd/imported/gputexdecoder/shaders.h @@ -23,3 +23,4 @@ * **********************************************************************************************************************/ #include "shaders/AstcDecode.h" +#include "shaders/Etc2Decode.h" diff --git a/icd/imported/gputexdecoder/shaders/Etc2Decode.h b/icd/imported/gputexdecoder/shaders/Etc2Decode.h new file mode 100644 index 00000000..3e2cf89e --- /dev/null 
+++ b/icd/imported/gputexdecoder/shaders/Etc2Decode.h @@ -0,0 +1,684 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************************************************************************/ + + // 1011.5.0 + #pragma once +const uint32_t Etc2Decoder[] = { + 0x07230203,0x00010000,0x0008000a,0x000003a6,0x00000000,0x00020011,0x00000001,0x00020011, + 0x00000038,0x0006000b,0x00000001,0x4c534c47,0x6474732e,0x3035342e,0x00000000,0x0003000e, + 0x00000000,0x00000001,0x0007000f,0x00000005,0x00000004,0x6e69616d,0x00000000,0x00000021, + 0x00000029,0x00060010,0x00000004,0x00000011,0x00000004,0x00000004,0x00000004,0x00030003, + 0x00000002,0x000001c2,0x000b0004,0x455f4c47,0x735f5458,0x6c706d61,0x656c7265,0x745f7373, + 0x75747865,0x665f6572,0x74636e75,0x736e6f69,0x00000000,0x00040005,0x00000004,0x6e69616d, + 0x00000000,0x00060005,0x00000009,0x6c697562,0x6f635f64,0x2864726f,0x00000000,0x00060005, + 0x0000000f,0x70696c66,0x646e655f,0x286e6169,0x003b3175,0x00030005,0x0000000e,0x00000076, + 0x00070005,0x00000015,0x70696c66,0x646e655f,0x286e6169,0x3b327576,0x00000000,0x00030005, + 0x00000014,0x00000076,0x00090005,0x0000001b,0x6f636564,0x655f6564,0x5f326374,0x68706c61, + 0x75762861,0x31693b32,0x0000003b,0x00040005,0x00000019,0x6c796170,0x0064616f,0x00060005, + 0x0000001a,0x656e696c,0x705f7261,0x6c657869,0x00000000,0x00040005,0x0000001e,0x65736162, + 0x00000000,0x00060005,0x00000021,0x575f6c67,0x476b726f,0x70756f72,0x00004449,0x00080005, + 0x00000029,0x4c5f6c67,0x6c61636f,0x6f766e49,0x69746163,0x44496e6f,0x00000000,0x00040005, + 0x0000004c,0x64726f77,0x00000073,0x00040005,0x0000006a,0x61726170,0x0000006d,0x00040005, + 0x0000006e,0x61726170,0x0000006d,0x00050005,0x00000075,0x5f746962,0x7366666f,0x00007465, + 0x00040005,0x0000007b,0x65736162,0x00000000,0x00050005,0x00000081,0x746c756d,0x696c7069, + 0x00007265,0x00040005,0x00000087,0x6c626174,0x00000065,0x00050005,0x0000008d,0x5f62736c, + 0x65646e69,0x00000078,0x00030005,0x0000009a,0x0062736d,0x00030005,0x000000a4,0x00646f6d, + 
0x00050005,0x000000c3,0x65646e69,0x6c626178,0x00000065,0x00030005,0x000000c9,0x00000061, + 0x00040005,0x000000d5,0x726f6f63,0x00000064,0x00040005,0x000000d9,0x54444957,0x00000048, + 0x00040005,0x000000db,0x47494548,0x00005448,0x00050005,0x000000e5,0x656c6974,0x6f6f635f, + 0x00006472,0x00050005,0x000000e9,0x65786970,0x6f635f6c,0x0064726f,0x00060005,0x000000ed, + 0x656e696c,0x705f7261,0x6c657869,0x00000000,0x00040005,0x000000f4,0x6c796170,0x0064616f, + 0x00040005,0x000000f7,0x706e4975,0x00007475,0x00050005,0x000000fb,0x48504c41,0x49425f41, + 0x00005354,0x00060005,0x000000ff,0x68706c61,0x61705f61,0x616f6c79,0x00000064,0x00040005, + 0x00000100,0x61726170,0x0000006d,0x00060005,0x00000104,0x68706c61,0x65725f61,0x746c7573, + 0x00000000,0x00040005,0x00000105,0x61726170,0x0000006d,0x00040005,0x00000107,0x61726170, + 0x0000006d,0x00060005,0x0000010a,0x6f6c6f63,0x61705f72,0x616f6c79,0x00000064,0x00040005, + 0x0000010b,0x61726170,0x0000006d,0x00040005,0x00000110,0x61726170,0x0000006d,0x00040005, + 0x00000114,0x70696c66,0x00000000,0x00050005,0x00000118,0x62627573,0x6b636f6c,0x00000000, + 0x00050005,0x00000120,0x31637465,0x6d6f635f,0x00746170,0x00060005,0x00000125,0x636e7570, + 0x72687468,0x6867756f,0x00000000,0x00050005,0x00000138,0x65736162,0x6267725f,0x00000000, + 0x00030005,0x0000014f,0x00000072,0x00030005,0x00000155,0x00006472,0x00030005,0x0000015a, + 0x00000067,0x00030005,0x00000160,0x00006467,0x00030005,0x00000165,0x00000062,0x00030005, + 0x0000016a,0x00006462,0x00030005,0x0000016f,0x00003172,0x00030005,0x00000173,0x00003167, + 0x00030005,0x00000177,0x00003162,0x00030005,0x00000181,0x00003172,0x00030005,0x0000018c, + 0x00003167,0x00030005,0x00000191,0x00003162,0x00030005,0x00000196,0x00003272,0x00030005, + 0x0000019b,0x00003267,0x00030005,0x000001a0,0x00003262,0x00030005,0x000001a5,0x00006164, + 0x00040005,0x000001ae,0x74736964,0x00000000,0x00050005,0x000001b7,0x65646e69,0x6c626178, + 0x00000065,0x00030005,0x000001ba,0x0062736d,0x00030005,0x000001c2,0x0062736c,0x00040005, 
+ 0x000001c9,0x65646e69,0x00000078,0x00050005,0x000001d7,0x5f626772,0x75736572,0x0000746c, + 0x00030005,0x000001e4,0x00646f6d,0x00030005,0x000001e7,0x00626772,0x00030005,0x000001fe, + 0x00003172,0x00030005,0x00000203,0x00003167,0x00030005,0x0000020f,0x00003162,0x00030005, + 0x0000021a,0x00003272,0x00030005,0x0000021f,0x00003267,0x00030005,0x00000224,0x00003262, + 0x00030005,0x00000229,0x00006164,0x00030005,0x0000022d,0x00006264,0x00030005,0x00000231, + 0x00000064,0x00040005,0x0000024a,0x74736964,0x00000000,0x00050005,0x0000024c,0x65646e69, + 0x6c626178,0x00000065,0x00030005,0x0000024f,0x0062736d,0x00030005,0x00000257,0x0062736c, + 0x00040005,0x00000265,0x65736162,0x00000000,0x00030005,0x00000278,0x00646f6d,0x00030005, + 0x0000028b,0x00000072,0x00030005,0x00000291,0x00000067,0x00030005,0x0000029d,0x00000062, + 0x00030005,0x000002ae,0x00006872,0x00030005,0x000002b9,0x00007672,0x00030005,0x000002bf, + 0x00006867,0x00030005,0x000002c4,0x00007667,0x00030005,0x000002c9,0x00006862,0x00030005, + 0x000002ce,0x00007662,0x00030005,0x00000300,0x00626772,0x00030005,0x00000305,0x00007864, + 0x00030005,0x0000030c,0x00007964,0x00070005,0x00000342,0x31637465,0x6261745f,0x695f656c, + 0x7865646e,0x00000000,0x00030005,0x0000034b,0x0062736d,0x00030005,0x00000353,0x0062736c, + 0x00030005,0x0000035a,0x006e6773,0x00040005,0x00000367,0x7366666f,0x00007465,0x00050005, + 0x0000037d,0x65646e69,0x6c626178,0x00000065,0x00050005,0x00000392,0x4374756f,0x726f6c6f, + 0x00000000,0x00040005,0x000003a1,0x74754f75,0x00747570,0x00040047,0x00000021,0x0000000b, + 0x0000001a,0x00040047,0x00000029,0x0000000b,0x0000001b,0x00040047,0x000000d9,0x00000001, + 0x00000001,0x00040047,0x000000db,0x00000001,0x00000002,0x00040047,0x000000f7,0x00000022, + 0x00000000,0x00040047,0x000000f7,0x00000021,0x00000001,0x00040047,0x000000fb,0x00000001, + 0x00000000,0x00040047,0x000003a1,0x00000022,0x00000000,0x00040047,0x000003a1,0x00000021, + 
0x00000000,0x00030047,0x000003a1,0x00000019,0x00040047,0x000003a5,0x0000000b,0x00000019, + 0x00020013,0x00000002,0x00030021,0x00000003,0x00000002,0x00040015,0x00000006,0x00000020, + 0x00000001,0x00040017,0x00000007,0x00000006,0x00000002,0x00030021,0x00000008,0x00000007, + 0x00040015,0x0000000b,0x00000020,0x00000000,0x00040020,0x0000000c,0x00000007,0x0000000b, + 0x00040021,0x0000000d,0x0000000b,0x0000000c,0x00040017,0x00000011,0x0000000b,0x00000002, + 0x00040020,0x00000012,0x00000007,0x00000011,0x00040021,0x00000013,0x00000011,0x00000012, + 0x00040020,0x00000017,0x00000007,0x00000006,0x00050021,0x00000018,0x0000000b,0x00000012, + 0x00000017,0x00040020,0x0000001d,0x00000007,0x00000007,0x00040017,0x0000001f,0x0000000b, + 0x00000003,0x00040020,0x00000020,0x00000001,0x0000001f,0x0004003b,0x00000020,0x00000021, + 0x00000001,0x0004002b,0x00000006,0x00000025,0x00000008,0x0004002b,0x00000006,0x00000028, + 0x00000004,0x0004003b,0x00000020,0x00000029,0x00000001,0x0004002b,0x0000000b,0x0000002a, + 0x00000002,0x00040020,0x0000002b,0x00000001,0x0000000b,0x0004002b,0x00000006,0x0000002f, + 0x00000001,0x0004002b,0x0000000b,0x00000032,0x00000000,0x0004002b,0x00000006,0x00000037, + 0x00000002,0x0004002b,0x0000000b,0x0000003d,0x00000001,0x00040017,0x0000004a,0x0000000b, + 0x00000004,0x00040020,0x0000004b,0x00000007,0x0000004a,0x0004002b,0x0000000b,0x0000004f, + 0x00000008,0x0004002b,0x0000000b,0x00000050,0x00000010,0x0004002b,0x0000000b,0x00000051, + 0x00000018,0x0007002c,0x0000004a,0x00000052,0x00000032,0x0000004f,0x00000050,0x00000051, + 0x0004002b,0x0000000b,0x00000054,0x000000ff,0x0004002b,0x0000000b,0x00000063,0x00000003, + 0x0004002b,0x00000006,0x00000076,0x0000002d,0x0004002b,0x00000006,0x00000077,0x00000003, + 0x0004002b,0x00000006,0x0000007e,0x00000018,0x0004002b,0x00000006,0x00000084,0x00000014, + 0x0004002b,0x00000006,0x0000008a,0x00000010,0x0004002b,0x00000006,0x0000008f,0x00000005, + 0x0004002b,0x00000006,0x00000094,0x0000001f,0x00040017,0x000000a5,0x00000006,0x00000004, 
+ 0x0004001c,0x000000a6,0x000000a5,0x00000050,0x0004002b,0x00000006,0x000000a7,0x0000000e, + 0x0007002c,0x000000a5,0x000000a8,0x00000037,0x0000008f,0x00000025,0x000000a7,0x0004002b, + 0x00000006,0x000000a9,0x00000006,0x0004002b,0x00000006,0x000000aa,0x00000009,0x0004002b, + 0x00000006,0x000000ab,0x0000000c,0x0007002c,0x000000a5,0x000000ac,0x00000037,0x000000a9, + 0x000000aa,0x000000ab,0x0004002b,0x00000006,0x000000ad,0x00000007,0x0007002c,0x000000a5, + 0x000000ae,0x0000002f,0x00000028,0x000000ad,0x000000ab,0x0007002c,0x000000a5,0x000000af, + 0x0000002f,0x00000077,0x0000008f,0x000000ab,0x0004002b,0x00000006,0x000000b0,0x0000000b, + 0x0007002c,0x000000a5,0x000000b1,0x00000037,0x0000008f,0x000000ad,0x000000b0,0x0004002b, + 0x00000006,0x000000b2,0x0000000a,0x0007002c,0x000000a5,0x000000b3,0x00000037,0x000000a9, + 0x00000025,0x000000b2,0x0007002c,0x000000a5,0x000000b4,0x00000077,0x000000a9,0x000000ad, + 0x000000b2,0x0007002c,0x000000a5,0x000000b5,0x00000037,0x00000028,0x000000ad,0x000000b2, + 0x0007002c,0x000000a5,0x000000b6,0x0000002f,0x0000008f,0x000000ad,0x000000aa,0x0007002c, + 0x000000a5,0x000000b7,0x0000002f,0x00000028,0x000000ad,0x000000aa,0x0007002c,0x000000a5, + 0x000000b8,0x0000002f,0x00000077,0x000000ad,0x000000aa,0x0007002c,0x000000a5,0x000000b9, + 0x0000002f,0x00000028,0x000000a9,0x000000aa,0x0007002c,0x000000a5,0x000000ba,0x00000037, + 0x00000077,0x000000a9,0x000000aa,0x0004002b,0x00000006,0x000000bb,0x00000000,0x0007002c, + 0x000000a5,0x000000bc,0x000000bb,0x0000002f,0x00000037,0x000000aa,0x0007002c,0x000000a5, + 0x000000bd,0x00000077,0x0000008f,0x000000ad,0x00000025,0x0007002c,0x000000a5,0x000000be, + 0x00000037,0x00000028,0x000000a9,0x00000025,0x0013002c,0x000000a6,0x000000bf,0x000000a8, + 0x000000ac,0x000000ae,0x000000af,0x000000b1,0x000000b3,0x000000b4,0x000000b5,0x000000b6, + 0x000000b7,0x000000b8,0x000000b9,0x000000ba,0x000000bc,0x000000bd,0x000000be,0x00040020, + 
0x000000c2,0x00000007,0x000000a6,0x0004002b,0x00000006,0x000000d0,0x000000ff,0x00040032, + 0x00000006,0x000000d9,0x00000000,0x00060034,0x0000000b,0x000000da,0x00000080,0x000000d9, + 0x00000032,0x00040032,0x00000006,0x000000db,0x00000000,0x00060034,0x0000000b,0x000000dc, + 0x00000080,0x000000db,0x00000032,0x00050033,0x00000011,0x000000dd,0x000000da,0x000000dc, + 0x00020014,0x000000de,0x00040017,0x000000df,0x000000de,0x00000002,0x00090019,0x000000f5, + 0x0000000b,0x00000001,0x00000000,0x00000000,0x00000000,0x00000001,0x00000000,0x00040020, + 0x000000f6,0x00000000,0x000000f5,0x0004003b,0x000000f6,0x000000f7,0x00000000,0x00040032, + 0x00000006,0x000000fb,0x00000000,0x00060034,0x000000de,0x000000fc,0x000000aa,0x000000fb, + 0x00000025,0x00040020,0x0000011f,0x00000007,0x000000de,0x0003002a,0x000000de,0x00000121, + 0x00060034,0x000000de,0x00000122,0x000000aa,0x000000fb,0x0000002f,0x00060034,0x000000de, + 0x0000012b,0x000000ab,0x000000fb,0x0000002f,0x00030029,0x000000de,0x00000135,0x00040017, + 0x00000136,0x00000006,0x00000003,0x00040020,0x00000137,0x00000007,0x00000136,0x0004002b, + 0x0000000b,0x0000013b,0x0000001c,0x0004002b,0x0000000b,0x0000013c,0x00000014,0x0004002b, + 0x0000000b,0x0000013d,0x0000000c,0x0006002c,0x0000001f,0x0000013e,0x0000013b,0x0000013c, + 0x0000013d,0x0004002b,0x0000000b,0x0000013f,0x00000004,0x0004002b,0x00000006,0x00000146, + 0x0000000f,0x0004002b,0x00000006,0x0000014a,0x00000011,0x0004002b,0x00000006,0x00000152, + 0x0000001b,0x0004002b,0x00000006,0x0000015d,0x00000013,0x0004002b,0x0000000b,0x0000017d, + 0x0000001f,0x0004001c,0x000001af,0x00000006,0x0000004f,0x0004002b,0x00000006,0x000001b0, + 0x00000017,0x0004002b,0x00000006,0x000001b1,0x00000020,0x0004002b,0x00000006,0x000001b2, + 0x00000029,0x0004002b,0x00000006,0x000001b3,0x00000040,0x000b002c,0x000001af,0x000001b4, + 0x00000077,0x000000a9,0x000000b0,0x0000008a,0x000001b0,0x000001b1,0x000001b2,0x000001b3, + 0x00040020,0x000001b6,0x00000007,0x000001af,0x00040020,0x000001d6,0x00000007,0x0000001f, 
+ 0x0004002b,0x0000000b,0x000001df,0x00000011,0x0006002c,0x00000136,0x000001f4,0x000000bb, + 0x000000bb,0x000000bb,0x0006002c,0x00000136,0x000001f5,0x000000d0,0x000000d0,0x000000d0, + 0x0004002b,0x00000006,0x00000237,0x00010000,0x0004002b,0x00000006,0x0000023a,0x00000100, + 0x0004002b,0x00000006,0x0000028e,0x00000019,0x0004002b,0x00000006,0x00000298,0x00000012, + 0x0004002b,0x00000006,0x000002bc,0x0000000d,0x0004001c,0x00000368,0x00000007,0x0000004f, + 0x0005002c,0x00000007,0x00000369,0x00000037,0x00000025,0x0005002c,0x00000007,0x0000036a, + 0x0000008f,0x0000014a,0x0004002b,0x00000006,0x0000036b,0x0000001d,0x0005002c,0x00000007, + 0x0000036c,0x000000aa,0x0000036b,0x0004002b,0x00000006,0x0000036d,0x0000002a,0x0005002c, + 0x00000007,0x0000036e,0x000002bc,0x0000036d,0x0004002b,0x00000006,0x0000036f,0x0000003c, + 0x0005002c,0x00000007,0x00000370,0x00000298,0x0000036f,0x0004002b,0x00000006,0x00000371, + 0x00000050,0x0005002c,0x00000007,0x00000372,0x0000007e,0x00000371,0x0004002b,0x00000006, + 0x00000373,0x00000021,0x0004002b,0x00000006,0x00000374,0x0000006a,0x0005002c,0x00000007, + 0x00000375,0x00000373,0x00000374,0x0004002b,0x00000006,0x00000376,0x0000002f,0x0004002b, + 0x00000006,0x00000377,0x000000b7,0x0005002c,0x00000007,0x00000378,0x00000376,0x00000377, + 0x000b002c,0x00000368,0x00000379,0x00000369,0x0000036a,0x0000036c,0x0000036e,0x00000370, + 0x00000372,0x00000375,0x00000378,0x00040020,0x0000037c,0x00000007,0x00000368,0x00060034, + 0x000000de,0x00000389,0x000000aa,0x000000fb,0x0000002f,0x0006002c,0x0000001f,0x0000038e, + 0x00000032,0x00000032,0x00000032,0x00030016,0x0000038f,0x00000020,0x00040017,0x00000390, + 0x0000038f,0x00000004,0x00040020,0x00000391,0x00000007,0x00000390,0x00040017,0x00000394, + 0x0000038f,0x00000003,0x0004002b,0x0000038f,0x0000039c,0x437f0000,0x00090019,0x0000039f, + 0x0000038f,0x00000001,0x00000000,0x00000000,0x00000000,0x00000002,0x00000000,0x00040020, + 
0x000003a0,0x00000000,0x0000039f,0x0004003b,0x000003a0,0x000003a1,0x00000000,0x0006002c, + 0x0000001f,0x000003a5,0x0000013f,0x0000013f,0x0000013f,0x00050036,0x00000002,0x00000004, + 0x00000000,0x00000003,0x000200f8,0x00000005,0x0004003b,0x0000001d,0x000000d5,0x00000007, + 0x0004003b,0x0000001d,0x000000e5,0x00000007,0x0004003b,0x0000001d,0x000000e9,0x00000007, + 0x0004003b,0x00000017,0x000000ed,0x00000007,0x0004003b,0x0000004b,0x000000f4,0x00000007, + 0x0004003b,0x00000012,0x000000ff,0x00000007,0x0004003b,0x00000012,0x00000100,0x00000007, + 0x0004003b,0x0000000c,0x00000104,0x00000007,0x0004003b,0x00000012,0x00000105,0x00000007, + 0x0004003b,0x00000017,0x00000107,0x00000007,0x0004003b,0x00000012,0x0000010a,0x00000007, + 0x0004003b,0x00000012,0x0000010b,0x00000007,0x0004003b,0x00000012,0x00000110,0x00000007, + 0x0004003b,0x0000000c,0x00000114,0x00000007,0x0004003b,0x0000000c,0x00000118,0x00000007, + 0x0004003b,0x0000011f,0x00000120,0x00000007,0x0004003b,0x0000011f,0x00000125,0x00000007, + 0x0004003b,0x00000137,0x00000138,0x00000007,0x0004003b,0x00000017,0x0000014f,0x00000007, + 0x0004003b,0x00000017,0x00000155,0x00000007,0x0004003b,0x00000017,0x0000015a,0x00000007, + 0x0004003b,0x00000017,0x00000160,0x00000007,0x0004003b,0x00000017,0x00000165,0x00000007, + 0x0004003b,0x00000017,0x0000016a,0x00000007,0x0004003b,0x00000017,0x0000016f,0x00000007, + 0x0004003b,0x00000017,0x00000173,0x00000007,0x0004003b,0x00000017,0x00000177,0x00000007, + 0x0004003b,0x00000017,0x00000181,0x00000007,0x0004003b,0x00000017,0x0000018c,0x00000007, + 0x0004003b,0x00000017,0x00000191,0x00000007,0x0004003b,0x00000017,0x00000196,0x00000007, + 0x0004003b,0x00000017,0x0000019b,0x00000007,0x0004003b,0x00000017,0x000001a0,0x00000007, + 0x0004003b,0x0000000c,0x000001a5,0x00000007,0x0004003b,0x00000017,0x000001ae,0x00000007, + 0x0004003b,0x000001b6,0x000001b7,0x00000007,0x0004003b,0x00000017,0x000001ba,0x00000007, + 0x0004003b,0x00000017,0x000001c2,0x00000007,0x0004003b,0x00000017,0x000001c9,0x00000007, 
+ 0x0004003b,0x000001d6,0x000001d7,0x00000007,0x0004003b,0x00000017,0x000001e4,0x00000007, + 0x0004003b,0x00000137,0x000001e7,0x00000007,0x0004003b,0x00000017,0x000001fe,0x00000007, + 0x0004003b,0x00000017,0x00000203,0x00000007,0x0004003b,0x00000017,0x0000020f,0x00000007, + 0x0004003b,0x00000017,0x0000021a,0x00000007,0x0004003b,0x00000017,0x0000021f,0x00000007, + 0x0004003b,0x00000017,0x00000224,0x00000007,0x0004003b,0x0000000c,0x00000229,0x00000007, + 0x0004003b,0x0000000c,0x0000022d,0x00000007,0x0004003b,0x0000000c,0x00000231,0x00000007, + 0x0004003b,0x00000017,0x0000024a,0x00000007,0x0004003b,0x000001b6,0x0000024c,0x00000007, + 0x0004003b,0x00000017,0x0000024f,0x00000007,0x0004003b,0x00000017,0x00000257,0x00000007, + 0x0004003b,0x00000137,0x00000265,0x00000007,0x0004003b,0x00000137,0x00000268,0x00000007, + 0x0004003b,0x00000017,0x00000278,0x00000007,0x0004003b,0x00000017,0x0000028b,0x00000007, + 0x0004003b,0x00000017,0x00000291,0x00000007,0x0004003b,0x00000017,0x0000029d,0x00000007, + 0x0004003b,0x00000017,0x000002ae,0x00000007,0x0004003b,0x00000017,0x000002b9,0x00000007, + 0x0004003b,0x00000017,0x000002bf,0x00000007,0x0004003b,0x00000017,0x000002c4,0x00000007, + 0x0004003b,0x00000017,0x000002c9,0x00000007,0x0004003b,0x00000017,0x000002ce,0x00000007, + 0x0004003b,0x00000137,0x00000300,0x00000007,0x0004003b,0x00000137,0x00000305,0x00000007, + 0x0004003b,0x00000137,0x0000030c,0x00000007,0x0004003b,0x0000000c,0x00000342,0x00000007, + 0x0004003b,0x00000017,0x0000034b,0x00000007,0x0004003b,0x00000017,0x00000353,0x00000007, + 0x0004003b,0x00000017,0x0000035a,0x00000007,0x0004003b,0x00000017,0x00000367,0x00000007, + 0x0004003b,0x0000037c,0x0000037d,0x00000007,0x0004003b,0x00000391,0x00000392,0x00000007, + 0x00040039,0x00000007,0x000000d6,0x00000009,0x0003003e,0x000000d5,0x000000d6,0x0004003d, + 0x00000007,0x000000d7,0x000000d5,0x0004007c,0x00000011,0x000000d8,0x000000d7,0x000500ae, + 
0x000000df,0x000000e0,0x000000d8,0x000000dd,0x0004009a,0x000000de,0x000000e1,0x000000e0, + 0x000300f7,0x000000e3,0x00000000,0x000400fa,0x000000e1,0x000000e2,0x000000e3,0x000200f8, + 0x000000e2,0x000100fd,0x000200f8,0x000000e3,0x0004003d,0x00000007,0x000000e6,0x000000d5, + 0x00050050,0x00000007,0x000000e7,0x00000037,0x00000037,0x000500c3,0x00000007,0x000000e8, + 0x000000e6,0x000000e7,0x0003003e,0x000000e5,0x000000e8,0x0004003d,0x00000007,0x000000ea, + 0x000000d5,0x00050050,0x00000007,0x000000eb,0x00000077,0x00000077,0x000500c7,0x00000007, + 0x000000ec,0x000000ea,0x000000eb,0x0003003e,0x000000e9,0x000000ec,0x00050041,0x00000017, + 0x000000ee,0x000000e9,0x00000032,0x0004003d,0x00000006,0x000000ef,0x000000ee,0x00050084, + 0x00000006,0x000000f0,0x00000028,0x000000ef,0x00050041,0x00000017,0x000000f1,0x000000e9, + 0x0000003d,0x0004003d,0x00000006,0x000000f2,0x000000f1,0x00050080,0x00000006,0x000000f3, + 0x000000f0,0x000000f2,0x0003003e,0x000000ed,0x000000f3,0x0004003d,0x000000f5,0x000000f8, + 0x000000f7,0x0004003d,0x00000007,0x000000f9,0x000000e5,0x0007005f,0x0000004a,0x000000fa, + 0x000000f8,0x000000f9,0x00000002,0x000000bb,0x0003003e,0x000000f4,0x000000fa,0x000300f7, + 0x000000fe,0x00000000,0x000400fa,0x000000fc,0x000000fd,0x0000010f,0x000200f8,0x000000fd, + 0x0004003d,0x0000004a,0x00000101,0x000000f4,0x0007004f,0x00000011,0x00000102,0x00000101, + 0x00000101,0x00000000,0x00000001,0x0003003e,0x00000100,0x00000102,0x00050039,0x00000011, + 0x00000103,0x00000015,0x00000100,0x0003003e,0x000000ff,0x00000103,0x0004003d,0x00000011, + 0x00000106,0x000000ff,0x0003003e,0x00000105,0x00000106,0x0004003d,0x00000006,0x00000108, + 0x000000ed,0x0003003e,0x00000107,0x00000108,0x00060039,0x0000000b,0x00000109,0x0000001b, + 0x00000105,0x00000107,0x0003003e,0x00000104,0x00000109,0x0004003d,0x0000004a,0x0000010c, + 0x000000f4,0x0007004f,0x00000011,0x0000010d,0x0000010c,0x0000010c,0x00000002,0x00000003, + 0x0003003e,0x0000010b,0x0000010d,0x00050039,0x00000011,0x0000010e,0x00000015,0x0000010b, 
+ 0x0003003e,0x0000010a,0x0000010e,0x000200f9,0x000000fe,0x000200f8,0x0000010f,0x0004003d, + 0x0000004a,0x00000111,0x000000f4,0x0007004f,0x00000011,0x00000112,0x00000111,0x00000111, + 0x00000000,0x00000001,0x0003003e,0x00000110,0x00000112,0x00050039,0x00000011,0x00000113, + 0x00000015,0x00000110,0x0003003e,0x0000010a,0x00000113,0x0003003e,0x00000104,0x00000054, + 0x000200f9,0x000000fe,0x000200f8,0x000000fe,0x00050041,0x0000000c,0x00000115,0x0000010a, + 0x0000003d,0x0004003d,0x0000000b,0x00000116,0x00000115,0x000500c7,0x0000000b,0x00000117, + 0x00000116,0x0000003d,0x0003003e,0x00000114,0x00000117,0x0004003d,0x0000000b,0x00000119, + 0x00000114,0x00050041,0x00000017,0x0000011a,0x000000e9,0x00000119,0x0004003d,0x00000006, + 0x0000011b,0x0000011a,0x0004007c,0x0000000b,0x0000011c,0x0000011b,0x000500c7,0x0000000b, + 0x0000011d,0x0000011c,0x0000002a,0x000500c2,0x0000000b,0x0000011e,0x0000011d,0x0000003d, + 0x0003003e,0x00000118,0x0000011e,0x0003003e,0x00000120,0x00000121,0x000300f7,0x00000124, + 0x00000000,0x000400fa,0x00000122,0x00000123,0x0000012a,0x000200f8,0x00000123,0x00050041, + 0x0000000c,0x00000126,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x00000127,0x00000126, + 0x000500c7,0x0000000b,0x00000128,0x00000127,0x0000002a,0x000500aa,0x000000de,0x00000129, + 0x00000128,0x00000032,0x0003003e,0x00000125,0x00000129,0x000200f9,0x00000124,0x000200f8, + 0x0000012a,0x0003003e,0x00000125,0x00000121,0x000200f9,0x00000124,0x000200f8,0x00000124, + 0x000300f7,0x0000012d,0x00000000,0x000400fa,0x0000012b,0x0000012c,0x0000012d,0x000200f8, + 0x0000012c,0x00050041,0x0000000c,0x0000012e,0x0000010a,0x0000003d,0x0004003d,0x0000000b, + 0x0000012f,0x0000012e,0x000500c7,0x0000000b,0x00000130,0x0000012f,0x0000002a,0x000500aa, + 0x000000de,0x00000131,0x00000130,0x00000032,0x000200f9,0x0000012d,0x000200f8,0x0000012d, + 0x000700f5,0x000000de,0x00000132,0x0000012b,0x00000124,0x00000131,0x0000012c,0x000300f7, + 
0x00000134,0x00000000,0x000400fa,0x00000132,0x00000133,0x0000014e,0x000200f8,0x00000133, + 0x0003003e,0x00000120,0x00000135,0x0004003d,0x00000011,0x00000139,0x0000010a,0x0008004f, + 0x0000001f,0x0000013a,0x00000139,0x00000139,0x00000001,0x00000001,0x00000001,0x0004003d, + 0x0000000b,0x00000140,0x00000118,0x00050084,0x0000000b,0x00000141,0x0000013f,0x00000140, + 0x00060050,0x0000001f,0x00000142,0x00000141,0x00000141,0x00000141,0x00050082,0x0000001f, + 0x00000143,0x0000013e,0x00000142,0x000500c2,0x0000001f,0x00000144,0x0000013a,0x00000143, + 0x0004007c,0x00000136,0x00000145,0x00000144,0x0003003e,0x00000138,0x00000145,0x0004003d, + 0x00000136,0x00000147,0x00000138,0x00060050,0x00000136,0x00000148,0x00000146,0x00000146, + 0x00000146,0x000500c7,0x00000136,0x00000149,0x00000147,0x00000148,0x0003003e,0x00000138, + 0x00000149,0x0004003d,0x00000136,0x0000014b,0x00000138,0x00060050,0x00000136,0x0000014c, + 0x0000014a,0x0000014a,0x0000014a,0x00050084,0x00000136,0x0000014d,0x0000014b,0x0000014c, + 0x0003003e,0x00000138,0x0000014d,0x000200f9,0x00000134,0x000200f8,0x0000014e,0x00050041, + 0x0000000c,0x00000150,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x00000151,0x00000150, + 0x000600cb,0x0000000b,0x00000153,0x00000151,0x00000152,0x0000008f,0x0004007c,0x00000006, + 0x00000154,0x00000153,0x0003003e,0x0000014f,0x00000154,0x00050041,0x0000000c,0x00000156, + 0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x00000157,0x00000156,0x0004007c,0x00000006, + 0x00000158,0x00000157,0x000600ca,0x00000006,0x00000159,0x00000158,0x0000007e,0x00000077, + 0x0003003e,0x00000155,0x00000159,0x00050041,0x0000000c,0x0000015b,0x0000010a,0x0000003d, + 0x0004003d,0x0000000b,0x0000015c,0x0000015b,0x000600cb,0x0000000b,0x0000015e,0x0000015c, + 0x0000015d,0x0000008f,0x0004007c,0x00000006,0x0000015f,0x0000015e,0x0003003e,0x0000015a, + 0x0000015f,0x00050041,0x0000000c,0x00000161,0x0000010a,0x0000003d,0x0004003d,0x0000000b, + 0x00000162,0x00000161,0x0004007c,0x00000006,0x00000163,0x00000162,0x000600ca,0x00000006, 
+ 0x00000164,0x00000163,0x0000008a,0x00000077,0x0003003e,0x00000160,0x00000164,0x00050041, + 0x0000000c,0x00000166,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x00000167,0x00000166, + 0x000600cb,0x0000000b,0x00000168,0x00000167,0x000000b0,0x0000008f,0x0004007c,0x00000006, + 0x00000169,0x00000168,0x0003003e,0x00000165,0x00000169,0x00050041,0x0000000c,0x0000016b, + 0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x0000016c,0x0000016b,0x0004007c,0x00000006, + 0x0000016d,0x0000016c,0x000600ca,0x00000006,0x0000016e,0x0000016d,0x00000025,0x00000077, + 0x0003003e,0x0000016a,0x0000016e,0x0004003d,0x00000006,0x00000170,0x0000014f,0x0004003d, + 0x00000006,0x00000171,0x00000155,0x00050080,0x00000006,0x00000172,0x00000170,0x00000171, + 0x0003003e,0x0000016f,0x00000172,0x0004003d,0x00000006,0x00000174,0x0000015a,0x0004003d, + 0x00000006,0x00000175,0x00000160,0x00050080,0x00000006,0x00000176,0x00000174,0x00000175, + 0x0003003e,0x00000173,0x00000176,0x0004003d,0x00000006,0x00000178,0x00000165,0x0004003d, + 0x00000006,0x00000179,0x0000016a,0x00050080,0x00000006,0x0000017a,0x00000178,0x00000179, + 0x0003003e,0x00000177,0x0000017a,0x0004003d,0x00000006,0x0000017b,0x0000016f,0x0004007c, + 0x0000000b,0x0000017c,0x0000017b,0x000500ac,0x000000de,0x0000017e,0x0000017c,0x0000017d, + 0x000300f7,0x00000180,0x00000000,0x000400fa,0x0000017e,0x0000017f,0x000001f8,0x000200f8, + 0x0000017f,0x00050041,0x0000000c,0x00000182,0x0000010a,0x0000003d,0x0004003d,0x0000000b, + 0x00000183,0x00000182,0x000600cb,0x0000000b,0x00000184,0x00000183,0x0000007e,0x00000037, + 0x0004007c,0x00000006,0x00000185,0x00000184,0x00050041,0x0000000c,0x00000186,0x0000010a, + 0x0000003d,0x0004003d,0x0000000b,0x00000187,0x00000186,0x000600cb,0x0000000b,0x00000188, + 0x00000187,0x00000152,0x00000037,0x0004007c,0x00000006,0x00000189,0x00000188,0x000500c4, + 0x00000006,0x0000018a,0x00000189,0x00000037,0x000500c5,0x00000006,0x0000018b,0x00000185, + 
0x0000018a,0x0003003e,0x00000181,0x0000018b,0x00050041,0x0000000c,0x0000018d,0x0000010a, + 0x0000003d,0x0004003d,0x0000000b,0x0000018e,0x0000018d,0x000600cb,0x0000000b,0x0000018f, + 0x0000018e,0x00000084,0x00000028,0x0004007c,0x00000006,0x00000190,0x0000018f,0x0003003e, + 0x0000018c,0x00000190,0x00050041,0x0000000c,0x00000192,0x0000010a,0x0000003d,0x0004003d, + 0x0000000b,0x00000193,0x00000192,0x000600cb,0x0000000b,0x00000194,0x00000193,0x0000008a, + 0x00000028,0x0004007c,0x00000006,0x00000195,0x00000194,0x0003003e,0x00000191,0x00000195, + 0x00050041,0x0000000c,0x00000197,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x00000198, + 0x00000197,0x000600cb,0x0000000b,0x00000199,0x00000198,0x000000ab,0x00000028,0x0004007c, + 0x00000006,0x0000019a,0x00000199,0x0003003e,0x00000196,0x0000019a,0x00050041,0x0000000c, + 0x0000019c,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x0000019d,0x0000019c,0x000600cb, + 0x0000000b,0x0000019e,0x0000019d,0x00000025,0x00000028,0x0004007c,0x00000006,0x0000019f, + 0x0000019e,0x0003003e,0x0000019b,0x0000019f,0x00050041,0x0000000c,0x000001a1,0x0000010a, + 0x0000003d,0x0004003d,0x0000000b,0x000001a2,0x000001a1,0x000600cb,0x0000000b,0x000001a3, + 0x000001a2,0x00000028,0x00000028,0x0004007c,0x00000006,0x000001a4,0x000001a3,0x0003003e, + 0x000001a0,0x000001a4,0x00050041,0x0000000c,0x000001a6,0x0000010a,0x0000003d,0x0004003d, + 0x0000000b,0x000001a7,0x000001a6,0x000600cb,0x0000000b,0x000001a8,0x000001a7,0x00000037, + 0x00000037,0x000500c4,0x0000000b,0x000001a9,0x000001a8,0x0000002f,0x00050041,0x0000000c, + 0x000001aa,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x000001ab,0x000001aa,0x000500c7, + 0x0000000b,0x000001ac,0x000001ab,0x0000003d,0x000500c5,0x0000000b,0x000001ad,0x000001a9, + 0x000001ac,0x0003003e,0x000001a5,0x000001ad,0x0004003d,0x0000000b,0x000001b5,0x000001a5, + 0x0003003e,0x000001b7,0x000001b4,0x00050041,0x00000017,0x000001b8,0x000001b7,0x000001b5, + 0x0004003d,0x00000006,0x000001b9,0x000001b8,0x0003003e,0x000001ae,0x000001b9,0x00050041, 
+ 0x0000000c,0x000001bb,0x0000010a,0x00000032,0x0004003d,0x0000000b,0x000001bc,0x000001bb, + 0x0004003d,0x00000006,0x000001bd,0x000000ed,0x00050080,0x00000006,0x000001be,0x00000146, + 0x000001bd,0x000500c2,0x0000000b,0x000001bf,0x000001bc,0x000001be,0x000500c7,0x0000000b, + 0x000001c0,0x000001bf,0x0000002a,0x0004007c,0x00000006,0x000001c1,0x000001c0,0x0003003e, + 0x000001ba,0x000001c1,0x00050041,0x0000000c,0x000001c3,0x0000010a,0x00000032,0x0004003d, + 0x0000000b,0x000001c4,0x000001c3,0x0004003d,0x00000006,0x000001c5,0x000000ed,0x000500c2, + 0x0000000b,0x000001c6,0x000001c4,0x000001c5,0x000500c7,0x0000000b,0x000001c7,0x000001c6, + 0x0000003d,0x0004007c,0x00000006,0x000001c8,0x000001c7,0x0003003e,0x000001c2,0x000001c8, + 0x0004003d,0x00000006,0x000001ca,0x000001ba,0x0004003d,0x00000006,0x000001cb,0x000001c2, + 0x000500c5,0x00000006,0x000001cc,0x000001ca,0x000001cb,0x0003003e,0x000001c9,0x000001cc, + 0x0004003d,0x000000de,0x000001cd,0x00000125,0x000300f7,0x000001cf,0x00000000,0x000400fa, + 0x000001cd,0x000001ce,0x000001cf,0x000200f8,0x000001ce,0x0004003d,0x00000006,0x000001d0, + 0x000001c9,0x000500aa,0x000000de,0x000001d1,0x000001d0,0x00000037,0x0003003e,0x00000125, + 0x000001d1,0x000200f9,0x000001cf,0x000200f8,0x000001cf,0x0004003d,0x00000006,0x000001d2, + 0x000001c9,0x000500aa,0x000000de,0x000001d3,0x000001d2,0x000000bb,0x000300f7,0x000001d5, + 0x00000000,0x000400fa,0x000001d3,0x000001d4,0x000001e3,0x000200f8,0x000001d4,0x0004003d, + 0x00000006,0x000001d8,0x00000181,0x0004007c,0x0000000b,0x000001d9,0x000001d8,0x0004003d, + 0x00000006,0x000001da,0x0000018c,0x0004007c,0x0000000b,0x000001db,0x000001da,0x0004003d, + 0x00000006,0x000001dc,0x00000191,0x0004007c,0x0000000b,0x000001dd,0x000001dc,0x00060050, + 0x0000001f,0x000001de,0x000001d9,0x000001db,0x000001dd,0x0003003e,0x000001d7,0x000001de, + 0x0004003d,0x0000001f,0x000001e0,0x000001d7,0x00060050,0x0000001f,0x000001e1,0x000001df, + 
0x000001df,0x000001df,0x00050084,0x0000001f,0x000001e2,0x000001e0,0x000001e1,0x0003003e, + 0x000001d7,0x000001e2,0x000200f9,0x000001d5,0x000200f8,0x000001e3,0x0004003d,0x00000006, + 0x000001e5,0x000001c9,0x00050082,0x00000006,0x000001e6,0x00000037,0x000001e5,0x0003003e, + 0x000001e4,0x000001e6,0x0004003d,0x00000006,0x000001e8,0x00000196,0x0004003d,0x00000006, + 0x000001e9,0x0000019b,0x0004003d,0x00000006,0x000001ea,0x000001a0,0x00060050,0x00000136, + 0x000001eb,0x000001e8,0x000001e9,0x000001ea,0x00060050,0x00000136,0x000001ec,0x0000014a, + 0x0000014a,0x0000014a,0x00050084,0x00000136,0x000001ed,0x000001eb,0x000001ec,0x0004003d, + 0x00000006,0x000001ee,0x000001e4,0x0004003d,0x00000006,0x000001ef,0x000001ae,0x00050084, + 0x00000006,0x000001f0,0x000001ee,0x000001ef,0x00060050,0x00000136,0x000001f1,0x000001f0, + 0x000001f0,0x000001f0,0x00050080,0x00000136,0x000001f2,0x000001ed,0x000001f1,0x0003003e, + 0x000001e7,0x000001f2,0x0004003d,0x00000136,0x000001f3,0x000001e7,0x0008000c,0x00000136, + 0x000001f6,0x00000001,0x0000002d,0x000001f3,0x000001f4,0x000001f5,0x0004007c,0x0000001f, + 0x000001f7,0x000001f6,0x0003003e,0x000001d7,0x000001f7,0x000200f9,0x000001d5,0x000200f8, + 0x000001d5,0x000200f9,0x00000180,0x000200f8,0x000001f8,0x0004003d,0x00000006,0x000001f9, + 0x00000173,0x0004007c,0x0000000b,0x000001fa,0x000001f9,0x000500ac,0x000000de,0x000001fb, + 0x000001fa,0x0000017d,0x000300f7,0x000001fd,0x00000000,0x000400fa,0x000001fb,0x000001fc, + 0x00000285,0x000200f8,0x000001fc,0x00050041,0x0000000c,0x000001ff,0x0000010a,0x0000003d, + 0x0004003d,0x0000000b,0x00000200,0x000001ff,0x000600cb,0x0000000b,0x00000201,0x00000200, + 0x00000152,0x00000028,0x0004007c,0x00000006,0x00000202,0x00000201,0x0003003e,0x000001fe, + 0x00000202,0x00050041,0x0000000c,0x00000204,0x0000010a,0x0000003d,0x0004003d,0x0000000b, + 0x00000205,0x00000204,0x000600cb,0x0000000b,0x00000206,0x00000205,0x0000007e,0x00000077, + 0x0004007c,0x00000006,0x00000207,0x00000206,0x000500c4,0x00000006,0x00000208,0x00000207, 
+ 0x0000002f,0x00050041,0x0000000c,0x00000209,0x0000010a,0x0000003d,0x0004003d,0x0000000b, + 0x0000020a,0x00000209,0x000500c2,0x0000000b,0x0000020b,0x0000020a,0x0000013c,0x000500c7, + 0x0000000b,0x0000020c,0x0000020b,0x0000003d,0x0004007c,0x00000006,0x0000020d,0x0000020c, + 0x000500c5,0x00000006,0x0000020e,0x00000208,0x0000020d,0x0003003e,0x00000203,0x0000020e, + 0x00050041,0x0000000c,0x00000210,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x00000211, + 0x00000210,0x000600cb,0x0000000b,0x00000212,0x00000211,0x00000146,0x00000077,0x0004007c, + 0x00000006,0x00000213,0x00000212,0x00050041,0x0000000c,0x00000214,0x0000010a,0x0000003d, + 0x0004003d,0x0000000b,0x00000215,0x00000214,0x000500c2,0x0000000b,0x00000216,0x00000215, + 0x00000050,0x000500c7,0x0000000b,0x00000217,0x00000216,0x0000004f,0x0004007c,0x00000006, + 0x00000218,0x00000217,0x000500c5,0x00000006,0x00000219,0x00000213,0x00000218,0x0003003e, + 0x0000020f,0x00000219,0x00050041,0x0000000c,0x0000021b,0x0000010a,0x0000003d,0x0004003d, + 0x0000000b,0x0000021c,0x0000021b,0x000600cb,0x0000000b,0x0000021d,0x0000021c,0x000000b0, + 0x00000028,0x0004007c,0x00000006,0x0000021e,0x0000021d,0x0003003e,0x0000021a,0x0000021e, + 0x00050041,0x0000000c,0x00000220,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x00000221, + 0x00000220,0x000600cb,0x0000000b,0x00000222,0x00000221,0x000000ad,0x00000028,0x0004007c, + 0x00000006,0x00000223,0x00000222,0x0003003e,0x0000021f,0x00000223,0x00050041,0x0000000c, + 0x00000225,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x00000226,0x00000225,0x000600cb, + 0x0000000b,0x00000227,0x00000226,0x00000077,0x00000028,0x0004007c,0x00000006,0x00000228, + 0x00000227,0x0003003e,0x00000224,0x00000228,0x00050041,0x0000000c,0x0000022a,0x0000010a, + 0x0000003d,0x0004003d,0x0000000b,0x0000022b,0x0000022a,0x000500c7,0x0000000b,0x0000022c, + 0x0000022b,0x0000013f,0x0003003e,0x00000229,0x0000022c,0x00050041,0x0000000c,0x0000022e, + 
0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x0000022f,0x0000022e,0x000500c7,0x0000000b, + 0x00000230,0x0000022f,0x0000003d,0x0003003e,0x0000022d,0x00000230,0x0004003d,0x0000000b, + 0x00000232,0x00000229,0x0004003d,0x0000000b,0x00000233,0x0000022d,0x00050084,0x0000000b, + 0x00000234,0x0000002a,0x00000233,0x00050080,0x0000000b,0x00000235,0x00000232,0x00000234, + 0x0003003e,0x00000231,0x00000235,0x0004003d,0x00000006,0x00000236,0x000001fe,0x00050084, + 0x00000006,0x00000238,0x00000236,0x00000237,0x0004003d,0x00000006,0x00000239,0x00000203, + 0x00050084,0x00000006,0x0000023b,0x00000239,0x0000023a,0x00050080,0x00000006,0x0000023c, + 0x00000238,0x0000023b,0x0004003d,0x00000006,0x0000023d,0x0000020f,0x00050080,0x00000006, + 0x0000023e,0x0000023c,0x0000023d,0x0004003d,0x00000006,0x0000023f,0x0000021a,0x00050084, + 0x00000006,0x00000240,0x0000023f,0x00000237,0x0004003d,0x00000006,0x00000241,0x0000021f, + 0x00050084,0x00000006,0x00000242,0x00000241,0x0000023a,0x00050080,0x00000006,0x00000243, + 0x00000240,0x00000242,0x0004003d,0x00000006,0x00000244,0x00000224,0x00050080,0x00000006, + 0x00000245,0x00000243,0x00000244,0x000500af,0x000000de,0x00000246,0x0000023e,0x00000245, + 0x000600a9,0x0000000b,0x00000247,0x00000246,0x0000003d,0x00000032,0x0004003d,0x0000000b, + 0x00000248,0x00000231,0x00050080,0x0000000b,0x00000249,0x00000248,0x00000247,0x0003003e, + 0x00000231,0x00000249,0x0004003d,0x0000000b,0x0000024b,0x00000231,0x0003003e,0x0000024c, + 0x000001b4,0x00050041,0x00000017,0x0000024d,0x0000024c,0x0000024b,0x0004003d,0x00000006, + 0x0000024e,0x0000024d,0x0003003e,0x0000024a,0x0000024e,0x00050041,0x0000000c,0x00000250, + 0x0000010a,0x00000032,0x0004003d,0x0000000b,0x00000251,0x00000250,0x0004003d,0x00000006, + 0x00000252,0x000000ed,0x00050080,0x00000006,0x00000253,0x00000146,0x00000252,0x000500c2, + 0x0000000b,0x00000254,0x00000251,0x00000253,0x000500c7,0x0000000b,0x00000255,0x00000254, + 0x0000002a,0x0004007c,0x00000006,0x00000256,0x00000255,0x0003003e,0x0000024f,0x00000256, 
+ 0x00050041,0x0000000c,0x00000258,0x0000010a,0x00000032,0x0004003d,0x0000000b,0x00000259, + 0x00000258,0x0004003d,0x00000006,0x0000025a,0x000000ed,0x000500c2,0x0000000b,0x0000025b, + 0x00000259,0x0000025a,0x000500c7,0x0000000b,0x0000025c,0x0000025b,0x0000003d,0x0004007c, + 0x00000006,0x0000025d,0x0000025c,0x0003003e,0x00000257,0x0000025d,0x0004003d,0x000000de, + 0x0000025e,0x00000125,0x000300f7,0x00000260,0x00000000,0x000400fa,0x0000025e,0x0000025f, + 0x00000260,0x000200f8,0x0000025f,0x0004003d,0x00000006,0x00000261,0x0000024f,0x0004003d, + 0x00000006,0x00000262,0x00000257,0x00050080,0x00000006,0x00000263,0x00000261,0x00000262, + 0x000500aa,0x000000de,0x00000264,0x00000263,0x00000037,0x0003003e,0x00000125,0x00000264, + 0x000200f9,0x00000260,0x000200f8,0x00000260,0x0004003d,0x00000006,0x00000266,0x0000024f, + 0x000500ab,0x000000de,0x00000267,0x00000266,0x000000bb,0x000300f7,0x0000026a,0x00000000, + 0x000400fa,0x00000267,0x00000269,0x0000026f,0x000200f8,0x00000269,0x0004003d,0x00000006, + 0x0000026b,0x0000021a,0x0004003d,0x00000006,0x0000026c,0x0000021f,0x0004003d,0x00000006, + 0x0000026d,0x00000224,0x00060050,0x00000136,0x0000026e,0x0000026b,0x0000026c,0x0000026d, + 0x0003003e,0x00000268,0x0000026e,0x000200f9,0x0000026a,0x000200f8,0x0000026f,0x0004003d, + 0x00000006,0x00000270,0x000001fe,0x0004003d,0x00000006,0x00000271,0x00000203,0x0004003d, + 0x00000006,0x00000272,0x0000020f,0x00060050,0x00000136,0x00000273,0x00000270,0x00000271, + 0x00000272,0x0003003e,0x00000268,0x00000273,0x000200f9,0x0000026a,0x000200f8,0x0000026a, + 0x0004003d,0x00000136,0x00000274,0x00000268,0x0003003e,0x00000265,0x00000274,0x0004003d, + 0x00000136,0x00000275,0x00000265,0x00060050,0x00000136,0x00000276,0x0000014a,0x0000014a, + 0x0000014a,0x00050084,0x00000136,0x00000277,0x00000275,0x00000276,0x0003003e,0x00000265, + 0x00000277,0x0004003d,0x00000006,0x00000279,0x00000257,0x00050084,0x00000006,0x0000027a, + 
0x00000037,0x00000279,0x00050082,0x00000006,0x0000027b,0x0000002f,0x0000027a,0x0003003e, + 0x00000278,0x0000027b,0x0004003d,0x00000006,0x0000027c,0x00000278,0x0004003d,0x00000006, + 0x0000027d,0x0000024a,0x00050084,0x00000006,0x0000027e,0x0000027c,0x0000027d,0x0004003d, + 0x00000136,0x0000027f,0x00000265,0x00060050,0x00000136,0x00000280,0x0000027e,0x0000027e, + 0x0000027e,0x00050080,0x00000136,0x00000281,0x0000027f,0x00000280,0x0003003e,0x00000265, + 0x00000281,0x0004003d,0x00000136,0x00000282,0x00000265,0x0008000c,0x00000136,0x00000283, + 0x00000001,0x0000002d,0x00000282,0x000001f4,0x000001f5,0x0004007c,0x0000001f,0x00000284, + 0x00000283,0x0003003e,0x000001d7,0x00000284,0x000200f9,0x000001fd,0x000200f8,0x00000285, + 0x0004003d,0x00000006,0x00000286,0x00000177,0x0004007c,0x0000000b,0x00000287,0x00000286, + 0x000500ac,0x000000de,0x00000288,0x00000287,0x0000017d,0x000300f7,0x0000028a,0x00000000, + 0x000400fa,0x00000288,0x00000289,0x0000032a,0x000200f8,0x00000289,0x00050041,0x0000000c, + 0x0000028c,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x0000028d,0x0000028c,0x000600cb, + 0x0000000b,0x0000028f,0x0000028d,0x0000028e,0x000000a9,0x0004007c,0x00000006,0x00000290, + 0x0000028f,0x0003003e,0x0000028b,0x00000290,0x00050041,0x0000000c,0x00000292,0x0000010a, + 0x0000003d,0x0004003d,0x0000000b,0x00000293,0x00000292,0x000600cb,0x0000000b,0x00000294, + 0x00000293,0x0000014a,0x000000a9,0x0004007c,0x00000006,0x00000295,0x00000294,0x00050041, + 0x0000000c,0x00000296,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x00000297,0x00000296, + 0x000500c2,0x0000000b,0x00000299,0x00000297,0x00000298,0x0004007c,0x00000006,0x0000029a, + 0x00000299,0x000500c7,0x00000006,0x0000029b,0x0000029a,0x000001b3,0x000500c5,0x00000006, + 0x0000029c,0x00000295,0x0000029b,0x0003003e,0x00000291,0x0000029c,0x00050041,0x0000000c, + 0x0000029e,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x0000029f,0x0000029e,0x000600cb, + 0x0000000b,0x000002a0,0x0000029f,0x000000ad,0x00000077,0x0004007c,0x00000006,0x000002a1, 
+ 0x000002a0,0x00050041,0x0000000c,0x000002a2,0x0000010a,0x0000003d,0x0004003d,0x0000000b, + 0x000002a3,0x000002a2,0x000600cb,0x0000000b,0x000002a4,0x000002a3,0x000000b0,0x00000037, + 0x0004007c,0x00000006,0x000002a5,0x000002a4,0x000500c4,0x00000006,0x000002a6,0x000002a5, + 0x00000077,0x000500c5,0x00000006,0x000002a7,0x000002a1,0x000002a6,0x00050041,0x0000000c, + 0x000002a8,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x000002a9,0x000002a8,0x000500c2, + 0x0000000b,0x000002aa,0x000002a9,0x000000b0,0x0004007c,0x00000006,0x000002ab,0x000002aa, + 0x000500c7,0x00000006,0x000002ac,0x000002ab,0x000001b1,0x000500c5,0x00000006,0x000002ad, + 0x000002a7,0x000002ac,0x0003003e,0x0000029d,0x000002ad,0x00050041,0x0000000c,0x000002af, + 0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x000002b0,0x000002af,0x000500c7,0x0000000b, + 0x000002b1,0x000002b0,0x0000003d,0x0004007c,0x00000006,0x000002b2,0x000002b1,0x00050041, + 0x0000000c,0x000002b3,0x0000010a,0x0000003d,0x0004003d,0x0000000b,0x000002b4,0x000002b3, + 0x000600cb,0x0000000b,0x000002b5,0x000002b4,0x00000037,0x0000008f,0x0004007c,0x00000006, + 0x000002b6,0x000002b5,0x000500c4,0x00000006,0x000002b7,0x000002b6,0x0000002f,0x000500c5, + 0x00000006,0x000002b8,0x000002b2,0x000002b7,0x0003003e,0x000002ae,0x000002b8,0x00050041, + 0x0000000c,0x000002ba,0x0000010a,0x00000032,0x0004003d,0x0000000b,0x000002bb,0x000002ba, + 0x000600cb,0x0000000b,0x000002bd,0x000002bb,0x000002bc,0x000000a9,0x0004007c,0x00000006, + 0x000002be,0x000002bd,0x0003003e,0x000002b9,0x000002be,0x00050041,0x0000000c,0x000002c0, + 0x0000010a,0x00000032,0x0004003d,0x0000000b,0x000002c1,0x000002c0,0x000600cb,0x0000000b, + 0x000002c2,0x000002c1,0x0000028e,0x000000ad,0x0004007c,0x00000006,0x000002c3,0x000002c2, + 0x0003003e,0x000002bf,0x000002c3,0x00050041,0x0000000c,0x000002c5,0x0000010a,0x00000032, + 0x0004003d,0x0000000b,0x000002c6,0x000002c5,0x000600cb,0x0000000b,0x000002c7,0x000002c6, + 
0x000000a9,0x000000ad,0x0004007c,0x00000006,0x000002c8,0x000002c7,0x0003003e,0x000002c4, + 0x000002c8,0x00050041,0x0000000c,0x000002ca,0x0000010a,0x00000032,0x0004003d,0x0000000b, + 0x000002cb,0x000002ca,0x000600cb,0x0000000b,0x000002cc,0x000002cb,0x0000015d,0x000000a9, + 0x0004007c,0x00000006,0x000002cd,0x000002cc,0x0003003e,0x000002c9,0x000002cd,0x00050041, + 0x0000000c,0x000002cf,0x0000010a,0x00000032,0x0004003d,0x0000000b,0x000002d0,0x000002cf, + 0x000600cb,0x0000000b,0x000002d1,0x000002d0,0x000000bb,0x000000a9,0x0004007c,0x00000006, + 0x000002d2,0x000002d1,0x0003003e,0x000002ce,0x000002d2,0x0004003d,0x00000006,0x000002d3, + 0x0000028b,0x000500c4,0x00000006,0x000002d4,0x000002d3,0x00000037,0x0004003d,0x00000006, + 0x000002d5,0x0000028b,0x000500c3,0x00000006,0x000002d6,0x000002d5,0x00000028,0x000500c5, + 0x00000006,0x000002d7,0x000002d4,0x000002d6,0x0003003e,0x0000028b,0x000002d7,0x0004003d, + 0x00000006,0x000002d8,0x000002ae,0x000500c4,0x00000006,0x000002d9,0x000002d8,0x00000037, + 0x0004003d,0x00000006,0x000002da,0x000002ae,0x000500c3,0x00000006,0x000002db,0x000002da, + 0x00000028,0x000500c5,0x00000006,0x000002dc,0x000002d9,0x000002db,0x0003003e,0x000002ae, + 0x000002dc,0x0004003d,0x00000006,0x000002dd,0x000002b9,0x000500c4,0x00000006,0x000002de, + 0x000002dd,0x00000037,0x0004003d,0x00000006,0x000002df,0x000002b9,0x000500c3,0x00000006, + 0x000002e0,0x000002df,0x00000028,0x000500c5,0x00000006,0x000002e1,0x000002de,0x000002e0, + 0x0003003e,0x000002b9,0x000002e1,0x0004003d,0x00000006,0x000002e2,0x00000291,0x000500c4, + 0x00000006,0x000002e3,0x000002e2,0x0000002f,0x0004003d,0x00000006,0x000002e4,0x00000291, + 0x000500c3,0x00000006,0x000002e5,0x000002e4,0x000000a9,0x000500c5,0x00000006,0x000002e6, + 0x000002e3,0x000002e5,0x0003003e,0x00000291,0x000002e6,0x0004003d,0x00000006,0x000002e7, + 0x000002bf,0x000500c4,0x00000006,0x000002e8,0x000002e7,0x0000002f,0x0004003d,0x00000006, + 0x000002e9,0x000002bf,0x000500c3,0x00000006,0x000002ea,0x000002e9,0x000000a9,0x000500c5, 
+ 0x00000006,0x000002eb,0x000002e8,0x000002ea,0x0003003e,0x000002bf,0x000002eb,0x0004003d, + 0x00000006,0x000002ec,0x000002c4,0x000500c4,0x00000006,0x000002ed,0x000002ec,0x0000002f, + 0x0004003d,0x00000006,0x000002ee,0x000002c4,0x000500c3,0x00000006,0x000002ef,0x000002ee, + 0x000000a9,0x000500c5,0x00000006,0x000002f0,0x000002ed,0x000002ef,0x0003003e,0x000002c4, + 0x000002f0,0x0004003d,0x00000006,0x000002f1,0x0000029d,0x000500c4,0x00000006,0x000002f2, + 0x000002f1,0x00000037,0x0004003d,0x00000006,0x000002f3,0x0000029d,0x000500c3,0x00000006, + 0x000002f4,0x000002f3,0x00000028,0x000500c5,0x00000006,0x000002f5,0x000002f2,0x000002f4, + 0x0003003e,0x0000029d,0x000002f5,0x0004003d,0x00000006,0x000002f6,0x000002c9,0x000500c4, + 0x00000006,0x000002f7,0x000002f6,0x00000037,0x0004003d,0x00000006,0x000002f8,0x000002c9, + 0x000500c3,0x00000006,0x000002f9,0x000002f8,0x00000028,0x000500c5,0x00000006,0x000002fa, + 0x000002f7,0x000002f9,0x0003003e,0x000002c9,0x000002fa,0x0004003d,0x00000006,0x000002fb, + 0x000002ce,0x000500c4,0x00000006,0x000002fc,0x000002fb,0x00000037,0x0004003d,0x00000006, + 0x000002fd,0x000002ce,0x000500c3,0x00000006,0x000002fe,0x000002fd,0x00000028,0x000500c5, + 0x00000006,0x000002ff,0x000002fc,0x000002fe,0x0003003e,0x000002ce,0x000002ff,0x0004003d, + 0x00000006,0x00000301,0x0000028b,0x0004003d,0x00000006,0x00000302,0x00000291,0x0004003d, + 0x00000006,0x00000303,0x0000029d,0x00060050,0x00000136,0x00000304,0x00000301,0x00000302, + 0x00000303,0x0003003e,0x00000300,0x00000304,0x0004003d,0x00000006,0x00000306,0x000002ae, + 0x0004003d,0x00000006,0x00000307,0x000002bf,0x0004003d,0x00000006,0x00000308,0x000002c9, + 0x00060050,0x00000136,0x00000309,0x00000306,0x00000307,0x00000308,0x0004003d,0x00000136, + 0x0000030a,0x00000300,0x00050082,0x00000136,0x0000030b,0x00000309,0x0000030a,0x0003003e, + 0x00000305,0x0000030b,0x0004003d,0x00000006,0x0000030d,0x000002b9,0x0004003d,0x00000006, + 
0x0000030e,0x000002c4,0x0004003d,0x00000006,0x0000030f,0x000002ce,0x00060050,0x00000136, + 0x00000310,0x0000030d,0x0000030e,0x0000030f,0x0004003d,0x00000136,0x00000311,0x00000300, + 0x00050082,0x00000136,0x00000312,0x00000310,0x00000311,0x0003003e,0x0000030c,0x00000312, + 0x00050041,0x00000017,0x00000313,0x000000e9,0x00000032,0x0004003d,0x00000006,0x00000314, + 0x00000313,0x0004003d,0x00000136,0x00000315,0x00000305,0x00060050,0x00000136,0x00000316, + 0x00000314,0x00000314,0x00000314,0x00050084,0x00000136,0x00000317,0x00000315,0x00000316, + 0x0003003e,0x00000305,0x00000317,0x00050041,0x00000017,0x00000318,0x000000e9,0x0000003d, + 0x0004003d,0x00000006,0x00000319,0x00000318,0x0004003d,0x00000136,0x0000031a,0x0000030c, + 0x00060050,0x00000136,0x0000031b,0x00000319,0x00000319,0x00000319,0x00050084,0x00000136, + 0x0000031c,0x0000031a,0x0000031b,0x0003003e,0x0000030c,0x0000031c,0x0004003d,0x00000136, + 0x0000031d,0x00000300,0x0004003d,0x00000136,0x0000031e,0x00000305,0x0004003d,0x00000136, + 0x0000031f,0x0000030c,0x00050080,0x00000136,0x00000320,0x0000031e,0x0000031f,0x00060050, + 0x00000136,0x00000321,0x00000037,0x00000037,0x00000037,0x00050080,0x00000136,0x00000322, + 0x00000320,0x00000321,0x00060050,0x00000136,0x00000323,0x00000037,0x00000037,0x00000037, + 0x000500c3,0x00000136,0x00000324,0x00000322,0x00000323,0x00050080,0x00000136,0x00000325, + 0x0000031d,0x00000324,0x0003003e,0x00000300,0x00000325,0x0004003d,0x00000136,0x00000326, + 0x00000300,0x0008000c,0x00000136,0x00000327,0x00000001,0x0000002d,0x00000326,0x000001f4, + 0x000001f5,0x0003003e,0x00000300,0x00000327,0x0004003d,0x00000136,0x00000328,0x00000300, + 0x0004007c,0x0000001f,0x00000329,0x00000328,0x0003003e,0x000001d7,0x00000329,0x0003003e, + 0x00000125,0x00000121,0x000200f9,0x0000028a,0x000200f8,0x0000032a,0x0003003e,0x00000120, + 0x00000135,0x0004003d,0x00000006,0x0000032b,0x0000014f,0x0004003d,0x00000006,0x0000032c, + 0x0000015a,0x0004003d,0x00000006,0x0000032d,0x00000165,0x00060050,0x00000136,0x0000032e, 
+ 0x0000032b,0x0000032c,0x0000032d,0x0004003d,0x0000000b,0x0000032f,0x00000118,0x0004007c, + 0x00000006,0x00000330,0x0000032f,0x0004003d,0x00000006,0x00000331,0x00000155,0x0004003d, + 0x00000006,0x00000332,0x00000160,0x0004003d,0x00000006,0x00000333,0x0000016a,0x00060050, + 0x00000136,0x00000334,0x00000331,0x00000332,0x00000333,0x00060050,0x00000136,0x00000335, + 0x00000330,0x00000330,0x00000330,0x00050084,0x00000136,0x00000336,0x00000335,0x00000334, + 0x00050080,0x00000136,0x00000337,0x0000032e,0x00000336,0x0003003e,0x00000138,0x00000337, + 0x0004003d,0x00000136,0x00000338,0x00000138,0x00060050,0x00000136,0x00000339,0x00000077, + 0x00000077,0x00000077,0x000500c4,0x00000136,0x0000033a,0x00000338,0x00000339,0x0004003d, + 0x00000136,0x0000033b,0x00000138,0x00060050,0x00000136,0x0000033c,0x00000037,0x00000037, + 0x00000037,0x000500c3,0x00000136,0x0000033d,0x0000033b,0x0000033c,0x000500c5,0x00000136, + 0x0000033e,0x0000033a,0x0000033d,0x0003003e,0x00000138,0x0000033e,0x000200f9,0x0000028a, + 0x000200f8,0x0000028a,0x000200f9,0x000001fd,0x000200f8,0x000001fd,0x000200f9,0x00000180, + 0x000200f8,0x00000180,0x000200f9,0x00000134,0x000200f8,0x00000134,0x0004003d,0x000000de, + 0x0000033f,0x00000120,0x000300f7,0x00000341,0x00000000,0x000400fa,0x0000033f,0x00000340, + 0x00000341,0x000200f8,0x00000340,0x00050041,0x0000000c,0x00000343,0x0000010a,0x0000003d, + 0x0004003d,0x0000000b,0x00000344,0x00000343,0x0004003d,0x0000000b,0x00000345,0x00000118, + 0x000500ab,0x000000de,0x00000346,0x00000345,0x00000032,0x000600a9,0x00000006,0x00000347, + 0x00000346,0x0000002f,0x000000bb,0x00050084,0x00000006,0x00000348,0x00000077,0x00000347, + 0x00050082,0x00000006,0x00000349,0x0000008f,0x00000348,0x000600cb,0x0000000b,0x0000034a, + 0x00000344,0x00000349,0x00000077,0x0003003e,0x00000342,0x0000034a,0x00050041,0x0000000c, + 0x0000034c,0x0000010a,0x00000032,0x0004003d,0x0000000b,0x0000034d,0x0000034c,0x0004003d, + 
0x00000006,0x0000034e,0x000000ed,0x00050080,0x00000006,0x0000034f,0x00000146,0x0000034e, + 0x000500c2,0x0000000b,0x00000350,0x0000034d,0x0000034f,0x000500c7,0x0000000b,0x00000351, + 0x00000350,0x0000002a,0x0004007c,0x00000006,0x00000352,0x00000351,0x0003003e,0x0000034b, + 0x00000352,0x00050041,0x0000000c,0x00000354,0x0000010a,0x00000032,0x0004003d,0x0000000b, + 0x00000355,0x00000354,0x0004003d,0x00000006,0x00000356,0x000000ed,0x000500c2,0x0000000b, + 0x00000357,0x00000355,0x00000356,0x000500c7,0x0000000b,0x00000358,0x00000357,0x0000003d, + 0x0004007c,0x00000006,0x00000359,0x00000358,0x0003003e,0x00000353,0x00000359,0x0004003d, + 0x00000006,0x0000035b,0x0000034b,0x00050082,0x00000006,0x0000035c,0x0000002f,0x0000035b, + 0x0003003e,0x0000035a,0x0000035c,0x0004003d,0x000000de,0x0000035d,0x00000125,0x000300f7, + 0x0000035f,0x00000000,0x000400fa,0x0000035d,0x0000035e,0x0000035f,0x000200f8,0x0000035e, + 0x0004003d,0x00000006,0x00000360,0x00000353,0x0004003d,0x00000006,0x00000361,0x0000035a, + 0x00050084,0x00000006,0x00000362,0x00000361,0x00000360,0x0003003e,0x0000035a,0x00000362, + 0x0004003d,0x00000006,0x00000363,0x0000034b,0x0004003d,0x00000006,0x00000364,0x00000353, + 0x00050080,0x00000006,0x00000365,0x00000363,0x00000364,0x000500aa,0x000000de,0x00000366, + 0x00000365,0x00000037,0x0003003e,0x00000125,0x00000366,0x000200f9,0x0000035f,0x000200f8, + 0x0000035f,0x0004003d,0x0000000b,0x0000037a,0x00000342,0x0004003d,0x00000006,0x0000037b, + 0x00000353,0x0003003e,0x0000037d,0x00000379,0x00060041,0x00000017,0x0000037e,0x0000037d, + 0x0000037a,0x0000037b,0x0004003d,0x00000006,0x0000037f,0x0000037e,0x0004003d,0x00000006, + 0x00000380,0x0000035a,0x00050084,0x00000006,0x00000381,0x0000037f,0x00000380,0x0003003e, + 0x00000367,0x00000381,0x0004003d,0x00000136,0x00000382,0x00000138,0x0004003d,0x00000006, + 0x00000383,0x00000367,0x00060050,0x00000136,0x00000384,0x00000383,0x00000383,0x00000383, + 0x00050080,0x00000136,0x00000385,0x00000382,0x00000384,0x0008000c,0x00000136,0x00000386, 
+ 0x00000001,0x0000002d,0x00000385,0x000001f4,0x000001f5,0x0003003e,0x00000138,0x00000386, + 0x0004003d,0x00000136,0x00000387,0x00000138,0x0004007c,0x0000001f,0x00000388,0x00000387, + 0x0003003e,0x000001d7,0x00000388,0x000200f9,0x00000341,0x000200f8,0x00000341,0x0004003d, + 0x000000de,0x0000038a,0x00000125,0x000500a7,0x000000de,0x0000038b,0x00000389,0x0000038a, + 0x000300f7,0x0000038d,0x00000000,0x000400fa,0x0000038b,0x0000038c,0x0000038d,0x000200f8, + 0x0000038c,0x0003003e,0x000001d7,0x0000038e,0x0003003e,0x00000104,0x00000032,0x000200f9, + 0x0000038d,0x000200f8,0x0000038d,0x0004003d,0x0000001f,0x00000393,0x000001d7,0x00040070, + 0x00000394,0x00000395,0x00000393,0x0004003d,0x0000000b,0x00000396,0x00000104,0x00040070, + 0x0000038f,0x00000397,0x00000396,0x00050051,0x0000038f,0x00000398,0x00000395,0x00000000, + 0x00050051,0x0000038f,0x00000399,0x00000395,0x00000001,0x00050051,0x0000038f,0x0000039a, + 0x00000395,0x00000002,0x00070050,0x00000390,0x0000039b,0x00000398,0x00000399,0x0000039a, + 0x00000397,0x00070050,0x00000390,0x0000039d,0x0000039c,0x0000039c,0x0000039c,0x0000039c, + 0x00050088,0x00000390,0x0000039e,0x0000039b,0x0000039d,0x0003003e,0x00000392,0x0000039e, + 0x0004003d,0x0000039f,0x000003a2,0x000003a1,0x0004003d,0x00000007,0x000003a3,0x000000d5, + 0x0004003d,0x00000390,0x000003a4,0x00000392,0x00040063,0x000003a2,0x000003a3,0x000003a4, + 0x000100fd,0x00010038,0x00050036,0x00000007,0x00000009,0x00000000,0x00000008,0x000200f8, + 0x0000000a,0x0004003b,0x0000001d,0x0000001e,0x00000007,0x0004003d,0x0000001f,0x00000022, + 0x00000021,0x0007004f,0x00000011,0x00000023,0x00000022,0x00000022,0x00000000,0x00000001, + 0x0004007c,0x00000007,0x00000024,0x00000023,0x00050050,0x00000007,0x00000026,0x00000025, + 0x00000025,0x00050084,0x00000007,0x00000027,0x00000024,0x00000026,0x0003003e,0x0000001e, + 0x00000027,0x00050041,0x0000002b,0x0000002c,0x00000029,0x0000002a,0x0004003d,0x0000000b, + 
0x0000002d,0x0000002c,0x0004007c,0x00000006,0x0000002e,0x0000002d,0x000500c7,0x00000006, + 0x00000030,0x0000002e,0x0000002f,0x00050084,0x00000006,0x00000031,0x00000028,0x00000030, + 0x00050041,0x00000017,0x00000033,0x0000001e,0x00000032,0x0004003d,0x00000006,0x00000034, + 0x00000033,0x00050080,0x00000006,0x00000035,0x00000034,0x00000031,0x00050041,0x00000017, + 0x00000036,0x0000001e,0x00000032,0x0003003e,0x00000036,0x00000035,0x00050041,0x0000002b, + 0x00000038,0x00000029,0x0000002a,0x0004003d,0x0000000b,0x00000039,0x00000038,0x0004007c, + 0x00000006,0x0000003a,0x00000039,0x000500c7,0x00000006,0x0000003b,0x0000003a,0x00000037, + 0x00050084,0x00000006,0x0000003c,0x00000037,0x0000003b,0x00050041,0x00000017,0x0000003e, + 0x0000001e,0x0000003d,0x0004003d,0x00000006,0x0000003f,0x0000003e,0x00050080,0x00000006, + 0x00000040,0x0000003f,0x0000003c,0x00050041,0x00000017,0x00000041,0x0000001e,0x0000003d, + 0x0003003e,0x00000041,0x00000040,0x0004003d,0x0000001f,0x00000042,0x00000029,0x0007004f, + 0x00000011,0x00000043,0x00000042,0x00000042,0x00000000,0x00000001,0x0004007c,0x00000007, + 0x00000044,0x00000043,0x0004003d,0x00000007,0x00000045,0x0000001e,0x00050080,0x00000007, + 0x00000046,0x00000045,0x00000044,0x0003003e,0x0000001e,0x00000046,0x0004003d,0x00000007, + 0x00000047,0x0000001e,0x000200fe,0x00000047,0x00010038,0x00050036,0x0000000b,0x0000000f, + 0x00000000,0x0000000d,0x00030037,0x0000000c,0x0000000e,0x000200f8,0x00000010,0x0004003b, + 0x0000004b,0x0000004c,0x00000007,0x0004003d,0x0000000b,0x0000004d,0x0000000e,0x00070050, + 0x0000004a,0x0000004e,0x0000004d,0x0000004d,0x0000004d,0x0000004d,0x000500c2,0x0000004a, + 0x00000053,0x0000004e,0x00000052,0x0003003e,0x0000004c,0x00000053,0x0004003d,0x0000004a, + 0x00000055,0x0000004c,0x00070050,0x0000004a,0x00000056,0x00000054,0x00000054,0x00000054, + 0x00000054,0x000500c7,0x0000004a,0x00000057,0x00000055,0x00000056,0x0003003e,0x0000004c, + 0x00000057,0x00050041,0x0000000c,0x00000058,0x0000004c,0x00000032,0x0004003d,0x0000000b, 
+ 0x00000059,0x00000058,0x000500c4,0x0000000b,0x0000005a,0x00000059,0x00000051,0x00050041, + 0x0000000c,0x0000005b,0x0000004c,0x0000003d,0x0004003d,0x0000000b,0x0000005c,0x0000005b, + 0x000500c4,0x0000000b,0x0000005d,0x0000005c,0x00000050,0x000500c5,0x0000000b,0x0000005e, + 0x0000005a,0x0000005d,0x00050041,0x0000000c,0x0000005f,0x0000004c,0x0000002a,0x0004003d, + 0x0000000b,0x00000060,0x0000005f,0x000500c4,0x0000000b,0x00000061,0x00000060,0x0000004f, + 0x000500c5,0x0000000b,0x00000062,0x0000005e,0x00000061,0x00050041,0x0000000c,0x00000064, + 0x0000004c,0x00000063,0x0004003d,0x0000000b,0x00000065,0x00000064,0x000500c4,0x0000000b, + 0x00000066,0x00000065,0x00000032,0x000500c5,0x0000000b,0x00000067,0x00000062,0x00000066, + 0x000200fe,0x00000067,0x00010038,0x00050036,0x00000011,0x00000015,0x00000000,0x00000013, + 0x00030037,0x00000012,0x00000014,0x000200f8,0x00000016,0x0004003b,0x0000000c,0x0000006a, + 0x00000007,0x0004003b,0x0000000c,0x0000006e,0x00000007,0x00050041,0x0000000c,0x0000006b, + 0x00000014,0x0000003d,0x0004003d,0x0000000b,0x0000006c,0x0000006b,0x0003003e,0x0000006a, + 0x0000006c,0x00050039,0x0000000b,0x0000006d,0x0000000f,0x0000006a,0x00050041,0x0000000c, + 0x0000006f,0x00000014,0x00000032,0x0004003d,0x0000000b,0x00000070,0x0000006f,0x0003003e, + 0x0000006e,0x00000070,0x00050039,0x0000000b,0x00000071,0x0000000f,0x0000006e,0x00050050, + 0x00000011,0x00000072,0x0000006d,0x00000071,0x000200fe,0x00000072,0x00010038,0x00050036, + 0x0000000b,0x0000001b,0x00000000,0x00000018,0x00030037,0x00000012,0x00000019,0x00030037, + 0x00000017,0x0000001a,0x000200f8,0x0000001c,0x0004003b,0x00000017,0x00000075,0x00000007, + 0x0004003b,0x00000017,0x0000007b,0x00000007,0x0004003b,0x00000017,0x00000081,0x00000007, + 0x0004003b,0x00000017,0x00000087,0x00000007,0x0004003b,0x00000017,0x0000008d,0x00000007, + 0x0004003b,0x00000017,0x0000009a,0x00000007,0x0004003b,0x00000017,0x000000a4,0x00000007, + 
0x0004003b,0x000000c2,0x000000c3,0x00000007,0x0004003b,0x00000017,0x000000c9,0x00000007, + 0x0004003d,0x00000006,0x00000078,0x0000001a,0x00050084,0x00000006,0x00000079,0x00000077, + 0x00000078,0x00050082,0x00000006,0x0000007a,0x00000076,0x00000079,0x0003003e,0x00000075, + 0x0000007a,0x00050041,0x0000000c,0x0000007c,0x00000019,0x0000003d,0x0004003d,0x0000000b, + 0x0000007d,0x0000007c,0x000600cb,0x0000000b,0x0000007f,0x0000007d,0x0000007e,0x00000025, + 0x0004007c,0x00000006,0x00000080,0x0000007f,0x0003003e,0x0000007b,0x00000080,0x00050041, + 0x0000000c,0x00000082,0x00000019,0x0000003d,0x0004003d,0x0000000b,0x00000083,0x00000082, + 0x000600cb,0x0000000b,0x00000085,0x00000083,0x00000084,0x00000028,0x0004007c,0x00000006, + 0x00000086,0x00000085,0x0003003e,0x00000081,0x00000086,0x00050041,0x0000000c,0x00000088, + 0x00000019,0x0000003d,0x0004003d,0x0000000b,0x00000089,0x00000088,0x000600cb,0x0000000b, + 0x0000008b,0x00000089,0x0000008a,0x00000028,0x0004007c,0x00000006,0x0000008c,0x0000008b, + 0x0003003e,0x00000087,0x0000008c,0x0004003d,0x00000006,0x0000008e,0x00000075,0x000500c3, + 0x00000006,0x00000090,0x0000008e,0x0000008f,0x00050041,0x0000000c,0x00000091,0x00000019, + 0x00000090,0x0004003d,0x0000000b,0x00000092,0x00000091,0x0004003d,0x00000006,0x00000093, + 0x00000075,0x000500c7,0x00000006,0x00000095,0x00000093,0x00000094,0x000600cb,0x0000000b, + 0x00000096,0x00000092,0x00000095,0x00000037,0x0004007c,0x00000006,0x00000097,0x00000096, + 0x0003003e,0x0000008d,0x00000097,0x0004003d,0x00000006,0x00000098,0x00000075,0x00050080, + 0x00000006,0x00000099,0x00000098,0x00000037,0x0003003e,0x00000075,0x00000099,0x0004003d, + 0x00000006,0x0000009b,0x00000075,0x000500c3,0x00000006,0x0000009c,0x0000009b,0x0000008f, + 0x00050041,0x0000000c,0x0000009d,0x00000019,0x0000009c,0x0004003d,0x0000000b,0x0000009e, + 0x0000009d,0x0004003d,0x00000006,0x0000009f,0x00000075,0x000500c7,0x00000006,0x000000a0, + 0x0000009f,0x00000094,0x000500c2,0x0000000b,0x000000a1,0x0000009e,0x000000a0,0x000500c7, 
+ 0x0000000b,0x000000a2,0x000000a1,0x0000003d,0x0004007c,0x00000006,0x000000a3,0x000000a2, + 0x0003003e,0x0000009a,0x000000a3,0x0004003d,0x00000006,0x000000c0,0x00000087,0x0004003d, + 0x00000006,0x000000c1,0x0000008d,0x0003003e,0x000000c3,0x000000bf,0x00060041,0x00000017, + 0x000000c4,0x000000c3,0x000000c0,0x000000c1,0x0004003d,0x00000006,0x000000c5,0x000000c4, + 0x0004003d,0x00000006,0x000000c6,0x0000009a,0x00050082,0x00000006,0x000000c7,0x000000c6, + 0x0000002f,0x000500c6,0x00000006,0x000000c8,0x000000c5,0x000000c7,0x0003003e,0x000000a4, + 0x000000c8,0x0004003d,0x00000006,0x000000ca,0x0000007b,0x0004003d,0x00000006,0x000000cb, + 0x000000a4,0x0004003d,0x00000006,0x000000cc,0x00000081,0x00050084,0x00000006,0x000000cd, + 0x000000cb,0x000000cc,0x00050080,0x00000006,0x000000ce,0x000000ca,0x000000cd,0x0003003e, + 0x000000c9,0x000000ce,0x0004003d,0x00000006,0x000000cf,0x000000c9,0x0008000c,0x00000006, + 0x000000d1,0x00000001,0x0000002d,0x000000cf,0x000000bb,0x000000d0,0x0004007c,0x0000000b, + 0x000000d2,0x000000d1,0x000200fe,0x000000d2,0x00010038 +}; diff --git a/icd/imported/gputexdecoder/shaders/Etc2Decoder.comp b/icd/imported/gputexdecoder/shaders/Etc2Decoder.comp new file mode 100755 index 00000000..15cebef8 --- /dev/null +++ b/icd/imported/gputexdecoder/shaders/Etc2Decoder.comp @@ -0,0 +1,289 @@ +#version 450 +/* Copyright (c) 2020 Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#extension GL_EXT_samplerless_texture_functions : require +layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; + +layout(set = 0, binding = 0) writeonly uniform image2D uOutput; +layout(set = 0, binding = 1) uniform utexture2D uInput; + +layout(constant_id = 0) const int ALPHA_BITS = 0; +layout(constant_id = 1) const int WIDTH = 0; +layout(constant_id = 2) const int HEIGHT = 0; + +ivec2 build_coord() +{ + ivec2 base = ivec2(gl_WorkGroupID.xy) * 8; + base.x += 4 * (int(gl_LocalInvocationID.z) & 1); + base.y += 2 * (int(gl_LocalInvocationID.z) & 2); + base += ivec2(gl_LocalInvocationID.xy); + return base; +} + +uint flip_endian(uint v) +{ + uvec4 words = uvec4(v) >> uvec4(0, 8, 16, 24); + words &= 0xffu; + return (words.x << 24u) | (words.y << 16u) | (words.z << 8u) | (words.w << 0u); +} + +uvec2 flip_endian(uvec2 v) +{ + return uvec2(flip_endian(v.y), flip_endian(v.x)); +} + +const ivec2 etc1_color_modifier_table[8] = ivec2[]( + ivec2(2, 8), + ivec2(5, 17), + ivec2(9, 29), + ivec2(13, 42), + ivec2(18, 60), + ivec2(24, 80), + ivec2(33, 106), + ivec2(47, 183)); + +const ivec4 etc2_alpha_modifier_table[16] = ivec4[]( + ivec4(2, 5, 8, 14), + ivec4(2, 6, 9, 12), + ivec4(1, 4, 7, 12), + ivec4(1, 3, 5, 12), + ivec4(2, 5, 7, 11), + ivec4(2, 6, 8, 10), + ivec4(3, 6, 7, 10), + ivec4(2, 4, 7, 10), + ivec4(1, 5, 7, 9), + ivec4(1, 4, 7, 9), + ivec4(1, 3, 7, 9), + ivec4(1, 4, 6, 9), + ivec4(2, 3, 6, 9), + ivec4(0, 1, 2, 9), + ivec4(3, 5, 7, 8), + ivec4(2, 4, 6, 8) +); + 
+const int etc2_distance_table[8] = int[](3, 6, 11, 16, 23, 32, 41, 64); + +uint decode_etc2_alpha(uvec2 payload, int linear_pixel) +{ + int bit_offset = 45 - 3 * linear_pixel; + int base = int(bitfieldExtract(payload.y, 24, 8)); + int multiplier = int(bitfieldExtract(payload.y, 20, 4)); + int table = int(bitfieldExtract(payload.y, 16, 4)); + + int lsb_index = int(bitfieldExtract(payload[bit_offset >> 5], bit_offset & 31, 2)); + bit_offset += 2; + int msb = int((payload[bit_offset >> 5] >> (bit_offset & 31)) & 1); + int mod = etc2_alpha_modifier_table[table][lsb_index] ^ (msb - 1); + int a = base + mod * multiplier; + return clamp(a, 0, 0xff); +} + +void main() +{ + ivec2 coord = build_coord(); + if (any(greaterThanEqual(coord, uvec2(WIDTH,HEIGHT)))) + return; + + ivec2 tile_coord = coord >> 2; + ivec2 pixel_coord = coord & 3; + int linear_pixel = 4 * pixel_coord.x + pixel_coord.y; + uvec4 payload = texelFetch(uInput, tile_coord, 0); + + uvec2 color_payload; + uint alpha_result; + bool punchthrough; + if (ALPHA_BITS == 8) + { + uvec2 alpha_payload = flip_endian(payload.xy); + alpha_result = decode_etc2_alpha(alpha_payload, linear_pixel); + color_payload = flip_endian(payload.zw); + } + else + { + color_payload = flip_endian(payload.xy); + alpha_result = 0xffu; + } + + uvec3 rgb_result; + ivec3 base_rgb; + uint flip = color_payload.y & 1u; + uint subblock = (pixel_coord[flip] & 2u) >> 1u; + bool etc1_compat = false; + + if (ALPHA_BITS == 1) + punchthrough = (color_payload.y & 2u) == 0u; + else + punchthrough = false; + + if (ALPHA_BITS != 1 && (color_payload.y & 2u) == 0u) + { + // Individual mode (ETC1) + etc1_compat = true; + base_rgb = ivec3(color_payload.yyy >> (uvec3(28, 20, 12) - 4 * subblock)); + base_rgb &= 0xf; + base_rgb *= 0x11; + } + else + { + int r = int(bitfieldExtract(color_payload.y, 27, 5)); + int rd = bitfieldExtract(int(color_payload.y), 24, 3); + int g = int(bitfieldExtract(color_payload.y, 19, 5)); + int gd = 
bitfieldExtract(int(color_payload.y), 16, 3); + int b = int(bitfieldExtract(color_payload.y, 11, 5)); + int bd = bitfieldExtract(int(color_payload.y), 8, 3); + + int r1 = r + rd; + int g1 = g + gd; + int b1 = b + bd; + + if (uint(r1) > 31) + { + int r1 = int(bitfieldExtract(color_payload.y, 56 - 32, 2)) | + (int(bitfieldExtract(color_payload.y, 59 - 32, 2)) << 2); + int g1 = int(bitfieldExtract(color_payload.y, 52 - 32, 4)); + int b1 = int(bitfieldExtract(color_payload.y, 48 - 32, 4)); + int r2 = int(bitfieldExtract(color_payload.y, 44 - 32, 4)); + int g2 = int(bitfieldExtract(color_payload.y, 40 - 32, 4)); + int b2 = int(bitfieldExtract(color_payload.y, 36 - 32, 4)); + uint da = (bitfieldExtract(color_payload.y, 34 - 32, 2) << 1) | + (color_payload.y & 1u); + int dist = etc2_distance_table[da]; + + int msb = int((color_payload.x >> (15 + linear_pixel)) & 2u); + int lsb = int((color_payload.x >> linear_pixel) & 1u); + int index = msb | lsb; + + if (punchthrough) + punchthrough = index == 2; + + if (index == 0) + { + rgb_result = uvec3(r1, g1, b1); + rgb_result *= 0x11u; + } + else + { + int mod = 2 - index; + ivec3 rgb = ivec3(r2, g2, b2) * 0x11 + mod * dist; + rgb_result = clamp(rgb, ivec3(0), ivec3(255)); + } + } + else if (uint(g1) > 31) + { + int r1 = int(bitfieldExtract(color_payload.y, 59 - 32, 4)); + int g1 = (int(bitfieldExtract(color_payload.y, 56 - 32, 3)) << 1) | + int((color_payload.y >> 20u) & 1u); + int b1 = int(bitfieldExtract(color_payload.y, 47 - 32, 3)) | + int((color_payload.y >> 16u) & 8u); + int r2 = int(bitfieldExtract(color_payload.y, 43 - 32, 4)); + int g2 = int(bitfieldExtract(color_payload.y, 39 - 32, 4)); + int b2 = int(bitfieldExtract(color_payload.y, 35 - 32, 4)); + uint da = color_payload.y & 4u; + uint db = color_payload.y & 1u; + uint d = da + 2 * db; + d += uint((r1 * 0x10000 + g1 * 0x100 + b1) >= (r2 * 0x10000 + g2 * 0x100 + b2)); + int dist = etc2_distance_table[d]; + int msb = int((color_payload.x >> (15 + linear_pixel)) & 2u); + 
int lsb = int((color_payload.x >> linear_pixel) & 1u); + + if (punchthrough) + punchthrough = (msb + lsb) == 2; + + ivec3 base = msb != 0 ? ivec3(r2, g2, b2) : ivec3(r1, g1, b1); + base *= 0x11; + int mod = 1 - 2 * lsb; + base += mod * dist; + rgb_result = clamp(base, ivec3(0), ivec3(0xff)); + } + else if (uint(b1) > 31) + { + // Planar mode + int r = int(bitfieldExtract(color_payload.y, 57 - 32, 6)); + int g = int(bitfieldExtract(color_payload.y, 49 - 32, 6)) | + (int(color_payload.y >> 18) & 0x40); + int b = int(bitfieldExtract(color_payload.y, 39 - 32, 3)) | + (int(bitfieldExtract(color_payload.y, 43 - 32, 2)) << 3) | + (int(color_payload.y >> 11) & 0x20); + int rh = int(color_payload.y & 1u) | + (int(bitfieldExtract(color_payload.y, 2, 5)) << 1); + int rv = int(bitfieldExtract(color_payload.x, 13, 6)); + int gh = int(bitfieldExtract(color_payload.x, 25, 7)); + int gv = int(bitfieldExtract(color_payload.x, 6, 7)); + int bh = int(bitfieldExtract(color_payload.x, 19, 6)); + int bv = int(bitfieldExtract(color_payload.x, 0, 6)); + + r = (r << 2) | (r >> 4); + rh = (rh << 2) | (rh >> 4); + rv = (rv << 2) | (rv >> 4); + g = (g << 1) | (g >> 6); + gh = (gh << 1) | (gh >> 6); + gv = (gv << 1) | (gv >> 6); + b = (b << 2) | (b >> 4); + bh = (bh << 2) | (bh >> 4); + bv = (bv << 2) | (bv >> 4); + + ivec3 rgb = ivec3(r, g, b); + ivec3 dx = ivec3(rh, gh, bh) - rgb; + ivec3 dy = ivec3(rv, gv, bv) - rgb; + dx *= pixel_coord.x; + dy *= pixel_coord.y; + rgb = rgb + ((dx + dy + 2) >> 2); + rgb = clamp(rgb, ivec3(0), ivec3(255)); + rgb_result = rgb; + punchthrough = false; + } + else + { + // Differential mode (ETC1) + etc1_compat = true; + base_rgb = ivec3(r, g, b) + int(subblock) * ivec3(rd, gd, bd); + base_rgb = (base_rgb << 3) | (base_rgb >> 2); + } + } + + if (etc1_compat) + { + uint etc1_table_index = bitfieldExtract(color_payload.y, 5 - 3 * int(subblock != 0u), 3); + int msb = int((color_payload.x >> (15 + linear_pixel)) & 2u); + int lsb = int((color_payload.x >> 
linear_pixel) & 1u); + int sgn = 1 - msb; + if (punchthrough) + { + sgn *= lsb; + punchthrough = (msb + lsb) == 2; + } + int offset = etc1_color_modifier_table[etc1_table_index][lsb] * sgn; + base_rgb = clamp(base_rgb + offset, ivec3(0), ivec3(255)); + rgb_result = base_rgb; + } + + if (ALPHA_BITS == 1 && punchthrough) + { + rgb_result = uvec3(0); + alpha_result = 0; + } + + vec4 outColor = vec4(rgb_result, alpha_result) / 255.0f; + + imageStore(uOutput, coord, outColor); +} diff --git a/icd/layers/vk_layer_switchable_graphics.cpp b/icd/layers/vk_layer_switchable_graphics.cpp index eab14d4f..fef93bf3 100644 --- a/icd/layers/vk_layer_switchable_graphics.cpp +++ b/icd/layers/vk_layer_switchable_graphics.cpp @@ -243,7 +243,9 @@ VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices_SG( else { for (uint32_t i = 0; i < physicalDeviceCount; i++) + { nextLinkFuncs.pfnGetPhysicalDeviceProperties(pLayerPhysicalDevices[i], &pProperties[i]); + } } if (result == VK_SUCCESS) @@ -253,31 +255,69 @@ VKAPI_ATTR VkResult VKAPI_CALL vkEnumeratePhysicalDevices_SG( // Return specified physical devices according to environment variable AMD_VULKAN_ICD const char* pEnv = getenv("AMD_VULKAN_ICD"); - bool preferRADV = pEnv && !strcmp(pEnv, "RADV"); + bool preferRadv = pEnv && (strcmp(pEnv, "RADV") == 0); + bool amdvlkExists = false; + uint32_t nonAmdvlkCount = 0; for (uint32_t i = 0; i < physicalDeviceCount; i++) { - bool isAMD = pProperties[i].vendorID == VENDOR_ID_AMD || pProperties[i].vendorID == VENDOR_ID_ATI; - bool isRADV = isAMD && strstr(pProperties[i].deviceName, "RADV") != nullptr; - bool isLLVMpipe = strstr(pProperties[i].deviceName, "llvmpipe") != nullptr; + bool isAmd = (pProperties[i].vendorID == VENDOR_ID_AMD) || (pProperties[i].vendorID == VENDOR_ID_ATI); + bool isRadv = isAmd && strstr(pProperties[i].deviceName, "RADV") != nullptr; + bool isLlvmpipe = strstr(pProperties[i].deviceName, "llvmpipe") != nullptr; + + if ((!isAmd || (isRadv == preferRadv)) && (!isLlvmpipe || 
preferRadv)) + { + if (pPhysicalDevices != nullptr) + { + if (returnedPhysicalDeviceCount < *pPhysicalDeviceCount) + { + pPhysicalDevices[returnedPhysicalDeviceCount++] = pLayerPhysicalDevices[i]; + } + } + else + { + returnedPhysicalDeviceCount++; + } + + availablePhysicalDeviceCount++; + } + + if (isAmd && !isRadv) + { + amdvlkExists = true; + } + + if (isRadv || isLlvmpipe) + { + pLayerPhysicalDevices[nonAmdvlkCount++] = pLayerPhysicalDevices[i]; + } + } - if ((!isAMD || isRADV == preferRADV) && (!isLLVMpipe || preferRADV)) + if (!amdvlkExists && !preferRadv) + { + for (uint32_t i = 0; i < nonAmdvlkCount; i++) { if (pPhysicalDevices != nullptr) { if (returnedPhysicalDeviceCount < *pPhysicalDeviceCount) + { pPhysicalDevices[returnedPhysicalDeviceCount++] = pLayerPhysicalDevices[i]; + } } else + { returnedPhysicalDeviceCount++; + } availablePhysicalDeviceCount++; } } *pPhysicalDeviceCount = returnedPhysicalDeviceCount; - if (pPhysicalDevices != nullptr && returnedPhysicalDeviceCount < availablePhysicalDeviceCount) + if ((pPhysicalDevices != nullptr) && (returnedPhysicalDeviceCount < availablePhysicalDeviceCount)) + { result = VK_INCOMPLETE; + } } if (pProperties != nullptr) diff --git a/icd/make/importdefs b/icd/make/importdefs index 60151772..f699ea87 100644 --- a/icd/make/importdefs +++ b/icd/make/importdefs @@ -26,7 +26,7 @@ # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. It must # be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -ICD_PAL_CLIENT_MAJOR_VERSION = 674 +ICD_PAL_CLIENT_MAJOR_VERSION = 675 ICD_PAL_CLIENT_MINOR_VERSION = 0 # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. 
It describes @@ -36,7 +36,7 @@ ICD_GPUOPEN_CLIENT_MINOR_VERSION = 0 # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1. It describes the version of the # interface version of LLPC that the ICD supports. -ICD_LLPC_CLIENT_MAJOR_VERSION = 48 +ICD_LLPC_CLIENT_MAJOR_VERSION = 49 # When ICD_LLPC_CLIENT_MAJOR_VERSION >= 39, Set ENABLE_VKGC to 1 to use Vkgc namespace instead of Llpc namespace in ICD ENABLE_VKGC=1 diff --git a/icd/res/ver.h b/icd/res/ver.h index 1f99f4d7..17a4a6d0 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 196 +#define VULKAN_ICD_BUILD_VERSION 199 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index 13625766..eaef6388 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -287,8 +287,8 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { // Mall no alloc settings give a 2.91% gain - m_settings.mallNoAllocCtPolicy = 0x01; - m_settings.mallNoAllocCtSsrPolicy = 0x01; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; } // Don't enable DCC for color attachments aside from those listed in the app_resource_optimizer @@ -382,7 +382,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( // Mall no alloc setting gives a ~0.82% gain if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { - m_settings.mallNoAllocSsrPolicy = 0x01; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; } m_settings.implicitExternalSynchronization = false; @@ -650,9 +650,9 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( if (pInfo->gfxLevel == 
Pal::GfxIpLevel::GfxIp10_3) { - m_settings.mallNoAllocCtPolicy = 0x01; - m_settings.mallNoAllocSsrPolicy = 0x01; - m_settings.mallNoAllocCtSsrPolicy = 0x01; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; m_settings.enableWgpMode = 0x00000020; @@ -682,6 +682,8 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( } m_settings.implicitExternalSynchronization = false; + + m_settings.syncOsHdrState = false; } if (appProfile == AppProfile::DoomEternal) @@ -721,9 +723,9 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( // Mall no alloc settings give a ~1% gain if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { - m_settings.mallNoAllocCtPolicy = 0x01; - m_settings.mallNoAllocCtSsrPolicy = 0x01; - m_settings.mallNoAllocSsrPolicy = 0x01; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; m_settings.enableWgpMode = 0x20; } @@ -786,6 +788,32 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( if (appProfile == AppProfile::Valheim) { + if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.csWaveSize = 32; + m_settings.fsWaveSize = 64; + + if (pInfo->revision == Pal::AsicRevision::Navi21) + { + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + } + + else if (pInfo->revision == Pal::AsicRevision::Navi22) + { + m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccForColorAttachments | + ForceDccFor3DShaderStorage | + ForceDccForNonColorAttachmentShaderStorage); + + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + } + + else if (pInfo->revision == Pal::AsicRevision::Navi23) + { + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; + } + } } pAllocCb->pfnFree(pAllocCb->pUserData, pInfo); @@ -968,6 +996,31 @@ 
void VulkanSettingsLoader::ValidateSettings() m_settings.devModeSemaphoreQueueTimingEnable = true; #endif + // Undo any heap overrides to local if oversubscription is allowed by default because they will likely + // degrade performance instead of improve it. When not allowed, testing should catch these cases + // so that overrides to local aren't added in the first place. + Pal::GpuMemoryHeapProperties heapProperties[Pal::GpuHeapCount] = {}; + + if ((m_pDevice->GetGpuMemoryHeapProperties(heapProperties) == Pal::Result::Success) && + (heapProperties[Pal::GpuHeapLocal].heapSize < m_settings.memoryRemoteBackupHeapMinHeapSize) && + (heapProperties[Pal::GpuHeapInvisible].heapSize < m_settings.memoryRemoteBackupHeapMinHeapSize)) + { + if (heapProperties[Pal::GpuHeapGartUswc].heapSize > 0) + { + if (m_settings.cmdAllocatorDataHeap == Pal::GpuHeapLocal) + { + m_settings.cmdAllocatorDataHeap = Pal::GpuHeapGartUswc; + } + + if (m_settings.cmdAllocatorEmbeddedHeap == Pal::GpuHeapLocal) + { + m_settings.cmdAllocatorEmbeddedHeap = Pal::GpuHeapGartUswc; + } + } + + m_settings.overrideHeapChoiceToLocal = 0; + } + // Command buffer prefetching was found to be slower for command buffers in local memory. 
if (m_settings.cmdAllocatorDataHeap == Pal::GpuHeapLocal) { diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index 1f87f425..e4feca85 100644 --- a/icd/settings/settings_xgl.json +++ b/icd/settings/settings_xgl.json @@ -972,6 +972,30 @@ "Scope": "Driver", "Type": "bool" }, + { + "Name": "DisablePerCompFetch", + "Description": "Disable per component fetch in uber fetch shader.", + "Tags": [ + "SPIRV Options" + ], + "Defaults": { + "Default": false + }, + "Scope": "Driver", + "Type": "bool" + }, + { + "Name": "DisablePerInstanceFetch", + "Description": "Disable per instance fetch in uber fetch shader.", + "Tags": [ + "SPIRV Options" + ], + "Defaults": { + "Default": false + }, + "Scope": "Driver", + "Type": "bool" + }, { "Name": "ForceAlignedForDynamicStride", "Description": "Force vertex stride is aligned when dynamic vertex stride is enabled", @@ -984,6 +1008,18 @@ "Scope": "Driver", "Type": "bool" }, + { + "Name": "SupportPatchSpecConst", + "Description": "Support patch specialized constant", + "Tags": [ + "SPIRV Options" + ], + "Defaults": { + "Default": false + }, + "Scope": "Driver", + "Type": "bool" + }, { "Description": "Enable pipeline dump, pipeline is stored with .pipe format. You must set AMD_DEBUG_DIR and make sure $AMD_DEBUG_DIR + pipelineDumpDir is an available directory.", "Tags": [