diff --git a/CMakeLists.txt b/CMakeLists.txt index a7aaacca..54acd778 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -312,3 +312,4 @@ if(UNIX) generateInstallTargets() generatePackageTarget() endif() + diff --git a/cmake/XglCompilerOptions.cmake b/cmake/XglCompilerOptions.cmake index 945c8c4d..658f6075 100644 --- a/cmake/XglCompilerOptions.cmake +++ b/cmake/XglCompilerOptions.cmake @@ -290,7 +290,9 @@ function(xgl_compiler_options TARGET) endif() if(TARGET_ARCHITECTURE_BITS EQUAL 32) - target_compile_options(${TARGET} PRIVATE -msse -msse2) + if(NOT (CMAKE_CXX_COMPILER MATCHES ".*arm-linux-gnueabi.*")) + target_compile_options(${TARGET} PRIVATE -msse -msse2) + endif() endif() if(CMAKE_BUILD_TYPE_RELEASE) diff --git a/cmake/XglOptions.cmake b/cmake/XglOptions.cmake index f28a09f2..35e40072 100644 --- a/cmake/XglOptions.cmake +++ b/cmake/XglOptions.cmake @@ -66,6 +66,10 @@ macro(xgl_options) option(VKI_EXT_EXTENDED_DYNAMIC_STATE "Build vulkan with EXTENDED_DYNAMIC_STATE extention" OFF) #endif +#if VKI_SPIRV_1_6 + option(VKI_SPIRV_1_6 "Build vulkan with VKI_SPIRV_1_6" OFF) +#endif + option(ICD_BUILD_LLPC "Build LLPC?" ON) option(ICD_BUILD_LLPCONLY "Build LLPC Only?" OFF) diff --git a/cmake/XglOverrides.cmake b/cmake/XglOverrides.cmake index 56b58d60..5f703c72 100644 --- a/cmake/XglOverrides.cmake +++ b/cmake/XglOverrides.cmake @@ -183,7 +183,7 @@ macro(xgl_overrides_vkgc) ### For LLPC ########################################################################################################## set(LLPC_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_LLPC_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE) - if(ICD_BUILD_LLPC) + if(ICD_BUILD_LLPC OR ICD_BUILD_SPVONLY) set(LLPC_BUILD_TESTS ${XGL_BUILD_TESTS} CACHE BOOL "${PROJECT_NAME} override." FORCE) set(LLPC_BUILD_LIT ${XGL_BUILD_LIT} CACHE BOOL "${PROJECT_NAME} override." 
FORCE) diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json index adb6eaf0..4bc94ab6 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ -2,13 +2,13 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.2.197" + "api_version": "1.2.201" }, "layer": { "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@", "type": "GLOBAL", "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.2.197", + "api_version": "1.2.201", "implementation_version": "1", "description": "AMD switchable graphics layer", "functions": { diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp index 79762939..4b55bc25 100644 --- a/icd/api/app_profile.cpp +++ b/icd/api/app_profile.cpp @@ -181,6 +181,12 @@ constexpr AppProfilePatternEntry AppNameSeriousSamFusionLinux = "serious sam fusion 2017 - linux - 64bit" }; +constexpr AppProfilePatternEntry AppNameSeriousSam4Win = +{ + PatternAppNameLower, + "serious sam 4 - 64bit" +}; + constexpr AppProfilePatternEntry AppEngineSedp = { PatternEngineNameLower, @@ -301,6 +307,18 @@ constexpr AppProfilePatternEntry AppEngineDagorEngine = "dagor" }; +constexpr AppProfilePatternEntry AppNameMetroExodus = +{ + PatternAppNameLower, + "metroexodus" +}; + +constexpr AppProfilePatternEntry AppEngineMetroExodus = +{ + PatternEngineNameLower, + "metroexodus" +}; + constexpr AppProfilePatternEntry AppEngineXSystem = { PatternEngineNameLower, @@ -343,6 +361,12 @@ constexpr AppProfilePatternEntry AppNameRainbowSixSiege = "rainbow six siege" }; +constexpr AppProfilePatternEntry AppNameHyperscape = +{ + PatternAppNameLower, + "hyperscape" +}; + constexpr AppProfilePatternEntry AppEngineScimitar = { PatternEngineNameLower, @@ -610,6 +634,15 @@ AppProfilePattern AppPatternTable[] = } }, + { + AppProfile::SeriousSam4, + { + AppNameSeriousSam4Win, + AppEngineSedp, + PatternEnd + } + }, + { 
AppProfile::SedpEngine, { @@ -733,6 +766,15 @@ AppProfilePattern AppPatternTable[] = } }, + { + AppProfile::MetroExodus, + { + AppNameMetroExodus, + AppEngineMetroExodus, + PatternEnd + } + }, + { AppProfile::AshesOfTheSingularity, { @@ -825,6 +867,15 @@ AppProfilePattern AppPatternTable[] = } }, + { + AppProfile::Hyperscape, + { + AppNameHyperscape, + AppEngineScimitar, + PatternEnd + } + }, + { AppProfile::ScimitarEngine, { diff --git a/icd/api/appopt/gpu_decode_layer.cpp b/icd/api/appopt/gpu_decode_layer.cpp index 4559c8ec..7aac2cbb 100644 --- a/icd/api/appopt/gpu_decode_layer.cpp +++ b/icd/api/appopt/gpu_decode_layer.cpp @@ -98,9 +98,11 @@ namespace GpuTexDecoder }; Vkgc::ResourceMappingRootNode rootNode = {}; + // use the max node type here, ASTC has 6 nodes which is the maximum right now + Vkgc::ResourceMappingNode nodes[GpuTexDecoder::AstcInternalPipelineNodes] = {}; + if (buildInfo.shaderType == GpuTexDecoder::InternalTexConvertCsType::ConvertASTCToRGBA8) { - Vkgc::ResourceMappingNode nodes[GpuTexDecoder::AstcInternalPipelineNodes] = {}; GpuTexDecoder::GpuDecodeMappingNode* pDecodeNode = buildInfo.pUserDataNodes; for (size_t index = 0; index < GpuTexDecoder::AstcInternalPipelineNodes; index++) { @@ -136,7 +138,6 @@ namespace GpuTexDecoder if (buildInfo.shaderType == GpuTexDecoder::InternalTexConvertCsType::ConvertETC2ToRGBA8) { - Vkgc::ResourceMappingNode nodes[GpuTexDecoder::Etc2InternalPipelineNodes] = {}; GpuTexDecoder::GpuDecodeMappingNode* pDecodeNode = buildInfo.pUserDataNodes; for (size_t index = 0; index < GpuTexDecoder::Etc2InternalPipelineNodes; index++) { diff --git a/icd/api/graphics_pipeline_common.cpp b/icd/api/graphics_pipeline_common.cpp index 1afe0ddd..b278f24b 100644 --- a/icd/api/graphics_pipeline_common.cpp +++ b/icd/api/graphics_pipeline_common.cpp @@ -627,7 +627,21 @@ static void BuildRasterizationState( pInfo->pipeline.viewportInfo.depthClipFarEnable = (pRsDepthClip->depthClipEnable == VK_TRUE); } break; - + case 
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT: + { + const auto* pRsProvokingVertex = + static_cast(pNext); + pInfo->immedInfo.triangleRasterState.provokingVertex = + static_cast(pRsProvokingVertex->provokingVertexMode); + + static_assert(static_cast(VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT) == + Pal::ProvokingVertex::First, + "VK and PAL enums don't match"); + static_assert(static_cast(VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) == + Pal::ProvokingVertex::Last, + "VK and PAL enums don't match"); + } + break; default: // Skip any unknown extension structures break; @@ -636,7 +650,7 @@ static void BuildRasterizationState( pNext = pHeader->pNext; } -#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 691 +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 693 // For optimal performance, depth clamping should be enabled by default. Only disable it if dealing // with depth values outside of [0.0, 1.0] range. // Note that this is the opposite of the default Vulkan setting which is depthClampEnable = false. @@ -663,18 +677,18 @@ static void BuildRasterizationState( ((pInfo->pipeline.viewportInfo.depthClipNearEnable == false) && (pInfo->pipeline.viewportInfo.depthClipFarEnable == false))) { - pInfo->pipeline.rsState.DepthClampMode = Pal::DepthClampMode::None; + pInfo->pipeline.rsState.depthClampMode = Pal::DepthClampMode::_None; } else { - pInfo->pipeline.rsState.DepthClampMode = Pal::DepthClampMode::ZeroToOne; + pInfo->pipeline.rsState.depthClampMode = Pal::DepthClampMode::ZeroToOne; } } else { // When depth clamping is enabled, depth clipping should be disabled, and vice versa. // Clipping is updated in pipeline compiler. 
- pInfo->pipeline.rsState.DepthClampMode = Pal::DepthClampMode::Viewport; + pInfo->pipeline.rsState.depthClampMode = Pal::DepthClampMode::Viewport; } #endif @@ -708,6 +722,20 @@ static void BuildViewportState( { if (pVp != nullptr) { + EXTRACT_VK_STRUCTURES_0( + viewportDepthClipControl, + PipelineViewportDepthClipControlCreateInfoEXT, + static_cast(pVp->pNext), + PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT); + + // Default Vulkan depth range is [0, 1] + // Check if VK_EXT_depth_clip_control overrides depth to [-1, 1] + pInfo->pipeline.viewportInfo.depthRange = + ((pPipelineViewportDepthClipControlCreateInfoEXT != nullptr) && + (pPipelineViewportDepthClipControlCreateInfoEXT->negativeOneToOne == VK_TRUE)) ? + Pal::DepthRange::NegativeOneToOne : Pal::DepthRange::ZeroToOne; + + pInfo->immedInfo.viewportParams.depthRange = pInfo->pipeline.viewportInfo.depthRange; // From the spec, "scissorCount is the number of scissors and must match the number of viewports." VK_ASSERT(pVp->viewportCount <= Pal::MaxViewports); diff --git a/icd/api/include/app_profile.h b/icd/api/include/app_profile.h index 8f77d835..1ba17c63 100644 --- a/icd/api/include/app_profile.h +++ b/icd/api/include/app_profile.h @@ -84,6 +84,7 @@ enum class AppProfile : uint32_t ShadowOfTheTombRaider, // ShadowOfTheTombRaider by Feral3D XPlane, // XPlane by Laminar Research WarThunder, // WarThunder by Gaijin Distribution Kft + MetroExodus, // Metro Exodus Quake2RTX, // Quake2 RTX Valheim, // Valheim by Coffee Stain Studios WolfensteinCyberpilot, // Wolfenstein Cyberpilot by Machine Games @@ -103,6 +104,8 @@ enum class AppProfile : uint32_t XSystemEngine, // XSystem Engine by Laminar Research UnityEngine, // Unity Engine by Unity Technologies (Default) SaschaWillemsExamples, // Vulkan Examples by Sascha Willems + Hyperscape, // Hyperscape by Ubisoft + SeriousSam4, // Serious Sam 4 by Croteam Maxon, // Maxon AngleEngine, // Angle Engine }; diff --git 
a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_decode.h b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_decode.h index 6f2d6d7e..cb0d1df5 100644 --- a/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_decode.h +++ b/icd/api/include/khronos/sdk-1.2/vk_video/vulkan_video_codec_h264std_decode.h @@ -29,6 +29,7 @@ typedef enum StdVideoDecodeH264FieldOrderCount { typedef struct StdVideoDecodeH264PictureInfoFlags { uint32_t field_pic_flag : 1; // Is field picture uint32_t is_intra : 1; // Is intra picture + uint32_t IdrPicFlag : 1; // instantaneous decoding refresh (IDR) picture uint32_t bottom_field_flag : 1; // bottom (true) or top (false) field if field_pic_flag is set. uint32_t is_reference : 1; // This only applies to picture info, and not to the DPB lists. uint32_t complementary_field_pair : 1; // complementary field pair, complementary non-reference field pair, complementary reference field pair diff --git a/icd/api/include/khronos/sdk-1.2/vulkan/vk_platform.h b/icd/api/include/khronos/sdk-1.2/vulkan/vk_platform.h index 18b913ab..5e1a95d0 100644 --- a/icd/api/include/khronos/sdk-1.2/vulkan/vk_platform.h +++ b/icd/api/include/khronos/sdk-1.2/vulkan/vk_platform.h @@ -42,7 +42,7 @@ extern "C" #define VKAPI_CALL __stdcall #define VKAPI_PTR VKAPI_CALL #elif defined(__ANDROID__) && defined(__ARM_ARCH) && __ARM_ARCH < 7 - #error "Vulkan isn't supported for the 'armeabi' NDK ABI" + #error "Vulkan is not supported for the 'armeabi' NDK ABI" #elif defined(__ANDROID__) && defined(__ARM_ARCH) && __ARM_ARCH >= 7 && defined(__ARM_32BIT_STATE) // On Android 32-bit ARM targets, Vulkan functions use the "hardfloat" // calling convention, i.e. float parameters are passed in registers. 
This diff --git a/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_beta.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_beta.h index d2f34d1c..3b1a55ae 100644 --- a/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_beta.h +++ b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_beta.h @@ -384,7 +384,7 @@ typedef VkFlags VkVideoEncodeFlagsKHR; typedef enum VkVideoEncodeRateControlFlagBitsKHR { VK_VIDEO_ENCODE_RATE_CONTROL_DEFAULT_KHR = 0, - VK_VIDEO_ENCODE_RATE_CONTROL_RESET_BIT_KHR = 0x00000001, + VK_VIDEO_ENCODE_RATE_CONTROL_RESERVED_0_BIT_KHR = 0x00000001, VK_VIDEO_ENCODE_RATE_CONTROL_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF } VkVideoEncodeRateControlFlagBitsKHR; typedef VkFlags VkVideoEncodeRateControlFlagsKHR; @@ -409,18 +409,27 @@ typedef struct VkVideoEncodeInfoKHR { const VkVideoReferenceSlotKHR* pSetupReferenceSlot; uint32_t referenceSlotCount; const VkVideoReferenceSlotKHR* pReferenceSlots; + uint32_t precedingExternallyEncodedBytes; } VkVideoEncodeInfoKHR; +typedef struct VkVideoEncodeRateControlLayerInfoKHR { + VkStructureType sType; + const void* pNext; + uint32_t averageBitrate; + uint32_t maxBitrate; + uint32_t frameRateNumerator; + uint32_t frameRateDenominator; + uint32_t virtualBufferSizeInMs; + uint32_t initialVirtualBufferSizeInMs; +} VkVideoEncodeRateControlLayerInfoKHR; + typedef struct VkVideoEncodeRateControlInfoKHR { - VkStructureType sType; - const void* pNext; - VkVideoEncodeRateControlFlagsKHR flags; - VkVideoEncodeRateControlModeFlagBitsKHR rateControlMode; - uint32_t averageBitrate; - uint16_t peakToAverageBitrateRatio; - uint16_t frameRateNumerator; - uint16_t frameRateDenominator; - uint32_t virtualBufferSizeInMs; + VkStructureType sType; + const void* pNext; + VkVideoEncodeRateControlFlagsKHR flags; + VkVideoEncodeRateControlModeFlagBitsKHR rateControlMode; + uint8_t layerCount; + const VkVideoEncodeRateControlLayerInfoKHR* pLayerConfigs; } VkVideoEncodeRateControlInfoKHR; typedef void (VKAPI_PTR *PFN_vkCmdEncodeVideoKHR)(VkCommandBuffer 
commandBuffer, const VkVideoEncodeInfoKHR* pEncodeInfo); @@ -450,6 +459,7 @@ typedef enum VkVideoEncodeH264CapabilityFlagBitsEXT { VK_VIDEO_ENCODE_H264_CAPABILITY_DEBLOCKING_FILTER_PARTIAL_BIT_EXT = 0x00000100, VK_VIDEO_ENCODE_H264_CAPABILITY_MULTIPLE_SLICE_PER_FRAME_BIT_EXT = 0x00000200, VK_VIDEO_ENCODE_H264_CAPABILITY_EVENLY_DISTRIBUTED_SLICE_SIZE_BIT_EXT = 0x00000400, + VK_VIDEO_ENCODE_H264_CAPABILITY_OPTIONAL_RC_EXTENSION_STRUCT_BIT_EXT = 0x00000800, VK_VIDEO_ENCODE_H264_CAPABILITY_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkVideoEncodeH264CapabilityFlagBitsEXT; typedef VkFlags VkVideoEncodeH264CapabilityFlagsEXT; @@ -476,6 +486,14 @@ typedef enum VkVideoEncodeH264CreateFlagBitsEXT { VK_VIDEO_ENCODE_H264_CREATE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkVideoEncodeH264CreateFlagBitsEXT; typedef VkFlags VkVideoEncodeH264CreateFlagsEXT; + +typedef enum VkVideoEncodeH264RateControlStructureFlagBitsEXT { + VK_VIDEO_ENCODE_H264_RATE_CONTROL_STRUCTURE_UNKNOWN_EXT = 0, + VK_VIDEO_ENCODE_H264_RATE_CONTROL_STRUCTURE_FLAT_BIT_EXT = 0x00000001, + VK_VIDEO_ENCODE_H264_RATE_CONTROL_STRUCTURE_DYADIC_BIT_EXT = 0x00000002, + VK_VIDEO_ENCODE_H264_RATE_CONTROL_STRUCTURE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkVideoEncodeH264RateControlStructureFlagBitsEXT; +typedef VkFlags VkVideoEncodeH264RateControlStructureFlagsEXT; typedef struct VkVideoEncodeH264CapabilitiesEXT { VkStructureType sType; const void* pNext; @@ -533,9 +551,6 @@ typedef struct VkVideoEncodeH264NaluSliceEXT { const VkVideoEncodeH264DpbSlotInfoEXT* pRefFinalList0Entries; uint8_t refFinalList1EntryCount; const VkVideoEncodeH264DpbSlotInfoEXT* pRefFinalList1Entries; - uint32_t precedingNaluBytes; - uint8_t minQp; - uint8_t maxQp; } VkVideoEncodeH264NaluSliceEXT; typedef struct VkVideoEncodeH264VclFrameInfoEXT { @@ -565,6 +580,41 @@ typedef struct VkVideoEncodeH264ProfileEXT { StdVideoH264ProfileIdc stdProfileIdc; } VkVideoEncodeH264ProfileEXT; +typedef struct VkVideoEncodeH264RateControlInfoEXT { + VkStructureType sType; 
+ const void* pNext; + uint32_t gopFrameCount; + uint32_t idrPeriod; + uint32_t consecutiveBFrameCount; + VkVideoEncodeH264RateControlStructureFlagBitsEXT rateControlStructure; +} VkVideoEncodeH264RateControlInfoEXT; + +typedef struct VkVideoEncodeH264QpEXT { + int32_t qpI; + int32_t qpP; + int32_t qpB; +} VkVideoEncodeH264QpEXT; + +typedef struct VkVideoEncodeH264FrameSizeEXT { + uint32_t frameISize; + uint32_t framePSize; + uint32_t frameBSize; +} VkVideoEncodeH264FrameSizeEXT; + +typedef struct VkVideoEncodeH264RateControlLayerInfoEXT { + VkStructureType sType; + const void* pNext; + uint8_t temporalLayerId; + VkBool32 useInitialRcQp; + VkVideoEncodeH264QpEXT initialRcQp; + VkBool32 useMinQp; + VkVideoEncodeH264QpEXT minQp; + VkBool32 useMaxQp; + VkVideoEncodeH264QpEXT maxQp; + VkBool32 useMaxFrameSize; + VkVideoEncodeH264FrameSizeEXT maxFrameSize; +} VkVideoEncodeH264RateControlLayerInfoEXT; + #define VK_EXT_video_encode_h265 1 @@ -599,6 +649,14 @@ typedef enum VkVideoEncodeH265CtbSizeFlagBitsEXT { VK_VIDEO_ENCODE_H265_CTB_SIZE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkVideoEncodeH265CtbSizeFlagBitsEXT; typedef VkFlags VkVideoEncodeH265CtbSizeFlagsEXT; + +typedef enum VkVideoEncodeH265RateControlStructureFlagBitsEXT { + VK_VIDEO_ENCODE_H265_RATE_CONTROL_STRUCTURE_UNKNOWN_EXT = 0, + VK_VIDEO_ENCODE_H265_RATE_CONTROL_STRUCTURE_FLAT_BIT_EXT = 0x00000001, + VK_VIDEO_ENCODE_H265_RATE_CONTROL_STRUCTURE_DYADIC_BIT_EXT = 0x00000002, + VK_VIDEO_ENCODE_H265_RATE_CONTROL_STRUCTURE_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF +} VkVideoEncodeH265RateControlStructureFlagBitsEXT; +typedef VkFlags VkVideoEncodeH265RateControlStructureFlagsEXT; typedef struct VkVideoEncodeH265CapabilitiesEXT { VkStructureType sType; const void* pNext; @@ -693,6 +751,41 @@ typedef struct VkVideoEncodeH265ProfileEXT { StdVideoH265ProfileIdc stdProfileIdc; } VkVideoEncodeH265ProfileEXT; +typedef struct VkVideoEncodeH265RateControlInfoEXT { + VkStructureType sType; + const void* pNext; + uint32_t 
gopFrameCount; + uint32_t idrPeriod; + uint32_t consecutiveBFrameCount; + VkVideoEncodeH265RateControlStructureFlagBitsEXT rateControlStructure; +} VkVideoEncodeH265RateControlInfoEXT; + +typedef struct VkVideoEncodeH265QpEXT { + int32_t qpI; + int32_t qpP; + int32_t qpB; +} VkVideoEncodeH265QpEXT; + +typedef struct VkVideoEncodeH265FrameSizeEXT { + uint32_t frameISize; + uint32_t framePSize; + uint32_t frameBSize; +} VkVideoEncodeH265FrameSizeEXT; + +typedef struct VkVideoEncodeH265RateControlLayerInfoEXT { + VkStructureType sType; + const void* pNext; + uint8_t temporalId; + VkBool32 useInitialRcQp; + VkVideoEncodeH265QpEXT initialRcQp; + VkBool32 useMinQp; + VkVideoEncodeH265QpEXT minQp; + VkBool32 useMaxQp; + VkVideoEncodeH265QpEXT maxQp; + VkBool32 useMaxFrameSize; + VkVideoEncodeH265FrameSizeEXT maxFrameSize; +} VkVideoEncodeH265RateControlLayerInfoEXT; + #define VK_EXT_video_decode_h264 1 diff --git a/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_core.h b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_core.h index a2f4e771..56d4a209 100644 --- a/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_core.h +++ b/icd/api/include/khronos/sdk-1.2/vulkan/vulkan_core.h @@ -72,7 +72,7 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 197 +#define VK_HEADER_VERSION 201 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 2, VK_HEADER_VERSION) @@ -459,6 +459,12 @@ typedef enum VkStructureType { #ifdef VK_ENABLE_BETA_EXTENSIONS VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_PROFILE_EXT = 1000038008, #endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_INFO_EXT = 1000038009, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H264_RATE_CONTROL_LAYER_INFO_EXT = 1000038010, +#endif #ifdef VK_ENABLE_BETA_EXTENSIONS VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_CAPABILITIES_EXT = 
1000039000, #endif @@ -489,6 +495,12 @@ typedef enum VkStructureType { #ifdef VK_ENABLE_BETA_EXTENSIONS VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_REFERENCE_LISTS_EXT = 1000039009, #endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_RATE_CONTROL_INFO_EXT = 1000039010, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_H265_RATE_CONTROL_LAYER_INFO_EXT = 1000039011, +#endif #ifdef VK_ENABLE_BETA_EXTENSIONS VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_EXT = 1000040000, #endif @@ -821,6 +833,9 @@ typedef enum VkStructureType { #endif #ifdef VK_ENABLE_BETA_EXTENSIONS VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR = 1000299001, +#endif +#ifdef VK_ENABLE_BETA_EXTENSIONS + VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR = 1000299002, #endif VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DIAGNOSTICS_CONFIG_FEATURES_NV = 1000300000, VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV = 1000300001, @@ -860,6 +875,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR = 1000337009, VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR = 1000337010, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT = 1000340000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_ARM = 1000342000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RGBA10X6_FORMATS_FEATURES_EXT = 1000344000, VK_STRUCTURE_TYPE_DIRECTFB_SURFACE_CREATE_INFO_EXT = 1000346000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE = 1000351000, @@ -868,6 +884,8 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT = 1000352001, VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT = 1000352002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT = 1000353000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT = 1000355000, + VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT = 1000355001, 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT = 1000356000, VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3_KHR = 1000360000, VK_STRUCTURE_TYPE_IMPORT_MEMORY_ZIRCON_HANDLE_INFO_FUCHSIA = 1000364000, @@ -897,6 +915,8 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PIPELINE_COLOR_WRITE_CREATE_INFO_EXT = 1000381001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_EXT = 1000388000, VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_EXT = 1000388001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT = 1000391000, + VK_STRUCTURE_TYPE_IMAGE_VIEW_MIN_LOD_CREATE_INFO_EXT = 1000391001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT = 1000392000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT = 1000392001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT = 1000411000, @@ -2214,8 +2234,8 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT = 0x00000008, VK_PIPELINE_CREATE_DISPATCH_BASE_BIT = 0x00000010, - VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = 0x00200000, - VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = 0x00400000, + VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = 0x00200000, + VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = 0x00400000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_ANY_HIT_SHADERS_BIT_KHR = 0x00004000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_CLOSEST_HIT_SHADERS_BIT_KHR = 0x00008000, VK_PIPELINE_CREATE_RAY_TRACING_NO_NULL_MISS_SHADERS_BIT_KHR = 0x00010000, @@ -2232,6 +2252,8 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT = 0x00000200, VK_PIPELINE_CREATE_RAY_TRACING_ALLOW_MOTION_BIT_NV = 0x00100000, VK_PIPELINE_CREATE_DISPATCH_BASE = VK_PIPELINE_CREATE_DISPATCH_BASE_BIT, + 
VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR, + VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR = VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT, VK_PIPELINE_CREATE_DISPATCH_BASE_KHR = VK_PIPELINE_CREATE_DISPATCH_BASE, VK_PIPELINE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF @@ -2286,7 +2308,18 @@ typedef VkFlags VkPipelineTessellationStateCreateFlags; typedef VkFlags VkPipelineViewportStateCreateFlags; typedef VkFlags VkPipelineRasterizationStateCreateFlags; typedef VkFlags VkPipelineMultisampleStateCreateFlags; + +typedef enum VkPipelineDepthStencilStateCreateFlagBits { + VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM = 0x00000001, + VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM = 0x00000002, + VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkPipelineDepthStencilStateCreateFlagBits; typedef VkFlags VkPipelineDepthStencilStateCreateFlags; + +typedef enum VkPipelineColorBlendStateCreateFlagBits { + VK_PIPELINE_COLOR_BLEND_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_BIT_ARM = 0x00000001, + VK_PIPELINE_COLOR_BLEND_STATE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF +} VkPipelineColorBlendStateCreateFlagBits; typedef VkFlags VkPipelineColorBlendStateCreateFlags; typedef VkFlags VkPipelineDynamicStateCreateFlags; typedef VkFlags VkPipelineLayoutCreateFlags; @@ -2352,6 +2385,9 @@ typedef enum VkSubpassDescriptionFlagBits { VK_SUBPASS_DESCRIPTION_PER_VIEW_POSITION_X_ONLY_BIT_NVX = 0x00000002, VK_SUBPASS_DESCRIPTION_FRAGMENT_REGION_BIT_QCOM = 0x00000004, VK_SUBPASS_DESCRIPTION_SHADER_RESOLVE_BIT_QCOM = 0x00000008, + VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_ARM = 
0x00000010, + VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM = 0x00000020, + VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM = 0x00000040, VK_SUBPASS_DESCRIPTION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkSubpassDescriptionFlagBits; typedef VkFlags VkSubpassDescriptionFlags; @@ -5352,6 +5388,7 @@ typedef enum VkDriverId { VK_DRIVER_ID_MESA_TURNIP = 18, VK_DRIVER_ID_MESA_V3DV = 19, VK_DRIVER_ID_MESA_PANVK = 20, + VK_DRIVER_ID_SAMSUNG_PROPRIETARY = 21, VK_DRIVER_ID_AMD_PROPRIETARY_KHR = VK_DRIVER_ID_AMD_PROPRIETARY, VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR = VK_DRIVER_ID_AMD_OPEN_SOURCE, VK_DRIVER_ID_MESA_RADV_KHR = VK_DRIVER_ID_MESA_RADV, @@ -8643,7 +8680,7 @@ typedef struct VkFormatProperties3KHR { #define VK_KHR_maintenance4 1 -#define VK_KHR_MAINTENANCE_4_SPEC_VERSION 1 +#define VK_KHR_MAINTENANCE_4_SPEC_VERSION 2 #define VK_KHR_MAINTENANCE_4_EXTENSION_NAME "VK_KHR_maintenance4" typedef struct VkPhysicalDeviceMaintenance4FeaturesKHR { VkStructureType sType; @@ -12716,6 +12753,19 @@ typedef struct VkPhysicalDevice4444FormatsFeaturesEXT { +#define VK_ARM_rasterization_order_attachment_access 1 +#define VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_SPEC_VERSION 1 +#define VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME "VK_ARM_rasterization_order_attachment_access" +typedef struct VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM { + VkStructureType sType; + const void* pNext; + VkBool32 rasterizationOrderColorAttachmentAccess; + VkBool32 rasterizationOrderDepthAttachmentAccess; + VkBool32 rasterizationOrderStencilAttachmentAccess; +} VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM; + + + #define VK_EXT_rgba10x6_formats 1 #define VK_EXT_RGBA10X6_FORMATS_SPEC_VERSION 1 #define VK_EXT_RGBA10X6_FORMATS_EXTENSION_NAME "VK_EXT_rgba10x6_formats" @@ -12823,6 +12873,23 @@ typedef struct VkPhysicalDeviceDrmPropertiesEXT { +#define VK_EXT_depth_clip_control 1 +#define 
VK_EXT_DEPTH_CLIP_CONTROL_SPEC_VERSION 1 +#define VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME "VK_EXT_depth_clip_control" +typedef struct VkPhysicalDeviceDepthClipControlFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 depthClipControl; +} VkPhysicalDeviceDepthClipControlFeaturesEXT; + +typedef struct VkPipelineViewportDepthClipControlCreateInfoEXT { + VkStructureType sType; + const void* pNext; + VkBool32 negativeOneToOne; +} VkPipelineViewportDepthClipControlCreateInfoEXT; + + + #define VK_EXT_primitive_topology_list_restart 1 #define VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_SPEC_VERSION 1 #define VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME "VK_EXT_primitive_topology_list_restart" @@ -13002,6 +13069,23 @@ typedef struct VkQueueFamilyGlobalPriorityPropertiesEXT { +#define VK_EXT_image_view_min_lod 1 +#define VK_EXT_IMAGE_VIEW_MIN_LOD_SPEC_VERSION 1 +#define VK_EXT_IMAGE_VIEW_MIN_LOD_EXTENSION_NAME "VK_EXT_image_view_min_lod" +typedef struct VkPhysicalDeviceImageViewMinLodFeaturesEXT { + VkStructureType sType; + void* pNext; + VkBool32 minLod; +} VkPhysicalDeviceImageViewMinLodFeaturesEXT; + +typedef struct VkImageViewMinLodCreateInfoEXT { + VkStructureType sType; + const void* pNext; + float minLod; +} VkImageViewMinLodCreateInfoEXT; + + + #define VK_EXT_multi_draw 1 #define VK_EXT_MULTI_DRAW_SPEC_VERSION 1 #define VK_EXT_MULTI_DRAW_EXTENSION_NAME "VK_EXT_multi_draw" diff --git a/icd/api/include/khronos/vk_video/vulkan_video_codec_h264std.h b/icd/api/include/khronos/vk_video/vulkan_video_codec_h264std.h new file mode 100644 index 00000000..f2289993 --- /dev/null +++ b/icd/api/include/khronos/vk_video/vulkan_video_codec_h264std.h @@ -0,0 +1,41 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file vulkan_video_codec_h264std.h + * @brief Proxy to the real Khronos Vulkan video header. 
+ *********************************************************************************************************************** + */ + +#ifndef __VULKAN_VIDEO_CODEC_H264STD_H_PROXY__ +#define __VULKAN_VIDEO_CODEC_H264STD_H_PROXY__ + +#if EXTERNAL_VULKAN_HEADERS +#include "vk_video/vulkan_video_codec_h264std.h" +#else +#include "sdk-1.2/vk_video/vulkan_video_codec_h264std.h" +#endif + +#endif /* __VULKAN_VIDEO_CODEC_H264STD_H_PROXY__*/ diff --git a/icd/api/include/khronos/vk_video/vulkan_video_codec_h264std_decode.h b/icd/api/include/khronos/vk_video/vulkan_video_codec_h264std_decode.h new file mode 100644 index 00000000..95bcedf8 --- /dev/null +++ b/icd/api/include/khronos/vk_video/vulkan_video_codec_h264std_decode.h @@ -0,0 +1,41 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file vulkan_video_codec_h264std_decode.h + * @brief Proxy to the real Khronos Vulkan video header. + *********************************************************************************************************************** + */ + +#ifndef __VULKAN_VIDEO_CODEC_H264STD_DECODE_H_PROXY__ +#define __VULKAN_VIDEO_CODEC_H264STD_DECODE_H_PROXY__ + +#if EXTERNAL_VULKAN_HEADERS +#include "vk_video/vulkan_video_codec_h264std_decode.h" +#else +#include "sdk-1.2/vk_video/vulkan_video_codec_h264std_decode.h" +#endif + +#endif /* __VULKAN_VIDEO_CODEC_H264STD_DECODE_H_PROXY__*/ diff --git a/icd/api/include/khronos/vk_video/vulkan_video_codec_h264std_encode.h b/icd/api/include/khronos/vk_video/vulkan_video_codec_h264std_encode.h new file mode 100644 index 00000000..c1c185ea --- /dev/null +++ b/icd/api/include/khronos/vk_video/vulkan_video_codec_h264std_encode.h @@ -0,0 +1,41 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file vulkan_video_codec_h264std_encode.h + * @brief Proxy to the real Khronos Vulkan video header. 
+ *********************************************************************************************************************** + */ + +#ifndef __VULKAN_VIDEO_CODEC_H264STD_ENCODE_H_PROXY__ +#define __VULKAN_VIDEO_CODEC_H264STD_ENCODE_H_PROXY__ + +#if EXTERNAL_VULKAN_HEADERS +#include "vk_video/vulkan_video_codec_h264std_encode.h" +#else +#include "sdk-1.2/vk_video/vulkan_video_codec_h264std_encode.h" +#endif + +#endif /* __VULKAN_VIDEO_CODEC_H264STD_ENCODE_H_PROXY__*/ diff --git a/icd/api/include/khronos/vk_video/vulkan_video_codec_h265std.h b/icd/api/include/khronos/vk_video/vulkan_video_codec_h265std.h new file mode 100644 index 00000000..e9b20eee --- /dev/null +++ b/icd/api/include/khronos/vk_video/vulkan_video_codec_h265std.h @@ -0,0 +1,41 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file vulkan_video_codec_h265std.h + * @brief Proxy to the real Khronos Vulkan video header. + *********************************************************************************************************************** + */ + +#ifndef __VULKAN_VIDEO_CODEC_H265STD_H_PROXY__ +#define __VULKAN_VIDEO_CODEC_H265STD_H_PROXY__ + +#if EXTERNAL_VULKAN_HEADERS +#include "vk_video/vulkan_video_codec_h265std.h" +#else +#include "sdk-1.2/vk_video/vulkan_video_codec_h265std.h" +#endif + +#endif /* __VULKAN_VIDEO_CODEC_H265STD_H_PROXY__*/ diff --git a/icd/api/include/khronos/vk_video/vulkan_video_codec_h265std_decode.h b/icd/api/include/khronos/vk_video/vulkan_video_codec_h265std_decode.h new file mode 100644 index 00000000..abbc5dc5 --- /dev/null +++ b/icd/api/include/khronos/vk_video/vulkan_video_codec_h265std_decode.h @@ -0,0 +1,41 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file vulkan_video_codec_h265std_decode.h + * @brief Proxy to the real Khronos Vulkan video header. 
+ *********************************************************************************************************************** + */ + +#ifndef __VULKAN_VIDEO_CODEC_H265STD_DECODE_H_PROXY__ +#define __VULKAN_VIDEO_CODEC_H265STD_DECODE_H_PROXY__ + +#if EXTERNAL_VULKAN_HEADERS +#include "vk_video/vulkan_video_codec_h265std_decode.h" +#else +#include "sdk-1.2/vk_video/vulkan_video_codec_h265std_decode.h" +#endif + +#endif /* __VULKAN_VIDEO_CODEC_H265STD_DECODE_H_PROXY__*/ diff --git a/icd/api/include/khronos/vk_video/vulkan_video_codec_h265std_encode.h b/icd/api/include/khronos/vk_video/vulkan_video_codec_h265std_encode.h new file mode 100644 index 00000000..78a3c662 --- /dev/null +++ b/icd/api/include/khronos/vk_video/vulkan_video_codec_h265std_encode.h @@ -0,0 +1,41 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file vulkan_video_codec_h265std_encode.h + * @brief Proxy to the real Khronos Vulkan video header. + *********************************************************************************************************************** + */ + +#ifndef __VULKAN_VIDEO_CODEC_H265STD_ENCODE_H_PROXY__ +#define __VULKAN_VIDEO_CODEC_H265STD_ENCODE_H_PROXY__ + +#if EXTERNAL_VULKAN_HEADERS +#include "vk_video/vulkan_video_codec_h265std_encode.h" +#else +#include "sdk-1.2/vk_video/vulkan_video_codec_h265std_encode.h" +#endif + +#endif /* __VULKAN_VIDEO_CODEC_H265STD_ENCODE_H_PROXY__*/ diff --git a/icd/api/include/khronos/vk_video/vulkan_video_codecs_common.h b/icd/api/include/khronos/vk_video/vulkan_video_codecs_common.h new file mode 100644 index 00000000..b884c16f --- /dev/null +++ b/icd/api/include/khronos/vk_video/vulkan_video_codecs_common.h @@ -0,0 +1,41 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2021 Advanced Micro Devices, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +/** + *********************************************************************************************************************** + * @file vulkan_video_codecs_common.h + * @brief Proxy to the real Khronos Vulkan video header. 
+ *********************************************************************************************************************** + */ + +#ifndef __VULKAN_VIDEO_CODECS_COMMON_H_PROXY__ +#define __VULKAN_VIDEO_CODECS_COMMON_H_PROXY__ + +#if EXTERNAL_VULKAN_HEADERS +#include "vk_video/vulkan_video_codecs_common.h" +#else +#include "sdk-1.2/vk_video/vulkan_video_codecs_common.h" +#endif + +#endif /* __VULKAN_VIDEO_CODECS_COMMON_H_PROXY__*/ diff --git a/icd/api/include/render_state_cache.h b/icd/api/include/render_state_cache.h index a72f974a..0c313bc0 100644 --- a/icd/api/include/render_state_cache.h +++ b/icd/api/include/render_state_cache.h @@ -359,7 +359,11 @@ class RenderStateCache Util::HashMap m_fragmentShadingRate; + PalAllocator, + Util::JenkinsHashFunc, + Util::DefaultEqualFunc, + Util::HashAllocator, + 1024> m_fragmentShadingRate; uint32_t m_fragmentShadingRateNextId; }; diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index 190c4da5..877c8fb6 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -1009,20 +1009,21 @@ class CmdBuffer uint32_t index, uint32_t stride) const; + VK_FORCEINLINE uint32_t NumPalDevices() const + { return m_numPalDevices; } + uint32_t NumDeviceEvents(uint32_t numEvents) const - { - return m_numPalDevices * numEvents; - } + { return m_numPalDevices * numEvents; } #if VK_ENABLE_DEBUG_BARRIERS - void DbgBarrierPreCmd(uint32_t cmd) + void DbgBarrierPreCmd(uint64_t cmd) { if (m_dbgBarrierPreCmdMask & (cmd)) { DbgCmdBarrier(true); } } - void DbgBarrierPostCmd(uint32_t cmd) + void DbgBarrierPostCmd(uint64_t cmd) { if (m_dbgBarrierPostCmdMask & (cmd)) { @@ -1030,8 +1031,8 @@ class CmdBuffer } } #else - void DbgBarrierPreCmd(uint32_t cmd) {} - void DbgBarrierPostCmd(uint32_t cmd) {} + void DbgBarrierPreCmd(uint64_t cmd) {} + void DbgBarrierPostCmd(uint64_t cmd) {} #endif SqttCmdBufferState* GetSqttState() @@ -1311,8 +1312,8 @@ class CmdBuffer TransformFeedbackState* m_pTransformFeedbackState; 
#if VK_ENABLE_DEBUG_BARRIERS - uint32_t m_dbgBarrierPreCmdMask; - uint32_t m_dbgBarrierPostCmdMask; + uint64_t m_dbgBarrierPreCmdMask; + uint64_t m_dbgBarrierPostCmdMask; #endif Util::Vector m_palDepthStencilState; diff --git a/icd/api/include/vk_conv.h b/icd/api/include/vk_conv.h index 52f5b5dc..bc7cd741 100644 --- a/icd/api/include/vk_conv.h +++ b/icd/api/include/vk_conv.h @@ -998,8 +998,8 @@ inline Pal::SwizzledFormat RemapFormatComponents( uint32_t tilingIdx = (imageTiling == Pal::ImageTiling::Linear) ? 0 : 1; uint32_t x8MmformatIdx = static_cast(Pal::ChNumFormat::X8_MM_Unorm); uint32_t x8Y8MmformatIdx = static_cast(Pal::ChNumFormat::X8Y8_MM_Unorm); - uint32_t x16MmformatIdx = static_cast(Pal::ChNumFormat::X16_MM_Unorm); - uint32_t x16Y16MmformatIdx = static_cast(Pal::ChNumFormat::X16Y16_MM_Unorm); + uint32_t x16MmformatIdx = static_cast(Pal::ChNumFormat::X16_MM10_Unorm); + uint32_t x16Y16MmformatIdx = static_cast(Pal::ChNumFormat::X16Y16_MM10_Unorm); // As spec says, the remapping must be identity for any VkImageView used with a combined image sampler that // enables sampler YCbCr conversion, thus we could totally ignore the setting in VkComponentMapping. @@ -1116,7 +1116,7 @@ inline Pal::SwizzledFormat RemapFormatComponents( if (subresRange.startSubres.plane == 0) { newFormat.format = (formatProperties.features[x16MmformatIdx][tilingIdx] != 0) ? - Pal::ChNumFormat::X16_MM_Unorm : Pal::ChNumFormat::X16_Unorm; + Pal::ChNumFormat::X16_MM10_Unorm : Pal::ChNumFormat::X16_Unorm; newFormat.swizzle.r = ChannelSwizzle::Zero; newFormat.swizzle.g = ChannelSwizzle::X; newFormat.swizzle.b = ChannelSwizzle::Zero; @@ -1125,7 +1125,7 @@ inline Pal::SwizzledFormat RemapFormatComponents( else if (subresRange.startSubres.plane == 1) { newFormat.format = (formatProperties.features[x16Y16MmformatIdx][tilingIdx] != 0) ? 
- Pal::ChNumFormat::X16Y16_MM_Unorm : Pal::ChNumFormat::X16Y16_Unorm; + Pal::ChNumFormat::X16Y16_MM10_Unorm : Pal::ChNumFormat::X16Y16_Unorm; newFormat.swizzle.r = ChannelSwizzle::Y; newFormat.swizzle.g = ChannelSwizzle::Zero; newFormat.swizzle.b = ChannelSwizzle::X; @@ -3244,6 +3244,40 @@ inline Pal::QueuePriority VkToPalGlobalPriority( } // ===================================================================================================================== +inline Pal::QueuePrioritySupport VkToPalGlobaPrioritySupport( + VkQueueGlobalPriorityEXT vkPriority) +{ + Pal::QueuePrioritySupport palPrioritySupport = Pal::QueuePrioritySupport::SupportQueuePriorityNormal; + switch (static_cast(vkPriority)) + { + case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT: + palPrioritySupport = Pal::QueuePrioritySupport::SupportQueuePriorityIdle; + break; + case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT: + palPrioritySupport = Pal::QueuePrioritySupport::SupportQueuePriorityNormal; + break; + case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT: + palPrioritySupport = Pal::QueuePrioritySupport::SupportQueuePriorityHigh; + break; + case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT: + palPrioritySupport = Pal::QueuePrioritySupport::SupportQueuePriorityRealtime; + break; + default: + break; + } + + return palPrioritySupport; +} + +// ===================================================================================================================== +// Is the queue suitable for normal use (i.e. non-exclusive and no elevated priority). 
+template +static bool IsNormalQueue(const T& engineCapabilities) +{ + return ((engineCapabilities.flags.exclusive == 0) && + ((engineCapabilities.queuePrioritySupport & Pal::QueuePrioritySupport::SupportQueuePriorityNormal) != 0)); +} + inline Pal::ResolveMode VkToPalResolveMode( VkResolveModeFlagBits vkResolveMode) { @@ -3480,6 +3514,46 @@ inline uint32_t VkToVkgcShaderStageMask(VkShaderStageFlags vkShaderStageFlags) return vkgcShaderMask; } +// ===================================================================================================================== +inline VkShaderStageFlags VkgcToVkShaderStageMask(uint32_t vkgcShaderStageFlags) +{ + VkShaderStageFlags vkShaderMask = 0; + uint32_t expectedShaderStageCount = 6; + + if ((vkgcShaderStageFlags & Vkgc::ShaderStageVertexBit) != 0) + { + vkShaderMask |= VK_SHADER_STAGE_VERTEX_BIT; + } + + if ((vkgcShaderStageFlags & Vkgc::ShaderStageTessControlBit) != 0) + { + vkShaderMask |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; + } + + if ((vkgcShaderStageFlags & Vkgc::ShaderStageTessEvalBit) != 0) + { + vkShaderMask |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; + } + + if ((vkgcShaderStageFlags & Vkgc::ShaderStageGeometryBit) != 0) + { + vkShaderMask |= VK_SHADER_STAGE_GEOMETRY_BIT; + } + + if ((vkgcShaderStageFlags & Vkgc::ShaderStageFragmentBit) != 0) + { + vkShaderMask |= VK_SHADER_STAGE_FRAGMENT_BIT; + } + + if ((vkgcShaderStageFlags & Vkgc::ShaderStageComputeBit) != 0) + { + vkShaderMask |= VK_SHADER_STAGE_COMPUTE_BIT; + } + + VK_ASSERT(expectedShaderStageCount == Vkgc::ShaderStageCount); // This function needs updating if the stage count mismatches + return vkShaderMask; +} + // ===================================================================================================================== struct UberFetchShaderFormatInfo { diff --git a/icd/api/include/vk_device.h b/icd/api/include/vk_device.h index 04c94673..1e7664bc 100644 --- a/icd/api/include/vk_device.h +++ b/icd/api/include/vk_device.h @@ -363,13 +363,8 @@ class Device
DispatchableQueue** pQueues, const DeviceExtensions::Enabled& enabled, const VkMemoryOverallocationBehaviorAMD overallocationBehavior, - const bool deviceCoherentMemoryEnabled, - const bool attachmentFragmentShadingRate, - bool scalarBlockLayoutEnabled, - const ExtendedRobustness& extendedRobustnessEnabled, bool bufferDeviceAddressMultiDeviceEnabled, - bool pageableDeviceLocalMemory, - bool maintenance4Enabled); + bool pageableDeviceLocalMemory); void InitDispatchTable(); @@ -736,7 +731,8 @@ class Device const VkPhysicalDeviceFeatures* pFeatures, bool useComputeAsTransferQueue, uint32 privateDataSlotRequestCount, - size_t privateDataSize); + size_t privateDataSize, + const DeviceFeatures& deviceFeatures); VkResult CreateInternalPipelines(); @@ -792,7 +788,7 @@ class Device Util::Mutex m_memoryMutex; // Shared mutex used occasionally by memory objects // The states of m_enabledFeatures are provided by application - DeviceFeatures m_enabledFeatures; + const DeviceFeatures m_enabledFeatures; // The count of allocations that has been created from the logical device. 
uint32_t m_allocatedCount; diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index 21687a37..d047fd43 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -317,6 +317,7 @@ class DeviceExtensions final : public Extensions EXT_CONSERVATIVE_RASTERIZATION, EXT_CUSTOM_BORDER_COLOR, EXT_DEBUG_MARKER, + EXT_DEPTH_CLIP_CONTROL, EXT_DEPTH_CLIP_ENABLE, EXT_DEPTH_RANGE_UNRESTRICTED, EXT_DESCRIPTOR_INDEXING, @@ -325,6 +326,7 @@ class DeviceExtensions final : public Extensions EXT_EXTERNAL_MEMORY_DMA_BUF, EXT_EXTERNAL_MEMORY_HOST, EXT_GLOBAL_PRIORITY, + EXT_GLOBAL_PRIORITY_QUERY, EXT_HDR_METADATA, EXT_HOST_QUERY_RESET, EXT_IMAGE_ROBUSTNESS, @@ -341,6 +343,7 @@ class DeviceExtensions final : public Extensions EXT_POST_DEPTH_COVERAGE, EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART, EXT_PRIVATE_DATA, + EXT_PROVOKING_VERTEX, EXT_QUEUE_FAMILY_FOREIGN, EXT_ROBUSTNESS2, EXT_SAMPLER_FILTER_MINMAX, diff --git a/icd/api/include/vk_physical_device.h b/icd/api/include/vk_physical_device.h index be8c16c1..59371b8c 100644 --- a/icd/api/include/vk_physical_device.h +++ b/icd/api/include/vk_physical_device.h @@ -295,6 +295,68 @@ class PhysicalDevice VkSubgroupFeatureFlags* pSupportedOperations, VkBool32* pQuadOperationsInAllStages) const; + void GetPhysicalDeviceSubgroupSizeControlProperties( + uint32_t* pMinSubgroupSize, + uint32_t* pMaxSubgroupSize, + uint32_t* pMaxComputeWorkgroupSubgroups, + VkShaderStageFlags* pQuadOperationsInAllStages) const; + + void GetPhysicalDeviceUniformBlockProperties( + uint32_t* pMaxInlineUniformBlockSize, + uint32_t* pMaxPerStageDescriptorInlineUniformBlocks, + uint32_t* pMaxPerStageDescriptorUpdateAfterBindInlineUniformBlocks, + uint32_t* pMaxDescriptorSetInlineUniformBlocks, + uint32_t* pMaxDescriptorSetUpdateAfterBindInlineUniformBlocks) const; + + void GetPhysicalDeviceDotProduct8Properties( + VkBool32* pIntegerDotProduct8BitUnsignedAccelerated, + VkBool32* pIntegerDotProduct8BitSignedAccelerated, + 
VkBool32* pIntegerDotProduct8BitMixedSignednessAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating8BitUnsignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating8BitSignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated) const; + + void GetPhysicalDeviceDotProduct4x8Properties( + VkBool32* pIntegerDotProduct4x8BitPackedUnsignedAccelerated, + VkBool32* pIntegerDotProduct4x8BitPackedSignedAccelerated, + VkBool32* pIntegerDotProduct4x8BitPackedMixedSignednessAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated) const; + + void GetPhysicalDeviceDotProduct16Properties( + VkBool32* pIntegerDotProduct16BitUnsignedAccelerated, + VkBool32* pIntegerDotProduct16BitSignedAccelerated, + VkBool32* pIntegerDotProduct16BitMixedSignednessAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating16BitUnsignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating16BitSignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated) const; + + void GetPhysicalDeviceDotProduct32Properties( + VkBool32* pIntegerDotProduct32BitUnsignedAccelerated, + VkBool32* pIntegerDotProduct32BitSignedAccelerated, + VkBool32* pIntegerDotProduct32BitMixedSignednessAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating32BitUnsignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating32BitSignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated) const; + + void GetPhysicalDeviceDotProduct64Properties( + VkBool32* pIntegerDotProduct64BitUnsignedAccelerated, + VkBool32* pIntegerDotProduct64BitSignedAccelerated, + VkBool32* pIntegerDotProduct64BitMixedSignednessAccelerated, + VkBool32* 
pIntegerDotProductAccumulatingSaturating64BitUnsignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating64BitSignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated) const; + + void GetPhysicalDeviceTexelBufferAlignmentProperties( + VkDeviceSize* pStorageTexelBufferOffsetAlignmentBytes, + VkBool32* pStorageTexelBufferOffsetSingleTexelAlignment, + VkDeviceSize* pUniformTexelBufferOffsetAlignmentBytes, + VkBool32* pUniformTexelBufferOffsetSingleTexelAlignment) const; + + void GetDevicePropertiesMaxBufferSize( + VkDeviceSize* pMaxBufferSize) const; + void GetPhysicalDeviceDriverProperties( VkDriverId* pDriverID, char* pDriverName, diff --git a/icd/api/include/vk_utils.h b/icd/api/include/vk_utils.h index 761831cf..42108e6f 100644 --- a/icd/api/include/vk_utils.h +++ b/icd/api/include/vk_utils.h @@ -358,6 +358,114 @@ class ArrayView size_t m_stride; }; +// ===================================================================================================================== +// PlacementHelper is a utility to lay out objects in a block of memory. +// +// Example usage: +// +// int* pMyInts = nullptr; +// float* pMyFloats = nullptr; +// IObject* pMyObject = nullptr; +// +// auto placement = PlacementHelper<3>( // number of elements +// nullptr, // placement base pointer, use nullptr to determine size first +// PlacementElement<int> {&pMyInts, 6}, // place 6 integers at pMyInts +// PlacementElement<float> {&pMyFloats, 4}, // place 4 floats at pMyFloats +// PlacementElement<IObject>{&pMyObject, 2, 64}); // place a block of memory with 2 * 64 size (see note below) +// +// auto pMemory = malloc(placement.SizeOf()); // allocate memory of required size +// +// placement.FixupPtrs(pMemory); // assign correct values to pMy* pointers +// +// NOTE: If an explicit size is given, as in the pMyObject example, remember that the pointer can no longer be treated +// as an array of IObject.
It's just a pointer to a block of memory with IObject* type and 128 bytes of storage. +// If the assumption is that a second IObject is placed at the offset 64, then that pointer must be computed +// manually, e.g. by the use of Util::VoidPtrInc(). +// +// If the correct alignment is important, make sure that the pointer passed to FixupPtrs() is itself aligned +// to the largest required alignment among the placed objects. +// +template +struct PlacementElement +{ + using Type = T; + + T** outPtr = nullptr; // destination pointer where the objects are placed + size_t count = 1; // number of objects to place at the pointer + size_t size = 0; // optional object size, otherwise 0 means sizeof(T) + size_t alignment = 0; // optional object alignment (must be power of 2), otherwise 0 means alignof(T) +}; + +template +class PlacementHelper +{ +public: + template + PlacementHelper(void* basePtr, Element... elements) + { + // sizeof ... (type) counts the number of types in the parameter pack (not byte size). + static_assert(ElementCount == (sizeof ... (Element)), "Wrong number of elements"); + + // Start unpacking the arguments recursively. + ExpandCtorArguments(basePtr, 0, 0, elements...); + } + + size_t SizeOf() const + { + return m_totalSize; + } + + void FixupPtrs(void* basePtr) const + { + // If the layout has been done with a basePtr == nullptr, the pointers will be offsets from zero. + // Here we can move them relative to the correct memory base offset. + // This is typically needed if we allocate memory after determining the layout and size requirements. + for (uint32_t ndx = 0; ndx < ElementCount; ++ndx) + { + *m_outPtrs[ndx] = Util::VoidPtrInc(basePtr, reinterpret_cast(*m_outPtrs[ndx])); + } + } + +private: + template + void ExpandCtorArguments(void* basePtr, size_t idx, size_t offset, FirstElement head, Element... tail) + { + using HeadType = typename FirstElement::Type; + + // basePtr *may* be a nullptr. 
+ // head.count *may* be 0 + VK_ASSERT(head.outPtr != nullptr); + + // If no explicit size is given, we derive the size from the element type. Keep track of the total size. + size_t size = (head.size != 0) ? head.size : sizeof(HeadType); + size *= head.count; + m_totalSize += size; + + // Ensure the placement offset is aligned for this type + const size_t alignment = (head.alignment != 0) ? head.alignment : alignof(HeadType); + const size_t offsetMisalignment = Util::Pow2Align(offset, alignment) - offset; + + m_totalSize += offsetMisalignment; + offset += offsetMisalignment; + + // Save the output pointer in case we need to modify it later. + // Then write the output pointer anyway (we need to save the offsets somewhere at least). + m_outPtrs[idx] = reinterpret_cast(head.outPtr); + *head.outPtr = static_cast(Util::VoidPtrInc(basePtr, offset)); + + // Process the next element. + ExpandCtorArguments(basePtr, idx + 1, offset + size, tail...); + } + + void ExpandCtorArguments(const void* basePtr, size_t idx, size_t offset) + { + // All elements have been already consumed. + } + + size_t m_totalSize = 0; + void** m_outPtrs[ElementCount] = {}; +}; + template constexpr T StaticMax(T a, T b) { return (a > b) ? 
a : b; }; diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index 2498f062..24f3d528 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -245,17 +245,20 @@ VkResult PipelineCompiler::Initialize() result = m_compilerSolutionLlpc.Initialize(m_gfxIp, info.gfxLevel, pCacheAdapter); } - if (result == VK_SUCCESS) + if (settings.enableUberFetchShader || settings.enableEarlyCompile) { - result = PalToVkResult(m_shaderModuleHandleMap.Init()); - } + if (result == VK_SUCCESS) + { + result = PalToVkResult(m_shaderModuleHandleMap.Init()); + } - if (result == VK_SUCCESS) - { - if (settings.enableUberFetchShader || settings.enableEarlyCompile) + if (result == VK_SUCCESS) { - m_uberFetchShaderInfoFormatMap.Init(); + result = PalToVkResult(m_uberFetchShaderInfoFormatMap.Init()); + } + if (result == VK_SUCCESS) + { result = InitializeUberFetchShaderFormatTable(m_pPhysicalDevice, &m_uberFetchShaderInfoFormatMap); } } @@ -277,6 +280,7 @@ void PipelineCompiler::Destroy() DestroyPipelineBinaryCache(); + if (m_pPhysicalDevice->GetRuntimeSettings().enableEarlyCompile) { Util::MutexAuto mutexLock(&m_shaderModuleCacheLock); for (auto it = m_shaderModuleHandleMap.Begin(); it.Get() != nullptr; it.Next()) @@ -392,7 +396,7 @@ VkResult PipelineCompiler::LoadShaderModuleFromCache( Util::MetroHash::Hash& uniqueHash, ShaderModuleHandle* pShaderModule) { - bool supportModuleCache = true; + bool supportModuleCache = m_pPhysicalDevice->GetRuntimeSettings().enableEarlyCompile; #if ICD_X86_BUILD supportModuleCache = false; @@ -453,7 +457,7 @@ void PipelineCompiler::StoreShaderModuleToCache( VK_ASSERT(pShaderModule->pRefCount == nullptr); - bool supportModuleCache = true; + bool supportModuleCache = m_pPhysicalDevice->GetRuntimeSettings().enableEarlyCompile; #if ICD_X86_BUILD supportModuleCache = false; @@ -2286,6 +2290,8 @@ void PipelineCompiler::ApplyPipelineOptions( pOptions->shadowDescriptorTablePtrHigh = 
static_cast(info.gpuMemoryProperties.shadowDescTableVaStart >> 32); + pOptions->pageMigrationEnabled = info.gpuMemoryProperties.flags.pageMigrationEnabled; + // Apply runtime settings from device const auto& settings = m_pPhysicalDevice->GetRuntimeSettings(); pOptions->enableRelocatableShaderElf = settings.enableRelocatableShaders; diff --git a/icd/api/strings/entry_points.txt b/icd/api/strings/entry_points.txt index bfdf41cb..5a387680 100644 --- a/icd/api/strings/entry_points.txt +++ b/icd/api/strings/entry_points.txt @@ -376,7 +376,7 @@ vkSignalSemaphoreKHR @device @dext(KHR_timeli vkCmdBeginConditionalRenderingEXT @device @dext(EXT_conditional_rendering) vkCmdEndConditionalRenderingEXT @device @dext(EXT_conditional_rendering) -vkGetPhysicalDeviceToolPropertiesEXT @device @dext(EXT_tooling_info) +vkGetPhysicalDeviceToolPropertiesEXT @instance @dext(EXT_tooling_info) vkCmdSetEvent2KHR @device @dext(KHR_synchronization2) vkCmdResetEvent2KHR @device @dext(KHR_synchronization2) diff --git a/icd/api/strings/extensions.txt b/icd/api/strings/extensions.txt index e1ab18fa..b02dbc6a 100644 --- a/icd/api/strings/extensions.txt +++ b/icd/api/strings/extensions.txt @@ -70,6 +70,7 @@ VK_AMD_shader_fragment_mask VK_EXT_sample_locations VK_EXT_sampler_filter_minmax VK_EXT_global_priority +VK_EXT_global_priority_query VK_AMD_buffer_marker VK_EXT_external_memory_host VK_EXT_depth_clip_enable @@ -142,6 +143,7 @@ VK_EXT_border_color_swizzle VK_EXT_color_write_enable VK_KHR_shader_terminate_invocation VK_KHR_synchronization2 +VK_EXT_depth_clip_control VK_EXT_primitive_topology_list_restart VK_KHR_dynamic_rendering VK_KHR_format_feature_flags2 @@ -152,3 +154,4 @@ VK_KHR_zero_initialize_workgroup_memory VK_EXT_load_store_op_none VK_KHR_maintenance4 VK_EXT_index_type_uint8 +VK_EXT_provoking_vertex diff --git a/icd/api/vk_buffer.cpp b/icd/api/vk_buffer.cpp index bfb7c358..8c4dd222 100644 --- a/icd/api/vk_buffer.cpp +++ b/icd/api/vk_buffer.cpp @@ -456,8 +456,7 @@ void 
Buffer::GetBufferMemoryRequirements( VK_ASSERT(pMemoryRequirements->memoryTypeBits != 0); } - - if (pBufferFlags->externallyShareable) + else if (pBufferFlags->externallyShareable) { pMemoryRequirements->memoryTypeBits &= pDevice->GetMemoryTypeMaskForExternalSharing(); } diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index dc9d69aa..352dc77d 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -4165,8 +4165,29 @@ void CmdBuffer::LoadOpClearDepthStencil( const VkRenderingAttachmentInfoKHR* pDepthAttachmentInfo = pRenderingInfo->pDepthAttachment; const VkRenderingAttachmentInfoKHR* pStencilAttachmentInfo = pRenderingInfo->pStencilAttachment; + if ((pStencilAttachmentInfo != nullptr) && + (pStencilAttachmentInfo->imageView != VK_NULL_HANDLE)) + { + const ImageView* const pStencilImageView = ImageView::ObjectFromHandle(pStencilAttachmentInfo->imageView); + + pDepthStencilImage = pStencilImageView->GetImage(); + + GetImageLayout( + pStencilAttachmentInfo->imageView, + pStencilAttachmentInfo->imageLayout, + VK_IMAGE_ASPECT_STENCIL_BIT, + &subresRange, + &stencilLayout); + + if (pStencilAttachmentInfo->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + { + clearSubresRanges.PushBack(subresRange); + clearStencil = pStencilAttachmentInfo->clearValue.depthStencil.stencil; + } + } + if ((pDepthAttachmentInfo != nullptr) && - (pDepthAttachmentInfo->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)) + (pDepthAttachmentInfo->imageView != VK_NULL_HANDLE)) { const ImageView* const pDepthImageView = ImageView::ObjectFromHandle(pDepthAttachmentInfo->imageView); @@ -4179,28 +4200,15 @@ void CmdBuffer::LoadOpClearDepthStencil( &subresRange, &depthLayout); - clearSubresRanges.PushBack(subresRange); - - clearDepth = pDepthAttachmentInfo->clearValue.depthStencil.depth; + if (pDepthAttachmentInfo->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) + { + clearSubresRanges.PushBack(subresRange); + clearDepth = pDepthAttachmentInfo->clearValue.depthStencil.depth; + } } - - if 
((pStencilAttachmentInfo != nullptr) && - (pStencilAttachmentInfo->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)) + else { - const ImageView* const pStencilImageView = ImageView::ObjectFromHandle(pStencilAttachmentInfo->imageView); - - pDepthStencilImage = pStencilImageView->GetImage(); - - GetImageLayout( - pStencilAttachmentInfo->imageView, - pStencilAttachmentInfo->imageLayout, - VK_IMAGE_ASPECT_STENCIL_BIT, - &subresRange, - &stencilLayout); - - clearSubresRanges.PushBack(subresRange); - - clearStencil = pStencilAttachmentInfo->clearValue.depthStencil.stencil; + depthLayout = stencilLayout; } if (pDepthStencilImage != nullptr) diff --git a/icd/api/vk_compute_pipeline.cpp b/icd/api/vk_compute_pipeline.cpp index a3e914a8..c3e70979 100644 --- a/icd/api/vk_compute_pipeline.cpp +++ b/icd/api/vk_compute_pipeline.cpp @@ -90,8 +90,6 @@ void ComputePipeline::ConvertComputePipelineInfo( pOutInfo->pLayout = PipelineLayout::ObjectFromHandle(pIn->layout); } - pOutInfo->staticStateMask = 0; - } // ===================================================================================================================== diff --git a/icd/api/vk_descriptor_set_layout.cpp b/icd/api/vk_descriptor_set_layout.cpp index 9ae24b68..994f4652 100644 --- a/icd/api/vk_descriptor_set_layout.cpp +++ b/icd/api/vk_descriptor_set_layout.cpp @@ -513,9 +513,7 @@ VkResult DescriptorSetLayout::ConvertCreateInfo( for (uint32 inIndex = 0; inIndex < pIn->bindingCount; ++inIndex) { const VkDescriptorSetLayoutBinding & currentBinding = pIn->pBindings[inIndex]; - { - pOutBindings[currentBinding.binding].info = currentBinding; - } + pOutBindings[currentBinding.binding].info = currentBinding; if (currentBinding.descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) { @@ -625,9 +623,7 @@ VkResult DescriptorSetLayout::Create( } } - { - bindingCount = Util::Max(bindingCount, desc.binding + 1); - } + bindingCount = Util::Max(bindingCount, desc.binding + 1); } const size_t bindingInfoAuxSize = bindingCount * 
sizeof(BindingInfo); @@ -736,7 +732,6 @@ void DescriptorSetLayout::Merge( if ((activeStages != 0) && (refBinding.info.descriptorCount > 0)) { uint32_t bindingIdx = refBinding.info.binding; - BindingInfo& mergedBinding = pBindingInfo[bindingIdx]; if (mergedBinding.info.stageFlags == 0) diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index 9be35db1..3ad5f4de 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -238,7 +238,8 @@ Device::Device( const VkPhysicalDeviceFeatures* pFeatures, bool useComputeAsTransferQueue, uint32 privateDataSlotRequestCount, - size_t privateDataSize) + size_t privateDataSize, + const DeviceFeatures& deviceFeatures) : m_pInstance(pPhysicalDevices[DefaultDeviceIndex]->VkInstance()), m_settings(pPhysicalDevices[DefaultDeviceIndex]->GetRuntimeSettings()), @@ -256,6 +257,7 @@ Device::Device( m_pSqttMgr(nullptr), m_pAppOptLayer(nullptr), m_pBarrierFilterLayer(nullptr), + m_enabledFeatures(deviceFeatures), #if VKI_GPU_DECOMPRESS m_pGpuDecoderLayer(nullptr), #endif @@ -269,8 +271,6 @@ Device::Device( memset(m_pQueues, 0, sizeof(m_pQueues)); - m_enabledFeatures.u32All = 0; - m_maxVrsShadingRate = {0, 0}; for (uint32_t deviceIdx = 0; deviceIdx < palDeviceCount; ++deviceIdx) @@ -288,41 +288,6 @@ Device::Device( } - if (pFeatures != nullptr) - { - if (pFeatures->robustBufferAccess) - { - m_enabledFeatures.robustBufferAccess = true; - } - - if (pFeatures->sparseBinding) - { - m_enabledFeatures.sparseBinding = true; - } - } - - if (m_settings.robustBufferAccess == FeatureForceEnable) - { - m_enabledFeatures.robustBufferAccess = true; - } - else if (m_settings.robustBufferAccess == FeatureForceDisable) - { - m_enabledFeatures.robustBufferAccess = false; - } - - if (m_settings.enableRelocatableShaders) - { - m_enabledFeatures.mustWriteImmutableSamplers = true; - } - else - { - m_enabledFeatures.mustWriteImmutableSamplers = false; - } - - m_enabledFeatures.scalarBlockLayout = false; - - m_enabledFeatures.attachmentFragmentShadingRate 
= false; - m_allocatedCount = 0; m_maxAllocations = pPhysicalDevices[DefaultDeviceIndex]->GetLimits().maxMemoryAllocationCount; @@ -501,17 +466,15 @@ VkResult Device::Create( Instance* pInstance = pPhysicalDevice->VkInstance(); const VkPhysicalDeviceFeatures* pEnabledFeatures = pCreateInfo->pEnabledFeatures; VkMemoryOverallocationBehaviorAMD overallocationBehavior = VK_MEMORY_OVERALLOCATION_BEHAVIOR_DEFAULT_AMD; - bool deviceCoherentMemoryEnabled = false; - bool scalarBlockLayoutEnabled = false; - ExtendedRobustness extendedRobustnessEnabled = { false, false, false }; - bool attachmentFragmentShadingRate = false; bool pageableDeviceLocalMemory = false; + DeviceFeatures deviceFeatures = {}; uint32 privateDataSlotRequestCount = 0; bool privateDataEnabled = false; size_t privateDataSize = 0; bool bufferDeviceAddressMultiDeviceEnabled = false; bool maintenance4Enabled = false; + bool globalPriorityQueryEnabled = false; const VkStructHeader* pHeader = nullptr; @@ -567,7 +530,7 @@ VkResult Device::Create( { if (reinterpret_cast(pHeader)->scalarBlockLayout) { - scalarBlockLayoutEnabled = true; + deviceFeatures.scalarBlockLayout = true; } break; @@ -614,7 +577,7 @@ VkResult Device::Create( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: { - deviceCoherentMemoryEnabled = enabledDeviceExtensions.IsExtensionEnabled( + deviceFeatures.deviceCoherentMemory = enabledDeviceExtensions.IsExtensionEnabled( DeviceExtensions::AMD_DEVICE_COHERENT_MEMORY) && reinterpret_cast(pHeader)->deviceCoherentMemory; @@ -626,7 +589,7 @@ VkResult Device::Create( if (reinterpret_cast(pHeader)->scalarBlockLayout) { - scalarBlockLayoutEnabled = true; + deviceFeatures.scalarBlockLayout = true; } break; @@ -638,7 +601,7 @@ VkResult Device::Create( if (reinterpret_cast( pHeader)->attachmentFragmentShadingRate) { - attachmentFragmentShadingRate = true; + deviceFeatures.attachmentFragmentShadingRate = true; } break; } @@ -648,17 +611,17 @@ VkResult Device::Create( if 
(reinterpret_cast(pHeader)->robustBufferAccess2) { - extendedRobustnessEnabled.robustBufferAccess = true; + deviceFeatures.robustBufferAccessExtended = true; } if (reinterpret_cast(pHeader)->robustImageAccess2) { - extendedRobustnessEnabled.robustImageAccess = true; + deviceFeatures.robustImageAccessExtended= true; } if (reinterpret_cast(pHeader)->nullDescriptor) { - extendedRobustnessEnabled.nullDescriptor = true; + deviceFeatures.nullDescriptorExtended = true; } break; @@ -666,8 +629,10 @@ VkResult Device::Create( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: { - privateDataEnabled = reinterpret_cast( - pHeader)->privateData; + if (reinterpret_cast(pHeader)->privateData) + { + privateDataEnabled = true; + } break; } @@ -676,7 +641,7 @@ VkResult Device::Create( { if (reinterpret_cast(pHeader)->robustImageAccess) { - extendedRobustnessEnabled.robustImageAccess = true; + deviceFeatures.robustImageAccessExtended= true; } break; @@ -690,6 +655,13 @@ VkResult Device::Create( pageableDeviceLocalMemory = true; } + if (enabledDeviceExtensions.IsExtensionEnabled(DeviceExtensions::EXT_MEMORY_PRIORITY) || + (enabledDeviceExtensions.IsExtensionEnabled(DeviceExtensions::EXT_PAGEABLE_DEVICE_LOCAL_MEMORY) && + pageableDeviceLocalMemory)) + { + deviceFeatures.appControlledMemPriority = true; + } + break; } @@ -704,12 +676,21 @@ VkResult Device::Create( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_EXT: + { + if (reinterpret_cast( + pHeader)->globalPriorityQuery) + { + globalPriorityQueryEnabled = true; + } + + break; + } + default: break; } - // TODO Remove below check after physical device properties for the following extensions are added. - // The loader device create info should be ignored by the driver. 
if ((static_cast(pHeader->sType) != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO) && (static_cast(pHeader->sType) != VK_STRUCTURE_TYPE_DEVICE_GROUP_DEVICE_CREATE_INFO) && (static_cast(pHeader->sType) != VK_STRUCTURE_TYPE_DEVICE_PRIVATE_DATA_CREATE_INFO_EXT) @@ -728,6 +709,48 @@ VkResult Device::Create( pCreateInfo->pEnabledFeatures, true); } + + if (pEnabledFeatures != nullptr) + { + if (pEnabledFeatures->robustBufferAccess) + { + deviceFeatures.robustBufferAccess = true; + } + + if (pEnabledFeatures->sparseBinding) + { + deviceFeatures.sparseBinding = true; + } + } + + if (pPhysicalDevice->GetRuntimeSettings().robustBufferAccess == FeatureForceEnable) + { + deviceFeatures.robustBufferAccess = true; + } + else if (pPhysicalDevice->GetRuntimeSettings().robustBufferAccess == FeatureForceDisable) + { + deviceFeatures.robustBufferAccess = false; + } + + if (pPhysicalDevice->GetRuntimeSettings().enableRelocatableShaders) + { + deviceFeatures.mustWriteImmutableSamplers = true; + } + else + { + deviceFeatures.mustWriteImmutableSamplers = false; + } + + if ((pPhysicalDevice->GetRuntimeSettings().strictImageSizeRequirements == StrictImageSizeOn) || + ((pPhysicalDevice->GetRuntimeSettings().strictImageSizeRequirements == StrictImageSizeAppControlled) && + maintenance4Enabled)) + { + deviceFeatures.strictImageSizeRequirements = true; + } + else + { + deviceFeatures.strictImageSizeRequirements = false; + } } if (palResult == Pal::Result::Success) @@ -787,6 +810,29 @@ VkResult Device::Create( { queuePriority[pQueueInfo->queueFamilyIndex][queueId] = pPriorityInfo->globalPriority; } + + uint32 queuePrioritySupportMask = 0; + for (uint32 engineNdx = 0u; + engineNdx < properties.engineProperties[pQueueInfo->queueFamilyIndex].engineCount; + ++engineNdx) + { + const auto& engineCapabilities = + properties.engineProperties[pQueueInfo->queueFamilyIndex].capabilities[engineNdx]; + + // Leave out High Priority for Universal Queue + if ((pQueueInfo->queueFamilyIndex != 
Pal::EngineTypeUniversal) || IsNormalQueue(engineCapabilities)) + { + queuePrioritySupportMask |= engineCapabilities.queuePrioritySupport; + } + } + + Pal::QueuePrioritySupport palQueuePriority = VkToPalGlobaPrioritySupport(pPriorityInfo->globalPriority); + if (((palQueuePriority & queuePrioritySupportMask) == false) && + globalPriorityQueryEnabled && + (pPhysicalDevice->GetRuntimeSettings().ignoreDeviceQueuePriorityFailures == false)) + { + vkResult = VK_ERROR_INITIALIZATION_FAILED; + } } break; default: @@ -938,7 +984,8 @@ VkResult Device::Create( pEnabledFeatures, useComputeAsTransferQueue, privateDataSlotRequestCount, - privateDataSize)); + privateDataSize, + deviceFeatures)); DispatchableDevice* pDispatchableDevice = static_cast(pMemory); DispatchableQueue* pDispatchableQueues[Queue::MaxQueueFamilies][Queue::MaxQueuesPerFamily] = {}; @@ -1129,13 +1176,8 @@ VkResult Device::Create( &pDispatchableQueues[0][0], enabledDeviceExtensions, overallocationBehavior, - deviceCoherentMemoryEnabled, - attachmentFragmentShadingRate, - scalarBlockLayoutEnabled, - extendedRobustnessEnabled, bufferDeviceAddressMultiDeviceEnabled, - pageableDeviceLocalMemory, - maintenance4Enabled); + pageableDeviceLocalMemory); // If we've failed to Initialize, make sure we destroy anything we might have allocated. 
if (vkResult != VK_SUCCESS) @@ -1165,13 +1207,8 @@ VkResult Device::Initialize( DispatchableQueue** pQueues, const DeviceExtensions::Enabled& enabled, const VkMemoryOverallocationBehaviorAMD overallocationBehavior, - const bool deviceCoherentMemoryEnabled, - const bool attachmentFragmentShadingRate, - bool scalarBlockLayoutEnabled, - const ExtendedRobustness& extendedRobustnessEnabled, bool bufferDeviceAddressMultiDeviceEnabled, - bool pageableDeviceLocalMemory, - bool maintenance4Enabled) + bool pageableDeviceLocalMemory) { // Initialize the internal memory manager VkResult result = m_internalMemMgr.Init(); @@ -1277,37 +1314,12 @@ VkResult Device::Initialize( deviceProps.engineProperties[Pal::EngineTypeDma].minTimestampAlignment : deviceProps.engineProperties[Pal::EngineTypeUniversal].minTimestampAlignment; - m_enabledFeatures.deviceCoherentMemory = deviceCoherentMemoryEnabled; - m_enabledFeatures.scalarBlockLayout = scalarBlockLayoutEnabled; - m_enabledFeatures.robustBufferAccessExtended = extendedRobustnessEnabled.robustBufferAccess; - m_enabledFeatures.robustImageAccessExtended = extendedRobustnessEnabled.robustImageAccess; - m_enabledFeatures.nullDescriptorExtended = extendedRobustnessEnabled.nullDescriptor; - - if (IsExtensionEnabled(DeviceExtensions::EXT_MEMORY_PRIORITY) || - (IsExtensionEnabled(DeviceExtensions::EXT_PAGEABLE_DEVICE_LOCAL_MEMORY) && pageableDeviceLocalMemory)) - { - m_enabledFeatures.appControlledMemPriority = true; - } - - if ((m_settings.strictImageSizeRequirements == StrictImageSizeOn) || - ((m_settings.strictImageSizeRequirements == StrictImageSizeAppControlled) && - maintenance4Enabled)) - { - m_enabledFeatures.strictImageSizeRequirements = true; - } - else - { - m_enabledFeatures.strictImageSizeRequirements = false; - } - // If VkPhysicalDeviceBufferDeviceAddressFeaturesEXT.bufferDeviceAddressMultiDevice is enabled // and if globalGpuVaSupport is supported and if multiple devices are used set the global GpuVa. 
m_useGlobalGpuVa = (bufferDeviceAddressMultiDeviceEnabled && deviceProps.gpuMemoryProperties.flags.globalGpuVaSupport && IsMultiGpu()); - m_enabledFeatures.attachmentFragmentShadingRate = attachmentFragmentShadingRate; - Pal::VrsShadingRate maxPalVrsShadingRate; bool vrsMaskIsValid = Util::BitMaskScanReverse(reinterpret_cast(&maxPalVrsShadingRate), deviceProps.gfxipProperties.supportedVrsRates); diff --git a/icd/api/vk_dispatch.cpp b/icd/api/vk_dispatch.cpp index 27440f3a..6b2c17e9 100644 --- a/icd/api/vk_dispatch.cpp +++ b/icd/api/vk_dispatch.cpp @@ -624,6 +624,7 @@ void DispatchTable::Init() INIT_DISPATCH_ENTRY(vkCmdSetDepthBoundsTestEnableEXT ); INIT_DISPATCH_ENTRY(vkCmdSetStencilTestEnableEXT ); INIT_DISPATCH_ENTRY(vkCmdSetStencilOpEXT ); + INIT_DISPATCH_ENTRY(vkCmdSetFragmentShadingRateKHR ); INIT_DISPATCH_ENTRY(vkGetPhysicalDeviceFragmentShadingRatesKHR ); @@ -646,9 +647,11 @@ void DispatchTable::Init() INIT_DISPATCH_ENTRY(vkCmdCopyImage2KHR ); INIT_DISPATCH_ENTRY(vkCmdCopyImageToBuffer2KHR ); INIT_DISPATCH_ENTRY(vkCmdResolveImage2KHR ); + INIT_DISPATCH_ENTRY(vkGetDeviceBufferMemoryRequirementsKHR ); INIT_DISPATCH_ENTRY(vkGetDeviceImageMemoryRequirementsKHR ); INIT_DISPATCH_ENTRY(vkGetDeviceImageSparseMemoryRequirementsKHR ); + } // ===================================================================================================================== diff --git a/icd/api/vk_fence.cpp b/icd/api/vk_fence.cpp index 84974759..b7c759f7 100644 --- a/icd/api/vk_fence.cpp +++ b/icd/api/vk_fence.cpp @@ -212,15 +212,27 @@ VkResult Fence::ImportFenceFd( } else { - const size_t palSize = pDevice->PalDevice(DefaultDeviceIndex)->GetFenceSize(nullptr); - VkAllocationCallbacks* pAllocator = pDevice->VkInstance()->GetAllocCallbacks(); + void* pMemory = nullptr; + + if (m_pPalTemporaryFences != nullptr) + { + m_pPalTemporaryFences->Destroy(); - // Allocate system memory - void* pMemory = pAllocator->pfnAllocation( - pAllocator->pUserData, - palSize, - VK_DEFAULT_MEM_ALIGN, - 
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + // Reuse the existing memory + pMemory = m_pPalTemporaryFences; + } + else + { + const size_t palSize = pDevice->PalDevice(DefaultDeviceIndex)->GetFenceSize(nullptr); + VkAllocationCallbacks* pAllocator = pDevice->VkInstance()->GetAllocCallbacks(); + + // Allocate system memory + pMemory = pAllocator->pfnAllocation( + pAllocator->pUserData, + palSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + } if (pMemory != nullptr) { diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index 33d3ddd8..5621068a 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -292,6 +292,13 @@ static void ConvertImageCreateInfo( pPalCreateInfo->tiling = VkToPalImageTiling(pCreateInfo->tiling); pPalCreateInfo->tilingOptMode = pDevice->GetTilingOptMode(); + if ((pPalCreateInfo->tilingOptMode == Pal::TilingOptMode::OptForSpace) && + Pal::Formats::IsBlockCompressed(pPalCreateInfo->swizzledFormat.format) && + (pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gfxLevel > Pal::GfxIpLevel::GfxIp9)) + { + pPalCreateInfo->tilingOptMode = Pal::TilingOptMode::Balanced; + } + if ((pCreateInfo->imageType == VK_IMAGE_TYPE_3D) && (pCreateInfo->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT))) { @@ -1661,8 +1668,7 @@ void Image::SetMemoryRequirementsAtCreate( VK_ASSERT(m_memoryRequirements.memoryTypeBits != 0); } - - if (m_internalFlags.externallyShareable) + else if (m_internalFlags.externallyShareable) { m_memoryRequirements.memoryTypeBits &= pDevice->GetMemoryTypeMaskForExternalSharing(); } diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index 692580dc..ff0cfcc1 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -1178,6 +1178,11 @@ void PhysicalDevice::PopulateFormatProperties() optimalFlags &= AllImgFeatures; bufferFlags &= AllBufFeatures; + if (Formats::IsDepthStencilFormat(format)) + { + bufferFlags = 0; + } + if ((format == 
VK_FORMAT_R64_SINT) || (format == VK_FORMAT_R64_UINT)) { memset(&m_formatFeaturesTable[i], 0, sizeof(VkFormatProperties)); @@ -1326,6 +1331,43 @@ VkResult PhysicalDevice::GetQueueFamilyProperties( switch (static_cast(pHeader->sType)) { + case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_EXT: + { + auto* pProperties = static_cast(pNext); + pProperties->priorityCount = 0; + + uint32 queuePrioritySupportMask = 0; + for (uint32 engineNdx = 0u; engineNdx < m_properties.engineProperties[i].engineCount; ++engineNdx) + { + const auto& engineCapabilities = m_properties.engineProperties[i].capabilities[engineNdx]; + + // Leave out High Priority for Universal Queue + if ((i != Pal::EngineTypeUniversal) || IsNormalQueue(engineCapabilities)) + { + queuePrioritySupportMask |= engineCapabilities.queuePrioritySupport; + } + } + + if ((queuePrioritySupportMask & Pal::QueuePrioritySupport::SupportQueuePriorityIdle) != 0) + { + pProperties->priorities[pProperties->priorityCount++] = VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT; + } + + // Everything gets Normal + pProperties->priorities[pProperties->priorityCount++] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT; + + if ((queuePrioritySupportMask & Pal::QueuePrioritySupport::SupportQueuePriorityHigh) != 0) + { + pProperties->priorities[pProperties->priorityCount++] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT; + } + + if ((queuePrioritySupportMask & Pal::QueuePrioritySupport::SupportQueuePriorityRealtime) != 0) + { + pProperties->priorities[pProperties->priorityCount++] = VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT; + } + + break; + } default: // Skip any unknown extension structures break; @@ -3662,6 +3704,8 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( (pPhysicalDevice->PalProperties().osProperties.supportQueuePriority)) { availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_GLOBAL_PRIORITY)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_GLOBAL_PRIORITY_QUERY)); + } 
availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_EXTERNAL_FENCE)); @@ -3798,20 +3842,16 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_COLOR_WRITE_ENABLE)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_ZERO_INITIALIZE_WORKGROUP_MEMORY)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_LOAD_STORE_OP_NONE)); - -#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 52 availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_YCBCR_IMAGE_ARRAYS)); -#else -#endif -#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 1500 if ((pPhysicalDevice == nullptr) || ((pPhysicalDevice->PalProperties().gfxLevel != Pal::GfxIpLevel::GfxIp9) && (pPhysicalDevice->PalProperties().gfxipProperties.flags.supportBorderColorSwizzle))) { availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_BORDER_COLOR_SWIZZLE)); } -#endif + + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_MAINTENANCE4)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_INDEX_TYPE_UINT8)); @@ -3891,15 +3931,6 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( return availableExtensions; } -// ===================================================================================================================== -// Is the queue suitable for normal use (i.e. non-exclusive and no elevated priority). -template -static bool IsNormalQueue(const T& engineCapabilities) -{ - return ((engineCapabilities.flags.exclusive == 0) && - ((engineCapabilities.queuePrioritySupport & Pal::QueuePrioritySupport::SupportQueuePriorityNormal) != 0)); -} - // ===================================================================================================================== // Populates the device queue families. 
Note that there's not a one-to-one association between PAL queue types and // Vulkan queue families due to many reasons: @@ -4384,6 +4415,169 @@ void PhysicalDevice::GetPhysicalDeviceSubgroupProperties( *pQuadOperationsInAllStages = VK_TRUE; } +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceSubgroupSizeControlProperties( + uint32_t* pMinSubgroupSize, + uint32_t* pMaxSubgroupSize, + uint32_t* pMaxComputeWorkgroupSubgroups, + VkShaderStageFlags* pRequiredSubgroupSizeStages +) const +{ + *pMinSubgroupSize = m_properties.gfxipProperties.shaderCore.minWavefrontSize; + *pMaxSubgroupSize = m_properties.gfxipProperties.shaderCore.maxWavefrontSize; + + // No limits on the maximum number of subgroups allowed within a workgroup. + *pMaxComputeWorkgroupSubgroups = UINT32_MAX; + + // Do not support setting a required subgroup size in any stage. + *pRequiredSubgroupSizeStages = 0; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceUniformBlockProperties( + uint32_t* pMaxInlineUniformBlockSize, + uint32_t* pMaxPerStageDescriptorInlineUniformBlocks, + uint32_t* pMaxPerStageDescriptorUpdateAfterBindInlineUniformBlocks, + uint32_t* pMaxDescriptorSetInlineUniformBlocks, + uint32_t* pMaxDescriptorSetUpdateAfterBindInlineUniformBlocks +) const +{ + *pMaxInlineUniformBlockSize = 64 * 1024; + *pMaxPerStageDescriptorInlineUniformBlocks = 16; + *pMaxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 16; + *pMaxDescriptorSetInlineUniformBlocks = 16; + *pMaxDescriptorSetUpdateAfterBindInlineUniformBlocks = 16; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceDotProduct8Properties( + VkBool32* pIntegerDotProduct8BitUnsignedAccelerated, + VkBool32* 
pIntegerDotProduct8BitSignedAccelerated, + VkBool32* pIntegerDotProduct8BitMixedSignednessAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating8BitUnsignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating8BitSignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated +) const +{ + const VkBool32 int8DotSupport = PalProperties().gfxipProperties.flags.supportInt8Dot ? VK_TRUE : + VK_FALSE; + + *pIntegerDotProduct8BitUnsignedAccelerated = int8DotSupport; + *pIntegerDotProduct8BitSignedAccelerated = int8DotSupport; + *pIntegerDotProductAccumulatingSaturating8BitUnsignedAccelerated = VK_FALSE; + *pIntegerDotProductAccumulatingSaturating8BitSignedAccelerated = VK_FALSE; + *pIntegerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = VK_FALSE; + + { + *pIntegerDotProduct8BitMixedSignednessAccelerated = VK_FALSE; + } +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceDotProduct4x8Properties( + VkBool32* pIntegerDotProduct4x8BitPackedUnsignedAccelerated, + VkBool32* pIntegerDotProduct4x8BitPackedSignedAccelerated, + VkBool32* pIntegerDotProduct4x8BitPackedMixedSignednessAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated +) const +{ + const VkBool32 int8DotSupport = PalProperties().gfxipProperties.flags.supportInt8Dot ? 
VK_TRUE : + VK_FALSE; + + *pIntegerDotProduct4x8BitPackedUnsignedAccelerated = int8DotSupport; + *pIntegerDotProduct4x8BitPackedSignedAccelerated = int8DotSupport; + *pIntegerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = VK_FALSE; + *pIntegerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = VK_FALSE; + *pIntegerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = VK_FALSE; + + { + *pIntegerDotProduct4x8BitPackedMixedSignednessAccelerated = VK_FALSE; + } +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceDotProduct16Properties( + VkBool32* pIntegerDotProduct16BitUnsignedAccelerated, + VkBool32* pIntegerDotProduct16BitSignedAccelerated, + VkBool32* pIntegerDotProduct16BitMixedSignednessAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating16BitUnsignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating16BitSignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated +) const +{ + const VkBool32 int16DotSupport = ((PalProperties().gfxipProperties.flags.support16BitInstructions) && + ((GetRuntimeSettings().optOnlyEnableFP16ForGfx9Plus == false) || + (PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp9)) + ) ? 
VK_TRUE : VK_FALSE; + + *pIntegerDotProduct16BitUnsignedAccelerated = int16DotSupport; + *pIntegerDotProduct16BitSignedAccelerated = int16DotSupport; + *pIntegerDotProductAccumulatingSaturating16BitUnsignedAccelerated = int16DotSupport; + *pIntegerDotProductAccumulatingSaturating16BitSignedAccelerated = int16DotSupport; + *pIntegerDotProduct16BitMixedSignednessAccelerated = VK_FALSE; + *pIntegerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = VK_FALSE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceDotProduct32Properties( + VkBool32* pIntegerDotProduct32BitUnsignedAccelerated, + VkBool32* pIntegerDotProduct32BitSignedAccelerated, + VkBool32* pIntegerDotProduct32BitMixedSignednessAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating32BitUnsignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating32BitSignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated +) const +{ + *pIntegerDotProduct32BitUnsignedAccelerated = VK_FALSE; + *pIntegerDotProduct32BitSignedAccelerated = VK_FALSE; + *pIntegerDotProduct32BitMixedSignednessAccelerated = VK_FALSE; + *pIntegerDotProductAccumulatingSaturating32BitUnsignedAccelerated = VK_FALSE; + *pIntegerDotProductAccumulatingSaturating32BitSignedAccelerated = VK_FALSE; + *pIntegerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = VK_FALSE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceDotProduct64Properties( + VkBool32* pIntegerDotProduct64BitUnsignedAccelerated, + VkBool32* pIntegerDotProduct64BitSignedAccelerated, + VkBool32* pIntegerDotProduct64BitMixedSignednessAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating64BitUnsignedAccelerated, + VkBool32* 
pIntegerDotProductAccumulatingSaturating64BitSignedAccelerated, + VkBool32* pIntegerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated +) const +{ + *pIntegerDotProduct64BitUnsignedAccelerated = VK_FALSE; + *pIntegerDotProduct64BitSignedAccelerated = VK_FALSE; + *pIntegerDotProduct64BitMixedSignednessAccelerated = VK_FALSE; + *pIntegerDotProductAccumulatingSaturating64BitUnsignedAccelerated = VK_FALSE; + *pIntegerDotProductAccumulatingSaturating64BitSignedAccelerated = VK_FALSE; + *pIntegerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = VK_FALSE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetPhysicalDeviceTexelBufferAlignmentProperties( + VkDeviceSize* pStorageTexelBufferOffsetAlignmentBytes, + VkBool32* pStorageTexelBufferOffsetSingleTexelAlignment, + VkDeviceSize* pUniformTexelBufferOffsetAlignmentBytes, + VkBool32* pUniformTexelBufferOffsetSingleTexelAlignment +) const +{ + *pStorageTexelBufferOffsetAlignmentBytes = m_limits.minTexelBufferOffsetAlignment; + *pStorageTexelBufferOffsetSingleTexelAlignment = VK_TRUE; + *pUniformTexelBufferOffsetAlignmentBytes = m_limits.minTexelBufferOffsetAlignment; + *pUniformTexelBufferOffsetSingleTexelAlignment = VK_TRUE; +} + +// ===================================================================================================================== +void PhysicalDevice::GetDevicePropertiesMaxBufferSize( + VkDeviceSize* pMaxBufferSize +) const +{ + *pMaxBufferSize = 2u * 1024u * 1024u * 1024u; // TODO: replace with actual size +} + // ===================================================================================================================== void PhysicalDevice::GetPhysicalDeviceDriverProperties( VkDriverId* pDriverID, @@ -5129,6 +5323,19 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: + { + auto* pExtInfo = 
reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->depthClipControl = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: { auto* pExtInfo = reinterpret_cast(pHeader); @@ -5831,6 +6038,20 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: + { + auto* pExtInfo = reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->provokingVertexLast = VK_TRUE; + pExtInfo->transformFeedbackPreservesProvokingVertex = VK_FALSE; + } + + structSize = sizeof(*pExtInfo); + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: { auto* pExtInfo = reinterpret_cast(pHeader); @@ -5844,6 +6065,19 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_EXT: + { + auto* pExtInfo = reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->globalPriorityQuery = PalProperties().osProperties.supportQueuePriority; + } + + structSize = sizeof(*pExtInfo); + break; + } + default: { // skip any unsupported extension structures @@ -6199,11 +6433,12 @@ void PhysicalDevice::GetDeviceProperties2( { auto* pProps = static_cast(pNext); - pProps->maxInlineUniformBlockSize = 64*1024; - pProps->maxPerStageDescriptorInlineUniformBlocks = 16; - pProps->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 16; - pProps->maxDescriptorSetInlineUniformBlocks = 16; - pProps->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 16; + GetPhysicalDeviceUniformBlockProperties( + &pProps->maxInlineUniformBlockSize, + &pProps->maxPerStageDescriptorInlineUniformBlocks, + &pProps->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks, + &pProps->maxDescriptorSetInlineUniformBlocks, + &pProps->maxDescriptorSetUpdateAfterBindInlineUniformBlocks); break; } @@ -6252,14 +6487,11 @@ void PhysicalDevice::GetDeviceProperties2( { auto* 
pProps = static_cast(pNext); - pProps->minSubgroupSize = m_properties.gfxipProperties.shaderCore.minWavefrontSize; - pProps->maxSubgroupSize = m_properties.gfxipProperties.shaderCore.maxWavefrontSize; - - // No limits on the maximum number of subgroups allowed within a workgroup. - pProps->maxComputeWorkgroupSubgroups = UINT32_MAX; - - // Do not support setting a required subgroup size in any stage. - pProps->requiredSubgroupSizeStages = 0; + GetPhysicalDeviceSubgroupSizeControlProperties( + &pProps->minSubgroupSize, + &pProps->maxSubgroupSize, + &pProps->maxComputeWorkgroupSubgroups, + &pProps->requiredSubgroupSizeStages); break; } @@ -6394,60 +6626,56 @@ void PhysicalDevice::GetDeviceProperties2( { auto* pProps = static_cast(pNext); - const VkBool32 int8DotSupport = PalProperties().gfxipProperties.flags.supportInt8Dot ? VK_TRUE : - VK_FALSE; - pProps->integerDotProduct8BitUnsignedAccelerated = int8DotSupport; - pProps->integerDotProduct8BitSignedAccelerated = int8DotSupport; - pProps->integerDotProduct4x8BitPackedUnsignedAccelerated = int8DotSupport; - pProps->integerDotProduct4x8BitPackedSignedAccelerated = int8DotSupport; - - { - pProps->integerDotProduct8BitMixedSignednessAccelerated = VK_FALSE; - pProps->integerDotProduct4x8BitPackedMixedSignednessAccelerated = VK_FALSE; - } - - const VkBool32 int16DotSupport = ((PalProperties().gfxipProperties.flags.support16BitInstructions) && - ((GetRuntimeSettings().optOnlyEnableFP16ForGfx9Plus == false) || - (PalProperties().gfxLevel >= Pal::GfxIpLevel::GfxIp9)) - ) ? 
VK_TRUE : VK_FALSE; - - pProps->integerDotProduct16BitUnsignedAccelerated = int16DotSupport; - pProps->integerDotProduct16BitSignedAccelerated = int16DotSupport; - pProps->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = int16DotSupport; - pProps->integerDotProductAccumulatingSaturating16BitSignedAccelerated = int16DotSupport; - - pProps->integerDotProduct16BitMixedSignednessAccelerated = VK_FALSE; - pProps->integerDotProduct32BitUnsignedAccelerated = VK_FALSE; - pProps->integerDotProduct32BitSignedAccelerated = VK_FALSE; - pProps->integerDotProduct32BitMixedSignednessAccelerated = VK_FALSE; - pProps->integerDotProduct64BitUnsignedAccelerated = VK_FALSE; - pProps->integerDotProduct64BitSignedAccelerated = VK_FALSE; - pProps->integerDotProduct64BitMixedSignednessAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating8BitSignedAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating32BitSignedAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = VK_FALSE; - pProps->integerDotProductAccumulatingSaturating64BitSignedAccelerated = VK_FALSE; - 
pProps->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = VK_FALSE; + GetPhysicalDeviceDotProduct8Properties( + &pProps->integerDotProduct8BitUnsignedAccelerated, + &pProps->integerDotProduct8BitSignedAccelerated, + &pProps->integerDotProduct8BitMixedSignednessAccelerated, + &pProps->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated, + &pProps->integerDotProductAccumulatingSaturating8BitSignedAccelerated, + &pProps->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated); + + GetPhysicalDeviceDotProduct4x8Properties( + &pProps->integerDotProduct4x8BitPackedUnsignedAccelerated, + &pProps->integerDotProduct4x8BitPackedSignedAccelerated, + &pProps->integerDotProduct4x8BitPackedMixedSignednessAccelerated, + &pProps->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated, + &pProps->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated, + &pProps->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated); + + GetPhysicalDeviceDotProduct16Properties( + &pProps->integerDotProduct16BitUnsignedAccelerated, + &pProps->integerDotProduct16BitSignedAccelerated, + &pProps->integerDotProduct16BitMixedSignednessAccelerated, + &pProps->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated, + &pProps->integerDotProductAccumulatingSaturating16BitSignedAccelerated, + &pProps->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated); + + GetPhysicalDeviceDotProduct32Properties( + &pProps->integerDotProduct32BitUnsignedAccelerated, + &pProps->integerDotProduct32BitSignedAccelerated, + &pProps->integerDotProduct32BitMixedSignednessAccelerated, + &pProps->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated, + &pProps->integerDotProductAccumulatingSaturating32BitSignedAccelerated, + &pProps->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated); + + GetPhysicalDeviceDotProduct64Properties( + 
&pProps->integerDotProduct64BitUnsignedAccelerated, + &pProps->integerDotProduct64BitSignedAccelerated, + &pProps->integerDotProduct64BitMixedSignednessAccelerated, + &pProps->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated, + &pProps->integerDotProductAccumulatingSaturating64BitSignedAccelerated, + &pProps->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated); } break; case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: { auto* pProps = static_cast(pNext); - // Properties are guaranteed by the comment for PAL's definition of CreateTypedBufferViewSrds(). - pProps->storageTexelBufferOffsetAlignmentBytes = m_limits.minTexelBufferOffsetAlignment; - pProps->storageTexelBufferOffsetSingleTexelAlignment = VK_TRUE; - pProps->uniformTexelBufferOffsetAlignmentBytes = m_limits.minTexelBufferOffsetAlignment; - pProps->uniformTexelBufferOffsetSingleTexelAlignment = VK_TRUE; + GetPhysicalDeviceTexelBufferAlignmentProperties( + &pProps->storageTexelBufferOffsetAlignmentBytes, + &pProps->storageTexelBufferOffsetSingleTexelAlignment, + &pProps->uniformTexelBufferOffsetAlignmentBytes, + &pProps->uniformTexelBufferOffsetSingleTexelAlignment); break; } @@ -6470,7 +6698,16 @@ void PhysicalDevice::GetDeviceProperties2( case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES_KHR: { auto* pProps = static_cast(pNext); - pProps->maxBufferSize = 2u * 1024u * 1024u * 1024u; // TODO: replace with actual size + + GetDevicePropertiesMaxBufferSize(&pProps->maxBufferSize); + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: + { + auto* pProps = static_cast(pNext); + pProps->provokingVertexModePerPipeline = VK_TRUE; + pProps->transformFeedbackPreservesTriangleFanProvokingVertex = VK_FALSE; break; } @@ -7206,6 +7443,7 @@ static void VerifyExtensions( && dev.IsExtensionSupported(DeviceExtensions::KHR_VULKAN_MEMORY_MODEL) && dev.IsExtensionSupported(DeviceExtensions::KHR_BUFFER_DEVICE_ADDRESS)); 
} + } // ===================================================================================================================== diff --git a/icd/api/vk_pipeline_layout.cpp b/icd/api/vk_pipeline_layout.cpp index e8e7bb8e..fad27a5f 100644 --- a/icd/api/vk_pipeline_layout.cpp +++ b/icd/api/vk_pipeline_layout.cpp @@ -870,8 +870,7 @@ VkResult PipelineLayout::BuildLlpcSetMapping( pNode->offsetInDwords = binding.sta.dwOffset; pNode->sizeInDwords = binding.sta.dwSize; pNode->srdRange.binding = binding.info.binding; - pNode->srdRange.set = - setIndex; + pNode->srdRange.set = setIndex; (*pStaNodeCount)++; if (binding.imm.dwSize > 0) @@ -958,8 +957,7 @@ void PipelineLayout::BuildLlpcStaticSetMapping( pNode->offsetInDwords = binding.sta.dwOffset; pNode->sizeInDwords = binding.sta.dwSize; pNode->srdRange.binding = binding.info.binding; - pNode->srdRange.set = - setIndex; + pNode->srdRange.set = setIndex; (*pNodeCount)++; if (binding.imm.dwSize > 0) diff --git a/icd/api/vk_sampler.cpp b/icd/api/vk_sampler.cpp index 2ddc8952..9f7f898e 100644 --- a/icd/api/vk_sampler.cpp +++ b/icd/api/vk_sampler.cpp @@ -164,15 +164,19 @@ VkResult Sampler::Create( break; } - samplerInfo.flags.useAnisoThreshold = (settings.useAnisoThreshold == true) ? 1 : 0; - samplerInfo.anisoThreshold = settings.anisoThreshold; - samplerInfo.perfMip = settings.samplerPerfMip; - samplerInfo.flags.unnormalizedCoords = (pCreateInfo->unnormalizedCoordinates == VK_TRUE) ? 1 : 0; - samplerInfo.flags.prtBlendZeroMode = 0; - samplerInfo.flags.seamlessCubeMapFiltering = 1; - samplerInfo.flags.truncateCoords = ((pCreateInfo->magFilter == VK_FILTER_NEAREST) && - (pCreateInfo->minFilter == VK_FILTER_NEAREST)) - ? 1 : 0; + // disableSingleMipAnisoOverride=1 ensure properly sampling with single mipmap level and anisotropic filtering. + samplerInfo.flags.disableSingleMipAnisoOverride = 1; + + samplerInfo.flags.useAnisoThreshold = (settings.useAnisoThreshold == true) ? 
1 : 0; + samplerInfo.anisoThreshold = settings.anisoThreshold; + samplerInfo.perfMip = settings.samplerPerfMip; + samplerInfo.flags.unnormalizedCoords = (pCreateInfo->unnormalizedCoordinates == VK_TRUE) ? 1 : 0; + samplerInfo.flags.prtBlendZeroMode = 0; + samplerInfo.flags.seamlessCubeMapFiltering = 1; + samplerInfo.flags.truncateCoords = ((pCreateInfo->magFilter == VK_FILTER_NEAREST) && + (pCreateInfo->minFilter == VK_FILTER_NEAREST) && + (samplerInfo.compareFunc == Pal::CompareFunc::Never)) + ? 1 : 0; // Parse the creation info. const void* pNext = pCreateInfo->pNext; diff --git a/icd/imported/gputexdecoder/gpuTexDecoder.cpp b/icd/imported/gputexdecoder/gpuTexDecoder.cpp index eb51b7f2..f8d1350b 100755 --- a/icd/imported/gputexdecoder/gpuTexDecoder.cpp +++ b/icd/imported/gputexdecoder/gpuTexDecoder.cpp @@ -611,86 +611,86 @@ Pal::IPipeline* Device::GetInternalPipeline( Pal::IPipeline* pPipeline = nullptr; void* pMemory = nullptr; PipelineBuildInfo buildInfo = {}; + GpuDecodeMappingNode resourceNodes[AstcInternalPipelineNodes]; if (type == InternalTexConvertCsType::ConvertASTCToRGBA8) { - GpuDecodeMappingNode astcResourceNodes[AstcInternalPipelineNodes]; uint32 offset = 0; buildInfo.nodeCount = 1; // 1.Color UnQuantization Buffer View - astcResourceNodes[0].nodeType = NodeType::Buffer; - astcResourceNodes[0].sizeInDwords = m_bufferViewSizeInDwords; - astcResourceNodes[0].offsetInDwords = 0; - astcResourceNodes[0].binding = 0; - astcResourceNodes[0].set = 0; + resourceNodes[0].nodeType = NodeType::Buffer; + resourceNodes[0].sizeInDwords = m_bufferViewSizeInDwords; + resourceNodes[0].offsetInDwords = 0; + resourceNodes[0].binding = 0; + resourceNodes[0].set = 0; // 2.Trits Quints Buffer View - astcResourceNodes[1].nodeType = NodeType::Buffer; - astcResourceNodes[1].sizeInDwords = m_bufferViewSizeInDwords; - astcResourceNodes[1].offsetInDwords = 1 * m_bufferViewSizeInDwords; - astcResourceNodes[1].binding = 1; - astcResourceNodes[1].set = 0; + 
resourceNodes[1].nodeType = NodeType::Buffer; + resourceNodes[1].sizeInDwords = m_bufferViewSizeInDwords; + resourceNodes[1].offsetInDwords = 1 * m_bufferViewSizeInDwords; + resourceNodes[1].binding = 1; + resourceNodes[1].set = 0; // 3.Quant and Transfer Buffer View - astcResourceNodes[2].nodeType = NodeType::Buffer; - astcResourceNodes[2].sizeInDwords = m_bufferViewSizeInDwords; - astcResourceNodes[2].offsetInDwords = 2 * m_bufferViewSizeInDwords; - astcResourceNodes[2].binding = 2; - astcResourceNodes[2].set = 0; + resourceNodes[2].nodeType = NodeType::Buffer; + resourceNodes[2].sizeInDwords = m_bufferViewSizeInDwords; + resourceNodes[2].offsetInDwords = 2 * m_bufferViewSizeInDwords; + resourceNodes[2].binding = 2; + resourceNodes[2].set = 0; // 4. TexBuffer View for Src Image Buffer - astcResourceNodes[3].nodeType = NodeType::TexBuffer; - astcResourceNodes[3].sizeInDwords = m_bufferViewSizeInDwords; - astcResourceNodes[3].offsetInDwords = 3 * m_bufferViewSizeInDwords; - astcResourceNodes[3].binding = 3; - astcResourceNodes[3].set = 0; + resourceNodes[3].nodeType = NodeType::TexBuffer; + resourceNodes[3].sizeInDwords = m_bufferViewSizeInDwords; + resourceNodes[3].offsetInDwords = 3 * m_bufferViewSizeInDwords; + resourceNodes[3].binding = 3; + resourceNodes[3].set = 0; // 5. Image View for Src Image - astcResourceNodes[4].nodeType = NodeType::Image; - astcResourceNodes[4].sizeInDwords = m_imageViewSizeInDwords; - astcResourceNodes[4].offsetInDwords = 4 * m_bufferViewSizeInDwords; - astcResourceNodes[4].binding = 4; - astcResourceNodes[4].set = 0; + resourceNodes[4].nodeType = NodeType::Image; + resourceNodes[4].sizeInDwords = m_imageViewSizeInDwords; + resourceNodes[4].offsetInDwords = 4 * m_bufferViewSizeInDwords; + resourceNodes[4].binding = 4; + resourceNodes[4].set = 0; // 6. 
Image View for Dst Image - astcResourceNodes[5].nodeType = NodeType::Image; - astcResourceNodes[5].sizeInDwords = m_imageViewSizeInDwords; - astcResourceNodes[5].offsetInDwords = 4 * m_bufferViewSizeInDwords + m_imageViewSizeInDwords; - astcResourceNodes[5].binding = 5; - astcResourceNodes[5].set = 0; + resourceNodes[5].nodeType = NodeType::Image; + resourceNodes[5].sizeInDwords = m_imageViewSizeInDwords; + resourceNodes[5].offsetInDwords = 4 * m_bufferViewSizeInDwords + m_imageViewSizeInDwords; + resourceNodes[5].binding = 5; + resourceNodes[5].set = 0; - buildInfo.pUserDataNodes = astcResourceNodes; + buildInfo.pUserDataNodes = resourceNodes; buildInfo.shaderType = InternalTexConvertCsType::ConvertASTCToRGBA8; GetSpvCode(buildInfo.shaderType, &(buildInfo.code.pSpvCode), &(buildInfo.code.spvSize)); } else { PAL_ASSERT(type == InternalTexConvertCsType::ConvertETC2ToRGBA8); - GpuDecodeMappingNode etc2ResourceNodes[Etc2InternalPipelineNodes]; uint32 offset = 0; buildInfo.nodeCount = 1; // 1. output - etc2ResourceNodes[0].nodeType = NodeType::Image; - etc2ResourceNodes[0].sizeInDwords = m_imageViewSizeInDwords; - etc2ResourceNodes[0].offsetInDwords = 0; - etc2ResourceNodes[0].binding = 0; - etc2ResourceNodes[0].set = 0; + resourceNodes[0].nodeType = NodeType::Image; + resourceNodes[0].sizeInDwords = m_imageViewSizeInDwords; + resourceNodes[0].offsetInDwords = 0; + resourceNodes[0].binding = 0; + resourceNodes[0].set = 0; //2. 
input - etc2ResourceNodes[1].nodeType = NodeType::Image; - etc2ResourceNodes[1].sizeInDwords = m_imageViewSizeInDwords; - etc2ResourceNodes[1].offsetInDwords = 1 * m_imageViewSizeInDwords; - etc2ResourceNodes[1].binding = 1; - etc2ResourceNodes[1].set = 0; + resourceNodes[1].nodeType = NodeType::Image; + resourceNodes[1].sizeInDwords = m_imageViewSizeInDwords; + resourceNodes[1].offsetInDwords = 1 * m_imageViewSizeInDwords; + resourceNodes[1].binding = 1; + resourceNodes[1].set = 0; - buildInfo.pUserDataNodes = etc2ResourceNodes; + buildInfo.pUserDataNodes = resourceNodes; buildInfo.shaderType = InternalTexConvertCsType::ConvertETC2ToRGBA8; GetSpvCode(buildInfo.shaderType, &(buildInfo.code.pSpvCode), &(buildInfo.code.spvSize)); } ClientCreateInternalComputePipeline(m_info, constInfo, buildInfo, &pPipeline, &pMemory); + return pPipeline; } diff --git a/icd/make/importdefs b/icd/make/importdefs index d175fdf4..3a255d57 100644 --- a/icd/make/importdefs +++ b/icd/make/importdefs @@ -26,7 +26,7 @@ # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. It must # be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -ICD_PAL_CLIENT_MAJOR_VERSION = 687 +ICD_PAL_CLIENT_MAJOR_VERSION = 692 ICD_PAL_CLIENT_MINOR_VERSION = 0 # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. 
It describes diff --git a/icd/res/ver.h b/icd/res/ver.h index c3b154c1..071b1ac8 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 207 +#define VULKAN_ICD_BUILD_VERSION 210 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION @@ -45,7 +45,7 @@ // These values specify the driver ID and driver info string #define VULKAN_DRIVER_ID VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR // "AMDOPEN" #define VULKAN_DRIVER_NAME_STR "AMD open-source driver" -#define VULKAN_DRIVER_INFO_STR "" +#define VULKAN_DRIVER_INFO_STR "2021.Q4.3" // These values tell which version of the conformance test the driver is compliant against #define CTS_VERSION_MAJOR 1 diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index ecb19c8f..833b0445 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -830,6 +830,12 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( m_settings.enableDumbTransitionSync = false; } + if (appProfile == AppProfile::MetroExodus) + { + // A larger minImageCount can get a performance gain for game Metro Exodus. 
+ m_settings.forceMinImageCount = 3; + } + if (appProfile == AppProfile::Valheim) { if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index 761ce0ea..df071b23 100644 --- a/icd/settings/settings_xgl.json +++ b/icd/settings/settings_xgl.json @@ -221,6 +221,10 @@ } ] }, + { + "Name": "MinimizeVgprUsageStrategy", + "Values": [] + }, { "Name": "ThreadGroupSwizzleMode", "IsEnum": true, @@ -2186,6 +2190,82 @@ "Type": "bool", "Name": "DumpDuplicatePipelines" }, + { + "Description": "BVH Builder NodeSort Type", + "Tags": [], + "Defaults": { + "Default": "BvhBuilderNodeSortOff" + }, + "ValidValues": { + "IsEnum": true, + "Name": "BvhBuilderNodeSortType", + "Values": [ + { + "Name": "BvhBuilderNodeSortOff", + "Value": 0 + }, + { + "Name": "BvhBuilderNodeSortFourWaySortOnTLAS", + "Value": 1 + }, + { + "Name": "BvhBuilderNodeSortFourWaySortOnBLAS", + "Value": 2 + }, + { + "Name": "BvhBuilderNodeSortFourWaySortOnBoth", + "Value": 3 + }, + { + "Name": "BvhBuilderNodeSortTwoLevelSortOnTLAS", + "Value": 4 + }, + { + "Name": "BvhBuilderNodeSortTwoLevelSortOnBLAS", + "Value": 5 + }, + { + "Name": "BvhBuilderNodeSortTwoLevelSortOnBoth", + "Value": 6 + } + ] + }, + "Type": "enum", + "Name": "BvhBuilderNodeSortType", + "Scope": "Driver" + }, + { + "Description": "BVH Builder NodeSort Heurstic. 
Currently only work when NodeSortType is set to be NodeSort4WaySort.", + "Tags": [], + "Defaults": { + "Default": "BvhBuilderNodeSortHeuristicSurfaceAreaLargestFirst" + }, + "ValidValues": { + "IsEnum": true, + "Name": "BvhBuilderNodeSortHeuristic", + "Values": [ + { + "Name": "BvhBuilderNodeSortHeuristicSurfaceAreaLargestFirst", + "Value": 0 + }, + { + "Name": "BvhBuilderNodeSortHeuristicSurfaceAreaSmallestFirst", + "Value": 1 + }, + { + "Name": "BvhBuilderNodeSortHeuristicDensityLargestFirst", + "Value": 2 + }, + { + "Name": "BvhBuilderNodeSortHeuristicDensitySmallestFirst", + "Value": 3 + } + ] + }, + "Type": "enum", + "Name": "BvhBuilderNodeSortHeuristic", + "Scope": "Driver" + }, { "Description": "Re-routes all compute work to a universal queue internally.", "Tags": [ @@ -2198,6 +2278,46 @@ "Name": "UseUniversalAsComputeQueue", "Scope": "Driver" }, + { + "Name": "DispatchRaysThreadGroupSize", + "Type": "uint32", + "Description": "Thread Group size to use when calling DispatchRays", + "Scope": "Driver", + "Tags": [], + "Defaults": { + "Default": 32 + } + }, + { + "Description": "Override the thread group size in x dimension for compute shaders using ray query.", + "Tags": [], + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "Name": "RayQueryCsThreadGroupSizeX", + "Scope": "Driver" + }, + { + "Description": "Override the thread group size in y dimension for compute shaders using ray query.", + "Tags": [], + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "Name": "RayQueryCsThreadGroupSizeY", + "Scope": "Driver" + }, + { + "Description": "Override the thread group size in z dimension for compute shaders using ray query.", + "Tags": [], + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "Name": "RayQueryCsThreadGroupSizeZ", + "Scope": "Driver" + }, { "Name": "DbgBarrierPostCmdEnable", "Description": "Triggers a CmdBarrier call after any command in the given mask. 
The barrier behavior is controlled by the other DbgBarrierPost* settings in this category. Requires VK_ENABLE_DEBUG_BARRIERS=1 to take effect. 0x8FFFFFFF: All commands (heavyweight option)", @@ -2351,19 +2471,19 @@ }, { "Name": "DbgBarrierBeginRendering", - "Value": 1073741824, + "Value": 4294967296, "Description": "Begin rendering" }, { "Name": "DbgBarrierOther", - "Value": 2147483648, + "Value": 8589934592, "Description": "Any other command not listed above" } ], "Name": "DbgBarrierCmd" }, "Scope": "Driver", - "Type": "uint32" + "Type": "uint64" }, { "Description": "For post-cmd barriers, this flag describes the PAL HwPipePoint pipeline stage where the barrier will wait.", @@ -2640,7 +2760,7 @@ "Default": 0 }, "Scope": "Driver", - "Type": "uint32", + "Type": "uint64", "Name": "DbgBarrierPreCmdEnable" }, { @@ -3012,6 +3132,16 @@ "Name": "RobustBufferAccess", "Scope": "Driver" }, + { + "Description": "Max number of VGPRs for indirect shaders (0xFFFFFFFF = uses indirectCallTargetOccupancyPerSimd)", + "Tags": [], + "Defaults": { + "Default": "0xFFFFFFFF" + }, + "Type": "uint32", + "Name": "RtIndirectVgprLimit", + "Scope": "Driver" + }, { "Description": "Allows instance-level functions to be queried using vkGetDeviceProcAddr.", "Tags": [ @@ -3154,6 +3284,18 @@ "Scope": "Driver", "Type": "bool" }, + { + "Name": "IgnoreDeviceQueuePriorityFailures", + "Description": "Specify whether to skip queue creation priority failures", + "Tags": [ + "General" + ], + "Defaults": { + "Default": false + }, + "Scope": "Driver", + "Type": "bool" + }, { "Description": "Custom device allocation count limitation, when larger than 0.", "Tags": [ @@ -3337,18 +3479,6 @@ "Type": "uint32", "Name": "HeapBudgetRatioOfHeapSizeNonlocal" }, - { - "Description": "Whether driver should perform sparse resource unmap at resource destroy time under wddm1.", - "Tags": [ - "Memory" - ], - "Defaults": { - "Default": true - }, - "Scope": "Driver", - "Type": "bool", - "Name": 
"IsSparseUnmapBeforeDestroyEnabled" - }, { "Description": "Override reported minImageTransferGranularity field for graphics queue families. This is encoded as a hex string of the form 0xb000zzyyxx, where 'xx', 'yy', and 'zz' are the reported transfer granularities in the X, Y and Z extents respectively, and 'b' is a control flag: if 'b' is non-zero, this override is applies; otherwise the standard transfer granularity is used. ", "Tags": [