diff --git a/LICENSE.txt b/LICENSE.txt index 23d4ca1f..2981cbd5 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2017 Advanced Micro Devices, Inc. +Copyright (c) 2018 Advanced Micro Devices, Inc. All Rights Reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp index 15979c90..436c032a 100644 --- a/icd/api/app_profile.cpp +++ b/icd/api/app_profile.cpp @@ -818,9 +818,8 @@ void ProcessProfileEntry( const char* entryName, uint32_t dataSize, const void* data, - RuntimeSettings* pRuntimeSettings, - ChillSettings* pChillSettings, - TurboSyncSettings* pTurboSyncSettings, + ProfileSettings* pProfileSettings, + uint32_t appGpuID, bool isUser3DAreaFormat) { // Skip if the data is empty @@ -829,18 +828,13 @@ void ProcessProfileEntry( const wchar_t* wcharData = reinterpret_cast(data); bool* pBoolSetting = nullptr; uint32_t* pUint32Setting = nullptr; + float* pFloatSetting = nullptr; bool assertOnZero = false; bool doNotSetOnZero = false; - uint32_t appGpuID = 0u; - if (pRuntimeSettings != nullptr) + if (strcmp(entryName, "TFQ") == 0) { - appGpuID = pRuntimeSettings->appGpuID; - - if (strcmp(entryName, "TFQ") == 0 && (pRuntimeSettings != nullptr)) - { - pUint32Setting = reinterpret_cast(&(pRuntimeSettings->vulkanTexFilterQuality)); - } + pUint32Setting = &(pProfileSettings->texFilterQuality); } if (pBoolSetting != nullptr) @@ -848,6 +842,11 @@ void ProcessProfileEntry( uint32_t dataValue = 
ParseProfileDataToUint32(wcharData, isUser3DAreaFormat, appGpuID); *pBoolSetting = dataValue ? true : false; } + else if (pFloatSetting != nullptr) + { + uint32_t dataValue = ParseProfileDataToUint32(wcharData, isUser3DAreaFormat, appGpuID); + *pFloatSetting = static_cast(dataValue); + } else if (pUint32Setting != nullptr) { uint32_t dataValue = ParseProfileDataToUint32(wcharData, isUser3DAreaFormat, appGpuID); @@ -863,7 +862,6 @@ void ProcessProfileEntry( *pUint32Setting = dataValue; } } - } } @@ -873,9 +871,8 @@ void ProcessProfileEntry( // Return true if a profile is present. static bool QueryPalProfile( Instance* pInstance, - RuntimeSettings* pRuntimeSettings, - ChillSettings* pChillSettings, - TurboSyncSettings* pTurboSyncSettings, + ProfileSettings* pProfileSettings, + uint32_t appGpuID, Pal::ApplicationProfileClient client, char* exeOrCdnName) // This is the game EXE name or Content Distribution Network name. { @@ -895,9 +892,8 @@ static bool QueryPalProfile( ProcessProfileEntry(iterator.GetName(), iterator.GetDataSize(), iterator.GetData(), - pRuntimeSettings, - pChillSettings, - pTurboSyncSettings, + pProfileSettings, + appGpuID, isUser3DAreaFormat); iterator.Next(); } @@ -909,15 +905,13 @@ static bool QueryPalProfile( // ===================================================================================================================== // Queries PAL for app profile settings void ReloadAppProfileSettings( - Instance* pInstance, - VulkanSettingsLoader* pSettingsLoader, - ChillSettings* pChillSettings, - TurboSyncSettings* pTurboSyncSettings) + Instance* pInstance, + ProfileSettings* pProfileSettings, + uint32_t appGpuID) { size_t exeNameLength = 0; char* pExeName = GetExecutableName(&exeNameLength, true); char* pExeNameLower = nullptr; - RuntimeSettings* pRuntimeSettings = nullptr; if (pExeName != nullptr) { @@ -927,19 +921,13 @@ void ReloadAppProfileSettings( free(pExeName); } - if (pSettingsLoader != nullptr) - { - pRuntimeSettings = 
pSettingsLoader->GetSettingsPtr(); - } - if (pExeNameLower != nullptr) { bool foundProfile = false; // User 3D has highest priority, so query it first foundProfile = QueryPalProfile(pInstance, - pRuntimeSettings, - pChillSettings, - pTurboSyncSettings, + pProfileSettings, + appGpuID, Pal::ApplicationProfileClient::User3D, pExeNameLower); @@ -955,24 +943,13 @@ void ReloadAppProfileSettings( if (hasValidCdnName == true) { foundProfile = QueryPalProfile(pInstance, - pRuntimeSettings, - pChillSettings, - pTurboSyncSettings, + pProfileSettings, + appGpuID, Pal::ApplicationProfileClient::User3D, cdnApplicationId); } } - if (foundProfile == false) - { - QueryPalProfile(pInstance, - pRuntimeSettings, - pChillSettings, - pTurboSyncSettings, - Pal::ApplicationProfileClient::Chill, //CHILL area - pExeNameLower); - } - free(pExeNameLower); } } diff --git a/icd/api/devmode/devmode_mgr.cpp b/icd/api/devmode/devmode_mgr.cpp index 98ece72c..71627d11 100644 --- a/icd/api/devmode/devmode_mgr.cpp +++ b/icd/api/devmode/devmode_mgr.cpp @@ -320,8 +320,6 @@ DevModeMgr::DevModeMgr(Instance* pInstance) #if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT m_pEtwClient(nullptr), #endif - m_hardwareSupportsTracing(false), - m_rgpServerSupportsTracing(false), m_finalized(false), m_numPrepFrames(0), m_traceGpuMemLimit(0), @@ -388,15 +386,6 @@ Pal::Result DevModeMgr::Init() m_pRGPServer = m_pDevDriverServer->GetRGPServer(); } - // Tell RGP that the server (i.e. the driver) supports tracing if requested. - if (result == Pal::Result::Success) - { - if (m_pRGPServer != nullptr) - { - m_rgpServerSupportsTracing = (m_pRGPServer->EnableTraces() == DevDriver::Result::Success); - } - } - if (result == Pal::Result::Success) { m_pipelineReinjectionLock.Init(); @@ -411,41 +400,27 @@ Pal::Result DevModeMgr::Init() // This finalizes the developer driver manager. 
void DevModeMgr::Finalize( uint32_t deviceCount, - Pal::IDevice** ppDevices, VulkanSettingsLoader* settingsLoaders[]) { - // Figure out if the gfxip supports tracing. We decide tracing if there is at least one enumerated GPU - // that can support tracing. Since we don't yet know if that GPU will be picked as the target of an eventual - // VkDevice, this check is imperfect. In mixed-GPU situations where an unsupported GPU is picked for tracing, - // trace capture will fail with an error. - m_hardwareSupportsTracing = false; - - if (m_rgpServerSupportsTracing) + if (m_pRGPServer != nullptr) { + bool tracingForceDisabledForAllGpus = true; + for (uint32_t gpu = 0; gpu < deviceCount; ++gpu) { - // This is technically a violation of the PAL interface: we are not allowed to query PAL device properties - // prior to calling CommitSettingsAndInit(). However, doing so is (a) safe for some properties and (b) - // the only way to do this currently as we need to know this information prior to calling Finalize() on - // the device and devdriver manager and (c) it also matches DXCP behavior for the same reasons. 
- Pal::DeviceProperties props = {}; - - if (ppDevices[gpu]->GetProperties(&props) == Pal::Result::Success) + if (settingsLoaders[gpu]->GetSettings().devModeSqttForceDisable == false) { - if (GpuSupportsTracing(props, settingsLoaders[gpu]->GetSettings())) - { - m_hardwareSupportsTracing = true; + tracingForceDisabledForAllGpus = false; - break; - } + break; } } - } - // If no GPU supports tracing, inform the RGP server to disable tracing - if ((m_pRGPServer != nullptr) && (m_hardwareSupportsTracing == false)) - { - m_pRGPServer->DisableTraces(); + // If tracing is force disabled for all GPUs, inform the RGP server to disable tracing + if (tracingForceDisabledForAllGpus) + { + m_pRGPServer->DisableTraces(); + } } // Finalize the devmode manager @@ -470,8 +445,7 @@ void DevModeMgr::WaitForDriverResume() auto* pDriverControlServer = m_pDevDriverServer->GetDriverControlServer(); VK_ASSERT(pDriverControlServer != nullptr); - - pDriverControlServer->WaitForDriverResume(); + pDriverControlServer->DriverTick(); } // ===================================================================================================================== @@ -1970,16 +1944,6 @@ Pal::Result DevModeMgr::InitTraceQueueFamilyResources( return result; } -// ===================================================================================================================== -// Returns true if the given device properties/settings support tracing. 
-bool DevModeMgr::GpuSupportsTracing( - const Pal::DeviceProperties& props, - const RuntimeSettings& settings) -{ - return props.gfxipProperties.flags.supportRgpTraces && - (settings.devModeSqttForceDisable == false); -} - // ===================================================================================================================== // Initializes device-persistent RGP resources Pal::Result DevModeMgr::InitRGPTracing( @@ -2002,7 +1966,7 @@ Pal::Result DevModeMgr::InitRGPTracing( // // It's necessary to check this during RGP tracing init in addition to devmode init because during the earlier // devmode init we may be in a situation where some enumerated physical devices support tracing and others do not. - if (GpuSupportsTracing(pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties(), pDevice->GetRuntimeSettings()) == false) + if (pDevice->GetRuntimeSettings().devModeSqttForceDisable) { result = Pal::Result::ErrorInitializationFailed; } @@ -2212,11 +2176,7 @@ void DevModeMgr::PostDeviceCreate(Device* pDevice) // information to decide when it's reasonable to make certain requests of the driver through protocol functions. 
if (pDriverControlServer->IsDriverInitialized() == false) { -#if GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION < GPUOPEN_DRIVER_CONTROL_CLEANUP_VERSION - pDriverControlServer->FinishDriverInitialization(); -#else pDriverControlServer->FinishDeviceInit(); -#endif } } diff --git a/icd/api/devmode/devmode_mgr.h b/icd/api/devmode/devmode_mgr.h index e137146d..2a8273ac 100644 --- a/icd/api/devmode/devmode_mgr.h +++ b/icd/api/devmode/devmode_mgr.h @@ -130,7 +130,6 @@ class DevModeMgr void Finalize( uint32_t deviceCount, - Pal::IDevice** ppDevices, VulkanSettingsLoader* settingsLoaders[]); void Destroy(); @@ -301,7 +300,6 @@ class DevModeMgr void DestroyTraceQueueFamilyResources(TraceQueueFamilyState* pState); TraceQueueState* FindTraceQueueState(TraceState* pState, const Queue* pQueue); bool QueueSupportsTiming(uint32_t deviceIdx, const Queue* pQueue); - static bool GpuSupportsTracing(const Pal::DeviceProperties& props, const RuntimeSettings& settings); #if VKI_GPUOPEN_PROTOCOL_ETW_CLIENT Pal::Result InitEtwClient(); @@ -318,9 +316,6 @@ class DevModeMgr #endif Util::Mutex m_traceMutex; TraceState m_trace; - bool m_hardwareSupportsTracing; // True if gfxip supports tracing - bool m_rgpServerSupportsTracing; // True if gpuopen protocol successfully enabled - // tracing bool m_finalized; uint32_t m_numPrepFrames; uint32_t m_traceGpuMemLimit; diff --git a/icd/api/include/app_profile.h b/icd/api/include/app_profile.h index bf814c17..fbff011b 100644 --- a/icd/api/include/app_profile.h +++ b/icd/api/include/app_profile.h @@ -45,8 +45,6 @@ namespace Pal namespace vk { class Instance; -class VulkanSettingsLoader; -struct RuntimeSettings; }; namespace vk @@ -84,31 +82,18 @@ enum class AppProfile : uint32_t NitrousEngine, // Nitrous Engine by Oxide (Default) }; -// Struct describing dynamic CHILL settings -struct ChillSettings +struct ProfileSettings { - bool chillProfileEnable; // If per-app chill profile settings is enabled - uint32_t chillLevel; // Chill level and flags - uint32_t 
chillMinFrameRate; // Min chill frame rate; valid range is 30-300fps. - uint32_t chillMaxFrameRate; // Max chill frame rate; valid range is 30-300fps. - uint32_t chillLoadingScreenDrawsThresh; // The threshold number of draw calls per frame used to distinguish - // between loading screens and gameplay. -}; + uint32_t texFilterQuality; // TextureFilterOptimizationSettings -// Struct describing dynamic TurboSync settings -struct TurboSyncSettings -{ - bool turboSyncEnable; // If per-app TurboSync profile settings is enabled }; extern AppProfile ScanApplicationProfile(const VkInstanceCreateInfo& instanceInfo); -void ReloadAppProfileSettings( - Instance* pInstance, - VulkanSettingsLoader* pSettingsLoader, - ChillSettings* pChillSettings, - TurboSyncSettings* pTurboSyncSettings); +void ReloadAppProfileSettings(Instance* pInstance, + ProfileSettings* pProfileSettings, + uint32_t appGpuID = 0u); }; -#endif /* __GPU_EVENT_MGR_H__ */ +#endif /* __APP_PROFILE_H__ */ diff --git a/icd/api/include/khronos/sdk-1.1/spirv.hpp b/icd/api/include/khronos/sdk-1.1/spirv.hpp index b231c475..f5cbda1b 100644 --- a/icd/api/include/khronos/sdk-1.1/spirv.hpp +++ b/icd/api/include/khronos/sdk-1.1/spirv.hpp @@ -49,7 +49,7 @@ namespace spv { typedef unsigned int Id; -#define SPV_VERSION 0x10400 +#define SPV_VERSION 0x10500 #define SPV_REVISION 1 static const unsigned int MagicNumber = 0x07230203; @@ -91,6 +91,7 @@ enum AddressingModel { AddressingModelLogical = 0, AddressingModelPhysical32 = 1, AddressingModelPhysical64 = 2, + AddressingModelPhysicalStorageBuffer64 = 5348, AddressingModelPhysicalStorageBuffer64EXT = 5348, AddressingModelMax = 0x7fffffff, }; @@ -99,6 +100,7 @@ enum MemoryModel { MemoryModelSimple = 0, MemoryModelGLSL450 = 1, MemoryModelOpenCL = 2, + MemoryModelVulkan = 3, MemoryModelVulkanKHR = 3, MemoryModelMax = 0x7fffffff, }; @@ -183,6 +185,7 @@ enum StorageClass { StorageClassHitAttributeNV = 5339, StorageClassIncomingRayPayloadNV = 5342, StorageClassShaderRecordBufferNV = 
5343, + StorageClassPhysicalStorageBuffer = 5349, StorageClassPhysicalStorageBufferEXT = 5349, StorageClassMax = 0x7fffffff, }; @@ -311,9 +314,13 @@ enum ImageOperandsShift { ImageOperandsConstOffsetsShift = 5, ImageOperandsSampleShift = 6, ImageOperandsMinLodShift = 7, + ImageOperandsMakeTexelAvailableShift = 8, ImageOperandsMakeTexelAvailableKHRShift = 8, + ImageOperandsMakeTexelVisibleShift = 9, ImageOperandsMakeTexelVisibleKHRShift = 9, + ImageOperandsNonPrivateTexelShift = 10, ImageOperandsNonPrivateTexelKHRShift = 10, + ImageOperandsVolatileTexelShift = 11, ImageOperandsVolatileTexelKHRShift = 11, ImageOperandsSignExtendShift = 12, ImageOperandsZeroExtendShift = 13, @@ -330,9 +337,13 @@ enum ImageOperandsMask { ImageOperandsConstOffsetsMask = 0x00000020, ImageOperandsSampleMask = 0x00000040, ImageOperandsMinLodMask = 0x00000080, + ImageOperandsMakeTexelAvailableMask = 0x00000100, ImageOperandsMakeTexelAvailableKHRMask = 0x00000100, + ImageOperandsMakeTexelVisibleMask = 0x00000200, ImageOperandsMakeTexelVisibleKHRMask = 0x00000200, + ImageOperandsNonPrivateTexelMask = 0x00000400, ImageOperandsNonPrivateTexelKHRMask = 0x00000400, + ImageOperandsVolatileTexelMask = 0x00000800, ImageOperandsVolatileTexelKHRMask = 0x00000800, ImageOperandsSignExtendMask = 0x00001000, ImageOperandsZeroExtendMask = 0x00002000, @@ -448,8 +459,11 @@ enum Decoration { DecorationPerViewNV = 5272, DecorationPerTaskNV = 5273, DecorationPerVertexNV = 5285, + DecorationNonUniform = 5300, DecorationNonUniformEXT = 5300, + DecorationRestrictPointer = 5355, DecorationRestrictPointerEXT = 5355, + DecorationAliasedPointer = 5356, DecorationAliasedPointerEXT = 5356, DecorationCounterBuffer = 5634, DecorationHlslCounterBufferGOOGLE = 5634, @@ -630,8 +644,11 @@ enum MemorySemanticsShift { MemorySemanticsCrossWorkgroupMemoryShift = 9, MemorySemanticsAtomicCounterMemoryShift = 10, MemorySemanticsImageMemoryShift = 11, + MemorySemanticsOutputMemoryShift = 12, MemorySemanticsOutputMemoryKHRShift = 12, 
+ MemorySemanticsMakeAvailableShift = 13, MemorySemanticsMakeAvailableKHRShift = 13, + MemorySemanticsMakeVisibleShift = 14, MemorySemanticsMakeVisibleKHRShift = 14, MemorySemanticsVolatileShift = 15, MemorySemanticsMax = 0x7fffffff, @@ -649,8 +666,11 @@ enum MemorySemanticsMask { MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200, MemorySemanticsAtomicCounterMemoryMask = 0x00000400, MemorySemanticsImageMemoryMask = 0x00000800, + MemorySemanticsOutputMemoryMask = 0x00001000, MemorySemanticsOutputMemoryKHRMask = 0x00001000, + MemorySemanticsMakeAvailableMask = 0x00002000, MemorySemanticsMakeAvailableKHRMask = 0x00002000, + MemorySemanticsMakeVisibleMask = 0x00004000, MemorySemanticsMakeVisibleKHRMask = 0x00004000, MemorySemanticsVolatileMask = 0x00008000, }; @@ -659,8 +679,11 @@ enum MemoryAccessShift { MemoryAccessVolatileShift = 0, MemoryAccessAlignedShift = 1, MemoryAccessNontemporalShift = 2, + MemoryAccessMakePointerAvailableShift = 3, MemoryAccessMakePointerAvailableKHRShift = 3, + MemoryAccessMakePointerVisibleShift = 4, MemoryAccessMakePointerVisibleKHRShift = 4, + MemoryAccessNonPrivatePointerShift = 5, MemoryAccessNonPrivatePointerKHRShift = 5, MemoryAccessMax = 0x7fffffff, }; @@ -670,8 +693,11 @@ enum MemoryAccessMask { MemoryAccessVolatileMask = 0x00000001, MemoryAccessAlignedMask = 0x00000002, MemoryAccessNontemporalMask = 0x00000004, + MemoryAccessMakePointerAvailableMask = 0x00000008, MemoryAccessMakePointerAvailableKHRMask = 0x00000008, + MemoryAccessMakePointerVisibleMask = 0x00000010, MemoryAccessMakePointerVisibleKHRMask = 0x00000010, + MemoryAccessNonPrivatePointerMask = 0x00000020, MemoryAccessNonPrivatePointerKHRMask = 0x00000020, }; @@ -681,6 +707,7 @@ enum Scope { ScopeWorkgroup = 2, ScopeSubgroup = 3, ScopeInvocation = 4, + ScopeQueueFamily = 5, ScopeQueueFamilyKHR = 5, ScopeMax = 0x7fffffff, }; @@ -781,6 +808,8 @@ enum Capability { CapabilityGroupNonUniformShuffleRelative = 66, CapabilityGroupNonUniformClustered = 67, 
CapabilityGroupNonUniformQuad = 68, + CapabilityShaderLayer = 69, + CapabilityShaderViewportIndex = 70, CapabilitySubgroupBallotKHR = 4423, CapabilityDrawParameters = 4427, CapabilitySubgroupVoteKHR = 4431, @@ -825,21 +854,36 @@ enum Capability { CapabilityFragmentDensityEXT = 5291, CapabilityShadingRateNV = 5291, CapabilityGroupNonUniformPartitionedNV = 5297, + CapabilityShaderNonUniform = 5301, CapabilityShaderNonUniformEXT = 5301, + CapabilityRuntimeDescriptorArray = 5302, CapabilityRuntimeDescriptorArrayEXT = 5302, + CapabilityInputAttachmentArrayDynamicIndexing = 5303, CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303, + CapabilityUniformTexelBufferArrayDynamicIndexing = 5304, CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304, + CapabilityStorageTexelBufferArrayDynamicIndexing = 5305, CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305, + CapabilityUniformBufferArrayNonUniformIndexing = 5306, CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306, + CapabilitySampledImageArrayNonUniformIndexing = 5307, CapabilitySampledImageArrayNonUniformIndexingEXT = 5307, + CapabilityStorageBufferArrayNonUniformIndexing = 5308, CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308, + CapabilityStorageImageArrayNonUniformIndexing = 5309, CapabilityStorageImageArrayNonUniformIndexingEXT = 5309, + CapabilityInputAttachmentArrayNonUniformIndexing = 5310, CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310, + CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311, CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, + CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, CapabilityRayTracingNV = 5340, + CapabilityVulkanMemoryModel = 5345, CapabilityVulkanMemoryModelKHR = 5345, + CapabilityVulkanMemoryModelDeviceScope = 5346, CapabilityVulkanMemoryModelDeviceScopeKHR = 5346, + CapabilityPhysicalStorageBufferAddresses = 5347, 
CapabilityPhysicalStorageBufferAddressesEXT = 5347, CapabilityComputeDerivativeGroupLinearNV = 5350, CapabilityCooperativeMatrixNV = 5357, @@ -1753,6 +1797,7 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpGroupSMaxNonUniformAMD: *hasResult = true; *hasResultType = true; break; case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; + case OpReadClockKHR: *hasResult = true; *hasResultType = true; break; case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; diff --git a/icd/api/include/vk_conv.h b/icd/api/include/vk_conv.h index fe82af40..ce957c0c 100644 --- a/icd/api/include/vk_conv.h +++ b/icd/api/include/vk_conv.h @@ -1305,7 +1305,7 @@ VK_INLINE void VkToPalImageCopyRegion( Pal::ChNumFormat srcFormat, Pal::ChNumFormat dstFormat, Pal::ImageCopyRegion* pPalRegions, - uint32_t& palRegionIndex) + uint32_t* pPalRegionIndex) { Pal::ImageCopyRegion region = {}; @@ -1347,9 +1347,11 @@ VK_INLINE void VkToPalImageCopyRegion( do { + VK_ASSERT(pPalRegionIndex != nullptr); + region.srcSubres.aspect = VkToPalImageAspectExtract(srcFormat, &srcAspectMask); region.dstSubres.aspect = VkToPalImageAspectExtract(dstFormat, &dstAspectMask); - pPalRegions[palRegionIndex++] = region; + pPalRegions[(*pPalRegionIndex)++] = region; } while (srcAspectMask != 0 || dstAspectMask != 0); } @@ -1361,7 +1363,7 @@ VK_INLINE void VkToPalImageScaledCopyRegion( Pal::ChNumFormat srcFormat, Pal::ChNumFormat dstFormat, Pal::ImageScaledCopyRegion* pPalRegions, - uint32_t& palRegionIndex) + uint32_t* pPalRegionIndex) { Pal::ImageScaledCopyRegion region = {}; @@ -1380,7 +1382,7 @@ VK_INLINE void VkToPalImageScaledCopyRegion( VK_ASSERT(imageBlit.srcSubresource.layerCount == 
imageBlit.dstSubresource.layerCount); VK_ASSERT(region.srcExtent.depth == region.srcExtent.depth); - region.numSlices = Util::Max(region.srcExtent.depth, imageBlit.srcSubresource.layerCount); + region.numSlices = imageBlit.srcSubresource.layerCount; // PAL expects all dimensions to be in blocks for compressed formats so let's handle that here if (Pal::Formats::IsBlockCompressed(srcFormat)) @@ -1408,8 +1410,10 @@ VK_INLINE void VkToPalImageScaledCopyRegion( do { + VK_ASSERT(pPalRegionIndex != nullptr); + region.srcSubres.aspect = region.dstSubres.aspect = VkToPalImageAspectExtract(srcFormat, &aspectMask); - pPalRegions[palRegionIndex++] = region; + pPalRegions[(*pPalRegionIndex)++] = region; } while (aspectMask != 0); } @@ -1501,7 +1505,7 @@ VK_INLINE void VkToPalImageResolveRegion( Pal::ChNumFormat srcFormat, Pal::ChNumFormat dstFormat, Pal::ImageResolveRegion* pPalRegions, - uint32_t& palRegionIndex) + uint32_t* pPalRegionIndex) { Pal::ImageResolveRegion region = {}; @@ -1530,8 +1534,10 @@ VK_INLINE void VkToPalImageResolveRegion( do { + VK_ASSERT(pPalRegionIndex != nullptr); + region.srcAspect = region.dstAspect = VkToPalImageAspectExtract(srcFormat, &aspectMask); - pPalRegions[palRegionIndex++] = region; + pPalRegions[(*pPalRegionIndex)++] = region; } while (aspectMask != 0); } diff --git a/icd/api/include/vk_device.h b/icd/api/include/vk_device.h index 69986635..1d443d35 100644 --- a/icd/api/include/vk_device.h +++ b/icd/api/include/vk_device.h @@ -519,6 +519,8 @@ class Device uint32_t GetPinnedSystemMemoryTypes() const; + uint32_t GetPinnedHostMappedForeignMemoryTypes() const; + uint32_t GetExternalHostMemoryTypes( VkExternalMemoryHandleTypeFlagBits handleType, const void* pExternalPtr) const; @@ -554,6 +556,10 @@ class Device Pal::QueueType cmdBufferQueueType, const Queue* pQueue); + bool BigSW60Supported() const; + + void UpdateFeatureSettings(); + protected: Device( uint32_t deviceCount, diff --git a/icd/api/include/vk_instance.h 
b/icd/api/include/vk_instance.h index 52266554..3cb24c5d 100644 --- a/icd/api/include/vk_instance.h +++ b/icd/api/include/vk_instance.h @@ -66,6 +66,8 @@ class PhysicalDeviceManager; class VirtualStackMgr; class VulkanSettingsLoader; +struct RuntimeSettings; + // ===================================================================================================================== // Represents the per-Vulkan instance data as seen by the applicaton. class Instance @@ -218,10 +220,6 @@ class Instance VulkanSettingsLoader* settingsLoaders[], AppProfile* pAppProfiles); - void QueryApplicationProfile( - Pal::IDevice* pPalDevice, - VulkanSettingsLoader* pSettingsLoader = nullptr); - VkResult RegisterDebugCallback( DebugReportCallback* pCallback); @@ -284,6 +282,8 @@ class Instance const char* pFormat, va_list args); + void UpdateSettingsWithAppProfile(RuntimeSettings* pSettings); + Pal::IPlatform* m_pPalPlatform; // Pal Platform object. VkAllocationCallbacks m_allocCallbacks; @@ -329,9 +329,7 @@ class Instance ScreenObject m_screens[Pal::MaxScreens]; void* m_pScreenStorage; - DevModeMgr* m_pDevModeMgr; // GPUOpen Developer Mode manager. - ChillSettings m_chillSettings; // Dynamic chill settings structure - TurboSyncSettings m_turboSyncSettings; // Dynamic TurboSync settings structure + DevModeMgr* m_pDevModeMgr; // GPUOpen Developer Mode manager. 
Util::List m_debugReportCallbacks; // List of registered Debug // Report Callbacks diff --git a/icd/api/sqtt/sqtt_layer.cpp b/icd/api/sqtt/sqtt_layer.cpp index 1103242f..0f945feb 100644 --- a/icd/api/sqtt/sqtt_layer.cpp +++ b/icd/api/sqtt/sqtt_layer.cpp @@ -654,6 +654,8 @@ void SqttCmdBufferState::WriteBarrierEndMarker( marker.numLayoutTransitions = m_currentBarrier.numLayoutTransitions; + marker.invalGl1 = operations.caches.invalGl1; + WriteMarker(&marker, sizeof(marker)); } } diff --git a/icd/api/sqtt/sqtt_rgp_annotations.h b/icd/api/sqtt/sqtt_rgp_annotations.h index e5b2e478..d891a35b 100644 --- a/icd/api/sqtt/sqtt_rgp_annotations.h +++ b/icd/api/sqtt/sqtt_rgp_annotations.h @@ -319,17 +319,18 @@ struct RgpSqttMarkerBarrierEnd struct { uint32_t syncCpDma : 1; // Issue dummy CP-DMA command to confirm all prior CP-DMAs have completed. - uint32_t invalTcp : 1; // Invalidate the L1 vector caches. + uint32_t invalTcp : 1; // Invalidate the TCP vector caches. uint32_t invalSqI : 1; // Invalidate the SQ instruction caches - uint32_t invalSqK : 1; // Invalidate the SQ constant caches (i.e. L1 scalar caches) - uint32_t flushTcc : 1; // Flush L2 - uint32_t invalTcc : 1; // Invalidate L2 + uint32_t invalSqK : 1; // Invalidate the SQ constant caches. + uint32_t flushTcc : 1; // Flush L2. + uint32_t invalTcc : 1; // Invalidate L2. uint32_t flushCb : 1; // Flush CB caches (including DCC, cmask, fmask) uint32_t invalCb : 1; // Invalidate CB caches (including DCC, cmask, fmask) uint32_t flushDb : 1; // Flush DB caches (including htile) uint32_t invalDb : 1; // Invalidate DB caches (including htile) uint32_t numLayoutTransitions : 16; // Number of layout transitions following this packet - uint32_t reserved : 6; // Reserved for future expansion. Always 0 + uint32_t invalGl1 : 1; // Invalidate L1. + uint32_t reserved : 5; // Reserved for future expansion.
Always 0 }; uint32_t dword02; // The second dword diff --git a/icd/api/vert_buf_binding_mgr.cpp b/icd/api/vert_buf_binding_mgr.cpp index 9c24cd68..3a0ec024 100644 --- a/icd/api/vert_buf_binding_mgr.cpp +++ b/icd/api/vert_buf_binding_mgr.cpp @@ -145,17 +145,19 @@ void VertBufBindingMgr::GraphicsPipelineChanged( // Update strides for each binding used by the graphics pipeline. Rebuild SRD data for those bindings // whose strides changed. - - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) + utils::IterateMask deviceGroup(pCmdBuf->GetDeviceMask()); + while (deviceGroup.Iterate()) { + uint32_t deviceIdx = deviceGroup.Index(); + uint32_t firstChanged = UINT_MAX; uint32_t lastChanged = 0; for (uint32_t bindex = 0; bindex < bindingInfo.bindingCount; ++bindex) { - const uint32_t slot = bindingInfo.bindings[bindex].slot; - const uint32_t byteStride = bindingInfo.bindings[bindex].byteStride; - Pal::BufferViewInfo*const pBinding = &m_bindings[deviceIdx][slot]; + const uint32_t slot = bindingInfo.bindings[bindex].slot; + const uint32_t byteStride = bindingInfo.bindings[bindex].byteStride; + Pal::BufferViewInfo*const pBinding = &m_bindings[deviceIdx][slot]; if (pBinding->stride != byteStride) { @@ -176,6 +178,5 @@ void VertBufBindingMgr::GraphicsPipelineChanged( } } } - }//namespace vk diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index b8427177..54e9f83f 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -960,9 +960,9 @@ VkResult CmdBuffer::Begin( union { - const VkStructHeader* pHeader; - const VkCommandBufferBeginInfo* pInfo; - const VkDeviceGroupCommandBufferBeginInfo* pDeviceGroupInfo; + const VkStructHeader* pHeader; + const VkCommandBufferBeginInfo* pInfo; + const VkDeviceGroupCommandBufferBeginInfo* pDeviceGroupInfo; }; RenderPass* pRenderPass = nullptr; @@ -981,7 +981,7 @@ VkResult CmdBuffer::Begin( uint32_t currentSubPass = 0; for (pInfo = pBeginInfo; pHeader != nullptr; pHeader = 
pHeader->pNext) { - switch (pHeader->sType) + switch (static_cast(pHeader->sType)) { // Convert Vulkan flags to PAL flags. case VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO: @@ -2028,7 +2028,7 @@ void CmdBuffer::CopyImage( (palRegionCount <= (regionBatch - MaxPalAspectsPerMask))) { VkToPalImageCopyRegion(pRegions[regionIdx], srcFormat.format, dstFormat.format, - pPalRegions, palRegionCount); + pPalRegions, &palRegionCount); ++regionIdx; } @@ -2096,7 +2096,7 @@ void CmdBuffer::BlitImage( (palCopyInfo.regionCount <= (regionBatch - MaxPalAspectsPerMask))) { VkToPalImageScaledCopyRegion(pRegions[regionIdx], srcFormat.format, dstFormat.format, - pPalRegions, palCopyInfo.regionCount); + pPalRegions, &palCopyInfo.regionCount); ++regionIdx; } @@ -2927,7 +2927,7 @@ void CmdBuffer::ResolveImage( // We expect MSAA images to never have mipmaps VK_ASSERT(pRects[rectIdx].srcSubresource.mipLevel == 0); - VkToPalImageResolveRegion(pRects[rectIdx], srcFormat.format, dstFormat.format, pPalRegions, palRegionCount); + VkToPalImageResolveRegion(pRects[rectIdx], srcFormat.format, dstFormat.format, pPalRegions, &palRegionCount); ++rectIdx; } diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index bfadb4d5..5d0599e7 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -1282,6 +1282,9 @@ VkResult Device::Initialize( // Finalize the device settings after driver intitalization is done // This essentially generates settings hash pPhysicalDevice->GetSettingsLoader()->FinalizeSettings(); + + // Get the current values of driver features, from an app profile or global settings. 
+ UpdateFeatureSettings(); } if (result == VK_SUCCESS) @@ -2574,6 +2577,18 @@ uint32_t Device::GetPinnedSystemMemoryTypes() const return memoryTypes; } +uint32_t Device::GetPinnedHostMappedForeignMemoryTypes() const +{ + uint32_t memoryTypes = 0; + uint32_t gartIndexBits; + + if (GetVkTypeIndexBitsFromPalHeap(Pal::GpuHeapGartUswc, &gartIndexBits)) + { + memoryTypes |= gartIndexBits; + } + + return memoryTypes; +} // ===================================================================================================================== // Returns the memory type bit-mask that is compatible to be used as pinned memory types for the given external // host pointer @@ -2588,6 +2603,10 @@ uint32_t Device::GetExternalHostMemoryTypes( { memoryTypes = GetPinnedSystemMemoryTypes(); } + else if (handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) + { + memoryTypes = GetPinnedHostMappedForeignMemoryTypes(); + } return memoryTypes; } @@ -2782,6 +2801,27 @@ Pal::IQueue* Device::PerformSwCompositing( return pPresentQueue; } +// ===================================================================================================================== +// Return true if Big Software Release 6.0 is supported. +bool Device::BigSW60Supported() const +{ + const Pal::DeviceProperties& deviceProps = VkPhysicalDevice(DefaultDeviceIndex)->PalProperties(); + const Pal::BigSoftwareReleaseInfo* pBigSwInfo = &(deviceProps.bigSoftwareReleaseInfo); + + return ((pBigSwInfo->majorVersion > 2019) || + ((pBigSwInfo->majorVersion == 2019) && (pBigSwInfo->minorVersion >= 1))); +} + +// ===================================================================================================================== +// Update driver feature settings for this device based on an app profile and global settings. 
+void Device::UpdateFeatureSettings() +{ + ProfileSettings profileSettings = {}; + + ReloadAppProfileSettings(m_pInstance, &profileSettings); + +} + /** *********************************************************************************************************************** * C-Callable entry points start here. These entries go in the dispatch table(s). diff --git a/icd/api/vk_dispatch.cpp b/icd/api/vk_dispatch.cpp index 289de7e4..b7da0fe9 100644 --- a/icd/api/vk_dispatch.cpp +++ b/icd/api/vk_dispatch.cpp @@ -547,9 +547,7 @@ void DispatchTable::Init() INIT_DISPATCH_ENTRY(vkGetPipelineExecutableInternalRepresentationsKHR); INIT_DISPATCH_ENTRY(vkGetBufferDeviceAddressEXT ); - INIT_DISPATCH_ENTRY(vkResetQueryPoolEXT ); - INIT_DISPATCH_ENTRY(vkCmdSetLineStippleEXT ); INIT_DISPATCH_ENTRY(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ); diff --git a/icd/api/vk_instance.cpp b/icd/api/vk_instance.cpp index 66f0b5c9..8653386b 100644 --- a/icd/api/vk_instance.cpp +++ b/icd/api/vk_instance.cpp @@ -98,10 +98,6 @@ Instance::Instance( { m_flags.u32All = 0; - // Disable TurboSync and Chill by default - m_turboSyncSettings.turboSyncEnable = false; - m_chillSettings.chillProfileEnable = false; - memset(m_screens, 0, sizeof(m_screens)); } @@ -363,6 +359,10 @@ VkResult Instance::Init( m_nullGpuId = createInfo.nullGpuId; } +#if ICD_GPUOPEN_DEVMODE_BUILD + createInfo.flags.supportRgpTraces = 1; +#endif + Pal::Result palResult = Pal::CreatePlatform(createInfo, pPalMemory, &m_pPalPlatform); if (palResult != Pal::Result::ErrorUnknown) @@ -591,15 +591,13 @@ VkResult Instance::LoadAndCommitSettings( { settingsLoaders[deviceIdx]->ProcessSettings(m_appVersion, &pAppProfiles[deviceIdx]); - // Overlay the application profile from Radeon Settings - QueryApplicationProfile(ppDevices[deviceIdx], settingsLoaders[deviceIdx]); + UpdateSettingsWithAppProfile(settingsLoaders[deviceIdx]->GetSettingsPtr()); // Make sure the final settings have legal values and update dependant parameters 
settingsLoaders[deviceIdx]->ValidateSettings(); // Update PAL settings based on runtime settings and desired driver defaults if needed settingsLoaders[deviceIdx]->UpdatePalSettings(); - } } @@ -607,7 +605,7 @@ VkResult Instance::LoadAndCommitSettings( // Inform developer mode manager of settings. This also finalizes the developer mode manager. if (m_pDevModeMgr != nullptr) { - m_pDevModeMgr->Finalize(deviceCount, ppDevices, settingsLoaders); + m_pDevModeMgr->Finalize(deviceCount, settingsLoaders); } #endif @@ -620,6 +618,24 @@ VkResult Instance::LoadAndCommitSettings( return result; } +// ===================================================================================================================== +// Overlay the application profile settings on top of the default settings. +void Instance::UpdateSettingsWithAppProfile( + RuntimeSettings* pSettings) +{ + ProfileSettings profileSettings = {}; + + // Set the default values + profileSettings.texFilterQuality = pSettings->vulkanTexFilterQuality; + + ReloadAppProfileSettings(this, + &profileSettings, + pSettings->appGpuID); + + pSettings->vulkanTexFilterQuality = + static_cast(profileSettings.texFilterQuality); +} + // ===================================================================================================================== // Destroys the Instance. 
VkResult Instance::Destroy(void) @@ -1070,25 +1086,6 @@ void PAL_STDCALL Instance::PalDeveloperCallback( } } -// ===================================================================================================================== -// Query dynamic application profile settings -void Instance::QueryApplicationProfile( - Pal::IDevice* pPalDevice, - VulkanSettingsLoader* pSettingsLoader) -{ - ReloadAppProfileSettings(this, pSettingsLoader, &m_chillSettings, &m_turboSyncSettings); - - if (m_turboSyncSettings.turboSyncEnable == false) - { - // Read TurboSync global key - pPalDevice->ReadSetting("TurboSync", - Pal::SettingScope::Global, - Util::ValueType::Boolean, - &m_turboSyncSettings.turboSyncEnable, - sizeof(m_turboSyncSettings.turboSyncEnable)); - } -} - // ===================================================================================================================== // Callback function used to route debug prints to the VK_EXT_debug_report extension void PAL_STDCALL Instance::LogCallback( diff --git a/icd/api/vk_memory.cpp b/icd/api/vk_memory.cpp index b1aad6a2..d0979e26 100644 --- a/icd/api/vk_memory.cpp +++ b/icd/api/vk_memory.cpp @@ -218,7 +218,13 @@ VkResult Memory::Create( VK_ASSERT(pDevice->IsExtensionEnabled(DeviceExtensions::EXT_EXTERNAL_MEMORY_HOST)); VK_ASSERT(pImportMemoryInfo->handleType & - (VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT)); + (VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT | + VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT)); + + if (pImportMemoryInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) + { + isHostMappedForeign = true; + } pPinnedHostPtr = pImportMemoryInfo->pHostPointer; } @@ -514,7 +520,6 @@ VkResult Memory::CreateGpuPinnedMemory( pinnedInfo.pSysMem = pPinnedHostPtr; pinnedInfo.vaRange = Pal::VaRange::Default; pinnedInfo.alignment = createInfo.alignment; - gpuMemorySize = pDevice->PalDevice(DefaultDeviceIndex)->GetPinnedGpuMemorySize( pinnedInfo, 
&palResult); diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index a20a43c0..0e87ac9f 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -389,6 +389,8 @@ static void GetFormatFeatureFlags( retFlags &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; retFlags &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + + retFlags &= ~VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; } else { @@ -1355,10 +1357,10 @@ VkResult PhysicalDevice::GetImageFormatProperties( // a- color attachment. // b- depth/stencil attachment. // c- storage image. - if ((FormatSupportsMsaa(format) == false) || - (type != VK_IMAGE_TYPE_2D) || - (tiling == VK_IMAGE_TILING_LINEAR) || - ((flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) != 0) || + if ((FormatSupportsMsaa(format) == false) || + (type != VK_IMAGE_TYPE_2D) || + (tiling == VK_IMAGE_TILING_LINEAR) || + ((flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) != 0) || ((supportedFeatures & (VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT | VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) == 0)) @@ -2532,6 +2534,19 @@ VkResult PhysicalDevice::GetSurfaceCapabilities( return result; } +// Instantiate the template for the linker. 
+template +VkResult PhysicalDevice::GetSurfaceCapabilities( + VkSurfaceKHR surface, + Pal::OsDisplayHandle displayHandle, + VkSurfaceCapabilitiesKHR* pSurfaceCapabilities) const; + +template +VkResult PhysicalDevice::GetSurfaceCapabilities( + VkSurfaceKHR surface, + Pal::OsDisplayHandle displayHandle, + VkSurfaceCapabilities2EXT* pSurfaceCapabilities) const; + // ===================================================================================================================== VkResult PhysicalDevice::GetSurfaceCapabilities2KHR( const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, @@ -3665,6 +3680,9 @@ void PhysicalDevice::GetPhysicalDeviceDriverProperties( { *pDriverID = VULKAN_DRIVER_ID; + memset(pDriverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR); + memset(pDriverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR); + Util::Strncpy(pDriverName, VULKAN_DRIVER_NAME_STR, VK_MAX_DRIVER_NAME_SIZE_KHR); Util::Strncpy(pDriverInfo, VULKAN_DRIVER_INFO_STR, VK_MAX_DRIVER_INFO_SIZE_KHR); diff --git a/icd/api/vk_pipeline.cpp b/icd/api/vk_pipeline.cpp index e0de2559..0d3fd891 100644 --- a/icd/api/vk_pipeline.cpp +++ b/icd/api/vk_pipeline.cpp @@ -188,18 +188,36 @@ VkResult Pipeline::GetShaderDisassembly( { bool symbolValid = false; Util::Abi::ApiHwShaderMapping apiToHwShader = pPalPipeline->ApiHwShaderMapping(); + Util::Abi::ApiShaderType apiShaderType; - static_assert(((static_cast(Util::Abi::ApiShaderType::Cs) == static_cast(Pal::ShaderType::Compute)) && - (static_cast(Util::Abi::ApiShaderType::Vs) == static_cast(Pal::ShaderType::Vertex)) && - (static_cast(Util::Abi::ApiShaderType::Hs) == static_cast(Pal::ShaderType::Hull)) && - (static_cast(Util::Abi::ApiShaderType::Ds) == static_cast(Pal::ShaderType::Domain)) && - (static_cast(Util::Abi::ApiShaderType::Gs) == static_cast(Pal::ShaderType::Geometry)) && - (static_cast(Util::Abi::ApiShaderType::Ps) == static_cast(Pal::ShaderType::Pixel)) && - (static_cast(Util::Abi::ApiShaderType::Count) == Pal::NumShaderTypes)), - "Util::Abi::ApiShaderType to 
Pal::ShaderType mapping does not match!"); + switch (shaderType) + { + case Pal::ShaderType::Compute: + apiShaderType = Util::Abi::ApiShaderType::Cs; + break; + case Pal::ShaderType::Vertex: + apiShaderType = Util::Abi::ApiShaderType::Vs; + break; + case Pal::ShaderType::Hull: + apiShaderType = Util::Abi::ApiShaderType::Hs; + break; + case Pal::ShaderType::Domain: + apiShaderType = Util::Abi::ApiShaderType::Ds; + break; + case Pal::ShaderType::Geometry: + apiShaderType = Util::Abi::ApiShaderType::Gs; + break; + case Pal::ShaderType::Pixel: + apiShaderType = Util::Abi::ApiShaderType::Ps; + break; + default: + // Pal::ShaderType mapping to Util::Abi::ApiShaderType does not match! + VK_NEVER_CALLED(); + break; + } uint32_t hwStage = 0; - if (Util::BitMaskScanForward(&hwStage, apiToHwShader.apiShaders[static_cast(shaderType)])) + if (Util::BitMaskScanForward(&hwStage, apiToHwShader.apiShaders[static_cast(apiShaderType)])) { Util::Abi::PipelineSymbolEntry symbol = {}; const void* pDisassemblySection = nullptr; @@ -609,14 +627,35 @@ static Pal::ShaderType GetApiShaderFromHwShader( if (apiToHwShader.apiShaders[i] & (1 << static_cast(hwStage))) { - apiShaderType = static_cast(i); + switch (static_cast(i)) + { + case Util::Abi::ApiShaderType::Cs: + apiShaderType = Pal::ShaderType::Compute; + break; + case Util::Abi::ApiShaderType::Vs: + apiShaderType = Pal::ShaderType::Vertex; + break; + case Util::Abi::ApiShaderType::Hs: + apiShaderType = Pal::ShaderType::Hull; + break; + case Util::Abi::ApiShaderType::Ds: + apiShaderType = Pal::ShaderType::Domain; + break; + case Util::Abi::ApiShaderType::Gs: + apiShaderType = Pal::ShaderType::Geometry; + break; + case Util::Abi::ApiShaderType::Ps: + apiShaderType = Pal::ShaderType::Pixel; + break; + default: + // Util::Abi::ApiShaderType mapping to Pal::ShaderType does not match! 
+ VK_NEVER_CALLED(); + break; + } break; } } - // API shaders should never exceed number of shader types - VK_ASSERT(static_cast(apiShaderType) < static_cast(Pal::NumShaderTypes)); - return apiShaderType; } diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp index cf58119a..ebe6e368 100644 --- a/icd/api/vk_queue.cpp +++ b/icd/api/vk_queue.cpp @@ -656,6 +656,22 @@ VkResult Queue::Present( VkResult result = VK_SUCCESS; + // Query driver feature settings that could change from frame to frame. + uint32_t rsFeaturesChangedMask = 0; + { + uint32_t rsFeaturesQueriedMask = 0; + + Pal::Result palResult = m_pDevice->PalDevice(DefaultDeviceIndex)->DidRsFeatureSettingsChange( + rsFeaturesQueriedMask, + &rsFeaturesChangedMask); + + if ((palResult == Pal::Result::Success) && (rsFeaturesChangedMask != 0)) + { + // Update the feature settings from the app profile or the global settings. + m_pDevice->UpdateFeatureSettings(); + } + } + if (pPresentInfo == nullptr) { return VK_ERROR_INITIALIZATION_FAILED; @@ -705,8 +721,11 @@ VkResult Queue::Present( bool syncFlip = false; bool postFrameTimerSubmission = false; bool needFramePacing = NeedPacePresent(&presentInfo, pSwapChain, &syncFlip, &postFrameTimerSubmission); - const Pal::IGpuMemory* pGpuMemory = - pSwapChain->GetPresentableImageMemory(imageIndex)->PalMemory(DefaultDeviceIndex); + + const Pal::IGpuMemory* pGpuMemory = nullptr; + { + pGpuMemory = pSwapChain->GetPresentableImageMemory(imageIndex)->PalMemory(DefaultDeviceIndex); + } result = NotifyFlipMetadataBeforePresent(presentationDeviceIdx, &presentInfo, pPresentCmdBuffer, pGpuMemory); if (result != VK_SUCCESS) diff --git a/icd/api/vk_sampler.cpp b/icd/api/vk_sampler.cpp index 16e61cd5..13904401 100644 --- a/icd/api/vk_sampler.cpp +++ b/icd/api/vk_sampler.cpp @@ -163,7 +163,6 @@ VkResult Sampler::Create( samplerInfo.flags.prtBlendZeroMode = 0; samplerInfo.flags.seamlessCubeMapFiltering = 1; break; - case VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT: 
samplerInfo.filterMode = VkToPalTexFilterMode(pVkSamplerReductionModeCreateInfoEXT->reductionMode); break; diff --git a/icd/make/importdefs b/icd/make/importdefs index 7bf1ac25..cb4acba5 100644 --- a/icd/make/importdefs +++ b/icd/make/importdefs @@ -1,12 +1,12 @@ # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. It must # be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -ICD_PAL_CLIENT_MAJOR_VERSION = 534 -ICD_PAL_CLIENT_MINOR_VERSION = 0 +ICD_PAL_CLIENT_MAJOR_VERSION = 543 +ICD_PAL_CLIENT_MINOR_VERSION = 1 # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. It describes # the interface version of the gpuopen shared module (part of PAL) that the ICD supports. -ICD_GPUOPEN_CLIENT_MAJOR_VERSION = 39 +ICD_GPUOPEN_CLIENT_MAJOR_VERSION = 41 ICD_GPUOPEN_CLIENT_MINOR_VERSION = 0 #if ICD_BUILD_SCPC diff --git a/icd/res/ver.h b/icd/res/ver.h index e0b4ef73..a6c41bad 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 111 +#define VULKAN_ICD_BUILD_VERSION 115 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index a68a2aaa..f09a4061 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -389,6 +389,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( { m_settings.usePalPipelineCaching = (atoi(pPipelineCacheEnvVar) >= 0); } + } // =====================================================================================================================