From 3252b6223947f9fc67399e0798b1062983925fce Mon Sep 17 00:00:00 2001
From: Jacob He
Date: Sun, 29 Sep 2019 14:26:49 +0800
Subject: [PATCH] Update xgl from commit: 2f3287e

* Implement VK_EXT_post_depth_coverage
* Re-work vkPipelineCache
* Add "enableLoadScalarizer" option to app_shader_optimizer
* Tune shader performance for F1 2017 and The Talos Principle
* EXT_vertex_attribute_divisor: Add missing features query and support verification
* Fix a case fallthrough bug with VK_AMD_memory_overallocation_behavior at device creation
* Move platformKey to physical device
* Make InitializePlatformKey() a void function
* Add ShaderDbg to LLPC
* Bump LLPC client interface version to 34
* Update PAL Interface in Vulkan to 534

Change-Id: I8f6833890aaf717ade2bc04235ed81863f2fbad3
---
 CMakeLists.txt                           |   1 -
 icd/CMakeLists.txt                       |  10 +-
 icd/api/app_shader_optimizer.cpp         | 199 ++++++
 icd/api/compiler_solution.cpp            |  47 +--
 icd/api/compiler_solution_llpc.cpp       |   6 +-
 icd/api/gpu_event_mgr.cpp                | 391 ----------------------
 icd/api/include/app_shader_optimizer.h   |   1 +
 icd/api/include/compiler_solution.h      |   2 +-
 icd/api/include/compiler_solution_llpc.h |   2 +-
 icd/api/include/gpu_event_mgr.h          | 136 --------
 icd/api/include/pipeline_binary_cache.h  |  38 ++-
 icd/api/include/pipeline_compiler.h      |   2 -
 icd/api/include/vk_cmd_pool.h            |   8 -
 icd/api/include/vk_cmdbuffer.h           |   4 -
 icd/api/include/vk_conv.h                |   8 +-
 icd/api/include/vk_extensions.h          |   1 +
 icd/api/include/vk_physical_device.h     |  29 ++
 icd/api/include/vk_pipeline_cache.h      |   2 +-
 icd/api/include/vk_render_pass.h         |   1 -
 icd/api/pipeline_binary_cache.cpp        | 321 +++++++++++++++----
 icd/api/pipeline_compiler.cpp            |   2 +-
 icd/api/renderpass/renderpass_logger.cpp |   4 +-
 icd/api/sqtt/sqtt_rgp_annotations.h      | 150 ++++-----
 icd/api/strings/base_extensions.txt      |   1 +
 icd/api/vk_cmd_pool.cpp                  |  64 ----
 icd/api/vk_cmdbuffer.cpp                 |  66 +---
 icd/api/vk_device.cpp                    |  54 +++-
 icd/api/vk_event.cpp                     |   1 -
 icd/api/vk_image.cpp                     |   2 +-
 icd/api/vk_physical_device.cpp           | 205 +++++++---
 icd/api/vk_pipeline_cache.cpp            | 143 ++++++---
 icd/api/vk_queue.cpp                     |  11 +-
 icd/make/importdefs                      |   6 +-
 icd/res/ver.h                            |   2 +-
 icd/settings/settings.cpp                |  19 +-
 icd/settings/settings_xgl.json           | 346 +++++++++---------
 36 files changed, 1134 insertions(+), 1151 deletions(-)
 delete mode 100644 icd/api/gpu_event_mgr.cpp
 delete mode 100644 icd/api/include/gpu_event_mgr.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 737df5e6..ce6d7d33 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -134,7 +134,6 @@ set(XGL_PAL_PATH ${PROJECT_SOURCE_DIR}/../pal CACHE PATH "Specify the path to th
 set(PAL_CLIENT_INTERFACE_MAJOR_VERSION ${ICD_PAL_CLIENT_MAJOR_VERSION} CACHE STRING "${PROJECT_NAME} override." FORCE)
 set(PAL_CLIENT "VULKAN" CACHE STRING "${PROJECT_NAME} override." FORCE)
 
-set(PAL_DEVELOPER_BUILD ${VK_INTERNAL_DEVELOPER} CACHE BOOL "${PROJECT_NAME} override." FORCE)
 
 if(ICD_BUILD_SPVONLY)
     set(PAL_ENABLE_PRINTS_ASSERTS ON CACHE BOOL "${PROJECT_NAME} override."
FORCE) endif() diff --git a/icd/CMakeLists.txt b/icd/CMakeLists.txt index 60126c13..a60a9db8 100644 --- a/icd/CMakeLists.txt +++ b/icd/CMakeLists.txt @@ -116,11 +116,10 @@ if(ICD_MEMTRACK) endif() # Configure Vulkan SDK version definitions -if(USE_NEXT_SDK) - target_compile_definitions(xgl PRIVATE VKI_SDK_1_2=1) -else() - target_compile_definitions(xgl PRIVATE VKI_SDK_1_1=1) -endif() + +set(SDK VKI_SDK_1_1) + +target_compile_definitions(xgl PRIVATE ${SDK}=1) # Enable relevant GPUOpen preprocessor definitions if(ICD_GPUOPEN_DEVMODE_BUILD) @@ -183,7 +182,6 @@ target_sources(xgl PRIVATE api/barrier_policy.cpp api/color_space_helper.cpp api/compiler_solution.cpp - api/gpu_event_mgr.cpp api/internal_mem_mgr.cpp api/pipeline_compiler.cpp api/pipeline_binary_cache.cpp diff --git a/icd/api/app_shader_optimizer.cpp b/icd/api/app_shader_optimizer.cpp index 3a023193..c5aad96b 100644 --- a/icd/api/app_shader_optimizer.cpp +++ b/icd/api/app_shader_optimizer.cpp @@ -136,6 +136,12 @@ void ShaderOptimizer::ApplyProfileToShaderCreateInfo( options.pPipelineOptions->reconfigWorkgroupLayout = true; } #endif +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION >= 33 + if (shaderCreate.tuningOptions.enableLoadScalarizer) + { + options.pOptions->enableLoadScalarizer = true; + } +#endif if (shaderCreate.apply.waveSize) { @@ -817,6 +823,7 @@ void ShaderOptimizer::BuildAppProfileLlpc() m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x8296579A6570BC13; m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x44FA946844F62696; m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; /////////////////////////////////////////////////////////////////////////////////////////////////////////// // 0xE4B55319684F59F228A2B57C92339574, PS @@ -844,6 +851,160 @@ void ShaderOptimizer::BuildAppProfileLlpc() m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x7E1F46BE56E427AA; m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xA3EB7292C77A0365; m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xF341093EF870C70A0AECE7808011C4B8, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x0AECE7808011C4B8; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xF341093EF870C70A; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xB60900B3E1256DDFC7A889DBAC76F591, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + 
m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xC7A889DBAC76F591; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xB60900B3E1256DDF; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x41DF226419CD26C217CE9268FE52D03B, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x17CE9268FE52D03B; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x41DF226419CD26C2; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x1D9EB7DDBA66FDF78AED19D93B57535B, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x8AED19D93B57535B; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x1D9EB7DDBA66FDF7; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x20E5DA2E5917E2416A43398F36D72603, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x6A43398F36D72603; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x20E5DA2E5917E241; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xF3AF74681BD7980350FBF528DC8AFBA5, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x50FBF528DC8AFBA5; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xF3AF74681BD79803; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x51D59E18E8BD64D9955B7EEAB9F6CDAA, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x955B7EEAB9F6CDAA; + 
m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x51D59E18E8BD64D9; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x154112D144C95DE5ECF087B422ED60CE, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xECF087B422ED60CE; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x154112D144C95DE5; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xE39F6C59BF345B466DE524A0717A4D67, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x6DE524A0717A4D67; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xE39F6C59BF345B46; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xB020780B537A01C426365F3E39BE59E6, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x26365F3E39BE59E6; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xB020780B537A01C4; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xFBAD8E5EE07D12D0F5E3F18201C348E6, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xF5E3F18201C348E6; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xFBAD8E5EE07D12D0; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xCD911627E2D20F9B7D5DFF0970FB823A, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x7D5DFF0970FB823A; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xCD911627E2D20F9B; + 
m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x2DAC71E14EB7945D50DD68ED10CBE1AF, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x50DD68ED10CBE1AF; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x2DAC71E14EB7945D; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x3C1101DC3E3B206E2D99D8DAAF0FE1BE, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x2D99D8DAAF0FE1BE; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x3C1101DC3E3B206E; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x474C4C2966E08232DE5274426C9F365C, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xDE5274426C9F365C; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x474C4C2966E08232; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0xD85FA2403788076B3BA507665B126C33, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x3BA507665B126C33; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0xD85FA2403788076B; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x6A07F5C0DAAB96E6D1C630198DDC7F21, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xD1C630198DDC7F21; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x6A07F5C0DAAB96E6; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; } else if (appProfile == 
AppProfile::SeriousSamFusion) { @@ -1011,6 +1172,35 @@ void ShaderOptimizer::BuildAppProfileLlpc() m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x549373FA25856E20; m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; } + else if (appProfile == AppProfile::F1_2017) + { + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x7C92A52E3084149659025B19EDAE3734, CS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].codeHash.lower = 0x59025B19EDAE3734; + m_appProfile.entries[i].pattern.shaders[ShaderStageCompute].codeHash.upper = 0x7C92A52E30841496; + m_appProfile.entries[i].action.shaders[ShaderStageCompute].shaderCreate.tuningOptions.useSiScheduler = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x767991F055DE051DEC878C820BD1D81E, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0xEC878C820BD1D81E; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x767991F055DE051D; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.useSiScheduler = true; + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // 0x8648E5203943C0B00EBEFF2CBF131944, PS + i = m_appProfile.entryCount++; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.stageActive = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].match.codeHash = true; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.lower = 0x0EBEFF2CBF131944; + m_appProfile.entries[i].pattern.shaders[ShaderStageFragment].codeHash.upper = 0x8648E5203943C0B0; + m_appProfile.entries[i].action.shaders[ShaderStageFragment].shaderCreate.tuningOptions.enableLoadScalarizer = true; + } } #if PAL_ENABLE_PRINTS_ASSERTS @@ -1152,6 +1342,7 @@ static bool ParseJsonProfileActionShader( "enableSelectiveInline", "useSiScheduler", "reconfigWorkgroupLayout", + "enableLoadScalarizer", "waveSize", "wgpMode", "waveBreakSize", @@ -1384,6 +1575,14 @@ static bool ParseJsonProfileActionShader( } } + if ((pItem = utils::JsonGetValue(pJson, "enableLoadScalarizer")) != nullptr) + { + if (pItem->integerValue != 0) + { + pActions->shaderCreate.tuningOptions.enableLoadScalarizer = true; + } + } + return success; } diff --git a/icd/api/compiler_solution.cpp b/icd/api/compiler_solution.cpp index 2fce9c40..8b482715 100644 --- a/icd/api/compiler_solution.cpp +++ b/icd/api/compiler_solution.cpp @@ -49,49 +49,12 @@ CompilerSolution::~CompilerSolution() // ===================================================================================================================== // Initialize CompilerSolution class -VkResult CompilerSolution::Initialize() +VkResult CompilerSolution::Initialize( + Llpc::GfxIpVersion gfxIp, + Pal::GfxIpLevel gfxIpLevel) { - Pal::IDevice* pPalDevice = m_pPhysicalDevice->PalDevice(); - const RuntimeSettings& settings = 
m_pPhysicalDevice->GetRuntimeSettings(); - - // Initialize GfxIp informations per PAL device properties - Pal::DeviceProperties info; - pPalDevice->GetProperties(&info); - - switch (info.gfxLevel) - { - case Pal::GfxIpLevel::GfxIp6: - m_gfxIp.major = 6; - m_gfxIp.minor = 0; - break; - case Pal::GfxIpLevel::GfxIp7: - m_gfxIp.major = 7; - m_gfxIp.minor = 0; - break; - case Pal::GfxIpLevel::GfxIp8: - m_gfxIp.major = 8; - m_gfxIp.minor = 0; - break; - case Pal::GfxIpLevel::GfxIp8_1: - m_gfxIp.major = 8; - m_gfxIp.minor = 1; - break; - case Pal::GfxIpLevel::GfxIp9: - m_gfxIp.major = 9; - m_gfxIp.minor = 0; - break; - case Pal::GfxIpLevel::GfxIp10_1: - m_gfxIp.major = 10; - m_gfxIp.minor = 1; - break; - - default: - VK_NEVER_CALLED(); - break; - } - - m_gfxIp.stepping = info.gfxStepping; - m_gfxIpLevel = info.gfxLevel; + m_gfxIp = gfxIp; + m_gfxIpLevel = gfxIpLevel; return VK_SUCCESS; } diff --git a/icd/api/compiler_solution_llpc.cpp b/icd/api/compiler_solution_llpc.cpp index 5609a6b4..2f4dbd07 100644 --- a/icd/api/compiler_solution_llpc.cpp +++ b/icd/api/compiler_solution_llpc.cpp @@ -54,9 +54,11 @@ CompilerSolutionLlpc::~CompilerSolutionLlpc() // ===================================================================================================================== // Initialize CompilerSolutionLlpc class -VkResult CompilerSolutionLlpc::Initialize() +VkResult CompilerSolutionLlpc::Initialize( + Llpc::GfxIpVersion gfxIp, + Pal::GfxIpLevel gfxIpLevel) { - VkResult result = CompilerSolution::Initialize(); + VkResult result = CompilerSolution::Initialize(gfxIp, gfxIpLevel); if (result == VK_SUCCESS) { diff --git a/icd/api/gpu_event_mgr.cpp b/icd/api/gpu_event_mgr.cpp deleted file mode 100644 index fa79097c..00000000 --- a/icd/api/gpu_event_mgr.cpp +++ /dev/null @@ -1,391 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2014-2019 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - **********************************************************************************************************************/ - -#include "include/vk_cmdbuffer.h" -#include "include/vk_conv.h" -#include "include/vk_device.h" -#include "sqtt/sqtt_rgp_annotations.h" - -#include "palGpuEvent.h" -#include "palIntrusiveListImpl.h" - -namespace vk -{ - -// ===================================================================================================================== -void GpuEvents::Destroy() -{ - for (uint32_t i = 0; i < m_numDeviceEvents; i++) - { - m_pEvents[i]->Destroy(); - } -} - -// ===================================================================================================================== -GpuEventMgr::GpuEventMgr(Device* pDevice) - : - m_parentNode(this), - m_pFirstChunk(nullptr), - m_needWaitRecycleEvents(false), - m_pDevice(pDevice), - m_totalEventCount(0) -{ - -} - -// ===================================================================================================================== -GpuEventMgr::~GpuEventMgr() -{ - Destroy(); -} - -// ===================================================================================================================== -// Should be called during the parent's vkBeginCommandBuffer() -void GpuEventMgr::BeginCmdBuf( - CmdBuffer* pOwner, - const Pal::CmdBufferBuildInfo& info) -{ - // If this command buffer can be submitted multiple times, we need to make sure that we wait on its previous - // incarnation to complete before allowing any events to be accessed. This is because we need to make sure nothing - // signals these events while the GPU is still accessing this command buffer. - m_needWaitRecycleEvents = (info.flags.optimizeOneTimeSubmit == false); -} - -// ===================================================================================================================== -// Called when this event manager's event memory should be reset. This will mark all events as free for allocation but -// does not release any of their GPU memory. -// -// This is called either when a command buffer is being reset, or when a command buffer's resources are being -// are being released back to the command pool (e.g. when destroyed). -void GpuEventMgr::ResetEvents() -{ - // Mark all previously-created events as free for reuse. When resetting a command buffer, the application is - // responsible for ensuring that no previous access to the command buffer by the GPU is pending which means that - // we don't need to wait before resetting the GPU value of these events (this actual reset happens during - // RequestEvents()). - EventChunk* pChunk = m_pFirstChunk; - - while (pChunk != nullptr) - { - pChunk->eventNextFree = 0; - pChunk = pChunk->pNextChunk; - } -} - -// ===================================================================================================================== -// Called when the command buffer that owns this event manager is reset. -void GpuEventMgr::ResetCmdBuf( - CmdBuffer* pOwner) -{ - // Reset all events back to available. 
- ResetEvents(); -} - -// ===================================================================================================================== -// Destroys the event manager's internal memory -void GpuEventMgr::Destroy() -{ - Instance* pInstance = m_pDevice->VkInstance(); - - EventChunk* pChunk = m_pFirstChunk; - - while (pChunk != nullptr) - { - EventChunk* pNext = pChunk->pNextChunk; - - DestroyChunk(pChunk); - - pChunk = pNext; - } - - m_pFirstChunk = nullptr; - m_totalEventCount = 0; -} - -// ===================================================================================================================== -// Destroys the given batch of GPU events. Called when the command buffer is destroyed or as part of allocation -// failure clean-up. -void GpuEventMgr::DestroyChunk(EventChunk* pChunk) -{ - if (pChunk != nullptr) - { - for (uint32_t i = 0; i < pChunk->eventCount; ++i) - { - pChunk->ppGpuEvents[i]->Destroy(); - } - - m_pDevice->MemMgr()->FreeGpuMem(&pChunk->gpuMemory); - - m_pDevice->VkInstance()->FreeMem(pChunk); - } -} - -// ===================================================================================================================== -// Requests some number of events to be given to the command buffer. -// -// WARNING: THIS FUNCTIONALITY IS INCOMPATIBLE WITH COMMAND BUFFERS THAT CAN BE SUBMITTED IN PARALLEL ON MULTIPLE -// QUEUES. PARALLEL EXECUTION OF THE SAME COMMAND BUFFER WILL CAUSE IT TO TRIP OVER ITS OWN EVENTS. -// -// There is currently no use case for that with the exception of compute engine command buffers and such command -// buffers should not make use of this functionality. -VkResult GpuEventMgr::RequestEvents( - CmdBuffer* pCmdBuf, - uint32_t eventCount, - GpuEvents*** pppGpuEvents) -{ - if (eventCount == 0) - { - *pppGpuEvents = nullptr; - - return VK_SUCCESS; - } - -#if PAL_ENABLE_PRINTS_ASSERTS - const Pal::DeviceProperties& deviceProps = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties(); - const Pal::EngineType engineType = pCmdBuf->GetPalEngineType(); - - // See above comment - VK_ASSERT(deviceProps.engineProperties[engineType].engineCount == 1); -#endif - - if (m_needWaitRecycleEvents) - { - WaitToRecycleEvents(pCmdBuf); - } - - VkResult result = VK_SUCCESS; - - EventChunk* pChunk = FindFreeExistingChunk(eventCount); - - if (pChunk == nullptr) - { - result = CreateNewChunk(eventCount, &pChunk); - } - - if (pChunk != nullptr) - { - VK_ASSERT(result == VK_SUCCESS); - - AllocEventsFromChunk(pCmdBuf, eventCount, pChunk, pppGpuEvents); - } - - return result; -} - -// ===================================================================================================================== -// Tries to find enough space in an existing batch of GPU events. -GpuEventMgr::EventChunk* GpuEventMgr::FindFreeExistingChunk(uint32_t eventCount) -{ - EventChunk* pChunk = m_pFirstChunk; - - while (pChunk != nullptr) - { - if (pChunk->eventCount - pChunk->eventNextFree >= eventCount) - { - return pChunk; - } - - pChunk = pChunk->pNextChunk; - } - - return nullptr; -} - -// ===================================================================================================================== -// Allocates GPU events from the given chunk of events. 
-void GpuEventMgr::AllocEventsFromChunk( - CmdBuffer* pCmdBuf, - uint32_t eventCount, - EventChunk* pChunk, - GpuEvents*** pppGpuEvents) -{ - GpuEvents** ppEvents = pChunk->ppGpuEvents + pChunk->eventNextFree; - - pChunk->eventNextFree += eventCount; - - VK_ASSERT(pChunk->eventNextFree <= pChunk->eventCount); - - // Reset the event status - // Note that the top of pipe reset below is okay because any previous reads have already been taken care of by the - // insertion of the inter-submit barrier - VK_ASSERT(m_needWaitRecycleEvents == false); - - for (uint32_t i = 0; i < eventCount; ++i) - { - pCmdBuf->PalCmdResetEvent(ppEvents[i], Pal::HwPipeTop); - } - - *pppGpuEvents = ppEvents; -} - -// ===================================================================================================================== -// Creates a new chunk at least large enough to fit the requested number of events. -VkResult GpuEventMgr::CreateNewChunk( - uint32_t eventCount, - EventChunk** ppChunk) -{ - const auto& settings = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetRuntimeSettings(); - - if (eventCount < settings.cmdBufGpuEventMinAllocCount) - { - eventCount = settings.cmdBufGpuEventMinAllocCount; - } - - VkResult result = VK_SUCCESS; - - EventChunk* pChunk = CreateChunkState(eventCount); - - if (pChunk != nullptr) - { - pChunk->pNextChunk = m_pFirstChunk; - m_pFirstChunk = pChunk; - - m_totalEventCount += pChunk->eventCount; - - *ppChunk = pChunk; - } - else - { - DestroyChunk(pChunk); - - *ppChunk = nullptr; - } - - return result; -} - -// ===================================================================================================================== -GpuEventMgr::EventChunk::EventChunk() - : - ppGpuEvents(nullptr), - eventCount(0), - eventNextFree(0), - pNextChunk(nullptr) -{ - -} - -// ===================================================================================================================== -// Initializes the system memory and state of a new event chunk. 
-GpuEventMgr::EventChunk* GpuEventMgr::CreateChunkState(uint32_t eventCount) -{ - size_t totalSize = 0; - - size_t chunkHeaderSize = sizeof(EventChunk); - - totalSize += chunkHeaderSize; - - size_t eventPtrArraySize = eventCount * sizeof(GpuEvents); - - totalSize += eventPtrArraySize; - - size_t eventPalObjSize = 0; - Pal::GpuEventCreateInfo eventCreateInfo = {}; - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); ++deviceIdx) - { - eventPalObjSize += m_pDevice->PalDevice(deviceIdx)->GetGpuEventSize(eventCreateInfo, nullptr); - } - - size_t eventSysMemSize = eventCount * (sizeof(GpuEvents) + eventPalObjSize); - - totalSize += eventSysMemSize; - - void* pMem = m_pDevice->VkInstance()->AllocMem(totalSize, VK_DEFAULT_MEM_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - void* pOrigMem = pMem; - - if (pMem == nullptr) - { - return nullptr; - } - - EventChunk* pChunk = reinterpret_cast(pMem); - pMem = Util::VoidPtrInc(pMem, chunkHeaderSize); - - VK_PLACEMENT_NEW(pChunk) GpuEventMgr::EventChunk(); - - pChunk->ppGpuEvents = reinterpret_cast(pMem); - - pMem = Util::VoidPtrInc(pMem, eventPtrArraySize); - - Pal::Result result = Pal::Result::Success; - - const Pal::GpuEventCreateInfo createInfo = {}; - - for (pChunk->eventCount = 0; - (pChunk->eventCount < eventCount) && (result == Pal::Result::Success); - pChunk->eventCount++) - { - Pal::IGpuEvent* pPalEvents[MaxPalDevices] = {}; - - size_t memOffset = sizeof(GpuEvents); - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); ++deviceIdx) - { - result = m_pDevice->PalDevice(deviceIdx)->CreateGpuEvent(createInfo, - Util::VoidPtrInc(pMem, memOffset), &pPalEvents[deviceIdx] ); - - memOffset += m_pDevice->PalDevice(deviceIdx)->GetGpuEventSize(createInfo, nullptr); - } - VK_PLACEMENT_NEW(pMem) GpuEvents(m_pDevice->NumPalDevices(), pPalEvents); - - pChunk->ppGpuEvents[pChunk->eventCount] = reinterpret_cast(pMem); - - pMem = Util::VoidPtrInc(pMem, sizeof(GpuEvents) + eventPalObjSize); - } - - VK_ASSERT(Util::VoidPtrDiff(pMem, pOrigMem) == totalSize); - - if (result == Pal::Result::Success) - { - return pChunk; - } - else - { - return nullptr; - } -} - -// ===================================================================================================================== -// Waits for any previous access to all events to finish. 
-void GpuEventMgr::WaitToRecycleEvents(CmdBuffer* pCmdBuf) -{ - Pal::BarrierInfo barrier = {}; - Pal::HwPipePoint signalPoint = Pal::HwPipeTop; - - barrier.flags.u32All = 0; - barrier.waitPoint = Pal::HwPipeTop; - barrier.pipePointWaitCount = 1; - barrier.pPipePoints = &signalPoint; - barrier.pSplitBarrierGpuEvent = nullptr; - barrier.reason = RgpBarrierInternalGpuEventRecycleStall; - - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) - { - pCmdBuf->PalCmdBuffer(deviceIdx)->CmdBarrier(barrier); - } - - m_needWaitRecycleEvents = false; -} - -}; diff --git a/icd/api/include/app_shader_optimizer.h b/icd/api/include/app_shader_optimizer.h index ad7e82b0..643bbb7e 100644 --- a/icd/api/include/app_shader_optimizer.h +++ b/icd/api/include/app_shader_optimizer.h @@ -114,6 +114,7 @@ struct ShaderTuningOptions uint32_t waveBreakSize; uint32_t useSiScheduler; uint32_t reconfigWorkgroupLayout; + bool enableLoadScalarizer; }; struct ShaderProfileAction diff --git a/icd/api/include/compiler_solution.h b/icd/api/include/compiler_solution.h index f056efed..b93e5f45 100644 --- a/icd/api/include/compiler_solution.h +++ b/icd/api/include/compiler_solution.h @@ -106,7 +106,7 @@ class CompilerSolution CompilerSolution(PhysicalDevice* pPhysicalDevice); virtual ~CompilerSolution(); - virtual VkResult Initialize() = 0; + virtual VkResult Initialize(Llpc::GfxIpVersion gfxIp, Pal::GfxIpLevel gfxIpLevel) = 0; virtual void Destroy() = 0; diff --git a/icd/api/include/compiler_solution_llpc.h b/icd/api/include/compiler_solution_llpc.h index 76778172..d2c2ab9d 100644 --- a/icd/api/include/compiler_solution_llpc.h +++ b/icd/api/include/compiler_solution_llpc.h @@ -45,7 +45,7 @@ class CompilerSolutionLlpc : public CompilerSolution public: // Overidded functions - virtual VkResult Initialize(); + virtual VkResult Initialize(Llpc::GfxIpVersion gfxIp, Pal::GfxIpLevel gfxIpLevel); virtual void Destroy(); diff --git a/icd/api/include/gpu_event_mgr.h b/icd/api/include/gpu_event_mgr.h deleted file mode 100644 index c57759d9..00000000 --- a/icd/api/include/gpu_event_mgr.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - *********************************************************************************************************************** - * - * Copyright (c) 2014-2019 Advanced Micro Devices, Inc. All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- * - **********************************************************************************************************************/ -/** - ************************************************************************************************** - * @file gpu_event_mgr.h - * @brief Manages internal GPU events while building command buffers - ************************************************************************************************** - */ - -#ifndef __GPU_EVENT_MGR_H__ -#define __GPU_EVENT_MGR_H__ - -#pragma once - -#include "include/khronos/vulkan.h" - -#include "include/internal_mem_mgr.h" - -#include "palIntrusiveList.h" - -// Forward declare PAL classes used in this file -namespace Pal -{ -struct CmdBufferBuildInfo; -class IGpuEvent; -}; - -// Forward declare Vulkan classes used in this file -namespace vk -{ -class CmdBuffer; -class Device; -}; - -namespace vk -{ - -// ===================================================================================================================== -// Class contains Pal::IGpuEvent* objects which are part of a device group -class GpuEvents -{ - -public: - GpuEvents(uint32_t numDeviceEvents, - Pal::IGpuEvent** pPalEvents) : - m_numDeviceEvents(numDeviceEvents) - { - memcpy(m_pEvents, pPalEvents, sizeof(m_pEvents[0]) * numDeviceEvents); - } - - void Destroy(); - - VK_INLINE Pal::IGpuEvent* PalEvent(uint32_t deviceIdx) const - { - VK_ASSERT(deviceIdx < m_numDeviceEvents); - return m_pEvents[deviceIdx]; - } - -private: - uint32_t m_numDeviceEvents; - Pal::IGpuEvent* m_pEvents[MaxPalDevices]; -}; - -// ===================================================================================================================== -// Manages GPU events used internally by command buffers. -class GpuEventMgr -{ -public: - typedef Util::IntrusiveList List; - - GpuEventMgr(Device* pDevice); - ~GpuEventMgr(); - - void BeginCmdBuf(CmdBuffer* pOwner, const Pal::CmdBufferBuildInfo& info); - VkResult RequestEvents(CmdBuffer* pCmdBuf, uint32_t eventCount, GpuEvents*** pppGpuEvents); - void ResetCmdBuf(CmdBuffer* pOwner); - void ResetEvents(); - void Destroy(); - - List::Node* ListNode() { return &m_parentNode; } - -protected: - struct EventChunk - { - EventChunk(); - - InternalMemory gpuMemory; - GpuEvents** ppGpuEvents; - uint32_t eventCount; - uint32_t eventNextFree; - EventChunk* pNextChunk; - }; - - void DestroyChunk(EventChunk* pChunk); - EventChunk* FindFreeExistingChunk(uint32_t eventCount); - VkResult CreateNewChunk(uint32_t eventCount, EventChunk** ppChunk); - EventChunk* CreateChunkState(uint32_t eventCount); - void AllocEventsFromChunk( - CmdBuffer* pCmdBuf, - uint32_t eventCount, - EventChunk* pChunk, - GpuEvents*** ppGpuEvents); - void WaitToRecycleEvents(CmdBuffer* pCmdBuf); - - List::Node m_parentNode; // Intrusive list parent node - EventChunk* m_pFirstChunk; // Linked list of event chunks - bool m_needWaitRecycleEvents; // True if we still need to wait for previous access to events to complete - Device* const m_pDevice; // Device pointer - uint32_t m_totalEventCount; // Total number of GPU event objects created so far -}; - -}; - -#endif /* __GPU_EVENT_MGR_H__ */ diff --git a/icd/api/include/pipeline_binary_cache.h b/icd/api/include/pipeline_binary_cache.h index 554e6ade..65355827 100644 --- a/icd/api/include/pipeline_binary_cache.h +++ b/icd/api/include/pipeline_binary_cache.h @@ -44,6 +44,18 @@ class IPlatformKey; namespace vk { +struct BinaryCacheEntry +{ + Util::MetroHash::Hash hashId; + size_t dataSize; +}; + +constexpr size_t SHA_DIGEST_LENGTH = 20; +struct 
PipelineBinaryCachePrivateHeader +{ + uint8_t hashId[SHA_DIGEST_LENGTH]; +}; + // Unified pipeline cache interface class PipelineBinaryCache { @@ -58,10 +70,13 @@ class PipelineBinaryCache const Llpc::GfxIpVersion& gfxIp, const PhysicalDevice* pPhysicalDevice); - VkResult Initialize( + static bool IsValidBlob( const PhysicalDevice* pPhysicalDevice, - size_t initDataSize, - const void* pInitData); + size_t dataSize, + const void* pData); + + VkResult Initialize( + const PhysicalDevice* pPhysicalDevice); Util::Result QueryPipelineBinary( const CacheId* pCacheId, @@ -70,13 +85,21 @@ class PipelineBinaryCache Util::Result LoadPipelineBinary( const CacheId* pCacheId, size_t* pPipelineBinarySize, - const void** ppPipelineBinary); + const void** ppPipelineBinary) const; Util::Result StorePipelineBinary( const CacheId* pCacheId, size_t pipelineBinarySize, const void* pPipelineBinary); + VkResult Serialize( + void* pBlob, + size_t* pSize); + + VkResult Merge( + uint32_t srcCacheCount, + const PipelineBinaryCache** ppSrcCaches); + #if ICD_GPUOPEN_DEVMODE_BUILD Util::Result LoadReinjectionBinary( const CacheId* pInternalPipelineHash, @@ -132,8 +155,6 @@ class PipelineBinaryCache VkResult InitLayers( const PhysicalDevice* pPhysicalDevice, - size_t initDataSize, - const void* pInitData, bool internal, const RuntimeSettings& settings); @@ -152,6 +173,7 @@ class PipelineBinaryCache const PhysicalDevice* pPhysicalDevice, const RuntimeSettings& settings); + Util::ICacheLayer* GetMemoryLayer() const { return m_pMemoryLayer; } Util::IArchiveFile* OpenReadOnlyArchive(const char* path, const char* fileName, size_t bufferSize); Util::IArchiveFile* OpenWritableArchive(const char* path, const char* fileName, size_t bufferSize); Util::ICacheLayer* CreateFileLayer(Util::IArchiveFile* pFile); @@ -171,7 +193,9 @@ class PipelineBinaryCache Llpc::GfxIpVersion m_gfxIp; // Compared against e_flags of reinjected elf files Instance* const m_pInstance; // Allocator for use when interacting with the cache - Util::IPlatformKey* m_pPlatformKey; // Platform identifying key + + const Util::IPlatformKey* m_pPlatformKey; // Platform identifying key + Util::ICacheLayer* m_pTopLayer; // Top layer of the cache chain where queries are submitted #if ICD_GPUOPEN_DEVMODE_BUILD diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index af4b014c..7f49f1f4 100644 --- a/icd/api/include/pipeline_compiler.h +++ b/icd/api/include/pipeline_compiler.h @@ -39,8 +39,6 @@ #include "include/vk_shader_code.h" -#define ICD_BUILD_MULTI_COMPILER 0 - namespace vk { diff --git a/icd/api/include/vk_cmd_pool.h b/icd/api/include/vk_cmd_pool.h index 2c99f529..7010547e 100644 --- a/icd/api/include/vk_cmd_pool.h +++ b/icd/api/include/vk_cmd_pool.h @@ -39,8 +39,6 @@ #include "include/vk_dispatch.h" #include "include/vk_alloccb.h" -#include "include/gpu_event_mgr.h" - #include "palCmdAllocator.h" #include "palHashSet.h" @@ -79,9 +77,6 @@ class CmdPool : public NonDispatchable void UnregisterCmdBuffer(CmdBuffer* pCmdBuffer); - GpuEventMgr* AcquireGpuEventMgr(); - void ReleaseGpuEventMgr(GpuEventMgr* pGpuEventMgr); - VkResult PalCmdAllocatorReset(); VK_INLINE uint32_t GetQueueFamilyIndex() const { return m_queueFamilyIndex; } @@ -93,8 +88,6 @@ class CmdPool : public NonDispatchable uint32_t queueFamilyIndex, bool sharedCmdAllocator); - void DestroyGpuEventMgrs(); - Device* m_pDevice; Pal::ICmdAllocator* m_pPalCmdAllocators[MaxPalDevices]; const uint32_t m_queueFamilyIndex; @@ -102,7 +95,6 @@ class CmdPool : public NonDispatchable 
Util::HashSet m_cmdBufferRegistry; - Util::IntrusiveList m_freeEventMgrs; uint32_t m_totalEventMgrCount; }; diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index 05eb0bbe..90b2abc2 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -43,7 +43,6 @@ #include "include/vk_render_pass.h" #include "include/vk_utils.h" -#include "include/gpu_event_mgr.h" #include "include/internal_mem_mgr.h" #include "include/stencil_ops_combiner.h" #include "include/vert_buf_binding_mgr.h" @@ -643,8 +642,6 @@ class CmdBuffer VK_FORCEINLINE VirtualStackAllocator* GetStackAllocator() { return m_pStackAllocator; } - void RequestRenderPassEvents(uint32_t eventCount, GpuEvents*** pppGpuEvents); - void PalCmdBarrier( const Pal::BarrierInfo& info, uint32_t deviceMask); @@ -995,7 +992,6 @@ class CmdBuffer VkShaderStageFlags m_validShaderStageFlags; Pal::ICmdBuffer* m_pPalCmdBuffers[MaxPalDevices]; VirtualStackAllocator* m_pStackAllocator; - GpuEventMgr* m_pGpuEventMgr; CmdBufferRenderState m_state; // Render state tracked during command buffer building diff --git a/icd/api/include/vk_conv.h b/icd/api/include/vk_conv.h index 130c0499..fe82af40 100644 --- a/icd/api/include/vk_conv.h +++ b/icd/api/include/vk_conv.h @@ -2766,14 +2766,14 @@ VK_INLINE void VkToPalScissorRect( VK_INLINE Pal::QueuePriority VkToPalGlobalPriority( VkQueueGlobalPriorityEXT vkPriority) { - Pal::QueuePriority palPriority = Pal::QueuePriority::Low; + Pal::QueuePriority palPriority = Pal::QueuePriority::Normal; switch (static_cast(vkPriority)) { case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT: - palPriority = Pal::QueuePriority::VeryLow; + palPriority = Pal::QueuePriority::Idle; break; case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT: - palPriority = Pal::QueuePriority::Low; + palPriority = Pal::QueuePriority::Normal; break; case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT: palPriority = Pal::QueuePriority::Medium; @@ -2782,7 +2782,7 @@ VK_INLINE Pal::QueuePriority VkToPalGlobalPriority( palPriority = Pal::QueuePriority::High; break; default: - palPriority = Pal::QueuePriority::Low; + palPriority = Pal::QueuePriority::Normal; break; } diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index 49015c37..984c8573 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -295,6 +295,7 @@ class DeviceExtensions : public Extensions EXT_MEMORY_PRIORITY, AMD_DEVICE_COHERENT_MEMORY, EXT_MEMORY_BUDGET, + EXT_POST_DEPTH_COVERAGE, EXT_HOST_QUERY_RESET, EXT_BUFFER_DEVICE_ADDRESS, EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, diff --git a/icd/api/include/vk_physical_device.h b/icd/api/include/vk_physical_device.h index 161393df..58fe1010 100644 --- a/icd/api/include/vk_physical_device.h +++ b/icd/api/include/vk_physical_device.h @@ -58,6 +58,11 @@ class IDevice; } // namespace Pal +namespace Util +{ +class IPlatformKey; +} // namespace Util + namespace vk { @@ -192,6 +197,18 @@ class PhysicalDevice return m_queueFamilies[queueFamilyIndex].palEngineType; } + VK_INLINE uint32_t GetCompQueueEngineIndex( + const uint32_t queueIndex) const + { + return m_compQueueEnginesNdx[queueIndex]; + } + + VK_INLINE uint32_t GetUniversalQueueEngineIndex( + const uint32_t queueIndex) const + { + return m_universalQueueEnginesNdx[queueIndex]; + } + VK_INLINE uint32_t GetQueueFamilyPalImageLayoutFlag( uint32_t queueFamilyIndex) const { @@ -637,6 +654,8 @@ class PhysicalDevice VK_INLINE bool ShouldAddRemoteBackupHeap(uint32_t vkIndex) const { return m_memoryVkIndexAddRemoteBackupHeap[vkIndex]; } + 
Util::IPlatformKey* GetPlatformKey() const { return m_pPlatformKey; } + protected: PhysicalDevice(PhysicalDeviceManager* pPhysicalDeviceManager, Pal::IDevice* pPalDevice, @@ -649,6 +668,8 @@ class PhysicalDevice void PopulateExtensions(); void PopulateGpaProperties(); + void InitializePlatformKey(const RuntimeSettings& settings); + VK_FORCEINLINE bool IsPerChannelMinMaxFilteringSupported() const { return m_properties.gfxipProperties.flags.supportPerChannelMinMaxFilter; @@ -686,6 +707,12 @@ class PhysicalDevice VkQueueFamilyProperties properties; } m_queueFamilies[Queue::MaxQueueFamilies]; + // List of indices for compute engines that aren't exclusive. + uint32_t m_compQueueEnginesNdx[Queue::MaxQueuesPerFamily]; + + // List of indices for universal engines that aren't exclusive. + uint32_t m_universalQueueEnginesNdx[Queue::MaxQueuesPerFamily]; + const AppProfile m_appProfile; bool m_prtOnDmaSupported; @@ -705,6 +732,8 @@ class PhysicalDevice } m_memoryUsageTracker; uint8_t m_pipelineCacheUUID[VK_UUID_SIZE]; + + Util::IPlatformKey* m_pPlatformKey; // Platform identifying key }; VK_DEFINE_DISPATCHABLE(PhysicalDevice); diff --git a/icd/api/include/vk_pipeline_cache.h b/icd/api/include/vk_pipeline_cache.h index 50d6f210..74910630 100644 --- a/icd/api/include/vk_pipeline_cache.h +++ b/icd/api/include/vk_pipeline_cache.h @@ -77,7 +77,7 @@ class PipelineCache : public NonDispatchable VkResult Merge(uint32_t srcCacheCount, const PipelineCache** ppSrcCaches); - VK_INLINE PipelineBinaryCache* GetPipelineCache() { return m_pBinaryCache; } + VK_INLINE PipelineBinaryCache* GetPipelineCache() const { return m_pBinaryCache; } protected: PipelineCache(const Device* pDevice, ShaderCache* pShaderCaches, diff --git a/icd/api/include/vk_render_pass.h b/icd/api/include/vk_render_pass.h index 1a24cf24..f92238b4 100644 --- a/icd/api/include/vk_render_pass.h +++ b/icd/api/include/vk_render_pass.h @@ -48,7 +48,6 @@ namespace vk class Device; class CmdBuffer; class Framebuffer; -class GpuEvents; class RenderPassCmdList; struct RenderPassExtCreateInfo diff --git a/icd/api/pipeline_binary_cache.cpp b/icd/api/pipeline_binary_cache.cpp index af5740d9..e20e7be3 100644 --- a/icd/api/pipeline_binary_cache.cpp +++ b/icd/api/pipeline_binary_cache.cpp @@ -66,6 +66,76 @@ const uint32_t PipelineBinaryCache::ElfType = Util::HashString(ElfTypeString static Util::Hash128 ParseHash128(const char* str); #endif +static Util::Result CalculateHashId( + Instance* pInstance, + const Util::IPlatformKey* pPlatformKey, + const void* pData, + size_t dataSize, + uint8_t* pHashId) +{ + Util::Result result = Util::Result::Success; + Util::IHashContext* pContext = nullptr; + size_t contextSize = pPlatformKey->GetKeyContext()->GetDuplicateObjectSize(); + void* pContextMem = pInstance->AllocMem( + contextSize, + VK_DEFAULT_MEM_ALIGN, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (pContextMem != nullptr) + { + result = pPlatformKey->GetKeyContext()->Duplicate(pContextMem, &pContext); + } + if (result == Util::Result::Success) + { + result = pContext->AddData(pData, dataSize); + } + if (result == Util::Result::Success) + { + result = pContext->Finish(pHashId); + } + if (pContext != nullptr) + { + pContext->Destroy(); + } + if (pContextMem != nullptr) + { + pInstance->FreeMem(pContextMem); + } + + return result; +} + +bool PipelineBinaryCache::IsValidBlob( + const PhysicalDevice* pPhysicalDevice, + size_t dataSize, + const void* pData) +{ + bool isValid = false; + size_t blobSize = dataSize; + auto pBinaryPrivateHeader = static_cast(pData); + uint8_t 
hashId[SHA_DIGEST_LENGTH]; + + pData = Util::VoidPtrInc(pData, sizeof(PipelineBinaryCachePrivateHeader)); + blobSize -= sizeof(PipelineBinaryCachePrivateHeader); + + if (pPhysicalDevice->GetPlatformKey() != nullptr) + { + Util::Result result = CalculateHashId( + pPhysicalDevice->Manager()->VkInstance(), + pPhysicalDevice->GetPlatformKey(), + pData, + blobSize, + hashId); + + if (result == Util::Result::Success) + { + isValid = (memcmp(hashId, pBinaryPrivateHeader->hashId, SHA_DIGEST_LENGTH) == 0); + } + } + + return isValid; +} + // ===================================================================================================================== // Allocate and initialize a PipelineBinaryCache object PipelineBinaryCache* PipelineBinaryCache::Create( @@ -82,12 +152,44 @@ PipelineBinaryCache* PipelineBinaryCache::Create( { pObj = VK_PLACEMENT_NEW(pMem) PipelineBinaryCache(pInstance, gfxIp, internal); - if (pObj->Initialize(pPhysicalDevice, initDataSize, pInitData) != VK_SUCCESS) + if (pObj->Initialize(pPhysicalDevice) != VK_SUCCESS) { pObj->Destroy(); pInstance->FreeMem(pMem); pObj = nullptr; } + else if ((pInitData != nullptr) && + (initDataSize > (sizeof(BinaryCacheEntry) + sizeof(PipelineBinaryCachePrivateHeader)))) + { + const void* pBlob = pInitData; + size_t blobSize = initDataSize; + constexpr size_t EntrySize = sizeof(BinaryCacheEntry); + + pBlob = Util::VoidPtrInc(pBlob, sizeof(PipelineBinaryCachePrivateHeader)); + blobSize -= sizeof(PipelineBinaryCachePrivateHeader); + while (blobSize > EntrySize) + { + const BinaryCacheEntry* pEntry = static_cast(pBlob); + const void* pData = Util::VoidPtrInc(pBlob, sizeof(BinaryCacheEntry)); + const size_t entryAndDataSize = pEntry->dataSize + sizeof(BinaryCacheEntry); + + if (blobSize >= entryAndDataSize) + { + //add to cache + Util::Result result = pObj->StorePipelineBinary(&pEntry->hashId, pEntry->dataSize, pData); + if (result != Util::Result::Success) + { + break; + } + pBlob = Util::VoidPtrInc(pBlob, entryAndDataSize); + blobSize -= entryAndDataSize; + } + else + { + break; + } + } + } } return pObj; } @@ -109,22 +211,17 @@ PipelineBinaryCache::PipelineBinaryCache( m_pArchiveLayer { nullptr }, m_openFiles { pInstance->Allocator() }, m_archiveLayers { pInstance->Allocator() }, - m_isInternalCache { internal } + m_isInternalCache { internal } { // Without copy constructor, a class type variable can't be initialized in initialization list with gcc 4.8.5. // Initialize m_gfxIp here instead to make gcc 4.8.5 work. 
m_gfxIp = gfxIp; + } // ===================================================================================================================== PipelineBinaryCache::~PipelineBinaryCache() { - if (m_pPlatformKey != nullptr) - { - m_pPlatformKey->Destroy(); - m_pInstance->FreeMem(m_pPlatformKey); - } - for (FileVector::Iter i = m_openFiles.Begin(); i.IsValid(); i.Next()) { i.Get()->Destroy(); @@ -171,7 +268,7 @@ Util::Result PipelineBinaryCache::QueryPipelineBinary( Util::Result PipelineBinaryCache::LoadPipelineBinary( const CacheId* pCacheId, size_t* pPipelineBinarySize, - const void** ppPipelineBinary) + const void** ppPipelineBinary) const { VK_ASSERT(m_pTopLayer != nullptr); @@ -347,9 +444,7 @@ void PipelineBinaryCache::FreePipelineBinary( // ===================================================================================================================== // Build the cache layer chain VkResult PipelineBinaryCache::Initialize( - const PhysicalDevice* pPhysicalDevice, - size_t initDataSize, - const void* pInitData) + const PhysicalDevice* pPhysicalDevice) { VkResult result = VK_SUCCESS; @@ -357,12 +452,17 @@ VkResult PipelineBinaryCache::Initialize( if (result == VK_SUCCESS) { - result = InitializePlatformKey(pPhysicalDevice, settings); + m_pPlatformKey = pPhysicalDevice->GetPlatformKey(); + } + + if (m_pPlatformKey == nullptr) + { + result = VK_ERROR_INITIALIZATION_FAILED; } if (result == VK_SUCCESS) { - result = InitLayers(pPhysicalDevice, initDataSize, pInitData, m_isInternalCache, settings); + result = InitLayers(pPhysicalDevice, m_isInternalCache, settings); } if (result == VK_SUCCESS) @@ -402,52 +502,6 @@ VkResult PipelineBinaryCache::Initialize( return result; } -// ===================================================================================================================== -// Generate our platform key -VkResult PipelineBinaryCache::InitializePlatformKey( - const PhysicalDevice* pPhysicalDevice, - const RuntimeSettings& settings) -{ - static constexpr Util::HashAlgorithm KeyAlgorithm = Util::HashAlgorithm::Sha1; - - struct - { - VkPhysicalDeviceProperties properties; - char* timestamp[sizeof(__TIMESTAMP__)]; - } initialData; - - memset(&initialData, 0, sizeof(initialData)); - - VkResult result = pPhysicalDevice->GetDeviceProperties(&initialData.properties); - - if (result == VK_SUCCESS) - { - size_t memSize = Util::GetPlatformKeySize(KeyAlgorithm); - void* pMem = m_pInstance->AllocMem(memSize, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if (pMem == nullptr) - { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - } - else - { - if (settings.markPipelineCacheWithBuildTimestamp) - { - memcpy(initialData.timestamp, __TIMESTAMP__, sizeof(__TIMESTAMP__)); - } - - if (Util::CreatePlatformKey(KeyAlgorithm, &initialData, sizeof(initialData), pMem, &m_pPlatformKey) != - Util::Result::Success) - { - m_pInstance->FreeMem(pMem); - result = VK_ERROR_INITIALIZATION_FAILED; - } - } - } - - return result; -} - #if ICD_GPUOPEN_DEVMODE_BUILD // ===================================================================================================================== // Initialize reinjection cache layer @@ -1033,8 +1087,6 @@ VkResult PipelineBinaryCache::InitArchiveLayers( // Initialize layers (a single layer that supports storage for binaries needs to succeed) VkResult PipelineBinaryCache::InitLayers( const PhysicalDevice* pPhysicalDevice, - size_t initDataSize, - const void* pInitData, bool internal, const RuntimeSettings& settings) { @@ -1122,4 +1174,149 @@ VkResult PipelineBinaryCache::OrderLayers( return 
result; } +// ===================================================================================================================== +// Copies the pipeline cache data to the memory blob provided by the calling function. +// +// NOTE: It is expected that the calling function has not used this pipeline cache since querying the size. +VkResult PipelineBinaryCache::Serialize( + void* pBlob, // [out] System memory pointer where the serialized data should be placed + size_t* pSize) // [in,out] Size of the memory pointed to by pBlob. If the value stored in pSize is zero then no + // data will be copied and instead the size required for serialization will be returned in pSize +{ + VkResult result = VK_ERROR_INITIALIZATION_FAILED; + +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 534 + if (m_pMemoryLayer != nullptr) + { + if (*pSize == 0) + { + size_t curCount, curDataSize; + + result = PalToVkResult(Util::GetMemoryCacheLayerCurSize(m_pMemoryLayer, &curCount, &curDataSize)); + if (result == VK_SUCCESS) + { + *pSize = curCount * sizeof(BinaryCacheEntry) + curDataSize + sizeof(PipelineBinaryCachePrivateHeader); + } + } + else + { + size_t curCount, curDataSize; + + result = PalToVkResult(Util::GetMemoryCacheLayerCurSize(m_pMemoryLayer, &curCount, &curDataSize)); + if (result == VK_SUCCESS) + { + if (*pSize > (sizeof(BinaryCacheEntry) + sizeof(PipelineBinaryCachePrivateHeader))) + { + Util::AutoBuffer cacheIds(curCount, m_pInstance->Allocator()); + size_t remainingSpace = *pSize - sizeof(PipelineBinaryCachePrivateHeader); + + result = PalToVkResult(Util::GetMemoryCacheLayerHashIds(m_pMemoryLayer, curCount, &cacheIds[0])); + if (result == VK_SUCCESS) + { + void* pDataDst = pBlob; + + // Reserve space for the private header + pDataDst = Util::VoidPtrInc(pDataDst, sizeof(PipelineBinaryCachePrivateHeader)); + + for (uint32_t i = 0; i < curCount && remainingSpace > sizeof(BinaryCacheEntry); i++) + { + size_t dataSize; + const void* pBinaryCacheData; + + result = PalToVkResult(LoadPipelineBinary(&cacheIds[i], &dataSize, &pBinaryCacheData)); + if (result == VK_SUCCESS) + { + if (remainingSpace >= (sizeof(BinaryCacheEntry) + dataSize)) + { + BinaryCacheEntry* pEntry = static_cast<BinaryCacheEntry*>(pDataDst); + + pEntry->hashId = cacheIds[i]; + pEntry->dataSize = dataSize; + + pDataDst = Util::VoidPtrInc(pDataDst, sizeof(BinaryCacheEntry)); + memcpy(pDataDst, pBinaryCacheData, dataSize); + pDataDst = Util::VoidPtrInc(pDataDst, dataSize); + remainingSpace -= (sizeof(BinaryCacheEntry) + dataSize); + } + m_pInstance->FreeMem(const_cast<void*>(pBinaryCacheData)); + } + } + } + if (*pSize < (sizeof(BinaryCacheEntry) * curCount + curDataSize + sizeof(PipelineBinaryCachePrivateHeader))) + { + result = VK_INCOMPLETE; + } + *pSize -= remainingSpace; + + auto pBinaryPrivateHeader = static_cast<PipelineBinaryCachePrivateHeader*>(pBlob); + void* pData = Util::VoidPtrInc(pBlob, sizeof(PipelineBinaryCachePrivateHeader)); + + result = PalToVkResult(CalculateHashId( + m_pInstance, + m_pPlatformKey, + pData, + *pSize - sizeof(PipelineBinaryCachePrivateHeader), + pBinaryPrivateHeader->hashId)); + } + else + { + result = VK_ERROR_INITIALIZATION_FAILED; + } + } + } + } +#endif + return result; +} + +// ===================================================================================================================== +// Merges the data from the given source pipeline caches into this cache +// +VkResult PipelineBinaryCache::Merge( + uint32_t srcCacheCount, + const PipelineBinaryCache** ppSrcCaches) +{ + VkResult result = VK_ERROR_INITIALIZATION_FAILED; + +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 534 + if (m_pMemoryLayer != nullptr)
+ { + for (uint32_t i = 0; i < srcCacheCount; i++) + { + Util::ICacheLayer* pMemoryLayer = ppSrcCaches[i]->GetMemoryLayer(); + size_t curCount, curDataSize; + + result = PalToVkResult(Util::GetMemoryCacheLayerCurSize(pMemoryLayer, &curCount, &curDataSize)); + if ((result == VK_SUCCESS) && (curCount > 0)) + { + Util::AutoBuffer cacheIds(curCount, m_pInstance->Allocator()); + + result = PalToVkResult(Util::GetMemoryCacheLayerHashIds(pMemoryLayer, curCount, &cacheIds[0])); + if (result == VK_SUCCESS) + { + for (uint32_t j = 0; j < curCount; j++) + { + size_t dataSize; + const void* pBinaryCacheData; + + result = PalToVkResult(ppSrcCaches[i]->LoadPipelineBinary(&cacheIds[j], &dataSize, &pBinaryCacheData)); + if (result == VK_SUCCESS) + { + result = PalToVkResult(StorePipelineBinary(&cacheIds[j], dataSize, pBinaryCacheData)); + m_pInstance->FreeMem(const_cast<void*>(pBinaryCacheData)); + if (result != VK_SUCCESS) + { + break; + } + } + } + } + } + } + } +#endif + + return result; +} + } // namespace vk diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index 40b0c206..62316c24 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -151,7 +151,7 @@ VkResult PipelineCompiler::Initialize() if (result == VK_SUCCESS) { - result = m_compilerSolutionLlpc.Initialize(); + result = m_compilerSolutionLlpc.Initialize(m_gfxIp, info.gfxLevel); } if ((result == VK_SUCCESS) && diff --git a/icd/api/renderpass/renderpass_logger.cpp b/icd/api/renderpass/renderpass_logger.cpp index e4c451ca..98683a42 100644 --- a/icd/api/renderpass/renderpass_logger.cpp +++ b/icd/api/renderpass/renderpass_logger.cpp @@ -220,7 +220,7 @@ void RenderPassLogger::LogAttachmentReference( const AttachmentReference& reference) { LogAttachment(reference.attachment); - Log(" in %s, %s", ImageLayoutString(reference.layout, false), ImageLayoutString(reference.stencilLayout, false)); + Log(" in %s", ImageLayoutString(reference.layout, false)); Log(" aspectMask "); LogImageAspectMask(reference.aspectMask, false); } @@ -232,8 +232,6 @@ void RenderPassLogger::LogAttachmentReference( LogAttachment(reference.attachment); Log(" in "); LogImageLayout(reference.layout); - Log(", "); - LogImageLayout(reference.stencilLayout); } // ===================================================================================================================== diff --git a/icd/api/sqtt/sqtt_rgp_annotations.h b/icd/api/sqtt/sqtt_rgp_annotations.h index 7392b557..e5b2e478 100644 --- a/icd/api/sqtt/sqtt_rgp_annotations.h +++ b/icd/api/sqtt/sqtt_rgp_annotations.h @@ -176,35 +176,35 @@ constexpr uint32_t RgpSqttMarkerCbEndWordCount = 3; // existing values can't be changed.
enum class RgpSqttMarkerEventType : uint32_t { - CmdDraw = 0, // vkCmdDraw - CmdDrawIndexed = 1, // vkCmdDrawIndexed - CmdDrawIndirect = 2, // vkCmdDrawIndirect - CmdDrawIndexedIndirect = 3, // vkCmdDrawIndexedIndirect - CmdDrawIndirectCountAMD = 4, // vkCmdDrawIndirectCountAMD - CmdDrawIndexedIndirectCountAMD = 5, // vkCmdDrawIndexedIndirectCountAMD - CmdDispatch = 6, // vkCmdDispatch - CmdDispatchIndirect = 7, // vkCmdDispatchIndirect - CmdCopyBuffer = 8, // vkCmdCopyBuffer - CmdCopyImage = 9, // vkCmdCopyImage - CmdBlitImage = 10, // vkCmdBlitImage - CmdCopyBufferToImage = 11, // vkCmdCopyBufferToImage - CmdCopyImageToBuffer = 12, // vkCmdCopyImageToBuffer - CmdUpdateBuffer = 13, // vkCmdUpdateBuffer - CmdFillBuffer = 14, // vkCmdFillBuffer - CmdClearColorImage = 15, // vkCmdClearColorImage - CmdClearDepthStencilImage = 16, // vkCmdClearDepthStencilImage - CmdClearAttachments = 17, // vkCmdClearAttachments - CmdResolveImage = 18, // vkCmdResolveImage - CmdWaitEvents = 19, // vkCmdWaitEvents - CmdPipelineBarrier = 20, // vkCmdPipelineBarrier - CmdResetQueryPool = 21, // vkCmdResetQueryPool - CmdCopyQueryPoolResults = 22, // vkCmdCopyQueryPoolResults - RenderPassColorClear = 23, // Render pass: Color clear triggered by attachment load op - RenderPassDepthStencilClear = 24, // Render pass: Depth-stencil clear triggered by attachment load op - RenderPassResolve = 25, // Render pass: Color multisample resolve triggered by resolve attachment - InternalUnknown = 26, // Draw or dispatch by PAL due to a reason we do not know - CmdDrawIndirectCountKHR = 27, // vkCmdDrawIndirectCountKHR - CmdDrawIndexedIndirectCountKHR = 28, // vkCmdDrawIndexedIndirectCountKHR + CmdDraw = 0, // vkCmdDraw + CmdDrawIndexed = 1, // vkCmdDrawIndexed + CmdDrawIndirect = 2, // vkCmdDrawIndirect + CmdDrawIndexedIndirect = 3, // vkCmdDrawIndexedIndirect + CmdDrawIndirectCountAMD = 4, // vkCmdDrawIndirectCountAMD + CmdDrawIndexedIndirectCountAMD = 5, // vkCmdDrawIndexedIndirectCountAMD + CmdDispatch = 6, // vkCmdDispatch + CmdDispatchIndirect = 7, // vkCmdDispatchIndirect + CmdCopyBuffer = 8, // vkCmdCopyBuffer + CmdCopyImage = 9, // vkCmdCopyImage + CmdBlitImage = 10, // vkCmdBlitImage + CmdCopyBufferToImage = 11, // vkCmdCopyBufferToImage + CmdCopyImageToBuffer = 12, // vkCmdCopyImageToBuffer + CmdUpdateBuffer = 13, // vkCmdUpdateBuffer + CmdFillBuffer = 14, // vkCmdFillBuffer + CmdClearColorImage = 15, // vkCmdClearColorImage + CmdClearDepthStencilImage = 16, // vkCmdClearDepthStencilImage + CmdClearAttachments = 17, // vkCmdClearAttachments + CmdResolveImage = 18, // vkCmdResolveImage + CmdWaitEvents = 19, // vkCmdWaitEvents + CmdPipelineBarrier = 20, // vkCmdPipelineBarrier + CmdResetQueryPool = 21, // vkCmdResetQueryPool + CmdCopyQueryPoolResults = 22, // vkCmdCopyQueryPoolResults + RenderPassColorClear = 23, // Render pass: Color clear triggered by attachment load op + RenderPassDepthStencilClear = 24, // Render pass: Depth-stencil clear triggered by attachment load op + RenderPassResolve = 25, // Render pass: Color multisample resolve triggered by resolve attachment + InternalUnknown = 26, // Draw or dispatch by PAL due to a reason we do not know + CmdDrawIndirectCountKHR = 27, // vkCmdDrawIndirectCountKHR + CmdDrawIndexedIndirectCountKHR = 28, // vkCmdDrawIndexedIndirectCountKHR Invalid = 0xffffffff }; @@ -431,52 +431,52 @@ struct RgpSqttMarkerUserEventWithString enum class RgpSqttMarkerGeneralApiType : uint32_t { // Interesting subset of core Vulkan 1.0: - CmdBindPipeline = 0, - CmdBindDescriptorSets = 1, - 
CmdBindIndexBuffer = 2, - CmdBindVertexBuffers = 3, - CmdDraw = 4, - CmdDrawIndexed = 5, - CmdDrawIndirect = 6, - CmdDrawIndexedIndirect = 7, - CmdDrawIndirectCountAMD = 8, - CmdDrawIndexedIndirectCountAMD = 9, - CmdDispatch = 10, - CmdDispatchIndirect = 11, - CmdCopyBuffer = 12, - CmdCopyImage = 13, - CmdBlitImage = 14, - CmdCopyBufferToImage = 15, - CmdCopyImageToBuffer = 16, - CmdUpdateBuffer = 17, - CmdFillBuffer = 18, - CmdClearColorImage = 19, - CmdClearDepthStencilImage = 20, - CmdClearAttachments = 21, - CmdResolveImage = 22, - CmdWaitEvents = 23, - CmdPipelineBarrier = 24, - CmdBeginQuery = 25, - CmdEndQuery = 26, - CmdResetQueryPool = 27, - CmdWriteTimestamp = 28, - CmdCopyQueryPoolResults = 29, - CmdPushConstants = 30, - CmdBeginRenderPass = 31, - CmdNextSubpass = 32, - CmdEndRenderPass = 33, - CmdExecuteCommands = 34, - CmdSetViewport = 35, - CmdSetScissor = 36, - CmdSetLineWidth = 37, - CmdSetDepthBias = 38, - CmdSetBlendConstants = 39, - CmdSetDepthBounds = 40, - CmdSetStencilCompareMask = 41, - CmdSetStencilWriteMask = 42, - CmdSetStencilReference = 43, - CmdDrawIndirectCountKHR = 44, - CmdDrawIndexedIndirectCountKHR = 45, + CmdBindPipeline = 0, + CmdBindDescriptorSets = 1, + CmdBindIndexBuffer = 2, + CmdBindVertexBuffers = 3, + CmdDraw = 4, + CmdDrawIndexed = 5, + CmdDrawIndirect = 6, + CmdDrawIndexedIndirect = 7, + CmdDrawIndirectCountAMD = 8, + CmdDrawIndexedIndirectCountAMD = 9, + CmdDispatch = 10, + CmdDispatchIndirect = 11, + CmdCopyBuffer = 12, + CmdCopyImage = 13, + CmdBlitImage = 14, + CmdCopyBufferToImage = 15, + CmdCopyImageToBuffer = 16, + CmdUpdateBuffer = 17, + CmdFillBuffer = 18, + CmdClearColorImage = 19, + CmdClearDepthStencilImage = 20, + CmdClearAttachments = 21, + CmdResolveImage = 22, + CmdWaitEvents = 23, + CmdPipelineBarrier = 24, + CmdBeginQuery = 25, + CmdEndQuery = 26, + CmdResetQueryPool = 27, + CmdWriteTimestamp = 28, + CmdCopyQueryPoolResults = 29, + CmdPushConstants = 30, + CmdBeginRenderPass = 31, + CmdNextSubpass = 32, + CmdEndRenderPass = 33, + CmdExecuteCommands = 34, + CmdSetViewport = 35, + CmdSetScissor = 36, + CmdSetLineWidth = 37, + CmdSetDepthBias = 38, + CmdSetBlendConstants = 39, + CmdSetDepthBounds = 40, + CmdSetStencilCompareMask = 41, + CmdSetStencilWriteMask = 42, + CmdSetStencilReference = 43, + CmdDrawIndirectCountKHR = 44, + CmdDrawIndexedIndirectCountKHR = 45, Invalid = 0xffffffff }; diff --git a/icd/api/strings/base_extensions.txt b/icd/api/strings/base_extensions.txt index 6e22799b..a84c7d47 100644 --- a/icd/api/strings/base_extensions.txt +++ b/icd/api/strings/base_extensions.txt @@ -109,3 +109,4 @@ VK_EXT_subgroup_size_control VK_EXT_calibrated_timestamps VK_KHR_pipeline_executable_properties VK_EXT_line_rasterization +VK_EXT_post_depth_coverage diff --git a/icd/api/vk_cmd_pool.cpp b/icd/api/vk_cmd_pool.cpp index c100efbc..b153ab2b 100644 --- a/icd/api/vk_cmd_pool.cpp +++ b/icd/api/vk_cmd_pool.cpp @@ -218,8 +218,6 @@ VkResult CmdPool::Destroy( pAllocator->pfnFree(pAllocator->pUserData, m_pPalCmdAllocators[DefaultDeviceIndex]); } - DestroyGpuEventMgrs(); - Util::Destructor(this); pAllocator->pfnFree(pAllocator->pUserData, this); @@ -227,26 +225,6 @@ VkResult CmdPool::Destroy( return VK_SUCCESS; } -// ===================================================================================================================== -void CmdPool::DestroyGpuEventMgrs() -{ - while (m_freeEventMgrs.IsEmpty() == false) - { - VK_ASSERT(m_totalEventMgrCount > 0); - - m_totalEventMgrCount--; - - GpuEventMgr::List::Iter it = 
m_freeEventMgrs.Begin(); - GpuEventMgr* pEventMgr = it.Get(); - m_freeEventMgrs.Erase(&it); - - pEventMgr->Destroy(); - m_pDevice->VkInstance()->FreeMem(pEventMgr); - } - - VK_ASSERT(m_totalEventMgrCount == 0); -} - // ===================================================================================================================== VkResult CmdPool::PalCmdAllocatorReset() { @@ -308,48 +286,6 @@ void CmdPool::UnregisterCmdBuffer(CmdBuffer* pCmdBuffer) m_cmdBufferRegistry.Erase(pCmdBuffer); } -// ===================================================================================================================== -GpuEventMgr* CmdPool::AcquireGpuEventMgr() -{ - GpuEventMgr* pEventMgr = nullptr; - - if (!m_freeEventMgrs.IsEmpty()) - { - GpuEventMgr::List::Iter it = m_freeEventMgrs.Begin(); - - pEventMgr = it.Get(); - - m_freeEventMgrs.Erase(&it); - } - - if (pEventMgr == nullptr) - { - void* pMemory = m_pDevice->VkInstance()->AllocMem( - sizeof(GpuEventMgr), - VK_DEFAULT_MEM_ALIGN, - VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - - if (pMemory != nullptr) - { - pEventMgr = VK_PLACEMENT_NEW(pMemory) GpuEventMgr(m_pDevice); - - m_totalEventMgrCount++; - } - } - - return pEventMgr; -} - -// ===================================================================================================================== -void CmdPool::ReleaseGpuEventMgr(GpuEventMgr* pGpuEventMgr) -{ - VK_ASSERT(pGpuEventMgr->ListNode()->InList() == false); - - pGpuEventMgr->ResetEvents(); - - m_freeEventMgrs.PushBack(pGpuEventMgr->ListNode()); -} - /** *********************************************************************************************************************** * C-Callable entry points start here. These entries go in the dispatch table(s). diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index 40601688..b8427177 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -352,7 +352,6 @@ CmdBuffer::CmdBuffer( m_cbBeginDeviceMask(0), m_validShaderStageFlags(pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetValidShaderStages(queueFamilyIndex)), m_pStackAllocator(nullptr), - m_pGpuEventMgr(nullptr), m_vbMgr(pDevice), m_is2ndLvl(false), m_isRecording(false), @@ -1088,24 +1087,7 @@ VkResult CmdBuffer::Begin( } } - // Get a GPU event manager if we don't already have one - if (m_pGpuEventMgr == nullptr) - { - m_pGpuEventMgr = m_pCmdPool->AcquireGpuEventMgr(); - - if (m_pGpuEventMgr == nullptr) - { - result = Pal::Result::ErrorOutOfMemory; - } - } - - // Notify the GPU event manager we're starting a new command buffer - if (m_pGpuEventMgr != nullptr) - { - m_pGpuEventMgr->BeginCmdBuf(this, cmdInfo); - - m_isRecording = true; - } + m_isRecording = true; if (m_is2ndLvl && pRenderPass) // secondary VkCommandBuffer will be used inside VkRenderPass { @@ -1225,11 +1207,6 @@ void CmdBuffer::ResetState() m_curDeviceMask = InvalidPalDeviceMask; - if (m_pGpuEventMgr != nullptr) - { - m_pGpuEventMgr->ResetCmdBuf(this); - } - m_renderPassInstance.pExecuteInfo = nullptr; m_renderPassInstance.subpass = VK_SUBPASS_EXTERNAL; m_renderPassInstance.flags.u32All = 0; @@ -1569,13 +1546,6 @@ void CmdBuffer::ReleaseResources() m_renderPassInstance.maxSubpassCount = 0; } - // Release the GPU event manager back to the command pool - if (m_pGpuEventMgr != nullptr) - { - m_pCmdPool->ReleaseGpuEventMgr(m_pGpuEventMgr); - m_pGpuEventMgr = nullptr; - } - if (m_pStackAllocator != nullptr) { pInstance->StackMgr()->ReleaseAllocator(m_pStackAllocator); @@ -2704,12 +2674,6 @@ void CmdBuffer::PalCmdResetEvent( } } -// 
===================================================================================================================== -// Instantiate the template function -template void CmdBuffer::PalCmdResetEvent( - GpuEvents* pEvent, - Pal::HwPipePoint resetPoint); - // ===================================================================================================================== template void CmdBuffer::PalCmdSetEvent( @@ -2725,12 +2689,6 @@ void CmdBuffer::PalCmdSetEvent( } } -// ===================================================================================================================== -// Instantiate the template function -template void CmdBuffer::PalCmdSetEvent( - GpuEvents* pEvent, - Pal::HwPipePoint resetPoint); - // ===================================================================================================================== template void CmdBuffer::PalCmdResolveImage( @@ -5035,28 +4993,6 @@ void CmdBuffer::PushConstants( DbgBarrierPostCmd(DbgBarrierBindSetsPushConstants); } -// ===================================================================================================================== -void CmdBuffer::RequestRenderPassEvents( - uint32_t eventCount, - GpuEvents*** pppGpuEvents) -{ - VK_ASSERT(m_pGpuEventMgr != nullptr); - - // This function may fail if we've run out of system/video memory. There is no way to return "out of memory" - // during command buffer building -- the function is just expected to succeed. Under these extreme conditions, - // the render pass logic will fall back to using a hard pipeline barrier between every node. - VkResult result = m_pGpuEventMgr->RequestEvents(this, eventCount, pppGpuEvents); - - if (result != VK_SUCCESS) - { - // This situation should be so rare that it's worth asserting here. If we actually ever hit this condition, - // we are probably leaking GPU memory somewhere. 
- VK_ALERT("Failed to create GPU events for render passes."); - - *pppGpuEvents = nullptr; - } -} - // ===================================================================================================================== void CmdBuffer::SetViewport( uint32_t firstViewport, diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index b66c573e..bfadb4d5 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -294,22 +294,37 @@ static void ConstructQueueCreateInfo( { VK_ASSERT(queuePriority == VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT); - pQueueCreateInfo->engineType = Pal::EngineType::EngineTypeExclusiveCompute; + pQueueCreateInfo->engineType = Pal::EngineType::EngineTypeCompute; pQueueCreateInfo->engineIndex = rtCuHighComputeSubEngineIndex; pQueueCreateInfo->numReservedCu = dedicatedComputeUnits; } - else if ((palQueuePriority > Pal::QueuePriority::Low) && - (palQueueType == Pal::QueueType::QueueTypeCompute) && - (vrHighPriorityIndex != UINT32_MAX)) + else if (palQueueType == Pal::QueueType::QueueTypeCompute) { - pQueueCreateInfo->engineType = Pal::EngineType::EngineTypeExclusiveCompute; - pQueueCreateInfo->engineIndex = vrHighPriorityIndex; + pQueueCreateInfo->engineType = Pal::EngineType::EngineTypeCompute; + + if ((palQueuePriority > Pal::QueuePriority::Idle) && + (vrHighPriorityIndex != UINT32_MAX)) + { + pQueueCreateInfo->engineIndex = vrHighPriorityIndex; + } + else + { + pQueueCreateInfo->engineIndex = pPhysicalDevices[deviceIdx]->GetCompQueueEngineIndex(queueIndex); + } } else { pQueueCreateInfo->engineType = pPhysicalDevices[deviceIdx]->GetQueueFamilyPalEngineType(queueFamilyIndex); - pQueueCreateInfo->engineIndex = queueIndex; + + if (palQueueType == Pal::QueueType::QueueTypeUniversal) + { + pQueueCreateInfo->engineIndex = pPhysicalDevices[deviceIdx]->GetUniversalQueueEngineIndex(queueIndex); + } + else + { + pQueueCreateInfo->engineIndex = queueIndex; + } } pQueueCreateInfo->queueType = palQueueType; @@ -638,6 +653,17 @@ VkResult Device::Create( reinterpret_cast<const VkDeviceMemoryOverallocationCreateInfoAMD*>(pHeader); overallocationBehavior = pMemoryOverallocationCreateInfo->overallocationBehavior; + + break; + } + + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: + { + vkResult = VerifyRequestedPhysicalDeviceFeatures( + pPhysicalDevice, + reinterpret_cast<const VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT*>(pHeader)); + + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: @@ -1146,7 +1172,7 @@ VkResult Device::Initialize( } case AppProfile::WolfensteinII: // This application optimization layer is currently GFX10-specific - if (deviceProps.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (deviceProps.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { void* pMemory = VkInstance()->AllocMem(sizeof(Wolfenstein2Layer), VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); @@ -1820,15 +1846,19 @@ void Device::DestroyInternalPipelines() // Wait for device idle. Punts to PAL device.
VkResult Device::WaitIdle(void) { - for (uint32_t i = 0; i < Queue::MaxQueueFamilies; ++i) + VkResult result = VK_SUCCESS; + + for (uint32_t i = 0; (i < Queue::MaxQueueFamilies) && (result == VK_SUCCESS); ++i) { - for (uint32_t j = 0; (j < Queue::MaxQueuesPerFamily) && (m_pQueues[i][j] != nullptr); ++j) + for (uint32_t j = 0; + (j < Queue::MaxQueuesPerFamily) && (m_pQueues[i][j] != nullptr) && (result == VK_SUCCESS); + ++j) { - (*m_pQueues[i][j])->WaitIdle(); + result = (*m_pQueues[i][j])->WaitIdle(); } } - return VK_SUCCESS; + return result; } // ===================================================================================================================== diff --git a/icd/api/vk_event.cpp b/icd/api/vk_event.cpp index e46f156e..93200e6d 100644 --- a/icd/api/vk_event.cpp +++ b/icd/api/vk_event.cpp @@ -29,7 +29,6 @@ *********************************************************************************************************************** */ -#include "include/gpu_event_mgr.h" #include "include/vk_conv.h" #include "include/vk_device.h" #include "include/vk_event.h" diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index 7d53246d..fed901a9 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -659,7 +659,7 @@ VkResult Image::Create( // b. If dev enables the extension: keep DCC enabled for UAVs with <= 4 mips // c. Can app-detect un-disable DCC for cases where we know devs don't store to multiple mips Pal::GfxIpLevel gfxLevel = pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalProperties().gfxLevel; - if ((gfxLevel > Pal::GfxIpLevel::GfxIp9) && (gfxLevel <= Pal::GfxIpLevel::GfxIp10_1) && + if ((gfxLevel == Pal::GfxIpLevel::GfxIp10_1) && pDevice->IsExtensionEnabled(DeviceExtensions::AMD_SHADER_IMAGE_LOAD_STORE_LOD) && (pCreateInfo->mipLevels > 4) && (pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT)) { diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index c09acdca..a20a43c0 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -59,6 +59,7 @@ #include "palLib.h" #include "palMath.h" #include "palMsaaState.h" +#include "palPlatformKey.h" #include "palScreen.h" #include "palHashLiteralString.h" #include @@ -259,7 +260,8 @@ PhysicalDevice::PhysicalDevice( m_prtOnDmaSupported(true), m_supportedExtensions(), m_allowedExtensions(), - m_compiler(this) + m_compiler(this), + m_pPlatformKey(nullptr) { memset(&m_limits, 0, sizeof(m_limits)); memset(m_formatFeatureMsaaTarget, 0, sizeof(m_formatFeatureMsaaTarget)); @@ -444,6 +446,44 @@ void PhysicalDevice::DecreaseAllocatedMemorySize( m_memoryUsageTracker.allocatedMemorySize[heapIdx] -= allocationSize; } +// ===================================================================================================================== +// Generate our platform key +void PhysicalDevice::InitializePlatformKey( + const RuntimeSettings& settings) +{ + static constexpr Util::HashAlgorithm KeyAlgorithm = Util::HashAlgorithm::Sha1; + + struct + { + VkPhysicalDeviceProperties properties; + char timestamp[sizeof(__TIMESTAMP__)]; + } initialData; + + memset(&initialData, 0, sizeof(initialData)); + + VkResult result = GetDeviceProperties(&initialData.properties); + + if (result == VK_SUCCESS) + { + size_t memSize = Util::GetPlatformKeySize(KeyAlgorithm); + void* pMem = VkInstance()->AllocMem(memSize, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + + if (pMem != nullptr) + { + if (settings.markPipelineCacheWithBuildTimestamp) + { + memcpy(initialData.timestamp, __TIMESTAMP__, sizeof(__TIMESTAMP__)); + } + + if
(Util::CreatePlatformKey(KeyAlgorithm, &initialData, sizeof(initialData), pMem, &m_pPlatformKey) != + Util::Result::Success) + { + VkInstance()->FreeMem(pMem); + } + } + } +} + // ===================================================================================================================== VkResult PhysicalDevice::Initialize() { @@ -466,14 +506,8 @@ VkResult PhysicalDevice::Initialize() { for (uint32_t idx = 0; idx < Pal::EngineTypeCount; ++idx) { - // We do not currently create a high priority universal queue, so we don't need that engine. - // In order to support global priority, we still need exclusive compute engine to be initialized - // but this engine can only be selected according to the global priority set by application - if (idx != static_cast(Pal::EngineTypeHighPriorityUniversal)) - { - const auto& engineProps = m_properties.engineProperties[idx]; - finalizeInfo.requestedEngineCounts[idx].engines = ((1 << engineProps.engineCount) - 1); - } + const auto& engineProps = m_properties.engineProperties[idx]; + finalizeInfo.requestedEngineCounts[idx].engines = ((1 << engineProps.engineCount) - 1); } } @@ -742,8 +776,10 @@ VkResult PhysicalDevice::Initialize() } VkResult vkResult = PalToVkResult(result); + if (vkResult == VK_SUCCESS) { + InitializePlatformKey(settings); vkResult = m_compiler.Initialize(); } @@ -895,6 +931,12 @@ void PhysicalDevice::LateInitialize() // ===================================================================================================================== VkResult PhysicalDevice::Destroy(void) { + if (m_pPlatformKey != nullptr) + { + m_pPlatformKey->Destroy(); + VkInstance()->FreeMem(m_pPlatformKey); + } + m_compiler.Destroy(); this->~PhysicalDevice(); @@ -1033,8 +1075,10 @@ VkResult PhysicalDevice::GetFeatures( pFeatures->shaderStorageImageArrayDynamicIndexing = VK_TRUE; pFeatures->shaderClipDistance = VK_TRUE; pFeatures->shaderCullDistance = VK_TRUE; - pFeatures->shaderFloat64 = VK_TRUE; - pFeatures->shaderInt64 = VK_TRUE; + pFeatures->shaderFloat64 = + (PalProperties().gfxipProperties.flags.support64BitInstructions ? VK_TRUE : VK_FALSE); + pFeatures->shaderInt64 = + (PalProperties().gfxipProperties.flags.support64BitInstructions ? 
VK_TRUE : VK_FALSE); if ((PalProperties().gfxipProperties.flags.support16BitInstructions) && ((GetRuntimeSettings().optOnlyEnableFP16ForGfx9Plus == false) || @@ -3202,16 +3246,11 @@ void PhysicalDevice::PopulateQueueFamilies() VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT, - // Pal::EngineTypeExclusiveCompute - 0, // Pal::EngineTypeDma VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT, // Pal::EngineTypeTimer 0, - // Pal::EngineTypeHighPriorityUniversal - 0, - }; // While it's possible for an engineType to support multiple queueTypes, @@ -3220,10 +3259,8 @@ void PhysicalDevice::PopulateQueueFamilies() { Pal::QueueTypeUniversal, Pal::QueueTypeCompute, - Pal::QueueTypeCompute, Pal::QueueTypeDma, Pal::QueueTypeTimer, - Pal::QueueTypeUniversal, }; @@ -3231,11 +3268,9 @@ void PhysicalDevice::PopulateQueueFamilies() (VK_ARRAY_SIZE(palQueueTypes) == Pal::EngineTypeCount) && (Pal::EngineTypeUniversal == 0) && (Pal::EngineTypeCompute == 1) && - (Pal::EngineTypeExclusiveCompute == 2) && - (Pal::EngineTypeDma == 3) && - (Pal::EngineTypeTimer == 4) && - (Pal::EngineTypeHighPriorityUniversal == 0x5), - "PAL engine types have changed, need to update the tables above"); + (Pal::EngineTypeDma == 2) && + (Pal::EngineTypeTimer == 3) + , "PAL engine types have changed, need to update the tables above"); // Always enable core queue flags. Final determination of support will be done on a per-engine basis. uint32_t enabledQueueFlags = @@ -3244,17 +3279,48 @@ void PhysicalDevice::PopulateQueueFamilies() VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT; - // find out the sub engine index of VrHighPriority. - const auto& exclusiveComputeProps = m_properties.engineProperties[Pal::EngineTypeExclusiveCompute]; - for (uint32_t subEngineIndex = 0; subEngineIndex < exclusiveComputeProps.engineCount; subEngineIndex++) + const uint32 queueSupportPriority = Pal::QueuePrioritySupport::SupportQueuePriorityNormal | + Pal::QueuePrioritySupport::SupportQueuePriorityIdle; + + // find out the sub engine index of VrHighPriority and indices for compute engines that aren't exclusive. { - if (exclusiveComputeProps.engineSubType[subEngineIndex] == Pal::EngineSubType::VrHighPriority) + const auto& computeProps = m_properties.engineProperties[Pal::EngineTypeCompute]; + uint32_t engineIndex = 0u; + for (uint32_t subEngineIndex = 0; subEngineIndex < computeProps.engineCount; subEngineIndex++) { - m_vrHighPrioritySubEngineIndex = subEngineIndex; + if (computeProps.capabilities[subEngineIndex].flags.exclusive == 1) + { + if (computeProps.capabilities[subEngineIndex].queuePrioritySupport & + Pal::QueuePrioritySupport::SupportQueuePriorityRealtime) + { + m_RtCuHighComputeSubEngineIndex = subEngineIndex; + } + else if (computeProps.capabilities[subEngineIndex].queuePrioritySupport & + Pal::QueuePrioritySupport::SupportQueuePriorityHigh) + { + m_vrHighPrioritySubEngineIndex = subEngineIndex; + } + } + else if ((computeProps.capabilities[subEngineIndex].queuePrioritySupport == queueSupportPriority) || + (computeProps.capabilities[subEngineIndex].queuePrioritySupport == 0u)) + { + m_compQueueEnginesNdx[engineIndex++] = subEngineIndex; + } } - else if (exclusiveComputeProps.engineSubType[subEngineIndex] == Pal::EngineSubType::RtCuHighCompute) + } + + // find out universal engines that aren't exclusive. 
+ { + const auto& universalProps = m_properties.engineProperties[Pal::EngineTypeUniversal]; + uint32_t engineIndex = 0u; + for (uint32_t subEngineIndex = 0; subEngineIndex < universalProps.engineCount; subEngineIndex++) { - m_RtCuHighComputeSubEngineIndex = subEngineIndex; + if ((universalProps.capabilities[subEngineIndex].flags.exclusive == 0) && + ((universalProps.capabilities[subEngineIndex].queuePrioritySupport == queueSupportPriority) || + (universalProps.capabilities[subEngineIndex].queuePrioritySupport == 0u))) + { + m_universalQueueEnginesNdx[engineIndex++] = subEngineIndex; + } } } @@ -3302,8 +3368,6 @@ void PhysicalDevice::PopulateQueueFamilies() break; case Pal::EngineTypeCompute: pComputeQueueFamilyProperties = &m_queueFamilies[m_queueFamilyCount].properties; - // fallthrough - case Pal::EngineTypeExclusiveCompute: palImageLayoutFlag = Pal::LayoutComputeEngine; transferGranularityOverride = settings.transferGranularityComputeOverride; m_queueFamilies[m_queueFamilyCount].validShaderStages |= VK_SHADER_STAGE_COMPUTE_BIT; @@ -3327,9 +3391,21 @@ void PhysicalDevice::PopulateQueueFamilies() VkQueueFamilyProperties* pQueueFamilyProps = &m_queueFamilies[m_queueFamilyCount].properties; pQueueFamilyProps->queueFlags = (vkQueueFlags[engineType] & supportedQueueFlags); - pQueueFamilyProps->queueCount = (engineType == Pal::EngineTypeCompute) - ? Util::Min(settings.asyncComputeQueueLimit, engineProps.engineCount) - : engineProps.engineCount; + pQueueFamilyProps->queueCount = 0u; + + for (uint32 engineNdx = 0u; engineNdx < engineProps.engineCount; ++engineNdx) + { + if ((engineProps.capabilities[engineNdx].flags.exclusive == 0) && + ((engineProps.capabilities[engineNdx].queuePrioritySupport == queueSupportPriority) || + (engineProps.capabilities[engineNdx].queuePrioritySupport == 0u))) + { + pQueueFamilyProps->queueCount++; + } + } + pQueueFamilyProps->queueCount = (engineType == Pal::EngineTypeCompute) + ? Util::Min(settings.asyncComputeQueueLimit, pQueueFamilyProps->queueCount) + : pQueueFamilyProps->queueCount; + pQueueFamilyProps->timestampValidBits = (engineProps.flags.supportsTimestamps != 0) ? 
64 : 0; pQueueFamilyProps->minImageTransferGranularity = PalToVkExtent3d(engineProps.minTiledImageCopyAlignment); @@ -3498,17 +3574,17 @@ void PhysicalDevice::GetPhysicalDeviceIDProperties( *pDeviceLUIDValid = VK_FALSE; #if defined(INTEROP_DRIVER_UUID) - const char* pDriverUuidString = INTEROP_DRIVER_UUID; + const char driverUuidString[] = INTEROP_DRIVER_UUID; #else - const char* pDriverUuidString = "AMD-LINUX-DRV"; + const char driverUuidString[] = "AMD-LINUX-DRV"; #endif - static_assert(VK_UUID_SIZE >= sizeof(pDriverUuidString), + static_assert(VK_UUID_SIZE >= sizeof(driverUuidString), "The driver UUID string has changed and now exceeds the maximum length permitted by Vulkan"); memcpy(pDriverUUID, - pDriverUuidString, - strlen(pDriverUuidString)); + driverUuidString, + strlen(driverUuidString)); } // ===================================================================================================================== @@ -3608,19 +3684,10 @@ void PhysicalDevice::GetPhysicalDeviceFloatControlsProperties( pFloatControlsProperties->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR; pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat32 = VK_TRUE; - pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat64 = VK_TRUE; - pFloatControlsProperties->shaderDenormPreserveFloat32 = VK_TRUE; - pFloatControlsProperties->shaderDenormPreserveFloat64 = VK_TRUE; - pFloatControlsProperties->shaderDenormFlushToZeroFloat32 = VK_TRUE; - pFloatControlsProperties->shaderDenormFlushToZeroFloat64 = VK_TRUE; - pFloatControlsProperties->shaderRoundingModeRTEFloat32 = VK_TRUE; - pFloatControlsProperties->shaderRoundingModeRTEFloat64 = VK_TRUE; - pFloatControlsProperties->shaderRoundingModeRTZFloat32 = VK_TRUE; - pFloatControlsProperties->shaderRoundingModeRTZFloat64 = VK_TRUE; if (PalProperties().gfxipProperties.flags.supportDoubleRate16BitInstructions) { @@ -3638,6 +3705,23 @@ void PhysicalDevice::GetPhysicalDeviceFloatControlsProperties( pFloatControlsProperties->shaderRoundingModeRTEFloat16 = VK_FALSE; pFloatControlsProperties->shaderRoundingModeRTZFloat16 = VK_FALSE; } + + if (PalProperties().gfxipProperties.flags.support64BitInstructions) + { + pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat64 = VK_TRUE; + pFloatControlsProperties->shaderDenormPreserveFloat64 = VK_TRUE; + pFloatControlsProperties->shaderDenormFlushToZeroFloat64 = VK_TRUE; + pFloatControlsProperties->shaderRoundingModeRTEFloat64 = VK_TRUE; + pFloatControlsProperties->shaderRoundingModeRTZFloat64 = VK_TRUE; + } + else + { + pFloatControlsProperties->shaderSignedZeroInfNanPreserveFloat64 = VK_FALSE; + pFloatControlsProperties->shaderDenormPreserveFloat64 = VK_FALSE; + pFloatControlsProperties->shaderDenormFlushToZeroFloat64 = VK_FALSE; + pFloatControlsProperties->shaderRoundingModeRTEFloat64 = VK_FALSE; + pFloatControlsProperties->shaderRoundingModeRTZFloat64 = VK_FALSE; + } } // ===================================================================================================================== @@ -3840,8 +3924,16 @@ void PhysicalDevice::GetPhysicalDeviceShaderAtomicInt64Features( VkBool32* pShaderSharedInt64Atomics ) const { - *pShaderBufferInt64Atomics = VK_TRUE; - *pShaderSharedInt64Atomics = VK_TRUE; + if (PalProperties().gfxipProperties.flags.support64BitInstructions) + { + *pShaderBufferInt64Atomics = VK_TRUE; + *pShaderSharedInt64Atomics = VK_TRUE; + } + else + { + *pShaderBufferInt64Atomics = VK_FALSE; + *pShaderSharedInt64Atomics = VK_FALSE; + } } // 
===================================================================================================================== @@ -4183,6 +4275,17 @@ void PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: + { + VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT* pVertexAttributeDivisorFeatures = + reinterpret_cast<VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT*>(pHeader); + + pVertexAttributeDivisorFeatures->vertexAttributeInstanceRateDivisor = VK_TRUE; + pVertexAttributeDivisorFeatures->vertexAttributeInstanceRateZeroDivisor = VK_FALSE; + + break; + } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: { VkPhysicalDeviceCoherentMemoryFeaturesAMD * pDeviceCoherentMemory = diff --git a/icd/api/vk_pipeline_cache.cpp b/icd/api/vk_pipeline_cache.cpp index 32273a7e..1503dc0d 100644 --- a/icd/api/vk_pipeline_cache.cpp +++ b/icd/api/vk_pipeline_cache.cpp @@ -76,6 +76,8 @@ VkResult PipelineCache::Create( size_t shaderCacheSize = 0; size_t pipelineCacheSize[MaxPalDevices]; + bool usePipelineCacheInitialData = false; + PipelineCompilerType cacheType = pDevice->GetCompiler(DefaultDeviceIndex)->GetShaderCacheType(); for (uint32_t i = 0; i < numPalDevices; i++) @@ -99,12 +101,20 @@ VkResult PipelineCache::Create( pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetDeviceProperties(&physicalDeviceProps); if (memcmp(pHeader->UUID, physicalDeviceProps.pipelineCacheUUID, sizeof(pHeader->UUID)) == 0) { - auto pPrivateDataHeader = reinterpret_cast<const PipelineCachePrivateHeaderData*>( - Util::VoidPtrInc(pCreateInfo->pInitialData, sizeof(PipelineCacheHeaderData))); + const void* pData = Util::VoidPtrInc(pCreateInfo->pInitialData, sizeof(PipelineCacheHeaderData)); + size_t dataSize = pCreateInfo->initialDataSize - sizeof(PipelineCacheHeaderData); - if (pPrivateDataHeader->cacheType == cacheType) + if (PipelineBinaryCache::IsValidBlob(pDevice->VkPhysicalDevice(DefaultDeviceIndex), dataSize, pData)) + { + usePipelineCacheInitialData = true; + } + else { - useInitialData = true; + auto pPrivateDataHeader = reinterpret_cast<const PipelineCachePrivateHeaderData*>(pData); + if (pPrivateDataHeader->cacheType == cacheType) + { + useInitialData = true; + } } } } @@ -179,12 +189,19 @@ VkResult PipelineCache::Create( if (result == VK_SUCCESS) { PipelineBinaryCache* pBinaryCache = nullptr; - if (((settings.usePalPipelineCaching) || - (pDevice->VkPhysicalDevice(DefaultDeviceIndex)->VkInstance()->GetDevModeMgr() != nullptr)) && - (settings.allowExternalPipelineCacheObject)) + if (settings.allowExternalPipelineCacheObject) { + const void* pInitialData = nullptr; + size_t initialDataSize = 0; + + if (usePipelineCacheInitialData) + { + pInitialData = Util::VoidPtrInc(pCreateInfo->pInitialData, sizeof(PipelineCacheHeaderData)); + initialDataSize = pCreateInfo->initialDataSize - sizeof(PipelineCacheHeaderData); + } + pBinaryCache = PipelineBinaryCache::Create(pDevice->VkPhysicalDevice(DefaultDeviceIndex)->VkInstance(), - pCreateInfo->initialDataSize, pCreateInfo->pInitialData, false, + initialDataSize, pInitialData, false, pDevice->GetCompiler(DefaultDeviceIndex)->GetGfxIp(), pDevice->VkPhysicalDevice(DefaultDeviceIndex)); // This isn't a terminal failure, the device can continue without the pipeline cache if need be.
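For context, the Create() path above and the reworked GetData() below are what back the standard two-call vkGetPipelineCacheData idiom on the application side. The sketch below is illustrative only and not part of the patch; "device" and "cache" stand for any valid VkDevice and VkPipelineCache, and error handling is kept minimal.

#include <cstdint>
#include <vector>
#include <vulkan/vulkan.h>

// Save a pipeline cache blob that can later be fed back through
// VkPipelineCacheCreateInfo::pInitialData / initialDataSize on a future run.
std::vector<uint8_t> SaveCacheBlob(VkDevice device, VkPipelineCache cache)
{
    // First call: query the total blob size. On the PipelineBinaryCache path
    // this reaches Serialize() with *pSize == 0.
    size_t blobSize = 0;
    std::vector<uint8_t> blob;

    if (vkGetPipelineCacheData(device, cache, &blobSize, nullptr) == VK_SUCCESS)
    {
        // Second call: fill the blob. Everything after the standard Vulkan
        // pipeline cache header is the driver-private payload that
        // PipelineBinaryCache::IsValidBlob() recognizes in Create() above.
        blob.resize(blobSize);

        if (vkGetPipelineCacheData(device, cache, &blobSize, blob.data()) != VK_SUCCESS)
        {
            blob.clear();
        }
    }

    return blob;
}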
@@ -207,7 +224,7 @@ VkResult PipelineCache::Destroy( const Device* pDevice, const VkAllocationCallbacks* pAllocator) { - if (m_pBinaryCache) + if (m_pBinaryCache != nullptr) { m_pBinaryCache->Destroy(); pDevice->VkPhysicalDevice(DefaultDeviceIndex)->VkInstance()->FreeMem(m_pBinaryCache); @@ -229,41 +246,51 @@ VkResult PipelineCache::GetData( VK_ASSERT(pSize != nullptr); VkResult result = VK_SUCCESS; - uint32_t numPalDevices = m_pDevice->NumPalDevices(); - - size_t allBlobSize = sizeof(PipelineCachePrivateHeaderData); - PipelineCachePrivateHeaderData headerData = {}; - - headerData.cacheType = m_shaderCaches[0].GetCacheType(); - for (uint32_t i = 0; i < numPalDevices; i++) - { - size_t blobSize = 0; - result = m_shaderCaches[i].Serialize(nullptr, &blobSize); - VK_ASSERT(result == VK_SUCCESS); - headerData.blobSize[i] = blobSize; - allBlobSize += blobSize; - } - if (*pSize == 0) +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 534 + if (m_pBinaryCache != nullptr) { - *pSize = allBlobSize; + result = m_pBinaryCache->Serialize(pData, pSize); } else +#endif { - VK_ASSERT(*pSize >= allBlobSize); - memcpy(pData, &headerData, sizeof(headerData)); + uint32_t numPalDevices = m_pDevice->NumPalDevices(); - void* pBlob = Util::VoidPtrInc(pData, sizeof(headerData)); + size_t allBlobSize = sizeof(PipelineCachePrivateHeaderData); + PipelineCachePrivateHeaderData headerData = {}; + headerData.cacheType = m_shaderCaches[0].GetCacheType(); for (uint32_t i = 0; i < numPalDevices; i++) { - size_t blobSize = static_cast<size_t>(headerData.blobSize[i]); - result = m_shaderCaches[i].Serialize(pBlob, &blobSize); - if (result != VK_SUCCESS) + size_t blobSize = 0; + result = m_shaderCaches[i].Serialize(nullptr, &blobSize); + VK_ASSERT(result == VK_SUCCESS); + headerData.blobSize[i] = blobSize; + allBlobSize += blobSize; + } + + if (*pSize == 0) + { + *pSize = allBlobSize; + } + else + { + VK_ASSERT(*pSize >= allBlobSize); + memcpy(pData, &headerData, sizeof(headerData)); + + void* pBlob = Util::VoidPtrInc(pData, sizeof(headerData)); + + for (uint32_t i = 0; i < numPalDevices; i++) { - break; + size_t blobSize = static_cast<size_t>(headerData.blobSize[i]); + result = m_shaderCaches[i].Serialize(pBlob, &blobSize); + if (result != VK_SUCCESS) + { + break; + } + pBlob = Util::VoidPtrInc(pBlob, blobSize); } - pBlob = Util::VoidPtrInc(pBlob, blobSize); } } @@ -275,30 +302,49 @@ VkResult PipelineCache::Merge( uint32_t srcCacheCount, const PipelineCache** ppSrcCaches) { - Util::AutoBuffer shaderCaches( - srcCacheCount * m_pDevice->NumPalDevices(), - m_pDevice->VkInstance()->Allocator()); + VkResult result = VK_SUCCESS; - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 534 + if (m_pBinaryCache != nullptr) { + Util::AutoBuffer binaryCaches( + srcCacheCount, + m_pDevice->VkInstance()->Allocator()); + for (uint32_t cacheIdx = 0; cacheIdx < srcCacheCount; cacheIdx++) { - VK_ASSERT(ppSrcCaches[cacheIdx]->GetShaderCache(deviceIdx).GetCacheType() == - GetShaderCache(deviceIdx).GetCacheType()); - // Store all PAL caches like this d0c0,d0c1,d0c2...,d1c0,d1c2,d1c3...
- shaderCaches[deviceIdx * srcCacheCount + cacheIdx] = - ppSrcCaches[cacheIdx]->GetShaderCache(deviceIdx).GetCachePtr(); + binaryCaches[cacheIdx] = ppSrcCaches[cacheIdx]->GetPipelineCache(); } - VkResult result = VK_SUCCESS; - for (uint32_t i = 0; i < m_pDevice->NumPalDevices(); i++) + result = m_pBinaryCache->Merge(srcCacheCount, &binaryCaches[0]); + } + else +#endif { - result = m_shaderCaches[i].Merge(srcCacheCount, &shaderCaches[i * srcCacheCount]); + Util::AutoBuffer shaderCaches( + srcCacheCount * m_pDevice->NumPalDevices(), + m_pDevice->VkInstance()->Allocator()); - if (result != VK_SUCCESS) + for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) + { + for (uint32_t cacheIdx = 0; cacheIdx < srcCacheCount; cacheIdx++) + { + VK_ASSERT(ppSrcCaches[cacheIdx]->GetShaderCache(deviceIdx).GetCacheType() == + GetShaderCache(deviceIdx).GetCacheType()); + // Store all PAL caches like this: d0c0,d0c1,d0c2...,d1c0,d1c1,d1c2... + shaderCaches[deviceIdx * srcCacheCount + cacheIdx] = + ppSrcCaches[cacheIdx]->GetShaderCache(deviceIdx).GetCachePtr(); + } + } + + for (uint32_t i = 0; i < m_pDevice->NumPalDevices(); i++) { - break; + result = m_shaderCaches[i].Merge(srcCacheCount, &shaderCaches[i * srcCacheCount]); + + if (result != VK_SUCCESS) + { + break; + } } } @@ -389,6 +435,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPipelineCacheData( { void* pPrivateData = Util::VoidPtrInc(pData, HeaderSize); result = pCache->GetData(pPrivateData, &privateDataSize); + *pDataSize = privateDataSize + HeaderSize; } } diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp index cb99ed11..cf58119a 100644 --- a/icd/api/vk_queue.cpp +++ b/icd/api/vk_queue.cpp @@ -409,13 +409,16 @@ VkResult Queue::WaitIdle(void) { VK_ASSERT(m_pPalQueues != nullptr); - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) + Pal::Result palResult = Pal::Result::Success; + + for (uint32_t deviceIdx = 0; + (deviceIdx < m_pDevice->NumPalDevices()) && (palResult == Pal::Result::Success); + deviceIdx++) { - PalQueue(deviceIdx)->WaitIdle(); + palResult = PalQueue(deviceIdx)->WaitIdle(); } - // Pal::IQueue::WaitIdle returns void. We have no errors to produce here. - return VK_SUCCESS; + return PalToVkResult(palResult); } // ===================================================================================================================== diff --git a/icd/make/importdefs b/icd/make/importdefs index 7c766840..7bf1ac25 100644 --- a/icd/make/importdefs +++ b/icd/make/importdefs @@ -1,7 +1,7 @@ # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. It must # be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -ICD_PAL_CLIENT_MAJOR_VERSION = 527 +ICD_PAL_CLIENT_MAJOR_VERSION = 534 ICD_PAL_CLIENT_MINOR_VERSION = 0 # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. It describes @@ -12,9 +12,9 @@ ICD_GPUOPEN_CLIENT_MINOR_VERSION = 0 #if ICD_BUILD_SCPC # This will become the value of SCPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_SCPC=1. It describes the # interface version of SCPC (currently part of PAL) that the ICD supports. -ICD_SCPC_CLIENT_MAJOR_VERSION = 49 +ICD_SCPC_CLIENT_MAJOR_VERSION = 56 #endif # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1.
It describes the # interface version of LLPC that the ICD supports. -ICD_LLPC_CLIENT_MAJOR_VERSION = 32 +ICD_LLPC_CLIENT_MAJOR_VERSION = 34 diff --git a/icd/res/ver.h b/icd/res/ver.h index 2b0550aa..e0b4ef73 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 109 +#define VULKAN_ICD_BUILD_VERSION 111 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index 3c17dc44..a68a2aaa 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -154,7 +154,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( // In general, DCC is very beneficial for color attachments. If this is completely offset, maybe by increased // shader read latency or partial writes of DCC blocks, it should be debugged on a case-by-case basis. - if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.forceDccForColorAttachments = true; } @@ -214,7 +214,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( ForceImageSharingModeExclusive; } - if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.asyncComputeQueueLimit = 1; } @@ -232,7 +232,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( if (((appProfile == AppProfile::WolfensteinII) || (appProfile == AppProfile::Doom)) && - (info.gfxLevel > Pal::GfxIpLevel::GfxIp9)) + (info.gfxLevel == Pal::GfxIpLevel::GfxIp10_1)) { m_settings.asyncComputeQueueMaxWavesPerCu = 40; m_settings.nggSubgroupSizing = NggSubgroupExplicit; @@ -256,10 +256,11 @@ void VulkanSettingsLoader::OverrideProfiledSettings( } // WWZ performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. - if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.forceDccForColorAttachments = false; } + } if (appProfile == AppProfile::IdTechEngine) @@ -329,6 +330,10 @@ void VulkanSettingsLoader::OverrideProfiledSettings( m_settings.preciseAnisoMode = DisablePreciseAnisoAll; } + if (appProfile == AppProfile::StrangeBrigade) + { + } + if (appProfile == AppProfile::MadMax) { m_settings.preciseAnisoMode = DisablePreciseAnisoAll; @@ -341,7 +346,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( m_settings.prefetchShaders = true; // F1 2017 performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. - if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.forceDccForColorAttachments = false; } @@ -355,7 +360,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( if (appProfile == AppProfile::DiRT4) { // DiRT 4 performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. - if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.forceDccForColorAttachments = false; } @@ -364,7 +369,7 @@ void VulkanSettingsLoader::OverrideProfiledSettings( if (appProfile == AppProfile::WarHammerII) { // WarHammer II performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now.
- if (info.gfxLevel > Pal::GfxIpLevel::GfxIp9) + if (info.gfxLevel >= Pal::GfxIpLevel::GfxIp10_1) { m_settings.forceDccForColorAttachments = false; } diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index fe84c653..1ef65905 100644 --- a/icd/settings/settings_xgl.json +++ b/icd/settings/settings_xgl.json @@ -1455,7 +1455,7 @@ "Value": 0 }, { - "Description": " Enable for vertex shaders", + "Description": "Enable for vertex shaders", "Value": 1 }, { @@ -1602,6 +1602,204 @@ "VariableName": "skipUnsupportedOpCode", "Name": "SkipUnsupportedOpCode" }, + { + "Description": "Force vertex shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "vsIrMode", + "Name": "VsIrMode", + "Scope": "Driver" + }, + { + "Description": "Force tessellation control shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "tcsIrMode", + "Name": "TcsIrMode", + "Scope": "Driver" + }, + { + "Description": "Force tessellation evaluation shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "tesIrMode", + "Name": "TesIrMode", + "Scope": "Driver" + }, + { + "Description": "Force geometry shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "gsIrMode", + "Name": "GsIrMode", + "Scope": "Driver" + }, + { + "Description": "Force fragment shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 
2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "fsIrMode", + "Name": "FsIrMode", + "Scope": "Driver" + }, + { + "Description": "Force compute shaders' optimization IR mode selection.", + "Tags": [ + "SPIRV Options" + ], + "ValidValues": { + "Values": [ + { + "Description": "Let the compiler choose the IR mode.", + "Value": 0 + }, + { + "Description": "Force the compiler to use Old IR.", + "Value": 1 + }, + { + "Description": "Force the compiler to use New IR with retry mechanism.", + "Value": 2 + }, + { + "Description": "Force the compiler to use New IR with error mechanism.", + "Value": 3 + } + ] + }, + "Defaults": { + "Default": 0 + }, + "Type": "uint32", + "VariableName": "csIrMode", + "Name": "CsIrMode", + "Scope": "Driver" + }, { "Description": "[LLPC ONLY] LLPC general options. Max length of the option is 256. For example: disable loop unroll: -pragma-unroll-threshold=1; enable si-scheduler: -enable-si-scheduler. Please see amdllpc -help or -help-hidden for details", "Tags": [ @@ -1687,7 +1885,7 @@ }, { "Name": "AllowExternalPipelineCacheObject", - "Description": "Controls whether a pipeline cache object is allowed to be created via vkCreatePipelineCache in addition to the cache residing within the pipeline compiler. (Default: FALSE)", + "Description": "Controls whether a pipeline cache object is allowed to be created via vkCreatePipelineCache in addition to the cache residing within the pipeline compiler. (Default: TRUE)", "Tags": [ "SPIRV Options" ], @@ -3926,150 +4124,6 @@ "VariableName": "devModeSqttPrepareFrameCount", "Name": "DevModeSqttPrepareFrameCount" }, - { - "Description": "Allow chill to run. Chill is a user interaction dependent FPS limiter, used for power saving. ", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": true - }, - "Type": "bool", - "VariableName": "allowChill", - "Name": "AllowChill", - "Scope": "Driver" - }, - { - "Description": "If per-app chill profile settings is enabled. 
", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": false - }, - "Scope": "Driver", - "Type": "bool", - "VariableName": "chillProfileEnable", - "Name": "Chill_ProfileEnable" - }, - { - "ValidValues": { - "IsEnum": true, - "Values": [ - { - "Name": "IcdChillLevelDisable", - "Value": 0, - "Description": "Disable" - }, - { - "Name": "IcdChillLevelSubtle", - "Value": 1, - "Description": "Subtle" - }, - { - "Name": "IcdChillLevelMedium", - "Value": 2, - "Description": "Medium" - }, - { - "Name": "IcdChillLevelFull", - "Value": 3, - "Description": "Full" - } - ], - "Name": "IcdChillLevelMode" - }, - "Description": "Chill level setting, default is medium.", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": "IcdChillLevelMedium" - }, - "Flags": { - "IsHex": true - }, - "Scope": "Driver", - "Type": "enum", - "VariableName": "chillLevel", - "Name": "Chill_ChillLevel" - }, - { - "ValidValues": { - "Values": [ - { - "LogicOp": "GreaterThanOrEqual", - "Value": 30 - }, - { - "LogicOp": "LessThanOrEqual", - "Value": 300 - } - ] - }, - "Description": "Min chill frame rate; valid range is 30-300fps.", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": 70 - }, - "Scope": "Driver", - "Type": "uint32", - "VariableName": "chillMinFrameRate", - "Name": "Chill_MinFramerate" - }, - { - "ValidValues": { - "Values": [ - { - "LogicOp": "GreaterThanOrEqual", - "Value": 30 - }, - { - "LogicOp": "LessThanOrEqual", - "Value": 300 - } - ] - }, - "Description": "Max chill frame rate; valid range is 30-300fps.", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": 144 - }, - "Scope": "Driver", - "Type": "uint32", - "VariableName": "chillMaxFrameRate", - "Name": "Chill_MaxFramerate" - }, - { - "Description": "The threshold number of draw calls per frame used to distinguish between loading screens and gameplay.", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": 150 - }, - "Scope": "Driver", - "Type": "uint32", - "VariableName": "chillLoadingScreenDrawsThresh", - "Name": "Chill_LoadingScreenDrawsThresh" - }, - { - "Description": "When true, we will not disable chill based on KMD workstation flag or Big Software version.", - "Tags": [ - "Chill" - ], - "Defaults": { - "Default": false - }, - "Scope": "Driver", - "Type": "bool", - "VariableName": "chillIgnoreBaseDriverRestrictions", - "Name": "Chill_IgnoreBaseDriverRestrictions" - }, { "Name": "OverrideShaderParams", "Description": "Indicate that shader parameter override is enabled - mainly used for automation",