Skip to content

Commit

Permalink
Merge branch 'internal_master_prm'
Browse files Browse the repository at this point in the history
  • Loading branch information
JaxLinAMD committed Sep 9, 2021
2 parents e6ae9d1 + 9db36fe commit d8bb7de
Show file tree
Hide file tree
Showing 89 changed files with 3,466 additions and 1,145 deletions.
2 changes: 1 addition & 1 deletion icd/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ set(ICD_STRING_DIR ${CMAKE_CURRENT_SOURCE_DIR}/api/strings)
# ICD settings code generation main script
set(ICD_GEN_STRINGS ${ICD_STRING_DIR}/generate_strings.py)

set(ICD_GEN_STRINGS_OPT -w ${ICD_STRING_DIR})
set(ICD_GEN_STRINGS_OPT -w ${ICD_STRING_DIR} -d ${ICD_STRING_DIR})

set(ICD_GEN_STRINGS_FILES ${ICD_GEN_STRINGS} ${ICD_STRING_DIR}/func_table_template.py)

Expand Down
4 changes: 2 additions & 2 deletions icd/Loader/LunarG/Lnx/amd-icd.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
"file_format_version": "1.0.0",
"ICD": {
"library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@[email protected]",
"api_version": "1.2.185"
"api_version": "1.2.188"
},
"layer": {
"name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@",
"type": "GLOBAL",
"library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@[email protected]",
"api_version": "1.2.185",
"api_version": "1.2.188",
"implementation_version": "1",
"description": "AMD switchable graphics layer",
"functions": {
Expand Down
2 changes: 1 addition & 1 deletion icd/api/appopt/async_layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ class AsyncLayer final : public OptLayer

virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) override;

VK_INLINE Device* GetDevice() { return m_pDevice; }
// Returns the Device pointer held in m_pDevice.
Device* GetDevice() { return m_pDevice; }

void* GetTaskThread(TaskType type)
{
Expand Down
2 changes: 1 addition & 1 deletion icd/api/appopt/async_shader_module.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class ShaderModule final : public vk::NonDispatchable<VkShaderModule, ShaderModu
Device* pDevice,
const VkAllocationCallbacks* pAllocator);

VK_INLINE VkShaderModule GetNextLayerModule()
// Picks the shader module handle to forward: m_asyncModule once it holds a non-null handle,
// otherwise m_immedModule.
VkShaderModule GetNextLayerModule()
{
    if (m_asyncModule != VK_NULL_HANDLE)
    {
        return m_asyncModule;
    }

    return m_immedModule;
}
Expand Down
4 changes: 2 additions & 2 deletions icd/api/appopt/async_task_thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class TaskThread final : public Util::Thread
}

// Starts a new thread which starts by running function TaskThreadFunc.
VK_INLINE void Begin()
void Begin()
{
// Forward to the base-class Begin, handing it ThreadFunc as the entry point and this object as its argument.
Util::Thread::Begin(ThreadFunc, this);
}
Expand All @@ -81,7 +81,7 @@ class TaskThread final : public Util::Thread
}

// Sets the stop flag and triggers the event.
VK_INLINE void SetStop()
void SetStop()
{
    // Write the stop request before signalling the event, so a thread released by the event cannot run
    // ahead of the write and still observe m_stop == false.
    // NOTE(review): the waiting loop is not visible here - confirm it re-checks m_stop after waking.
    m_stop = true;
    m_event.Set();
}
Expand Down
157 changes: 148 additions & 9 deletions icd/api/appopt/gpu_decode_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,30 +52,35 @@ namespace GpuTexDecoder
{
VkResult result = VK_SUCCESS;
vk::Device* pDevice = reinterpret_cast<vk::Device*>(initInfo.pClientUserData);

VK_ASSERT(constInfo.numConstants <= 4);
VkSpecializationMapEntry mapEntries[4] =
{
// local_thread_x
// local_thread_x - ASTC
// ALPHA_BITS - ETC2
{
0,
0,
sizeof(uint32_t)
},

// local_thread_y
// local_thread_y - ASTC
// WIDTH - ETC2
{
1,
1 * sizeof(uint32_t),
sizeof(uint32_t)
},

// isSrgb Format
// isSrgb Format - ASTC
// HEIGHT - ETC2
{
2,
2 * sizeof(uint32_t),
sizeof(uint32_t)
},

// isBufferTexture
// isBufferTexture - ASTC
{
3,
3 * sizeof(uint32_t),
Expand All @@ -93,9 +98,9 @@ namespace GpuTexDecoder
};

Vkgc::ResourceMappingRootNode rootNode = {};
Vkgc::ResourceMappingNode nodes[GpuTexDecoder::AstcInternalPipelineNodes] = {};
if (buildInfo.shaderType == GpuTexDecoder::InternalTexConvertCsType::ConvertASTCToRGBA8)
{
Vkgc::ResourceMappingNode nodes[GpuTexDecoder::AstcInternalPipelineNodes] = {};
GpuTexDecoder::GpuDecodeMappingNode* pDecodeNode = buildInfo.pUserDataNodes;
for (size_t index = 0; index < GpuTexDecoder::AstcInternalPipelineNodes; index++)
{
Expand Down Expand Up @@ -129,6 +134,42 @@ namespace GpuTexDecoder
rootNode.visibility = Vkgc::ShaderStageComputeBit;
}

if (buildInfo.shaderType == GpuTexDecoder::InternalTexConvertCsType::ConvertETC2ToRGBA8)
{
Vkgc::ResourceMappingNode nodes[GpuTexDecoder::Etc2InternalPipelineNodes] = {};
GpuTexDecoder::GpuDecodeMappingNode* pDecodeNode = buildInfo.pUserDataNodes;
for (size_t index = 0; index < GpuTexDecoder::Etc2InternalPipelineNodes; index++)
{
if (pDecodeNode[index].nodeType == GpuTexDecoder::NodeType::Image)
{
nodes[index].type = Vkgc::ResourceMappingNodeType::DescriptorResource;
nodes[index].sizeInDwords = pDecodeNode[index].sizeInDwords;
nodes[index].offsetInDwords = pDecodeNode[index].offsetInDwords;
nodes[index].srdRange.binding = pDecodeNode[index].binding;
nodes[index].srdRange.set = pDecodeNode[index].set;
}
else
{
Vkgc::ResourceMappingNodeType vkgcType =
(pDecodeNode[index].nodeType == GpuTexDecoder::NodeType::Buffer) ?
Vkgc::ResourceMappingNodeType::DescriptorBuffer :
Vkgc::ResourceMappingNodeType::DescriptorTexelBuffer;
nodes[index].type = vkgcType;
nodes[index].sizeInDwords = pDecodeNode[index].sizeInDwords;
nodes[index].offsetInDwords = pDecodeNode[index].offsetInDwords;
nodes[index].srdRange.binding = pDecodeNode[index].binding;
nodes[index].srdRange.set = pDecodeNode[index].set;
}
}

rootNode.node.type = Vkgc::ResourceMappingNodeType::DescriptorTableVaPtr;
rootNode.node.offsetInDwords = 0;
rootNode.node.sizeInDwords = 1;
rootNode.node.tablePtr.nodeCount = GpuTexDecoder::Etc2InternalPipelineNodes;
rootNode.node.tablePtr.pNext = &nodes[0];
rootNode.visibility = Vkgc::ShaderStageComputeBit;
}

Vkgc::BinaryData spvBin = { buildInfo.code.spvSize, buildInfo.code.pSpvCode };

result = pDevice->CreateInternalComputePipeline(buildInfo.code.spvSize,
Expand Down Expand Up @@ -217,12 +258,11 @@ VKAPI_ATTR void VKAPI_CALL vkCmdCopyImage(
const Image* const pSrcImage = Image::ObjectFromHandle(srcImage);
const Image* const pDstImage = Image::ObjectFromHandle(dstImage);

const Pal::SwizzledFormat srcFormat = VkToPalFormat(pSrcImage->GetFormat(), pDevice->GetRuntimeSettings());
const Pal::SwizzledFormat dstFormat = VkToPalFormat(pDstImage->GetFormat(), pDevice->GetRuntimeSettings());

if (Formats::IsASTCFormat(pDstImage->GetFormat()))
{
VK_ASSERT(Formats::IsASTCFormat(pDstImage->GetFormat()));
const Pal::SwizzledFormat srcFormat = VkToPalFormat(pSrcImage->GetFormat(), pDevice->GetRuntimeSettings());
const Pal::SwizzledFormat dstFormat = VkToPalFormat(pDstImage->GetFormat(), pDevice->GetRuntimeSettings());

uint32_t maxObj = pCmdBuffer->EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ImageCopyRegion));

const auto maxRegions = Util::Max(maxObj, MaxPalAspectsPerMask);
Expand Down Expand Up @@ -274,6 +314,81 @@ VKAPI_ATTR void VKAPI_CALL vkCmdCopyImage(

virtStackFrame.FreeArray(pPalRegions);
}
else if (Formats::IsEtc2Format(pDstImage->GetFormat()))
{
    // ETC2 destination: convert the API copy regions to PAL regions in batches and hand each batch to the
    // GPU texture decoder with the ConvertETC2ToRGBA8 shader type.
    const VkFormat dstVkFormat = pDstImage->GetFormat();

    const Pal::SwizzledFormat srcFormat = VkToPalFormat(pSrcImage->GetFormat(), pDevice->GetRuntimeSettings());
    const Pal::SwizzledFormat dstFormat = VkToPalFormat(dstVkFormat, pDevice->GetRuntimeSettings());

    uint32_t maxObj = pCmdBuffer->EstimateMaxObjectsOnVirtualStack(sizeof(Pal::ImageCopyRegion));

    // A batch always has room for at least MaxPalAspectsPerMask entries, and never more than
    // regionCount * MaxPalAspectsPerMask.
    const auto maxRegions = Util::Max(maxObj, MaxPalAspectsPerMask);
    auto regionBatch = Util::Min(regionCount * MaxPalAspectsPerMask, maxRegions);

    VirtualStackFrame virtStackFrame(pCmdBuffer->GetStackAllocator());
    Pal::ImageCopyRegion* pPalRegions =
        virtStackFrame.AllocArray<Pal::ImageCopyRegion>(regionBatch);

    // Number of alpha bits in the destination format; stays 0 for any format not listed below.
    uint32_t alphaBits = 0;

    switch (dstVkFormat)
    {
    case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
    case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
        alphaBits = 0;
        break;
    case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
    case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
        alphaBits = 1;
        break;
    case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
    case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
        alphaBits = 8;
        break;
    default:
        break;
    }

    const Pal::ImageCreateInfo& imageInfo = pDstImage->PalImage(DefaultDeviceIndex)->GetImageCreateInfo();

    // Constants handed to the decoder: alpha bit count, then destination width and height.
    uint32_t const_data[3] =
    {
        alphaBits,
        imageInfo.extent.width,
        imageInfo.extent.height
    };

    GpuTexDecoder::CompileTimeConstants constInfo = {};
    constInfo.numConstants = 3;
    constInfo.pConstants = const_data;

    for (uint32_t regionIdx = 0; regionIdx < regionCount;)
    {
        uint32_t palRegionCount = 0;

        // Fill the batch while there is still room for a full MaxPalAspectsPerMask worth of entries.
        while ((regionIdx < regionCount) &&
               (palRegionCount <= (regionBatch - MaxPalAspectsPerMask)))
        {
            VkToPalImageCopyRegion(pRegions[regionIdx], srcFormat.format, dstFormat.format,
                                   pPalRegions, &palRegionCount);

            ++regionIdx;
        }

        // Pass the number of entries written to pPalRegions for this batch (palRegionCount), not the total
        // API region count, which does not describe the contents of the array once batching is involved.
        pDevice->GetGpuDecoderLayer()->GetTexDecoder()->GpuDecodeImage(
            GpuTexDecoder::InternalTexConvertCsType::ConvertETC2ToRGBA8,
            pCmdBuffer->PalCmdBuffer(DefaultDeviceIndex),
            pSrcImage->PalImage(DefaultDeviceIndex),
            pDstImage->PalImage(DefaultDeviceIndex),
            palRegionCount, pPalRegions, constInfo);
    }

    virtStackFrame.FreeArray(pPalRegions);
}
else
{
DECODER_WAPPER_CALL_NEXT_LAYER(vkCmdCopyImage(cmdBuffer,
Expand Down Expand Up @@ -320,6 +435,30 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateImage(
pImage);

}
else if(Formats::IsEtc2Format(format) &&
        (pCreateInfo->usage == VK_IMAGE_USAGE_TRANSFER_SRC_BIT))
{
    // ETC2 image whose usage is exactly TRANSFER_SRC: create it in the next layer with an uncompressed
    // integer format substituted for the ETC2 one. RGBA8 variants become R32G32B32A32_UINT; RGB8 and RGB8A1
    // variants become R32G32_UINT; any other format is passed through unchanged.
    // NOTE(review): presumably one texel of the substitute format carries one compressed block - confirm.
    VkImageCreateInfo substituteInfo = *pCreateInfo;
    const VkFormat requestedFormat = pCreateInfo->format;

    const bool isRgba8Block = (requestedFormat == VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK) ||
                              (requestedFormat == VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK);

    const bool isRgbOrRgba1Block = (requestedFormat == VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK) ||
                                   (requestedFormat == VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK)  ||
                                   (requestedFormat == VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK)   ||
                                   (requestedFormat == VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK);

    if (isRgba8Block)
    {
        substituteInfo.format = VK_FORMAT_R32G32B32A32_UINT;
    }
    else if (isRgbOrRgba1Block)
    {
        substituteInfo.format = VK_FORMAT_R32G32_UINT;
    }

    vkResult = DECODER_WAPPER_CALL_NEXT_LAYER(vkCreateImage)(device,
                                                              &substituteInfo,
                                                              pAllocator,
                                                              pImage);
}
else
{
vkResult = DECODER_WAPPER_CALL_NEXT_LAYER(vkCreateImage)(device,
Expand Down
4 changes: 2 additions & 2 deletions icd/api/appopt/gpu_decode_layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ class GpuDecoderLayer final : public OptLayer
VkResult Init(Device* pDevice);

virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) override;
VK_INLINE GpuTexDecoder::Device* GetTexDecoder()
// Returns the GpuTexDecoder::Device pointer held in m_pGpuTexDecoder.
GpuTexDecoder::Device* GetTexDecoder()
{
return m_pGpuTexDecoder;
}

VK_INLINE bool isAstcSrgbaFormat(VkFormat format)
bool isAstcSrgbaFormat(VkFormat format)
{
return Formats::IsASTCFormat(format) &&
(static_cast<uint32_t>(format) % 2 == 0);
Expand Down
2 changes: 1 addition & 1 deletion icd/api/appopt/opt_layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class OptLayer

virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) = 0;

VK_INLINE const DispatchTable* GetNextLayer() const
// Returns a read-only pointer to the m_nextLayer dispatch table.
const DispatchTable* GetNextLayer() const
{ return &m_nextLayer; }

protected:
Expand Down
6 changes: 3 additions & 3 deletions icd/api/barrier_policy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ static const LayoutUsageHelper g_LayoutUsageHelper;

// =====================================================================================================================
// Converts ImageLayout to Cache masks, for use with VK_ACCESS_MEMORY_WRITE and VK_ACCESS_MEMORY_READ only.
static VK_INLINE uint32_t ImageLayoutToCacheMask(VkImageLayout imageLayout)
static uint32_t ImageLayoutToCacheMask(VkImageLayout imageLayout)
{
uint32_t cacheMask = 0;

Expand Down Expand Up @@ -291,7 +291,7 @@ static VK_INLINE uint32_t ImageLayoutToCacheMask(VkImageLayout imageLayout)

// =====================================================================================================================
// Converts source access flags to source cache coherency flags.
static VK_INLINE uint32_t SrcAccessToCacheMask(AccessFlags accessMask, VkImageLayout imageLayout)
static uint32_t SrcAccessToCacheMask(AccessFlags accessMask, VkImageLayout imageLayout)
{
uint32_t cacheMask = 0;

Expand Down Expand Up @@ -355,7 +355,7 @@ static VK_INLINE uint32_t SrcAccessToCacheMask(AccessFlags accessMask, VkImageLa

// =====================================================================================================================
// Converts destination access flags to destination cache coherency flags.
static VK_INLINE uint32_t DstAccessToCacheMask(AccessFlags accessMask, VkImageLayout imageLayout)
static uint32_t DstAccessToCacheMask(AccessFlags accessMask, VkImageLayout imageLayout)
{
uint32_t cacheMask = 0;

Expand Down
4 changes: 4 additions & 0 deletions icd/api/compiler_solution_llpc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,10 +301,12 @@ VkResult CompilerSolutionLlpc::CreateGraphicsPipelineBinary(
Vkgc::PipelineShaderInfo** ppShadersInfo,
void* pPipelineDumpHandle,
uint64_t pipelineHash,
Util::MetroHash::Hash* pCacheId,
int64_t* pCompileTime)
{
VK_IGNORE(pDevice);
VK_IGNORE(pipelineHash);
VK_IGNORE(pCacheId);
const RuntimeSettings& settings = m_pPhysicalDevice->GetRuntimeSettings();
auto pInstance = m_pPhysicalDevice->Manager()->VkInstance();

Expand Down Expand Up @@ -424,10 +426,12 @@ VkResult CompilerSolutionLlpc::CreateComputePipelineBinary(
const void** ppPipelineBinary,
void* pPipelineDumpHandle,
uint64_t pipelineHash,
Util::MetroHash::Hash* pCacheId,
int64_t* pCompileTime)
{
VK_IGNORE(pDevice);
VK_IGNORE(pipelineHash);
VK_IGNORE(pCacheId);

const RuntimeSettings& settings = m_pPhysicalDevice->GetRuntimeSettings();
auto pInstance = m_pPhysicalDevice->Manager()->VkInstance();
Expand Down
12 changes: 6 additions & 6 deletions icd/api/devmode/devmode_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,20 +179,20 @@ class DevModeMgr
uint64_t value,
Pal::IQueueSemaphore* pQueueSemaphore);

VK_INLINE bool IsQueueTimingActive(const Device* pDevice) const;
VK_INLINE bool GetTraceFrameBeginTag(uint64_t* pTag) const;
VK_INLINE bool GetTraceFrameEndTag(uint64_t* pTag) const;
inline bool IsQueueTimingActive(const Device* pDevice) const;
inline bool GetTraceFrameBeginTag(uint64_t* pTag) const;
inline bool GetTraceFrameEndTag(uint64_t* pTag) const;

Util::Result RegisterPipelineCache(
PipelineBinaryCache* pPipelineCache,
uint32_t postSizeLimit);

void DeregisterPipelineCache(
PipelineBinaryCache* pPipelineCache);
VK_INLINE Util::ListIterator<PipelineBinaryCache*, PalAllocator> GetPipelineCacheListIterator()
// Returns an iterator positioned at the beginning of the m_pipelineCaches list.
Util::ListIterator<PipelineBinaryCache*, PalAllocator> GetPipelineCacheListIterator()
{ return m_pipelineCaches.Begin(); }

VK_INLINE Util::RWLock* GetPipelineReinjectionLock()
// Returns a pointer to the m_pipelineReinjectionLock reader/writer lock.
Util::RWLock* GetPipelineReinjectionLock()
{ return &m_pipelineReinjectionLock; }

private:
Expand Down Expand Up @@ -363,7 +363,7 @@ class DevModeMgr
#if ICD_GPUOPEN_DEVMODE_BUILD
// =====================================================================================================================
// Returns true if queue operations are currently being timed by RGP traces.
VK_INLINE bool DevModeMgr::IsQueueTimingActive(
inline bool DevModeMgr::IsQueueTimingActive(
const Device* pDevice
) const
{
Expand Down
Loading

0 comments on commit d8bb7de

Please sign in to comment.