Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add fixup for image1dbuffer with bgra format on Intel #629

Merged
merged 1 commit into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ add_library(OpenCL-objects OBJECT
device.cpp
device_properties.cpp
event.cpp
image_format.cpp
init.cpp
kernel.cpp
log.cpp
Expand Down
299 changes: 64 additions & 235 deletions src/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "cl_headers.hpp"
#include "icd.hpp"
#include "image_format.hpp"
#include "init.hpp"
#include "kernel.hpp"
#include "log.hpp"
Expand Down Expand Up @@ -4590,195 +4591,47 @@ cl_int CLVK_API_CALL clGetImageInfo(cl_mem image, cl_image_info param_name,
return ret;
}

struct ClFormatMapHash {
size_t operator()(const cl_image_format& format) const {
return format.image_channel_order << 16 |
format.image_channel_data_type;
}
};

struct ClFormatMapEqual {
bool operator()(const cl_image_format& lhs,
const cl_image_format& rhs) const {
return lhs.image_channel_order == rhs.image_channel_order &&
lhs.image_channel_data_type == rhs.image_channel_data_type;
}
};

struct image_format_support {
static constexpr cl_mem_flags RO = CL_MEM_READ_ONLY;
static constexpr cl_mem_flags WO = CL_MEM_WRITE_ONLY;
static constexpr cl_mem_flags RW = CL_MEM_KERNEL_READ_AND_WRITE;
static constexpr cl_mem_flags ROWO = RO | WO | CL_MEM_READ_WRITE;
static constexpr cl_mem_flags ALL = ROWO | RW;

image_format_support(cl_mem_flags flags, VkFormat fmt)
: flags(flags), vkfmt(fmt) {}
image_format_support(VkFormat fmt) : flags(ALL), vkfmt(fmt) {}

cl_mem_flags flags;
VkFormat vkfmt;
};

std::unordered_map<cl_image_format, image_format_support, ClFormatMapHash,
ClFormatMapEqual>
gFormatMaps = {
// R formats
{{CL_R, CL_UNORM_INT8}, VK_FORMAT_R8_UNORM},
{{CL_R, CL_SNORM_INT8}, VK_FORMAT_R8_SNORM},
{{CL_R, CL_UNSIGNED_INT8}, VK_FORMAT_R8_UINT},
{{CL_R, CL_SIGNED_INT8}, VK_FORMAT_R8_SINT},
{{CL_R, CL_UNORM_INT16}, VK_FORMAT_R16_UNORM},
{{CL_R, CL_SNORM_INT16}, VK_FORMAT_R16_SNORM},
{{CL_R, CL_UNSIGNED_INT16}, VK_FORMAT_R16_UINT},
{{CL_R, CL_SIGNED_INT16}, VK_FORMAT_R16_SINT},
{{CL_R, CL_HALF_FLOAT}, VK_FORMAT_R16_SFLOAT},
{{CL_R, CL_UNSIGNED_INT32}, VK_FORMAT_R32_UINT},
{{CL_R, CL_SIGNED_INT32}, VK_FORMAT_R32_SINT},
{{CL_R, CL_FLOAT}, VK_FORMAT_R32_SFLOAT},

// LUMINANCE formats
{{CL_LUMINANCE, CL_UNORM_INT8},
{image_format_support::ROWO, VK_FORMAT_R8_UNORM}},
{{CL_LUMINANCE, CL_SNORM_INT8},
{image_format_support::ROWO, VK_FORMAT_R8_SNORM}},
{{CL_LUMINANCE, CL_UNSIGNED_INT8},
{image_format_support::ROWO, VK_FORMAT_R8_UINT}},
{{CL_LUMINANCE, CL_SIGNED_INT8},
{image_format_support::ROWO, VK_FORMAT_R8_SINT}},
{{CL_LUMINANCE, CL_UNORM_INT16},
{image_format_support::ROWO, VK_FORMAT_R16_UNORM}},
{{CL_LUMINANCE, CL_SNORM_INT16},
{image_format_support::ROWO, VK_FORMAT_R16_SNORM}},
{{CL_LUMINANCE, CL_UNSIGNED_INT16},
{image_format_support::ROWO, VK_FORMAT_R16_UINT}},
{{CL_LUMINANCE, CL_SIGNED_INT16},
{image_format_support::ROWO, VK_FORMAT_R16_SINT}},
{{CL_LUMINANCE, CL_HALF_FLOAT},
{image_format_support::ROWO, VK_FORMAT_R16_SFLOAT}},
{{CL_LUMINANCE, CL_UNSIGNED_INT32},
{image_format_support::ROWO, VK_FORMAT_R32_UINT}},
{{CL_LUMINANCE, CL_SIGNED_INT32},
{image_format_support::ROWO, VK_FORMAT_R32_SINT}},
{{CL_LUMINANCE, CL_FLOAT},
{image_format_support::ROWO, VK_FORMAT_R32_SFLOAT}},

// INTENSITY formats
{{CL_INTENSITY, CL_UNORM_INT8},
{image_format_support::ROWO, VK_FORMAT_R8_UNORM}},
{{CL_INTENSITY, CL_SNORM_INT8},
{image_format_support::ROWO, VK_FORMAT_R8_SNORM}},
{{CL_INTENSITY, CL_UNSIGNED_INT8},
{image_format_support::ROWO, VK_FORMAT_R8_UINT}},
{{CL_INTENSITY, CL_SIGNED_INT8},
{image_format_support::ROWO, VK_FORMAT_R8_SINT}},
{{CL_INTENSITY, CL_UNORM_INT16},
{image_format_support::ROWO, VK_FORMAT_R16_UNORM}},
{{CL_INTENSITY, CL_SNORM_INT16},
{image_format_support::ROWO, VK_FORMAT_R16_SNORM}},
{{CL_INTENSITY, CL_UNSIGNED_INT16},
{image_format_support::ROWO, VK_FORMAT_R16_UINT}},
{{CL_INTENSITY, CL_SIGNED_INT16},
{image_format_support::ROWO, VK_FORMAT_R16_SINT}},
{{CL_INTENSITY, CL_HALF_FLOAT},
{image_format_support::ROWO, VK_FORMAT_R16_SFLOAT}},
{{CL_INTENSITY, CL_UNSIGNED_INT32},
{image_format_support::ROWO, VK_FORMAT_R32_UINT}},
{{CL_INTENSITY, CL_SIGNED_INT32},
{image_format_support::ROWO, VK_FORMAT_R32_SINT}},
{{CL_INTENSITY, CL_FLOAT},
{image_format_support::ROWO, VK_FORMAT_R32_SFLOAT}},

// RG formats
{{CL_RG, CL_UNORM_INT8}, VK_FORMAT_R8G8_UNORM},
{{CL_RG, CL_SNORM_INT8}, VK_FORMAT_R8G8_SNORM},
{{CL_RG, CL_UNSIGNED_INT8}, VK_FORMAT_R8G8_UINT},
{{CL_RG, CL_SIGNED_INT8}, VK_FORMAT_R8G8_SINT},
{{CL_RG, CL_UNORM_INT16}, VK_FORMAT_R16G16_UNORM},
{{CL_RG, CL_SNORM_INT16}, VK_FORMAT_R16G16_SNORM},
{{CL_RG, CL_UNSIGNED_INT16}, VK_FORMAT_R16G16_UINT},
{{CL_RG, CL_SIGNED_INT16}, VK_FORMAT_R16G16_SINT},
{{CL_RG, CL_HALF_FLOAT}, VK_FORMAT_R16G16_SFLOAT},
{{CL_RG, CL_UNSIGNED_INT32}, VK_FORMAT_R32G32_UINT},
{{CL_RG, CL_SIGNED_INT32}, VK_FORMAT_R32G32_SINT},
{{CL_RG, CL_FLOAT}, VK_FORMAT_R32G32_SFLOAT},

// RGB formats
{{CL_RGB, CL_UNORM_INT8}, VK_FORMAT_R8G8B8_UNORM},
{{CL_RGB, CL_SNORM_INT8}, VK_FORMAT_R8G8B8_SNORM},
{{CL_RGB, CL_UNSIGNED_INT8}, VK_FORMAT_R8G8B8_UINT},
{{CL_RGB, CL_SIGNED_INT8}, VK_FORMAT_R8G8B8_SINT},
{{CL_RGB, CL_UNORM_INT16}, VK_FORMAT_R16G16B16_UNORM},
{{CL_RGB, CL_SNORM_INT16}, VK_FORMAT_R16G16B16_SNORM},
{{CL_RGB, CL_UNSIGNED_INT16}, VK_FORMAT_R16G16B16_UINT},
{{CL_RGB, CL_SIGNED_INT16}, VK_FORMAT_R16G16B16_SINT},
{{CL_RGB, CL_HALF_FLOAT}, VK_FORMAT_R16G16B16_SFLOAT},
{{CL_RGB, CL_UNSIGNED_INT32}, VK_FORMAT_R32G32B32_UINT},
{{CL_RGB, CL_SIGNED_INT32}, VK_FORMAT_R32G32B32_SINT},
{{CL_RGB, CL_FLOAT}, VK_FORMAT_R32G32B32_SFLOAT},
{{CL_RGB, CL_UNORM_SHORT_565}, VK_FORMAT_R5G6B5_UNORM_PACK16},

// RGBA formats
{{CL_RGBA, CL_UNORM_INT8}, VK_FORMAT_R8G8B8A8_UNORM},
{{CL_RGBA, CL_SNORM_INT8}, VK_FORMAT_R8G8B8A8_SNORM},
{{CL_RGBA, CL_UNSIGNED_INT8}, VK_FORMAT_R8G8B8A8_UINT},
{{CL_RGBA, CL_SIGNED_INT8}, VK_FORMAT_R8G8B8A8_SINT},
{{CL_RGBA, CL_UNORM_INT16}, VK_FORMAT_R16G16B16A16_UNORM},
{{CL_RGBA, CL_SNORM_INT16}, VK_FORMAT_R16G16B16A16_SNORM},
{{CL_RGBA, CL_UNSIGNED_INT16}, VK_FORMAT_R16G16B16A16_UINT},
{{CL_RGBA, CL_SIGNED_INT16}, VK_FORMAT_R16G16B16A16_SINT},
{{CL_RGBA, CL_HALF_FLOAT}, VK_FORMAT_R16G16B16A16_SFLOAT},
{{CL_RGBA, CL_UNSIGNED_INT32}, VK_FORMAT_R32G32B32A32_UINT},
{{CL_RGBA, CL_SIGNED_INT32}, VK_FORMAT_R32G32B32A32_SINT},
{{CL_RGBA, CL_FLOAT}, VK_FORMAT_R32G32B32A32_SFLOAT},

// BGRA formats
{{CL_BGRA, CL_UNORM_INT8}, VK_FORMAT_B8G8R8A8_UNORM},
{{CL_BGRA, CL_SNORM_INT8}, VK_FORMAT_B8G8R8A8_SNORM},
{{CL_BGRA, CL_UNSIGNED_INT8}, VK_FORMAT_B8G8R8A8_UINT},
{{CL_BGRA, CL_SIGNED_INT8}, VK_FORMAT_B8G8R8A8_SINT},
};

static void get_component_mappings_for_channel_order(
cl_channel_order order, VkComponentMapping* components_sampled,
VkComponentMapping* components_storage) {
if (order == CL_LUMINANCE) {
*components_sampled = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R,
VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_A};
} else if (order == CL_INTENSITY) {
*components_sampled = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R,
VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R};
} else {
*components_sampled = {
VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
}

*components_storage = {
VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
bool operator!=(const VkComponentMapping& lhs, const VkComponentMapping& rhs) {
return lhs.r != rhs.r || lhs.g != rhs.g || lhs.b != rhs.b || lhs.a != rhs.a;
}

bool cl_image_format_to_vulkan_format(const cl_image_format& clformat,
VkFormat* format,
VkComponentMapping* components_sampled,
VkComponentMapping* components_storage) {
auto m = gFormatMaps.find(clformat);
bool success = false;

if (m != gFormatMaps.end()) {
*format = (*m).second.vkfmt;
success = true;
static bool is_image_format_supported(
VkPhysicalDevice& pdev, VkFormat format, cl_mem_object_type image_type,
const VkFormatFeatureFlags& required_format_feature_flags,
cl_channel_order image_channel_order) {
VkFormatProperties properties;
vkGetPhysicalDeviceFormatProperties(pdev, format, &properties);

cvk_debug("Vulkan format %d:", format);
cvk_debug(
" linear : %s",
vulkan_format_features_string(properties.linearTilingFeatures).c_str());
cvk_debug(" optimal: %s",
vulkan_format_features_string(properties.optimalTilingFeatures)
.c_str());
cvk_debug(" buffer : %s",
vulkan_format_features_string(properties.bufferFeatures).c_str());

cvk_debug(
"Required format features %s",
vulkan_format_features_string(required_format_feature_flags).c_str());
VkFormatFeatureFlags features;
if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
features = properties.bufferFeatures;
} else {
// TODO support linear tiling
features = properties.optimalTilingFeatures;
}
if ((features & required_format_feature_flags) ==
required_format_feature_flags) {
if ((image_channel_order == CL_LUMINANCE ||
image_channel_order == CL_INTENSITY) &&
(image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
return false;
}
return true;
}

get_component_mappings_for_channel_order(
clformat.image_channel_order, components_sampled, components_storage);

return success;
}

bool operator!=(const VkComponentMapping& lhs, const VkComponentMapping& rhs) {
return lhs.r != rhs.r || lhs.g != rhs.g || lhs.b != rhs.b || lhs.a != rhs.a;
return false;
}

cl_int CLVK_API_CALL clGetSupportedImageFormats(cl_context context,
Expand Down Expand Up @@ -4809,7 +4662,8 @@ cl_int CLVK_API_CALL clGetSupportedImageFormats(cl_context context,

cl_uint num_formats_found = 0;

auto pdev = icd_downcast(context)->device()->vulkan_physical_device();
auto dev = icd_downcast(context)->device();
auto pdev = dev->vulkan_physical_device();

const VkFormatFeatureFlags required_format_feature_flags =
cvk_image::required_format_feature_flags_for(image_type, flags);
Expand All @@ -4820,59 +4674,34 @@ cl_int CLVK_API_CALL clGetSupportedImageFormats(cl_context context,

// Iterate over all known CL/VK format associations and report the CL
// formats for which the Vulkan format is supported
for (auto const& clvkfmt : gFormatMaps) {
const cl_image_format& clfmt = clvkfmt.first;
auto const& fmt_support = clvkfmt.second;
for (auto mapping : get_format_maps()) {
VkComponentMapping components_sampled, components_storage;
image_format_support fmt_support;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mapping.second

cl_image_format clfmt = mapping.first;
if (!cl_image_format_to_vulkan_format(clfmt, image_type, dev,
&fmt_support, &components_sampled,
&components_storage)) {
continue;
}
Comment on lines +4681 to +4685
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need the conversion now that you're iterating over the map again.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I remove this conversion, this PR makes no sense. The whole point of it is this conversion which allows to check for is_bgra_format_not_supported_for_image1d_buffer. That's why I did not want to iterate over the maps initially. But I agree having a getter is better. But I would prefer not to have access to the VkFormat there as people need to understand that the only mapping to consider is the one from the conversion function, not the map.

Copy link
Contributor Author

@rjodinchr rjodinchr Nov 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of the getter, we could have a function returning the number of elements in the map, and a function to get the cl_image_format (only) for a specified element:
const unsigned get_format_maps_size();
const cl_image_format get_format_from_format_maps_at(unsigned i);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forget about that, FormatMaps is a map... not possible to do it like that.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I'd lost track of the big picture :D. I'm sure we'll find good ways of refactoring this as more support for images makes its way into the code.

if ((fmt_support.flags & flags) != flags) {
continue;
}
VkFormat format = fmt_support.vkfmt;
VkFormatProperties properties;
vkGetPhysicalDeviceFormatProperties(pdev, format, &properties);

cvk_debug("Vulkan format %d:", format);
cvk_debug(" linear : %s",
vulkan_format_features_string(properties.linearTilingFeatures)
.c_str());
cvk_debug(" optimal: %s", vulkan_format_features_string(
properties.optimalTilingFeatures)
.c_str());
cvk_debug(
" buffer : %s",
vulkan_format_features_string(properties.bufferFeatures).c_str());

cvk_debug("Required format features %s",
vulkan_format_features_string(required_format_feature_flags)
.c_str());
VkFormatFeatureFlags features;
if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
features = properties.bufferFeatures;
} else {
// TODO support linear tiling
features = properties.optimalTilingFeatures;
if (!is_image_format_supported(pdev, fmt_support.vkfmt, image_type,
required_format_feature_flags,
clfmt.image_channel_order)) {
continue;
}
if ((features & required_format_feature_flags) ==
required_format_feature_flags) {
VkComponentMapping components_sampled, components_storage;
get_component_mappings_for_channel_order(clfmt.image_channel_order,
&components_sampled,
&components_storage);
if ((components_sampled != components_storage) &&
(image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
continue;
}
if ((image_formats != nullptr) &&
(num_formats_found < num_entries)) {
image_formats[num_formats_found] = clfmt;
cvk_debug_fn(
"reporting image format {%s, %s}",
cl_channel_order_to_string(clfmt.image_channel_order)
.c_str(),
cl_channel_type_to_string(clfmt.image_channel_data_type)
.c_str());
}
num_formats_found++;

// image format is supported
if ((image_formats != nullptr) && (num_formats_found < num_entries)) {
image_formats[num_formats_found] = clfmt;
cvk_debug_fn(
"reporting image format {%s, %s}",
cl_channel_order_to_string(clfmt.image_channel_order).c_str(),
cl_channel_type_to_string(clfmt.image_channel_data_type)
.c_str());
}
num_formats_found++;
}

cvk_debug_fn("reporting %u formats", num_formats_found);
Expand Down
5 changes: 5 additions & 0 deletions src/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,11 @@ struct cvk_device : public _cl_device_id,

const cvk_vulkan_extension_functions& vkfns() const { return m_vkfns; }

bool is_bgra_format_not_supported_for_image1d_buffer() const {
return m_clvk_properties
->is_bgra_format_not_supported_for_image1d_buffer();
}

private:
std::string version_desc() const {
std::string ret = "CLVK on Vulkan v";
Expand Down
7 changes: 6 additions & 1 deletion src/device_properties.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,12 @@ struct cvk_device_properties_intel : public cvk_device_properties {
});
}
std::string get_compile_options() const override final {
return "-hack-mul-extended -hack-convert-to-float";
return "-hack-mul-extended -hack-convert-to-float "
"-hack-image1d-buffer-bgra";
}
bool
is_bgra_format_not_supported_for_image1d_buffer() const override final {
return true;
}
};

Expand Down
4 changes: 4 additions & 0 deletions src/device_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ struct cvk_device_properties {

virtual bool is_non_uniform_decoration_broken() const { return false; }

virtual bool is_bgra_format_not_supported_for_image1d_buffer() const {
return false;
}

virtual ~cvk_device_properties() {}
};

Expand Down
Loading