From 6176dc37ce47dbe9b171528d5f391a76286c0110 Mon Sep 17 00:00:00 2001 From: Callum Fare <callum@codeplay.com> Date: Fri, 9 Dec 2022 11:55:47 +0000 Subject: [PATCH 01/13] Implement printf Clspv provides a printf definition that stores the value of each argument in a program-wide printf storage buffer. This implementation uses the appropriate ClspvReflection instructions to parse this data and print the resulting strings at runtime. --- src/CMakeLists.txt | 1 + src/config.def | 3 + src/kernel.cpp | 2 +- src/kernel.hpp | 9 ++ src/printf.cpp | 247 +++++++++++++++++++++++++++++++++++++++++++++ src/printf.hpp | 31 ++++++ src/program.cpp | 83 +++++++++++++-- src/program.hpp | 59 ++++++++++- src/queue.cpp | 91 ++++++++++++++++- src/queue.hpp | 23 ++++- 10 files changed, 537 insertions(+), 12 deletions(-) create mode 100644 src/printf.cpp create mode 100644 src/printf.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6c8cc79f..00e5345e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -53,6 +53,7 @@ add_library(OpenCL-objects OBJECT kernel.cpp log.cpp memory.cpp + printf.cpp program.cpp queue.cpp semaphore.cpp diff --git a/src/config.def b/src/config.def index 821dfcd2..a709c0be 100644 --- a/src/config.def +++ b/src/config.def @@ -21,9 +21,12 @@ OPTION(bool, skip_spirv_capability_check, false) OPTION(bool, keep_temporaries, false) OPTION(std::string, spirv_arch, "spir") OPTION(bool, physical_addressing, false) + OPTION(std::string, clspv_native_builtins, "") OPTION(std::string, clspv_library_builtins, "") +OPTION(uint32_t, printf_buffer_size, 1048576u) + #if COMPILER_AVAILABLE OPTION(std::string, clspv_options, "") #if !CLSPV_ONLINE_COMPILER diff --git a/src/kernel.cpp b/src/kernel.cpp index 3cc7bdf5..cf0ba472 100644 --- a/src/kernel.cpp +++ b/src/kernel.cpp @@ -162,7 +162,7 @@ bool cvk_kernel_argument_values::setup_descriptor_sets() { // Setup module-scope variables if (program->module_constant_data_buffer() != nullptr && program->module_constant_data_buffer_info()->type == - constant_data_buffer_type::storage_buffer) { + module_buffer_type::storage_buffer) { auto buffer = program->module_constant_data_buffer(); auto info = program->module_constant_data_buffer_info(); cvk_debug_fn( diff --git a/src/kernel.hpp b/src/kernel.hpp index 02e6d8a8..f56de3b3 100644 --- a/src/kernel.hpp +++ b/src/kernel.hpp @@ -19,6 +19,8 @@ #include <unordered_map> #include <vector> +#include "spirv/unified1/NonSemanticClspvReflection.h" + #include "memory.hpp" #include "objects.hpp" #include "program.hpp" @@ -143,6 +145,13 @@ struct cvk_kernel : public _cl_kernel, api_object<object_magic::kernel> { return m_args.at(arg_index).info.type_qualifier; } + bool uses_printf() const { + return m_program->kernel_flags(m_name) & + NonSemanticClspvReflectionMayUsePrintf; + } + + bool requires_serialized_execution() const { return uses_printf(); } + private: friend cvk_kernel_argument_values; diff --git a/src/printf.cpp b/src/printf.cpp new file mode 100644 index 00000000..6182270d --- /dev/null +++ b/src/printf.cpp @@ -0,0 +1,247 @@ +// Copyright 2022 The clvk authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sstream> + +#include "printf.hpp" + +// Extract the conversion specifier from a format string +char get_fmt_conversion(std::string_view fmt) { + auto conversionSpecPos = fmt.find_first_of("diouxXfFeEgGaAcsp"); + return fmt.at(conversionSpecPos); +} + +// Read type T from given pointer +template <typename T> T read_buff(const char* data) { + return *(reinterpret_cast<const T*>(data)); +} + +// Read type T from given pointer then increment the pointer +template <typename T> T read_inc_buff(char*& data) { + T out = *(reinterpret_cast<T*>(data)); + data += sizeof(T); + return out; +} + +// Extract the optional vector flag and return a modified format string suitable +// for calling snprintf on individual vector elements +std::string get_vector_fmt(std::string fmt, int& vector_size, int& element_size, + std::string& remaining_fmt) { + // Consume flags (skipping initial '%') + auto pos = fmt.find_first_not_of(" +-#0", 1ul); + // Consume precision and field width + pos = fmt.find_first_not_of("123456789.", pos); + + if (fmt.at(pos) != 'v') { + vector_size = 1; + return std::string{fmt}; + } + + // Trim the data after the conversion specifier and store it in + // `remaining_fmt` + auto pos_conversion = fmt.find_first_of("diouxXfFeEgGaAcsp"); + auto fmt_specifier = fmt.substr(0, pos_conversion + 1); + remaining_fmt = fmt.substr(pos_conversion + 1); + fmt = fmt_specifier; + + size_t vec_length_pos_start = ++pos; + size_t vec_length_pos_end = + fmt.find_first_not_of("23468", vec_length_pos_start); + auto vec_length_str = fmt.substr(vec_length_pos_start, + vec_length_pos_end - vec_length_pos_start); + int vec_length = std::atoi(vec_length_str.c_str()); + + auto fmt_pre_vec_len = fmt.substr(0, vec_length_pos_start - 1); + auto fmt_post_vec_len = fmt.substr(vec_length_pos_end, fmt.size()); + fmt = fmt_pre_vec_len + fmt_post_vec_len; + + // The length modifier is required with vectors + if (fmt_post_vec_len.find("hh") != std::string::npos) { + element_size = 1; + } else if (fmt_post_vec_len.find("hl") != std::string::npos) { + element_size = 4; + } else if (fmt_post_vec_len.find("h") != std::string::npos) { + element_size = 2; + } else if (fmt_post_vec_len.find("l") != std::string::npos) { + element_size = 8; + } + + // If 'hl' length modifier is present, strip it as snprintf doesn't + // understand it + size_t hl = fmt.find("hl"); + if (hl != std::string::npos) { + fmt.erase(hl, 2); + } + + vector_size = vec_length; + return fmt; +} + +// Print the format part containing exactly one arg using snprintf +std::string print_part(const std::string& fmt, const char* data, size_t size) { + // We don't know the exact size of the output string, but given we have a + // single argument, the size of the format string plus 1024 bytes is more + // than likely to fit everything. If it doesn't fit, just keep retrying with + // double the output size. + size_t out_size = fmt.size() + 1024; + std::vector<char> out; + out.reserve(out_size); + out[0] = '\0'; + + auto conversion = std::tolower(get_fmt_conversion(fmt)); + bool finished = false; + while (!finished) { + int written = 0; + switch (conversion) { + case 's': { + written = snprintf(out.data(), out_size, fmt.c_str(), data); + break; + } + case 'f': + case 'e': + case 'g': + case 'a': { + if (size == 4) + written = snprintf(out.data(), out_size, fmt.c_str(), + read_buff<float>(data)); + else + written = snprintf(out.data(), out_size, fmt.c_str(), + read_buff<double>(data)); + break; + } + default: { + if (size == 1) + written = snprintf(out.data(), out_size, fmt.c_str(), + read_buff<uint8_t>(data)); + else if (size == 2) + written = snprintf(out.data(), out_size, fmt.c_str(), + read_buff<uint16_t>(data)); + else if (size == 4) + written = snprintf(out.data(), out_size, fmt.c_str(), + read_buff<uint32_t>(data)); + else + written = snprintf(out.data(), out_size, fmt.c_str(), + read_buff<uint64_t>(data)); + break; + } + } + + // Finish if the string fit in the output buffer or snprintf failed, + // otherwise double the output buffer and try again. If snprintf failed, + // set the output to an empty string. + if (written < 0) { + out[0] = '\0'; + finished = true; + } else if (written < static_cast<long>(out_size)) { + finished = true; + } else { + out_size *= 2; + out.reserve(out_size); + } + } + + return std::string(out.data()); +} + +void process_printf(char*& data, const printf_descriptor_map_t& descs) { + + uint32_t printf_id = read_inc_buff<uint32_t>(data); + auto& format_string = descs.at(printf_id).format_string; + + std::stringstream printf_out{}; + + // Firstly print the part of the format string up to the first '%' + size_t next_part = format_string.find_first_of('%'); + printf_out << format_string.substr(0, next_part); + + // Decompose the remaining format string into individual strings with + // one format specifier each, handle each one individually + size_t arg_idx = 0; + while (next_part < format_string.size() - 1) { + // Get the part of the format string before the next format specifier + size_t part_start = next_part; + size_t part_end = format_string.find_first_of('%', part_start + 1); + auto part_fmt = format_string.substr(part_start, part_end - part_start); + + // Handle special cases + if (part_end == part_start + 1) { + printf_out << "%"; + next_part = part_end + 1; + continue; + } else if (part_end == std::string::npos && + arg_idx >= descs.at(printf_id).arg_sizes.size()) { + // If there are no remaining arguments, the rest of the format + // should be printed verbatim + printf_out << part_fmt; + break; + } + + // The size of the argument that this format part will consume + auto& size = descs.at(printf_id).arg_sizes[arg_idx]; + + // Check to see if we have a vector format specifier + int vec_len = 0; + int el_size = 0; + std::string remaining_str; + part_fmt = get_vector_fmt(part_fmt, vec_len, el_size, remaining_str); + + // Scalar argument + if (vec_len < 2) { + // Special case for %s + if (get_fmt_conversion(part_fmt) == 's') { + uint32_t string_id = read_buff<uint32_t>(data); + printf_out << print_part( + part_fmt, descs.at(string_id).format_string.c_str(), size); + } else { + printf_out << print_part(part_fmt, data, size); + } + data += size; + } else { + // Vector argument + auto* data_start = data; + for (int i = 0; i < vec_len - 1; i++) { + printf_out << print_part(part_fmt, data, size / vec_len) << ","; + data += el_size; + } + printf_out << print_part(part_fmt, data, size / vec_len) + << remaining_str; + data = data_start + size; + } + + // Move to the next format part and prepare to handle the next arg + next_part = part_end; + arg_idx++; + } + + printf("%s", printf_out.str().c_str()); +} + +void cvk_printf(cvk_mem* printf_buffer, + const printf_descriptor_map_t& descriptors) { + if (!printf_buffer->map()) { + cvk_error("Could not map printf buffer"); + return; + } + char* data = static_cast<char*>(printf_buffer->host_va()); + auto buffer_size = printf_buffer->size(); + auto bytes_written = read_inc_buff<uint32_t>(data) * 4; + auto* data_start = data; + + while (static_cast<size_t>(data - data_start) < bytes_written && + static_cast<size_t>(data - data_start) < buffer_size) { + process_printf(data, descriptors); + } + + printf_buffer->unmap(); +} diff --git a/src/printf.hpp b/src/printf.hpp new file mode 100644 index 00000000..2bab00c3 --- /dev/null +++ b/src/printf.hpp @@ -0,0 +1,31 @@ +// Copyright 2022 The clvk authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "memory.hpp" + +#include <vector> + +struct printf_descriptor { + uint32_t printf_id; + std::string format_string; + std::vector<uint32_t> arg_sizes; +}; + +using printf_descriptor_map_t = std::unordered_map<uint32_t, printf_descriptor>; + +// Process the contents of the printf buffer and print the results to stdout +void cvk_printf(cvk_mem* printf_buffer, + const printf_descriptor_map_t& descriptors); diff --git a/src/program.cpp b/src/program.cpp index 953a2b51..7303de83 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -149,6 +149,8 @@ spv_result_t parse_reflection(void* user_data, return pushconstant::image_metadata; case NonSemanticClspvReflectionConstantDataPointerPushConstant: return pushconstant::module_constants_pointer; + case NonSemanticClspvReflectionPrintfBufferPointerPushConstant: + return pushconstant::printf_buffer_pointer; default: cvk_error_fn("Unhandled reflection instruction for push constant"); break; @@ -182,11 +184,12 @@ spv_result_t parse_reflection(void* user_data, const auto& name = parse_data->strings[inst->words[6]]; const auto& num_args = parse_data->constants[inst->words[7]]; + const auto& flags = parse_data->constants[inst->words[8]]; const auto& attributes = parse_data->strings[inst->words[9]]; parse_data->strings[inst->result_id] = name; - parse_data->binary->add_kernel(name, num_args, attributes); - + parse_data->binary->add_kernel(name, num_args, attributes, + flags); break; } case NonSemanticClspvReflectionArgumentInfo: { @@ -446,15 +449,14 @@ spv_result_t parse_reflection(void* user_data, hex2bin(data.c_str(), binfo.data.data()); if (ext_inst == NonSemanticClspvReflectionConstantDataStorageBuffer) { - binfo.type = constant_data_buffer_type::storage_buffer; + binfo.type = module_buffer_type::storage_buffer; binfo.set = parse_data->constants[inst->words[5]]; if (binfo.set >= spir_binary::MAX_DESCRIPTOR_SETS) return SPV_ERROR_INVALID_DATA; binfo.binding = parse_data->constants[inst->words[6]]; } else { - binfo.type = - constant_data_buffer_type::pointer_push_constant; + binfo.type = module_buffer_type::pointer_push_constant; binfo.pc_offset = parse_data->constants[inst->words[5]]; parse_data->binary->add_push_constant( pushconstant::module_constants_pointer, @@ -463,6 +465,38 @@ spv_result_t parse_reflection(void* user_data, parse_data->binary->set_constant_data_buffer(binfo); break; } + case NonSemanticClspvReflectionPrintfInfo: { + uint32_t printf_id = parse_data->constants[inst->words[5]]; + std::string printf_string = parse_data->strings[inst->words[6]]; + std::vector<uint32_t> printf_arg_sizes; + for (int i = 6; i < inst->num_operands; i++) { + printf_arg_sizes.push_back( + parse_data + ->constants[inst->words[inst->operands[i].offset]]); + } + parse_data->binary->add_printf_descriptor( + {printf_id, printf_string, printf_arg_sizes}); + break; + } + case NonSemanticClspvReflectionPrintfBufferStorageBuffer: { + printf_buffer_desc_info binfo; + binfo.type = module_buffer_type::storage_buffer; + binfo.set = parse_data->constants[inst->words[5]]; + binfo.binding = parse_data->constants[inst->words[6]]; + binfo.size = parse_data->constants[inst->words[7]]; + parse_data->binary->set_printf_buffer_info(binfo); + break; + } + case NonSemanticClspvReflectionPrintfBufferPointerPushConstant: { + printf_buffer_desc_info binfo; + binfo.type = module_buffer_type::pointer_push_constant; + binfo.pc_offset = parse_data->constants[inst->words[5]]; + binfo.size = parse_data->constants[inst->words[7]]; + parse_data->binary->set_printf_buffer_info(binfo); + parse_data->binary->add_push_constant( + pushconstant::printf_buffer_pointer, {binfo.pc_offset, 8u}); + break; + } default: return SPV_ERROR_INVALID_DATA; } @@ -912,6 +946,11 @@ std::string cvk_program::prepare_build_options(const cvk_device* device) const { } } + options += " -enable-printf "; + options += + " -printf-buffer-size=" + std::to_string(config.printf_buffer_size) + + " "; + #if COMPILER_AVAILABLE options += " " + config.clspv_options() + " "; #endif @@ -1768,7 +1807,7 @@ bool cvk_entry_point:: auto info = m_program->module_constant_data_buffer_info(); // If the program scope buffer isn't passed as a storage buffer (i.e. // it is passed a pointer push constant), there is nothing to bind here - if (info->type != constant_data_buffer_type::storage_buffer) { + if (info->type != module_buffer_type::storage_buffer) { return true; } VkDescriptorSetLayoutBinding binding = { @@ -1789,6 +1828,34 @@ bool cvk_entry_point:: return true; } +bool cvk_entry_point::build_descriptor_sets_layout_bindings_for_printf_buffer( + binding_stat_map& smap) { + std::vector<VkDescriptorSetLayoutBinding> layoutBindings; + if (m_program->printf_buffer_info().size > 0 && + (m_program->kernel_flags(m_name) & + NonSemanticClspvReflectionMayUsePrintf)) { + auto info = m_program->printf_buffer_info(); + if (info.type != module_buffer_type::storage_buffer) { + return true; + } + VkDescriptorSetLayoutBinding binding = { + info.binding, // binding + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // descriptorType + 1, // decriptorCount + VK_SHADER_STAGE_COMPUTE_BIT, // stageFlags + nullptr // pImmutableSamplers + }; + layoutBindings.push_back(binding); + smap[binding.descriptorType]++; + } + + if (!build_descriptor_set_layout(layoutBindings)) { + return false; + } + + return true; +} + cl_int cvk_entry_point::init() { VkResult res; @@ -1825,6 +1892,10 @@ cl_int cvk_entry_point::init() { bindingTypes)) { return CL_INVALID_VALUE; } + if (!build_descriptor_sets_layout_bindings_for_printf_buffer( + bindingTypes)) { + return CL_INVALID_VALUE; + } // Do we have POD arguments? for (auto& arg : m_args) { diff --git a/src/program.hpp b/src/program.hpp index 421b172c..2142040e 100644 --- a/src/program.hpp +++ b/src/program.hpp @@ -32,6 +32,7 @@ #include "init.hpp" #include "memory.hpp" #include "objects.hpp" +#include "printf.hpp" const int SPIR_WORD_SIZE = 4; @@ -124,6 +125,7 @@ enum class pushconstant region_group_offset, image_metadata, module_constants_pointer, + printf_buffer_pointer, }; struct pushconstant_desc { @@ -143,7 +145,7 @@ enum class spec_constant subgroup_max_size, }; -enum class constant_data_buffer_type +enum class module_buffer_type { storage_buffer, pointer_push_constant, @@ -180,13 +182,21 @@ struct user_spec_constant_data { }; struct constant_data_buffer_info { - constant_data_buffer_type type; + module_buffer_type type; uint32_t set; uint32_t binding; uint32_t pc_offset; std::vector<char> data; }; +struct printf_buffer_desc_info { + module_buffer_type type; + uint32_t set; + uint32_t binding; + uint32_t pc_offset; + uint32_t size = 0; +}; + struct spirv_validation_options { bool uniform_buffer_std_layout = false; }; @@ -212,6 +222,7 @@ class spir_binary { std::unordered_map<std::string, std::vector<kernel_argument>>; using kernels_reqd_work_group_size_map = std::unordered_map<std::string, std::array<uint32_t, 3>>; + using kernels_flags_map = std::unordered_map<std::string, uint32_t>; public: spir_binary(spv_target_env env) @@ -268,8 +279,13 @@ class spir_binary { } } + const printf_descriptor_map_t& printf_descriptors() const { + return m_printf_descriptors; + } + void add_kernel(const std::string& name, uint32_t num_args, - const std::string& attributes) { + const std::string& attributes, uint32_t flags) { + m_flags[name] = flags; auto& args = m_dmaps[name]; kernel_argument unused = { {}, 0, 0, 0, 0, 0, kernel_argument_kind::unused, 0, 0}; @@ -332,6 +348,24 @@ class spir_binary { m_image_metadata[name][ordinal].set_data_type(offset); } + void add_printf_descriptor(printf_descriptor&& desc) { + m_printf_descriptors[desc.printf_id] = desc; + } + + void set_printf_buffer_info(const printf_buffer_desc_info& info) { + m_printf_buffer_info = info; + } + + const printf_buffer_desc_info& printf_buffer_info() const { + return m_printf_buffer_info; + } + + const printf_descriptor_map_t& get_printf_descriptors() const { + return m_printf_descriptors; + } + + const kernels_flags_map& kernels_flags() const { return m_flags; } + private: spv_context m_context; std::vector<uint32_t> m_code; @@ -339,10 +373,13 @@ class spir_binary { std::unordered_map<pushconstant, pushconstant_desc> m_push_constants; std::unordered_map<spec_constant, uint32_t> m_spec_constants; image_metadata_map m_image_metadata; + std::unordered_map<uint32_t, printf_descriptor> m_printf_descriptors; + printf_buffer_desc_info m_printf_buffer_info; std::unique_ptr<constant_data_buffer_info> m_constant_data_buffer; kernels_arguments_map m_dmaps; kernels_reqd_work_group_size_map m_reqd_work_group_sizes; std::unordered_map<std::string, std::string> m_kernels_attributes; + kernels_flags_map m_flags; bool m_loaded_from_binary; spv_target_env m_target_env; }; @@ -453,6 +490,8 @@ class cvk_entry_point { binding_stat_map& smap); bool build_descriptor_sets_layout_bindings_for_program_scope_buffers( binding_stat_map& smap); + bool build_descriptor_sets_layout_bindings_for_printf_buffer( + binding_stat_map& smap); // Structures for caching pipelines based on specialization constants struct SpecConstantMapHash { @@ -604,6 +643,11 @@ struct cvk_program : public _cl_program, api_object<object_magic::program> { unsigned num_kernels() const { return m_binary.num_kernels(); } bool loaded_from_binary() const { return m_binary.loaded_from_binary(); } + bool uses_printf() { return !m_binary.printf_descriptors().empty(); } + const std::unordered_map<uint32_t, printf_descriptor>& + printf_descriptors() { + return m_binary.get_printf_descriptors(); + } const std::vector<kernel_argument>* args_for_kernel(std::string& name) { auto const& args = m_binary.kernels_arguments().find(name); @@ -701,6 +745,10 @@ struct cvk_program : public _cl_program, api_object<object_magic::program> { return m_binary.constant_data_buffer(); } + const printf_buffer_desc_info& printf_buffer_info() const { + return m_binary.printf_buffer_info(); + } + bool options_allow_split_region(std::string options) { if (options.find("-uniform-workgroup-size") != std::string::npos) return false; @@ -721,6 +769,10 @@ struct cvk_program : public _cl_program, api_object<object_magic::program> { return m_binary.kernels_attributes().at(kernel_name); } + uint32_t kernel_flags(const std::string& kernel) const { + return m_binary.kernels_flags().at(kernel); + } + private: void do_build(); std::string prepare_build_options(const cvk_device* device) const; @@ -770,6 +822,7 @@ struct cvk_program : public _cl_program, api_object<object_magic::program> { VkPipelineCache m_pipeline_cache; std::unique_ptr<cvk_buffer> m_module_constant_data_buffer; std::unordered_map<uint32_t, user_spec_constant_data> m_user_spec_constants; + std::unique_ptr<cvk_buffer> m_printf_buffer; }; static inline cvk_program* icd_downcast(cl_program program) { diff --git a/src/queue.cpp b/src/queue.cpp index c2d4b9e2..98a0adb6 100644 --- a/src/queue.cpp +++ b/src/queue.cpp @@ -593,6 +593,25 @@ void cvk_command_kernel::update_global_push_constants( &dev_addr); } + if (auto pc = program->push_constant(pushconstant::printf_buffer_pointer)) { + CVK_ASSERT(pc->size == 8); + CVK_ASSERT(program->uses_printf()); + + auto buffer = m_queue->printf_buffer(); + VkBufferDeviceAddressInfo info{}; + info.buffer = buffer->vulkan_buffer(); + info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO; + info.pNext = NULL; + + auto dev_addr = vkGetBufferDeviceAddress( + m_kernel->context()->device()->vulkan_device(), &info); + dev_addr += buffer->vulkan_buffer_offset(); + + vkCmdPushConstants(command_buffer, m_kernel->pipeline_layout(), + VK_SHADER_STAGE_COMPUTE_BIT, pc->offset, pc->size, + &dev_addr); + } + uint32_t image_metadata_pc_start = UINT32_MAX; uint32_t image_metadata_pc_end = 0; if (const auto* md = m_kernel->get_image_metadata()) { @@ -757,6 +776,32 @@ cl_int cvk_command_kernel::dispatch_uniform_region_within_vklimits( vkCmdDispatch(command_buffer, num_workgroups[0], num_workgroups[1], num_workgroups[2]); + // If we have a kernel that requires serial execution (i.e. regions are not + // executed in parallel with other regions or other kernels) then serialize + // the command buffer + if (m_kernel->requires_serialized_execution()) { + VkMemoryBarrier memoryBarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER, + nullptr, VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_MEMORY_READ_BIT | + VK_ACCESS_MEMORY_WRITE_BIT}; + + // Workaround for a bug on some NVIDIA devices. + // This should already be covered by VK_ACCESS_MEMORY_READ_BIT. + memoryBarrier.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT; + + vkCmdPipelineBarrier( + command_buffer, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // srcStageMask + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // dstStageMask + 0, // dependencyFlags + 1, // memoryBarrierCount + &memoryBarrier, + 0, // bufferMemoryBarrierCount + nullptr, // pBufferMemoryBarriers + 0, // imageMemoryBarrierCount + nullptr); // pImageMemoryBarriers + } + return CL_SUCCESS; } @@ -915,6 +960,41 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) { return CL_OUT_OF_RESOURCES; } + // Setup printf buffer descriptor if needed + if (m_kernel->program()->uses_printf()) { + // Create and initialize the printf buffer + auto buffer = m_queue->printf_buffer(); + if (buffer->map()) { + memset(buffer->host_va(), 0, 4); + buffer->unmap(); + } + + if (m_kernel->program()->printf_buffer_info().type == + module_buffer_type::storage_buffer) { + + VkDescriptorBufferInfo bufferInfo = {buffer->vulkan_buffer(), + 0, // offset + VK_WHOLE_SIZE}; + + auto* ds = m_argument_values->descriptor_sets(); + VkWriteDescriptorSet writeDescriptorSet = { + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + nullptr, + ds[m_kernel->program()->printf_buffer_info().set], + m_kernel->program()->printf_buffer_info().binding, + 0, // dstArrayElement + 1, // descriptorCount + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // descriptorType + nullptr, // pImageInfo + &bufferInfo, + nullptr, // pTexelBufferView + }; + + vkUpdateDescriptorSets(m_queue->device()->vulkan_device(), 1u, + &writeDescriptorSet, 0, nullptr); + } + } + // Bind descriptors and update push constants if (m_kernel->num_set_layouts() > 0) { vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, @@ -958,6 +1038,15 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) { return CL_SUCCESS; } +cl_int cvk_command_kernel::do_post_action() { + if (m_kernel->uses_printf()) { + cvk_printf(m_queue->printf_buffer(), + m_kernel->program()->printf_descriptors()); + } + + return CL_SUCCESS; +} + bool cvk_command_batchable::can_be_batched() const { bool unresolved_user_event_dependencies = false; bool unresolved_other_queue_dependencies = false; @@ -1078,7 +1167,7 @@ cl_int cvk_command_batchable::do_action() { return CL_OUT_OF_RESOURCES; } - return CL_COMPLETE; + return do_post_action(); } cl_int cvk_command_batch::do_action() { diff --git a/src/queue.hpp b/src/queue.hpp index d28c868d..bd24f420 100644 --- a/src/queue.hpp +++ b/src/queue.hpp @@ -22,6 +22,7 @@ #include "init.hpp" #include "kernel.hpp" #include "objects.hpp" +#include "printf.hpp" #include "tracing.hpp" struct cvk_command; @@ -171,6 +172,16 @@ struct cvk_command_queue : public _cl_command_queue, return m_command_pool.free_command_buffer(cmdbuf); } + cvk_buffer* printf_buffer() { + if (!m_printf_buffer) { + cl_int status; + m_printf_buffer = cvk_buffer::create( + context(), 0, config.printf_buffer_size, nullptr, &status); + CVK_ASSERT(status == CL_SUCCESS); + } + return m_printf_buffer.get(); + } + void command_pool_lock() { m_command_pool.lock(); } void command_pool_unlock() { m_command_pool.unlock(); } @@ -238,6 +249,8 @@ struct cvk_command_queue : public _cl_command_queue, TRACE_CNT_VAR(batch_in_flight_counter); TRACE_CNT_VAR(group_in_flight_counter); + + std::unique_ptr<cvk_buffer> m_printf_buffer; }; static inline cvk_command_queue* icd_downcast(cl_command_queue queue) { @@ -627,7 +640,7 @@ struct cvk_command_batchable : public cvk_command { } } - bool can_be_batched() const override final; + bool can_be_batched() const override; bool is_built_before_enqueue() const override final { return false; } CHECK_RETURN cl_int get_timestamp_query_results(cl_ulong* start, @@ -638,6 +651,7 @@ struct cvk_command_batchable : public cvk_command { CHECK_RETURN virtual cl_int build_batchable_inner(cvk_command_buffer& cmdbuf) = 0; CHECK_RETURN cl_int do_action() override; + CHECK_RETURN virtual cl_int do_post_action() { return CL_SUCCESS; } CHECK_RETURN cl_int set_profiling_info_end(cl_ulong sync_dev, cl_ulong sync_host) { @@ -733,6 +747,13 @@ struct cvk_command_kernel final : public cvk_command_batchable { CHECK_RETURN cl_int build_batchable_inner(cvk_command_buffer& cmdbuf) override final; + CHECK_RETURN cl_int do_post_action() override final; + + bool can_be_batched() const override final { + return !m_kernel->uses_printf() && + cvk_command_batchable::can_be_batched(); + } + const std::vector<cvk_mem*> memory_objects() const override { std::vector<cvk_mem*> ret; std::shared_ptr<cvk_kernel_argument_values> argvals = m_argument_values; From 0860a0d7f57c80f9ffe93875d9112bc35a9f7b70 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <rjodin@chromium.org> Date: Thu, 25 May 2023 15:44:00 +0200 Subject: [PATCH 02/13] fix to work with stdc++ in google3 --- src/printf.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/printf.cpp b/src/printf.cpp index 6182270d..8d386c47 100644 --- a/src/printf.cpp +++ b/src/printf.cpp @@ -95,8 +95,7 @@ std::string print_part(const std::string& fmt, const char* data, size_t size) { // than likely to fit everything. If it doesn't fit, just keep retrying with // double the output size. size_t out_size = fmt.size() + 1024; - std::vector<char> out; - out.reserve(out_size); + std::vector<char> out(out_size); out[0] = '\0'; auto conversion = std::tolower(get_fmt_conversion(fmt)); From 8f131b2e7e55ec9f5c33141710194c67e71a2e85 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <rjodin@chromium.org> Date: Sat, 24 Jun 2023 09:44:11 +0200 Subject: [PATCH 03/13] fix support for cl_half --- src/printf.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/printf.cpp b/src/printf.cpp index 8d386c47..c9ea14a0 100644 --- a/src/printf.cpp +++ b/src/printf.cpp @@ -111,7 +111,10 @@ std::string print_part(const std::string& fmt, const char* data, size_t size) { case 'e': case 'g': case 'a': { - if (size == 4) + if (size == 2) + written = snprintf(out.data(), out_size, fmt.c_str(), + cl_half_to_float(read_buff<cl_half>(data))); + else if (size == 4) written = snprintf(out.data(), out_size, fmt.c_str(), read_buff<float>(data)); else From 14af35a53c1e5021da34882b9104de80781f36bc Mon Sep 17 00:00:00 2001 From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Date: Mon, 10 Jul 2023 08:06:25 +0200 Subject: [PATCH 04/13] Update src/config.def MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kévin Petit <kpet@free.fr> --- src/config.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.def b/src/config.def index a709c0be..2e503f21 100644 --- a/src/config.def +++ b/src/config.def @@ -25,7 +25,7 @@ OPTION(bool, physical_addressing, false) OPTION(std::string, clspv_native_builtins, "") OPTION(std::string, clspv_library_builtins, "") -OPTION(uint32_t, printf_buffer_size, 1048576u) +OPTION(uint32_t, printf_buffer_size, 1024*1024u) #if COMPILER_AVAILABLE OPTION(std::string, clspv_options, "") From a38cf190b0c433ace0c81c5a57d188963bb82b57 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Date: Mon, 10 Jul 2023 08:06:31 +0200 Subject: [PATCH 05/13] Update src/printf.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kévin Petit <kpet@free.fr> --- src/printf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/printf.cpp b/src/printf.cpp index c9ea14a0..76cee1ae 100644 --- a/src/printf.cpp +++ b/src/printf.cpp @@ -149,7 +149,7 @@ std::string print_part(const std::string& fmt, const char* data, size_t size) { finished = true; } else { out_size *= 2; - out.reserve(out_size); + out.resize(out_size); } } From 1e4e8d5f2a93246213c1c744478394a6440b140c Mon Sep 17 00:00:00 2001 From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Date: Mon, 10 Jul 2023 09:08:57 +0200 Subject: [PATCH 06/13] Update src/program.hpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kévin Petit <kpet@free.fr> --- src/program.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/program.hpp b/src/program.hpp index 2142040e..891bac8c 100644 --- a/src/program.hpp +++ b/src/program.hpp @@ -822,7 +822,6 @@ struct cvk_program : public _cl_program, api_object<object_magic::program> { VkPipelineCache m_pipeline_cache; std::unique_ptr<cvk_buffer> m_module_constant_data_buffer; std::unordered_map<uint32_t, user_spec_constant_data> m_user_spec_constants; - std::unique_ptr<cvk_buffer> m_printf_buffer; }; static inline cvk_program* icd_downcast(cl_program program) { From efae40e6c36e51ea63068f32290fc6577ad43091 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Date: Mon, 10 Jul 2023 09:09:41 +0200 Subject: [PATCH 07/13] Update src/queue.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Kévin Petit <kpet@free.fr> --- src/queue.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/queue.cpp b/src/queue.cpp index 98a0adb6..7bcbe626 100644 --- a/src/queue.cpp +++ b/src/queue.cpp @@ -598,14 +598,7 @@ void cvk_command_kernel::update_global_push_constants( CVK_ASSERT(program->uses_printf()); auto buffer = m_queue->printf_buffer(); - VkBufferDeviceAddressInfo info{}; - info.buffer = buffer->vulkan_buffer(); - info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO; - info.pNext = NULL; - - auto dev_addr = vkGetBufferDeviceAddress( - m_kernel->context()->device()->vulkan_device(), &info); - dev_addr += buffer->vulkan_buffer_offset(); + auto dev_addr = buffer->device_address(); vkCmdPushConstants(command_buffer, m_kernel->pipeline_layout(), VK_SHADER_STAGE_COMPUTE_BIT, pc->offset, pc->size, From 30d2ddbbcbb3e0753b6164863cf771d40246b76c Mon Sep 17 00:00:00 2001 From: Romaric Jodin <rjodin@chromium.org> Date: Mon, 10 Jul 2023 13:56:22 +0200 Subject: [PATCH 08/13] fixes following kpet feedbacks --- src/kernel.hpp | 5 +---- src/printf.cpp | 11 ++++++++--- src/printf.hpp | 4 ++-- src/program.cpp | 8 ++++++-- src/program.hpp | 2 ++ src/queue.cpp | 32 ++++++++++++++++++++++---------- src/queue.hpp | 21 +++++++++++++++++++-- 7 files changed, 60 insertions(+), 23 deletions(-) diff --git a/src/kernel.hpp b/src/kernel.hpp index f56de3b3..9591910c 100644 --- a/src/kernel.hpp +++ b/src/kernel.hpp @@ -145,10 +145,7 @@ struct cvk_kernel : public _cl_kernel, api_object<object_magic::kernel> { return m_args.at(arg_index).info.type_qualifier; } - bool uses_printf() const { - return m_program->kernel_flags(m_name) & - NonSemanticClspvReflectionMayUsePrintf; - } + bool uses_printf() const { return m_entry_point->uses_printf(); } bool requires_serialized_execution() const { return uses_printf(); } diff --git a/src/printf.cpp b/src/printf.cpp index 76cee1ae..2a7bf799 100644 --- a/src/printf.cpp +++ b/src/printf.cpp @@ -229,21 +229,26 @@ void process_printf(char*& data, const printf_descriptor_map_t& descs) { printf("%s", printf_out.str().c_str()); } -void cvk_printf(cvk_mem* printf_buffer, +cl_int cvk_printf(cvk_mem* printf_buffer, const printf_descriptor_map_t& descriptors) { + CVK_ASSERT(printf_buffer); if (!printf_buffer->map()) { cvk_error("Could not map printf buffer"); - return; + return CL_OUT_OF_RESOURCES; } char* data = static_cast<char*>(printf_buffer->host_va()); auto buffer_size = printf_buffer->size(); + const auto bytes_written_size = sizeof(uint32_t); + const auto data_size = buffer_size - bytes_written_size; auto bytes_written = read_inc_buff<uint32_t>(data) * 4; auto* data_start = data; while (static_cast<size_t>(data - data_start) < bytes_written && - static_cast<size_t>(data - data_start) < buffer_size) { + static_cast<size_t>(data - data_start) < data_size) { process_printf(data, descriptors); } printf_buffer->unmap(); + + return CL_SUCCESS; } diff --git a/src/printf.hpp b/src/printf.hpp index 2bab00c3..9941ab18 100644 --- a/src/printf.hpp +++ b/src/printf.hpp @@ -27,5 +27,5 @@ struct printf_descriptor { using printf_descriptor_map_t = std::unordered_map<uint32_t, printf_descriptor>; // Process the contents of the printf buffer and print the results to stdout -void cvk_printf(cvk_mem* printf_buffer, - const printf_descriptor_map_t& descriptors); +cl_int cvk_printf(cvk_mem* printf_buffer, + const printf_descriptor_map_t& descriptors); diff --git a/src/program.cpp b/src/program.cpp index 7303de83..5a30d7e9 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -1832,8 +1832,7 @@ bool cvk_entry_point::build_descriptor_sets_layout_bindings_for_printf_buffer( binding_stat_map& smap) { std::vector<VkDescriptorSetLayoutBinding> layoutBindings; if (m_program->printf_buffer_info().size > 0 && - (m_program->kernel_flags(m_name) & - NonSemanticClspvReflectionMayUsePrintf)) { + (uses_printf())) { auto info = m_program->printf_buffer_info(); if (info.type != module_buffer_type::storage_buffer) { return true; @@ -2154,3 +2153,8 @@ std::unique_ptr<cvk_buffer> cvk_entry_point::allocate_pod_buffer() { return buffer; } + +bool cvk_entry_point::uses_printf() const { + return m_program->kernel_flags(m_name) & + NonSemanticClspvReflectionMayUsePrintf; +} diff --git a/src/program.hpp b/src/program.hpp index 891bac8c..53277c61 100644 --- a/src/program.hpp +++ b/src/program.hpp @@ -460,6 +460,8 @@ class cvk_entry_point { cvk_program* program() const { return m_program; } + bool uses_printf() const; + private: const uint32_t MAX_INSTANCES = 2 * 1024; // FIXME find a better definition diff --git a/src/queue.cpp b/src/queue.cpp index 7bcbe626..2f505b2c 100644 --- a/src/queue.cpp +++ b/src/queue.cpp @@ -540,7 +540,7 @@ bool cvk_command_buffer::submit_and_wait() { return true; } -void cvk_command_kernel::update_global_push_constants( +cl_int cvk_command_kernel::update_global_push_constants( cvk_command_buffer& command_buffer) { auto program = m_kernel->program(); @@ -597,7 +597,11 @@ void cvk_command_kernel::update_global_push_constants( CVK_ASSERT(pc->size == 8); CVK_ASSERT(program->uses_printf()); - auto buffer = m_queue->printf_buffer(); + auto buffer = m_queue->get_printf_buffer(); + if (buffer == nullptr) { + cvk_error_fn("printf buffer was not created"); + return CL_OUT_OF_RESOURCES; + } auto dev_addr = buffer->device_address(); vkCmdPushConstants(command_buffer, m_kernel->pipeline_layout(), @@ -653,6 +657,7 @@ void cvk_command_kernel::update_global_push_constants( } } } + return CL_SUCCESS; } cl_int cvk_command_kernel::dispatch_uniform_region_within_vklimits( @@ -956,10 +961,10 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) { // Setup printf buffer descriptor if needed if (m_kernel->program()->uses_printf()) { // Create and initialize the printf buffer - auto buffer = m_queue->printf_buffer(); - if (buffer->map()) { - memset(buffer->host_va(), 0, 4); - buffer->unmap(); + auto buffer = m_queue->get_or_create_printf_buffer(); + auto err = m_queue->reset_printf_buffer(); + if (err != CL_SUCCESS) { + return err; } if (m_kernel->program()->printf_buffer_info().type == @@ -996,10 +1001,13 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) { m_argument_values->descriptor_sets(), 0, 0); } - update_global_push_constants(command_buffer); + auto err = update_global_push_constants(command_buffer); + if (err != CL_SUCCESS) { + return err; + } // Dispatch work - auto err = build_and_dispatch_regions(command_buffer); + err = build_and_dispatch_regions(command_buffer); if (err != CL_SUCCESS) { return err; } @@ -1033,8 +1041,12 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) { cl_int cvk_command_kernel::do_post_action() { if (m_kernel->uses_printf()) { - cvk_printf(m_queue->printf_buffer(), - m_kernel->program()->printf_descriptors()); + auto buffer = m_queue->get_printf_buffer(); + if (buffer == nullptr) { + cvk_error_fn("printf buffer was not created"); + return CL_OUT_OF_RESOURCES; + } + return cvk_printf(buffer, m_kernel->program()->printf_descriptors()); } return CL_SUCCESS; diff --git a/src/queue.hpp b/src/queue.hpp index bd24f420..f4bbfc7d 100644 --- a/src/queue.hpp +++ b/src/queue.hpp @@ -172,7 +172,7 @@ struct cvk_command_queue : public _cl_command_queue, return m_command_pool.free_command_buffer(cmdbuf); } - cvk_buffer* printf_buffer() { + cvk_buffer* get_or_create_printf_buffer() { if (!m_printf_buffer) { cl_int status; m_printf_buffer = cvk_buffer::create( @@ -182,6 +182,23 @@ struct cvk_command_queue : public _cl_command_queue, return m_printf_buffer.get(); } + cvk_buffer* get_printf_buffer() { + if (!m_printf_buffer) { + return nullptr; + } + return m_printf_buffer.get(); + } + + cl_int reset_printf_buffer() { + if (m_printf_buffer && m_printf_buffer->map()) { + memset(m_printf_buffer->host_va(), 0, 4); + m_printf_buffer->unmap(); + return CL_SUCCESS; + } + cvk_error_fn("Could not reset printf buffer"); + return CL_OUT_OF_RESOURCES; + } + void command_pool_lock() { m_command_pool.lock(); } void command_pool_unlock() { m_command_pool.unlock(); } @@ -766,7 +783,7 @@ struct cvk_command_kernel final : public cvk_command_batchable { private: CHECK_RETURN cl_int build_and_dispatch_regions(cvk_command_buffer& command_buffer); - void update_global_push_constants(cvk_command_buffer& command_buffer); + CHECK_RETURN cl_int update_global_push_constants(cvk_command_buffer& command_buffer); CHECK_RETURN cl_int dispatch_uniform_region_within_vklimits( const cvk_ndrange& region, cvk_command_buffer& command_buffer); CHECK_RETURN cl_int dispatch_uniform_region_iterate( From d33db68352e1e192fef7c0593f94a8f7c7b22b79 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <rjodin@chromium.org> Date: Mon, 10 Jul 2023 13:56:52 +0200 Subject: [PATCH 09/13] add tests --- src/exports.map | 2 + src/program.cpp | 6 +- src/unit.cpp | 7 ++ src/unit.hpp | 4 ++ src/utils.cpp | 13 ++-- src/utils.hpp | 4 +- tests/api/CMakeLists.txt | 1 + tests/api/printf.cpp | 141 +++++++++++++++++++++++++++++++++++++++ 8 files changed, 170 insertions(+), 8 deletions(-) create mode 100644 tests/api/printf.cpp diff --git a/src/exports.map b/src/exports.map index c2aac27f..9bb5cdfd 100644 --- a/src/exports.map +++ b/src/exports.map @@ -2,6 +2,8 @@ CLVK_UNIT_TESTING_FCT { global: clvk_override_device_max_compute_work_group_count; clvk_restore_device_properties; + clvk_override_printf_buffer_size; + cvk_mkdtemp; local: *; }; diff --git a/src/program.cpp b/src/program.cpp index 5a30d7e9..f025b6bf 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -966,7 +966,8 @@ cl_int cvk_program::parse_user_spec_constants() { std::filesystem::path tmp_prefix(config.compiler_temp_dir()); std::filesystem::path tmp_suffix("clvk-XXXXXX"); std::string tmp_template = (tmp_prefix / tmp_suffix).string(); - const char* tmp = cvk_mkdtemp(tmp_template); + const char* tmp = + cvk_mkdtemp((char*)tmp_template.c_str, tmp_template.size()); if (tmp == nullptr) { cvk_error_fn("Could not create temporary folder \"%s\"", tmp_template.c_str()); @@ -1361,7 +1362,8 @@ cl_build_status cvk_program::do_build_inner(const cvk_device* device) { std::filesystem::path tmp_prefix(config.compiler_temp_dir()); std::filesystem::path tmp_suffix("clvk-XXXXXX"); std::string tmp_template = (tmp_prefix / tmp_suffix).string(); - const char* tmp = cvk_mkdtemp(tmp_template); + const char* tmp = + cvk_mkdtemp((char*)tmp_template.c_str(), tmp_template.size()); if (tmp == nullptr) { cvk_error_fn("Could not create temporary folder \"%s\"", tmp_template.c_str()); diff --git a/src/unit.cpp b/src/unit.cpp index 4c275b39..d5a6fb31 100644 --- a/src/unit.cpp +++ b/src/unit.cpp @@ -37,6 +37,13 @@ void CL_API_CALL clvk_restore_device_properties(cl_device_id device) { icd_downcast(device)->restore_device_properties(); } + +void CL_API_CALL clvk_override_printf_buffer_size(uint32_t size) { + auto printf_buffer_size = + (config_value<uint32_t>*)&config.printf_buffer_size; + printf_buffer_size->value = size; + printf_buffer_size->set = true; +} } #endif diff --git a/src/unit.hpp b/src/unit.hpp index f6ff6259..4ee381ea 100644 --- a/src/unit.hpp +++ b/src/unit.hpp @@ -24,6 +24,10 @@ void CL_API_CALL clvk_override_device_max_compute_work_group_count( cl_device_id device, uint32_t x, uint32_t y, uint32_t z); void CL_API_CALL clvk_restore_device_properties(cl_device_id device); + +void CL_API_CALL clvk_override_printf_buffer_size(uint32_t size); + +char* CL_API_CALL clvk_mkdtemp(char* tmpl, size_t size); } #endif diff --git a/src/utils.cpp b/src/utils.cpp index bc99b472..5afc2978 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -26,21 +26,24 @@ #include <io.h> #endif -char* cvk_mkdtemp(std::string& tmpl) { +extern "C" { +char* CL_API_CALL cvk_mkdtemp(char* tmpl, size_t size) { #ifdef WIN32 - if (_mktemp_s(&tmpl.front(), tmpl.size() + 1) != 0) { + if (_mktemp_s(tmpl, size + 1) != 0) { return nullptr; } - if (!CreateDirectory(tmpl.c_str(), nullptr)) { + if (!CreateDirectory(tmpl, nullptr)) { return nullptr; } - return &tmpl.front(); + return tmpl; #else - return mkdtemp(&tmpl.front()); + UNUSED(size); + return mkdtemp(tmpl); #endif } +} int cvk_exec(const std::string& cmd, std::string* output) { #ifdef WIN32 diff --git a/src/utils.hpp b/src/utils.hpp index 52d73b6f..750d88fe 100644 --- a/src/utils.hpp +++ b/src/utils.hpp @@ -26,7 +26,9 @@ #define CHECK_RETURN #endif -char* cvk_mkdtemp(std::string& tmpl); +extern "C" { +char* CL_API_CALL cvk_mkdtemp(char* tmpl, size_t size); +} int cvk_exec(const std::string& cmd, std::string* output = nullptr); #define CVK_VK_CHECK_INTERNAL(logfn, res, msg) \ diff --git a/tests/api/CMakeLists.txt b/tests/api/CMakeLists.txt index d99e50cf..abc84e39 100644 --- a/tests/api/CMakeLists.txt +++ b/tests/api/CMakeLists.txt @@ -20,6 +20,7 @@ add_gtest_executable(api_tests local_buffer.cpp main.cpp platform.cpp + printf.cpp profiling.cpp simple.cpp simple_image.cpp diff --git a/tests/api/printf.cpp b/tests/api/printf.cpp new file mode 100644 index 00000000..f29879b8 --- /dev/null +++ b/tests/api/printf.cpp @@ -0,0 +1,141 @@ +// Copyright 2023 The clvk authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifdef CLVK_UNIT_TESTING_ENABLED + +#include "testcl.hpp" +#include "utils.hpp" +#include "unit.hpp" + +#include <filesystem> + +static std::string stdoutFileName; + +#define BUFFER_SIZE 1024 +static char stdoutBuffer[BUFFER_SIZE]; + +static void releaseStdout(int fd) { + fflush(stdout); + dup2(fd, fileno(stdout)); + close(fd); +} + +static bool getStdout(int& fd) { + fd = dup(fileno(stdout)); + if (!freopen(stdoutFileName.c_str(), "w", stdout)) { + fprintf(stderr, "ERROR!\n"); + releaseStdout(fd); + return false; + } + return true; +} + +static char* getStdoutContent() { + FILE* f; + memset(stdoutBuffer, 0, BUFFER_SIZE); + fflush(stdout); + f = fopen(stdoutFileName.c_str(), "r"); + if (f == nullptr) + return nullptr; + + char* ptr = stdoutBuffer; + do { + ptr += strlen(ptr); + ptr = fgets(ptr, BUFFER_SIZE, f); + } while (ptr != nullptr); + fclose(f); + + return stdoutBuffer; +} + +struct temp_folder_deletion { + ~temp_folder_deletion() { + if (!m_path.empty()) + std::filesystem::remove_all(m_path.c_str()); + } + void set_path(std::string path) { m_path = path; } + +private: + std::string m_path; +}; + +static std::string getStdoutFileName(temp_folder_deletion& temp) { + char template_tmp_dir[] = "clvk-XXXXXX"; + std::filesystem::path prefix( + cvk_mkdtemp(template_tmp_dir, sizeof(template_tmp_dir))); + std::filesystem::path suffix("stdout_buffer"); + temp.set_path(prefix.string()); + return (prefix / suffix).string(); +} + +TEST_F(WithCommandQueue, SimplePrintf) { + temp_folder_deletion temp; + stdoutFileName = getStdoutFileName(temp); + + int fd; + ASSERT_TRUE(getStdout(fd)); + + const char message[] = "Hello World!"; + char* source = nullptr; + asprintf(&source, "kernel void test_printf() { printf(\"%s\");}", message); + ASSERT_NE(source, nullptr); + auto kernel = CreateKernel(source, "test_printf"); + free(source); + + size_t gws = 1; + size_t lws = 1; + EnqueueNDRangeKernel(kernel, 1, nullptr, &gws, &lws, 0, nullptr, nullptr); + Finish(); + + releaseStdout(fd); + auto printf_buffer = getStdoutContent(); + ASSERT_NE(printf_buffer, nullptr); + + ASSERT_STREQ(printf_buffer, message); +} + +TEST_F(WithCommandQueue, TooLongPrintf) { + clvk_override_printf_buffer_size(24); + + temp_folder_deletion temp; + stdoutFileName = getStdoutFileName(temp); + + int fd; + ASSERT_TRUE(getStdout(fd)); + + const char* source = R"( + kernel void test_printf() { + for (unsigned i = 0; i < 3; i++){ + printf("get_global_id(%u) = %u\n", i, get_global_id(i)); + } + } + )"; + auto kernel = CreateKernel(source, "test_printf"); + + size_t gws = 1; + size_t lws = 1; + EnqueueNDRangeKernel(kernel, 1, nullptr, &gws, &lws, 0, nullptr, nullptr); + Finish(); + + releaseStdout(fd); + auto printf_buffer = getStdoutContent(); + ASSERT_NE(printf_buffer, nullptr); + + // We only get the first 2 prints because the buffer is too small to get the + // last one. + const char* message = "get_global_id(0) = 0\nget_global_id(1) = 0\n"; + ASSERT_STREQ(printf_buffer, message); +} + +#endif From 3c5e03d5dc8808faa722a2a8e223e9dc5fccd003 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <rjodin@chromium.org> Date: Mon, 10 Jul 2023 14:29:03 +0200 Subject: [PATCH 10/13] fix formatting --- src/printf.cpp | 2 +- src/program.cpp | 3 +-- src/queue.hpp | 3 ++- tests/api/printf.cpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/printf.cpp b/src/printf.cpp index 2a7bf799..460012ac 100644 --- a/src/printf.cpp +++ b/src/printf.cpp @@ -230,7 +230,7 @@ void process_printf(char*& data, const printf_descriptor_map_t& descs) { } cl_int cvk_printf(cvk_mem* printf_buffer, - const printf_descriptor_map_t& descriptors) { + const printf_descriptor_map_t& descriptors) { CVK_ASSERT(printf_buffer); if (!printf_buffer->map()) { cvk_error("Could not map printf buffer"); diff --git a/src/program.cpp b/src/program.cpp index f025b6bf..d806ff89 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -1833,8 +1833,7 @@ bool cvk_entry_point:: bool cvk_entry_point::build_descriptor_sets_layout_bindings_for_printf_buffer( binding_stat_map& smap) { std::vector<VkDescriptorSetLayoutBinding> layoutBindings; - if (m_program->printf_buffer_info().size > 0 && - (uses_printf())) { + if (m_program->printf_buffer_info().size > 0 && (uses_printf())) { auto info = m_program->printf_buffer_info(); if (info.type != module_buffer_type::storage_buffer) { return true; diff --git a/src/queue.hpp b/src/queue.hpp index f4bbfc7d..4acd0e5e 100644 --- a/src/queue.hpp +++ b/src/queue.hpp @@ -783,7 +783,8 @@ struct cvk_command_kernel final : public cvk_command_batchable { private: CHECK_RETURN cl_int build_and_dispatch_regions(cvk_command_buffer& command_buffer); - CHECK_RETURN cl_int update_global_push_constants(cvk_command_buffer& command_buffer); + CHECK_RETURN cl_int + update_global_push_constants(cvk_command_buffer& command_buffer); CHECK_RETURN cl_int dispatch_uniform_region_within_vklimits( const cvk_ndrange& region, cvk_command_buffer& command_buffer); CHECK_RETURN cl_int dispatch_uniform_region_iterate( diff --git a/tests/api/printf.cpp b/tests/api/printf.cpp index f29879b8..b4ecf22b 100644 --- a/tests/api/printf.cpp +++ b/tests/api/printf.cpp @@ -15,8 +15,8 @@ #ifdef CLVK_UNIT_TESTING_ENABLED #include "testcl.hpp" -#include "utils.hpp" #include "unit.hpp" +#include "utils.hpp" #include <filesystem> From 09632057acadb7f9add3435b52fc286ddceb6b1c Mon Sep 17 00:00:00 2001 From: Romaric Jodin <rjodin@chromium.org> Date: Mon, 10 Jul 2023 14:48:49 +0200 Subject: [PATCH 11/13] fix parse_user_spec_constants --- src/program.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/program.cpp b/src/program.cpp index d806ff89..e341e528 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -967,7 +967,7 @@ cl_int cvk_program::parse_user_spec_constants() { std::filesystem::path tmp_suffix("clvk-XXXXXX"); std::string tmp_template = (tmp_prefix / tmp_suffix).string(); const char* tmp = - cvk_mkdtemp((char*)tmp_template.c_str, tmp_template.size()); + cvk_mkdtemp((char*)tmp_template.c_str(), tmp_template.size()); if (tmp == nullptr) { cvk_error_fn("Could not create temporary folder \"%s\"", tmp_template.c_str()); From d62ad0fb764efeb45bfd7033713c815a1c6d07ca Mon Sep 17 00:00:00 2001 From: Romaric Jodin <rjodin@chromium.org> Date: Mon, 10 Jul 2023 15:06:26 +0200 Subject: [PATCH 12/13] remove use of asprintf --- tests/api/printf.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/api/printf.cpp b/tests/api/printf.cpp index b4ecf22b..d445c2d4 100644 --- a/tests/api/printf.cpp +++ b/tests/api/printf.cpp @@ -87,11 +87,9 @@ TEST_F(WithCommandQueue, SimplePrintf) { ASSERT_TRUE(getStdout(fd)); const char message[] = "Hello World!"; - char* source = nullptr; - asprintf(&source, "kernel void test_printf() { printf(\"%s\");}", message); - ASSERT_NE(source, nullptr); + char source[512]; + sprintf(source, "kernel void test_printf() { printf(\"%s\");}", message); auto kernel = CreateKernel(source, "test_printf"); - free(source); size_t gws = 1; size_t lws = 1; From 6fafdf1fc67956e9b6d53e2d47c04a7b8a6c2bc1 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <rjodin@chromium.org> Date: Tue, 11 Jul 2023 08:04:55 +0200 Subject: [PATCH 13/13] copy cvk_mkdtemp in test to avoid having to export the symbol --- src/exports.map | 1 - src/program.cpp | 6 ++---- src/unit.hpp | 2 -- src/utils.cpp | 13 +++++-------- src/utils.hpp | 4 +--- tests/api/printf.cpp | 27 ++++++++++++++++++++++++++- 6 files changed, 34 insertions(+), 19 deletions(-) diff --git a/src/exports.map b/src/exports.map index 9bb5cdfd..fc5c2f47 100644 --- a/src/exports.map +++ b/src/exports.map @@ -3,7 +3,6 @@ global: clvk_override_device_max_compute_work_group_count; clvk_restore_device_properties; clvk_override_printf_buffer_size; - cvk_mkdtemp; local: *; }; diff --git a/src/program.cpp b/src/program.cpp index e341e528..268075d2 100644 --- a/src/program.cpp +++ b/src/program.cpp @@ -966,8 +966,7 @@ cl_int cvk_program::parse_user_spec_constants() { std::filesystem::path tmp_prefix(config.compiler_temp_dir()); std::filesystem::path tmp_suffix("clvk-XXXXXX"); std::string tmp_template = (tmp_prefix / tmp_suffix).string(); - const char* tmp = - cvk_mkdtemp((char*)tmp_template.c_str(), tmp_template.size()); + const char* tmp = cvk_mkdtemp(tmp_template); if (tmp == nullptr) { cvk_error_fn("Could not create temporary folder \"%s\"", tmp_template.c_str()); @@ -1362,8 +1361,7 @@ cl_build_status cvk_program::do_build_inner(const cvk_device* device) { std::filesystem::path tmp_prefix(config.compiler_temp_dir()); std::filesystem::path tmp_suffix("clvk-XXXXXX"); std::string tmp_template = (tmp_prefix / tmp_suffix).string(); - const char* tmp = - cvk_mkdtemp((char*)tmp_template.c_str(), tmp_template.size()); + const char* tmp = cvk_mkdtemp(tmp_template); if (tmp == nullptr) { cvk_error_fn("Could not create temporary folder \"%s\"", tmp_template.c_str()); diff --git a/src/unit.hpp b/src/unit.hpp index 4ee381ea..b5e68685 100644 --- a/src/unit.hpp +++ b/src/unit.hpp @@ -26,8 +26,6 @@ void CL_API_CALL clvk_override_device_max_compute_work_group_count( void CL_API_CALL clvk_restore_device_properties(cl_device_id device); void CL_API_CALL clvk_override_printf_buffer_size(uint32_t size); - -char* CL_API_CALL clvk_mkdtemp(char* tmpl, size_t size); } #endif diff --git a/src/utils.cpp b/src/utils.cpp index 5afc2978..bc99b472 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -26,24 +26,21 @@ #include <io.h> #endif -extern "C" { -char* CL_API_CALL cvk_mkdtemp(char* tmpl, size_t size) { +char* cvk_mkdtemp(std::string& tmpl) { #ifdef WIN32 - if (_mktemp_s(tmpl, size + 1) != 0) { + if (_mktemp_s(&tmpl.front(), tmpl.size() + 1) != 0) { return nullptr; } - if (!CreateDirectory(tmpl, nullptr)) { + if (!CreateDirectory(tmpl.c_str(), nullptr)) { return nullptr; } - return tmpl; + return &tmpl.front(); #else - UNUSED(size); - return mkdtemp(tmpl); + return mkdtemp(&tmpl.front()); #endif } -} int cvk_exec(const std::string& cmd, std::string* output) { #ifdef WIN32 diff --git a/src/utils.hpp b/src/utils.hpp index 750d88fe..52d73b6f 100644 --- a/src/utils.hpp +++ b/src/utils.hpp @@ -26,9 +26,7 @@ #define CHECK_RETURN #endif -extern "C" { -char* CL_API_CALL cvk_mkdtemp(char* tmpl, size_t size); -} +char* cvk_mkdtemp(std::string& tmpl); int cvk_exec(const std::string& cmd, std::string* output = nullptr); #define CVK_VK_CHECK_INTERNAL(logfn, res, msg) \ diff --git a/tests/api/printf.cpp b/tests/api/printf.cpp index d445c2d4..db055178 100644 --- a/tests/api/printf.cpp +++ b/tests/api/printf.cpp @@ -20,6 +20,15 @@ #include <filesystem> +#ifdef __APPLE__ +#include <unistd.h> +#endif + +#ifdef WIN32 +#include <Windows.h> +#include <io.h> +#endif + static std::string stdoutFileName; #define BUFFER_SIZE 1024 @@ -70,10 +79,26 @@ struct temp_folder_deletion { std::string m_path; }; +static char* mkdtemp(char* tmpl, size_t size) { +#ifdef WIN32 + if (_mktemp_s(tmpl, size + 1) != 0) { + return nullptr; + } + + if (!CreateDirectory(tmpl, nullptr)) { + return nullptr; + } + + return tmpl; +#else + return mkdtemp(tmpl); +#endif +} + static std::string getStdoutFileName(temp_folder_deletion& temp) { char template_tmp_dir[] = "clvk-XXXXXX"; std::filesystem::path prefix( - cvk_mkdtemp(template_tmp_dir, sizeof(template_tmp_dir))); + mkdtemp(template_tmp_dir, sizeof(template_tmp_dir))); std::filesystem::path suffix("stdout_buffer"); temp.set_path(prefix.string()); return (prefix / suffix).string();