From 6176dc37ce47dbe9b171528d5f391a76286c0110 Mon Sep 17 00:00:00 2001
From: Callum Fare <callum@codeplay.com>
Date: Fri, 9 Dec 2022 11:55:47 +0000
Subject: [PATCH 01/13] Implement printf

Clspv provides a printf definition that stores the value of each argument in a
program-wide printf storage buffer. This implementation uses the appropriate
ClspvReflection instructions to parse this data and print the resulting
strings at runtime.
---
 src/CMakeLists.txt |   1 +
 src/config.def     |   3 +
 src/kernel.cpp     |   2 +-
 src/kernel.hpp     |   9 ++
 src/printf.cpp     | 247 +++++++++++++++++++++++++++++++++++++++++++++
 src/printf.hpp     |  31 ++++++
 src/program.cpp    |  83 +++++++++++++--
 src/program.hpp    |  59 ++++++++++-
 src/queue.cpp      |  91 ++++++++++++++++-
 src/queue.hpp      |  23 ++++-
 10 files changed, 537 insertions(+), 12 deletions(-)
 create mode 100644 src/printf.cpp
 create mode 100644 src/printf.hpp

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 6c8cc79f..00e5345e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -53,6 +53,7 @@ add_library(OpenCL-objects OBJECT
   kernel.cpp
   log.cpp
   memory.cpp
+  printf.cpp
   program.cpp
   queue.cpp
   semaphore.cpp
diff --git a/src/config.def b/src/config.def
index 821dfcd2..a709c0be 100644
--- a/src/config.def
+++ b/src/config.def
@@ -21,9 +21,12 @@ OPTION(bool, skip_spirv_capability_check, false)
 OPTION(bool, keep_temporaries, false)
 OPTION(std::string, spirv_arch, "spir")
 OPTION(bool, physical_addressing, false)
+
 OPTION(std::string, clspv_native_builtins, "")
 OPTION(std::string, clspv_library_builtins, "")
 
+OPTION(uint32_t, printf_buffer_size, 1048576u)
+
 #if COMPILER_AVAILABLE
 OPTION(std::string, clspv_options, "")
 #if !CLSPV_ONLINE_COMPILER
diff --git a/src/kernel.cpp b/src/kernel.cpp
index 3cc7bdf5..cf0ba472 100644
--- a/src/kernel.cpp
+++ b/src/kernel.cpp
@@ -162,7 +162,7 @@ bool cvk_kernel_argument_values::setup_descriptor_sets() {
     // Setup module-scope variables
     if (program->module_constant_data_buffer() != nullptr &&
         program->module_constant_data_buffer_info()->type ==
-            constant_data_buffer_type::storage_buffer) {
+            module_buffer_type::storage_buffer) {
         auto buffer = program->module_constant_data_buffer();
         auto info = program->module_constant_data_buffer_info();
         cvk_debug_fn(
diff --git a/src/kernel.hpp b/src/kernel.hpp
index 02e6d8a8..f56de3b3 100644
--- a/src/kernel.hpp
+++ b/src/kernel.hpp
@@ -19,6 +19,8 @@
 #include <unordered_map>
 #include <vector>
 
+#include "spirv/unified1/NonSemanticClspvReflection.h"
+
 #include "memory.hpp"
 #include "objects.hpp"
 #include "program.hpp"
@@ -143,6 +145,13 @@ struct cvk_kernel : public _cl_kernel, api_object<object_magic::kernel> {
         return m_args.at(arg_index).info.type_qualifier;
     }
 
+    bool uses_printf() const {
+        return m_program->kernel_flags(m_name) &
+               NonSemanticClspvReflectionMayUsePrintf;
+    }
+
+    bool requires_serialized_execution() const { return uses_printf(); }
+
 private:
     friend cvk_kernel_argument_values;
 
diff --git a/src/printf.cpp b/src/printf.cpp
new file mode 100644
index 00000000..6182270d
--- /dev/null
+++ b/src/printf.cpp
@@ -0,0 +1,247 @@
+// Copyright 2022 The clvk authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sstream>
+
+#include "printf.hpp"
+
+// Extract the conversion specifier from a format string
+char get_fmt_conversion(std::string_view fmt) {
+    auto conversionSpecPos = fmt.find_first_of("diouxXfFeEgGaAcsp");
+    return fmt.at(conversionSpecPos);
+}
+
+// Read type T from given pointer
+template <typename T> T read_buff(const char* data) {
+    return *(reinterpret_cast<const T*>(data));
+}
+
+// Read type T from given pointer then increment the pointer
+template <typename T> T read_inc_buff(char*& data) {
+    T out = *(reinterpret_cast<T*>(data));
+    data += sizeof(T);
+    return out;
+}
+
+// Extract the optional vector flag and return a modified format string suitable
+// for calling snprintf on individual vector elements
+std::string get_vector_fmt(std::string fmt, int& vector_size, int& element_size,
+                           std::string& remaining_fmt) {
+    // Consume flags (skipping initial '%')
+    auto pos = fmt.find_first_not_of(" +-#0", 1ul);
+    // Consume precision and field width
+    pos = fmt.find_first_not_of("123456789.", pos);
+
+    if (fmt.at(pos) != 'v') {
+        vector_size = 1;
+        return std::string{fmt};
+    }
+
+    // Trim the data after the conversion specifier and store it in
+    // `remaining_fmt`
+    auto pos_conversion = fmt.find_first_of("diouxXfFeEgGaAcsp");
+    auto fmt_specifier = fmt.substr(0, pos_conversion + 1);
+    remaining_fmt = fmt.substr(pos_conversion + 1);
+    fmt = fmt_specifier;
+
+    size_t vec_length_pos_start = ++pos;
+    size_t vec_length_pos_end =
+        fmt.find_first_not_of("23468", vec_length_pos_start);
+    auto vec_length_str = fmt.substr(vec_length_pos_start,
+                                     vec_length_pos_end - vec_length_pos_start);
+    int vec_length = std::atoi(vec_length_str.c_str());
+
+    auto fmt_pre_vec_len = fmt.substr(0, vec_length_pos_start - 1);
+    auto fmt_post_vec_len = fmt.substr(vec_length_pos_end, fmt.size());
+    fmt = fmt_pre_vec_len + fmt_post_vec_len;
+
+    // The length modifier is required with vectors
+    if (fmt_post_vec_len.find("hh") != std::string::npos) {
+        element_size = 1;
+    } else if (fmt_post_vec_len.find("hl") != std::string::npos) {
+        element_size = 4;
+    } else if (fmt_post_vec_len.find("h") != std::string::npos) {
+        element_size = 2;
+    } else if (fmt_post_vec_len.find("l") != std::string::npos) {
+        element_size = 8;
+    }
+
+    // If 'hl' length modifier is present, strip it as snprintf doesn't
+    // understand it
+    size_t hl = fmt.find("hl");
+    if (hl != std::string::npos) {
+        fmt.erase(hl, 2);
+    }
+
+    vector_size = vec_length;
+    return fmt;
+}
+
+// Print the format part containing exactly one arg using snprintf
+std::string print_part(const std::string& fmt, const char* data, size_t size) {
+    // We don't know the exact size of the output string, but given we have a
+    // single argument, the size of the format string plus 1024 bytes is more
+    // than likely to fit everything. If it doesn't fit, just keep retrying with
+    // double the output size.
+    size_t out_size = fmt.size() + 1024;
+    std::vector<char> out;
+    out.reserve(out_size);
+    out[0] = '\0';
+
+    auto conversion = std::tolower(get_fmt_conversion(fmt));
+    bool finished = false;
+    while (!finished) {
+        int written = 0;
+        switch (conversion) {
+        case 's': {
+            written = snprintf(out.data(), out_size, fmt.c_str(), data);
+            break;
+        }
+        case 'f':
+        case 'e':
+        case 'g':
+        case 'a': {
+            if (size == 4)
+                written = snprintf(out.data(), out_size, fmt.c_str(),
+                                   read_buff<float>(data));
+            else
+                written = snprintf(out.data(), out_size, fmt.c_str(),
+                                   read_buff<double>(data));
+            break;
+        }
+        default: {
+            if (size == 1)
+                written = snprintf(out.data(), out_size, fmt.c_str(),
+                                   read_buff<uint8_t>(data));
+            else if (size == 2)
+                written = snprintf(out.data(), out_size, fmt.c_str(),
+                                   read_buff<uint16_t>(data));
+            else if (size == 4)
+                written = snprintf(out.data(), out_size, fmt.c_str(),
+                                   read_buff<uint32_t>(data));
+            else
+                written = snprintf(out.data(), out_size, fmt.c_str(),
+                                   read_buff<uint64_t>(data));
+            break;
+        }
+        }
+
+        // Finish if the string fit in the output buffer or snprintf failed,
+        // otherwise double the output buffer and try again. If snprintf failed,
+        // set the output to an empty string.
+        if (written < 0) {
+            out[0] = '\0';
+            finished = true;
+        } else if (written < static_cast<long>(out_size)) {
+            finished = true;
+        } else {
+            out_size *= 2;
+            out.reserve(out_size);
+        }
+    }
+
+    return std::string(out.data());
+}
+
+void process_printf(char*& data, const printf_descriptor_map_t& descs) {
+
+    uint32_t printf_id = read_inc_buff<uint32_t>(data);
+    auto& format_string = descs.at(printf_id).format_string;
+
+    std::stringstream printf_out{};
+
+    // Firstly print the part of the format string up to the first '%'
+    size_t next_part = format_string.find_first_of('%');
+    printf_out << format_string.substr(0, next_part);
+
+    // Decompose the remaining format string into individual strings with
+    // one format specifier each, handle each one individually
+    size_t arg_idx = 0;
+    while (next_part < format_string.size() - 1) {
+        // Get the part of the format string before the next format specifier
+        size_t part_start = next_part;
+        size_t part_end = format_string.find_first_of('%', part_start + 1);
+        auto part_fmt = format_string.substr(part_start, part_end - part_start);
+
+        // Handle special cases
+        if (part_end == part_start + 1) {
+            printf_out << "%";
+            next_part = part_end + 1;
+            continue;
+        } else if (part_end == std::string::npos &&
+                   arg_idx >= descs.at(printf_id).arg_sizes.size()) {
+            // If there are no remaining arguments, the rest of the format
+            // should be printed verbatim
+            printf_out << part_fmt;
+            break;
+        }
+
+        // The size of the argument that this format part will consume
+        auto& size = descs.at(printf_id).arg_sizes[arg_idx];
+
+        // Check to see if we have a vector format specifier
+        int vec_len = 0;
+        int el_size = 0;
+        std::string remaining_str;
+        part_fmt = get_vector_fmt(part_fmt, vec_len, el_size, remaining_str);
+
+        // Scalar argument
+        if (vec_len < 2) {
+            // Special case for %s
+            if (get_fmt_conversion(part_fmt) == 's') {
+                uint32_t string_id = read_buff<uint32_t>(data);
+                printf_out << print_part(
+                    part_fmt, descs.at(string_id).format_string.c_str(), size);
+            } else {
+                printf_out << print_part(part_fmt, data, size);
+            }
+            data += size;
+        } else {
+            // Vector argument
+            auto* data_start = data;
+            for (int i = 0; i < vec_len - 1; i++) {
+                printf_out << print_part(part_fmt, data, size / vec_len) << ",";
+                data += el_size;
+            }
+            printf_out << print_part(part_fmt, data, size / vec_len)
+                       << remaining_str;
+            data = data_start + size;
+        }
+
+        // Move to the next format part and prepare to handle the next arg
+        next_part = part_end;
+        arg_idx++;
+    }
+
+    printf("%s", printf_out.str().c_str());
+}
+
+void cvk_printf(cvk_mem* printf_buffer,
+                const printf_descriptor_map_t& descriptors) {
+    if (!printf_buffer->map()) {
+        cvk_error("Could not map printf buffer");
+        return;
+    }
+    char* data = static_cast<char*>(printf_buffer->host_va());
+    auto buffer_size = printf_buffer->size();
+    auto bytes_written = read_inc_buff<uint32_t>(data) * 4;
+    auto* data_start = data;
+
+    while (static_cast<size_t>(data - data_start) < bytes_written &&
+           static_cast<size_t>(data - data_start) < buffer_size) {
+        process_printf(data, descriptors);
+    }
+
+    printf_buffer->unmap();
+}
diff --git a/src/printf.hpp b/src/printf.hpp
new file mode 100644
index 00000000..2bab00c3
--- /dev/null
+++ b/src/printf.hpp
@@ -0,0 +1,31 @@
+// Copyright 2022 The clvk authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "memory.hpp"
+
+#include <vector>
+
+struct printf_descriptor {
+    uint32_t printf_id;
+    std::string format_string;
+    std::vector<uint32_t> arg_sizes;
+};
+
+using printf_descriptor_map_t = std::unordered_map<uint32_t, printf_descriptor>;
+
+// Process the contents of the printf buffer and print the results to stdout
+void cvk_printf(cvk_mem* printf_buffer,
+                const printf_descriptor_map_t& descriptors);
diff --git a/src/program.cpp b/src/program.cpp
index 953a2b51..7303de83 100644
--- a/src/program.cpp
+++ b/src/program.cpp
@@ -149,6 +149,8 @@ spv_result_t parse_reflection(void* user_data,
             return pushconstant::image_metadata;
         case NonSemanticClspvReflectionConstantDataPointerPushConstant:
             return pushconstant::module_constants_pointer;
+        case NonSemanticClspvReflectionPrintfBufferPointerPushConstant:
+            return pushconstant::printf_buffer_pointer;
         default:
             cvk_error_fn("Unhandled reflection instruction for push constant");
             break;
@@ -182,11 +184,12 @@ spv_result_t parse_reflection(void* user_data,
                 const auto& name = parse_data->strings[inst->words[6]];
 
                 const auto& num_args = parse_data->constants[inst->words[7]];
+                const auto& flags = parse_data->constants[inst->words[8]];
                 const auto& attributes = parse_data->strings[inst->words[9]];
 
                 parse_data->strings[inst->result_id] = name;
-                parse_data->binary->add_kernel(name, num_args, attributes);
-
+                parse_data->binary->add_kernel(name, num_args, attributes,
+                                               flags);
                 break;
             }
             case NonSemanticClspvReflectionArgumentInfo: {
@@ -446,15 +449,14 @@ spv_result_t parse_reflection(void* user_data,
                 hex2bin(data.c_str(), binfo.data.data());
                 if (ext_inst ==
                     NonSemanticClspvReflectionConstantDataStorageBuffer) {
-                    binfo.type = constant_data_buffer_type::storage_buffer;
+                    binfo.type = module_buffer_type::storage_buffer;
                     binfo.set = parse_data->constants[inst->words[5]];
                     if (binfo.set >= spir_binary::MAX_DESCRIPTOR_SETS)
                         return SPV_ERROR_INVALID_DATA;
                     binfo.binding = parse_data->constants[inst->words[6]];
 
                 } else {
-                    binfo.type =
-                        constant_data_buffer_type::pointer_push_constant;
+                    binfo.type = module_buffer_type::pointer_push_constant;
                     binfo.pc_offset = parse_data->constants[inst->words[5]];
                     parse_data->binary->add_push_constant(
                         pushconstant::module_constants_pointer,
@@ -463,6 +465,38 @@ spv_result_t parse_reflection(void* user_data,
                 parse_data->binary->set_constant_data_buffer(binfo);
                 break;
             }
+            case NonSemanticClspvReflectionPrintfInfo: {
+                uint32_t printf_id = parse_data->constants[inst->words[5]];
+                std::string printf_string = parse_data->strings[inst->words[6]];
+                std::vector<uint32_t> printf_arg_sizes;
+                for (int i = 6; i < inst->num_operands; i++) {
+                    printf_arg_sizes.push_back(
+                        parse_data
+                            ->constants[inst->words[inst->operands[i].offset]]);
+                }
+                parse_data->binary->add_printf_descriptor(
+                    {printf_id, printf_string, printf_arg_sizes});
+                break;
+            }
+            case NonSemanticClspvReflectionPrintfBufferStorageBuffer: {
+                printf_buffer_desc_info binfo;
+                binfo.type = module_buffer_type::storage_buffer;
+                binfo.set = parse_data->constants[inst->words[5]];
+                binfo.binding = parse_data->constants[inst->words[6]];
+                binfo.size = parse_data->constants[inst->words[7]];
+                parse_data->binary->set_printf_buffer_info(binfo);
+                break;
+            }
+            case NonSemanticClspvReflectionPrintfBufferPointerPushConstant: {
+                printf_buffer_desc_info binfo;
+                binfo.type = module_buffer_type::pointer_push_constant;
+                binfo.pc_offset = parse_data->constants[inst->words[5]];
+                binfo.size = parse_data->constants[inst->words[7]];
+                parse_data->binary->set_printf_buffer_info(binfo);
+                parse_data->binary->add_push_constant(
+                    pushconstant::printf_buffer_pointer, {binfo.pc_offset, 8u});
+                break;
+            }
             default:
                 return SPV_ERROR_INVALID_DATA;
             }
@@ -912,6 +946,11 @@ std::string cvk_program::prepare_build_options(const cvk_device* device) const {
         }
     }
 
+    options += " -enable-printf ";
+    options +=
+        " -printf-buffer-size=" + std::to_string(config.printf_buffer_size) +
+        " ";
+
 #if COMPILER_AVAILABLE
     options += " " + config.clspv_options() + " ";
 #endif
@@ -1768,7 +1807,7 @@ bool cvk_entry_point::
         auto info = m_program->module_constant_data_buffer_info();
         // If the program scope buffer isn't passed as a storage buffer (i.e.
         // it is passed a pointer push constant), there is nothing to bind here
-        if (info->type != constant_data_buffer_type::storage_buffer) {
+        if (info->type != module_buffer_type::storage_buffer) {
             return true;
         }
         VkDescriptorSetLayoutBinding binding = {
@@ -1789,6 +1828,34 @@ bool cvk_entry_point::
     return true;
 }
 
+bool cvk_entry_point::build_descriptor_sets_layout_bindings_for_printf_buffer(
+    binding_stat_map& smap) {
+    std::vector<VkDescriptorSetLayoutBinding> layoutBindings;
+    if (m_program->printf_buffer_info().size > 0 &&
+        (m_program->kernel_flags(m_name) &
+         NonSemanticClspvReflectionMayUsePrintf)) {
+        auto info = m_program->printf_buffer_info();
+        if (info.type != module_buffer_type::storage_buffer) {
+            return true;
+        }
+        VkDescriptorSetLayoutBinding binding = {
+            info.binding,                      // binding
+            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // descriptorType
+            1,                                 // decriptorCount
+            VK_SHADER_STAGE_COMPUTE_BIT,       // stageFlags
+            nullptr                            // pImmutableSamplers
+        };
+        layoutBindings.push_back(binding);
+        smap[binding.descriptorType]++;
+    }
+
+    if (!build_descriptor_set_layout(layoutBindings)) {
+        return false;
+    }
+
+    return true;
+}
+
 cl_int cvk_entry_point::init() {
     VkResult res;
 
@@ -1825,6 +1892,10 @@ cl_int cvk_entry_point::init() {
             bindingTypes)) {
         return CL_INVALID_VALUE;
     }
+    if (!build_descriptor_sets_layout_bindings_for_printf_buffer(
+            bindingTypes)) {
+        return CL_INVALID_VALUE;
+    }
 
     // Do we have POD arguments?
     for (auto& arg : m_args) {
diff --git a/src/program.hpp b/src/program.hpp
index 421b172c..2142040e 100644
--- a/src/program.hpp
+++ b/src/program.hpp
@@ -32,6 +32,7 @@
 #include "init.hpp"
 #include "memory.hpp"
 #include "objects.hpp"
+#include "printf.hpp"
 
 const int SPIR_WORD_SIZE = 4;
 
@@ -124,6 +125,7 @@ enum class pushconstant
     region_group_offset,
     image_metadata,
     module_constants_pointer,
+    printf_buffer_pointer,
 };
 
 struct pushconstant_desc {
@@ -143,7 +145,7 @@ enum class spec_constant
     subgroup_max_size,
 };
 
-enum class constant_data_buffer_type
+enum class module_buffer_type
 {
     storage_buffer,
     pointer_push_constant,
@@ -180,13 +182,21 @@ struct user_spec_constant_data {
 };
 
 struct constant_data_buffer_info {
-    constant_data_buffer_type type;
+    module_buffer_type type;
     uint32_t set;
     uint32_t binding;
     uint32_t pc_offset;
     std::vector<char> data;
 };
 
+struct printf_buffer_desc_info {
+    module_buffer_type type;
+    uint32_t set;
+    uint32_t binding;
+    uint32_t pc_offset;
+    uint32_t size = 0;
+};
+
 struct spirv_validation_options {
     bool uniform_buffer_std_layout = false;
 };
@@ -212,6 +222,7 @@ class spir_binary {
         std::unordered_map<std::string, std::vector<kernel_argument>>;
     using kernels_reqd_work_group_size_map =
         std::unordered_map<std::string, std::array<uint32_t, 3>>;
+    using kernels_flags_map = std::unordered_map<std::string, uint32_t>;
 
 public:
     spir_binary(spv_target_env env)
@@ -268,8 +279,13 @@ class spir_binary {
         }
     }
 
+    const printf_descriptor_map_t& printf_descriptors() const {
+        return m_printf_descriptors;
+    }
+
     void add_kernel(const std::string& name, uint32_t num_args,
-                    const std::string& attributes) {
+                    const std::string& attributes, uint32_t flags) {
+        m_flags[name] = flags;
         auto& args = m_dmaps[name];
         kernel_argument unused = {
             {}, 0, 0, 0, 0, 0, kernel_argument_kind::unused, 0, 0};
@@ -332,6 +348,24 @@ class spir_binary {
         m_image_metadata[name][ordinal].set_data_type(offset);
     }
 
+    void add_printf_descriptor(printf_descriptor&& desc) {
+        m_printf_descriptors[desc.printf_id] = desc;
+    }
+
+    void set_printf_buffer_info(const printf_buffer_desc_info& info) {
+        m_printf_buffer_info = info;
+    }
+
+    const printf_buffer_desc_info& printf_buffer_info() const {
+        return m_printf_buffer_info;
+    }
+
+    const printf_descriptor_map_t& get_printf_descriptors() const {
+        return m_printf_descriptors;
+    }
+
+    const kernels_flags_map& kernels_flags() const { return m_flags; }
+
 private:
     spv_context m_context;
     std::vector<uint32_t> m_code;
@@ -339,10 +373,13 @@ class spir_binary {
     std::unordered_map<pushconstant, pushconstant_desc> m_push_constants;
     std::unordered_map<spec_constant, uint32_t> m_spec_constants;
     image_metadata_map m_image_metadata;
+    std::unordered_map<uint32_t, printf_descriptor> m_printf_descriptors;
+    printf_buffer_desc_info m_printf_buffer_info;
     std::unique_ptr<constant_data_buffer_info> m_constant_data_buffer;
     kernels_arguments_map m_dmaps;
     kernels_reqd_work_group_size_map m_reqd_work_group_sizes;
     std::unordered_map<std::string, std::string> m_kernels_attributes;
+    kernels_flags_map m_flags;
     bool m_loaded_from_binary;
     spv_target_env m_target_env;
 };
@@ -453,6 +490,8 @@ class cvk_entry_point {
         binding_stat_map& smap);
     bool build_descriptor_sets_layout_bindings_for_program_scope_buffers(
         binding_stat_map& smap);
+    bool build_descriptor_sets_layout_bindings_for_printf_buffer(
+        binding_stat_map& smap);
 
     // Structures for caching pipelines based on specialization constants
     struct SpecConstantMapHash {
@@ -604,6 +643,11 @@ struct cvk_program : public _cl_program, api_object<object_magic::program> {
 
     unsigned num_kernels() const { return m_binary.num_kernels(); }
     bool loaded_from_binary() const { return m_binary.loaded_from_binary(); }
+    bool uses_printf() { return !m_binary.printf_descriptors().empty(); }
+    const std::unordered_map<uint32_t, printf_descriptor>&
+    printf_descriptors() {
+        return m_binary.get_printf_descriptors();
+    }
 
     const std::vector<kernel_argument>* args_for_kernel(std::string& name) {
         auto const& args = m_binary.kernels_arguments().find(name);
@@ -701,6 +745,10 @@ struct cvk_program : public _cl_program, api_object<object_magic::program> {
         return m_binary.constant_data_buffer();
     }
 
+    const printf_buffer_desc_info& printf_buffer_info() const {
+        return m_binary.printf_buffer_info();
+    }
+
     bool options_allow_split_region(std::string options) {
         if (options.find("-uniform-workgroup-size") != std::string::npos)
             return false;
@@ -721,6 +769,10 @@ struct cvk_program : public _cl_program, api_object<object_magic::program> {
         return m_binary.kernels_attributes().at(kernel_name);
     }
 
+    uint32_t kernel_flags(const std::string& kernel) const {
+        return m_binary.kernels_flags().at(kernel);
+    }
+
 private:
     void do_build();
     std::string prepare_build_options(const cvk_device* device) const;
@@ -770,6 +822,7 @@ struct cvk_program : public _cl_program, api_object<object_magic::program> {
     VkPipelineCache m_pipeline_cache;
     std::unique_ptr<cvk_buffer> m_module_constant_data_buffer;
     std::unordered_map<uint32_t, user_spec_constant_data> m_user_spec_constants;
+    std::unique_ptr<cvk_buffer> m_printf_buffer;
 };
 
 static inline cvk_program* icd_downcast(cl_program program) {
diff --git a/src/queue.cpp b/src/queue.cpp
index c2d4b9e2..98a0adb6 100644
--- a/src/queue.cpp
+++ b/src/queue.cpp
@@ -593,6 +593,25 @@ void cvk_command_kernel::update_global_push_constants(
                            &dev_addr);
     }
 
+    if (auto pc = program->push_constant(pushconstant::printf_buffer_pointer)) {
+        CVK_ASSERT(pc->size == 8);
+        CVK_ASSERT(program->uses_printf());
+
+        auto buffer = m_queue->printf_buffer();
+        VkBufferDeviceAddressInfo info{};
+        info.buffer = buffer->vulkan_buffer();
+        info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO;
+        info.pNext = NULL;
+
+        auto dev_addr = vkGetBufferDeviceAddress(
+            m_kernel->context()->device()->vulkan_device(), &info);
+        dev_addr += buffer->vulkan_buffer_offset();
+
+        vkCmdPushConstants(command_buffer, m_kernel->pipeline_layout(),
+                           VK_SHADER_STAGE_COMPUTE_BIT, pc->offset, pc->size,
+                           &dev_addr);
+    }
+
     uint32_t image_metadata_pc_start = UINT32_MAX;
     uint32_t image_metadata_pc_end = 0;
     if (const auto* md = m_kernel->get_image_metadata()) {
@@ -757,6 +776,32 @@ cl_int cvk_command_kernel::dispatch_uniform_region_within_vklimits(
     vkCmdDispatch(command_buffer, num_workgroups[0], num_workgroups[1],
                   num_workgroups[2]);
 
+    // If we have a kernel that requires serial execution (i.e. regions are not
+    // executed in parallel with other regions or other kernels) then serialize
+    // the command buffer
+    if (m_kernel->requires_serialized_execution()) {
+        VkMemoryBarrier memoryBarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+                                         nullptr, VK_ACCESS_SHADER_WRITE_BIT,
+                                         VK_ACCESS_MEMORY_READ_BIT |
+                                             VK_ACCESS_MEMORY_WRITE_BIT};
+
+        // Workaround for a bug on some NVIDIA devices.
+        // This should already be covered by VK_ACCESS_MEMORY_READ_BIT.
+        memoryBarrier.dstAccessMask |= VK_ACCESS_SHADER_READ_BIT;
+
+        vkCmdPipelineBarrier(
+            command_buffer,
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // srcStageMask
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // dstStageMask
+            0,                                    // dependencyFlags
+            1,                                    // memoryBarrierCount
+            &memoryBarrier,
+            0,        // bufferMemoryBarrierCount
+            nullptr,  // pBufferMemoryBarriers
+            0,        // imageMemoryBarrierCount
+            nullptr); // pImageMemoryBarriers
+    }
+
     return CL_SUCCESS;
 }
 
@@ -915,6 +960,41 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) {
         return CL_OUT_OF_RESOURCES;
     }
 
+    // Setup printf buffer descriptor if needed
+    if (m_kernel->program()->uses_printf()) {
+        // Create and initialize the printf buffer
+        auto buffer = m_queue->printf_buffer();
+        if (buffer->map()) {
+            memset(buffer->host_va(), 0, 4);
+            buffer->unmap();
+        }
+
+        if (m_kernel->program()->printf_buffer_info().type ==
+            module_buffer_type::storage_buffer) {
+
+            VkDescriptorBufferInfo bufferInfo = {buffer->vulkan_buffer(),
+                                                 0, // offset
+                                                 VK_WHOLE_SIZE};
+
+            auto* ds = m_argument_values->descriptor_sets();
+            VkWriteDescriptorSet writeDescriptorSet = {
+                VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+                nullptr,
+                ds[m_kernel->program()->printf_buffer_info().set],
+                m_kernel->program()->printf_buffer_info().binding,
+                0,                                 // dstArrayElement
+                1,                                 // descriptorCount
+                VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, // descriptorType
+                nullptr,                           // pImageInfo
+                &bufferInfo,
+                nullptr, // pTexelBufferView
+            };
+
+            vkUpdateDescriptorSets(m_queue->device()->vulkan_device(), 1u,
+                                   &writeDescriptorSet, 0, nullptr);
+        }
+    }
+
     // Bind descriptors and update push constants
     if (m_kernel->num_set_layouts() > 0) {
         vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
@@ -958,6 +1038,15 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) {
     return CL_SUCCESS;
 }
 
+cl_int cvk_command_kernel::do_post_action() {
+    if (m_kernel->uses_printf()) {
+        cvk_printf(m_queue->printf_buffer(),
+                   m_kernel->program()->printf_descriptors());
+    }
+
+    return CL_SUCCESS;
+}
+
 bool cvk_command_batchable::can_be_batched() const {
     bool unresolved_user_event_dependencies = false;
     bool unresolved_other_queue_dependencies = false;
@@ -1078,7 +1167,7 @@ cl_int cvk_command_batchable::do_action() {
         return CL_OUT_OF_RESOURCES;
     }
 
-    return CL_COMPLETE;
+    return do_post_action();
 }
 
 cl_int cvk_command_batch::do_action() {
diff --git a/src/queue.hpp b/src/queue.hpp
index d28c868d..bd24f420 100644
--- a/src/queue.hpp
+++ b/src/queue.hpp
@@ -22,6 +22,7 @@
 #include "init.hpp"
 #include "kernel.hpp"
 #include "objects.hpp"
+#include "printf.hpp"
 #include "tracing.hpp"
 
 struct cvk_command;
@@ -171,6 +172,16 @@ struct cvk_command_queue : public _cl_command_queue,
         return m_command_pool.free_command_buffer(cmdbuf);
     }
 
+    cvk_buffer* printf_buffer() {
+        if (!m_printf_buffer) {
+            cl_int status;
+            m_printf_buffer = cvk_buffer::create(
+                context(), 0, config.printf_buffer_size, nullptr, &status);
+            CVK_ASSERT(status == CL_SUCCESS);
+        }
+        return m_printf_buffer.get();
+    }
+
     void command_pool_lock() { m_command_pool.lock(); }
 
     void command_pool_unlock() { m_command_pool.unlock(); }
@@ -238,6 +249,8 @@ struct cvk_command_queue : public _cl_command_queue,
 
     TRACE_CNT_VAR(batch_in_flight_counter);
     TRACE_CNT_VAR(group_in_flight_counter);
+
+    std::unique_ptr<cvk_buffer> m_printf_buffer;
 };
 
 static inline cvk_command_queue* icd_downcast(cl_command_queue queue) {
@@ -627,7 +640,7 @@ struct cvk_command_batchable : public cvk_command {
         }
     }
 
-    bool can_be_batched() const override final;
+    bool can_be_batched() const override;
     bool is_built_before_enqueue() const override final { return false; }
 
     CHECK_RETURN cl_int get_timestamp_query_results(cl_ulong* start,
@@ -638,6 +651,7 @@ struct cvk_command_batchable : public cvk_command {
     CHECK_RETURN virtual cl_int
     build_batchable_inner(cvk_command_buffer& cmdbuf) = 0;
     CHECK_RETURN cl_int do_action() override;
+    CHECK_RETURN virtual cl_int do_post_action() { return CL_SUCCESS; }
 
     CHECK_RETURN cl_int set_profiling_info_end(cl_ulong sync_dev,
                                                cl_ulong sync_host) {
@@ -733,6 +747,13 @@ struct cvk_command_kernel final : public cvk_command_batchable {
     CHECK_RETURN cl_int
     build_batchable_inner(cvk_command_buffer& cmdbuf) override final;
 
+    CHECK_RETURN cl_int do_post_action() override final;
+
+    bool can_be_batched() const override final {
+        return !m_kernel->uses_printf() &&
+               cvk_command_batchable::can_be_batched();
+    }
+
     const std::vector<cvk_mem*> memory_objects() const override {
         std::vector<cvk_mem*> ret;
         std::shared_ptr<cvk_kernel_argument_values> argvals = m_argument_values;

From 0860a0d7f57c80f9ffe93875d9112bc35a9f7b70 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@chromium.org>
Date: Thu, 25 May 2023 15:44:00 +0200
Subject: [PATCH 02/13] fix to work with stdc++ in google3

---
 src/printf.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/printf.cpp b/src/printf.cpp
index 6182270d..8d386c47 100644
--- a/src/printf.cpp
+++ b/src/printf.cpp
@@ -95,8 +95,7 @@ std::string print_part(const std::string& fmt, const char* data, size_t size) {
     // than likely to fit everything. If it doesn't fit, just keep retrying with
     // double the output size.
     size_t out_size = fmt.size() + 1024;
-    std::vector<char> out;
-    out.reserve(out_size);
+    std::vector<char> out(out_size);
     out[0] = '\0';
 
     auto conversion = std::tolower(get_fmt_conversion(fmt));

From 8f131b2e7e55ec9f5c33141710194c67e71a2e85 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@chromium.org>
Date: Sat, 24 Jun 2023 09:44:11 +0200
Subject: [PATCH 03/13] fix support for cl_half

---
 src/printf.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/printf.cpp b/src/printf.cpp
index 8d386c47..c9ea14a0 100644
--- a/src/printf.cpp
+++ b/src/printf.cpp
@@ -111,7 +111,10 @@ std::string print_part(const std::string& fmt, const char* data, size_t size) {
         case 'e':
         case 'g':
         case 'a': {
-            if (size == 4)
+            if (size == 2)
+                written = snprintf(out.data(), out_size, fmt.c_str(),
+                                   cl_half_to_float(read_buff<cl_half>(data)));
+            else if (size == 4)
                 written = snprintf(out.data(), out_size, fmt.c_str(),
                                    read_buff<float>(data));
             else

From 14af35a53c1e5021da34882b9104de80781f36bc Mon Sep 17 00:00:00 2001
From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com>
Date: Mon, 10 Jul 2023 08:06:25 +0200
Subject: [PATCH 04/13] Update src/config.def
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Kévin Petit <kpet@free.fr>
---
 src/config.def | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/config.def b/src/config.def
index a709c0be..2e503f21 100644
--- a/src/config.def
+++ b/src/config.def
@@ -25,7 +25,7 @@ OPTION(bool, physical_addressing, false)
 OPTION(std::string, clspv_native_builtins, "")
 OPTION(std::string, clspv_library_builtins, "")
 
-OPTION(uint32_t, printf_buffer_size, 1048576u)
+OPTION(uint32_t, printf_buffer_size, 1024*1024u)
 
 #if COMPILER_AVAILABLE
 OPTION(std::string, clspv_options, "")

From a38cf190b0c433ace0c81c5a57d188963bb82b57 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com>
Date: Mon, 10 Jul 2023 08:06:31 +0200
Subject: [PATCH 05/13] Update src/printf.cpp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Kévin Petit <kpet@free.fr>
---
 src/printf.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/printf.cpp b/src/printf.cpp
index c9ea14a0..76cee1ae 100644
--- a/src/printf.cpp
+++ b/src/printf.cpp
@@ -149,7 +149,7 @@ std::string print_part(const std::string& fmt, const char* data, size_t size) {
             finished = true;
         } else {
             out_size *= 2;
-            out.reserve(out_size);
+            out.resize(out_size);
         }
     }
 

From 1e4e8d5f2a93246213c1c744478394a6440b140c Mon Sep 17 00:00:00 2001
From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com>
Date: Mon, 10 Jul 2023 09:08:57 +0200
Subject: [PATCH 06/13] Update src/program.hpp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Kévin Petit <kpet@free.fr>
---
 src/program.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/program.hpp b/src/program.hpp
index 2142040e..891bac8c 100644
--- a/src/program.hpp
+++ b/src/program.hpp
@@ -822,7 +822,6 @@ struct cvk_program : public _cl_program, api_object<object_magic::program> {
     VkPipelineCache m_pipeline_cache;
     std::unique_ptr<cvk_buffer> m_module_constant_data_buffer;
     std::unordered_map<uint32_t, user_spec_constant_data> m_user_spec_constants;
-    std::unique_ptr<cvk_buffer> m_printf_buffer;
 };
 
 static inline cvk_program* icd_downcast(cl_program program) {

From efae40e6c36e51ea63068f32290fc6577ad43091 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com>
Date: Mon, 10 Jul 2023 09:09:41 +0200
Subject: [PATCH 07/13] Update src/queue.cpp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Kévin Petit <kpet@free.fr>
---
 src/queue.cpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/queue.cpp b/src/queue.cpp
index 98a0adb6..7bcbe626 100644
--- a/src/queue.cpp
+++ b/src/queue.cpp
@@ -598,14 +598,7 @@ void cvk_command_kernel::update_global_push_constants(
         CVK_ASSERT(program->uses_printf());
 
         auto buffer = m_queue->printf_buffer();
-        VkBufferDeviceAddressInfo info{};
-        info.buffer = buffer->vulkan_buffer();
-        info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO;
-        info.pNext = NULL;
-
-        auto dev_addr = vkGetBufferDeviceAddress(
-            m_kernel->context()->device()->vulkan_device(), &info);
-        dev_addr += buffer->vulkan_buffer_offset();
+        auto dev_addr = buffer->device_address();
 
         vkCmdPushConstants(command_buffer, m_kernel->pipeline_layout(),
                            VK_SHADER_STAGE_COMPUTE_BIT, pc->offset, pc->size,

From 30d2ddbbcbb3e0753b6164863cf771d40246b76c Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@chromium.org>
Date: Mon, 10 Jul 2023 13:56:22 +0200
Subject: [PATCH 08/13] fixes following kpet feedbacks

---
 src/kernel.hpp  |  5 +----
 src/printf.cpp  | 11 ++++++++---
 src/printf.hpp  |  4 ++--
 src/program.cpp |  8 ++++++--
 src/program.hpp |  2 ++
 src/queue.cpp   | 32 ++++++++++++++++++++++----------
 src/queue.hpp   | 21 +++++++++++++++++++--
 7 files changed, 60 insertions(+), 23 deletions(-)

diff --git a/src/kernel.hpp b/src/kernel.hpp
index f56de3b3..9591910c 100644
--- a/src/kernel.hpp
+++ b/src/kernel.hpp
@@ -145,10 +145,7 @@ struct cvk_kernel : public _cl_kernel, api_object<object_magic::kernel> {
         return m_args.at(arg_index).info.type_qualifier;
     }
 
-    bool uses_printf() const {
-        return m_program->kernel_flags(m_name) &
-               NonSemanticClspvReflectionMayUsePrintf;
-    }
+    bool uses_printf() const { return m_entry_point->uses_printf(); }
 
     bool requires_serialized_execution() const { return uses_printf(); }
 
diff --git a/src/printf.cpp b/src/printf.cpp
index 76cee1ae..2a7bf799 100644
--- a/src/printf.cpp
+++ b/src/printf.cpp
@@ -229,21 +229,26 @@ void process_printf(char*& data, const printf_descriptor_map_t& descs) {
     printf("%s", printf_out.str().c_str());
 }
 
-void cvk_printf(cvk_mem* printf_buffer,
+cl_int cvk_printf(cvk_mem* printf_buffer,
                 const printf_descriptor_map_t& descriptors) {
+    CVK_ASSERT(printf_buffer);
     if (!printf_buffer->map()) {
         cvk_error("Could not map printf buffer");
-        return;
+        return CL_OUT_OF_RESOURCES;
     }
     char* data = static_cast<char*>(printf_buffer->host_va());
     auto buffer_size = printf_buffer->size();
+    const auto bytes_written_size = sizeof(uint32_t);
+    const auto data_size = buffer_size - bytes_written_size;
     auto bytes_written = read_inc_buff<uint32_t>(data) * 4;
     auto* data_start = data;
 
     while (static_cast<size_t>(data - data_start) < bytes_written &&
-           static_cast<size_t>(data - data_start) < buffer_size) {
+           static_cast<size_t>(data - data_start) < data_size) {
         process_printf(data, descriptors);
     }
 
     printf_buffer->unmap();
+
+    return CL_SUCCESS;
 }
diff --git a/src/printf.hpp b/src/printf.hpp
index 2bab00c3..9941ab18 100644
--- a/src/printf.hpp
+++ b/src/printf.hpp
@@ -27,5 +27,5 @@ struct printf_descriptor {
 using printf_descriptor_map_t = std::unordered_map<uint32_t, printf_descriptor>;
 
 // Process the contents of the printf buffer and print the results to stdout
-void cvk_printf(cvk_mem* printf_buffer,
-                const printf_descriptor_map_t& descriptors);
+cl_int cvk_printf(cvk_mem* printf_buffer,
+                  const printf_descriptor_map_t& descriptors);
diff --git a/src/program.cpp b/src/program.cpp
index 7303de83..5a30d7e9 100644
--- a/src/program.cpp
+++ b/src/program.cpp
@@ -1832,8 +1832,7 @@ bool cvk_entry_point::build_descriptor_sets_layout_bindings_for_printf_buffer(
     binding_stat_map& smap) {
     std::vector<VkDescriptorSetLayoutBinding> layoutBindings;
     if (m_program->printf_buffer_info().size > 0 &&
-        (m_program->kernel_flags(m_name) &
-         NonSemanticClspvReflectionMayUsePrintf)) {
+        (uses_printf())) {
         auto info = m_program->printf_buffer_info();
         if (info.type != module_buffer_type::storage_buffer) {
             return true;
@@ -2154,3 +2153,8 @@ std::unique_ptr<cvk_buffer> cvk_entry_point::allocate_pod_buffer() {
 
     return buffer;
 }
+
+bool cvk_entry_point::uses_printf() const {
+    return m_program->kernel_flags(m_name) &
+           NonSemanticClspvReflectionMayUsePrintf;
+}
diff --git a/src/program.hpp b/src/program.hpp
index 891bac8c..53277c61 100644
--- a/src/program.hpp
+++ b/src/program.hpp
@@ -460,6 +460,8 @@ class cvk_entry_point {
 
     cvk_program* program() const { return m_program; }
 
+    bool uses_printf() const;
+
 private:
     const uint32_t MAX_INSTANCES = 2 * 1024; // FIXME find a better definition
 
diff --git a/src/queue.cpp b/src/queue.cpp
index 7bcbe626..2f505b2c 100644
--- a/src/queue.cpp
+++ b/src/queue.cpp
@@ -540,7 +540,7 @@ bool cvk_command_buffer::submit_and_wait() {
     return true;
 }
 
-void cvk_command_kernel::update_global_push_constants(
+cl_int cvk_command_kernel::update_global_push_constants(
     cvk_command_buffer& command_buffer) {
     auto program = m_kernel->program();
 
@@ -597,7 +597,11 @@ void cvk_command_kernel::update_global_push_constants(
         CVK_ASSERT(pc->size == 8);
         CVK_ASSERT(program->uses_printf());
 
-        auto buffer = m_queue->printf_buffer();
+        auto buffer = m_queue->get_printf_buffer();
+        if (buffer == nullptr) {
+            cvk_error_fn("printf buffer was not created");
+            return CL_OUT_OF_RESOURCES;
+        }
         auto dev_addr = buffer->device_address();
 
         vkCmdPushConstants(command_buffer, m_kernel->pipeline_layout(),
@@ -653,6 +657,7 @@ void cvk_command_kernel::update_global_push_constants(
             }
         }
     }
+    return CL_SUCCESS;
 }
 
 cl_int cvk_command_kernel::dispatch_uniform_region_within_vklimits(
@@ -956,10 +961,10 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) {
     // Setup printf buffer descriptor if needed
     if (m_kernel->program()->uses_printf()) {
         // Create and initialize the printf buffer
-        auto buffer = m_queue->printf_buffer();
-        if (buffer->map()) {
-            memset(buffer->host_va(), 0, 4);
-            buffer->unmap();
+        auto buffer = m_queue->get_or_create_printf_buffer();
+        auto err = m_queue->reset_printf_buffer();
+        if (err != CL_SUCCESS) {
+            return err;
         }
 
         if (m_kernel->program()->printf_buffer_info().type ==
@@ -996,10 +1001,13 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) {
                                 m_argument_values->descriptor_sets(), 0, 0);
     }
 
-    update_global_push_constants(command_buffer);
+    auto err = update_global_push_constants(command_buffer);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
 
     // Dispatch work
-    auto err = build_and_dispatch_regions(command_buffer);
+    err = build_and_dispatch_regions(command_buffer);
     if (err != CL_SUCCESS) {
         return err;
     }
@@ -1033,8 +1041,12 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) {
 
 cl_int cvk_command_kernel::do_post_action() {
     if (m_kernel->uses_printf()) {
-        cvk_printf(m_queue->printf_buffer(),
-                   m_kernel->program()->printf_descriptors());
+        auto buffer = m_queue->get_printf_buffer();
+        if (buffer == nullptr) {
+            cvk_error_fn("printf buffer was not created");
+            return CL_OUT_OF_RESOURCES;
+        }
+        return cvk_printf(buffer, m_kernel->program()->printf_descriptors());
     }
 
     return CL_SUCCESS;
diff --git a/src/queue.hpp b/src/queue.hpp
index bd24f420..f4bbfc7d 100644
--- a/src/queue.hpp
+++ b/src/queue.hpp
@@ -172,7 +172,7 @@ struct cvk_command_queue : public _cl_command_queue,
         return m_command_pool.free_command_buffer(cmdbuf);
     }
 
-    cvk_buffer* printf_buffer() {
+    cvk_buffer* get_or_create_printf_buffer() {
         if (!m_printf_buffer) {
             cl_int status;
             m_printf_buffer = cvk_buffer::create(
@@ -182,6 +182,23 @@ struct cvk_command_queue : public _cl_command_queue,
         return m_printf_buffer.get();
     }
 
+    cvk_buffer* get_printf_buffer() {
+        if (!m_printf_buffer) {
+            return nullptr;
+        }
+        return m_printf_buffer.get();
+    }
+
+    cl_int reset_printf_buffer() {
+        if (m_printf_buffer && m_printf_buffer->map()) {
+            memset(m_printf_buffer->host_va(), 0, 4);
+            m_printf_buffer->unmap();
+            return CL_SUCCESS;
+        }
+        cvk_error_fn("Could not reset printf buffer");
+        return CL_OUT_OF_RESOURCES;
+    }
+
     void command_pool_lock() { m_command_pool.lock(); }
 
     void command_pool_unlock() { m_command_pool.unlock(); }
@@ -766,7 +783,7 @@ struct cvk_command_kernel final : public cvk_command_batchable {
 private:
     CHECK_RETURN cl_int
     build_and_dispatch_regions(cvk_command_buffer& command_buffer);
-    void update_global_push_constants(cvk_command_buffer& command_buffer);
+    CHECK_RETURN cl_int update_global_push_constants(cvk_command_buffer& command_buffer);
     CHECK_RETURN cl_int dispatch_uniform_region_within_vklimits(
         const cvk_ndrange& region, cvk_command_buffer& command_buffer);
     CHECK_RETURN cl_int dispatch_uniform_region_iterate(

From d33db68352e1e192fef7c0593f94a8f7c7b22b79 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@chromium.org>
Date: Mon, 10 Jul 2023 13:56:52 +0200
Subject: [PATCH 09/13] add tests

---
 src/exports.map          |   2 +
 src/program.cpp          |   6 +-
 src/unit.cpp             |   7 ++
 src/unit.hpp             |   4 ++
 src/utils.cpp            |  13 ++--
 src/utils.hpp            |   4 +-
 tests/api/CMakeLists.txt |   1 +
 tests/api/printf.cpp     | 141 +++++++++++++++++++++++++++++++++++++++
 8 files changed, 170 insertions(+), 8 deletions(-)
 create mode 100644 tests/api/printf.cpp

diff --git a/src/exports.map b/src/exports.map
index c2aac27f..9bb5cdfd 100644
--- a/src/exports.map
+++ b/src/exports.map
@@ -2,6 +2,8 @@ CLVK_UNIT_TESTING_FCT {
 global:
     clvk_override_device_max_compute_work_group_count;
     clvk_restore_device_properties;
+    clvk_override_printf_buffer_size;
+    cvk_mkdtemp;
 local:
     *;
 };
diff --git a/src/program.cpp b/src/program.cpp
index 5a30d7e9..f025b6bf 100644
--- a/src/program.cpp
+++ b/src/program.cpp
@@ -966,7 +966,8 @@ cl_int cvk_program::parse_user_spec_constants() {
     std::filesystem::path tmp_prefix(config.compiler_temp_dir());
     std::filesystem::path tmp_suffix("clvk-XXXXXX");
     std::string tmp_template = (tmp_prefix / tmp_suffix).string();
-    const char* tmp = cvk_mkdtemp(tmp_template);
+    const char* tmp =
+        cvk_mkdtemp((char*)tmp_template.c_str, tmp_template.size());
     if (tmp == nullptr) {
         cvk_error_fn("Could not create temporary folder \"%s\"",
                      tmp_template.c_str());
@@ -1361,7 +1362,8 @@ cl_build_status cvk_program::do_build_inner(const cvk_device* device) {
         std::filesystem::path tmp_prefix(config.compiler_temp_dir());
         std::filesystem::path tmp_suffix("clvk-XXXXXX");
         std::string tmp_template = (tmp_prefix / tmp_suffix).string();
-        const char* tmp = cvk_mkdtemp(tmp_template);
+        const char* tmp =
+            cvk_mkdtemp((char*)tmp_template.c_str(), tmp_template.size());
         if (tmp == nullptr) {
             cvk_error_fn("Could not create temporary folder \"%s\"",
                          tmp_template.c_str());
diff --git a/src/unit.cpp b/src/unit.cpp
index 4c275b39..d5a6fb31 100644
--- a/src/unit.cpp
+++ b/src/unit.cpp
@@ -37,6 +37,13 @@ void CL_API_CALL clvk_restore_device_properties(cl_device_id device) {
 
     icd_downcast(device)->restore_device_properties();
 }
+
+void CL_API_CALL clvk_override_printf_buffer_size(uint32_t size) {
+    auto printf_buffer_size =
+        (config_value<uint32_t>*)&config.printf_buffer_size;
+    printf_buffer_size->value = size;
+    printf_buffer_size->set = true;
+}
 }
 
 #endif
diff --git a/src/unit.hpp b/src/unit.hpp
index f6ff6259..4ee381ea 100644
--- a/src/unit.hpp
+++ b/src/unit.hpp
@@ -24,6 +24,10 @@ void CL_API_CALL clvk_override_device_max_compute_work_group_count(
     cl_device_id device, uint32_t x, uint32_t y, uint32_t z);
 
 void CL_API_CALL clvk_restore_device_properties(cl_device_id device);
+
+void CL_API_CALL clvk_override_printf_buffer_size(uint32_t size);
+
+char* CL_API_CALL clvk_mkdtemp(char* tmpl, size_t size);
 }
 
 #endif
diff --git a/src/utils.cpp b/src/utils.cpp
index bc99b472..5afc2978 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -26,21 +26,24 @@
 #include <io.h>
 #endif
 
-char* cvk_mkdtemp(std::string& tmpl) {
+extern "C" {
+char* CL_API_CALL cvk_mkdtemp(char* tmpl, size_t size) {
 #ifdef WIN32
-    if (_mktemp_s(&tmpl.front(), tmpl.size() + 1) != 0) {
+    if (_mktemp_s(tmpl, size + 1) != 0) {
         return nullptr;
     }
 
-    if (!CreateDirectory(tmpl.c_str(), nullptr)) {
+    if (!CreateDirectory(tmpl, nullptr)) {
         return nullptr;
     }
 
-    return &tmpl.front();
+    return tmpl;
 #else
-    return mkdtemp(&tmpl.front());
+    UNUSED(size);
+    return mkdtemp(tmpl);
 #endif
 }
+}
 
 int cvk_exec(const std::string& cmd, std::string* output) {
 #ifdef WIN32
diff --git a/src/utils.hpp b/src/utils.hpp
index 52d73b6f..750d88fe 100644
--- a/src/utils.hpp
+++ b/src/utils.hpp
@@ -26,7 +26,9 @@
 #define CHECK_RETURN
 #endif
 
-char* cvk_mkdtemp(std::string& tmpl);
+extern "C" {
+char* CL_API_CALL cvk_mkdtemp(char* tmpl, size_t size);
+}
 int cvk_exec(const std::string& cmd, std::string* output = nullptr);
 
 #define CVK_VK_CHECK_INTERNAL(logfn, res, msg)                                 \
diff --git a/tests/api/CMakeLists.txt b/tests/api/CMakeLists.txt
index d99e50cf..abc84e39 100644
--- a/tests/api/CMakeLists.txt
+++ b/tests/api/CMakeLists.txt
@@ -20,6 +20,7 @@ add_gtest_executable(api_tests
     local_buffer.cpp
     main.cpp
     platform.cpp
+    printf.cpp
     profiling.cpp
     simple.cpp
     simple_image.cpp
diff --git a/tests/api/printf.cpp b/tests/api/printf.cpp
new file mode 100644
index 00000000..f29879b8
--- /dev/null
+++ b/tests/api/printf.cpp
@@ -0,0 +1,141 @@
+// Copyright 2023 The clvk authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifdef CLVK_UNIT_TESTING_ENABLED
+
+#include "testcl.hpp"
+#include "utils.hpp"
+#include "unit.hpp"
+
+#include <filesystem>
+
+static std::string stdoutFileName;
+
+#define BUFFER_SIZE 1024
+static char stdoutBuffer[BUFFER_SIZE];
+
+static void releaseStdout(int fd) {
+    fflush(stdout);
+    dup2(fd, fileno(stdout));
+    close(fd);
+}
+
+static bool getStdout(int& fd) {
+    fd = dup(fileno(stdout));
+    if (!freopen(stdoutFileName.c_str(), "w", stdout)) {
+        fprintf(stderr, "ERROR!\n");
+        releaseStdout(fd);
+        return false;
+    }
+    return true;
+}
+
+static char* getStdoutContent() {
+    FILE* f;
+    memset(stdoutBuffer, 0, BUFFER_SIZE);
+    fflush(stdout);
+    f = fopen(stdoutFileName.c_str(), "r");
+    if (f == nullptr)
+        return nullptr;
+
+    char* ptr = stdoutBuffer;
+    do {
+        ptr += strlen(ptr);
+        ptr = fgets(ptr, BUFFER_SIZE, f);
+    } while (ptr != nullptr);
+    fclose(f);
+
+    return stdoutBuffer;
+}
+
+struct temp_folder_deletion {
+    ~temp_folder_deletion() {
+        if (!m_path.empty())
+            std::filesystem::remove_all(m_path.c_str());
+    }
+    void set_path(std::string path) { m_path = path; }
+
+private:
+    std::string m_path;
+};
+
+static std::string getStdoutFileName(temp_folder_deletion& temp) {
+    char template_tmp_dir[] = "clvk-XXXXXX";
+    std::filesystem::path prefix(
+        cvk_mkdtemp(template_tmp_dir, sizeof(template_tmp_dir)));
+    std::filesystem::path suffix("stdout_buffer");
+    temp.set_path(prefix.string());
+    return (prefix / suffix).string();
+}
+
+TEST_F(WithCommandQueue, SimplePrintf) {
+    temp_folder_deletion temp;
+    stdoutFileName = getStdoutFileName(temp);
+
+    int fd;
+    ASSERT_TRUE(getStdout(fd));
+
+    const char message[] = "Hello World!";
+    char* source = nullptr;
+    asprintf(&source, "kernel void test_printf() { printf(\"%s\");}", message);
+    ASSERT_NE(source, nullptr);
+    auto kernel = CreateKernel(source, "test_printf");
+    free(source);
+
+    size_t gws = 1;
+    size_t lws = 1;
+    EnqueueNDRangeKernel(kernel, 1, nullptr, &gws, &lws, 0, nullptr, nullptr);
+    Finish();
+
+    releaseStdout(fd);
+    auto printf_buffer = getStdoutContent();
+    ASSERT_NE(printf_buffer, nullptr);
+
+    ASSERT_STREQ(printf_buffer, message);
+}
+
+TEST_F(WithCommandQueue, TooLongPrintf) {
+    clvk_override_printf_buffer_size(24);
+
+    temp_folder_deletion temp;
+    stdoutFileName = getStdoutFileName(temp);
+
+    int fd;
+    ASSERT_TRUE(getStdout(fd));
+
+    const char* source = R"(
+    kernel void test_printf() {
+      for (unsigned i = 0; i < 3; i++){
+        printf("get_global_id(%u) = %u\n", i, get_global_id(i));
+      }
+    }
+    )";
+    auto kernel = CreateKernel(source, "test_printf");
+
+    size_t gws = 1;
+    size_t lws = 1;
+    EnqueueNDRangeKernel(kernel, 1, nullptr, &gws, &lws, 0, nullptr, nullptr);
+    Finish();
+
+    releaseStdout(fd);
+    auto printf_buffer = getStdoutContent();
+    ASSERT_NE(printf_buffer, nullptr);
+
+    // We only get the first 2 prints because the buffer is too small to get the
+    // last one.
+    const char* message = "get_global_id(0) = 0\nget_global_id(1) = 0\n";
+    ASSERT_STREQ(printf_buffer, message);
+}
+
+#endif

From 3c5e03d5dc8808faa722a2a8e223e9dc5fccd003 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@chromium.org>
Date: Mon, 10 Jul 2023 14:29:03 +0200
Subject: [PATCH 10/13] fix formatting

---
 src/printf.cpp       | 2 +-
 src/program.cpp      | 3 +--
 src/queue.hpp        | 3 ++-
 tests/api/printf.cpp | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/printf.cpp b/src/printf.cpp
index 2a7bf799..460012ac 100644
--- a/src/printf.cpp
+++ b/src/printf.cpp
@@ -230,7 +230,7 @@ void process_printf(char*& data, const printf_descriptor_map_t& descs) {
 }
 
 cl_int cvk_printf(cvk_mem* printf_buffer,
-                const printf_descriptor_map_t& descriptors) {
+                  const printf_descriptor_map_t& descriptors) {
     CVK_ASSERT(printf_buffer);
     if (!printf_buffer->map()) {
         cvk_error("Could not map printf buffer");
diff --git a/src/program.cpp b/src/program.cpp
index f025b6bf..d806ff89 100644
--- a/src/program.cpp
+++ b/src/program.cpp
@@ -1833,8 +1833,7 @@ bool cvk_entry_point::
 bool cvk_entry_point::build_descriptor_sets_layout_bindings_for_printf_buffer(
     binding_stat_map& smap) {
     std::vector<VkDescriptorSetLayoutBinding> layoutBindings;
-    if (m_program->printf_buffer_info().size > 0 &&
-        (uses_printf())) {
+    if (m_program->printf_buffer_info().size > 0 && (uses_printf())) {
         auto info = m_program->printf_buffer_info();
         if (info.type != module_buffer_type::storage_buffer) {
             return true;
diff --git a/src/queue.hpp b/src/queue.hpp
index f4bbfc7d..4acd0e5e 100644
--- a/src/queue.hpp
+++ b/src/queue.hpp
@@ -783,7 +783,8 @@ struct cvk_command_kernel final : public cvk_command_batchable {
 private:
     CHECK_RETURN cl_int
     build_and_dispatch_regions(cvk_command_buffer& command_buffer);
-    CHECK_RETURN cl_int update_global_push_constants(cvk_command_buffer& command_buffer);
+    CHECK_RETURN cl_int
+    update_global_push_constants(cvk_command_buffer& command_buffer);
     CHECK_RETURN cl_int dispatch_uniform_region_within_vklimits(
         const cvk_ndrange& region, cvk_command_buffer& command_buffer);
     CHECK_RETURN cl_int dispatch_uniform_region_iterate(
diff --git a/tests/api/printf.cpp b/tests/api/printf.cpp
index f29879b8..b4ecf22b 100644
--- a/tests/api/printf.cpp
+++ b/tests/api/printf.cpp
@@ -15,8 +15,8 @@
 #ifdef CLVK_UNIT_TESTING_ENABLED
 
 #include "testcl.hpp"
-#include "utils.hpp"
 #include "unit.hpp"
+#include "utils.hpp"
 
 #include <filesystem>
 

From 09632057acadb7f9add3435b52fc286ddceb6b1c Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@chromium.org>
Date: Mon, 10 Jul 2023 14:48:49 +0200
Subject: [PATCH 11/13] fix parse_user_spec_constants

---
 src/program.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/program.cpp b/src/program.cpp
index d806ff89..e341e528 100644
--- a/src/program.cpp
+++ b/src/program.cpp
@@ -967,7 +967,7 @@ cl_int cvk_program::parse_user_spec_constants() {
     std::filesystem::path tmp_suffix("clvk-XXXXXX");
     std::string tmp_template = (tmp_prefix / tmp_suffix).string();
     const char* tmp =
-        cvk_mkdtemp((char*)tmp_template.c_str, tmp_template.size());
+        cvk_mkdtemp((char*)tmp_template.c_str(), tmp_template.size());
     if (tmp == nullptr) {
         cvk_error_fn("Could not create temporary folder \"%s\"",
                      tmp_template.c_str());

From d62ad0fb764efeb45bfd7033713c815a1c6d07ca Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@chromium.org>
Date: Mon, 10 Jul 2023 15:06:26 +0200
Subject: [PATCH 12/13] remove use of asprintf

---
 tests/api/printf.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/api/printf.cpp b/tests/api/printf.cpp
index b4ecf22b..d445c2d4 100644
--- a/tests/api/printf.cpp
+++ b/tests/api/printf.cpp
@@ -87,11 +87,9 @@ TEST_F(WithCommandQueue, SimplePrintf) {
     ASSERT_TRUE(getStdout(fd));
 
     const char message[] = "Hello World!";
-    char* source = nullptr;
-    asprintf(&source, "kernel void test_printf() { printf(\"%s\");}", message);
-    ASSERT_NE(source, nullptr);
+    char source[512];
+    sprintf(source, "kernel void test_printf() { printf(\"%s\");}", message);
     auto kernel = CreateKernel(source, "test_printf");
-    free(source);
 
     size_t gws = 1;
     size_t lws = 1;

From 6fafdf1fc67956e9b6d53e2d47c04a7b8a6c2bc1 Mon Sep 17 00:00:00 2001
From: Romaric Jodin <rjodin@chromium.org>
Date: Tue, 11 Jul 2023 08:04:55 +0200
Subject: [PATCH 13/13] copy cvk_mkdtemp in test to avoid having to export the
 symbol

---
 src/exports.map      |  1 -
 src/program.cpp      |  6 ++----
 src/unit.hpp         |  2 --
 src/utils.cpp        | 13 +++++--------
 src/utils.hpp        |  4 +---
 tests/api/printf.cpp | 27 ++++++++++++++++++++++++++-
 6 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/src/exports.map b/src/exports.map
index 9bb5cdfd..fc5c2f47 100644
--- a/src/exports.map
+++ b/src/exports.map
@@ -3,7 +3,6 @@ global:
     clvk_override_device_max_compute_work_group_count;
     clvk_restore_device_properties;
     clvk_override_printf_buffer_size;
-    cvk_mkdtemp;
 local:
     *;
 };
diff --git a/src/program.cpp b/src/program.cpp
index e341e528..268075d2 100644
--- a/src/program.cpp
+++ b/src/program.cpp
@@ -966,8 +966,7 @@ cl_int cvk_program::parse_user_spec_constants() {
     std::filesystem::path tmp_prefix(config.compiler_temp_dir());
     std::filesystem::path tmp_suffix("clvk-XXXXXX");
     std::string tmp_template = (tmp_prefix / tmp_suffix).string();
-    const char* tmp =
-        cvk_mkdtemp((char*)tmp_template.c_str(), tmp_template.size());
+    const char* tmp = cvk_mkdtemp(tmp_template);
     if (tmp == nullptr) {
         cvk_error_fn("Could not create temporary folder \"%s\"",
                      tmp_template.c_str());
@@ -1362,8 +1361,7 @@ cl_build_status cvk_program::do_build_inner(const cvk_device* device) {
         std::filesystem::path tmp_prefix(config.compiler_temp_dir());
         std::filesystem::path tmp_suffix("clvk-XXXXXX");
         std::string tmp_template = (tmp_prefix / tmp_suffix).string();
-        const char* tmp =
-            cvk_mkdtemp((char*)tmp_template.c_str(), tmp_template.size());
+        const char* tmp = cvk_mkdtemp(tmp_template);
         if (tmp == nullptr) {
             cvk_error_fn("Could not create temporary folder \"%s\"",
                          tmp_template.c_str());
diff --git a/src/unit.hpp b/src/unit.hpp
index 4ee381ea..b5e68685 100644
--- a/src/unit.hpp
+++ b/src/unit.hpp
@@ -26,8 +26,6 @@ void CL_API_CALL clvk_override_device_max_compute_work_group_count(
 void CL_API_CALL clvk_restore_device_properties(cl_device_id device);
 
 void CL_API_CALL clvk_override_printf_buffer_size(uint32_t size);
-
-char* CL_API_CALL clvk_mkdtemp(char* tmpl, size_t size);
 }
 
 #endif
diff --git a/src/utils.cpp b/src/utils.cpp
index 5afc2978..bc99b472 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -26,24 +26,21 @@
 #include <io.h>
 #endif
 
-extern "C" {
-char* CL_API_CALL cvk_mkdtemp(char* tmpl, size_t size) {
+char* cvk_mkdtemp(std::string& tmpl) {
 #ifdef WIN32
-    if (_mktemp_s(tmpl, size + 1) != 0) {
+    if (_mktemp_s(&tmpl.front(), tmpl.size() + 1) != 0) {
         return nullptr;
     }
 
-    if (!CreateDirectory(tmpl, nullptr)) {
+    if (!CreateDirectory(tmpl.c_str(), nullptr)) {
         return nullptr;
     }
 
-    return tmpl;
+    return &tmpl.front();
 #else
-    UNUSED(size);
-    return mkdtemp(tmpl);
+    return mkdtemp(&tmpl.front());
 #endif
 }
-}
 
 int cvk_exec(const std::string& cmd, std::string* output) {
 #ifdef WIN32
diff --git a/src/utils.hpp b/src/utils.hpp
index 750d88fe..52d73b6f 100644
--- a/src/utils.hpp
+++ b/src/utils.hpp
@@ -26,9 +26,7 @@
 #define CHECK_RETURN
 #endif
 
-extern "C" {
-char* CL_API_CALL cvk_mkdtemp(char* tmpl, size_t size);
-}
+char* cvk_mkdtemp(std::string& tmpl);
 int cvk_exec(const std::string& cmd, std::string* output = nullptr);
 
 #define CVK_VK_CHECK_INTERNAL(logfn, res, msg)                                 \
diff --git a/tests/api/printf.cpp b/tests/api/printf.cpp
index d445c2d4..db055178 100644
--- a/tests/api/printf.cpp
+++ b/tests/api/printf.cpp
@@ -20,6 +20,15 @@
 
 #include <filesystem>
 
+#ifdef __APPLE__
+#include <unistd.h>
+#endif
+
+#ifdef WIN32
+#include <Windows.h>
+#include <io.h>
+#endif
+
 static std::string stdoutFileName;
 
 #define BUFFER_SIZE 1024
@@ -70,10 +79,26 @@ struct temp_folder_deletion {
     std::string m_path;
 };
 
+static char* mkdtemp(char* tmpl, size_t size) {
+#ifdef WIN32
+    if (_mktemp_s(tmpl, size + 1) != 0) {
+        return nullptr;
+    }
+
+    if (!CreateDirectory(tmpl, nullptr)) {
+        return nullptr;
+    }
+
+    return tmpl;
+#else
+    return mkdtemp(tmpl);
+#endif
+}
+
 static std::string getStdoutFileName(temp_folder_deletion& temp) {
     char template_tmp_dir[] = "clvk-XXXXXX";
     std::filesystem::path prefix(
-        cvk_mkdtemp(template_tmp_dir, sizeof(template_tmp_dir)));
+        mkdtemp(template_tmp_dir, sizeof(template_tmp_dir)));
     std::filesystem::path suffix("stdout_buffer");
     temp.set_path(prefix.string());
     return (prefix / suffix).string();