Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle concurrent printf #725

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions src/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ cl_int cvk_command_kernel::update_global_push_constants(
CVK_ASSERT(pc->size == 8);
CVK_ASSERT(program->uses_printf());

auto buffer = m_queue->get_printf_buffer();
auto buffer = get_printf_buffer();
if (buffer == nullptr) {
cvk_error_fn("printf buffer was not created");
return CL_OUT_OF_RESOURCES;
Expand Down Expand Up @@ -1043,11 +1043,7 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) {
// Setup printf buffer descriptor if needed
if (m_kernel->program()->uses_printf()) {
// Create and initialize the printf buffer
auto buffer = m_queue->get_or_create_printf_buffer();
auto err = m_queue->reset_printf_buffer();
if (err != CL_SUCCESS) {
return err;
}
auto buffer = create_printf_buffer();

if (m_kernel->program()->printf_buffer_info().type ==
module_buffer_type::storage_buffer) {
Expand Down Expand Up @@ -1123,7 +1119,7 @@ cvk_command_kernel::build_batchable_inner(cvk_command_buffer& command_buffer) {

cl_int cvk_command_kernel::do_post_action() {
if (m_kernel->uses_printf()) {
auto buffer = m_queue->get_printf_buffer();
auto buffer = get_printf_buffer();
if (buffer == nullptr) {
cvk_error_fn("printf buffer was not created");
return CL_OUT_OF_RESOURCES;
Expand Down
35 changes: 31 additions & 4 deletions src/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ struct cvk_command_queue : public _cl_command_queue,
return m_command_pool.free_command_buffer(cmdbuf);
}


cvk_buffer* get_or_create_printf_buffer() {
CVK_ASSERT(m_context != nullptr);
if (!m_printf_buffer) {
Expand Down Expand Up @@ -275,8 +276,6 @@ struct cvk_command_queue : public _cl_command_queue,
TRACE_CNT_VAR(batch_in_flight_counter);
TRACE_CNT_VAR(group_in_flight_counter);

std::unique_ptr<cvk_buffer> m_printf_buffer;

std::vector<std::unique_ptr<cvk_queue_controller>> m_controllers;

friend struct cvk_queue_controller;
Expand Down Expand Up @@ -776,8 +775,9 @@ struct cvk_command_kernel final : public cvk_command_batchable {

cvk_command_kernel(cvk_command_queue* q, cvk_kernel* kernel, uint32_t dims,
const cvk_ndrange& ndrange)
: cvk_command_batchable(CL_COMMAND_NDRANGE_KERNEL, q), m_kernel(kernel),
m_dimensions(dims), m_ndrange(ndrange), m_pipeline(VK_NULL_HANDLE),
: cvk_command_batchable(CL_COMMAND_NDRANGE_KERNEL, q),
m_context(q->context()), m_kernel(kernel), m_dimensions(dims),
m_ndrange(ndrange), m_pipeline(VK_NULL_HANDLE),
m_argument_values(nullptr) {}

~cvk_command_kernel() {
Expand Down Expand Up @@ -805,6 +805,31 @@ struct cvk_command_kernel final : public cvk_command_batchable {
return argvals->memory_objects();
}

cvk_buffer* create_printf_buffer() {
if (!m_printf_buffer) {
cl_int status;
m_printf_buffer = cvk_buffer::create(
m_context, 0, m_context->get_printf_buffersize(), nullptr,
&status);
CVK_ASSERT(status == CL_SUCCESS);
}

if (m_printf_buffer && m_printf_buffer->map_write_only()) {
memset(m_printf_buffer->host_va(), 0, 4);
m_printf_buffer->unmap_to_write(0, 4);
} else {
cvk_error_fn("Could not reset printf buffer");
}
return m_printf_buffer.get();
}

cvk_buffer* get_printf_buffer() {
if (!m_printf_buffer) {
return nullptr;
}
return m_printf_buffer.get();
}

private:
CHECK_RETURN cl_int
build_and_dispatch_regions(cvk_command_buffer& command_buffer);
Expand All @@ -819,10 +844,12 @@ struct cvk_command_kernel final : public cvk_command_batchable {
CHECK_RETURN cl_int dispatch_uniform_region(
const cvk_ndrange& region, cvk_command_buffer& command_buffer);

cvk_context* m_context;
cvk_kernel_holder m_kernel;
uint32_t m_dimensions;
cvk_ndrange m_ndrange;
VkPipeline m_pipeline;
std::unique_ptr<cvk_buffer> m_printf_buffer;
std::shared_ptr<cvk_kernel_argument_values> m_argument_values;
};

Expand Down
15 changes: 15 additions & 0 deletions tests/simple-concurrent/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright 2024 The clvk authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

add_simple_static_and_dyn_executable(simple_test_concurrent_print simple.cpp)
128 changes: 128 additions & 0 deletions tests/simple-concurrent/simple.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// Copyright 2024 The clvk authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <assert.h>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>

#define CL_TARGET_OPENCL_VERSION 120
#include "CL/cl.h"

#define CHECK_CL_ERRCODE(err) \
do { \
if (err != CL_SUCCESS) { \
fprintf(stderr, "%s:%d error after CL call: %d\n", __FILE__, \
__LINE__, err); \
return EXIT_FAILURE; \
} \
} while (0)

const char* program_source = R"(
kernel void test_simple(uint timeout)
{
printf("Hello World! %u\n", timeout);
while (timeout--);
}
)";

int main(int argc, char** argv) {
cl_platform_id platform;
cl_device_id device;
cl_int err;

assert(argc == 2);
uint32_t sleep = atoi(argv[1]);

// Get the first GPU device of the first platform
err = clGetPlatformIDs(1, &platform, nullptr);
CHECK_CL_ERRCODE(err);

char platform_name[128];
err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platform_name),
platform_name, nullptr);
CHECK_CL_ERRCODE(err);

printf("Platform: %s\n", platform_name);

err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr);
CHECK_CL_ERRCODE(err);

char device_name[128];
err = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name),
device_name, nullptr);
CHECK_CL_ERRCODE(err);

printf("Device: %s\n", device_name);

auto context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
CHECK_CL_ERRCODE(err);

// Create program
auto program =
clCreateProgramWithSource(context, 1, &program_source, nullptr, &err);
CHECK_CL_ERRCODE(err);

// Build program
err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
CHECK_CL_ERRCODE(err);

// Create kernel
auto kernel = clCreateKernel(program, "test_simple", &err);
CHECK_CL_ERRCODE(err);
auto kernel2 = clCreateKernel(program, "test_simple", &err);
CHECK_CL_ERRCODE(err);

// Create command queue
auto queue = clCreateCommandQueue(context, device, 0, &err);
CHECK_CL_ERRCODE(err);

// Set kernel arguments
cl_uint timeout = 10000;
err = clSetKernelArg(kernel, 0, sizeof(cl_uint), &timeout);
CHECK_CL_ERRCODE(err);

size_t gws = 1;
size_t lws = 1;

err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &gws, &lws, 0,
nullptr, nullptr);
CHECK_CL_ERRCODE(err);

timeout = 0;
err = clSetKernelArg(kernel2, 0, sizeof(cl_uint), &timeout);
CHECK_CL_ERRCODE(err);

err = clFlush(queue);
CHECK_CL_ERRCODE(err);

usleep(sleep);

err = clEnqueueNDRangeKernel(queue, kernel2, 1, nullptr, &gws, &lws, 0,
nullptr, nullptr);
CHECK_CL_ERRCODE(err);

// Complete execution
err = clFinish(queue);
CHECK_CL_ERRCODE(err);

// Cleanup
clReleaseCommandQueue(queue);
clReleaseKernel(kernel);
clReleaseKernel(kernel2);
clReleaseProgram(program);
clReleaseContext(context);

return 0;
}
Loading