diff --git a/.gitignore b/.gitignore index 50fc6363..e08b24ef 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,9 @@ third_party/robin-hood-hashing # C-Lion .idea/ cmake-build-*/ + +### Clangd cached index files +/.cache + +### The 'compile_commands' file can be generated at root +compile_commands.json diff --git a/CMakeLists.txt b/CMakeLists.txt index fcea8bf8..10a90e14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,7 @@ endif() project(amber) enable_testing() +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/docs/amber_script.md b/docs/amber_script.md index 3d7bad9d..7b6d8dad 100644 --- a/docs/amber_script.md +++ b/docs/amber_script.md @@ -989,18 +989,22 @@ value for `START_IDX` is 0. The default value for `COUNT` is the item count of vertex buffer minus the `START_IDX`. The same applies to `START_INSTANCE` (default 0) and `INSTANCE_COUNT` (default 1). +The `TIMED_EXECUTION` is an optional flag that can be passed to the run command. +This will cause Amber to insert device specific counters to time the execution +of this pipeline command. + ```groovy # Run the given |pipeline_name| which must be a `compute` pipeline. The # pipeline will be run with the given number of workgroups in the |x|, |y|, |z| # dimensions. Each of the x, y and z values must be a uint32. -RUN {pipeline_name} _x_ _y_ _z_ +RUN [TIMED_EXECUTION] {pipeline_name} _x_ _y_ _z_ ``` ```groovy # Run the given |pipeline_name| which must be a `graphics` pipeline. The # rectangle at |x|, |y|, |width|x|height| will be rendered. Ignores VERTEX_DATA # and INDEX_DATA on the given pipeline. -RUN {pipeline_name} \ +RUN [TIMED_EXECUTION] {pipeline_name} \  DRAW_RECT POS _x_in_pixels_ _y_in_pixels_ \  SIZE _width_in_pixels_ _height_in_pixels_ ``` @@ -1010,7 +1014,7 @@ RUN {pipeline_name} \ # grid at |x|, |y|, |width|x|height|, |columns|x|rows| will be rendered. 
# Ignores VERTEX_DATA and INDEX_DATA on the given pipeline. # For columns, rows of (5, 4) a total of 5*4=20 rectangles will be drawn. -RUN {pipeline_name} \ +RUN [TIMED_EXECUTION] {pipeline_name} \  DRAW_GRID POS _x_in_pixels_ _y_in_pixels_ \  SIZE _width_in_pixels_ _height_in_pixels_ \ CELLS _columns_of_cells_ _rows_of_cells_ @@ -1024,7 +1028,7 @@ RUN {pipeline_name} \ # will be processed. The draw is instanced if |inst_count_value| is greater # than one. In case of instanced draw |inst_value| controls the starting # instance ID. -RUN {pipeline_name} DRAW_ARRAY AS {topology} \ +RUN [TIMED_EXECUTION] {pipeline_name} DRAW_ARRAY AS {topology} \ [ START_IDX _value_ (default 0) ] \ [ COUNT _count_value_ (default vertex_buffer size - start_idx) ] \ [ START_INSTANCE _inst_value_ (default 0) ] \ @@ -1040,7 +1044,7 @@ RUN {pipeline_name} DRAW_ARRAY AS {topology} \ # will be processed. The draw is instanced if |inst_count_value| is greater # than one. In case of instanced draw |inst_value| controls the starting # instance ID. -RUN {pipeline_name} DRAW_ARRAY AS {topology} INDEXED \ +RUN [TIMED_EXECUTION] {pipeline_name} DRAW_ARRAY AS {topology} INDEXED \ [ START_IDX _value_ (default 0) ] \ [ COUNT _count_value_ (default index_buffer size - start_idx) ] \ [ START_INSTANCE _inst_value_ (default 0) ] \ @@ -1058,7 +1062,7 @@ RUN {pipeline_name} DRAW_ARRAY AS {topology} INDEXED \ # # The pipeline will be run with the given ray tracing dimensions |x|, |y|, |z|. # Each of the x, y and z values must be a uint32. 
-RUN {pipeline_name} \ +RUN [TIMED_EXECUTION] {pipeline_name} \ RAYGEN {ray_gen_sbt_name} \ [MISS {miss_sbt_name}] \ [HIT {hit_sbt_name}] \ diff --git a/include/amber/amber.h b/include/amber/amber.h index 0c679a31..57e29640 100644 --- a/include/amber/amber.h +++ b/include/amber/amber.h @@ -101,6 +101,9 @@ class Delegate { virtual amber::Result LoadBufferData(const std::string file_name, BufferDataFileType file_type, amber::BufferInfo* buffer) const = 0; + + /// Mechanism for gathering timing from 'TIMED_EXECUTION' + virtual void ReportExecutionTiming(double time_in_ms) = 0; }; /// Stores configuration options for Amber. diff --git a/samples/amber.cc b/samples/amber.cc index 1fa177e6..7a8fe793 100644 --- a/samples/amber.cc +++ b/samples/amber.cc @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,7 @@ struct Options { bool log_graphics_calls = false; bool log_graphics_calls_time = false; bool log_execute_calls = false; + bool log_execution_timing = false; bool disable_spirv_validation = false; bool enable_pipeline_runtime_layer = false; std::string shader_filename; @@ -103,6 +105,7 @@ const char kUsage[] = R"(Usage: amber [options] SCRIPT [SCRIPTS...] --log-graphics-calls -- Log graphics API calls (only for Vulkan so far). --log-graphics-calls-time -- Log timing of graphics API calls timing (Vulkan only). --log-execute-calls -- Log each execute call before run. + --log-execution-timing -- Log timing results from each command with the 'TIMED_EXECUTION' flag. --disable-spirv-val -- Disable SPIR-V validation. --enable-runtime-layer -- Enable pipeline runtime layer. -h -- This help text. 
@@ -278,6 +281,8 @@ bool ParseArgs(const std::vector& args, Options* opts) { opts->log_graphics_calls = true; } else if (arg == "--log-graphics-calls-time") { opts->log_graphics_calls_time = true; + } else if (arg == "--log-execution-timing") { + opts->log_execution_timing = true; } else if (arg == "--log-execute-calls") { opts->log_execute_calls = true; } else if (arg == "--disable-spirv-val") { @@ -361,6 +366,16 @@ class SampleDelegate : public amber::Delegate { } } + void ReportExecutionTiming(double time_in_ms) override { + reported_execution_timing.push_back(time_in_ms); + } + + std::vector GetAndClearExecutionTiming() { + auto returning = reported_execution_timing; + reported_execution_timing.clear(); + return returning; + } + uint64_t GetTimestampNs() const override { return timestamp::SampleGetTimestampNs(); } @@ -400,6 +415,7 @@ class SampleDelegate : public amber::Delegate { bool log_graphics_calls_time_ = false; bool log_execute_calls_ = false; std::string path_ = ""; + std::vector reported_execution_timing; }; std::string disassemble(const std::string& env, @@ -519,7 +535,7 @@ int main(int argc, const char** argv) { recipe->SetFenceTimeout(static_cast(options.fence_timeout)); recipe->SetPipelineRuntimeLayerEnabled( - options.enable_pipeline_runtime_layer); + options.enable_pipeline_runtime_layer); recipe_data.emplace_back(); recipe_data.back().file = file; @@ -621,12 +637,34 @@ int main(int argc, const char** argv) { amber::Amber am(&delegate); result = am.Execute(recipe, &amber_options); if (!result.IsSuccess()) { - std::cerr << file << ": " << result.Error() << std::endl; + std::cerr << file << ": " << result.Error() << "\n"; failures.push_back(file); // Note, we continue after failure to allow dumping the buffers which may // give clues as to the failure. 
} + auto execution_timing = delegate.GetAndClearExecutionTiming(); + if (result.IsSuccess() && options.log_execution_timing && + !execution_timing.empty()) { + std::cout << "Execution timing (in script-order):" << "\n"; + std::cout << " "; + bool is_first_iter = true; + for (auto& timing : execution_timing) { + if (!is_first_iter) { + std::cout << ", "; + } + is_first_iter = false; + std::cout << timing; + } + std::cout << "\n"; + std::sort(execution_timing.begin(), execution_timing.end()); + auto report_median = + (execution_timing[execution_timing.size() / 2] + + execution_timing[(execution_timing.size() - 1) / 2]) / + 2; + std::cout << "Execution time median = " << report_median << " ms" << "\n"; + } + // Dump the shader assembly if (!options.shader_filename.empty()) { #if AMBER_ENABLE_SPIRV_TOOLS diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 08198407..6fd64462 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -153,6 +153,7 @@ if (${AMBER_ENABLE_TESTS}) amberscript/parser_raytracing_test.cc amberscript/parser_repeat_test.cc amberscript/parser_run_test.cc + amberscript/parser_run_timed_execution_test.cc amberscript/parser_sampler_test.cc amberscript/parser_set_test.cc amberscript/parser_shader_opt_test.cc diff --git a/src/amber.cc b/src/amber.cc index ef5658f0..20ebcf81 100644 --- a/src/amber.cc +++ b/src/amber.cc @@ -132,11 +132,10 @@ Result CreateEngineAndCheckRequirements(const Recipe* recipe, // Engine initialization checks requirements. Current backends don't do // much else. Refactor this if they end up doing to much here. 
- Result r = - engine->Initialize(opts->config, delegate, script->GetRequiredFeatures(), - script->GetRequiredProperties(), - script->GetRequiredInstanceExtensions(), - script->GetRequiredDeviceExtensions()); + Result r = engine->Initialize( + opts->config, delegate, script->GetRequiredFeatures(), + script->GetRequiredProperties(), script->GetRequiredInstanceExtensions(), + script->GetRequiredDeviceExtensions()); if (!r.IsSuccess()) return r; diff --git a/src/amberscript/parser.cc b/src/amberscript/parser.cc index 1e4f2cdc..863cf241 100644 --- a/src/amberscript/parser.cc +++ b/src/amberscript/parser.cc @@ -2706,6 +2706,14 @@ Result Parser::ParseBufferInitializerFile(Buffer* buffer) { Result Parser::ParseRun() { auto token = tokenizer_->NextToken(); + + // Timed execution option for this specific run. + bool is_timed_execution = false; + if (token->AsString() == "TIMED_EXECUTION") { + token = tokenizer_->NextToken(); + is_timed_execution = true; + } + if (!token->IsIdentifier()) return Result("missing pipeline name for RUN command"); @@ -2718,6 +2726,9 @@ Result Parser::ParseRun() { if (pipeline->IsRayTracing()) { auto cmd = MakeUnique(pipeline); cmd->SetLine(line); + if (is_timed_execution) { + cmd->SetTimedExecution(); + } while (true) { if (tokenizer_->PeekNextToken()->IsInteger()) @@ -2791,6 +2802,9 @@ Result Parser::ParseRun() { auto cmd = MakeUnique(pipeline); cmd->SetLine(line); cmd->SetX(token->AsUint32()); + if (is_timed_execution) { + cmd->SetTimedExecution(); + } token = tokenizer_->NextToken(); if (!token->IsInteger()) { @@ -2840,6 +2854,9 @@ Result Parser::ParseRun() { MakeUnique(pipeline, *pipeline->GetPipelineData()); cmd->SetLine(line); cmd->EnableOrtho(); + if (is_timed_execution) { + cmd->SetTimedExecution(); + } Result r = token->ConvertToDouble(); if (!r.IsSuccess()) @@ -2909,6 +2926,9 @@ Result Parser::ParseRun() { auto cmd = MakeUnique(pipeline, *pipeline->GetPipelineData()); cmd->SetLine(line); + if (is_timed_execution) { + 
cmd->SetTimedExecution(); + } Result r = token->ConvertToDouble(); if (!r.IsSuccess()) @@ -3082,6 +3102,9 @@ Result Parser::ParseRun() { cmd->SetVertexCount(count); cmd->SetInstanceCount(instance_count); cmd->SetFirstInstance(start_instance); + if (is_timed_execution) { + cmd->SetTimedExecution(); + } if (indexed) cmd->EnableIndexed(); diff --git a/src/amberscript/parser_buffer_test.cc b/src/amberscript/parser_buffer_test.cc index aeac2cbd..74859364 100644 --- a/src/amberscript/parser_buffer_test.cc +++ b/src/amberscript/parser_buffer_test.cc @@ -31,10 +31,12 @@ class DummyDelegate : public amber::Delegate { bool LogExecuteCalls() const override { return false; } void SetLogExecuteCalls(bool) {} bool LogGraphicsCallsTime() const override { return false; } + void SetLogGraphicsCallsTime(bool) {} uint64_t GetTimestampNs() const override { return 0; } void SetScriptPath(std::string) {} + void ReportExecutionTiming(double) override {} amber::Result LoadBufferData(const std::string, amber::BufferDataFileType type, amber::BufferInfo* buffer) const override { diff --git a/src/amberscript/parser_run_test.cc b/src/amberscript/parser_run_test.cc index 0f6b23f0..f823527c 100644 --- a/src/amberscript/parser_run_test.cc +++ b/src/amberscript/parser_run_test.cc @@ -48,6 +48,7 @@ RUN my_pipeline 2 4 5 EXPECT_EQ(2U, cmd->AsCompute()->GetX()); EXPECT_EQ(4U, cmd->AsCompute()->GetY()); EXPECT_EQ(5U, cmd->AsCompute()->GetZ()); + EXPECT_FALSE(cmd->AsCompute()->IsTimedExecution()); } TEST_F(AmberScriptParserTest, RunWithoutPipeline) { @@ -218,6 +219,7 @@ RUN my_pipeline DRAW_RECT POS 2 4 SIZE 10 20)"; EXPECT_FLOAT_EQ(4.f, cmd->AsDrawRect()->GetY()); EXPECT_FLOAT_EQ(10.f, cmd->AsDrawRect()->GetWidth()); EXPECT_FLOAT_EQ(20.f, cmd->AsDrawRect()->GetHeight()); + EXPECT_FALSE(cmd->AsDrawRect()->IsTimedExecution()); } TEST_F(AmberScriptParserTest, RunDrawRectWithComputePipelineInvalid) { @@ -519,6 +521,7 @@ RUN my_pipeline DRAW_GRID POS 2 4 SIZE 10 20 CELLS 4 5)"; EXPECT_FLOAT_EQ(20.f, 
cmd->AsDrawGrid()->GetHeight()); EXPECT_EQ(4u, cmd->AsDrawGrid()->GetColumns()); EXPECT_EQ(5u, cmd->AsDrawGrid()->GetRows()); + EXPECT_FALSE(cmd->AsDrawGrid()->IsTimedExecution()); } TEST_F(AmberScriptParserTest, RunDrawGridWithComputePipelineInvalid) { @@ -887,6 +890,7 @@ RUN my_pipeline DRAW_ARRAY AS TRIANGLE_LIST START_IDX 1 COUNT 2)"; EXPECT_EQ(Topology::kTriangleList, cmd->GetTopology()); EXPECT_EQ(1U, cmd->GetFirstVertexIndex()); EXPECT_EQ(2U, cmd->GetVertexCount()); + EXPECT_FALSE(cmd->IsTimedExecution()); } TEST_F(AmberScriptParserTest, RunDrawArraysInstanced) { @@ -926,6 +930,7 @@ RUN my_pipeline DRAW_ARRAY AS TRIANGLE_LIST START_IDX 1 COUNT 2 START_INSTANCE 2 EXPECT_EQ(Topology::kTriangleList, cmd->GetTopology()); EXPECT_EQ(1U, cmd->GetFirstVertexIndex()); EXPECT_EQ(2U, cmd->GetVertexCount()); + EXPECT_FALSE(cmd->IsTimedExecution()); } TEST_F(AmberScriptParserTest, RunDrawArraysCountOmitted) { @@ -966,6 +971,7 @@ RUN my_pipeline DRAW_ARRAY AS TRIANGLE_LIST START_IDX 1)"; EXPECT_EQ(1U, cmd->GetFirstVertexIndex()); // There are 3 elements in the vertex buffer, but we start at element 1. EXPECT_EQ(2U, cmd->GetVertexCount()); + EXPECT_FALSE(cmd->IsTimedExecution()); } TEST_F(AmberScriptParserTest, RunDrawArraysStartIdxAndCountOmitted) { @@ -1006,6 +1012,7 @@ RUN my_pipeline DRAW_ARRAY AS TRIANGLE_LIST)"; EXPECT_EQ(static_cast(0U), cmd->GetFirstVertexIndex()); // There are 3 elements in the vertex buffer. EXPECT_EQ(3U, cmd->GetVertexCount()); + EXPECT_FALSE(cmd->IsTimedExecution()); } TEST_F(AmberScriptParserTest, RunDrawArraysIndexed) { @@ -1052,6 +1059,7 @@ RUN my_pipeline DRAW_ARRAY AS TRIANGLE_LIST INDEXED)"; EXPECT_EQ(static_cast(0U), cmd->GetFirstVertexIndex()); // There are 3 elements in the vertex buffer. 
EXPECT_EQ(3U, cmd->GetVertexCount()); + EXPECT_FALSE(cmd->IsTimedExecution()); } TEST_F(AmberScriptParserTest, RunDrawArraysIndexedMissingIndexData) { diff --git a/src/amberscript/parser_run_timed_execution_test.cc b/src/amberscript/parser_run_timed_execution_test.cc new file mode 100644 index 00000000..a794b2c4 --- /dev/null +++ b/src/amberscript/parser_run_timed_execution_test.cc @@ -0,0 +1,279 @@ +// Copyright 2024 The Amber Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "gtest/gtest.h" +#include "src/amberscript/parser.h" + +namespace amber { +namespace amberscript { + +using AmberScriptParserTest = testing::Test; + +TEST_F(AmberScriptParserTest, RunComputeTimedExecution) { + std::string in = R"( +SHADER compute my_shader GLSL +void main() { + gl_FragColor = vec3(2, 3, 4); +} +END + +PIPELINE compute my_pipeline + ATTACH my_shader +END + +RUN TIMED_EXECUTION my_pipeline 2 4 5 +)"; + + Parser parser; + Result r = parser.Parse(in); + ASSERT_TRUE(r.IsSuccess()) << r.Error(); + + auto script = parser.GetScript(); + const auto& commands = script->GetCommands(); + ASSERT_EQ(1U, commands.size()); + + auto* cmd = commands[0].get(); + ASSERT_TRUE(cmd->IsCompute()); + EXPECT_EQ(2U, cmd->AsCompute()->GetX()); + EXPECT_EQ(4U, cmd->AsCompute()->GetY()); + EXPECT_EQ(5U, cmd->AsCompute()->GetZ()); + EXPECT_TRUE(cmd->AsCompute()->IsTimedExecution()); +} + +TEST_F(AmberScriptParserTest, RunComputeNoTimedExecution) { + std::string in = R"( +SHADER compute my_shader GLSL +void main() { + gl_FragColor = vec3(2, 3, 4); +} +END + +PIPELINE compute my_pipeline + ATTACH my_shader +END + +RUN my_pipeline 2 4 5 +)"; + + Parser parser; + Result r = parser.Parse(in); + ASSERT_TRUE(r.IsSuccess()) << r.Error(); + + auto script = parser.GetScript(); + const auto& commands = script->GetCommands(); + ASSERT_EQ(1U, commands.size()); + + auto* cmd = commands[0].get(); + ASSERT_TRUE(cmd->IsCompute()); + EXPECT_EQ(2U, cmd->AsCompute()->GetX()); + EXPECT_EQ(4U, cmd->AsCompute()->GetY()); + EXPECT_EQ(5U, cmd->AsCompute()->GetZ()); + EXPECT_FALSE(cmd->AsCompute()->IsTimedExecution()); +} + +TEST_F(AmberScriptParserTest, RunDrawRectTimedExecution) { + std::string in = R"( +SHADER vertex my_shader PASSTHROUGH +SHADER fragment my_fragment GLSL +# GLSL Shader +END + +PIPELINE graphics my_pipeline + ATTACH my_shader + ATTACH my_fragment +END + +RUN TIMED_EXECUTION my_pipeline DRAW_RECT POS 2 4 SIZE 10 20)"; + + Parser parser; + Result r = parser.Parse(in); + 
ASSERT_TRUE(r.IsSuccess()) << r.Error(); + + auto script = parser.GetScript(); + const auto& commands = script->GetCommands(); + ASSERT_EQ(1U, commands.size()); + + auto* cmd = commands[0].get(); + ASSERT_TRUE(cmd->IsDrawRect()); + EXPECT_TRUE(cmd->AsDrawRect()->IsOrtho()); + EXPECT_FALSE(cmd->AsDrawRect()->IsPatch()); + EXPECT_FLOAT_EQ(2.f, cmd->AsDrawRect()->GetX()); + EXPECT_FLOAT_EQ(4.f, cmd->AsDrawRect()->GetY()); + EXPECT_FLOAT_EQ(10.f, cmd->AsDrawRect()->GetWidth()); + EXPECT_FLOAT_EQ(20.f, cmd->AsDrawRect()->GetHeight()); + EXPECT_TRUE(cmd->AsDrawRect()->IsTimedExecution()); +} + +TEST_F(AmberScriptParserTest, RunDrawGridTimedExecution) { + std::string in = R"( +SHADER vertex my_shader PASSTHROUGH +SHADER fragment my_fragment GLSL +# GLSL Shader +END + +PIPELINE graphics my_pipeline + ATTACH my_shader + ATTACH my_fragment +END + +RUN TIMED_EXECUTION my_pipeline DRAW_GRID POS 2 4 SIZE 10 20 CELLS 4 5)"; + + Parser parser; + Result r = parser.Parse(in); + ASSERT_TRUE(r.IsSuccess()) << r.Error(); + + auto script = parser.GetScript(); + const auto& commands = script->GetCommands(); + ASSERT_EQ(1U, commands.size()); + + auto* cmd = commands[0].get(); + ASSERT_TRUE(cmd->IsDrawGrid()); + EXPECT_FLOAT_EQ(2.f, cmd->AsDrawGrid()->GetX()); + EXPECT_FLOAT_EQ(4.f, cmd->AsDrawGrid()->GetY()); + EXPECT_FLOAT_EQ(10.f, cmd->AsDrawGrid()->GetWidth()); + EXPECT_FLOAT_EQ(20.f, cmd->AsDrawGrid()->GetHeight()); + EXPECT_EQ(4u, cmd->AsDrawGrid()->GetColumns()); + EXPECT_EQ(5u, cmd->AsDrawGrid()->GetRows()); + EXPECT_TRUE(cmd->AsDrawGrid()->IsTimedExecution()); +} + +TEST_F(AmberScriptParserTest, RunDrawArraysTimedExecution) { + std::string in = R"( +SHADER vertex my_shader PASSTHROUGH +SHADER fragment my_fragment GLSL +# GLSL Shader +END +BUFFER vtex_buf DATA_TYPE vec3 DATA +1 2 3 +4 5 6 +7 8 9 +END + +PIPELINE graphics my_pipeline + ATTACH my_shader + ATTACH my_fragment + VERTEX_DATA vtex_buf LOCATION 0 +END + +RUN TIMED_EXECUTION my_pipeline DRAW_ARRAY AS TRIANGLE_LIST 
START_IDX 1 COUNT 2)"; + + Parser parser; + Result r = parser.Parse(in); + ASSERT_TRUE(r.IsSuccess()) << r.Error(); + + auto script = parser.GetScript(); + const auto& commands = script->GetCommands(); + ASSERT_EQ(1U, commands.size()); + + ASSERT_TRUE(commands[0]->IsDrawArrays()); + + auto* cmd = commands[0]->AsDrawArrays(); + EXPECT_FALSE(cmd->IsIndexed()); + EXPECT_EQ(static_cast(1U), cmd->GetInstanceCount()); + EXPECT_EQ(static_cast(0U), cmd->GetFirstInstance()); + EXPECT_EQ(Topology::kTriangleList, cmd->GetTopology()); + EXPECT_EQ(1U, cmd->GetFirstVertexIndex()); + EXPECT_EQ(2U, cmd->GetVertexCount()); + EXPECT_TRUE(cmd->IsTimedExecution()); +} + +TEST_F(AmberScriptParserTest, RunDrawArraysInstancedTimedExecution) { + std::string in = R"( +SHADER vertex my_shader PASSTHROUGH +SHADER fragment my_fragment GLSL +# GLSL Shader +END +BUFFER vtex_buf DATA_TYPE vec3 DATA +1 2 3 +4 5 6 +7 8 9 +END + +PIPELINE graphics my_pipeline + ATTACH my_shader + ATTACH my_fragment + VERTEX_DATA vtex_buf LOCATION 0 +END + +RUN TIMED_EXECUTION my_pipeline DRAW_ARRAY AS TRIANGLE_LIST START_IDX 1 COUNT 2 START_INSTANCE 2 INSTANCE_COUNT 10)"; + + Parser parser; + Result r = parser.Parse(in); + ASSERT_TRUE(r.IsSuccess()) << r.Error(); + + auto script = parser.GetScript(); + const auto& commands = script->GetCommands(); + ASSERT_EQ(1U, commands.size()); + + ASSERT_TRUE(commands[0]->IsDrawArrays()); + + auto* cmd = commands[0]->AsDrawArrays(); + EXPECT_FALSE(cmd->IsIndexed()); + EXPECT_EQ(static_cast(10U), cmd->GetInstanceCount()); + EXPECT_EQ(static_cast(2U), cmd->GetFirstInstance()); + EXPECT_EQ(Topology::kTriangleList, cmd->GetTopology()); + EXPECT_EQ(1U, cmd->GetFirstVertexIndex()); + EXPECT_EQ(2U, cmd->GetVertexCount()); + EXPECT_TRUE(cmd->IsTimedExecution()); +} + +TEST_F(AmberScriptParserTest, RunDrawArraysIndexedTimedExecution) { + std::string in = R"( +SHADER vertex my_shader PASSTHROUGH +SHADER fragment my_fragment GLSL +# GLSL Shader +END +BUFFER vtex_buf DATA_TYPE vec3 DATA +1 
2 3 +4 5 6 +7 8 9 +END +BUFFER idx_buf DATA_TYPE vec3 DATA +9 8 7 +6 5 4 +3 2 1 +END + +PIPELINE graphics my_pipeline + ATTACH my_shader + ATTACH my_fragment + VERTEX_DATA vtex_buf LOCATION 0 + INDEX_DATA idx_buf +END + +RUN TIMED_EXECUTION my_pipeline DRAW_ARRAY AS TRIANGLE_LIST INDEXED)"; + + Parser parser; + Result r = parser.Parse(in); + ASSERT_TRUE(r.IsSuccess()) << r.Error(); + + auto script = parser.GetScript(); + const auto& commands = script->GetCommands(); + ASSERT_EQ(1U, commands.size()); + + ASSERT_TRUE(commands[0]->IsDrawArrays()); + + auto* cmd = commands[0]->AsDrawArrays(); + EXPECT_TRUE(cmd->IsIndexed()); + EXPECT_EQ(static_cast(1U), cmd->GetInstanceCount()); + EXPECT_EQ(static_cast(0U), cmd->GetFirstInstance()); + EXPECT_EQ(Topology::kTriangleList, cmd->GetTopology()); + EXPECT_EQ(static_cast(0U), cmd->GetFirstVertexIndex()); + // There are 3 elements in the vertex buffer. + EXPECT_EQ(3U, cmd->GetVertexCount()); + EXPECT_TRUE(cmd->IsTimedExecution()); +} + +} // namespace amberscript +} // namespace amber diff --git a/src/command.h b/src/command.h index c8012216..485fc604 100644 --- a/src/command.h +++ b/src/command.h @@ -142,10 +142,14 @@ class PipelineCommand : public Command { Pipeline* GetPipeline() const { return pipeline_; } + void SetTimedExecution() { timed_execution_ = true; } + bool IsTimedExecution() const { return timed_execution_; } + protected: PipelineCommand(Type type, Pipeline* pipeline); Pipeline* pipeline_ = nullptr; + bool timed_execution_ = false; }; /// Command to draw a rectangle on screen. diff --git a/src/vulkan/compute_pipeline.cc b/src/vulkan/compute_pipeline.cc index dd7a990e..23fd127e 100644 --- a/src/vulkan/compute_pipeline.cc +++ b/src/vulkan/compute_pipeline.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "src/vulkan/compute_pipeline.h" +#include #include "src/vulkan/command_pool.h" #include "src/vulkan/device.h" @@ -66,7 +67,10 @@ Result ComputePipeline::CreateVkComputePipeline( return {}; } -Result ComputePipeline::Compute(uint32_t x, uint32_t y, uint32_t z) { +Result ComputePipeline::Compute(uint32_t x, + uint32_t y, + uint32_t z, + bool is_timed_execution) { Result r = SendDescriptorDataToDeviceIfNeeded(); if (!r.IsSuccess()) return r; @@ -85,7 +89,7 @@ Result ComputePipeline::Compute(uint32_t x, uint32_t y, uint32_t z) { // it must be submitted separately, because using a descriptor set // while updating it is not safe. UpdateDescriptorSetsIfNeeded(); - + CreateTimingQueryObjectIfNeeded(is_timed_execution); { CommandBufferGuard guard(GetCommandBuffer()); if (!guard.IsRecording()) @@ -100,13 +104,15 @@ Result ComputePipeline::Compute(uint32_t x, uint32_t y, uint32_t z) { device_->GetPtrs()->vkCmdBindPipeline(command_->GetVkCommandBuffer(), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + BeginTimerQuery(); device_->GetPtrs()->vkCmdDispatch(command_->GetVkCommandBuffer(), x, y, z); + EndTimerQuery(); r = guard.Submit(GetFenceTimeout(), GetPipelineRuntimeLayerEnabled()); if (!r.IsSuccess()) return r; } - + DestroyTimingQueryObjectIfNeeded(); r = ReadbackDescriptorsToHostDataQueue(); if (!r.IsSuccess()) return r; diff --git a/src/vulkan/compute_pipeline.h b/src/vulkan/compute_pipeline.h index d6597beb..53f2221e 100644 --- a/src/vulkan/compute_pipeline.h +++ b/src/vulkan/compute_pipeline.h @@ -36,7 +36,7 @@ class ComputePipeline : public Pipeline { Result Initialize(CommandPool* pool); - Result Compute(uint32_t x, uint32_t y, uint32_t z); + Result Compute(uint32_t x, uint32_t y, uint32_t z, bool is_timed_execution); private: Result CreateVkComputePipeline(const VkPipelineLayout& pipeline_layout, diff --git a/src/vulkan/device.cc b/src/vulkan/device.cc index 0aebd7c5..61dcaa05 100644 --- a/src/vulkan/device.cc +++ b/src/vulkan/device.cc @@ -411,12 +411,14 @@ 
Device::Device(VkInstance instance, VkPhysicalDevice physical_device, uint32_t queue_family_index, VkDevice device, - VkQueue queue) + VkQueue queue, + Delegate* delegate) : instance_(instance), physical_device_(physical_device), device_(device), queue_(queue), - queue_family_index_(queue_family_index) {} + queue_family_index_(queue_family_index), + delegate_(delegate) {} Device::~Device() = default; @@ -450,9 +452,14 @@ bool Device::SupportsApiVersion(uint32_t major, #pragma clang diagnostic pop } +void Device::ReportExecutionTiming(double time_in_ms) { + if (delegate_) { + delegate_->ReportExecutionTiming(time_in_ms); + } +} + Result Device::Initialize( PFN_vkGetInstanceProcAddr getInstanceProcAddr, - Delegate* delegate, const std::vector& required_features, const std::vector& required_properties, const std::vector& required_device_extensions, @@ -460,7 +467,7 @@ Result Device::Initialize( const VkPhysicalDeviceFeatures2KHR& available_features2, const VkPhysicalDeviceProperties2KHR& available_properties2, const std::vector& available_extensions) { - Result r = LoadVulkanPointers(getInstanceProcAddr, delegate); + Result r = LoadVulkanPointers(getInstanceProcAddr, delegate_); if (!r.IsSuccess()) return r; @@ -813,9 +820,9 @@ Result Device::Initialize( ptr = s->pNext; } -#define CHK_P(R, P, NAME, S1, S2) \ - do { \ - if (R == -1 && P == #NAME) \ +#define CHK_P(R, P, NAME, S1, S2) \ + do { \ + if (R == -1 && P == #NAME) \ R = ((S1 && S1->NAME) || (S2 && S2->NAME)) ? 
1 : 0; \ } while (false) @@ -853,8 +860,7 @@ Result Device::Initialize( return Result("Vulkan: Device::Initialize missing " + prop + " property"); if (supported == -1) - return Result( - "Vulkan: Device::Initialize property not handled " + prop); + return Result("Vulkan: Device::Initialize property not handled " + prop); } ptrs_.vkGetPhysicalDeviceMemoryProperties(physical_device_, @@ -1075,6 +1081,14 @@ uint32_t Device::GetMaxPushConstants() const { return physical_device_properties_.limits.maxPushConstantsSize; } +bool Device::IsTimestampComputeAndGraphicsSupported() const { + return physical_device_properties_.limits.timestampComputeAndGraphics; +} + +float Device::GetTimestampPeriod() const { + return physical_device_properties_.limits.timestampPeriod; +} + bool Device::IsDescriptorSetInBounds(uint32_t descriptor_set) const { VkPhysicalDeviceProperties properties = VkPhysicalDeviceProperties(); GetPtrs()->vkGetPhysicalDeviceProperties(physical_device_, &properties); diff --git a/src/vulkan/device.h b/src/vulkan/device.h index 8cda4b7a..0ce0529a 100644 --- a/src/vulkan/device.h +++ b/src/vulkan/device.h @@ -42,11 +42,11 @@ class Device { VkPhysicalDevice physical_device, uint32_t queue_family_index, VkDevice device, - VkQueue queue); + VkQueue queue, + Delegate* delegate); virtual ~Device(); Result Initialize(PFN_vkGetInstanceProcAddr getInstanceProcAddr, - Delegate* delegate, const std::vector& required_features, const std::vector& required_properties, const std::vector& required_device_extensions, @@ -94,6 +94,15 @@ class Device { /// Returns ray tracing shader group handle size. uint32_t GetRayTracingShaderGroupHandleSize() const; + // Returns true if we have support for timestamps. + bool IsTimestampComputeAndGraphicsSupported() const; + + // Returns a float used to convert between timestamps and actual elapsed time. + float GetTimestampPeriod() const; + + // Each timed execution reports timing to the device and on to the delegate. 
+ void ReportExecutionTiming(double time_in_ms); + private: Result LoadVulkanPointers(PFN_vkGetInstanceProcAddr, Delegate* delegate); bool SupportsApiVersion(uint32_t major, uint32_t minor, uint32_t patch); @@ -110,6 +119,8 @@ class Device { uint32_t shader_group_handle_size_ = 0; VulkanPtrs ptrs_; + + Delegate* delegate_ = nullptr; }; } // namespace vulkan diff --git a/src/vulkan/engine_vulkan.cc b/src/vulkan/engine_vulkan.cc index fc36e30b..18e506ad 100644 --- a/src/vulkan/engine_vulkan.cc +++ b/src/vulkan/engine_vulkan.cc @@ -139,13 +139,12 @@ Result EngineVulkan::Initialize( device_ = MakeUnique(vk_config->instance, vk_config->physical_device, vk_config->queue_family_index, vk_config->device, - vk_config->queue); + vk_config->queue, delegate); Result r = device_->Initialize( - vk_config->vkGetInstanceProcAddr, delegate, features, properties, - device_extensions, vk_config->available_features, - vk_config->available_features2, vk_config->available_properties2, - vk_config->available_device_extensions); + vk_config->vkGetInstanceProcAddr, features, properties, device_extensions, + vk_config->available_features, vk_config->available_features2, + vk_config->available_properties2, vk_config->available_device_extensions); if (!r.IsSuccess()) return r; @@ -463,8 +462,7 @@ Result EngineVulkan::GetVkShaderStageInfo( return r; *stage_info = VkPipelineShaderStageCreateInfo(); - stage_info->sType = - VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stage_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; stage_info->flags = shader_info.create_flags; stage_info->stage = stage; stage_info->module = shader_info.shader; @@ -536,15 +534,14 @@ Result EngineVulkan::GetVkShaderGroupInfo( return Result("Invalid shader group"); VkRayTracingShaderGroupCreateInfoKHR group_info = { - VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - nullptr, - VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR, - VK_SHADER_UNUSED_KHR, - VK_SHADER_UNUSED_KHR, - 
VK_SHADER_UNUSED_KHR, - VK_SHADER_UNUSED_KHR, - nullptr - }; + VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + nullptr, + VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR, + VK_SHADER_UNUSED_KHR, + VK_SHADER_UNUSED_KHR, + VK_SHADER_UNUSED_KHR, + VK_SHADER_UNUSED_KHR, + nullptr}; if (sg->IsGeneralGroup()) { group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR; @@ -673,13 +670,17 @@ Result EngineVulkan::DoDrawRect(const DrawRectCommand* command) { buf->GetFormat()->SizeInBytes()); DrawArraysCommand draw(command->GetPipeline(), *command->GetPipelineData()); + if (command->IsTimedExecution()) { + draw.SetTimedExecution(); + } draw.SetTopology(command->IsPatch() ? Topology::kPatchList : Topology::kTriangleStrip); draw.SetFirstVertexIndex(0); draw.SetVertexCount(4); draw.SetInstanceCount(1); - Result r = graphics->Draw(&draw, vertex_buffer.get()); + Result r = + graphics->Draw(&draw, vertex_buffer.get(), command->IsTimedExecution()); if (!r.IsSuccess()) return r; @@ -761,12 +762,16 @@ Result EngineVulkan::DoDrawGrid(const DrawGridCommand* command) { buf->GetFormat()->SizeInBytes()); DrawArraysCommand draw(command->GetPipeline(), *command->GetPipelineData()); + if (command->IsTimedExecution()) { + draw.SetTimedExecution(); + } draw.SetTopology(Topology::kTriangleList); draw.SetFirstVertexIndex(0); draw.SetVertexCount(vertices); draw.SetInstanceCount(1); - Result r = graphics->Draw(&draw, vertex_buffer.get()); + Result r = + graphics->Draw(&draw, vertex_buffer.get(), command->IsTimedExecution()); if (!r.IsSuccess()) return r; @@ -778,8 +783,8 @@ Result EngineVulkan::DoDrawArrays(const DrawArraysCommand* command) { if (!info.vk_pipeline) return Result("Vulkan::DrawArrays for Non-Graphics Pipeline"); - return info.vk_pipeline->AsGraphics()->Draw(command, - info.vertex_buffer.get()); + return info.vk_pipeline->AsGraphics()->Draw(command, info.vertex_buffer.get(), + command->IsTimedExecution()); } Result EngineVulkan::DoCompute(const ComputeCommand* 
command) { @@ -788,7 +793,8 @@ Result EngineVulkan::DoCompute(const ComputeCommand* command) { return Result("Vulkan: Compute called for non-compute pipeline."); return info.vk_pipeline->AsCompute()->Compute( - command->GetX(), command->GetY(), command->GetZ()); + command->GetX(), command->GetY(), command->GetZ(), + command->IsTimedExecution()); } Result EngineVulkan::InitDependendLibraries(amber::Pipeline* pipeline, @@ -848,7 +854,8 @@ Result EngineVulkan::DoTraceRays(const RayTracingCommand* command) { rSBT, mSBT, hSBT, cSBT, command->GetX(), command->GetY(), command->GetZ(), pipeline->GetMaxPipelineRayPayloadSize(), pipeline->GetMaxPipelineRayHitAttributeSize(), - pipeline->GetMaxPipelineRayRecursionDepth(), libs); + pipeline->GetMaxPipelineRayRecursionDepth(), libs, + command->IsTimedExecution()); } Result EngineVulkan::DoEntryPoint(const EntryPointCommand* command) { diff --git a/src/vulkan/graphics_pipeline.cc b/src/vulkan/graphics_pipeline.cc index 9db5eb79..556c91f3 100644 --- a/src/vulkan/graphics_pipeline.cc +++ b/src/vulkan/graphics_pipeline.cc @@ -881,7 +881,8 @@ Result GraphicsPipeline::Clear() { } Result GraphicsPipeline::Draw(const DrawArraysCommand* command, - VertexBuffer* vertex_buffer) { + VertexBuffer* vertex_buffer, + bool is_timed_execution) { Result r = SendDescriptorDataToDeviceIfNeeded(); if (!r.IsSuccess()) return r; @@ -902,7 +903,7 @@ Result GraphicsPipeline::Draw(const DrawArraysCommand* command, // it must be submitted separately, because using a descriptor set // while updating it is not safe. UpdateDescriptorSetsIfNeeded(); - + CreateTimingQueryObjectIfNeeded(is_timed_execution); { CommandBufferGuard cmd_buf_guard(GetCommandBuffer()); if (!cmd_buf_guard.IsRecording()) @@ -916,6 +917,10 @@ Result GraphicsPipeline::Draw(const DrawArraysCommand* command, frame_->CopyBuffersToImages(); frame_->TransferImagesToDevice(GetCommandBuffer()); + // Timing must be placed outside the render pass scope. 
The full pipeline + // barrier used by our specific implementation cannot be within a + // renderpass. + BeginTimerQuery(); { RenderPassGuard render_pass_guard(this); @@ -943,6 +948,7 @@ Result GraphicsPipeline::Draw(const DrawArraysCommand* command, // VkRunner spec says // "vertexCount will be used as the index count, firstVertex // becomes the vertex offset and firstIndex will always be zero." + device_->GetPtrs()->vkCmdDrawIndexed( command_->GetVkCommandBuffer(), command->GetVertexCount(), /* indexCount */ @@ -958,7 +964,7 @@ Result GraphicsPipeline::Draw(const DrawArraysCommand* command, command->GetFirstInstance()); } } - + EndTimerQuery(); frame_->TransferImagesToHost(command_.get()); r = cmd_buf_guard.Submit(GetFenceTimeout(), @@ -966,7 +972,7 @@ Result GraphicsPipeline::Draw(const DrawArraysCommand* command, if (!r.IsSuccess()) return r; } - + DestroyTimingQueryObjectIfNeeded(); r = ReadbackDescriptorsToHostDataQueue(); if (!r.IsSuccess()) return r; diff --git a/src/vulkan/graphics_pipeline.h b/src/vulkan/graphics_pipeline.h index 4bc5f7d0..c4bb6574 100644 --- a/src/vulkan/graphics_pipeline.h +++ b/src/vulkan/graphics_pipeline.h @@ -59,7 +59,9 @@ class GraphicsPipeline : public Pipeline { Result SetClearStencil(uint32_t stencil); Result SetClearDepth(float depth); - Result Draw(const DrawArraysCommand* command, VertexBuffer* vertex_buffer); + Result Draw(const DrawArraysCommand* command, + VertexBuffer* vertex_buffer, + bool is_timed_execution); VkRenderPass GetVkRenderPass() const { return render_pass_; } FrameBuffer* GetFrameBuffer() const { return frame_.get(); } diff --git a/src/vulkan/pipeline.cc b/src/vulkan/pipeline.cc index 6f3a724e..fd8dd4ee 100644 --- a/src/vulkan/pipeline.cc +++ b/src/vulkan/pipeline.cc @@ -16,6 +16,7 @@ #include "src/vulkan/pipeline.h" #include +#include #include #include @@ -37,6 +38,13 @@ namespace { const char* kDefaultEntryPointName = "main"; +constexpr VkMemoryBarrier kMemoryBarrierFull = { + 
VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr,
+    VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+    VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT};
+
+constexpr uint32_t kNumQueryObjects = 2;
+
 }  // namespace
 
 Pipeline::Pipeline(
@@ -253,6 +261,106 @@ void Pipeline::UpdateDescriptorSetsIfNeeded() {
   }
 }
 
+// Creates the timestamp query pool for a timed run. Does nothing unless
+// |is_timed_execution| is set and the device's timestamp-support check passes.
+// Timing stays disabled if the pool cannot be created.
+void Pipeline::CreateTimingQueryObjectIfNeeded(bool is_timed_execution) {
+  if (!is_timed_execution ||
+      !device_->IsTimestampComputeAndGraphicsSupported()) {
+    return;
+  }
+  VkQueryPoolCreateInfo pool_create_info{
+      VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+      nullptr,
+      0,
+      VK_QUERY_TYPE_TIMESTAMP,
+      kNumQueryObjects,
+      0};
+  const VkResult result = device_->GetPtrs()->vkCreateQueryPool(
+      device_->GetVkDevice(), &pool_create_info, nullptr, &query_pool_);
+  // Only enter timed execution once the pool exists, so the other timing
+  // helpers never touch a null query pool.
+  if (result != VK_SUCCESS) {
+    query_pool_ = VK_NULL_HANDLE;
+    return;
+  }
+  in_timed_execution_ = true;
+}
+
+// Waits for both timestamps, reports the elapsed time through the device and
+// destroys the query pool. Does nothing when no timed run is in progress.
+void Pipeline::DestroyTimingQueryObjectIfNeeded() {
+  if (!in_timed_execution_) {
+    return;
+  }
+
+  // Flags set so we may/will wait on the CPU for the availability of our
+  // queries.
+  const VkQueryResultFlags flags =
+      VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT;
+  std::array<uint64_t, kNumQueryObjects> time_stamps = {};
+  constexpr VkDeviceSize kStrideBytes = sizeof(uint64_t);
+
+  const VkResult result = device_->GetPtrs()->vkGetQueryPoolResults(
+      device_->GetVkDevice(), query_pool_, 0, kNumQueryObjects,
+      sizeof(time_stamps), time_stamps.data(), kStrideBytes, flags);
+  // Report nothing if the read-back failed; the timestamps are meaningless.
+  if (result == VK_SUCCESS) {
+    const double time_in_ns =
+        static_cast<double>(time_stamps[1] - time_stamps[0]) *
+        static_cast<double>(device_->GetTimestampPeriod());
+
+    // NOTE(review): the value reported is in milliseconds, while device.h
+    // names the parameter |time_in_ns|; confirm the unit the delegate expects.
+    constexpr double kNsToMsTime = 1.0 / 1000000.0;
+    device_->ReportExecutionTiming(time_in_ns * kNsToMsTime);
+  }
+  device_->GetPtrs()->vkDestroyQueryPool(device_->GetVkDevice(), query_pool_,
+                                         nullptr);
+  query_pool_ = VK_NULL_HANDLE;
+  in_timed_execution_ = false;
+}
+
+// Records the query-pool reset, a full barrier and the start timestamp. Must
+// be recorded outside of a render pass.
+void Pipeline::BeginTimerQuery() {
+  if (!in_timed_execution_) {
+    return;
+  }
+
+  device_->GetPtrs()->vkCmdResetQueryPool(command_->GetVkCommandBuffer(),
+                                          query_pool_, 0, kNumQueryObjects);
+  // Full barrier prevents any work from before the point being still in the
+  // pipeline.
+  device_->GetPtrs()->vkCmdPipelineBarrier(
+      command_->GetVkCommandBuffer(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+      VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &kMemoryBarrierFull, 0, nullptr,
+      0, nullptr);
+  constexpr uint32_t kBeginQueryIndexOffset = 0;
+  device_->GetPtrs()->vkCmdWriteTimestamp(command_->GetVkCommandBuffer(),
+                                          VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                          query_pool_, kBeginQueryIndexOffset);
+}
+
+// Records a full barrier followed by the end timestamp. Must be recorded
+// before the command buffer is submitted.
+void Pipeline::EndTimerQuery() {
+  if (!in_timed_execution_) {
+    return;
+  }
+
+  // Full barrier ensures that work included in our timing is executed before
+  // the timestamp.
+  device_->GetPtrs()->vkCmdPipelineBarrier(
+      command_->GetVkCommandBuffer(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+      VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &kMemoryBarrierFull, 0, nullptr,
+      0, nullptr);
+  constexpr uint32_t kEndQueryIndexOffset = 1;
+  device_->GetPtrs()->vkCmdWriteTimestamp(command_->GetVkCommandBuffer(),
+                                          VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                          query_pool_, kEndQueryIndexOffset);
+}
+
 Result Pipeline::RecordPushConstant(const VkPipelineLayout& pipeline_layout) {
   return push_constant_->RecordPushConstantVkCommand(command_.get(),
                                                      pipeline_layout);
diff --git a/src/vulkan/pipeline.h b/src/vulkan/pipeline.h
index 7cae2d51..68fce4e7 100644
--- a/src/vulkan/pipeline.h
+++ b/src/vulkan/pipeline.h
@@ -29,6 +29,7 @@
 #include "src/vulkan/command_buffer.h"
 #include "src/vulkan/push_constant.h"
 #include "src/vulkan/resource.h"
+#include "vulkan/vulkan_core.h"
 
 namespace amber {
 
@@ -89,7 +90,7 @@ class Pipeline {
       PipelineType type,
       Device* device,
       uint32_t fence_timeout_ms,
-      bool pipeline_runtime_layer_enabled,
+      bool pipeline_runtime_layer_enabled,
       const std::vector& shader_stage_info,
       VkPipelineCreateFlags create_flags = 0);
 
@@ -101,6 +102,13 @@ class Pipeline {
                            Descriptor** desc);
   void UpdateDescriptorSetsIfNeeded();
 
+  // These functions are used for benchmarking when the 'TIMED_EXECUTION'
+  // option is specified.
+ void CreateTimingQueryObjectIfNeeded(bool is_timed_execution); + void DestroyTimingQueryObjectIfNeeded(); + void BeginTimerQuery(); + void EndTimerQuery(); + Result SendDescriptorDataToDeviceIfNeeded(); void BindVkDescriptorSets(const VkPipelineLayout& pipeline_layout); @@ -114,8 +122,9 @@ class Pipeline { const char* GetEntryPointName(VkShaderStageFlagBits stage) const; uint32_t GetFenceTimeout() const { return fence_timeout_ms_; } - bool GetPipelineRuntimeLayerEnabled() - const { return pipeline_runtime_layer_enabled_; } + bool GetPipelineRuntimeLayerEnabled() const { + return pipeline_runtime_layer_enabled_; + } Result CreateVkPipelineLayout(VkPipelineLayout* pipeline_layout); @@ -129,6 +138,7 @@ class Pipeline { pipeline_ = pipeline; } + VkQueryPool query_pool_ = VK_NULL_HANDLE; VkPipeline pipeline_ = VK_NULL_HANDLE; VkPipelineLayout pipeline_layout_ = VK_NULL_HANDLE; @@ -171,6 +181,7 @@ class Pipeline { entry_points_; std::unique_ptr push_constant_; + bool in_timed_execution_ = false; }; } // namespace vulkan diff --git a/src/vulkan/raytracing_pipeline.cc b/src/vulkan/raytracing_pipeline.cc index ea0d8f13..7a4f8569 100644 --- a/src/vulkan/raytracing_pipeline.cc +++ b/src/vulkan/raytracing_pipeline.cc @@ -186,7 +186,8 @@ Result RayTracingPipeline::TraceRays(amber::SBT* rSBT, uint32_t maxPipelineRayPayloadSize, uint32_t maxPipelineRayHitAttributeSize, uint32_t maxPipelineRayRecursionDepth, - const std::vector& libs) { + const std::vector& libs, + bool is_timed_execution) { Result r = SendDescriptorDataToDeviceIfNeeded(); if (!r.IsSuccess()) return r; @@ -200,7 +201,7 @@ Result RayTracingPipeline::TraceRays(amber::SBT* rSBT, // it must be submitted separately, because using a descriptor set // while updating it is not safe. 
UpdateDescriptorSetsIfNeeded();
-
+  CreateTimingQueryObjectIfNeeded(is_timed_execution);
   {
     CommandBufferGuard guard(GetCommandBuffer());
     if (!guard.IsRecording())
@@ -247,12 +248,15 @@ Result RayTracingPipeline::TraceRays(amber::SBT* rSBT,
+    // Bracket only the trace command with the timestamps. Both timer calls
+    // record into the command buffer, so they must precede Submit().
+    BeginTimerQuery();
     device_->GetPtrs()->vkCmdTraceRaysKHR(command_->GetVkCommandBuffer(),
                                           &rSBTRegion, &mSBTRegion,
                                           &hSBTRegion, &cSBTRegion, x, y, z);
-
+    EndTimerQuery();
     r = guard.Submit(GetFenceTimeout(), GetPipelineRuntimeLayerEnabled());
     if (!r.IsSuccess())
       return r;
   }
-
+  DestroyTimingQueryObjectIfNeeded();
   r = ReadbackDescriptorsToHostDataQueue();
   if (!r.IsSuccess())
     return r;
diff --git a/src/vulkan/raytracing_pipeline.h b/src/vulkan/raytracing_pipeline.h
index 6193e9eb..6ef9c08f 100644
--- a/src/vulkan/raytracing_pipeline.h
+++ b/src/vulkan/raytracing_pipeline.h
@@ -64,7 +64,8 @@ class RayTracingPipeline : public Pipeline {
                    uint32_t maxPipelineRayPayloadSize,
                    uint32_t maxPipelineRayHitAttributeSize,
                    uint32_t maxPipelineRayRecursionDepth,
-                   const std::vector& lib);
+                   const std::vector& lib,
+                   bool is_timed_execution);
 
   BlasesMap* GetBlases() override { return blases_; }
   TlasesMap* GetTlases() override { return tlases_; }
diff --git a/src/vulkan/vertex_buffer_test.cc b/src/vulkan/vertex_buffer_test.cc
index eb8a7bd8..14b9ee9d 100644
--- a/src/vulkan/vertex_buffer_test.cc
+++ b/src/vulkan/vertex_buffer_test.cc
@@ -36,7 +36,8 @@ class DummyDevice : public Device {
                VkPhysicalDevice(),
                0u,
                VkDevice(this),
-               VkQueue()) {
+               VkQueue(),
+               nullptr) {
     memory_.resize(64);
     dummyPtrs_.vkCreateBuffer = vkCreateBuffer;
     dummyPtrs_.vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements;
diff --git a/src/vulkan/vk-funcs-1-0.inc b/src/vulkan/vk-funcs-1-0.inc
index 50a821b3..033e49fe 100644
--- a/src/vulkan/vk-funcs-1-0.inc
+++ b/src/vulkan/vk-funcs-1-0.inc
@@ -19,9 +19,12 @@ AMBER_VK_FUNC(vkCmdDrawIndexed)
 AMBER_VK_FUNC(vkCmdEndRenderPass)
 AMBER_VK_FUNC(vkCmdPipelineBarrier)
 AMBER_VK_FUNC(vkCmdPushConstants)
+AMBER_VK_FUNC(vkCmdResetQueryPool) +AMBER_VK_FUNC(vkCmdWriteTimestamp) AMBER_VK_FUNC(vkCreateBuffer) AMBER_VK_FUNC(vkCreateBufferView) AMBER_VK_FUNC(vkGetBufferDeviceAddress) +AMBER_VK_FUNC(vkGetQueryPoolResults) AMBER_VK_FUNC(vkCreateCommandPool) AMBER_VK_FUNC(vkCreateComputePipelines) AMBER_VK_FUNC(vkCreateDescriptorPool) @@ -32,6 +35,7 @@ AMBER_VK_FUNC(vkCreateGraphicsPipelines) AMBER_VK_FUNC(vkCreateImage) AMBER_VK_FUNC(vkCreateImageView) AMBER_VK_FUNC(vkCreatePipelineLayout) +AMBER_VK_FUNC(vkCreateQueryPool) AMBER_VK_FUNC(vkCreateRenderPass) AMBER_VK_FUNC(vkCreateSampler) AMBER_VK_FUNC(vkCreateShaderModule) @@ -46,6 +50,7 @@ AMBER_VK_FUNC(vkDestroyImage) AMBER_VK_FUNC(vkDestroyImageView) AMBER_VK_FUNC(vkDestroyPipeline) AMBER_VK_FUNC(vkDestroyPipelineLayout) +AMBER_VK_FUNC(vkDestroyQueryPool) AMBER_VK_FUNC(vkDestroyRenderPass) AMBER_VK_FUNC(vkDestroySampler) AMBER_VK_FUNC(vkDestroyShaderModule) diff --git a/tests/cases/compute_timed_execution_single.amber b/tests/cases/compute_timed_execution_single.amber new file mode 100644 index 00000000..77e295e5 --- /dev/null +++ b/tests/cases/compute_timed_execution_single.amber @@ -0,0 +1,42 @@ +#!amber +# Copyright 2024 The Amber Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +SHADER compute atomic_sum_all GLSL +#version 430 + +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +layout(set = 0, binding = 0) buffer BlockUint { + uint data; +} ssbo_uint; + +void main() { + atomicAdd(ssbo_uint.data, uint(1)); +} +END + +BUFFER buf_uint DATA_TYPE uint32 DATA +0 +END + + +PIPELINE compute pipeline + ATTACH atomic_sum_all + BIND BUFFER buf_uint AS storage DESCRIPTOR_SET 0 BINDING 0 +END + +RUN TIMED_EXECUTION pipeline 128 128 1 + +EXPECT buf_uint IDX 0 EQ 4194304 diff --git a/tests/cases/draw_rect_timed_execution.amber b/tests/cases/draw_rect_timed_execution.amber new file mode 100644 index 00000000..1309d8db --- /dev/null +++ b/tests/cases/draw_rect_timed_execution.amber @@ -0,0 +1,42 @@ +#!amber +# Copyright 2024 The Amber Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SHADER vertex vert_shader PASSTHROUGH +SHADER fragment frag_shader GLSL +#version 430 +layout(location = 0) out vec4 color_out; +void main() { + float x = gl_FragCoord.x; + // Small busy loop. + // x final result will be zero. 
+ for(int i= 0;i <10;i++) { + x = x*0.00001; + } + color_out = vec4(x, 0.0, 0.0, 1.0); +} +END + +BUFFER framebuffer FORMAT B8G8R8A8_UNORM + +PIPELINE graphics my_pipeline + ATTACH vert_shader + ATTACH frag_shader + FRAMEBUFFER_SIZE 1024 1024 + BIND BUFFER framebuffer AS color LOCATION 0 +END + +RUN TIMED_EXECUTION my_pipeline DRAW_RECT POS 0 0 SIZE 1024 1024 +EXPECT framebuffer IDX 0 0 SIZE 1024 1024 EQ_RGBA 0 0 0 255 + \ No newline at end of file