diff --git a/.gitignore b/.gitignore
index 50fc6363..e08b24ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,9 @@ third_party/robin-hood-hashing
 # C-Lion
 .idea/
 cmake-build-*/
+
+### Clangd cached index files
+/.cache
+
+### The 'compile_commands' file can be generated at root
+compile_commands.json
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fcea8bf8..10a90e14 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,6 +26,7 @@ endif()
 project(amber)
 enable_testing()
 
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
diff --git a/docs/amber_script.md b/docs/amber_script.md
index 3d7bad9d..7b6d8dad 100644
--- a/docs/amber_script.md
+++ b/docs/amber_script.md
@@ -989,18 +989,22 @@ value for `START_IDX` is 0. The default value for `COUNT` is the item count of
 vertex buffer minus the `START_IDX`. The same applies to `START_INSTANCE`
 (default 0) and `INSTANCE_COUNT` (default 1).
 
+`TIMED_EXECUTION` is an optional flag that can be passed to the `RUN` command.
+It causes Amber to insert device-specific counters that time the execution of
+that pipeline command.
+
 ```groovy
 # Run the given |pipeline_name| which must be a `compute` pipeline. The
 # pipeline will be run with the given number of workgroups in the |x|, |y|, |z|
 # dimensions. Each of the x, y and z values must be a uint32.
-RUN {pipeline_name} _x_ _y_ _z_
+RUN [TIMED_EXECUTION] {pipeline_name} _x_ _y_ _z_
 ```
 
 ```groovy
 # Run the given |pipeline_name| which must be a `graphics` pipeline. The
 # rectangle at |x|, |y|, |width|x|height| will be rendered. Ignores VERTEX_DATA
 # and INDEX_DATA on the given pipeline.
-RUN {pipeline_name} \
+RUN [TIMED_EXECUTION] {pipeline_name} \
   DRAW_RECT POS _x_in_pixels_ _y_in_pixels_ \
   SIZE _width_in_pixels_ _height_in_pixels_
 ```
@@ -1010,7 +1014,7 @@ RUN {pipeline_name} \
 # grid at |x|, |y|, |width|x|height|, |columns|x|rows| will be rendered.
 # Ignores VERTEX_DATA and INDEX_DATA on the given pipeline.
 # For columns, rows of (5, 4) a total of 5*4=20 rectangles will be drawn.
-RUN {pipeline_name} \
+RUN [TIMED_EXECUTION] {pipeline_name} \
   DRAW_GRID POS _x_in_pixels_ _y_in_pixels_ \
   SIZE _width_in_pixels_ _height_in_pixels_ \
   CELLS _columns_of_cells_ _rows_of_cells_
@@ -1024,7 +1028,7 @@ RUN {pipeline_name} \
 # will be processed. The draw is instanced if |inst_count_value| is greater
 # than one. In case of instanced draw |inst_value| controls the starting
 # instance ID.
-RUN {pipeline_name} DRAW_ARRAY AS {topology} \
+RUN [TIMED_EXECUTION] {pipeline_name} DRAW_ARRAY AS {topology} \
     [ START_IDX _value_ (default 0) ] \
     [ COUNT _count_value_ (default vertex_buffer size - start_idx) ] \
     [ START_INSTANCE _inst_value_ (default 0) ] \
@@ -1040,7 +1044,7 @@ RUN {pipeline_name} DRAW_ARRAY AS {topology} \
 # will be processed. The draw is instanced if |inst_count_value| is greater
 # than one. In case of instanced draw |inst_value| controls the starting
 # instance ID.
-RUN {pipeline_name} DRAW_ARRAY AS {topology} INDEXED \
+RUN [TIMED_EXECUTION] {pipeline_name} DRAW_ARRAY AS {topology} INDEXED \
     [ START_IDX _value_ (default 0) ] \
     [ COUNT _count_value_ (default index_buffer size - start_idx) ] \
     [ START_INSTANCE _inst_value_ (default 0) ] \
@@ -1058,7 +1062,7 @@ RUN {pipeline_name} DRAW_ARRAY AS {topology} INDEXED \
 #
 # The pipeline will be run with the given ray tracing dimensions |x|, |y|, |z|.
 # Each of the x, y and z values must be a uint32.
-RUN {pipeline_name} \
+RUN [TIMED_EXECUTION] {pipeline_name} \
     RAYGEN {ray_gen_sbt_name} \
     [MISS {miss_sbt_name}] \
     [HIT {hit_sbt_name}] \
diff --git a/include/amber/amber.h b/include/amber/amber.h
index 0c679a31..57e29640 100644
--- a/include/amber/amber.h
+++ b/include/amber/amber.h
@@ -101,6 +101,9 @@ class Delegate {
   virtual amber::Result LoadBufferData(const std::string file_name,
                                        BufferDataFileType file_type,
                                        amber::BufferInfo* buffer) const = 0;
+
+  /// Mechanism for gathering timing (in ms) from 'TIMED_EXECUTION' commands.
+  virtual void ReportExecutionTiming(double time_in_ms) = 0;
 };
 
 /// Stores configuration options for Amber.
diff --git a/samples/amber.cc b/samples/amber.cc
index 1fa177e6..7a8fe793 100644
--- a/samples/amber.cc
+++ b/samples/amber.cc
@@ -23,6 +23,7 @@
 #include <fstream>
 #include <iomanip>
 #include <iostream>
+#include <ostream>
 #include <set>
 #include <string>
 #include <utility>
@@ -67,6 +68,7 @@ struct Options {
   bool log_graphics_calls = false;
   bool log_graphics_calls_time = false;
   bool log_execute_calls = false;
+  bool log_execution_timing = false;
   bool disable_spirv_validation = false;
   bool enable_pipeline_runtime_layer = false;
   std::string shader_filename;
@@ -103,6 +105,7 @@ const char kUsage[] = R"(Usage: amber [options] SCRIPT [SCRIPTS...]
   --log-graphics-calls      -- Log graphics API calls (only for Vulkan so far).
   --log-graphics-calls-time -- Log timing of graphics API calls timing (Vulkan only).
   --log-execute-calls       -- Log each execute call before run.
+  --log-execution-timing    -- Log timing results from each command with the 'TIMED_EXECUTION' flag.
   --disable-spirv-val       -- Disable SPIR-V validation.
   --enable-runtime-layer    -- Enable pipeline runtime layer.
   -h                        -- This help text.
@@ -278,6 +281,8 @@ bool ParseArgs(const std::vector<std::string>& args, Options* opts) {
       opts->log_graphics_calls = true;
     } else if (arg == "--log-graphics-calls-time") {
       opts->log_graphics_calls_time = true;
+    } else if (arg == "--log-execution-timing") {
+      opts->log_execution_timing = true;
     } else if (arg == "--log-execute-calls") {
       opts->log_execute_calls = true;
     } else if (arg == "--disable-spirv-val") {
@@ -361,6 +366,16 @@ class SampleDelegate : public amber::Delegate {
     }
   }
 
+  void ReportExecutionTiming(double time_in_ms) override {
+    reported_execution_timing_.push_back(time_in_ms);
+  }
+
+  std::vector<double> GetAndClearExecutionTiming() {
+    std::vector<double> returning;
+    returning.swap(reported_execution_timing_);  // Leaves the member empty.
+    return returning;
+  }
+
   uint64_t GetTimestampNs() const override {
     return timestamp::SampleGetTimestampNs();
   }
@@ -400,6 +415,7 @@ class SampleDelegate : public amber::Delegate {
   bool log_graphics_calls_time_ = false;
   bool log_execute_calls_ = false;
   std::string path_ = "";
+  std::vector<double> reported_execution_timing_;
 };
 
 std::string disassemble(const std::string& env,
@@ -519,7 +535,7 @@ int main(int argc, const char** argv) {
       recipe->SetFenceTimeout(static_cast<uint32_t>(options.fence_timeout));
 
     recipe->SetPipelineRuntimeLayerEnabled(
-      options.enable_pipeline_runtime_layer);
+        options.enable_pipeline_runtime_layer);
 
     recipe_data.emplace_back();
     recipe_data.back().file = file;
@@ -621,12 +637,34 @@ int main(int argc, const char** argv) {
     amber::Amber am(&delegate);
     result = am.Execute(recipe, &amber_options);
     if (!result.IsSuccess()) {
-      std::cerr << file << ": " << result.Error() << std::endl;
+      std::cerr << file << ": " << result.Error() << "\n";
       failures.push_back(file);
       // Note, we continue after failure to allow dumping the buffers which may
       // give clues as to the failure.
     }
 
+    auto execution_timing = delegate.GetAndClearExecutionTiming();
+    if (result.IsSuccess() && options.log_execution_timing &&
+        !execution_timing.empty()) {
+      std::cout << "Execution timing (in script-order):" << "\n";
+      std::cout << "    ";
+      bool is_first_iter = true;
+      for (auto& timing : execution_timing) {
+        if (!is_first_iter) {
+          std::cout << ", ";
+        }
+        is_first_iter = false;
+        std::cout << timing;
+      }
+      std::cout << "\n";
+      std::sort(execution_timing.begin(), execution_timing.end());
+      auto report_median =
+          (execution_timing[execution_timing.size() / 2] +
+           execution_timing[(execution_timing.size() - 1) / 2]) /
+          2;
+      std::cout << "Execution time median = " << report_median << " ms" << "\n";
+    }
+
     // Dump the shader assembly
     if (!options.shader_filename.empty()) {
 #if AMBER_ENABLE_SPIRV_TOOLS
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 08198407..6fd64462 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -153,6 +153,7 @@ if (${AMBER_ENABLE_TESTS})
     amberscript/parser_raytracing_test.cc
     amberscript/parser_repeat_test.cc
     amberscript/parser_run_test.cc
+    amberscript/parser_run_timed_execution_test.cc
     amberscript/parser_sampler_test.cc
     amberscript/parser_set_test.cc
     amberscript/parser_shader_opt_test.cc
diff --git a/src/amber.cc b/src/amber.cc
index ef5658f0..20ebcf81 100644
--- a/src/amber.cc
+++ b/src/amber.cc
@@ -132,11 +132,10 @@ Result CreateEngineAndCheckRequirements(const Recipe* recipe,
 
   // Engine initialization checks requirements.  Current backends don't do
   // much else.  Refactor this if they end up doing to much here.
-  Result r =
-      engine->Initialize(opts->config, delegate, script->GetRequiredFeatures(),
-                         script->GetRequiredProperties(),
-                         script->GetRequiredInstanceExtensions(),
-                         script->GetRequiredDeviceExtensions());
+  Result r = engine->Initialize(
+      opts->config, delegate, script->GetRequiredFeatures(),
+      script->GetRequiredProperties(), script->GetRequiredInstanceExtensions(),
+      script->GetRequiredDeviceExtensions());
   if (!r.IsSuccess())
     return r;
 
diff --git a/src/amberscript/parser.cc b/src/amberscript/parser.cc
index 1e4f2cdc..863cf241 100644
--- a/src/amberscript/parser.cc
+++ b/src/amberscript/parser.cc
@@ -2706,6 +2706,14 @@ Result Parser::ParseBufferInitializerFile(Buffer* buffer) {
 
 Result Parser::ParseRun() {
   auto token = tokenizer_->NextToken();
+
+  // Timed execution option for this specific run.
+  bool is_timed_execution = false;
+  if (token->AsString() == "TIMED_EXECUTION") {
+    token = tokenizer_->NextToken();
+    is_timed_execution = true;
+  }
+
   if (!token->IsIdentifier())
     return Result("missing pipeline name for RUN command");
 
@@ -2718,6 +2726,9 @@ Result Parser::ParseRun() {
   if (pipeline->IsRayTracing()) {
     auto cmd = MakeUnique<RayTracingCommand>(pipeline);
     cmd->SetLine(line);
+    if (is_timed_execution) {
+      cmd->SetTimedExecution();
+    }
 
     while (true) {
       if (tokenizer_->PeekNextToken()->IsInteger())
@@ -2791,6 +2802,9 @@ Result Parser::ParseRun() {
     auto cmd = MakeUnique<ComputeCommand>(pipeline);
     cmd->SetLine(line);
     cmd->SetX(token->AsUint32());
+    if (is_timed_execution) {
+      cmd->SetTimedExecution();
+    }
 
     token = tokenizer_->NextToken();
     if (!token->IsInteger()) {
@@ -2840,6 +2854,9 @@ Result Parser::ParseRun() {
         MakeUnique<DrawRectCommand>(pipeline, *pipeline->GetPipelineData());
     cmd->SetLine(line);
     cmd->EnableOrtho();
+    if (is_timed_execution) {
+      cmd->SetTimedExecution();
+    }
 
     Result r = token->ConvertToDouble();
     if (!r.IsSuccess())
@@ -2909,6 +2926,9 @@ Result Parser::ParseRun() {
     auto cmd =
         MakeUnique<DrawGridCommand>(pipeline, *pipeline->GetPipelineData());
     cmd->SetLine(line);
+    if (is_timed_execution) {
+      cmd->SetTimedExecution();
+    }
 
     Result r = token->ConvertToDouble();
     if (!r.IsSuccess())
@@ -3082,6 +3102,9 @@ Result Parser::ParseRun() {
     cmd->SetVertexCount(count);
     cmd->SetInstanceCount(instance_count);
     cmd->SetFirstInstance(start_instance);
+    if (is_timed_execution) {
+      cmd->SetTimedExecution();
+    }
 
     if (indexed)
       cmd->EnableIndexed();
diff --git a/src/amberscript/parser_buffer_test.cc b/src/amberscript/parser_buffer_test.cc
index aeac2cbd..74859364 100644
--- a/src/amberscript/parser_buffer_test.cc
+++ b/src/amberscript/parser_buffer_test.cc
@@ -31,10 +31,12 @@ class DummyDelegate : public amber::Delegate {
   bool LogExecuteCalls() const override { return false; }
   void SetLogExecuteCalls(bool) {}
   bool LogGraphicsCallsTime() const override { return false; }
+
   void SetLogGraphicsCallsTime(bool) {}
   uint64_t GetTimestampNs() const override { return 0; }
   void SetScriptPath(std::string) {}
 
+  void ReportExecutionTiming(double) override {}
   amber::Result LoadBufferData(const std::string,
                                amber::BufferDataFileType type,
                                amber::BufferInfo* buffer) const override {
diff --git a/src/amberscript/parser_run_test.cc b/src/amberscript/parser_run_test.cc
index 0f6b23f0..f823527c 100644
--- a/src/amberscript/parser_run_test.cc
+++ b/src/amberscript/parser_run_test.cc
@@ -48,6 +48,7 @@ RUN my_pipeline 2 4 5
   EXPECT_EQ(2U, cmd->AsCompute()->GetX());
   EXPECT_EQ(4U, cmd->AsCompute()->GetY());
   EXPECT_EQ(5U, cmd->AsCompute()->GetZ());
+  EXPECT_FALSE(cmd->AsCompute()->IsTimedExecution());
 }
 
 TEST_F(AmberScriptParserTest, RunWithoutPipeline) {
@@ -218,6 +219,7 @@ RUN my_pipeline DRAW_RECT POS 2 4 SIZE 10 20)";
   EXPECT_FLOAT_EQ(4.f, cmd->AsDrawRect()->GetY());
   EXPECT_FLOAT_EQ(10.f, cmd->AsDrawRect()->GetWidth());
   EXPECT_FLOAT_EQ(20.f, cmd->AsDrawRect()->GetHeight());
+  EXPECT_FALSE(cmd->AsDrawRect()->IsTimedExecution());
 }
 
 TEST_F(AmberScriptParserTest, RunDrawRectWithComputePipelineInvalid) {
@@ -519,6 +521,7 @@ RUN my_pipeline DRAW_GRID POS 2 4 SIZE 10 20 CELLS 4 5)";
   EXPECT_FLOAT_EQ(20.f, cmd->AsDrawGrid()->GetHeight());
   EXPECT_EQ(4u, cmd->AsDrawGrid()->GetColumns());
   EXPECT_EQ(5u, cmd->AsDrawGrid()->GetRows());
+  EXPECT_FALSE(cmd->AsDrawGrid()->IsTimedExecution());
 }
 
 TEST_F(AmberScriptParserTest, RunDrawGridWithComputePipelineInvalid) {
@@ -887,6 +890,7 @@ RUN my_pipeline DRAW_ARRAY AS TRIANGLE_LIST START_IDX 1 COUNT 2)";
   EXPECT_EQ(Topology::kTriangleList, cmd->GetTopology());
   EXPECT_EQ(1U, cmd->GetFirstVertexIndex());
   EXPECT_EQ(2U, cmd->GetVertexCount());
+  EXPECT_FALSE(cmd->IsTimedExecution());
 }
 
 TEST_F(AmberScriptParserTest, RunDrawArraysInstanced) {
@@ -926,6 +930,7 @@ RUN my_pipeline DRAW_ARRAY AS TRIANGLE_LIST START_IDX 1 COUNT 2 START_INSTANCE 2
   EXPECT_EQ(Topology::kTriangleList, cmd->GetTopology());
   EXPECT_EQ(1U, cmd->GetFirstVertexIndex());
   EXPECT_EQ(2U, cmd->GetVertexCount());
+  EXPECT_FALSE(cmd->IsTimedExecution());
 }
 
 TEST_F(AmberScriptParserTest, RunDrawArraysCountOmitted) {
@@ -966,6 +971,7 @@ RUN my_pipeline DRAW_ARRAY AS TRIANGLE_LIST START_IDX 1)";
   EXPECT_EQ(1U, cmd->GetFirstVertexIndex());
   // There are 3 elements in the vertex buffer, but we start at element 1.
   EXPECT_EQ(2U, cmd->GetVertexCount());
+  EXPECT_FALSE(cmd->IsTimedExecution());
 }
 
 TEST_F(AmberScriptParserTest, RunDrawArraysStartIdxAndCountOmitted) {
@@ -1006,6 +1012,7 @@ RUN my_pipeline DRAW_ARRAY AS TRIANGLE_LIST)";
   EXPECT_EQ(static_cast<uint32_t>(0U), cmd->GetFirstVertexIndex());
   // There are 3 elements in the vertex buffer.
   EXPECT_EQ(3U, cmd->GetVertexCount());
+  EXPECT_FALSE(cmd->IsTimedExecution());
 }
 
 TEST_F(AmberScriptParserTest, RunDrawArraysIndexed) {
@@ -1052,6 +1059,7 @@ RUN my_pipeline DRAW_ARRAY AS TRIANGLE_LIST INDEXED)";
   EXPECT_EQ(static_cast<uint32_t>(0U), cmd->GetFirstVertexIndex());
   // There are 3 elements in the vertex buffer.
   EXPECT_EQ(3U, cmd->GetVertexCount());
+  EXPECT_FALSE(cmd->IsTimedExecution());
 }
 
 TEST_F(AmberScriptParserTest, RunDrawArraysIndexedMissingIndexData) {
diff --git a/src/amberscript/parser_run_timed_execution_test.cc b/src/amberscript/parser_run_timed_execution_test.cc
new file mode 100644
index 00000000..a794b2c4
--- /dev/null
+++ b/src/amberscript/parser_run_timed_execution_test.cc
@@ -0,0 +1,279 @@
+// Copyright 2024 The Amber Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gtest/gtest.h"
+#include "src/amberscript/parser.h"
+
+namespace amber {
+namespace amberscript {
+
+using AmberScriptParserTest = testing::Test;
+
+TEST_F(AmberScriptParserTest, RunComputeTimedExecution) {
+  std::string in = R"(
+SHADER compute my_shader GLSL
+void main() {
+  gl_FragColor = vec3(2, 3, 4);
+}
+END
+
+PIPELINE compute my_pipeline
+  ATTACH my_shader
+END
+
+RUN TIMED_EXECUTION my_pipeline 2 4 5
+)";
+
+  Parser parser;
+  Result r = parser.Parse(in);
+  ASSERT_TRUE(r.IsSuccess()) << r.Error();
+
+  auto script = parser.GetScript();
+  const auto& commands = script->GetCommands();
+  ASSERT_EQ(1U, commands.size());
+
+  auto* cmd = commands[0].get();
+  ASSERT_TRUE(cmd->IsCompute());
+  EXPECT_EQ(2U, cmd->AsCompute()->GetX());
+  EXPECT_EQ(4U, cmd->AsCompute()->GetY());
+  EXPECT_EQ(5U, cmd->AsCompute()->GetZ());
+  EXPECT_TRUE(cmd->AsCompute()->IsTimedExecution());
+}
+
+TEST_F(AmberScriptParserTest, RunComputeNoTimedExecution) {
+  std::string in = R"(
+SHADER compute my_shader GLSL
+void main() {
+  gl_FragColor = vec3(2, 3, 4);
+}
+END
+
+PIPELINE compute my_pipeline
+  ATTACH my_shader
+END
+
+RUN my_pipeline 2 4 5
+)";
+
+  Parser parser;
+  Result r = parser.Parse(in);
+  ASSERT_TRUE(r.IsSuccess()) << r.Error();
+
+  auto script = parser.GetScript();
+  const auto& commands = script->GetCommands();
+  ASSERT_EQ(1U, commands.size());
+
+  auto* cmd = commands[0].get();
+  ASSERT_TRUE(cmd->IsCompute());
+  EXPECT_EQ(2U, cmd->AsCompute()->GetX());
+  EXPECT_EQ(4U, cmd->AsCompute()->GetY());
+  EXPECT_EQ(5U, cmd->AsCompute()->GetZ());
+  EXPECT_FALSE(cmd->AsCompute()->IsTimedExecution());
+}
+
+TEST_F(AmberScriptParserTest, RunDrawRectTimedExecution) {
+  std::string in = R"(
+SHADER vertex my_shader PASSTHROUGH
+SHADER fragment my_fragment GLSL
+# GLSL Shader
+END
+
+PIPELINE graphics my_pipeline
+  ATTACH my_shader
+  ATTACH my_fragment
+END
+
+RUN TIMED_EXECUTION my_pipeline DRAW_RECT POS 2 4 SIZE 10 20)";
+
+  Parser parser;
+  Result r = parser.Parse(in);
+  ASSERT_TRUE(r.IsSuccess()) << r.Error();
+
+  auto script = parser.GetScript();
+  const auto& commands = script->GetCommands();
+  ASSERT_EQ(1U, commands.size());
+
+  auto* cmd = commands[0].get();
+  ASSERT_TRUE(cmd->IsDrawRect());
+  EXPECT_TRUE(cmd->AsDrawRect()->IsOrtho());
+  EXPECT_FALSE(cmd->AsDrawRect()->IsPatch());
+  EXPECT_FLOAT_EQ(2.f, cmd->AsDrawRect()->GetX());
+  EXPECT_FLOAT_EQ(4.f, cmd->AsDrawRect()->GetY());
+  EXPECT_FLOAT_EQ(10.f, cmd->AsDrawRect()->GetWidth());
+  EXPECT_FLOAT_EQ(20.f, cmd->AsDrawRect()->GetHeight());
+  EXPECT_TRUE(cmd->AsDrawRect()->IsTimedExecution());
+}
+
+TEST_F(AmberScriptParserTest, RunDrawGridTimedExecution) {
+  std::string in = R"(
+SHADER vertex my_shader PASSTHROUGH
+SHADER fragment my_fragment GLSL
+# GLSL Shader
+END
+
+PIPELINE graphics my_pipeline
+  ATTACH my_shader
+  ATTACH my_fragment
+END
+
+RUN TIMED_EXECUTION my_pipeline DRAW_GRID POS 2 4 SIZE 10 20 CELLS 4 5)";
+
+  Parser parser;
+  Result r = parser.Parse(in);
+  ASSERT_TRUE(r.IsSuccess()) << r.Error();
+
+  auto script = parser.GetScript();
+  const auto& commands = script->GetCommands();
+  ASSERT_EQ(1U, commands.size());
+
+  auto* cmd = commands[0].get();
+  ASSERT_TRUE(cmd->IsDrawGrid());
+  EXPECT_FLOAT_EQ(2.f, cmd->AsDrawGrid()->GetX());
+  EXPECT_FLOAT_EQ(4.f, cmd->AsDrawGrid()->GetY());
+  EXPECT_FLOAT_EQ(10.f, cmd->AsDrawGrid()->GetWidth());
+  EXPECT_FLOAT_EQ(20.f, cmd->AsDrawGrid()->GetHeight());
+  EXPECT_EQ(4u, cmd->AsDrawGrid()->GetColumns());
+  EXPECT_EQ(5u, cmd->AsDrawGrid()->GetRows());
+  EXPECT_TRUE(cmd->AsDrawGrid()->IsTimedExecution());
+}
+
+TEST_F(AmberScriptParserTest, RunDrawArraysTimedExecution) {
+  std::string in = R"(
+SHADER vertex my_shader PASSTHROUGH
+SHADER fragment my_fragment GLSL
+# GLSL Shader
+END
+BUFFER vtex_buf DATA_TYPE vec3<float> DATA
+1 2 3
+4 5 6
+7 8 9
+END
+
+PIPELINE graphics my_pipeline
+  ATTACH my_shader
+  ATTACH my_fragment
+  VERTEX_DATA vtex_buf LOCATION 0
+END
+
+RUN TIMED_EXECUTION my_pipeline DRAW_ARRAY AS TRIANGLE_LIST START_IDX 1 COUNT 2)";
+
+  Parser parser;
+  Result r = parser.Parse(in);
+  ASSERT_TRUE(r.IsSuccess()) << r.Error();
+
+  auto script = parser.GetScript();
+  const auto& commands = script->GetCommands();
+  ASSERT_EQ(1U, commands.size());
+
+  ASSERT_TRUE(commands[0]->IsDrawArrays());
+
+  auto* cmd = commands[0]->AsDrawArrays();
+  EXPECT_FALSE(cmd->IsIndexed());
+  EXPECT_EQ(static_cast<uint32_t>(1U), cmd->GetInstanceCount());
+  EXPECT_EQ(static_cast<uint32_t>(0U), cmd->GetFirstInstance());
+  EXPECT_EQ(Topology::kTriangleList, cmd->GetTopology());
+  EXPECT_EQ(1U, cmd->GetFirstVertexIndex());
+  EXPECT_EQ(2U, cmd->GetVertexCount());
+  EXPECT_TRUE(cmd->IsTimedExecution());
+}
+
+TEST_F(AmberScriptParserTest, RunDrawArraysInstancedTimedExecution) {
+  std::string in = R"(
+SHADER vertex my_shader PASSTHROUGH
+SHADER fragment my_fragment GLSL
+# GLSL Shader
+END
+BUFFER vtex_buf DATA_TYPE vec3<float> DATA
+1 2 3
+4 5 6
+7 8 9
+END
+
+PIPELINE graphics my_pipeline
+  ATTACH my_shader
+  ATTACH my_fragment
+  VERTEX_DATA vtex_buf LOCATION 0
+END
+
+RUN TIMED_EXECUTION my_pipeline DRAW_ARRAY AS TRIANGLE_LIST START_IDX 1 COUNT 2 START_INSTANCE 2 INSTANCE_COUNT 10)";
+
+  Parser parser;
+  Result r = parser.Parse(in);
+  ASSERT_TRUE(r.IsSuccess()) << r.Error();
+
+  auto script = parser.GetScript();
+  const auto& commands = script->GetCommands();
+  ASSERT_EQ(1U, commands.size());
+
+  ASSERT_TRUE(commands[0]->IsDrawArrays());
+
+  auto* cmd = commands[0]->AsDrawArrays();
+  EXPECT_FALSE(cmd->IsIndexed());
+  EXPECT_EQ(static_cast<uint32_t>(10U), cmd->GetInstanceCount());
+  EXPECT_EQ(static_cast<uint32_t>(2U), cmd->GetFirstInstance());
+  EXPECT_EQ(Topology::kTriangleList, cmd->GetTopology());
+  EXPECT_EQ(1U, cmd->GetFirstVertexIndex());
+  EXPECT_EQ(2U, cmd->GetVertexCount());
+  EXPECT_TRUE(cmd->IsTimedExecution());
+}
+
+TEST_F(AmberScriptParserTest, RunDrawArraysIndexedTimedExecution) {
+  std::string in = R"(
+SHADER vertex my_shader PASSTHROUGH
+SHADER fragment my_fragment GLSL
+# GLSL Shader
+END
+BUFFER vtex_buf DATA_TYPE vec3<float> DATA
+1 2 3
+4 5 6
+7 8 9
+END
+BUFFER idx_buf DATA_TYPE vec3<float> DATA
+9 8 7
+6 5 4
+3 2 1
+END
+
+PIPELINE graphics my_pipeline
+  ATTACH my_shader
+  ATTACH my_fragment
+  VERTEX_DATA vtex_buf LOCATION 0
+  INDEX_DATA idx_buf
+END
+
+RUN TIMED_EXECUTION my_pipeline DRAW_ARRAY AS TRIANGLE_LIST INDEXED)";
+
+  Parser parser;
+  Result r = parser.Parse(in);
+  ASSERT_TRUE(r.IsSuccess()) << r.Error();
+
+  auto script = parser.GetScript();
+  const auto& commands = script->GetCommands();
+  ASSERT_EQ(1U, commands.size());
+
+  ASSERT_TRUE(commands[0]->IsDrawArrays());
+
+  auto* cmd = commands[0]->AsDrawArrays();
+  EXPECT_TRUE(cmd->IsIndexed());
+  EXPECT_EQ(static_cast<uint32_t>(1U), cmd->GetInstanceCount());
+  EXPECT_EQ(static_cast<uint32_t>(0U), cmd->GetFirstInstance());
+  EXPECT_EQ(Topology::kTriangleList, cmd->GetTopology());
+  EXPECT_EQ(static_cast<uint32_t>(0U), cmd->GetFirstVertexIndex());
+  // There are 3 elements in the vertex buffer.
+  EXPECT_EQ(3U, cmd->GetVertexCount());
+  EXPECT_TRUE(cmd->IsTimedExecution());
+}
+
+}  // namespace amberscript
+}  // namespace amber
diff --git a/src/command.h b/src/command.h
index c8012216..485fc604 100644
--- a/src/command.h
+++ b/src/command.h
@@ -142,10 +142,14 @@ class PipelineCommand : public Command {
 
   Pipeline* GetPipeline() const { return pipeline_; }
 
+  void SetTimedExecution() { timed_execution_ = true; }
+  bool IsTimedExecution() const { return timed_execution_; }
+
  protected:
   PipelineCommand(Type type, Pipeline* pipeline);
 
   Pipeline* pipeline_ = nullptr;
+  bool timed_execution_ = false;
 };
 
 /// Command to draw a rectangle on screen.
diff --git a/src/vulkan/compute_pipeline.cc b/src/vulkan/compute_pipeline.cc
index dd7a990e..23fd127e 100644
--- a/src/vulkan/compute_pipeline.cc
+++ b/src/vulkan/compute_pipeline.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "src/vulkan/compute_pipeline.h"
+#include <cstdint>
 
 #include "src/vulkan/command_pool.h"
 #include "src/vulkan/device.h"
@@ -66,7 +67,10 @@ Result ComputePipeline::CreateVkComputePipeline(
   return {};
 }
 
-Result ComputePipeline::Compute(uint32_t x, uint32_t y, uint32_t z) {
+Result ComputePipeline::Compute(uint32_t x,
+                                uint32_t y,
+                                uint32_t z,
+                                bool is_timed_execution) {
   Result r = SendDescriptorDataToDeviceIfNeeded();
   if (!r.IsSuccess())
     return r;
@@ -85,7 +89,7 @@ Result ComputePipeline::Compute(uint32_t x, uint32_t y, uint32_t z) {
   // it must be submitted separately, because using a descriptor set
   // while updating it is not safe.
   UpdateDescriptorSetsIfNeeded();
-
+  CreateTimingQueryObjectIfNeeded(is_timed_execution);
   {
     CommandBufferGuard guard(GetCommandBuffer());
     if (!guard.IsRecording())
@@ -100,13 +104,15 @@ Result ComputePipeline::Compute(uint32_t x, uint32_t y, uint32_t z) {
     device_->GetPtrs()->vkCmdBindPipeline(command_->GetVkCommandBuffer(),
                                           VK_PIPELINE_BIND_POINT_COMPUTE,
                                           pipeline);
+    BeginTimerQuery();
     device_->GetPtrs()->vkCmdDispatch(command_->GetVkCommandBuffer(), x, y, z);
+    EndTimerQuery();
 
     r = guard.Submit(GetFenceTimeout(), GetPipelineRuntimeLayerEnabled());
     if (!r.IsSuccess())
       return r;
   }
-
+  DestroyTimingQueryObjectIfNeeded();
   r = ReadbackDescriptorsToHostDataQueue();
   if (!r.IsSuccess())
     return r;
diff --git a/src/vulkan/compute_pipeline.h b/src/vulkan/compute_pipeline.h
index d6597beb..53f2221e 100644
--- a/src/vulkan/compute_pipeline.h
+++ b/src/vulkan/compute_pipeline.h
@@ -36,7 +36,7 @@ class ComputePipeline : public Pipeline {
 
   Result Initialize(CommandPool* pool);
 
-  Result Compute(uint32_t x, uint32_t y, uint32_t z);
+  Result Compute(uint32_t x, uint32_t y, uint32_t z, bool is_timed_execution);
 
  private:
   Result CreateVkComputePipeline(const VkPipelineLayout& pipeline_layout,
diff --git a/src/vulkan/device.cc b/src/vulkan/device.cc
index 0aebd7c5..61dcaa05 100644
--- a/src/vulkan/device.cc
+++ b/src/vulkan/device.cc
@@ -411,12 +411,14 @@ Device::Device(VkInstance instance,
                VkPhysicalDevice physical_device,
                uint32_t queue_family_index,
                VkDevice device,
-               VkQueue queue)
+               VkQueue queue,
+               Delegate* delegate)
     : instance_(instance),
       physical_device_(physical_device),
       device_(device),
       queue_(queue),
-      queue_family_index_(queue_family_index) {}
+      queue_family_index_(queue_family_index),
+      delegate_(delegate) {}
 
 Device::~Device() = default;
 
@@ -450,9 +452,14 @@ bool Device::SupportsApiVersion(uint32_t major,
 #pragma clang diagnostic pop
 }
 
+void Device::ReportExecutionTiming(double time_in_ms) {
+  if (delegate_) {
+    delegate_->ReportExecutionTiming(time_in_ms);
+  }
+}
+
 Result Device::Initialize(
     PFN_vkGetInstanceProcAddr getInstanceProcAddr,
-    Delegate* delegate,
     const std::vector<std::string>& required_features,
     const std::vector<std::string>& required_properties,
     const std::vector<std::string>& required_device_extensions,
@@ -460,7 +467,7 @@ Result Device::Initialize(
     const VkPhysicalDeviceFeatures2KHR& available_features2,
     const VkPhysicalDeviceProperties2KHR& available_properties2,
     const std::vector<std::string>& available_extensions) {
-  Result r = LoadVulkanPointers(getInstanceProcAddr, delegate);
+  Result r = LoadVulkanPointers(getInstanceProcAddr, delegate_);
   if (!r.IsSuccess())
     return r;
 
@@ -813,9 +820,9 @@ Result Device::Initialize(
     ptr = s->pNext;
   }
 
-#define CHK_P(R, P, NAME, S1, S2) \
-  do {                            \
-    if (R == -1 && P == #NAME) \
+#define CHK_P(R, P, NAME, S1, S2)                         \
+  do {                                                    \
+    if (R == -1 && P == #NAME)                            \
       R = ((S1 && S1->NAME) || (S2 && S2->NAME)) ? 1 : 0; \
   } while (false)
 
@@ -853,8 +860,7 @@ Result Device::Initialize(
       return Result("Vulkan: Device::Initialize missing " + prop + " property");
 
     if (supported == -1)
-      return Result(
-          "Vulkan: Device::Initialize property not handled " + prop);
+      return Result("Vulkan: Device::Initialize property not handled " + prop);
   }
 
   ptrs_.vkGetPhysicalDeviceMemoryProperties(physical_device_,
@@ -1075,6 +1081,14 @@ uint32_t Device::GetMaxPushConstants() const {
   return physical_device_properties_.limits.maxPushConstantsSize;
 }
 
+bool Device::IsTimestampComputeAndGraphicsSupported() const {
+  return physical_device_properties_.limits.timestampComputeAndGraphics;
+}
+
+float Device::GetTimestampPeriod() const {
+  return physical_device_properties_.limits.timestampPeriod;
+}
+
 bool Device::IsDescriptorSetInBounds(uint32_t descriptor_set) const {
   VkPhysicalDeviceProperties properties = VkPhysicalDeviceProperties();
   GetPtrs()->vkGetPhysicalDeviceProperties(physical_device_, &properties);
diff --git a/src/vulkan/device.h b/src/vulkan/device.h
index 8cda4b7a..0ce0529a 100644
--- a/src/vulkan/device.h
+++ b/src/vulkan/device.h
@@ -42,11 +42,11 @@ class Device {
          VkPhysicalDevice physical_device,
          uint32_t queue_family_index,
          VkDevice device,
-         VkQueue queue);
+         VkQueue queue,
+         Delegate* delegate);
   virtual ~Device();
 
   Result Initialize(PFN_vkGetInstanceProcAddr getInstanceProcAddr,
-                    Delegate* delegate,
                     const std::vector<std::string>& required_features,
                     const std::vector<std::string>& required_properties,
                     const std::vector<std::string>& required_device_extensions,
@@ -94,6 +94,15 @@ class Device {
   /// Returns ray tracing shader group handle size.
   uint32_t GetRayTracingShaderGroupHandleSize() const;
 
+  /// Returns true if all graphics and compute queues support timestamps.
+  bool IsTimestampComputeAndGraphicsSupported() const;
+
+  /// Returns the number of nanoseconds per timestamp tick.
+  float GetTimestampPeriod() const;
+
+  /// Forwards the timing (in ms) of a timed execution to the delegate.
+  void ReportExecutionTiming(double time_in_ms);
+
  private:
   Result LoadVulkanPointers(PFN_vkGetInstanceProcAddr, Delegate* delegate);
   bool SupportsApiVersion(uint32_t major, uint32_t minor, uint32_t patch);
@@ -110,6 +119,8 @@ class Device {
   uint32_t shader_group_handle_size_ = 0;
 
   VulkanPtrs ptrs_;
+
+  Delegate* delegate_ = nullptr;
 };
 
 }  // namespace vulkan
diff --git a/src/vulkan/engine_vulkan.cc b/src/vulkan/engine_vulkan.cc
index fc36e30b..18e506ad 100644
--- a/src/vulkan/engine_vulkan.cc
+++ b/src/vulkan/engine_vulkan.cc
@@ -139,13 +139,12 @@ Result EngineVulkan::Initialize(
 
   device_ = MakeUnique<Device>(vk_config->instance, vk_config->physical_device,
                                vk_config->queue_family_index, vk_config->device,
-                               vk_config->queue);
+                               vk_config->queue, delegate);
 
   Result r = device_->Initialize(
-      vk_config->vkGetInstanceProcAddr, delegate, features, properties,
-      device_extensions, vk_config->available_features,
-      vk_config->available_features2, vk_config->available_properties2,
-      vk_config->available_device_extensions);
+      vk_config->vkGetInstanceProcAddr, features, properties, device_extensions,
+      vk_config->available_features, vk_config->available_features2,
+      vk_config->available_properties2, vk_config->available_device_extensions);
   if (!r.IsSuccess())
     return r;
 
@@ -463,8 +462,7 @@ Result EngineVulkan::GetVkShaderStageInfo(
     return r;
 
   *stage_info = VkPipelineShaderStageCreateInfo();
-  stage_info->sType =
-      VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+  stage_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
   stage_info->flags = shader_info.create_flags;
   stage_info->stage = stage;
   stage_info->module = shader_info.shader;
@@ -536,15 +534,14 @@ Result EngineVulkan::GetVkShaderGroupInfo(
       return Result("Invalid shader group");
 
     VkRayTracingShaderGroupCreateInfoKHR group_info = {
-      VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
-      nullptr,
-      VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR,
-      VK_SHADER_UNUSED_KHR,
-      VK_SHADER_UNUSED_KHR,
-      VK_SHADER_UNUSED_KHR,
-      VK_SHADER_UNUSED_KHR,
-      nullptr
-    };
+        VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR,
+        nullptr,
+        VK_RAY_TRACING_SHADER_GROUP_TYPE_MAX_ENUM_KHR,
+        VK_SHADER_UNUSED_KHR,
+        VK_SHADER_UNUSED_KHR,
+        VK_SHADER_UNUSED_KHR,
+        VK_SHADER_UNUSED_KHR,
+        nullptr};
 
     if (sg->IsGeneralGroup()) {
       group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR;
@@ -673,13 +670,17 @@ Result EngineVulkan::DoDrawRect(const DrawRectCommand* command) {
                          buf->GetFormat()->SizeInBytes());
 
   DrawArraysCommand draw(command->GetPipeline(), *command->GetPipelineData());
+  if (command->IsTimedExecution()) {
+    draw.SetTimedExecution();
+  }
   draw.SetTopology(command->IsPatch() ? Topology::kPatchList
                                       : Topology::kTriangleStrip);
   draw.SetFirstVertexIndex(0);
   draw.SetVertexCount(4);
   draw.SetInstanceCount(1);
 
-  Result r = graphics->Draw(&draw, vertex_buffer.get());
+  Result r =
+      graphics->Draw(&draw, vertex_buffer.get(), command->IsTimedExecution());
   if (!r.IsSuccess())
     return r;
 
@@ -761,12 +762,16 @@ Result EngineVulkan::DoDrawGrid(const DrawGridCommand* command) {
                          buf->GetFormat()->SizeInBytes());
 
   DrawArraysCommand draw(command->GetPipeline(), *command->GetPipelineData());
+  if (command->IsTimedExecution()) {
+    draw.SetTimedExecution();
+  }
   draw.SetTopology(Topology::kTriangleList);
   draw.SetFirstVertexIndex(0);
   draw.SetVertexCount(vertices);
   draw.SetInstanceCount(1);
 
-  Result r = graphics->Draw(&draw, vertex_buffer.get());
+  Result r =
+      graphics->Draw(&draw, vertex_buffer.get(), command->IsTimedExecution());
   if (!r.IsSuccess())
     return r;
 
@@ -778,8 +783,8 @@ Result EngineVulkan::DoDrawArrays(const DrawArraysCommand* command) {
   if (!info.vk_pipeline)
     return Result("Vulkan::DrawArrays for Non-Graphics Pipeline");
 
-  return info.vk_pipeline->AsGraphics()->Draw(command,
-                                              info.vertex_buffer.get());
+  return info.vk_pipeline->AsGraphics()->Draw(command, info.vertex_buffer.get(),
+                                              command->IsTimedExecution());
 }
 
 Result EngineVulkan::DoCompute(const ComputeCommand* command) {
@@ -788,7 +793,8 @@ Result EngineVulkan::DoCompute(const ComputeCommand* command) {
     return Result("Vulkan: Compute called for non-compute pipeline.");
 
   return info.vk_pipeline->AsCompute()->Compute(
-      command->GetX(), command->GetY(), command->GetZ());
+      command->GetX(), command->GetY(), command->GetZ(),
+      command->IsTimedExecution());
 }
 
 Result EngineVulkan::InitDependendLibraries(amber::Pipeline* pipeline,
@@ -848,7 +854,8 @@ Result EngineVulkan::DoTraceRays(const RayTracingCommand* command) {
       rSBT, mSBT, hSBT, cSBT, command->GetX(), command->GetY(), command->GetZ(),
       pipeline->GetMaxPipelineRayPayloadSize(),
       pipeline->GetMaxPipelineRayHitAttributeSize(),
-      pipeline->GetMaxPipelineRayRecursionDepth(), libs);
+      pipeline->GetMaxPipelineRayRecursionDepth(), libs,
+      command->IsTimedExecution());
 }
 
 Result EngineVulkan::DoEntryPoint(const EntryPointCommand* command) {
diff --git a/src/vulkan/graphics_pipeline.cc b/src/vulkan/graphics_pipeline.cc
index 9db5eb79..556c91f3 100644
--- a/src/vulkan/graphics_pipeline.cc
+++ b/src/vulkan/graphics_pipeline.cc
@@ -881,7 +881,8 @@ Result GraphicsPipeline::Clear() {
 }
 
 Result GraphicsPipeline::Draw(const DrawArraysCommand* command,
-                              VertexBuffer* vertex_buffer) {
+                              VertexBuffer* vertex_buffer,
+                              bool is_timed_execution) {
   Result r = SendDescriptorDataToDeviceIfNeeded();
   if (!r.IsSuccess())
     return r;
@@ -902,7 +903,7 @@ Result GraphicsPipeline::Draw(const DrawArraysCommand* command,
   // it must be submitted separately, because using a descriptor set
   // while updating it is not safe.
   UpdateDescriptorSetsIfNeeded();
-
+  CreateTimingQueryObjectIfNeeded(is_timed_execution);
   {
     CommandBufferGuard cmd_buf_guard(GetCommandBuffer());
     if (!cmd_buf_guard.IsRecording())
@@ -916,6 +917,10 @@ Result GraphicsPipeline::Draw(const DrawArraysCommand* command,
     frame_->CopyBuffersToImages();
     frame_->TransferImagesToDevice(GetCommandBuffer());
 
+    // Timing must be placed outside the render pass scope. The full pipeline
+    // barrier used by our specific implementation cannot be within a
+    // render pass.
+    BeginTimerQuery();
     {
       RenderPassGuard render_pass_guard(this);
 
@@ -943,6 +948,7 @@ Result GraphicsPipeline::Draw(const DrawArraysCommand* command,
         // VkRunner spec says
         //   "vertexCount will be used as the index count, firstVertex
         //    becomes the vertex offset and firstIndex will always be zero."
+
         device_->GetPtrs()->vkCmdDrawIndexed(
             command_->GetVkCommandBuffer(),
             command->GetVertexCount(),   /* indexCount */
@@ -958,7 +964,7 @@ Result GraphicsPipeline::Draw(const DrawArraysCommand* command,
             command->GetFirstInstance());
       }
     }
-
+    EndTimerQuery();
     frame_->TransferImagesToHost(command_.get());
 
     r = cmd_buf_guard.Submit(GetFenceTimeout(),
@@ -966,7 +972,7 @@ Result GraphicsPipeline::Draw(const DrawArraysCommand* command,
     if (!r.IsSuccess())
       return r;
   }
-
+  DestroyTimingQueryObjectIfNeeded();
   r = ReadbackDescriptorsToHostDataQueue();
   if (!r.IsSuccess())
     return r;
diff --git a/src/vulkan/graphics_pipeline.h b/src/vulkan/graphics_pipeline.h
index 4bc5f7d0..c4bb6574 100644
--- a/src/vulkan/graphics_pipeline.h
+++ b/src/vulkan/graphics_pipeline.h
@@ -59,7 +59,9 @@ class GraphicsPipeline : public Pipeline {
   Result SetClearStencil(uint32_t stencil);
   Result SetClearDepth(float depth);
 
-  Result Draw(const DrawArraysCommand* command, VertexBuffer* vertex_buffer);
+  Result Draw(const DrawArraysCommand* command,
+              VertexBuffer* vertex_buffer,
+              bool is_timed_execution);
 
   VkRenderPass GetVkRenderPass() const { return render_pass_; }
   FrameBuffer* GetFrameBuffer() const { return frame_.get(); }
diff --git a/src/vulkan/pipeline.cc b/src/vulkan/pipeline.cc
index 6f3a724e..fd8dd4ee 100644
--- a/src/vulkan/pipeline.cc
+++ b/src/vulkan/pipeline.cc
@@ -16,6 +16,7 @@
 #include "src/vulkan/pipeline.h"
 
 #include <algorithm>
+#include <array>
 #include <limits>
 #include <utility>
 
@@ -37,6 +38,13 @@ namespace {
 
 const char* kDefaultEntryPointName = "main";
 
+// Global read/write memory barrier used to fence the timestamp writes.
+constexpr VkMemoryBarrier kMemoryBarrierFull = {
+    VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr,
+    VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+    VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT};
+constexpr uint32_t kNumQueryObjects = 2;  // One begin + one end timestamp.
+
 }  // namespace
 
 Pipeline::Pipeline(
@@ -253,6 +261,84 @@ void Pipeline::UpdateDescriptorSetsIfNeeded() {
   }
 }
 
+// Creates the timestamp query pool when |is_timed_execution| is requested and
+// the device supports timestamps; timing stays disabled otherwise.
+void Pipeline::CreateTimingQueryObjectIfNeeded(bool is_timed_execution) {
+  if (!is_timed_execution ||
+      !device_->IsTimestampComputeAndGraphicsSupported()) {
+    return;
+  }
+  const VkQueryPoolCreateInfo pool_create_info{
+      VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr, 0,
+      VK_QUERY_TYPE_TIMESTAMP, kNumQueryObjects, 0};
+  // Arm the timers only once the pool exists, so a failed creation cannot
+  // lead to timestamp commands being recorded against a null query pool.
+  const VkResult result = device_->GetPtrs()->vkCreateQueryPool(
+      device_->GetVkDevice(), &pool_create_info, nullptr, &query_pool_);
+  in_timed_execution_ = (result == VK_SUCCESS);
+}
+
+// Reports the elapsed GPU time (in ms) and releases the timing query pool.
+void Pipeline::DestroyTimingQueryObjectIfNeeded() {
+  if (!in_timed_execution_) {
+    return;
+  }
+  // WAIT_BIT makes the call block on the CPU until both queries are available.
+  const VkQueryResultFlags flags =
+      VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT;
+  std::array<uint64_t, kNumQueryObjects> time_stamps = {};
+  const VkResult result = device_->GetPtrs()->vkGetQueryPoolResults(
+      device_->GetVkDevice(), query_pool_, 0, kNumQueryObjects,
+      sizeof(time_stamps), time_stamps.data(), sizeof(uint64_t), flags);
+  if (result == VK_SUCCESS) {  // Never report values from a failed readback.
+    constexpr double kNsToMsTime = 1.0 / 1000000.0;
+    const double time_in_ns =
+        static_cast<double>(time_stamps[1] - time_stamps[0]) *
+        static_cast<double>(device_->GetTimestampPeriod());
+    device_->ReportExecutionTiming(time_in_ns * kNsToMsTime);
+  }
+  device_->GetPtrs()->vkDestroyQueryPool(device_->GetVkDevice(), query_pool_,
+                                         nullptr);
+  query_pool_ = VK_NULL_HANDLE;  // Avoid keeping a dangling handle.
+  in_timed_execution_ = false;
+}
+
+// Records the opening timestamp (query 0) of a timed execution.
+void Pipeline::BeginTimerQuery() {
+  if (!in_timed_execution_)
+    return;
+
+  constexpr uint32_t kFirstQuery = 0;
+  const VkCommandBuffer cmd_buf = command_->GetVkCommandBuffer();
+  device_->GetPtrs()->vkCmdResetQueryPool(cmd_buf, query_pool_, kFirstQuery,
+                                          kNumQueryObjects);
+  // The full barrier drains everything recorded earlier, so no prior work is
+  // still in flight when the timestamp is taken.
+  device_->GetPtrs()->vkCmdPipelineBarrier(
+      cmd_buf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+      VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &kMemoryBarrierFull, 0, nullptr,
+      0, nullptr);
+  device_->GetPtrs()->vkCmdWriteTimestamp(
+      cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, query_pool_, kFirstQuery);
+}
+
+// Records the closing timestamp (query 1) of a timed execution.
+void Pipeline::EndTimerQuery() {
+  if (!in_timed_execution_)
+    return;
+
+  constexpr uint32_t kLastQuery = 1;
+  const VkCommandBuffer cmd_buf = command_->GetVkCommandBuffer();
+  // The full barrier makes all of the timed work finish before the timestamp
+  // is written.
+  device_->GetPtrs()->vkCmdPipelineBarrier(
+      cmd_buf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+      VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 1, &kMemoryBarrierFull, 0, nullptr,
+      0, nullptr);
+  device_->GetPtrs()->vkCmdWriteTimestamp(
+      cmd_buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, query_pool_, kLastQuery);
+}
+
 Result Pipeline::RecordPushConstant(const VkPipelineLayout& pipeline_layout) {
   return push_constant_->RecordPushConstantVkCommand(command_.get(),
                                                      pipeline_layout);
diff --git a/src/vulkan/pipeline.h b/src/vulkan/pipeline.h
index 7cae2d51..68fce4e7 100644
--- a/src/vulkan/pipeline.h
+++ b/src/vulkan/pipeline.h
@@ -29,6 +29,7 @@
 #include "src/vulkan/command_buffer.h"
 #include "src/vulkan/push_constant.h"
 #include "src/vulkan/resource.h"
+#include "vulkan/vulkan_core.h"
 
 namespace amber {
 
@@ -89,7 +90,7 @@ class Pipeline {
       PipelineType type,
       Device* device,
       uint32_t fence_timeout_ms,
-      bool    pipeline_runtime_layer_enabled,
+      bool pipeline_runtime_layer_enabled,
       const std::vector<VkPipelineShaderStageCreateInfo>& shader_stage_info,
       VkPipelineCreateFlags create_flags = 0);
 
@@ -101,6 +102,13 @@ class Pipeline {
                            Descriptor** desc);
   void UpdateDescriptorSetsIfNeeded();
 
+  // These functions are used in benchmarking when the 'TIMED_EXECUTION' option
+  // is specified.
+  void CreateTimingQueryObjectIfNeeded(bool is_timed_execution);
+  void DestroyTimingQueryObjectIfNeeded();
+  void BeginTimerQuery();
+  void EndTimerQuery();
+
   Result SendDescriptorDataToDeviceIfNeeded();
   void BindVkDescriptorSets(const VkPipelineLayout& pipeline_layout);
 
@@ -114,8 +122,9 @@ class Pipeline {
 
   const char* GetEntryPointName(VkShaderStageFlagBits stage) const;
   uint32_t GetFenceTimeout() const { return fence_timeout_ms_; }
-  bool     GetPipelineRuntimeLayerEnabled()
-       const { return pipeline_runtime_layer_enabled_; }
+  bool GetPipelineRuntimeLayerEnabled() const {
+    return pipeline_runtime_layer_enabled_;
+  }
 
   Result CreateVkPipelineLayout(VkPipelineLayout* pipeline_layout);
 
@@ -129,6 +138,7 @@ class Pipeline {
     pipeline_ = pipeline;
   }
 
+  VkQueryPool query_pool_ = VK_NULL_HANDLE;
   VkPipeline pipeline_ = VK_NULL_HANDLE;
   VkPipelineLayout pipeline_layout_ = VK_NULL_HANDLE;
 
@@ -171,6 +181,7 @@ class Pipeline {
       entry_points_;
 
   std::unique_ptr<PushConstant> push_constant_;
+  bool in_timed_execution_ = false;
 };
 
 }  // namespace vulkan
diff --git a/src/vulkan/raytracing_pipeline.cc b/src/vulkan/raytracing_pipeline.cc
index ea0d8f13..7a4f8569 100644
--- a/src/vulkan/raytracing_pipeline.cc
+++ b/src/vulkan/raytracing_pipeline.cc
@@ -186,7 +186,8 @@ Result RayTracingPipeline::TraceRays(amber::SBT* rSBT,
                                      uint32_t maxPipelineRayPayloadSize,
                                      uint32_t maxPipelineRayHitAttributeSize,
                                      uint32_t maxPipelineRayRecursionDepth,
-                                     const std::vector<VkPipeline>& libs) {
+                                     const std::vector<VkPipeline>& libs,
+                                     bool is_timed_execution) {
   Result r = SendDescriptorDataToDeviceIfNeeded();
   if (!r.IsSuccess())
     return r;
@@ -200,7 +201,7 @@ Result RayTracingPipeline::TraceRays(amber::SBT* rSBT,
   // it must be submitted separately, because using a descriptor set
   // while updating it is not safe.
   UpdateDescriptorSetsIfNeeded();
-
+  CreateTimingQueryObjectIfNeeded(is_timed_execution);
   {
     CommandBufferGuard guard(GetCommandBuffer());
     if (!guard.IsRecording())
@@ -247,12 +248,15 @@ Result RayTracingPipeline::TraceRays(amber::SBT* rSBT,
+    // Both timestamps must be recorded while the command buffer is still
+    // recording, i.e. around the trace call and before Submit().
+    BeginTimerQuery();
     device_->GetPtrs()->vkCmdTraceRaysKHR(command_->GetVkCommandBuffer(),
                                           &rSBTRegion, &mSBTRegion, &hSBTRegion,
                                           &cSBTRegion, x, y, z);
-
+    EndTimerQuery();
     r = guard.Submit(GetFenceTimeout(), GetPipelineRuntimeLayerEnabled());
     if (!r.IsSuccess())
       return r;
   }
-
+  DestroyTimingQueryObjectIfNeeded();
   r = ReadbackDescriptorsToHostDataQueue();
   if (!r.IsSuccess())
     return r;
diff --git a/src/vulkan/raytracing_pipeline.h b/src/vulkan/raytracing_pipeline.h
index 6193e9eb..6ef9c08f 100644
--- a/src/vulkan/raytracing_pipeline.h
+++ b/src/vulkan/raytracing_pipeline.h
@@ -64,7 +64,8 @@ class RayTracingPipeline : public Pipeline {
                    uint32_t maxPipelineRayPayloadSize,
                    uint32_t maxPipelineRayHitAttributeSize,
                    uint32_t maxPipelineRayRecursionDepth,
-                   const std::vector<VkPipeline>& lib);
+                   const std::vector<VkPipeline>& lib,
+                   bool is_timed_execution);
 
   BlasesMap* GetBlases() override { return blases_; }
   TlasesMap* GetTlases() override { return tlases_; }
diff --git a/src/vulkan/vertex_buffer_test.cc b/src/vulkan/vertex_buffer_test.cc
index eb8a7bd8..14b9ee9d 100644
--- a/src/vulkan/vertex_buffer_test.cc
+++ b/src/vulkan/vertex_buffer_test.cc
@@ -36,7 +36,8 @@ class DummyDevice : public Device {
                VkPhysicalDevice(),
                0u,
                VkDevice(this),
-               VkQueue()) {
+               VkQueue(),
+               nullptr) {
     memory_.resize(64);
     dummyPtrs_.vkCreateBuffer = vkCreateBuffer;
     dummyPtrs_.vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements;
diff --git a/src/vulkan/vk-funcs-1-0.inc b/src/vulkan/vk-funcs-1-0.inc
index 50a821b3..033e49fe 100644
--- a/src/vulkan/vk-funcs-1-0.inc
+++ b/src/vulkan/vk-funcs-1-0.inc
@@ -19,9 +19,12 @@ AMBER_VK_FUNC(vkCmdDrawIndexed)
 AMBER_VK_FUNC(vkCmdEndRenderPass)
 AMBER_VK_FUNC(vkCmdPipelineBarrier)
 AMBER_VK_FUNC(vkCmdPushConstants)
+AMBER_VK_FUNC(vkCmdResetQueryPool)
+AMBER_VK_FUNC(vkCmdWriteTimestamp)
 AMBER_VK_FUNC(vkCreateBuffer)
 AMBER_VK_FUNC(vkCreateBufferView)
 AMBER_VK_FUNC(vkGetBufferDeviceAddress)
+AMBER_VK_FUNC(vkGetQueryPoolResults)
 AMBER_VK_FUNC(vkCreateCommandPool)
 AMBER_VK_FUNC(vkCreateComputePipelines)
 AMBER_VK_FUNC(vkCreateDescriptorPool)
@@ -32,6 +35,7 @@ AMBER_VK_FUNC(vkCreateGraphicsPipelines)
 AMBER_VK_FUNC(vkCreateImage)
 AMBER_VK_FUNC(vkCreateImageView)
 AMBER_VK_FUNC(vkCreatePipelineLayout)
+AMBER_VK_FUNC(vkCreateQueryPool)
 AMBER_VK_FUNC(vkCreateRenderPass)
 AMBER_VK_FUNC(vkCreateSampler)
 AMBER_VK_FUNC(vkCreateShaderModule)
@@ -46,6 +50,7 @@ AMBER_VK_FUNC(vkDestroyImage)
 AMBER_VK_FUNC(vkDestroyImageView)
 AMBER_VK_FUNC(vkDestroyPipeline)
 AMBER_VK_FUNC(vkDestroyPipelineLayout)
+AMBER_VK_FUNC(vkDestroyQueryPool)
 AMBER_VK_FUNC(vkDestroyRenderPass)
 AMBER_VK_FUNC(vkDestroySampler)
 AMBER_VK_FUNC(vkDestroyShaderModule)
diff --git a/tests/cases/compute_timed_execution_single.amber b/tests/cases/compute_timed_execution_single.amber
new file mode 100644
index 00000000..77e295e5
--- /dev/null
+++ b/tests/cases/compute_timed_execution_single.amber
@@ -0,0 +1,42 @@
+#!amber
+# Copyright 2024 The Amber Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+SHADER compute atomic_sum_all GLSL
+#version 430
+
+layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
+
+layout(set = 0, binding = 0) buffer BlockUint {
+  uint data;
+} ssbo_uint;
+
+void main() {
+    atomicAdd(ssbo_uint.data, uint(1));
+}
+END
+
+BUFFER buf_uint DATA_TYPE uint32 DATA
+0
+END
+
+
+PIPELINE compute pipeline
+  ATTACH atomic_sum_all
+  BIND BUFFER buf_uint AS storage DESCRIPTOR_SET 0 BINDING 0
+END
+
+RUN TIMED_EXECUTION pipeline 128 128 1
+# 128x128 workgroups of 16x16 invocations each add 1: 128*128*256 = 4194304.
+EXPECT buf_uint IDX 0 EQ 4194304
diff --git a/tests/cases/draw_rect_timed_execution.amber b/tests/cases/draw_rect_timed_execution.amber
new file mode 100644
index 00000000..1309d8db
--- /dev/null
+++ b/tests/cases/draw_rect_timed_execution.amber
@@ -0,0 +1,42 @@
+#!amber
+# Copyright 2024 The Amber Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+SHADER vertex vert_shader PASSTHROUGH
+SHADER fragment frag_shader GLSL
+#version 430
+layout(location = 0) out vec4 color_out;
+void main() {
+  float x = gl_FragCoord.x;
+  // Small busy loop.
+  // Multiplying by 1e-5 ten times shrinks x until it rounds to zero.
+  for(int i= 0;i <10;i++) {
+    x = x*0.00001;
+  }
+  color_out = vec4(x, 0.0, 0.0, 1.0);
+}
+END
+
+BUFFER framebuffer FORMAT B8G8R8A8_UNORM
+
+PIPELINE graphics my_pipeline
+  ATTACH vert_shader
+  ATTACH frag_shader
+  FRAMEBUFFER_SIZE 1024 1024
+  BIND BUFFER framebuffer AS color LOCATION 0
+END
+
+RUN TIMED_EXECUTION my_pipeline DRAW_RECT POS 0 0 SIZE 1024 1024
+EXPECT framebuffer IDX 0 0 SIZE 1024 1024 EQ_RGBA 0 0 0 255
+ 
\ No newline at end of file