Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Docs/options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,9 @@ This section describes in detail all available configuration options exposed by
.. confval:: ref_metrics
:type: path
:default: empty

.. confval:: synchronize_queues
:type: bool
:default: false

When enabled, the layer inserts synchronization primitives to ensure that work submitted to one queue does not overlap with work executing on other queues. This can help reduce variability in performance measurements caused by asynchronous GPU execution. However, it also significantly reduces the overall performance of the application, so it is recommended only for specific use cases where accurate timing of individual queues is required.
33 changes: 28 additions & 5 deletions VkLayer_profiler_layer/VkLayer_profiler_layer.json.in
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,10 @@
"dependence": {
"mode": "ALL",
"settings": [
{ "key": "output", "value": "trace" }
{
"key": "output",
"value": "trace"
}
]
}
}
Expand Down Expand Up @@ -167,7 +170,10 @@
"dependence": {
"mode": "ALL",
"settings": [
{ "key": "enable_performance_query_ext", "value": "intel" }
{
"key": "enable_performance_query_ext",
"value": "intel"
}
]
}
},
Expand All @@ -181,8 +187,14 @@
"dependence": {
"mode": "ALL",
"settings": [
{ "key": "enable_performance_query_ext", "value": "intel" },
{ "key": "performance_query_mode", "value": "stream" }
{
"key": "enable_performance_query_ext",
"value": "intel"
},
{
"key": "performance_query_mode",
"value": "stream"
}
]
}
},
Expand All @@ -196,7 +208,10 @@
"dependence": {
"mode": "ALL",
"settings": [
{ "key": "enable_performance_query_ext", "value": "intel" }
{
"key": "enable_performance_query_ext",
"value": "intel"
}
]
}
}
Expand Down Expand Up @@ -329,6 +344,14 @@
"env": "VKPROF_ref_metrics",
"type": "LOAD_FILE",
"default": ""
},
{
"key": "synchronize_queues",
"label": "Synchronize queues",
"description": "Synchronize all command buffer submissions to prevent concurrent execution on multiple queues during measurements.",
"env": "VKPROF_synchronize_queues",
"type": "BOOL",
"default": false
}
]
}
Expand Down
36 changes: 36 additions & 0 deletions VkLayer_profiler_layer/profiler/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,24 @@ namespace Profiler

/***********************************************************************************\

Function:
    SetSynchronizeQueues

Description:
    Turns synchronization of queue submissions on or off by storing the requested
    state in m_Config.m_SynchronizeQueues. The flag is written while m_SubmitMutex
    is held. Always returns VK_SUCCESS.

    NOTE(review): the submit path reads m_Config.m_SynchronizeQueues without
    holding m_SubmitMutex - once to decide whether to lock the mutex and again to
    decide whether to unlock it. If this function enables the option between
    those two reads, the submit path appears to unlock a mutex it never locked.
    Confirm, and consider remembering the lock decision per submission.

\***********************************************************************************/
VkResult DeviceProfiler::SetSynchronizeQueues( bool synchronize )
{
    // Keep the submit mutex for the whole update; it is released on scope exit.
    const std::scoped_lock<std::mutex> submitGuard( m_SubmitMutex );
    m_Config.m_SynchronizeQueues = synchronize;
    return VK_SUCCESS;
}

/***********************************************************************************\

\***********************************************************************************/
std::shared_ptr<DeviceProfilerFrameData> DeviceProfiler::GetData()
{
Expand Down Expand Up @@ -1337,6 +1355,12 @@ namespace Profiler
\***********************************************************************************/
void DeviceProfiler::PreSubmitCommandBuffers( VkQueue queue )
{
// Synchronize access to the queue if requested.
if( m_Config.m_SynchronizeQueues )
{
m_SubmitMutex.lock();
}

// Configure the queue for performance counters collection, if needed.
if( m_pPerformanceCounters )
{
Expand Down Expand Up @@ -1486,8 +1510,20 @@ namespace Profiler
}
}

// If synchronization between queues is enabled, wait for the queue to become idle while the mutex is still locked, so that the submitted work completes before the next submission starts.
if( m_Config.m_SynchronizeQueues )
{
m_pDevice->Callbacks.QueueWaitIdle( queue );
}

// Get data captured during the last frame
ResolveFrameData( tip );

// Release the lock acquired in PreSubmitCommandBuffers.
if( m_Config.m_SynchronizeQueues )
{
m_SubmitMutex.unlock();
}
}

/***********************************************************************************\
Expand Down
5 changes: 4 additions & 1 deletion VkLayer_profiler_layer/profiler/profiler.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2025 Lukasz Stalmirski
// Copyright (c) 2019-2026 Lukasz Stalmirski
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -78,6 +78,7 @@ namespace Profiler
VkResult SetFrameDelimiter( VkProfilerFrameDelimiterEXT );
VkResult SetDataBufferSize( uint32_t );
VkResult SetMinDataBufferSize( uint32_t );
VkResult SetSynchronizeQueues( bool );
std::shared_ptr<DeviceProfilerFrameData> GetData();

ProfilerCommandBuffer& GetCommandBuffer( VkCommandBuffer commandBuffer );
Expand Down Expand Up @@ -200,6 +201,8 @@ namespace Profiler
DeviceProfilerSynchronization m_Synchronization;
DeviceProfilerSynchronizationTimestamps m_SynchronizationTimestamps;

std::mutex m_SubmitMutex;

// Whether VK_KHR_pipeline_executable_properties is available for the profiled device.
// In such case the internal representations of pipelines may be inspected to give more insight on potential performance issues.
bool m_PipelineExecutablePropertiesEnabled;
Expand Down
4 changes: 3 additions & 1 deletion VkLayer_profiler_layer/profiler/profiler_frontend.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2025 Lukasz Stalmirski
// Copyright (c) 2025-2026 Lukasz Stalmirski
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -82,6 +82,8 @@ namespace Profiler
virtual VkProfilerModeEXT GetProfilerSamplingMode() = 0;
virtual VkResult SetProfilerSamplingMode( VkProfilerModeEXT mode ) = 0;

virtual VkResult SetSynchronizeQueues( bool synchronize ) = 0;

virtual std::string GetObjectName( const struct VkObject& object ) = 0;
virtual void SetObjectName( const struct VkObject& object, const std::string& name ) = 0;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2025 Lukasz Stalmirski
// Copyright (c) 2025-2026 Lukasz Stalmirski
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -528,6 +528,20 @@ namespace Profiler

/***********************************************************************************\

Function:
    SetSynchronizeQueues

Description:
    Forwards the request to enable or disable synchronization of command buffer
    submissions to the profiler and returns the profiler's result unchanged.

\***********************************************************************************/
VkResult DeviceProfilerLayerFrontend::SetSynchronizeQueues( bool synchronize )
{
    // The frontend keeps no state of its own for this option.
    const VkResult result = m_pProfiler->SetSynchronizeQueues( synchronize );
    return result;
}

/***********************************************************************************\

Function:
GetObjectName

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2025 Lukasz Stalmirski
// Copyright (c) 2025-2026 Lukasz Stalmirski
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -82,6 +82,8 @@ namespace Profiler
VkProfilerModeEXT GetProfilerSamplingMode() final;
VkResult SetProfilerSamplingMode( VkProfilerModeEXT mode ) final;

VkResult SetSynchronizeQueues( bool synchronizeQueues ) final;

std::string GetObjectName( const VkObject& object ) final;
void SetObjectName( const VkObject& object, const std::string& name ) final;

Expand Down
3 changes: 2 additions & 1 deletion VkLayer_profiler_layer/profiler_overlay/lang/en_us.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2021 Lukasz Stalmirski
// Copyright (c) 2019-2026 Lukasz Stalmirski
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -176,6 +176,7 @@ namespace Profiler
inline static constexpr char ShowDebugLabels[] = "Show debug labels";
inline static constexpr char ShowShaderCapabilities[] = "Show shader capabilities";
inline static constexpr char ShowEntryPoints[] = "Show entry points";
inline static constexpr char SynchronizeQueues[] = "Synchronize queues";
inline static constexpr char TimeUnit[] = "Time unit";

inline static constexpr char Milliseconds[] = "ms";
Expand Down
9 changes: 9 additions & 0 deletions VkLayer_profiler_layer/profiler_overlay/profiler_overlay.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,8 @@ namespace Profiler
}

SetMaxFrameCount( std::max( config.m_FrameCount, 0 ) );

m_SynchronizeQueues = config.m_SynchronizeQueues;
}

// Don't leave object in partly-initialized state if something went wrong
Expand Down Expand Up @@ -644,6 +646,7 @@ namespace Profiler
m_ShowAllTopPipelines = false;
m_ShowActiveFrame = false;
m_ShowEntryPoints = false;
m_SynchronizeQueues = false;

m_SetLastMainWindowPos = false;

Expand Down Expand Up @@ -6494,6 +6497,12 @@ namespace Profiler
SetMaxFrameCount( std::max<uint32_t>( 0, maxFrameCount ) );
}

// Control queue synchronization
if( ImGui::Checkbox( Lang::SynchronizeQueues, &m_SynchronizeQueues ) )
{
m_Frontend.SetSynchronizeQueues( m_SynchronizeQueues );
}

// Select sampling mode (constant in runtime for now)
ImGui::BeginDisabled();
{
Expand Down
1 change: 1 addition & 0 deletions VkLayer_profiler_layer/profiler_overlay/profiler_overlay.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ namespace Profiler
bool m_ShowAllTopPipelines;
bool m_ShowActiveFrame;
bool m_ShowEntryPoints;
bool m_SynchronizeQueues;

bool GetShowActiveFrame() const;
const FrameDataList& GetActiveFramesList() const;
Expand Down