diff --git a/Docs/options.rst b/Docs/options.rst index 0bb5fd0a..bf84b784 100644 --- a/Docs/options.rst +++ b/Docs/options.rst @@ -199,3 +199,9 @@ This section describes in detail all available configuration options exposed by .. confval:: ref_metrics :type: path :default: empty + +.. confval:: synchronize_queues + :type: bool + :default: false + + When enabled, the layer will insert synchronization primitives to ensure that any work is executed without overlap with the other queues. This can help to reduce variability in performance measurements caused by asynchronous GPU execution. However, it will also significantly impact the overall performance of the application, so it is recommended to use this option only for specific use cases where accurate timing of individual queues is required. diff --git a/VkLayer_profiler_layer/VkLayer_profiler_layer.json.in b/VkLayer_profiler_layer/VkLayer_profiler_layer.json.in index 45bf0fc3..69dad2b6 100644 --- a/VkLayer_profiler_layer/VkLayer_profiler_layer.json.in +++ b/VkLayer_profiler_layer/VkLayer_profiler_layer.json.in @@ -106,7 +106,10 @@ "dependence": { "mode": "ALL", "settings": [ - { "key": "output", "value": "trace" } + { + "key": "output", + "value": "trace" + } ] } } @@ -167,7 +170,10 @@ "dependence": { "mode": "ALL", "settings": [ - { "key": "enable_performance_query_ext", "value": "intel" } + { + "key": "enable_performance_query_ext", + "value": "intel" + } ] } }, @@ -181,8 +187,14 @@ "dependence": { "mode": "ALL", "settings": [ - { "key": "enable_performance_query_ext", "value": "intel" }, - { "key": "performance_query_mode", "value": "stream" } + { + "key": "enable_performance_query_ext", + "value": "intel" + }, + { + "key": "performance_query_mode", + "value": "stream" + } ] } }, @@ -196,7 +208,10 @@ "dependence": { "mode": "ALL", "settings": [ - { "key": "enable_performance_query_ext", "value": "intel" } + { + "key": "enable_performance_query_ext", + "value": "intel" + } ] } } @@ -329,6 +344,14 @@ "env": 
"VKPROF_ref_metrics", "type": "LOAD_FILE", "default": "" + }, + { + "key": "synchronize_queues", + "label": "Synchronize queues", + "description": "Synchronize all command buffer submissions to prevent concurrent execution on multiple queues during measurements.", + "env": "VKPROF_synchronize_queues", + "type": "BOOL", + "default": false } ] } diff --git a/VkLayer_profiler_layer/profiler/profiler.cpp b/VkLayer_profiler_layer/profiler/profiler.cpp index 50705e22..8bdff1ed 100644 --- a/VkLayer_profiler_layer/profiler/profiler.cpp +++ b/VkLayer_profiler_layer/profiler/profiler.cpp @@ -647,6 +647,24 @@ namespace Profiler /***********************************************************************************\ + Function: + SetSynchronizeQueues + + Description: + Enable or disable synchronization of queue submissions. The flag is written while holding m_SubmitMutex; VK_SUCCESS is always returned. + + \***********************************************************************************/ + VkResult DeviceProfiler::SetSynchronizeQueues( bool synchronize ) + { + std::scoped_lock lk( m_SubmitMutex ); + + m_Config.m_SynchronizeQueues = synchronize; + + return VK_SUCCESS; + } + + /***********************************************************************************\ + \***********************************************************************************/ std::shared_ptr DeviceProfiler::GetData() { @@ -1337,6 +1355,12 @@ namespace Profiler \***********************************************************************************/ void DeviceProfiler::PreSubmitCommandBuffers( VkQueue queue ) { + // Synchronize access to the queue if requested; the mutex stays locked until the matching unlock() that follows QueueWaitIdle() and ResolveFrameData(). NOTE(review): m_Config.m_SynchronizeQueues is read here without holding m_SubmitMutex, while SetSynchronizeQueues() writes it under that mutex - confirm this unsynchronized read is intended. + if( m_Config.m_SynchronizeQueues ) + { + m_SubmitMutex.lock(); + } + // Configure the queue for performance counters collection, if needed. if( m_pPerformanceCounters ) { @@ -1486,8 +1510,20 @@ namespace Profiler } } + // If synchronization between queues is enabled, wait for the queue to go idle while the mutex is still locked, so that this work completes before the next submission can proceed. NOTE(review): the result of QueueWaitIdle() is ignored. 
+ if( m_Config.m_SynchronizeQueues ) + { + m_pDevice->Callbacks.QueueWaitIdle( queue ); + } + // Get data captured during the last frame ResolveFrameData( tip ); + + // Release the lock acquired in PreSubmitCommandBuffers. NOTE(review): the flag is re-read here rather than remembered from lock time; if SetSynchronizeQueues() can enable it after PreSubmitCommandBuffers ran, unlock() is called on a mutex this thread does not own, which is undefined behavior for std::mutex - confirm and consider latching the decision per submission. + if( m_Config.m_SynchronizeQueues ) + { + m_SubmitMutex.unlock(); + } } /***********************************************************************************\ diff --git a/VkLayer_profiler_layer/profiler/profiler.h b/VkLayer_profiler_layer/profiler/profiler.h index a76f1b53..f62cabef 100644 --- a/VkLayer_profiler_layer/profiler/profiler.h +++ b/VkLayer_profiler_layer/profiler/profiler.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2025 Lukasz Stalmirski +// Copyright (c) 2019-2026 Lukasz Stalmirski // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -78,6 +78,7 @@ namespace Profiler VkResult SetFrameDelimiter( VkProfilerFrameDelimiterEXT ); VkResult SetDataBufferSize( uint32_t ); VkResult SetMinDataBufferSize( uint32_t ); + VkResult SetSynchronizeQueues( bool ); std::shared_ptr GetData(); ProfilerCommandBuffer& GetCommandBuffer( VkCommandBuffer commandBuffer ); @@ -200,6 +201,8 @@ namespace Profiler DeviceProfilerSynchronization m_Synchronization; DeviceProfilerSynchronizationTimestamps m_SynchronizationTimestamps; + std::mutex m_SubmitMutex; + // Whether VK_KHR_pipeline_executable_properties is available for the profiled device. // In such case the internal representations of pipelines may be inspected to give more insight on potential performance issues. 
bool m_PipelineExecutablePropertiesEnabled; diff --git a/VkLayer_profiler_layer/profiler/profiler_frontend.h b/VkLayer_profiler_layer/profiler/profiler_frontend.h index d65e584a..fbed389a 100644 --- a/VkLayer_profiler_layer/profiler/profiler_frontend.h +++ b/VkLayer_profiler_layer/profiler/profiler_frontend.h @@ -1,4 +1,4 @@ -// Copyright (c) 2025 Lukasz Stalmirski +// Copyright (c) 2025-2026 Lukasz Stalmirski // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -82,6 +82,8 @@ namespace Profiler virtual VkProfilerModeEXT GetProfilerSamplingMode() = 0; virtual VkResult SetProfilerSamplingMode( VkProfilerModeEXT mode ) = 0; + virtual VkResult SetSynchronizeQueues( bool synchronize ) = 0; + virtual std::string GetObjectName( const struct VkObject& object ) = 0; virtual void SetObjectName( const struct VkObject& object, const std::string& name ) = 0; diff --git a/VkLayer_profiler_layer/profiler_layer_objects/profiler_layer_frontend.cpp b/VkLayer_profiler_layer/profiler_layer_objects/profiler_layer_frontend.cpp index e635e906..defae433 100644 --- a/VkLayer_profiler_layer/profiler_layer_objects/profiler_layer_frontend.cpp +++ b/VkLayer_profiler_layer/profiler_layer_objects/profiler_layer_frontend.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2025 Lukasz Stalmirski +// Copyright (c) 2025-2026 Lukasz Stalmirski // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -528,6 +528,20 @@ namespace Profiler /***********************************************************************************\ + Function: + SetSynchronizeQueues + + Description: + Enable or disable synchronization of command buffer submissions. Forwards the request to m_pProfiler and returns its result. 
+ + \***********************************************************************************/ + VkResult DeviceProfilerLayerFrontend::SetSynchronizeQueues( bool synchronize ) + { + return m_pProfiler->SetSynchronizeQueues( synchronize ); + } + + /***********************************************************************************\ + Function: GetObjectName diff --git a/VkLayer_profiler_layer/profiler_layer_objects/profiler_layer_frontend.h b/VkLayer_profiler_layer/profiler_layer_objects/profiler_layer_frontend.h index 275c567a..6d011cd9 100644 --- a/VkLayer_profiler_layer/profiler_layer_objects/profiler_layer_frontend.h +++ b/VkLayer_profiler_layer/profiler_layer_objects/profiler_layer_frontend.h @@ -1,4 +1,4 @@ -// Copyright (c) 2025 Lukasz Stalmirski +// Copyright (c) 2025-2026 Lukasz Stalmirski // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -82,6 +82,8 @@ namespace Profiler VkProfilerModeEXT GetProfilerSamplingMode() final; VkResult SetProfilerSamplingMode( VkProfilerModeEXT mode ) final; + VkResult SetSynchronizeQueues( bool synchronize ) final; + std::string GetObjectName( const VkObject& object ) final; void SetObjectName( const VkObject& object, const std::string& name ) final; diff --git a/VkLayer_profiler_layer/profiler_overlay/lang/en_us.h b/VkLayer_profiler_layer/profiler_overlay/lang/en_us.h index 4a0b9937..34641f08 100644 --- a/VkLayer_profiler_layer/profiler_overlay/lang/en_us.h +++ b/VkLayer_profiler_layer/profiler_overlay/lang/en_us.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2021 Lukasz Stalmirski +// Copyright (c) 2019-2026 Lukasz Stalmirski // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -176,6 +176,7 @@ namespace Profiler inline static constexpr char ShowDebugLabels[] = "Show debug labels"; inline static 
constexpr char ShowShaderCapabilities[] = "Show shader capabilities"; inline static constexpr char ShowEntryPoints[] = "Show entry points"; + inline static constexpr char SynchronizeQueues[] = "Synchronize queues"; inline static constexpr char TimeUnit[] = "Time unit"; inline static constexpr char Milliseconds[] = "ms"; diff --git a/VkLayer_profiler_layer/profiler_overlay/profiler_overlay.cpp b/VkLayer_profiler_layer/profiler_overlay/profiler_overlay.cpp index 96f0ca39..69eb505e 100644 --- a/VkLayer_profiler_layer/profiler_overlay/profiler_overlay.cpp +++ b/VkLayer_profiler_layer/profiler_overlay/profiler_overlay.cpp @@ -554,6 +554,8 @@ namespace Profiler } SetMaxFrameCount( std::max( config.m_FrameCount, 0 ) ); + + m_SynchronizeQueues = config.m_SynchronizeQueues; } // Don't leave object in partly-initialized state if something went wrong @@ -644,6 +646,7 @@ namespace Profiler m_ShowAllTopPipelines = false; m_ShowActiveFrame = false; m_ShowEntryPoints = false; + m_SynchronizeQueues = false; m_SetLastMainWindowPos = false; @@ -6494,6 +6497,12 @@ namespace Profiler SetMaxFrameCount( std::max( 0, maxFrameCount ) ); } + // Control queue synchronization: when ImGui::Checkbox() returns true, forward the current m_SynchronizeQueues value to the frontend. NOTE(review): the VkResult returned by SetSynchronizeQueues() is ignored. + if( ImGui::Checkbox( Lang::SynchronizeQueues, &m_SynchronizeQueues ) ) + { + m_Frontend.SetSynchronizeQueues( m_SynchronizeQueues ); + } + // Select sampling mode (constant in runtime for now) ImGui::BeginDisabled(); { diff --git a/VkLayer_profiler_layer/profiler_overlay/profiler_overlay.h b/VkLayer_profiler_layer/profiler_overlay/profiler_overlay.h index 68267400..21380185 100644 --- a/VkLayer_profiler_layer/profiler_overlay/profiler_overlay.h +++ b/VkLayer_profiler_layer/profiler_overlay/profiler_overlay.h @@ -170,6 +170,7 @@ namespace Profiler bool m_ShowAllTopPipelines; bool m_ShowActiveFrame; bool m_ShowEntryPoints; + bool m_SynchronizeQueues; bool GetShowActiveFrame() const; const FrameDataList& GetActiveFramesList() const;