diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
index 017b0a0cdaff2..e109ead049a04 100644
--- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
+++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.h
@@ -25,6 +25,7 @@
 
 #include "common/HashCombine.h"
 #include "common/MRCHelpers.h"
+#include "common/Timer.h"
 #include "GS/GS.h"
 #include "GSMTLDeviceInfo.h"
 #include "GSMTLSharedHeader.h"
@@ -286,6 +287,16 @@ class GSDeviceMTL final : public GSDevice
 	MRCOwned<id<MTLCommandBuffer>> m_vertex_upload_cmdbuf;
 	MRCOwned<id<MTLBlitCommandEncoder>> m_vertex_upload_encoder;
 
+	// Readback spinning: after a readback, a dummy compute kernel ("waste_time") is queued on the GPU.
+	// m_spin_cycles is adjusted from command buffer completion handlers, so it must only be
+	// read or written while holding m_backref->first.
+	bool m_spin_enable = false;
+	u32 m_spin_cycles = 0;
+	Common::Timer m_spin_timer;
+	MRCOwned<id<MTLBuffer>> m_spin_buf;
+	MRCOwned<id<MTLComputePipelineState>> m_spin_pipeline;
+	MRCOwned<id<MTLCommandBuffer>> m_last_spin_cmdbuf;
+
 	struct DebugEntry
 	{
 		enum Op { Push, Insert, Pop } op;
diff --git a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
index a663aab80ebe7..b3fc0f80de0fa 100644
--- a/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
+++ b/pcsx2/GS/Renderers/Metal/GSDeviceMTL.mm
@@ -258,6 +258,43 @@
 			dev->m_last_finished_draw.store(newval, std::memory_order_release);
 		}
 	}];
+	if (m_last_spin_cmdbuf)
+	{
+		// m_spin_timer is reset when a spin is queued; presumably this is the time that spin had to cover.
+		double seconds_to_spin = m_spin_timer.GetTimeSeconds();
+		[m_current_render_cmdbuf addCompletedHandler:[spin = std::move(m_last_spin_cmdbuf), backref = m_backref, seconds_to_spin](id<MTLCommandBuffer> render){
+			if (@available(macOS 10.15, iOS 10.3, *))
+			{
+				CFTimeInterval start = [spin GPUStartTime];
+				CFTimeInterval end = [spin GPUEndTime];
+				CFTimeInterval rstart = [render GPUStartTime];
+				[[maybe_unused]] CFTimeInterval spin_time = end - start;
+				[[maybe_unused]] CFTimeInterval total_time = rstart - start;
+				// Console.WriteLn("Spin Result: duration: %.2fms, time to fill: %.2fms, ratio: %.2f, target: %.2fms", spin_time * 1000, total_time * 1000, spin_time / total_time, seconds_to_spin * 1000);
+				std::lock_guard lock(backref->first);
+				if (GSDeviceMTL* dev = backref->second)
+				{
+					if (!start || !end)
+					{
+						Console.Warning("Spin never ended???");
+					}
+					else if (spin_time < seconds_to_spin - 0.001)
+					{
+						// Spin ended more than 1ms before the target: lengthen the next one by 1/16.
+						dev->m_spin_cycles += (dev->m_spin_cycles >> 4);
+						// Console.WriteLn("Spin finished early, increasing cycles to %d", dev->m_spin_cycles);
+					}
+					else if (spin_time > seconds_to_spin)
+					{
+						// Spin ran past the target: shorten the next one by 1/16.
+						dev->m_spin_cycles -= (dev->m_spin_cycles >> 4);
+						// Console.WriteLn("Spin finished late, reducing cycles to %d", dev->m_spin_cycles);
+					}
+				}
+			}
+		}];
+		m_last_spin_cmdbuf = nil;
+	}
 	[m_current_render_cmdbuf commit];
 	m_current_render_cmdbuf = nil;
 	m_current_draw++;
@@ -613,6 +650,12 @@ static void setFnConstantI(MTLFunctionConstantValues* fc, unsigned int value, GS
 	m_queue = MRCRetain((__bridge id<MTLCommandQueue>)m_display->GetRenderContext());
 	MTLPixelFormat layer_px_fmt = [(__bridge CAMetalLayer*)m_display->GetRenderSurface() pixelFormat];
 
+	// Opt-in switch: readback spinning is enabled when MTL_SPIN_READBACK starts with '1', 'y' or 'Y'.
+	if (const char* env = getenv("MTL_SPIN_READBACK"))
+		m_spin_enable = env[0] == '1' || env[0] == 'y' || env[0] == 'Y';
+	else
+		m_spin_enable = false;
+
 	m_features.geometry_shader = false;
 	m_features.image_load_store = m_dev.features.primid;
 	m_features.texture_barrier = true;
@@ -952,10 +995,66 @@ static void setFnConstantI(MTLFunctionConstantValues* fc, unsigned int value, GS
 	       destinationOffset:0
 	  destinationBytesPerRow:out_map.pitch
 	destinationBytesPerImage:size];
+	if (m_spin_enable)
+		[encoder updateFence:m_draw_sync_fence];
 	[encoder endEncoding];
 	[cmdbuf popDebugGroup];
 
 	FlushEncoders();
+	if (m_spin_enable)
+	{
+		if (@available(macOS 10.15, iOS 10.3, *))
+		{
+			// Build the spin resources before opening any encoder, so a failure can bail out cleanly.
+			if (!m_spin_pipeline)
+			{
+				NSError* err = nil;
+				m_spin_pipeline = MRCTransfer([m_dev.dev newComputePipelineStateWithFunction:LoadShader(@"waste_time") error:&err]);
+				if (!m_spin_pipeline)
+					Console.Warning("Metal: Failed to create spin pipeline: %s", err ? [[err localizedDescription] UTF8String] : "unknown error");
+			}
+			if (!m_spin_buf)
+				m_spin_buf = MRCTransfer([m_dev.dev newBufferWithLength:sizeof(u32) options:MTLResourceStorageModeShared]);
+
+			if (m_spin_pipeline && m_spin_buf)
+			{
+				u32 cycles;
+				{
+					// Completion handlers adjust m_spin_cycles on another thread while holding this lock.
+					std::lock_guard lock(m_backref->first);
+					if (m_spin_cycles < 1024)
+						m_spin_cycles = 1024;
+					cycles = m_spin_cycles;
+				}
+				// Console.WriteLn("Metal: Spin %d iters", cycles);
+				*(u32*)[m_spin_buf contents] = 0;
+				id<MTLCommandBuffer> spin_cmdbuf = [m_queue commandBuffer];
+				id<MTLComputeCommandEncoder> spin_enc = [spin_cmdbuf computeCommandEncoder];
+				[spin_enc waitForFence:m_draw_sync_fence];
+				[spin_enc setComputePipelineState:m_spin_pipeline];
+				[spin_enc setBuffer:m_spin_buf offset:0 atIndex:1];
+				[spin_enc setBytes:&cycles length:sizeof(cycles) atIndex:0];
+				// A single thread; dispatchThreadgroups does not need non-uniform threadgroup support.
+				[spin_enc dispatchThreadgroups:MTLSizeMake(1, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
+				[spin_enc endEncoding];
+				[spin_cmdbuf commit];
+				m_last_spin_cmdbuf = MRCRetain(spin_cmdbuf);
+			}
+			else
+			{
+				// Nothing to spin with; stop retrying the failed setup on every readback.
+				m_spin_enable = false;
+			}
+
+			// Don't let the CPU go to sleep either!
+			// Poll the status rather than GPUEndTime so a failed command buffer also ends the wait.
+			while ([cmdbuf status] < MTLCommandBufferStatusCompleted)
+				ShortSpin();
+
+			m_spin_timer.Reset();
+		}
+	}
+
 	[cmdbuf waitUntilCompleted];
 
 	out_map.bits = static_cast<u8*>([m_texture_download_buf contents]);
diff --git a/pcsx2/GS/Renderers/Metal/convert.metal b/pcsx2/GS/Renderers/Metal/convert.metal
index e54a94020471f..70e0c50cddfb3 100644
--- a/pcsx2/GS/Renderers/Metal/convert.metal
+++ b/pcsx2/GS/Renderers/Metal/convert.metal
@@ -376,3 +376,16 @@ fragment float4 ps_shadeboost(float4 p [[position]], DirectReadTextureIn
 
 	return float4(conColor, 1);
 }
+
+/// Busy-work kernel: performs `cycles` loads from `spin`, each load supplying the index of the next,
+/// so the iterations are serially dependent. The final value is written back to spin[0].
+/// The host must set spin[0] to 0 before dispatch: the buffer holds a single uint, so any other
+/// value would index out of bounds.
+kernel void waste_time(constant uint& cycles [[buffer(0)]], device uint* spin [[buffer(1)]])
+{
+	uint value = spin[0];
+	for (uint i = 0; i < cycles; i++) {
+		value = spin[value];
+	}
+	spin[0] = value;
+}